list_re_group template function. Clean up re_group function. Add documentation

2025-07-08 18:54:09 -04:00 · 2014-06-25 12:37:37 +02:00 · 2014-06-25 12:37:37 +02:00 · 41f8ab52f8
commit 41f8ab52f8
parent eb22ef0639
2 changed files with 65 additions and 20 deletions
--- a/manual/template_lang.rst
+++ b/manual/template_lang.rst
@ -301,7 +301,8 @@ The following functions are available in addition to those described in single-f
    * ``list_difference(list1, list2, separator)`` -- return a list made by removing from `list1` any item found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
    * ``list_equals(list1, sep1, list2, sep2, yes_val, no_val)`` -- return `yes_val` if `list1` and `list2` contain the same items, otherwise return `no_val`. The items are determined by splitting each list using the appropriate separator character (`sep1` or `sep2`). The order of items in the lists is not relevant. The compare is case insensitive.
    * ``list_intersection(list1, list2, separator)`` -- return a list made by removing from `list1` any item not found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
-    * ``list_re(src_list, separator, search_re, opt_replace)`` -- Construct a list by first separating `src_list` into items using the `separator` character. For each item in the list, check if it matches `search_re`. If it does, then add it to the list to be returned. If `opt_replace` is not the empty string, then apply the replacement before adding the item to the returned list.
+    * ``list_re(src_list, separator, include_re, opt_replace)`` -- Construct a list by first separating `src_list` into items using the `separator` character. For each item in the list, check if it matches `include_re`. If it does, then add it to the list to be returned. If `opt_replace` is not the empty string, then apply the replacement before adding the item to the returned list.
+	* ``list_re_group(src_list, separator, include_re, search_re, template_for_group_1, for_group_2, ...)`` -- Like list_re except replacements are not optional. It uses re_group(item, search_re, template ...) when doing the replacements.
    * ``list_sort(list, direction, separator)`` -- return list sorted using a case-insensitive sort. If `direction` is zero, the list is sorted ascending, otherwise descending. The list items are separated by separator, as are the items in the returned list.
    * ``list_union(list1, list2, separator)`` -- return a list made by merging the items in list1 and list2, removing duplicate items using a case-insensitive compare. If items differ in case, the one in list1 is used. The items in list1 and list2 are separated by separator, as are the items in the returned list.
    * ``multiply(x, y)`` -- returns x * y. Throws an exception if either x or y are not numbers.
@ -309,6 +310,10 @@ The following functions are available in addition to those described in single-f
    * ``or(value, value, ...)`` -- returns the string "1" if any value is not empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
    * ``print(a, b, ...)`` -- prints the arguments to standard output. Unless you start calibre from the command line (``calibre-debug -g``), the output will go to a black hole.
    * ``raw_field(name)`` -- returns the metadata field named by name without applying any formatting.
+	* ``re_group(val, pattern, template_for_group_1, for_group_2, ...)`` -- return a string made by applying the regular expression pattern to the val and replacing each matched instance with the string computed by replacing each matched group by the value returned by the corresponding template. The original matched value for the group is available as $. In template program mode, like for the template and the eval functions, you use [[ for { and ]] for }. The following example in template program mode looks for series with more than one word and uppercases the first word::
+	
+		{series:'re_group($, "(\S* )(.*)", "[[$:uppercase()]]", "[[$]]")'}
+		
    * ``series_sort()`` -- returns the series sort value.
    * ``strcat(a, b, ...)`` -- can take any number of arguments. Returns a string formed by concatenating all the arguments.
    * ``strcat_max(max, string1, prefix2, string2, ...)`` -- Returns a string formed by concatenating the arguments. The returned value is initialized to string1. `Prefix, string` pairs are added to the end of the value as long as the resulting string length is less than `max`. String1 is returned even if string1 is longer than max. You can pass as many `prefix, string` pairs as you wish.
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@ -568,23 +568,24 @@ class BuiltinReGroup(BuiltinFormatterFunction):
            'with more than one word and uppercases the first word: '
            "{series:'re_group($, \"(\S* )(.*)\", \"[[$:uppercase()]]\", \"[[$]]\")'}")

-    def evaluate(self, formatter, kwargs, mi, locals, *args):
+    def evaluate(self, formatter, kwargs, mi, locals, val, pattern, *args):
        from formatter import EvalFormatter

-        if (len(args) < 2):
-            return('re_group: incorrect number of arguments')
        def repl(mo):
            res = ''
-            for dex in range(1, mo.lastindex+1):
-                if len(args) > dex + 1:
-                    template = args[dex+1].replace('[[', '{').replace(']]', '}')
-                    res += EvalFormatter().safe_format(template,
-                                       {'$': mo.group(dex)}, 'EVAL', None,
-                                       strip_results=False)
-                else:
-                    res += mo.group(dex)
+            if mo and mo.lastindex:
+                for dex in range(0, mo.lastindex):
+                    gv = mo.group(dex+1)
+                    if gv is None:
+                        continue
+                    if len(args) > dex:
+                        template = args[dex].replace('[[', '{').replace(']]', '}')
+                        res += EvalFormatter().safe_format(template, {'$': gv},
+                                           'EVAL', None, strip_results=False)
+                    else:
+                        res += gv
            return res
-        return re.sub(args[1], repl, args[0], flags=re.I)
+        return re.sub(pattern, repl, val, flags=re.I)

 class BuiltinSwapAroundComma(BuiltinFormatterFunction):
    name = 'swap_around_comma'
@ -1193,21 +1194,60 @@ class BuiltinListRe(BuiltinFormatterFunction):
    name = 'list_re'
    arg_count = 4
    category = 'List manipulation'
-    __doc__ = doc = _('list_re(src_list, separator, search_re, opt_replace) -- '
+    __doc__ = doc = _('list_re(src_list, separator, include_re, opt_replace) -- '
            'Construct a list by first separating src_list into items using '
            'the separator character. For each item in the list, check if it '
-            'matches search_re. If it does, then add it to the list to be '
+            'matches include_re. If it does, then add it to the list to be '
            'returned. If opt_replace is not the empty string, then apply the '
            'replacement before adding the item to the returned list.')

-    def evaluate(self, formatter, kwargs, mi, locals, src_list, separator, search_re, opt_replace):
+    def evaluate(self, formatter, kwargs, mi, locals, src_list, separator, include_re, opt_replace):
        l = [l.strip() for l in src_list.split(separator) if l.strip()]
        res = []
        for item in l:
-            if re.search(search_re, item, flags=re.I) is not None:
+            if re.search(include_re, item, flags=re.I) is not None:
                if opt_replace:
-                    item = re.sub(search_re, opt_replace, item)
-                for i in [t.strip() for t in item.split(',') if t.strip()]:
+                    item = re.sub(include_re, opt_replace, item)
+                for i in [t.strip() for t in item.split(separator) if t.strip()]:
+                    if i not in res:
+                        res.append(i)
+        if separator == ',':
+            return ', '.join(res)
+        return separator.join(res)
+
+class BuiltinListReGroup(BuiltinFormatterFunction):
+    name = 'list_re_group'
+    arg_count = -1
+    category = 'List manipulation'
+    __doc__ = doc = _('list_re(src_list, separator, include_re, search_re, group_1_template, ...) -- '
+                      'Like list_re except replacements are not optional. It '
+                      'uses re_group(list_item, search_re, group_1_template, ...) when '
+                      'doing the replacements on the resulting list.')
+
+    def evaluate(self, formatter, kwargs, mi, locals, src_list, separator, include_re,
+                 search_re, *args):
+        from formatter import EvalFormatter
+
+        l = [l.strip() for l in src_list.split(separator) if l.strip()]
+        res = []
+        for item in l:
+            def repl(mo):
+                newval = ''
+                if mo and mo.lastindex:
+                    for dex in range(0, mo.lastindex):
+                        gv = mo.group(dex+1)
+                        if gv is None:
+                            continue
+                        if len(args) > dex:
+                            template = args[dex].replace('[[', '{').replace(']]', '}')
+                            newval += EvalFormatter().safe_format(template, {'$': gv},
+                                              'EVAL', None, strip_results=False)
+                        else:
+                            newval += gv
+                return newval
+            if re.search(include_re, item, flags=re.I) is not None:
+                item = re.sub(search_re, repl, item, flags=re.I)
+                for i in [t.strip() for t in item.split(separator) if t.strip()]:
                    if i not in res:
                        res.append(i)
        if separator == ',':
@ -1371,7 +1411,7 @@ _formatter_builtins = [
    BuiltinIfempty(), BuiltinLanguageCodes(), BuiltinLanguageStrings(),
    BuiltinInList(), BuiltinListDifference(), BuiltinListEquals(),
    BuiltinListIntersection(), BuiltinListitem(), BuiltinListRe(),
-    BuiltinListSort(), BuiltinListUnion(), BuiltinLookup(),
+    BuiltinListReGroup(), BuiltinListSort(), BuiltinListUnion(), BuiltinLookup(),
    BuiltinLowercase(), BuiltinMultiply(), BuiltinNot(),
    BuiltinOndevice(), BuiltinOr(), BuiltinPrint(), BuiltinRawField(),
    BuiltinRe(), BuiltinReGroup(), BuiltinSelect(), BuiltinSeriesSort(),