From eb22ef063972da49fe834470a046dbd7194ced17 Mon Sep 17 00:00:00 2001
From: Charles Haley
Date: Wed, 25 Jun 2014 10:30:13 +0200
Subject: [PATCH] added re_group function

---
Review notes (ignored by git am; not part of the commit message):
 * Line structure was rebuilt from a whitespace-collapsed copy of this patch.
   Indentation and line breaks of context/removed lines are reconstructed
   from the Python structure and the hunk counts -- verify them against the
   target files before applying.
 * re_group: repl() no longer raises TypeError when the pattern has no
   capture groups (mo.lastindex is None) or when an optional group did not
   take part in the match (mo.group() returns None).
 * re_group documentation string: "reqular" -> "regular".
 * The re_group hunk header, the following hunk offset and the diffstat are
   updated for the seven extra added lines; the index ids are left as in
   the original commit.
 * NOTE(review): evaluate() and the function-dispatch code in the first hunk
   now read self.strip_results, which this patch assigns only in
   safe_format(). Confirm that every other entry point initialises it.

 src/calibre/utils/formatter.py           | 23 ++++++++-----
 src/calibre/utils/formatter_functions.py | 44 ++++++++++++++++++++++--
 2 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index 34eb06a089..6fad97d312 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -473,10 +473,13 @@ class TemplateFormatter(string.Formatter):
                             (func.arg_count > 1 and func.arg_count != len(args)+1):
                         raise ValueError('Incorrect number of arguments for function '+ fmt[0:p])
                     if func.arg_count == 1:
-                        val = func.eval_(self, self.kwargs, self.book, self.locals, val).strip()
+                        val = func.eval_(self, self.kwargs, self.book, self.locals, val)
+                        if self.strip_results:
+                            val = val.strip()
                     else:
-                        val = func.eval_(self, self.kwargs, self.book, self.locals,
-                                         val, *args).strip()
+                        val = func.eval_(self, self.kwargs, self.book, self.locals, val, *args)
+                        if self.strip_results:
+                            val = val.strip()
                 else:
                     return _('%s: unknown function')%fname
         if val:
@@ -487,10 +490,12 @@ class TemplateFormatter(string.Formatter):
 
     def evaluate(self, fmt, args, kwargs):
         if fmt.startswith('program:'):
-            ans = self._eval_program(None, fmt[8:], self.column_name)
+            ans = self._eval_program(kwargs.get('$', None), fmt[8:], self.column_name)
         else:
             ans = self.vformat(fmt, args, kwargs)
-        return self.compress_spaces.sub(' ', ans).strip()
+        if self.strip_results:
+            return self.compress_spaces.sub(' ', ans).strip()
+        return ans
 
     ########## a formatter that throws exceptions ############
 
@@ -505,7 +510,9 @@ class TemplateFormatter(string.Formatter):
     ########## a formatter guaranteed not to throw an exception ############
 
     def safe_format(self, fmt, kwargs, error_value, book,
-                    column_name=None, template_cache=None):
+                    column_name=None, template_cache=None,
+                    strip_results=True):
+        self.strip_results = strip_results
         self.column_name = column_name
         self.template_cache = template_cache
         self.kwargs = kwargs
@@ -513,9 +520,9 @@ class TemplateFormatter(string.Formatter):
         self.composite_values = {}
         self.locals = {}
         try:
-            ans = self.evaluate(fmt, [], kwargs).strip()
+            ans = self.evaluate(fmt, [], kwargs)
         except Exception as e:
-            if DEBUG and getattr(e, 'is_locking_error', False):
+            if DEBUG: # and getattr(e, 'is_locking_error', False):
                 traceback.print_exc()
             ans = error_value + ' ' + e.message
         return ans
diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py
index e3eac0255e..2d9648f7ab 100644
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@@ -553,6 +553,46 @@ class BuiltinRe(BuiltinFormatterFunction):
     def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):
         return re.sub(pattern, replacement, val, flags=re.I)
 
+class BuiltinReGroup(BuiltinFormatterFunction):
+    name = 're_group'
+    arg_count = -1
+    category = 'String manipulation'
+    __doc__ = doc = _('re_group(val, pattern, template_for_group_1, for_group_2, ...) -- '
+            'return a string made by applying the regular expression pattern '
+            'to the val and replacing each matched instance with the string '
+            'computed by replacing each matched group by the value returned '
+            'by the corresponding template. The original matched value for the '
+            'group is available as $. In template program mode, like for '
+            'the template and the eval functions, you use [[ for { and ]] for }.'
+            ' The following example in template program mode looks for series '
+            'with more than one word and uppercases the first word: '
+            "{series:'re_group($, \"(\S* )(.*)\", \"[[$:uppercase()]]\", \"[[$]]\")'}")
+
+    def evaluate(self, formatter, kwargs, mi, locals, *args):
+        from formatter import EvalFormatter
+
+        if (len(args) < 2):
+            return('re_group: incorrect number of arguments')
+        def repl(mo):
+            # Called by re.sub for every match; builds the replacement text
+            # from the capture groups. mo.lastindex is None when no group
+            # matched (e.g. a pattern without groups), so treat it as 0.
+            res = ''
+            for dex in range(1, (mo.lastindex or 0)+1):
+                gv = mo.group(dex)
+                if gv is None:
+                    # Optional group that did not take part in the match.
+                    continue
+                if len(args) > dex + 1:
+                    template = args[dex+1].replace('[[', '{').replace(']]', '}')
+                    res += EvalFormatter().safe_format(template,
+                                {'$': gv}, 'EVAL', None,
+                                strip_results=False)
+                else:
+                    res += gv
+            return res
+        return re.sub(args[1], repl, args[0], flags=re.I)
+
 class BuiltinSwapAroundComma(BuiltinFormatterFunction):
     name = 'swap_around_comma'
     arg_count = 1
@@ -1341,8 +1381,8 @@ _formatter_builtins = [
     BuiltinListSort(), BuiltinListUnion(), BuiltinLookup(),
     BuiltinLowercase(), BuiltinMultiply(), BuiltinNot(), BuiltinOndevice(),
     BuiltinOr(), BuiltinPrint(), BuiltinRawField(),
-    BuiltinRe(), BuiltinSelect(), BuiltinSeriesSort(), BuiltinShorten(),
-    BuiltinStrcat(), BuiltinStrcatMax(),
+    BuiltinRe(), BuiltinReGroup(), BuiltinSelect(), BuiltinSeriesSort(),
+    BuiltinShorten(), BuiltinStrcat(), BuiltinStrcatMax(),
     BuiltinStrcmp(), BuiltinStrInList(), BuiltinStrlen(), BuiltinSubitems(),
     BuiltinSublist(),BuiltinSubstr(), BuiltinSubtract(), BuiltinSwapAroundComma(),
     BuiltinSwitch(), BuiltinTemplate(), BuiltinTest(), BuiltinTitlecase(),