From 1e7c9fb2c37d8a55755ad93e31c15496e649768a Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Sat, 15 Jan 2011 08:37:48 +0000
Subject: [PATCH 01/19] Fix regression where the '|' character was not
converted to comma in get_metadata
---
src/calibre/library/database2.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index c2381938fb..3a2109e01e 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -694,7 +694,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
aum = []
aus = {}
for (author, author_sort) in aut_list:
- aum.append(author)
+ aum.append(author.replace('|', ','))
aus[author] = author_sort.replace('|', ',')
mi.title = row[fm['title']]
mi.authors = aum
From d7ac11d137e820e6d673118a75937a6fec81ef8d Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Sat, 15 Jan 2011 09:43:10 +0000
Subject: [PATCH 02/19] Change formatter_functions to put the program text
explicitly into a string. If source is available, test that the string == the
real program text
---
src/calibre/utils/formatter_functions.py | 191 ++++++++++++++++++++++-
1 file changed, 189 insertions(+), 2 deletions(-)
diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py
index a66d787095..77ce43ec53 100644
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@@ -81,13 +81,24 @@ class FormatterFunction(object):
class BuiltinFormatterFunction(FormatterFunction):
def __init__(self):
formatter_functions.register_builtin(self)
+ try:
+ # strip off the first character, which is a newline
+ lines = self.program_text[1:]
+ except:
+ lines = ''
+ self.program_text = lines
+
+ # If we can get the source, check if it is the same as in the string.
+ # This is to give an indication during testing that the text is wrong.
eval_func = inspect.getmembers(self.__class__,
lambda x: inspect.ismethod(x) and x.__name__ == 'evaluate')
try:
lines = [l[4:] for l in inspect.getsourcelines(eval_func[0][1])[0]]
except:
- lines = []
- self.program_text = ''.join(lines)
+ return
+ lines = ''.join(lines)
+ if lines != self.program_text:
+ print 'mismatch in program text for function ', self.name
class BuiltinStrcmp(BuiltinFormatterFunction):
name = 'strcmp'
@@ -95,6 +106,15 @@ class BuiltinStrcmp(BuiltinFormatterFunction):
doc = _('strcmp(x, y, lt, eq, gt) -- does a case-insensitive comparison of x '
'and y as strings. Returns lt if x < y. Returns eq if x == y. '
'Otherwise returns gt.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):
+ v = strcmp(x, y)
+ if v < 0:
+ return lt
+ if v == 0:
+ return eq
+ return gt
+'''
def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):
v = strcmp(x, y)
@@ -109,6 +129,16 @@ class BuiltinCmp(BuiltinFormatterFunction):
arg_count = 5
doc = _('cmp(x, y, lt, eq, gt) -- compares x and y after converting both to '
'numbers. Returns lt if x < y. Returns eq if x == y. Otherwise returns gt.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):
+ x = float(x if x else 0)
+ y = float(y if y else 0)
+ if x < y:
+ return lt
+ if x == y:
+ return eq
+ return gt
+'''
def evaluate(self, formatter, kwargs, mi, locals, x, y, lt, eq, gt):
x = float(x if x else 0)
@@ -124,6 +154,14 @@ class BuiltinStrcat(BuiltinFormatterFunction):
arg_count = -1
doc = _('strcat(a, b, ...) -- can take any number of arguments. Returns a '
'string formed by concatenating all the arguments')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, *args):
+ i = 0
+ res = ''
+ for i in range(0, len(args)):
+ res += args[i]
+ return res
+'''
def evaluate(self, formatter, kwargs, mi, locals, *args):
i = 0
@@ -136,6 +174,12 @@ class BuiltinAdd(BuiltinFormatterFunction):
name = 'add'
arg_count = 2
doc = _('add(x, y) -- returns x + y. Throws an exception if either x or y are not numbers.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, x, y):
+ x = float(x if x else 0)
+ y = float(y if y else 0)
+ return unicode(x + y)
+'''
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x else 0)
@@ -146,6 +190,12 @@ class BuiltinSubtract(BuiltinFormatterFunction):
name = 'subtract'
arg_count = 2
doc = _('subtract(x, y) -- returns x - y. Throws an exception if either x or y are not numbers.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, x, y):
+ x = float(x if x else 0)
+ y = float(y if y else 0)
+ return unicode(x - y)
+'''
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x else 0)
@@ -156,6 +206,12 @@ class BuiltinMultiply(BuiltinFormatterFunction):
name = 'multiply'
arg_count = 2
doc = _('multiply(x, y) -- returns x * y. Throws an exception if either x or y are not numbers.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, x, y):
+ x = float(x if x else 0)
+ y = float(y if y else 0)
+ return unicode(x * y)
+'''
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x else 0)
@@ -166,6 +222,12 @@ class BuiltinDivide(BuiltinFormatterFunction):
name = 'divide'
arg_count = 2
doc = _('divide(x, y) -- returns x / y. Throws an exception if either x or y are not numbers.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, x, y):
+ x = float(x if x else 0)
+ y = float(y if y else 0)
+ return unicode(x / y)
+'''
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x else 0)
@@ -182,6 +244,11 @@ class BuiltinTemplate(BuiltinFormatterFunction):
']] for the } character; they are converted automatically. '
'For example, template(\'[[title_sort]]\') will evaluate the '
'template {title_sort} and return its value.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, template):
+ template = template.replace('[[', '{').replace(']]', '}')
+ return formatter.safe_format(template, kwargs, 'TEMPLATE', mi)
+'''
def evaluate(self, formatter, kwargs, mi, locals, template):
template = template.replace('[[', '{').replace(']]', '}')
@@ -194,6 +261,12 @@ class BuiltinEval(BuiltinFormatterFunction):
'variables (those \'assign\'ed to) instead of the book metadata. '
' This permits using the template processor to construct complex '
'results from local variables.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, template):
+ from formatter import eval_formatter
+ template = template.replace('[[', '{').replace(']]', '}')
+ return eval_formatter.safe_format(template, locals, 'EVAL', None)
+'''
def evaluate(self, formatter, kwargs, mi, locals, template):
from formatter import eval_formatter
@@ -205,6 +278,11 @@ class BuiltinAssign(BuiltinFormatterFunction):
arg_count = 2
doc = _('assign(id, val) -- assigns val to id, then returns val. '
'id must be an identifier, not an expression')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, target, value):
+ locals[target] = value
+ return value
+'''
def evaluate(self, formatter, kwargs, mi, locals, target, value):
locals[target] = value
@@ -216,6 +294,11 @@ class BuiltinPrint(BuiltinFormatterFunction):
doc = _('print(a, b, ...) -- prints the arguments to standard output. '
'Unless you start calibre from the command line (calibre-debug -g), '
'the output will go to a black hole.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, *args):
+ print args
+ return None
+'''
def evaluate(self, formatter, kwargs, mi, locals, *args):
print args
@@ -225,6 +308,10 @@ class BuiltinField(BuiltinFormatterFunction):
name = 'field'
arg_count = 1
doc = _('field(name) -- returns the metadata field named by name')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, name):
+ return formatter.get_value(name, [], kwargs)
+'''
def evaluate(self, formatter, kwargs, mi, locals, name):
return formatter.get_value(name, [], kwargs)
@@ -238,6 +325,10 @@ class BuiltinSubstr(BuiltinFormatterFunction):
'characters counting from the right. If end is zero, then it '
'indicates the last character. For example, substr(\'12345\', 1, 0) '
'returns \'2345\', and substr(\'12345\', 1, -1) returns \'234\'.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):
+ return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]
+'''
def evaluate(self, formatter, kwargs, mi, locals, str_, start_, end_):
return str_[int(start_): len(str_) if int(end_) == 0 else int(end_)]
@@ -252,6 +343,23 @@ class BuiltinLookup(BuiltinFormatterFunction):
'function in one composite field to use the value of some other '
'composite field. This is extremely useful when constructing '
'variable save paths')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val, *args):
+ if len(args) == 2: # here for backwards compatibility
+ if val:
+ return formatter.vformat('{'+args[0].strip()+'}', [], kwargs)
+ else:
+ return formatter.vformat('{'+args[1].strip()+'}', [], kwargs)
+ if (len(args) % 2) != 1:
+ raise ValueError(_('lookup requires either 2 or an odd number of arguments'))
+ i = 0
+ while i < len(args):
+ if i + 1 >= len(args):
+ return formatter.vformat('{' + args[i].strip() + '}', [], kwargs)
+ if re.search(args[i], val):
+ return formatter.vformat('{'+args[i+1].strip() + '}', [], kwargs)
+ i += 2
+'''
def evaluate(self, formatter, kwargs, mi, locals, val, *args):
if len(args) == 2: # here for backwards compatibility
@@ -274,6 +382,13 @@ class BuiltinTest(BuiltinFormatterFunction):
arg_count = 3
doc = _('test(val, text if not empty, text if empty) -- return `text if not '
'empty` if the field is not empty, otherwise return `text if empty`')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):
+ if val:
+ return value_if_set
+ else:
+ return value_not_set
+'''
def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):
if val:
@@ -288,6 +403,14 @@ class BuiltinContains(BuiltinFormatterFunction):
'if field contains matches for the regular expression `pattern`. '
'Returns `text if match` if matches are found, otherwise it returns '
'`text if no match`')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals,
+ val, test, value_if_present, value_if_not):
+ if re.search(test, val):
+ return value_if_present
+ else:
+ return value_if_not
+'''
def evaluate(self, formatter, kwargs, mi, locals,
val, test, value_if_present, value_if_not):
@@ -304,6 +427,18 @@ class BuiltinSwitch(BuiltinFormatterFunction):
'the regular expression `pattern` and if so, returns that '
'`value`. If no pattern matches, then else_value is returned. '
'You can have as many `pattern, value` pairs as you want')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val, *args):
+ if (len(args) % 2) != 1:
+ raise ValueError(_('switch requires an odd number of arguments'))
+ i = 0
+ while i < len(args):
+ if i + 1 >= len(args):
+ return args[i]
+ if re.search(args[i], val):
+ return args[i+1]
+ i += 2
+'''
def evaluate(self, formatter, kwargs, mi, locals, val, *args):
if (len(args) % 2) != 1:
@@ -323,6 +458,10 @@ class BuiltinRe(BuiltinFormatterFunction):
'the regular expression. All instances of `pattern` are replaced '
'with `replacement`. As in all of calibre, these are '
'python-compatible regular expressions')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):
+ return re.sub(pattern, replacement, val)
+'''
def evaluate(self, formatter, kwargs, mi, locals, val, pattern, replacement):
return re.sub(pattern, replacement, val)
@@ -332,6 +471,13 @@ class BuiltinIfempty(BuiltinFormatterFunction):
arg_count = 2
doc = _('ifempty(val, text if empty) -- return val if val is not empty, '
'otherwise return `text if empty`')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):
+ if val:
+ return val
+ else:
+ return value_if_empty
+'''
def evaluate(self, formatter, kwargs, mi, locals, val, value_if_empty):
if val:
@@ -354,6 +500,16 @@ class BuiltinShorten(BuiltinFormatterFunction):
'If the field\'s length is less than left chars + right chars + '
'the length of `middle text`, then the field will be used '
'intact. For example, the title `The Dome` would not be changed.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals,
+ val, leading, center_string, trailing):
+ l = max(0, int(leading))
+ t = max(0, int(trailing))
+ if len(val) > l + len(center_string) + t:
+ return val[0:l] + center_string + ('' if t == 0 else val[-t:])
+ else:
+ return val
+'''
def evaluate(self, formatter, kwargs, mi, locals,
val, leading, center_string, trailing):
@@ -371,6 +527,10 @@ class BuiltinCount(BuiltinFormatterFunction):
'separated by `separator`, returning the number of items in the '
'list. Most lists use a comma as the separator, but authors '
'uses an ampersand. Examples: {tags:count(,)}, {authors:count(&)}')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val, sep):
+ return unicode(len(val.split(sep)))
+'''
def evaluate(self, formatter, kwargs, mi, locals, val, sep):
return unicode(len(val.split(sep)))
@@ -384,6 +544,17 @@ class BuiltinListitem(BuiltinFormatterFunction):
'using `list_item(-1,separator)`. If the item is not in the list, '
'then the empty value is returned. The separator has the same '
'meaning as in the count function.')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):
+ if not val:
+ return ''
+ index = int(index)
+ val = val.split(sep)
+ try:
+ return val[index]
+ except:
+ return ''
+'''
def evaluate(self, formatter, kwargs, mi, locals, val, index, sep):
if not val:
@@ -399,6 +570,10 @@ class BuiltinUppercase(BuiltinFormatterFunction):
name = 'uppercase'
arg_count = 1
doc = _('uppercase(val) -- return value of the field in upper case')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val):
+ return val.upper()
+'''
def evaluate(self, formatter, kwargs, mi, locals, val):
return val.upper()
@@ -407,6 +582,10 @@ class BuiltinLowercase(BuiltinFormatterFunction):
name = 'lowercase'
arg_count = 1
doc = _('lowercase(val) -- return value of the field in lower case')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val):
+ return val.lower()
+'''
def evaluate(self, formatter, kwargs, mi, locals, val):
return val.lower()
@@ -415,6 +594,10 @@ class BuiltinTitlecase(BuiltinFormatterFunction):
name = 'titlecase'
arg_count = 1
doc = _('titlecase(val) -- return value of the field in title case')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val):
+ return titlecase(val)
+'''
def evaluate(self, formatter, kwargs, mi, locals, val):
return titlecase(val)
@@ -423,6 +606,10 @@ class BuiltinCapitalize(BuiltinFormatterFunction):
name = 'capitalize'
arg_count = 1
doc = _('capitalize(val) -- return value of the field capitalized')
+ program_text = r'''
+def evaluate(self, formatter, kwargs, mi, locals, val):
+ return capitalize(val)
+'''
def evaluate(self, formatter, kwargs, mi, locals, val):
return capitalize(val)
From 163043de3485cff161ad0ea0c852e34bccdc6bba Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 08:17:40 -0700
Subject: [PATCH 03/19] Fix #8365 (Capitalize not working)
---
src/calibre/utils/icu.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index 659984e7f9..f17ff1b17f 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -80,7 +80,7 @@ def icu_case_sensitive_strcmp(collator, a, b):
def icu_capitalize(s):
s = lower(s)
- return s.replace(s[0], upper(s[0]), 1)
+ return s.replace(s[0], upper(s[0]), 1) if s else s
load_icu()
load_collator()
From ae815183898a8bb2777c7e07d7b1660b0940b025 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 08:28:10 -0700
Subject: [PATCH 04/19] South Africa Mail and Guardian by 77ja65. Fixes #8375
(South Africa Mail & Guardian Newspaper Recipe to add to official list)
---
resources/recipes/mail_and_guardian.recipe | 32 ++++++++++++++++++++++
1 file changed, 32 insertions(+)
create mode 100644 resources/recipes/mail_and_guardian.recipe
diff --git a/resources/recipes/mail_and_guardian.recipe b/resources/recipes/mail_and_guardian.recipe
new file mode 100644
index 0000000000..5b58f3f938
--- /dev/null
+++ b/resources/recipes/mail_and_guardian.recipe
@@ -0,0 +1,32 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1295081935(BasicNewsRecipe):
+    '''News recipe for the Mail & Guardian (www.mg.co.za) RSS feeds.'''
+    title = u'Mail & Guardian ZA News'
+    __author__ = '77ja65'
+    language = 'en'
+    oldest_article = 7
+    max_articles_per_feed = 30
+    no_stylesheets = True
+    masthead_url = 'http://c1608832.cdn.cloudfiles.rackspacecloud.com/mg_logo.gif'
+    remove_tags_after = [dict(id='content')]
+    feeds = [
+        (u'National News', u'http://www.mg.co.za/rss/national'),
+        (u'Top Stories', u'http://www.mg.co.za/rss'),
+        (u'Africa News', u'http://www.mg.co.za/rss/africa'),
+        (u'Sport', u'http://www.mg.co.za/rss/sport'),
+        (u'Business', u'http://www.mg.co.za/rss/business'),
+        (u'And In Other News', u'http://www.mg.co.za/rss/and-in-other-news'),
+        (u'World News', u'http://www.mg.co.za/rss/world')
+    ]
+
+    def print_version(self, url):
+        '''Return the print-format URL corresponding to an article URL.'''
+        return url.replace('http://www.mg.co.za/article/',
+                'http://www.mg.co.za/printformat/single/')
+
+    # Keep every CSS property name on one line; splitting "font-weight" across lines makes the declaration invalid.
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        '''
From a3ae3121eb843f57994751d6d2e93abb0642c155 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 09:53:43 -0700
Subject: [PATCH 05/19] Fix #8366 (Adding Capitalize option in Edit metadata
individually)
---
src/calibre/gui2/widgets.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py
index d87bb45f7a..f2ff783a76 100644
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@@ -386,11 +386,13 @@ class LineEditECM(object):
action_lower_case = case_menu.addAction(_('Lower Case'))
action_swap_case = case_menu.addAction(_('Swap Case'))
action_title_case = case_menu.addAction(_('Title Case'))
+ action_capitalize = case_menu.addAction(_('Capitalize'))
self.connect(action_upper_case, SIGNAL('triggered()'), self.upper_case)
self.connect(action_lower_case, SIGNAL('triggered()'), self.lower_case)
self.connect(action_swap_case, SIGNAL('triggered()'), self.swap_case)
self.connect(action_title_case, SIGNAL('triggered()'), self.title_case)
+ self.connect(action_capitalize, SIGNAL('triggered()'), self.capitalize)
menu.addMenu(case_menu)
menu.exec_(event.globalPos())
@@ -408,6 +410,10 @@ class LineEditECM(object):
from calibre.utils.titlecase import titlecase
self.setText(titlecase(unicode(self.text())))
+ def capitalize(self):
+ from calibre.utils.icu import capitalize
+ self.setText(capitalize(unicode(self.text())))
+
class EnLineEdit(LineEditECM, QLineEdit):
From 9e90f63214cb5b75900a0b762cef7735f9512b29 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 10:14:31 -0700
Subject: [PATCH 06/19] template-functions.json skeleton
---
setup/resources.py | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/setup/resources.py b/setup/resources.py
index 977d753828..03d9e28ea6 100644
--- a/setup/resources.py
+++ b/setup/resources.py
@@ -84,6 +84,12 @@ class Resources(Command):
cPickle.dump(complete, open(dest, 'wb'), -1)
+ self.info('\tCreating template-functions.json')
+ dest = self.j(self.RESOURCES, 'template-functions.json')
+ function_dict = {'test': 'def test(*args): return test'}
+ import json
+ json.dump(function_dict, open(dest, 'wb'))
+
def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle')
From cde186f1e498884003ae4ccb9d9de3e68881afdf Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Sat, 15 Jan 2011 18:08:24 +0000
Subject: [PATCH 07/19] Fix #8378 - bad author sort when diacritic is used.
Should actually be bad partitioning by first letter.
---
src/calibre/gui2/tag_view.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index 90d7ce698a..291c5205cd 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -730,7 +730,7 @@ class TagsModel(QAbstractItemModel): # {{{
else:
collapse_model = 'partition'
collapse_template = tweaks['categories_collapsed_popularity_template']
- collapse_letter = None
+ collapse_letter = collapse_letter_sk = None
for i, r in enumerate(self.row_map):
if self.hidden_categories and self.categories[i] in self.hidden_categories:
@@ -782,8 +782,10 @@ class TagsModel(QAbstractItemModel): # {{{
ts = tag.sort
if not ts:
ts = ' '
- if upper(ts[0]) != collapse_letter:
+ sk = sort_key(ts)[0]
+ if sk[0] != collapse_letter_sk:
collapse_letter = upper(ts[0])
+ collapse_letter_sk = sort_key(collapse_letter)[0]
sub_cat = TagTreeItem(parent=category,
data = collapse_letter,
category_icon = category_node.icon,
From a719ed8fd9512659204237817db53fe5cf41b922 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Sat, 15 Jan 2011 18:32:06 +0000
Subject: [PATCH 08/19] Better exception handling in tag_view
---
src/calibre/gui2/tag_view.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index b9c4464c57..f6eac49426 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -785,13 +785,14 @@ class TagsModel(QAbstractItemModel): # {{{
try:
sk = sort_key(ts)[0]
except:
- sk = ' '
+ sk = ts[0]
+
if sk != collapse_letter_sk:
collapse_letter = upper(ts[0])
try:
collapse_letter_sk = sort_key(collapse_letter)[0]
except:
- collapse_letter_sk = ' '
+ collapse_letter_sk = collapse_letter
sub_cat = TagTreeItem(parent=category,
data = collapse_letter,
category_icon = category_node.icon,
From 789747f869f9679717b10a6199340ad777093042 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 12:32:47 -0700
Subject: [PATCH 09/19] Start work on refactoring the edit metadata dialog
---
src/calibre/gui2/actions/edit_metadata.py | 2 +-
src/calibre/gui2/metadata/__init__.py | 9 +++++++++
.../gui2/{metadata.py => metadata/bulk_download.py} | 0
3 files changed, 10 insertions(+), 1 deletion(-)
create mode 100644 src/calibre/gui2/metadata/__init__.py
rename src/calibre/gui2/{metadata.py => metadata/bulk_download.py} (100%)
diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index b01ab1ba21..f50251e700 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -94,7 +94,7 @@ class EditMetadataAction(InterfaceAction):
get_social_metadata = config['get_social_metadata']
else:
get_social_metadata = set_social_metadata
- from calibre.gui2.metadata import DoDownload
+ from calibre.gui2.metadata.bulk_download import DoDownload
if set_social_metadata is not None and set_social_metadata:
x = _('social metadata')
else:
diff --git a/src/calibre/gui2/metadata/__init__.py b/src/calibre/gui2/metadata/__init__.py
new file mode 100644
index 0000000000..68dfb8d2b5
--- /dev/null
+++ b/src/calibre/gui2/metadata/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+
+
diff --git a/src/calibre/gui2/metadata.py b/src/calibre/gui2/metadata/bulk_download.py
similarity index 100%
rename from src/calibre/gui2/metadata.py
rename to src/calibre/gui2/metadata/bulk_download.py
From 46119745cb1a74a95c1f176f633ef582876da475 Mon Sep 17 00:00:00 2001
From: GRiker
Date: Sat, 15 Jan 2011 13:58:23 -0700
Subject: [PATCH 10/19] more informative catalog error message exit
---
src/calibre/gui2/actions/catalog.py | 8 +-
src/calibre/library/catalog.py | 175 ++++++++++++++--------------
2 files changed, 94 insertions(+), 89 deletions(-)
diff --git a/src/calibre/gui2/actions/catalog.py b/src/calibre/gui2/actions/catalog.py
index d75b0dfa5a..6d3bb539a2 100644
--- a/src/calibre/gui2/actions/catalog.py
+++ b/src/calibre/gui2/actions/catalog.py
@@ -28,7 +28,7 @@ class GenerateCatalogAction(InterfaceAction):
if not ids:
return error_dialog(self.gui, _('No books selected'),
- _('No books selected to generate catalog for'),
+ _('No books selected for catalog generation'),
show=True)
db = self.gui.library_view.model().db
@@ -55,9 +55,9 @@ class GenerateCatalogAction(InterfaceAction):
def catalog_generated(self, job):
if job.result:
- # Search terms nulled catalog results
- return error_dialog(self.gui, _('No books found'),
- _("No books to catalog\nCheck job details"),
+ # Error during catalog generation
+ return error_dialog(self.gui, _('Catalog generation terminated'),
+ job.result,
show=True)
if job.failed:
return self.gui.job_exception(job)
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index c045ccf686..087d40c4eb 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1144,7 +1144,9 @@ class EPUB_MOBI(CatalogPlugin):
def error(self):
def fget(self):
return self.__error
- return property(fget=fget)
+ def fset(self, val):
+ self.__error = val
+ return property(fget=fget,fset=fset)
@dynamic_property
def generateForKindle(self):
def fget(self):
@@ -1411,6 +1413,88 @@ class EPUB_MOBI(CatalogPlugin):
except:
pass
+ def fetchBooksByAuthor(self):
+ '''
+ Generate a list of titles sorted by author from the database
+ return = Success
+ '''
+
+ self.updateProgressFullStep("Sorting database")
+
+ '''
+ # Sort titles case-insensitive, by author
+ self.booksByAuthor = sorted(self.booksByTitle,
+ key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
+ '''
+
+ self.booksByAuthor = list(self.booksByTitle)
+ self.booksByAuthor.sort(self.author_compare)
+
+ if False and self.verbose:
+ self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
+ self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
+ for title in self.booksByAuthor:
+ self.opts.log.info((u" %-30s %-20s%5s " % \
+ (title['title'][:30],
+ title['series'][:20] if title['series'] else '',
+ title['series_index'],
+ )).encode('utf-8'))
+ raise SystemExit
+
+ # Build the unique_authors set from existing data
+ authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
+
+ # authors[] contains a list of all book authors, with multiple entries for multiple books by author
+ # authors[]: (([0]:friendly [1]:sort))
+ # unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
+ books_by_current_author = 0
+ current_author = authors[0]
+ multiple_authors = False
+ unique_authors = []
+ for (i,author) in enumerate(authors):
+ if author != current_author:
+ # Note that current_author and author are tuples: (friendly, sort)
+ multiple_authors = True
+
+ if author != current_author and i:
+ # Warn, exit if friendly matches previous, but sort doesn't
+ if author[0] == current_author[0]:
+ error_msg = _('''
+\n*** Metadata error ***
+Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
+Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
+then rebuild the catalog.\n''').format(author[0])
+
+ self.opts.log.warn(error_msg)
+ self.error = error_msg
+ return False
+
+ # New author, save the previous author/sort/count
+ unique_authors.append((current_author[0], icu_title(current_author[1]),
+ books_by_current_author))
+ current_author = author
+ books_by_current_author = 1
+ elif i==0 and len(authors) == 1:
+ # Allow for single-book lists
+ unique_authors.append((current_author[0], icu_title(current_author[1]),
+ books_by_current_author))
+ else:
+ books_by_current_author += 1
+ else:
+ # Add final author to list or single-author dataset
+ if (current_author == author and len(authors) > 1) or not multiple_authors:
+ unique_authors.append((current_author[0], icu_title(current_author[1]),
+ books_by_current_author))
+
+ if False and self.verbose:
+ self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
+ for author in unique_authors:
+ self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
+ author[2])).encode('utf-8'))
+
+ self.authors = unique_authors
+ return True
+
def fetchBooksByTitle(self):
self.updateProgressFullStep("Fetching database")
@@ -1562,90 +1646,9 @@ class EPUB_MOBI(CatalogPlugin):
title['title_sort'][0:40])).decode('mac-roman'))
return True
else:
+ self.error = _("No books found to catalog.\nCheck 'Excluded books' criteria in E-book options.")
return False
- def fetchBooksByAuthor(self):
- '''
- Generate a list of titles sorted by author from the database
- return = Success
- '''
-
- self.updateProgressFullStep("Sorting database")
-
- '''
- # Sort titles case-insensitive, by author
- self.booksByAuthor = sorted(self.booksByTitle,
- key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
- '''
-
- self.booksByAuthor = list(self.booksByTitle)
- self.booksByAuthor.sort(self.author_compare)
-
- if False and self.verbose:
- self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
- self.opts.log.info(" %-30s %-20s %s" % ('title', 'series', 'series_index'))
- for title in self.booksByAuthor:
- self.opts.log.info((u" %-30s %-20s%5s " % \
- (title['title'][:30],
- title['series'][:20] if title['series'] else '',
- title['series_index'],
- )).encode('utf-8'))
- raise SystemExit
-
- # Build the unique_authors set from existing data
- authors = [(record['author'], record['author_sort'].capitalize()) for record in self.booksByAuthor]
-
- # authors[] contains a list of all book authors, with multiple entries for multiple books by author
- # authors[]: (([0]:friendly [1]:sort))
- # unique_authors[]: (([0]:friendly [1]:sort [2]:book_count))
- books_by_current_author = 0
- current_author = authors[0]
- multiple_authors = False
- unique_authors = []
- for (i,author) in enumerate(authors):
- if author != current_author:
- # Note that current_author and author are tuples: (friendly, sort)
- multiple_authors = True
-
- if author != current_author and i:
- # Warn, exit if friendly matches previous, but sort doesn't
- if author[0] == current_author[0]:
- error_msg = _('''
-\n*** Metadata error ***
-Inconsistent Author Sort values for Author '{0}', unable to continue building catalog.
-Select all books by '{0}', apply correct Author Sort value in Edit Metadata dialog,
-then rebuild the catalog.
-*** Terminating catalog generation ***\n''').format(author[0])
-
- self.opts.log.warn(error_msg)
- return False
-
- # New author, save the previous author/sort/count
- unique_authors.append((current_author[0], icu_title(current_author[1]),
- books_by_current_author))
- current_author = author
- books_by_current_author = 1
- elif i==0 and len(authors) == 1:
- # Allow for single-book lists
- unique_authors.append((current_author[0], icu_title(current_author[1]),
- books_by_current_author))
- else:
- books_by_current_author += 1
- else:
- # Add final author to list or single-author dataset
- if (current_author == author and len(authors) > 1) or not multiple_authors:
- unique_authors.append((current_author[0], icu_title(current_author[1]),
- books_by_current_author))
-
- if False and self.verbose:
- self.opts.log.info("\nfetchBooksByauthor(): %d unique authors" % len(unique_authors))
- for author in unique_authors:
- self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
- author[2])).encode('utf-8'))
-
- self.authors = unique_authors
- return True
-
def fetchBookmarks(self):
'''
Collect bookmarks for catalog entries
@@ -5069,6 +5072,8 @@ then rebuild the catalog.
abort_after_input_dump=False)
plumber.merge_ui_recommendations(recommendations)
plumber.run()
- return 0
+ # returns to gui2.actions.catalog:catalog_generated()
+ return None
else:
- return 1
+ # returns to gui2.actions.catalog:catalog_generated()
+ return catalog.error
From c858e77414dc7e66c29bbbc406bfd6a7cb74f434 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 15:11:00 -0700
Subject: [PATCH 11/19] ...
---
src/calibre/manual/faq.rst | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 2c0d2a6173..0e8c101620 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -437,6 +437,15 @@ My antivirus program claims |app| is a virus/trojan?
Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it.
+How do I back up |app|?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The most important thing to back up is the |app| library folder, which contains all your books and metadata. This is the folder you chose for your |app| library when you ran |app| for the first time. You can get the path to the library folder by clicking the |app| icon on the main toolbar. You must back up this complete folder with all its files and sub-folders.
+
+You can switch |app| to using a backed up library folder by simply clicking the |app| icon on the toolbar and choosing your backup library folder.
+
+If you want to back up the |app| configuration/plugins, you have to back up the config directory. You can find this config directory via :guilabel:`Preferences->Miscellaneous`. Note that restoring configuration directories is not officially supported, but should work in most cases. Just copy the contents of the backup directory into the current configuration directory to restore.
+
How do I use purchased EPUB books with |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Most purchased EPUB books have `DRM `_. This prevents |app| from opening them. You can still use |app| to store and transfer them to your e-book reader. First, you must authorize your reader on a windows machine with Adobe Digital Editions. Once this is done, EPUB books transferred with |app| will work fine on your reader. When you purchase an epub book from a website, you will get an ".acsm" file. This file should be opened with Adobe Digital Editions, which will then download the actual ".epub" e-book. The e-book file will be stored in the folder "My Digital Editions", from where you can add it to |app|.
From 1005fd234f15c36593e91ce9c573e0e905974973 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 15:19:59 -0700
Subject: [PATCH 12/19] Don't use subdirs when sending files to the SNE
---
src/calibre/devices/sne/driver.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/devices/sne/driver.py b/src/calibre/devices/sne/driver.py
index bb8d34c59c..04e5cd0d76 100644
--- a/src/calibre/devices/sne/driver.py
+++ b/src/calibre/devices/sne/driver.py
@@ -33,6 +33,6 @@ class SNE(USBMS):
STORAGE_CARD_VOLUME_LABEL = 'SNE Storage Card'
EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Books'
- SUPPORTS_SUB_DIRS = True
+ SUPPORTS_SUB_DIRS = False
From f6d72fbe0b6bacb40774f6fd175bcf77bc49ef96 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 15:46:19 -0700
Subject: [PATCH 13/19] RTF Input: Fix regression in 0.7.40 that broke
conversion of some old style RTF files
---
src/calibre/ebooks/rtf/input.py | 1 -
src/calibre/ebooks/rtf2xml/ParseRtf.py | 15 ++--
.../ebooks/rtf2xml/default_encoding.py | 58 +++++++------
src/calibre/ebooks/rtf2xml/delete_info.py | 36 +++-----
src/calibre/ebooks/rtf2xml/info.py | 84 +++++++++++++------
src/calibre/ebooks/rtf2xml/process_tokens.py | 21 +++--
src/calibre/ebooks/rtf2xml/tokenize.py | 33 ++++++--
7 files changed, 153 insertions(+), 95 deletions(-)
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 92ac8a2519..d1a6b7c88a 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -286,7 +286,6 @@ class RTFInput(InputFormatPlugin):
try:
xml = self.generate_xml(stream.name)
except RtfInvalidCodeException, e:
- raise
raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.\n%s')%e)
diff --git a/src/calibre/ebooks/rtf2xml/ParseRtf.py b/src/calibre/ebooks/rtf2xml/ParseRtf.py
index cdd9a3d088..d673836210 100755
--- a/src/calibre/ebooks/rtf2xml/ParseRtf.py
+++ b/src/calibre/ebooks/rtf2xml/ParseRtf.py
@@ -226,10 +226,6 @@ class ParseRtf:
try:
return_value = process_tokens_obj.process_tokens()
except InvalidRtfException, msg:
- try:
- os.remove(self.__temp_file)
- except OSError:
- pass
#Check to see if the file is correctly encoded
encode_obj = default_encoding.DefaultEncoding(
in_file = self.__temp_file,
@@ -241,14 +237,17 @@ class ParseRtf:
check_encoding_obj = check_encoding.CheckEncoding(
bug_handler = RtfInvalidCodeException,
)
- enc = encode_obj.get_codepage()
- if enc != 'mac_roman':
- enc = 'cp' + enc
+ enc = 'cp' + encode_obj.get_codepage()
+ msg = 'Exception in token processing'
if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, str) \
else self.__file.encode('utf-8')
msg = 'File %s does not appear to be correctly encoded.\n' % file_name
- raise InvalidRtfException, msg
+ try:
+ os.remove(self.__temp_file)
+ except OSError:
+ pass
+ raise InvalidRtfException, msg
delete_info_obj = delete_info.DeleteInfo(
in_file = self.__temp_file,
copy = self.__copy,
diff --git a/src/calibre/ebooks/rtf2xml/default_encoding.py b/src/calibre/ebooks/rtf2xml/default_encoding.py
index 53887e0d90..3ddfbcd321 100755
--- a/src/calibre/ebooks/rtf2xml/default_encoding.py
+++ b/src/calibre/ebooks/rtf2xml/default_encoding.py
@@ -74,9 +74,6 @@ class DefaultEncoding:
if not self.__datafetched:
self._encoding()
self.__datafetched = True
- if self.__platform == 'Macintosh':
- code_page = self.__code_page
- else:
code_page = 'ansicpg' + self.__code_page
return self.__platform, code_page, self.__default_num
@@ -94,49 +91,60 @@ class DefaultEncoding:
def _encoding(self):
with open(self.__file, 'r') as read_obj:
+ cpfound = False
if not self.__fetchraw:
for line in read_obj:
self.__token_info = line[:16]
if self.__token_info == 'mi 3:
- msg = 'flag problem\n'
+ msg = 'Flag problem\n'
raise self.__bug_handler, msg
return True
elif self.__token_info in self.__allowable :
@@ -173,8 +171,8 @@ class DeleteInfo:
Return True for all control words.
Return False otherwise.
"""
- if self.__delete_count == self.__cb_count and self.__token_info ==\
- 'cb33\n
+
def __collect_tokens_func(self, line):
"""
Requires:
@@ -194,18 +227,19 @@ class Info:
att = line[6:16]
value = line[20:-1]
att_changed = self.__token_dict.get(att)
- if att_changed == None:
+ if att_changed is None:
if self.__run_level > 3:
- msg = 'no dictionary match for %s\n' % att
+ msg = 'No dictionary match for %s\n' % att
raise self.__bug_handler, msg
else:
self.__text_string += '<%s>%s' % (att_changed, value)
+
def __single_field_func(self, line, tag):
value = line[20:-1]
self.__write_obj.write(
- 'mi%s\n' % (tag, tag, value)
+ 'mi%s\n' % (tag, tag, value)
)
+
def __after_info_table_func(self, line):
"""
Requires:
@@ -217,6 +251,7 @@ class Info:
the file.
"""
self.__write_obj.write(line)
+
def fix_info(self):
"""
Requires:
@@ -234,20 +269,15 @@ class Info:
information table, simply write the line to the output file.
"""
self.__initiate_values()
- read_obj = open(self.__file, 'r')
- self.__write_obj = open(self.__write_to, 'w')
- line_to_read = 1
- while line_to_read:
- line_to_read = read_obj.readline()
- line = line_to_read
- self.__token_info = line[:16]
- action = self.__state_dict.get(self.__state)
- if action == None:
- sys.stderr.write('no no matching state in module styles.py\n')
- sys.stderr.write(self.__state + '\n')
- action(line)
- read_obj.close()
- self.__write_obj.close()
+ with open(self.__file, 'r') as read_obj:
+ with open(self.__write_to, 'wb') as self.__write_obj:
+ for line in read_obj:
+ self.__token_info = line[:16]
+ action = self.__state_dict.get(self.__state)
+ if action is None:
+ sys.stderr.write('No matching state in module styles.py\n')
+ sys.stderr.write(self.__state + '\n')
+ action(line)
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "info.data")
diff --git a/src/calibre/ebooks/rtf2xml/process_tokens.py b/src/calibre/ebooks/rtf2xml/process_tokens.py
index 9460af07fc..c6cf124425 100755
--- a/src/calibre/ebooks/rtf2xml/process_tokens.py
+++ b/src/calibre/ebooks/rtf2xml/process_tokens.py
@@ -70,7 +70,7 @@ class ProcessTokens:
';' : ('mc', ';', self.ms_sub_func),
# this must be wrong
'-' : ('mc', '-', self.ms_sub_func),
- 'line' : ('mi', 'hardline-break', self.hardline_func), #calibre
+ 'line' : ('mi', 'hardline-break', self.direct_conv_func), #calibre
# misc => ml
'*' : ('ml', 'asterisk__', self.default_func),
':' : ('ml', 'colon_____', self.default_func),
@@ -78,7 +78,6 @@ class ProcessTokens:
'backslash' : ('nu', '\\', self.text_func),
'ob' : ('nu', '{', self.text_func),
'cb' : ('nu', '}', self.text_func),
- #'line' : ('nu', ' ', self.text_func), calibre
# paragraph formatting => pf
'page' : ('pf', 'page-break', self.default_func),
'par' : ('pf', 'par-end___', self.default_func),
@@ -231,11 +230,15 @@ class ProcessTokens:
'trhdr' : ('tb', 'row-header', self.default_func),
# preamble => pr
# document information => di
+ # TODO integrate \userprops
'info' : ('di', 'doc-info__', self.default_func),
+ 'title' : ('di', 'title_____', self.default_func),
'author' : ('di', 'author____', self.default_func),
'operator' : ('di', 'operator__', self.default_func),
- 'title' : ('di', 'title_____', self.default_func),
+ 'manager' : ('di', 'manager___', self.default_func),
+ 'company' : ('di', 'company___', self.default_func),
'keywords' : ('di', 'keywords__', self.default_func),
+ 'category' : ('di', 'category__', self.default_func),
'doccomm' : ('di', 'doc-notes_', self.default_func),
'comment' : ('di', 'doc-notes_', self.default_func),
'subject' : ('di', 'subject___', self.default_func),
@@ -244,11 +247,19 @@ class ProcessTokens:
'mo' : ('di', 'month_____', self.default_func),
'dy' : ('di', 'day_______', self.default_func),
'min' : ('di', 'minute____', self.default_func),
+ 'sec' : ('di', 'second____', self.default_func),
'revtim' : ('di', 'revis-time', self.default_func),
+ 'edmins' : ('di', 'edit-time_', self.default_func),
+ 'printim' : ('di', 'print-time', self.default_func),
+ 'buptim' : ('di', 'backuptime', self.default_func),
'nofwords' : ('di', 'num-of-wor', self.default_func),
'nofchars' : ('di', 'num-of-chr', self.default_func),
+ 'nofcharsws' : ('di', 'numofchrws', self.default_func),
'nofpages' : ('di', 'num-of-pag', self.default_func),
- 'edmins' : ('di', 'edit-time_', self.default_func),
+ 'version' : ('di', 'version___', self.default_func),
+ 'vern' : ('di', 'intern-ver', self.default_func),
+ 'hlinkbase' : ('di', 'linkbase__', self.default_func),
+ 'id' : ('di', 'internalID', self.default_func),
# headers and footers => hf
'headerf' : ('hf', 'head-first', self.default_func),
'headerl' : ('hf', 'head-left_', self.default_func),
@@ -605,7 +616,7 @@ class ProcessTokens:
def ms_sub_func(self, pre, token, num):
return 'tx ", input_file)
input_file = self.__utf_ud.sub("\\{\\uc0 \g<1>\\}", input_file)
#remove \n in bin data
@@ -127,7 +131,7 @@ class Tokenize:
# this is for older RTF
#line = re.sub(self.__par_exp, '\\par ', line)
#return filter(lambda x: len(x) > 0, \
- #(self.__remove_line.sub('', x) for x in tokens))
+ #(self.__remove_line.sub('', x) for x in tokens))
def __compile_expressions(self):
SIMPLE_RPL = {
@@ -153,8 +157,6 @@ class Tokenize:
# put a backslash in front of to eliminate special cases and
# make processing easier
"}": "\\}",
- # this is for older RTF
- r'\\$': '\\par ',
}
self.__replace_spchar = MReplace(SIMPLE_RPL)
#add ;? in case of char following \u
@@ -168,10 +170,12 @@ class Tokenize:
#why keep backslash whereas \is replaced before?
#remove \n from endline char
self.__splitexp = re.compile(r"(\\[{}]|\n|\\[^\s\\{}&]+(?:[ \t\r\f\v])?)")
+ #this is for old RTF
+ self.__par_exp = re.compile(r'\\\n+')
+ # self.__par_exp = re.compile(r'\\$')
#self.__bin_exp = re.compile(r"\\bin(-?\d{1,8}) {0,1}")
#self.__utf_exp = re.compile(r"^\\u(-?\d{3,6})")
#self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\n|\\[^\s\\{}&]+(?:\s)?)")
- #self.__par_exp = re.compile(r'\\$')
#self.__remove_line = re.compile(r'\n+')
#self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")
##self.num_exp = re.compile(r"(\*|:|[a-zA-Z]+)(.*)")
@@ -199,7 +203,24 @@ class Tokenize:
copy_obj = copy.Copy(bug_handler = self.__bug_handler)
if self.__copy:
copy_obj.copy_file(self.__write_to, "tokenize.data")
+ # if self.__out_file:
+ # self.__file = self.__out_file
copy_obj.rename(self.__write_to, self.__file)
os.remove(self.__write_to)
- #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
\ No newline at end of file
+ #self.__special_tokens = [ '_', '~', "'", '{', '}' ]
+
+# import sys
+# def main(args=sys.argv):
+ # if len(args) < 1:
+ # print 'No file'
+ # return
+ # file = 'data_tokens.txt'
+ # if len(args) == 3:
+ # file = args[2]
+ # to = Tokenize(args[1], Exception, out_file = file)
+ # to.tokenize()
+
+
+# if __name__ == '__main__':
+ # sys.exit(main())
\ No newline at end of file
From 631ba316dfab07be7263162880c339af4deb8228 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 16:58:57 -0700
Subject: [PATCH 14/19] ...
---
resources/recipes/sportsillustrated.recipe | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/resources/recipes/sportsillustrated.recipe b/resources/recipes/sportsillustrated.recipe
index dd1df16ac7..f5a7b4c32b 100644
--- a/resources/recipes/sportsillustrated.recipe
+++ b/resources/recipes/sportsillustrated.recipe
@@ -1,5 +1,5 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+#from calibre.ebooks.BeautifulSoup import BeautifulSoup
from urllib import quote
class SportsIllustratedRecipe(BasicNewsRecipe) :
@@ -91,7 +91,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
# expire : no idea what value to use
# All this comes from the Javascript function that redirects to the print version. It's called PT() and is defined in the file 48.js
- def preprocess_html(self, soup):
+ '''def preprocess_html(self, soup):
header = soup.find('div', attrs = {'class' : 'siv_artheader'})
homeMadeSoup = BeautifulSoup('')
body = homeMadeSoup.body
@@ -115,4 +115,5 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
body.append(para)
return homeMadeSoup
+ '''
From 47f4df49b62b4496c04e5ee059247fa820d7e4aa Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 23:05:39 -0700
Subject: [PATCH 15/19] Fix #8400 (Not reading Nook)
---
src/calibre/library/database2.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 3a2109e01e..33593e93fe 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -690,7 +690,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
mi = Metadata(None)
aut_list = row[fm['au_map']]
- aut_list = [p.split(':::') for p in aut_list.split(':#:')]
+ if aut_list:
+ aut_list = [p.split(':::') for p in aut_list.split(':#:') if p]
+ else:
+ aut_list = []
aum = []
aus = {}
for (author, author_sort) in aut_list:
From 5d9f9325a249703d12c9f274d5f61607e1c280f6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 15 Jan 2011 23:08:00 -0700
Subject: [PATCH 16/19] Fix #8398 (Bug in FileDialog class)
---
src/calibre/gui2/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index e699551150..6a9becee50 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -505,7 +505,7 @@ class FileDialog(QObject):
self.selected_files = []
if mode == QFileDialog.AnyFile:
f = unicode(QFileDialog.getSaveFileName(parent, title, initial_dir, ftext, ""))
- if f and os.path.exists(f):
+ if f:
self.selected_files.append(f)
elif mode == QFileDialog.ExistingFile:
f = unicode(QFileDialog.getOpenFileName(parent, title, initial_dir, ftext, ""))
From 1272988089814321248ffe0c58232f1d061a67a3 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sun, 16 Jan 2011 20:11:52 +0800
Subject: [PATCH 17/19] enabled hyphen removal across the entire document text,
refactored logic to reduce false positives, added verbose debug output
---
src/calibre/ebooks/conversion/preprocess.py | 47 +++++++++-----
src/calibre/ebooks/conversion/utils.py | 69 +++++++++++----------
src/calibre/ebooks/txt/input.py | 4 +-
3 files changed, 72 insertions(+), 48 deletions(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index abaff77f33..9dedd05e33 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -174,13 +174,19 @@ class Dehyphenator(object):
retain hyphens.
'''
- def __init__(self):
+ def __init__(self, verbose=0, log=None):
+ self.log = default_log if log is None else log
+ self.verbose = verbose
# Add common suffixes to the regex below to increase the likelihood of a match -
# don't add suffixes which are also complete words, such as 'able' or 'sex'
- self.removesuffixes = re.compile(r"((ed)?ly|('e)?s|a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$", re.IGNORECASE)
+ # only remove if it's not already the point of hyphenation
+ self.suffix_string = "((ed)?ly|'?e?s||a?(t|s)?ion(s|al(ly)?)?|ings?|er|(i)?ous|(i|a)ty|(it)?ies|ive|gence|istic(ally)?|(e|a)nce|m?ents?|ism|ated|(e|u)ct(ed)?|ed|(i|ed)?ness|(e|a)ncy|ble|ier|al|ex|ian)$"
+ self.suffixes = re.compile(r"^%s" % self.suffix_string, re.IGNORECASE)
+ self.removesuffixes = re.compile(r"%s" % self.suffix_string, re.IGNORECASE)
# remove prefixes if the prefix was not already the point of hyphenation
- self.prefixes = re.compile(r'^(dis|re|un|in|ex)$', re.IGNORECASE)
- self.removeprefix = re.compile(r'^(dis|re|un|in|ex)', re.IGNORECASE)
+ self.prefix_string = '^(dis|re|un|in|ex)'
+ self.prefixes = re.compile(r'%s$' % self.prefix_string, re.IGNORECASE)
+ self.removeprefix = re.compile(r'%s' % self.prefix_string, re.IGNORECASE)
def dehyphenate(self, match):
firsthalf = match.group('firstpart')
@@ -191,31 +197,44 @@ class Dehyphenator(object):
wraptags = ''
hyphenated = unicode(firsthalf) + "-" + unicode(secondhalf)
dehyphenated = unicode(firsthalf) + unicode(secondhalf)
- lookupword = self.removesuffixes.sub('', dehyphenated)
- if self.prefixes.match(firsthalf) is None:
+ if self.suffixes.match(secondhalf) is None:
+ lookupword = self.removesuffixes.sub('', dehyphenated)
+ else:
+ lookupword = dehyphenated
+ if len(firsthalf) > 3 and self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword)
- #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
+ if self.verbose > 2:
+ self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated))
try:
searchresult = self.html.find(lookupword.lower())
except:
return hyphenated
if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
if self.html.find(lookupword) != -1 or searchresult != -1:
- #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
+ if self.verbose > 2:
+ self.log(" Cleanup:returned dehyphenated word: " + str(dehyphenated))
return dehyphenated
elif self.html.find(hyphenated) != -1:
- #print "Cleanup:returned hyphenated word: " + str(hyphenated)
+ if self.verbose > 2:
+ self.log(" Cleanup:returned hyphenated word: " + str(hyphenated))
return hyphenated
else:
- #print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)
+ if self.verbose > 2:
+ self.log(" Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf))
return firsthalf+u'\u2014'+wraptags+secondhalf
else:
+ if len(firsthalf) <= 2 and len(secondhalf) <= 2:
+ if self.verbose > 2:
+ self.log("too short, returned hyphenated word: " + str(hyphenated))
+ return hyphenated
if self.html.find(lookupword) != -1 or searchresult != -1:
- #print "returned dehyphenated word: " + str(dehyphenated)
+ if self.verbose > 2:
+ self.log(" returned dehyphenated word: " + str(dehyphenated))
return dehyphenated
else:
- #print " returned hyphenated word: " + str(hyphenated)
+ if self.verbose > 2:
+ self.log(" returned hyphenated word: " + str(hyphenated))
return hyphenated
def __call__(self, html, format, length=1):
@@ -228,7 +247,7 @@ class Dehyphenator(object):
elif format == 'txt':
intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length)
elif format == 'individual_words':
- intextmatch = re.compile(u'>[^<]*\b(?P[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)\u0020*(?P\w+)\b[^<]*<') # for later, not called anywhere yet
+ intextmatch = re.compile(u'(?!<)(?P\w+)(-|‐)\s*(?P\w+)(?![^<]*?>)') # for later, not called anywhere yet
elif format == 'html_cleanup':
intextmatch = re.compile(u'(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P\s*([iubp]>\s*<[iubp][^>]*>\s*)?]*>|[iubp]>\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)')
elif format == 'txt_cleanup':
@@ -512,7 +531,7 @@ class HTMLPreProcessor(object):
if is_pdftohtml and length > -1:
# Dehyphenate
- dehyphenator = Dehyphenator()
+ dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
html = dehyphenator(html,'html', length)
if is_pdftohtml:
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 96a9a4783d..4a118d423c 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -322,11 +322,11 @@ class HeuristicProcessor(object):
html = re.sub(ur'\s*\s*', ' ', html)
# Delete microsoft 'smart' tags
html = re.sub('(?i)?st1:\w+>', '', html)
- # Get rid of empty span, bold, font, & italics tags
- html = re.sub(r'\s*]*>\s*\s*', '', html)
+ # Get rid of empty span, bold, font, em, & italics tags
html = re.sub(r"\s*]*>\s*(]*>\s*){0,2}\s*\s*", " ", html)
- html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*[ibu]>\s*){0,2}\s*[ibu]>", " ", html)
+ html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*(font|[ibu]|em)>\s*){0,2}\s*(font|[ibu]|em)>", " ", html)
html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html)
+ html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*(font|[ibu]|em)>\s*){0,2}\s*(font|[ibu]|em)>", " ", html)
self.deleted_nbsps = True
return html
@@ -376,27 +376,31 @@ class HeuristicProcessor(object):
except:
self.log("Can't get wordcount")
- if 0 < self.totalwords < 50:
+ print "found "+unicode(self.totalwords)+" words in the flow"
+ if self.totalwords < 50:
self.log("flow is too short, not running heuristics")
return html
# Arrange line feeds and
tags so the line_length and no_markup functions work correctly
html = self.arrange_htm_line_endings(html)
- ###### Check Markup ######
- #
- # some lit files don't have any tags or equivalent (generally just plain text between
- #
tags), check and mark up line endings if required before proceeding
- if self.no_markup(html, 0.1):
- self.log("not enough paragraph markers, adding now")
- # markup using text processing
- html = self.markup_pre(html)
+ if self.cleanup_required():
+ ###### Check Markup ######
+ #
+ # some lit files don't have any tags or equivalent (generally just plain text between
+ #
tags), check and mark up line endings if required before proceeding
+ # fix indents must run after this step
+ if self.no_markup(html, 0.1):
+ self.log("not enough paragraph markers, adding now")
+ # markup using text processing
+ html = self.markup_pre(html)
# Replace series of non-breaking spaces with text-indent
if getattr(self.extra_opts, 'fix_indents', False):
html = self.fix_nbsp_indents(html)
if self.cleanup_required():
+ # fix indents must run before this step, as it removes non-breaking spaces
html = self.cleanup_markup(html)
# ADE doesn't render
, change to empty paragraphs
@@ -420,26 +424,26 @@ class HeuristicProcessor(object):
self.log("deleting blank lines")
html = self.multi_blank.sub('\n
', html)
html = self.blankreg.sub('', html)
+
+ # Determine line ending type
+ # Some OCR sourced files have line breaks in the html using a combination of span & p tags
+ # span are used for hard line breaks, p for new paragraphs. Determine which is used so
+ # that lines can be un-wrapped across page boundaries
+ format = self.analyze_line_endings(html)
+
+ # Check Line histogram to determine if the document uses hard line breaks, If 50% or
+ # more of the lines break in the same region of the document then unwrapping is required
+ docanalysis = DocAnalysis(format, html)
+ hardbreaks = docanalysis.line_histogram(.50)
+ self.log("Hard line breaks check returned "+unicode(hardbreaks))
+
+ # Calculate Length
+ unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
+ length = docanalysis.line_length(unwrap_factor)
+ self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
###### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False):
- # Determine line ending type
- # Some OCR sourced files have line breaks in the html using a combination of span & p tags
- # span are used for hard line breaks, p for new paragraphs. Determine which is used so
- # that lines can be un-wrapped across page boundaries
- format = self.analyze_line_endings(html)
-
- # Check Line histogram to determine if the document uses hard line breaks, If 50% or
- # more of the lines break in the same region of the document then unwrapping is required
- docanalysis = DocAnalysis(format, html)
- hardbreaks = docanalysis.line_histogram(.50)
- self.log("Hard line breaks check returned "+unicode(hardbreaks))
-
- # Calculate Length
- unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
- length = docanalysis.line_length(unwrap_factor)
- self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format")
-
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
if hardbreaks or unwrap_factor < 0.4:
self.log("Unwrapping required, unwrapping Lines")
@@ -447,15 +451,16 @@ class HeuristicProcessor(object):
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html', length)
html = self.punctuation_unwrap(length, html, 'html')
- #check any remaining hyphens, but only unwrap if there is a match
- dehyphenator = Dehyphenator()
+ # unwrap remaining hyphens based on line length, but only remove if there is a match
+ dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
html = dehyphenator(html,'html_cleanup', length)
if getattr(self.extra_opts, 'dehyphenate', False):
# dehyphenate in cleanup mode to fix anything previous conversions/editing missed
self.log("Fixing hyphenated content")
- dehyphenator = Dehyphenator()
+ dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
html = dehyphenator(html,'html_cleanup', length)
+ html = dehyphenator(html, 'individual_words', length)
# If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 5cffbafe21..8bf33c4837 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -106,7 +106,7 @@ class TXTInput(InputFormatPlugin):
log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
# Dehyphenate
- dehyphenator = Dehyphenator()
+ dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
txt = dehyphenator(txt,'txt', length)
# We don't check for block because the processor assumes block.
@@ -138,7 +138,7 @@ class TXTInput(InputFormatPlugin):
setattr(options, 'dehyphenate', True)
# Dehyphenate in cleanup mode for missed txt and markdown conversion
- dehyphenator = Dehyphenator()
+ dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
html = dehyphenator(html,'txt_cleanup', length)
html = dehyphenator(html,'html_cleanup', length)
From 89dd86056e727de35ff844cf712051b96a96e712 Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sun, 16 Jan 2011 20:26:52 +0800
Subject: [PATCH 18/19] ...
---
src/calibre/ebooks/conversion/preprocess.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 9dedd05e33..d1d275eb97 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -247,7 +247,7 @@ class Dehyphenator(object):
elif format == 'txt':
intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length)
elif format == 'individual_words':
- intextmatch = re.compile(u'(?!<)(?P\w+)(-|‐)\s*(?P\w+)(?![^<]*?>)') # for later, not called anywhere yet
+ intextmatch = re.compile(u'(?!<)(?P\w+)(-|‐)\s*(?P\w+)(?![^<]*?>)')
elif format == 'html_cleanup':
intextmatch = re.compile(u'(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P\s*([iubp]>\s*<[iubp][^>]*>\s*)?]*>|[iubp]>\s*<[iubp][^>]*>)?\s*(?P[\w\d]+)')
elif format == 'txt_cleanup':
From e0d1de2ce8832eb55abacf85edbfdcb1fb5d549e Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sun, 16 Jan 2011 20:54:17 +0800
Subject: [PATCH 19/19] removed hyphen removal from text input that's covered
by the heuristics option
---
src/calibre/ebooks/txt/input.py | 5 -----
1 file changed, 5 deletions(-)
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 8bf33c4837..39bfb4b132 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -137,11 +137,6 @@ class TXTInput(InputFormatPlugin):
setattr(options, 'format_scene_breaks', True)
setattr(options, 'dehyphenate', True)
- # Dehyphenate in cleanup mode for missed txt and markdown conversion
- dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
- html = dehyphenator(html,'txt_cleanup', length)
- html = dehyphenator(html,'html_cleanup', length)
-
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')
for opt in html_input.options: