From 8fea5a64e5495833aac42124352012befefe998f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 3 Dec 2010 12:09:31 -0700 Subject: [PATCH 01/40] ... --- src/calibre/utils/magick/magick.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/magick/magick.c b/src/calibre/utils/magick/magick.c index 0aab5f1fd7..9dac37f2ff 100644 --- a/src/calibre/utils/magick/magick.c +++ b/src/calibre/utils/magick/magick.c @@ -414,7 +414,7 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) { // }}} -// Image.load {{{ +// Image.open {{{ static PyObject * magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) { const char *data; From 55e17f81a0af90476d265140d571d83143b462f3 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 3 Dec 2010 19:58:10 +0000 Subject: [PATCH 02/40] Fix #7780 - non-expanding boxes in create_custom_column.ui --- .../gui2/preferences/create_custom_column.ui | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/calibre/gui2/preferences/create_custom_column.ui b/src/calibre/gui2/preferences/create_custom_column.ui index bdf4f624a4..d4e85a24c9 100644 --- a/src/calibre/gui2/preferences/create_custom_column.ui +++ b/src/calibre/gui2/preferences/create_custom_column.ui @@ -152,7 +152,7 @@ - + 0 0 @@ -211,27 +211,27 @@ + + + 0 + 0 + + A comma-separated list of permitted values. The empty value is always included, and is the default. For example, the list 'one,two,three' has four values, the first of them being the empty value. - - - 0 - 0 - - - - Default: (nothing) - The empty string is always the first value + + Default: (nothing) + From d4f76eba5b58692fd4be1285cbc5e032f649484d Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 3 Dec 2010 19:08:19 -0500 Subject: [PATCH 03/40] FB2 Output: Make FB2 output more compliant. This removes some features such as inline TOC and links. --- src/calibre/ebooks/fb2/fb2ml.py | 218 +++++++++++++------------------- 1 file changed, 87 insertions(+), 131 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 2a9a92612e..1c24213b46 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -32,7 +32,7 @@ TAG_MAP = { 'p' : 'p', 'li' : 'p', 'div': 'p', - 'br' : 'p', + 'br' : 'empty-line', } TAG_SPACE = [] @@ -42,7 +42,6 @@ TAG_IMAGES = [ ] TAG_LINKS = [ - 'a', ] BLOCK = [ @@ -56,9 +55,8 @@ STYLES = [ class FB2MLizer(object): ''' - Todo: * Ensure all style tags are inside of the p tags. - * Include more FB2 specific tags in the conversion. - * Handle reopening of a tag properly. + Todo: * Include more FB2 specific tags in the conversion. + * Handle a tags. * Figure out some way to turn oeb_book.toc items into
<p> to allow for readers to generate toc from the document. ''' @@ -66,7 +64,8 @@ class FB2MLizer(object): def __init__(self, log): self.log = log self.image_hrefs = {} - self.link_hrefs = {} + # Used to ensure text and tags are always within <p> and </p> + self.in_p = False def extract_content(self, oeb_book, opts): self.log.info('Converting XHTML to FB2 markup...') @@ -78,17 +77,13 @@ class FB2MLizer(object): self.image_hrefs = {} self.link_hrefs = {} output = [self.fb2_header()] - output.append(self.get_cover_page()) - output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk') output.append(self.get_text()) output.append(self.fb2_body_footer()) output.append(self.fb2mlize_images()) output.append(self.fb2_footer()) - output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc()) - output = self.clean_text(output) - if self.opts.sectionize_chapters: - output = self.sectionize_chapters(output) - return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) + output = self.clean_text(u''.join(output)) + #return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) + return u'<?xml version="1.0" encoding="UTF-8"?>' + output def clean_text(self, text): text = re.sub(r'(?miu)<section>\s*</section>', '', text) @@ -116,88 +111,40 @@ class FB2MLizer(object): author_middle = ' '.join(author_parts[1:-2]) author_last = author_parts[-1] - return u'<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \ - 'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0">\n' \ - '<description>\n<title-info>\n ' \ - '<author>\n<first-name>%s</first-name>\n<middle-name>%s' \ - '</middle-name>\n<last-name>%s</last-name>\n</author>\n' \ - '<book-title>%s</book-title> ' \ - '</title-info><document-info> ' \ - '<program-used>%s - %s</program-used></document-info>\n' \ - '</description>\n<body>\n<section>' % tuple(map(prepare_string_for_xml, - (author_first, author_middle, - author_last, self.oeb_book.metadata.title[0].value, - __appname__, __version__))) - - def get_cover_page(self): - output = u'' - if 'cover' in self.oeb_book.guide: - output += '<image xlink:href="#cover.jpg" />' - self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.jpg' - if 'titlepage' in self.oeb_book.guide: - self.log.debug('Generating cover page...') - href = self.oeb_book.guide['titlepage'].href - item = self.oeb_book.manifest.hrefs[href] - if item.spine_position is None: - stylizer = Stylizer(item.data, item.href, self.oeb_book, - self.opts, self.opts.output_profile) - output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item)) - return output - - def get_toc(self): - toc = [] - if self.opts.inline_toc: - self.log.debug('Generating table of contents...') - toc.append(u'<p>%s</p>' % _('Table of Contents:')) - for item in self.oeb_book.toc: - if item.href in self.link_hrefs.keys(): - toc.append('<p><a xlink:href="#%s">%s</a></p>\n' % (self.link_hrefs[item.href], item.title)) - else: - self.oeb.warn('Ignoring toc item: %s not found in document.' % item) - return ''.join(toc) - - def sectionize_chapters(self, text): - def remove_p(t): - t = t.replace('<p>', '') - t = t.replace('</p>', '') - return t - text = re.sub(r'(?imsu)(<p>)\s*(?P<anchor><a\s+id="calibre_link-\d+"\s*/>)\s*(</p>)\s*(<p>)\s*(?P<strong><strong>.+?</strong>)\s*(</p>)', lambda mo: '</section><section>%s<title><p>%s</p>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text) - text = re.sub(r'(?imsu)(

)\s*(?P)\s*(

)\s*(?P.+?)', lambda mo: '
%s<p>%s</p>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text) - text = re.sub(r'(?imsu)(?P)\s*(

)\s*(?P.+?)\s*(

)', lambda mo: '
%s<p>%s</p>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text) - text = re.sub(r'(?imsu)(

)\s*(?P)\s*(?P.+?)\s*(

)', lambda mo: '
%s<p>%s</p>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text) - text = re.sub(r'(?imsu)(?P)\s*(?P.+?)', lambda mo: '
%s<p>%s</p>' % (mo.group('anchor'), remove_p(mo.group('strong'))), text) - return text + return u'' \ + '' \ + '' \ + '' \ + '' \ + '%s' \ + '%s' \ + '%s' \ + '' \ + '%s' \ + '

' \ + '' \ + '' \ + '%s - %s' \ + '' \ + '' % tuple(map(prepare_string_for_xml, (author_first, author_middle, author_last, + self.oeb_book.metadata.title[0].value, __appname__, __version__))) def get_text(self): text = [] - for i, item in enumerate(self.oeb_book.spine): - if self.opts.sectionize_chapters_using_file_structure and i is not 0: - text.append('

') + for item in self.oeb_book.spine: self.log.debug('Converting %s to FictionBook2 XML' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) - text.append(self.add_page_anchor(item)) + text.append('
') text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) - if self.opts.sectionize_chapters_using_file_structure and i is not len(self.oeb_book.spine) - 1: - text.append('
') + text.append('
') return ''.join(text) def fb2_body_footer(self): - return u'\n
\n' + return u'' def fb2_footer(self): return u'' - def add_page_anchor(self, page): - return self.get_anchor(page, '') - - def get_anchor(self, page, aid): - aid = prepare_string_for_xml(aid) - aid = '%s#%s' % (page.href, aid) - if aid not in self.link_hrefs.keys(): - self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys()) - aid = self.link_hrefs[aid] - return '' % aid - def fb2mlize_images(self): images = [] for item in self.oeb_book.manifest: @@ -218,12 +165,42 @@ class FB2MLizer(object): col = 1 col += 1 data += char - images.append('%s\n' % (self.image_hrefs.get(item.href, '0000.JPEG'), item.media_type, data)) + images.append('%s\n' % (self.image_hrefs.get(item.href, '_0000.JPEG'), item.media_type, data)) except Exception as e: - self.log.error('Error: Could not include file %s becuase ' \ + self.log.error('Error: Could not include file %s because ' \ '%s.' % (item.href, e)) return ''.join(images) + def ensure_p(self): + if self.in_p: + return [], [] + else: + self.in_p = True + return ['

'], ['p'] + + def close_open_p(self, tags): + text = [''] + added_p = False + + if self.in_p: + # Close all up to p. Close p. Reopen all closed tags including p. + closed_tags = [] + tags.reverse() + for t in tags: + text.append('' % t) + closed_tags.append(t) + if t == 'p': + break + closed_tags.reverse() + for t in closed_tags: + text.append('<%s>' % t) + else: + text.append('

') + added_p = True + self.in_p = True + + return text, added_p + def dump_text(self, elem, stylizer, page, tag_stack=[]): if not isinstance(elem.tag, basestring) \ or namespace(elem.tag) != XHTML_NS: @@ -242,53 +219,26 @@ class FB2MLizer(object): if tag in TAG_IMAGES: if elem.attrib.get('src', None): if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys(): - self.image_hrefs[page.abshref(elem.attrib['src'])] = '%s.jpg' % len(self.image_hrefs.keys()) + self.image_hrefs[page.abshref(elem.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys()) + p_txt, p_tag = self.ensure_p() + fb2_text += p_txt + tags += p_tag fb2_text.append('' % self.image_hrefs[page.abshref(elem.attrib['src'])]) - if tag in TAG_LINKS: - href = elem.get('href') - if href: - href = prepare_string_for_xml(page.abshref(href)) - href = href.replace('"', '"') - if '://' in href: - fb2_text.append('' % href) - else: - if href.startswith('#'): - href = href[1:] - if href not in self.link_hrefs.keys(): - self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys()) - href = self.link_hrefs[href] - fb2_text.append('' % href) - tags.append('a') - - # Anchor ids - id_name = elem.get('id') - if id_name: - fb2_text.append(self.get_anchor(page, id_name)) - if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title: fb2_text.append('') tags.append('title') fb2_tag = TAG_MAP.get(tag, None) if fb2_tag == 'p': - if 'p' in tag_stack+tags: - # Close all up to p. Close p. Reopen all closed tags including p. - all_tags = tag_stack+tags - closed_tags = [] - all_tags.reverse() - for t in all_tags: - fb2_text.append('</%s>' % t) - closed_tags.append(t) - if t == 'p': - break - closed_tags.reverse() - for t in closed_tags: - fb2_text.append('<%s>' % t) - else: - fb2_text.append('<p>') + p_text, added_p = self.close_open_p(tag_stack+tags) + fb2_text += p_text + if added_p: tags.append('p') elif fb2_tag and fb2_tag not in tag_stack+tags: + p_text, p_tags = self.ensure_p() + fb2_text += p_text + tags += p_tags fb2_text.append('<%s>' % fb2_tag) tags.append(fb2_tag) @@ -296,18 +246,21 @@ class FB2MLizer(object): for s in STYLES: style_tag = s[1].get(style[s[0]], None) if style_tag and style_tag not in tag_stack+tags: + p_text, p_tags = self.ensure_p() + fb2_text += p_text + tags += p_tags fb2_text.append('<%s>' % style_tag) tags.append(style_tag) if tag in TAG_SPACE: - if not fb2_text or fb2_text[-1] != ' ' or not fb2_text[-1].endswith(' '): - fb2_text.append(' ') + fb2_text.append(' ') if hasattr(elem, 'text') and elem.text: - if 'p' not in tag_stack+tags: - fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text)) - else: - fb2_text.append(prepare_string_for_xml(elem.text)) + if not self.in_p: + fb2_text.append('<p>') + fb2_text.append(prepare_string_for_xml(elem.text)) + if not self.in_p: + fb2_text.append('</p>') for item in elem: fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags) @@ -316,10 +269,11 @@ class FB2MLizer(object): fb2_text += self.close_tags(tags) if hasattr(elem, 'tail') and elem.tail: - if 'p' not in tag_stack: - fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.tail)) - else: - fb2_text.append(prepare_string_for_xml(elem.tail)) + if not self.in_p: + fb2_text.append('<p>') + fb2_text.append(prepare_string_for_xml(elem.tail)) + if not self.in_p: + fb2_text.append('</p>') return fb2_text @@ -327,5 +281,7 @@ class FB2MLizer(object): text = [] for tag in tags: text.append('</%s>' % tag) + if tag == 'p': + self.in_p = False return text From 692b6bcf02e6dbc75f5f5088a122626ece3e4d42 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Fri, 3 Dec 2010 19:11:33 -0500 Subject: [PATCH 04/40] FB2 Output: Remove FB2 options that are no longer implemented. --- src/calibre/ebooks/fb2/output.py | 14 ------------- src/calibre/gui2/convert/fb2_output.py | 4 +--- src/calibre/gui2/convert/fb2_output.ui | 27 +++----------------------- 3 files changed, 4 insertions(+), 41 deletions(-) diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index bacaf0da91..88508b83e0 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -16,20 +16,6 @@ class FB2Output(OutputFormatPlugin): file_type = 'fb2' options = set([ - OptionRecommendation(name='inline_toc', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Add Table of Contents to beginning of the book.')), - OptionRecommendation(name='sectionize_chapters', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Try to turn chapters into individual sections. ' \ - 'WARNING: ' \ - 'This option is experimental. It can cause conversion ' \ - 'to fail. It can also produce unexpected output.')), - OptionRecommendation(name='sectionize_chapters_using_file_structure', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Try to turn chapters into individual sections using the ' \ - 'internal structure of the ebook. This works well for EPUB ' \ - 'books that have been internally split by chapter.')), OptionRecommendation(name='h1_to_title', recommended_value=False, level=OptionRecommendation.LOW, help=_('Wrap all h1 tags with fb2 title elements.')), diff --git a/src/calibre/gui2/convert/fb2_output.py b/src/calibre/gui2/convert/fb2_output.py index 5d927146a5..6b1497a9db 100644 --- a/src/calibre/gui2/convert/fb2_output.py +++ b/src/calibre/gui2/convert/fb2_output.py @@ -17,8 +17,6 @@ class PluginWidget(Widget, Ui_Form): ICON = I('mimetypes/fb2.png') def __init__(self, parent, get_option, get_help, db=None, book_id=None): - Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters', - 'sectionize_chapters_using_file_structure', 'h1_to_title', - 'h2_to_title', 'h3_to_title']) + Widget.__init__(self, parent, ['h1_to_title', 'h2_to_title', 'h3_to_title']) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/fb2_output.ui b/src/calibre/gui2/convert/fb2_output.ui index a90ecd615e..436719aed4 100644 --- a/src/calibre/gui2/convert/fb2_output.ui +++ b/src/calibre/gui2/convert/fb2_output.ui @@ -14,7 +14,7 @@ <string>Form</string> </property> <layout class="QGridLayout" name="gridLayout"> - <item row="6" column="0"> + <item row="3" column="0"> <spacer name="verticalSpacer"> <property name="orientation"> <enum>Qt::Vertical</enum> @@ -28,41 +28,20 @@ </spacer> </item> <item row="0" column="0"> - <widget class="QCheckBox" name="opt_inline_toc"> - <property name="text"> - <string>&Inline TOC</string> - </property> - </widget> - </item> - <item row="1" column="0"> - <widget class="QCheckBox" name="opt_sectionize_chapters"> - <property name="text"> - <string>Sectionize Chapters (Use with care!)</string> - </property> - </widget> - </item> - <item row="2" column="0"> - <widget class="QCheckBox" name="opt_sectionize_chapters_using_file_structure"> - <property name="text"> - <string>Sectionize Chapters using file structure</string> - </property> - </widget> - </item> - <item row="3" column="0"> <widget class="QCheckBox" name="opt_h1_to_title"> <property name="text"> <string>Wrap h1 tags with <title> elements</string> </property> </widget> </item> - <item row="4" column="0"> + <item row="1" column="0"> <widget class="QCheckBox" name="opt_h2_to_title"> <property name="text"> <string>Wrap h2 tags with <title> elements</string> </property> </widget> </item> - <item row="5" column="0"> + <item row="2" column="0"> <widget class="QCheckBox" name="opt_h3_to_title"> <property name="text"> <string>Wrap h3 tags with <title> elements</string> From 1fd503a12ea53196d12a1969220e42a23c027a98 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Fri, 3 Dec 2010 19:14:09 -0500 Subject: [PATCH 05/40] FB2 Output: Use pretty print option. --- src/calibre/ebooks/fb2/fb2ml.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 1c24213b46..e658dce25a 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -82,8 +82,10 @@ class FB2MLizer(object): output.append(self.fb2mlize_images()) output.append(self.fb2_footer()) output = self.clean_text(u''.join(output)) - #return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) - return u'<?xml version="1.0" encoding="UTF-8"?>' + output + if self.opts.pretty_print: + return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) + else: + return u'<?xml version="1.0" encoding="UTF-8"?>' + output def clean_text(self, text): text = re.sub(r'(?miu)<section>\s*</section>', '', text) From 9409781f4a4446078e2a1637d7b4303abdc81bff Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Fri, 3 Dec 2010 19:43:50 -0500 Subject: [PATCH 06/40] FB2 Output: Insert empty lines properly. --- src/calibre/ebooks/fb2/fb2ml.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index e658dce25a..252453d25e 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -32,7 +32,6 @@ TAG_MAP = { 'p' : 'p', 'li' : 'p', 'div': 'p', - 'br' : 'empty-line', } TAG_SPACE = [] @@ -126,7 +125,7 @@ class FB2MLizer(object): '<annotation><p/></annotation>' \ '</title-info>' \ '<document-info>' \ - '<program-used>%s - %s</program-used>' \ + '<program-used>%s %s</program-used>' \ '</document-info>' \ '</description><body>' % tuple(map(prepare_string_for_xml, (author_first, author_middle, author_last, self.oeb_book.metadata.title[0].value, __appname__, __version__))) @@ -180,6 +179,24 @@ class FB2MLizer(object): self.in_p = True return ['<p>'], ['p'] + def insert_empty_line(self, tags): + if self.in_p: + text = [''] + closed_tags = [] + tags.reverse() + for t in tags: + text.append('</%s>' % t) + closed_tags.append(t) + if t == 'p': + break + text.append('<empty-line />') + closed_tags.reverse() + for t in closed_tags: + text.append('<%s>' % t) + return text + else: + return ['<empty-line />'] + def close_open_p(self, tags): text = [''] added_p = False @@ -230,6 +247,8 @@ class FB2MLizer(object): if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title: fb2_text.append('<title>') tags.append('title') + if tag == 'br': + fb2_text += self.insert_empty_line(tag_stack+tags) fb2_tag = TAG_MAP.get(tag, None) if fb2_tag == 'p': From 6ec27b1234d40ac866ab36d4f4d941fbecfc01d2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 3 Dec 2010 19:36:31 -0700 Subject: [PATCH 07/40] ... --- setup/installer/linux/freeze2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index 684b33b80d..693bf28121 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -14,7 +14,7 @@ from setup import Command, modules, basenames, functions, __version__, \ SITE_PACKAGES = ['IPython', 'PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize', 'sip.so', 'BeautifulSoup.py', 'cssutils', 'encutils', 'lxml', - 'sipconfig.py', 'xdg'] + 'sipconfig.py', 'xdg', 'dbus'] QTDIR = '/usr/lib/qt4' QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus') From 38be2c7fc60dc9f6285494388df3073e81833fc4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 3 Dec 2010 20:06:13 -0700 Subject: [PATCH 08/40] ... --- setup/installer/linux/freeze2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index 693bf28121..cefc193f18 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -14,7 +14,8 @@ from setup import Command, modules, basenames, functions, __version__, \ SITE_PACKAGES = ['IPython', 'PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize', 'sip.so', 'BeautifulSoup.py', 'cssutils', 'encutils', 'lxml', - 'sipconfig.py', 'xdg', 'dbus'] + 'sipconfig.py', 'xdg', 'dbus', '_dbus_bindings.so', 'dbus_bindings.py', + '_dbus_glib_bindings.so'] QTDIR = '/usr/lib/qt4' QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus') From 67367f521d7c640ce046ed614b37f382e55653ec Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 3 Dec 2010 20:18:56 -0700 Subject: [PATCH 09/40] Use ICU for sorting --- setup/build_environment.py | 4 + setup/extensions.py | 14 +- setup/installer/windows/freeze.py | 2 +- setup/installer/windows/notes.rst | 9 ++ src/calibre/constants.py | 3 +- src/calibre/library/caches.py | 9 +- src/calibre/utils/icu.c | 220 ++++++++++++++++++++++++++++++ src/calibre/utils/icu.py | 53 +++++++ 8 files changed, 306 insertions(+), 8 deletions(-) create mode 100644 src/calibre/utils/icu.c create mode 100644 src/calibre/utils/icu.py diff --git a/setup/build_environment.py b/setup/build_environment.py index c021ebc6a6..d6581a907d 100644 --- a/setup/build_environment.py +++ b/setup/build_environment.py @@ -91,11 +91,15 @@ podofo_inc = '/usr/include/podofo' podofo_lib = '/usr/lib' chmlib_inc_dirs = chmlib_lib_dirs = [] sqlite_inc_dirs = [] +icu_inc_dirs = [] +icu_lib_dirs = [] if iswindows: prefix = r'C:\cygwin\home\kovid\sw' sw_inc_dir = os.path.join(prefix, 'include') sw_lib_dir = os.path.join(prefix, 'lib') + icu_inc_dirs = [sw_inc_dir] + icu_lib_dirs = [sw_lib_dir] sqlite_inc_dirs = [sw_inc_dir] fc_inc = os.path.join(sw_inc_dir, 'fontconfig') fc_lib = sw_lib_dir diff --git a/setup/extensions.py b/setup/extensions.py index d4ac8e188c..3862cce62a 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -18,7 +18,8 @@ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \ QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \ magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \ magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \ - jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs + jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs, icu_inc_dirs, \ + icu_lib_dirs MT isunix = islinux or isosx or isfreebsd @@ -56,8 +57,19 @@ pdfreflow_libs = [] if iswindows: pdfreflow_libs = ['advapi32', 'User32', 'Gdi32', 'zlib'] +icu_libs = ['icudata', 'icui18n', 'icuuc', 'icuio'] +if iswindows: + icu_libs = ['icudt', 'icuin', 'icuuc', 'icuio'] + extensions = [ + Extension('icu', + ['calibre/utils/icu.c'], + libraries=icu_libs, + lib_dirs=icu_lib_dirs, + inc_dirs=icu_inc_dirs, + ), + Extension('sqlite_custom', ['calibre/library/sqlite_custom.c'], inc_dirs=sqlite_inc_dirs diff --git a/setup/installer/windows/freeze.py b/setup/installer/windows/freeze.py index 30cc2a97af..7d8ea4d80a 100644 --- a/setup/installer/windows/freeze.py +++ b/setup/installer/windows/freeze.py @@ -199,7 +199,7 @@ class Win32Freeze(Command, WixMixIn): for pat in ('*.dll',): for f in glob.glob(os.path.join(bindir, pat)): ok = True - for ex in ('expatw',): + for ex in ('expatw', 'testplug'): if ex in f.lower(): ok = False if not ok: continue diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index 9c553c42e8..af4c871dac 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -77,6 +77,15 @@ Test it on the target system with calibre-debug -c "import _imaging, _imagingmath, _imagingft, _imagingcms" +ICU +------- + +Download the win32 msvc9 binary from http://www.icu-project.org/download/4.4.html + +Note that 4.4 is the last version of ICU that can be compiled (is precompiled) with msvc9 + +Put the dlls into sw/bin and the unicode dir into sw/include and the contents of lib int sw/lib + Libunrar ---------- diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 197fe5888a..f9c177e7a8 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -67,7 +67,8 @@ if plugins is None: 'pdfreflow', 'progress_indicator', 'chmlib', - 'chm_extra' + 'chm_extra', + 'icu', ] + \ (['winutil'] if iswindows else []) + \ (['usbobserver'] if isosx else []): diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 7b4c66c8b8..7c1dea792c 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -796,11 +796,13 @@ class SortKey(object): class SortKeyGenerator(object): def __init__(self, fields, field_metadata, data): + from calibre.utils.icu import sort_key self.field_metadata = field_metadata self.orders = [-1 if x[1] else 1 for x in fields] self.entries = [(x[0], field_metadata[x[0]]) for x in fields] self.library_order = tweaks['title_series_sorting'] == 'library_order' self.data = data + self.string_sort_key = sort_key def __call__(self, record): values = tuple(self.itervals(self.data[record])) @@ -821,17 +823,14 @@ class SortKeyGenerator(object): if val is None: val = ('', 1) else: - val = val.lower() if self.library_order: val = title_sort(val) sidx_fm = self.field_metadata[name + '_index'] sidx = record[sidx_fm['rec_index']] - val = (val, sidx) + val = (self.string_sort_key(val), sidx) elif dt in ('text', 'comments', 'composite', 'enumeration'): - if val is None: - val = '' - val = val.lower() + val = self.string_sort_key(val) elif dt == 'bool': val = {True: 1, False: 2, None: 3}.get(val, 3) diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c new file mode 100644 index 0000000000..6e06d54dff --- /dev/null +++ b/src/calibre/utils/icu.c @@ -0,0 +1,220 @@ +#define UNICODE +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <unicode/utypes.h> +#include <unicode/uclean.h> +#include <unicode/ucol.h> +#include <unicode/ustring.h> + + +// Collator object definition {{{ +typedef struct { + PyObject_HEAD + // Type-specific fields go here. + UCollator *collator; + +} icu_Collator; + +static void +icu_Collator_dealloc(icu_Collator* self) +{ + if (self->collator != NULL) ucol_close(self->collator); + self->collator = NULL; + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +icu_Collator_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + icu_Collator *self; + const char *loc; + UErrorCode status = U_ZERO_ERROR; + + if (!PyArg_ParseTuple(args, "s", &loc)) return NULL; + + self = (icu_Collator *)type->tp_alloc(type, 0); + if (self != NULL) { + self->collator = ucol_open(loc, &status); + if (self->collator == NULL || U_FAILURE(status)) { + PyErr_SetString(PyExc_Exception, "Failed to create collator."); + self->collator = NULL; + Py_DECREF(self); + return NULL; + } + } + + return (PyObject *)self; +} + +// Collator.display_name {{{ +static PyObject * +icu_Collator_display_name(icu_Collator *self, void *closure) { + const char *loc = NULL; + UErrorCode status = U_ZERO_ERROR; + UChar dname[400]; + char buf[100]; + + loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status); + if (loc == NULL || U_FAILURE(status)) { + PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL; + } + ucol_getDisplayName(loc, "en", dname, 100, &status); + if (U_FAILURE(status)) return PyErr_NoMemory(); + + u_strToUTF8(buf, 100, NULL, dname, -1, &status); + if (U_FAILURE(status)) { + PyErr_SetString(PyExc_Exception, "Failed ot convert dname to UTF-8"); return NULL; + } + return Py_BuildValue("s", buf); +} + +// }}} + +// Collator.actual_locale {{{ +static PyObject * +icu_Collator_actual_locale(icu_Collator *self, void *closure) { + const char *loc = NULL; + UErrorCode status = U_ZERO_ERROR; + + loc = ucol_getLocaleByType(self->collator, ULOC_ACTUAL_LOCALE, &status); + if (loc == NULL || U_FAILURE(status)) { + PyErr_SetString(PyExc_Exception, "Failed to get actual locale"); return NULL; + } + return Py_BuildValue("s", loc); +} + +// }}} + +// Collator.sort_key {{{ +static PyObject * +icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { + PyObject *o; + Py_ssize_t sz; + wchar_t *buf; + UChar *buf2; + uint8_t *buf3; + PyObject *ans; + UErrorCode status = U_ZERO_ERROR; + + if (!PyArg_ParseTuple(args, "U", &o)) return NULL; + + sz = PyUnicode_GetSize(o); + + buf = (wchar_t*)calloc(sz*2 + 1, sizeof(wchar_t)); + buf2 = (UChar*)calloc(sz*2 + 1, sizeof(UChar)); + buf3 = (uint8_t*)calloc(sz*4 + 1, sizeof(uint8_t)); + + if (buf == NULL || buf2 == NULL || buf3 == NULL) return PyErr_NoMemory(); + + PyUnicode_AsWideChar((PyUnicodeObject *)o, buf, sz); + + u_strFromWCS(buf2, 2*sz+1, NULL, buf, -1, &status); + if (U_SUCCESS(status)) + ucol_getSortKey(self->collator, buf2, -1, buf3, sz*4+1); + + ans = PyBytes_FromString((char *)buf3); + free(buf3); free(buf); free(buf2); + if (ans == NULL) return PyErr_NoMemory(); + + return ans; +} + +static PyMethodDef icu_Collator_methods[] = { + {"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS, + "sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU." + }, + + {NULL} /* Sentinel */ +}; + +static PyGetSetDef icu_Collator_getsetters[] = { + {(char *)"actual_locale", + (getter)icu_Collator_actual_locale, NULL, + (char *)"Actual locale used by this collator.", + NULL}, + + {(char *)"display_name", + (getter)icu_Collator_display_name, NULL, + (char *)"Display name of this collator in English. The name reflects the actual data source used.", + NULL}, + + {NULL} /* Sentinel */ +}; + +static PyTypeObject icu_CollatorType = { // {{{ + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "icu.Collator", /*tp_name*/ + sizeof(icu_Collator), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)icu_Collator_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "Collator", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + icu_Collator_methods, /* tp_methods */ + 0, /* tp_members */ + icu_Collator_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + icu_Collator_new, /* tp_new */ +}; // }}} + +// }} + + +// }}} + + +// Module initialization {{{ + +static PyMethodDef icu_methods[] = { + {NULL} /* Sentinel */ +}; + + +PyMODINIT_FUNC +initicu(void) +{ + PyObject* m; + UErrorCode status = U_ZERO_ERROR; + + u_init(&status); + + + if (PyType_Ready(&icu_CollatorType) < 0) + return; + + m = Py_InitModule3("icu", icu_methods, + "Wrapper for the ICU internationalization library"); + + Py_INCREF(&icu_CollatorType); + PyModule_AddObject(m, "Collator", (PyObject *)&icu_CollatorType); + // uint8_t must be the same size as char + PyModule_AddIntConstant(m, "ok", (U_SUCCESS(status) && sizeof(uint8_t) == sizeof(char)) ? 1 : 0); + +} +// }}} diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py new file mode 100644 index 0000000000..5b432747f0 --- /dev/null +++ b/src/calibre/utils/icu.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +from functools import partial + +from calibre.constants import plugins + +_icu = _collator = None + +_none = u'' +_none2 = b'' + +def load_icu(): + global _icu + if _icu is None: + _icu = plugins['icu'][0] + if _icu is None: + print plugins['icu'][1] + else: + if not _icu.ok: + print 'icu not ok' + _icu = None + return _icu + +def load_collator(): + global _collator + from calibre.utils.localization import get_lang + if _collator is None: + icu = load_icu() + if icu is not None: + _collator = icu.Collator(get_lang()) + return _collator + + +def py_sort_key(obj): + if not obj: + return _none + return obj.lower() + +def icu_sort_key(collator, obj): + if not obj: + return _none2 + return collator.sort_key(obj.lower()) + +load_icu() +load_collator() +sort_key = py_sort_key if _icu is None or _collator is None else \ + partial(icu_sort_key, _collator) + From 74a801f4cce869b25cd70277881b006fe3c145d8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Fri, 3 Dec 2010 20:36:56 -0700 Subject: [PATCH 10/40] Add ICU to linux binary build --- setup/installer/linux/freeze2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index cefc193f18..df2c1d6480 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -50,6 +50,10 @@ binary_includes = [ '/lib/libreadline.so.6', '/usr/lib/libchm.so.0', '/usr/lib/liblcms2.so.2', + '/usr/lib/libicudata.so.46', + '/usr/lib/libicui18n.so.46', + '/usr/lib/libicuuc.so.46', + '/usr/lib/libicuio.so.46', ] binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS] From 0b70f40709fca1197a8fb634b0dfa4958eedb8be Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 00:06:49 -0700 Subject: [PATCH 11/40] Fix ICU py->UChar string conversion and add support for OS X --- setup/extensions.py | 6 ++++++ src/calibre/utils/icu.c | 37 ++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/setup/extensions.py b/setup/extensions.py index 3862cce62a..6a9cce7625 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -58,8 +58,13 @@ if iswindows: pdfreflow_libs = ['advapi32', 'User32', 'Gdi32', 'zlib'] icu_libs = ['icudata', 'icui18n', 'icuuc', 'icuio'] +icu_cflags = [] if iswindows: icu_libs = ['icudt', 'icuin', 'icuuc', 'icuio'] +if isosx: + icu_libs = ['icucore'] + icu_cflags = ['-DU_DISABLE_RENAMING'] # Needed to use system libicucore.dylib + extensions = [ @@ -68,6 +73,7 @@ extensions = [ libraries=icu_libs, lib_dirs=icu_lib_dirs, inc_dirs=icu_inc_dirs, + cflags=icu_cflags ), Extension('sqlite_custom', diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index 6e06d54dff..1da14a0252 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -88,32 +88,34 @@ icu_Collator_actual_locale(icu_Collator *self, void *closure) { // Collator.sort_key {{{ static PyObject * icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { - PyObject *o; + char *input; Py_ssize_t sz; - wchar_t *buf; - UChar *buf2; - uint8_t *buf3; + UChar *buf; + uint8_t *buf2; PyObject *ans; + int32_t key_size; UErrorCode status = U_ZERO_ERROR; - - if (!PyArg_ParseTuple(args, "U", &o)) return NULL; + + if (!PyArg_ParseTuple(args, "es", "UTF-8", &input)) return NULL; - sz = PyUnicode_GetSize(o); + sz = strlen(input); - buf = (wchar_t*)calloc(sz*2 + 1, sizeof(wchar_t)); - buf2 = (UChar*)calloc(sz*2 + 1, sizeof(UChar)); - buf3 = (uint8_t*)calloc(sz*4 + 1, sizeof(uint8_t)); + buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); - if (buf == NULL || buf2 == NULL || buf3 == NULL) return PyErr_NoMemory(); + if (buf == NULL) return PyErr_NoMemory(); - PyUnicode_AsWideChar((PyUnicodeObject *)o, buf, sz); + u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status); - u_strFromWCS(buf2, 2*sz+1, NULL, buf, -1, &status); - if (U_SUCCESS(status)) - ucol_getSortKey(self->collator, buf2, -1, buf3, sz*4+1); + if (U_SUCCESS(status)) { + key_size = ucol_getSortKey(self->collator, buf, -1, NULL, 0); + buf2 = (uint8_t*)calloc(key_size + 1, sizeof(uint8_t)); + if (buf2 == NULL) return PyErr_NoMemory(); + ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1); + ans = PyBytes_FromString((char *)buf2); + free(buf2); + } else ans = PyBytes_FromString(""); - ans = PyBytes_FromString((char *)buf3); - free(buf3); free(buf); free(buf2); + free(buf); if (ans == NULL) return PyErr_NoMemory(); return ans; @@ -188,6 +190,7 @@ static PyTypeObject icu_CollatorType = { // {{{ // }}} +// }}} // Module initialization {{{ From fc9ac1d715a26d0a567349d976c0ba2105032c5d Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 00:30:42 -0700 Subject: [PATCH 12/40] Add ICU dorting test --- src/calibre/utils/icu.py | 107 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 5b432747f0..74fbe182f5 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -51,3 +51,110 @@ load_collator() sort_key = py_sort_key if _icu is None or _collator is None else \ partial(icu_sort_key, _collator) + +def test(): # {{{ + # Data {{{ + german = ''' + Sonntag +Montag +Dienstag +Januar +Februar +März +Fuße +Fluße +Flusse +flusse +fluße +flüße +flüsse +''' + german_good = ''' + Dienstag +Februar +flusse +Flusse +fluße +Fluße +flüsse +flüße +Fuße +Januar +März +Montag +Sonntag''' + french = ''' +dimanche +lundi +mardi +janvier +février +mars +déjà +Meme +deja +même +dejà +bpef +bœg +Boef +Mémé +bœf +boef +bnef +pêche +pèché +pêché +pêche +pêché''' + french_good = ''' + bnef + boef + Boef + bœf + bœg + bpef + deja + dejà + déjà + dimanche + février + janvier + lundi + mardi + mars + Meme + Mémé + même + pèché + pêche + pêche + pêché + pêché''' + # }}} + + def create(l): + l = l.decode('utf-8').splitlines() + return [x.strip() for x in l if x.strip()] + + german = create(german) + c = _icu.Collator('de') + print 'Sorted german:: (%s)'%c.actual_locale + gs = list(sorted(german, key=c.sort_key)) + for x in gs: + print '\t', x.encode('utf-8') + if gs != create(german_good): + print 'German failed' + return + print + french = create(french) + c = _icu.Collator('fr') + print 'Sorted french:: (%s)'%c.actual_locale + fs = list(sorted(french, key=c.sort_key)) + for x in fs: + print '\t', x.encode('utf-8') + if fs != create(french_good): + print 'French failed' + return +# }}} + From a9983208d7c992a393ce2c2bc6af965b3d5bc268 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 00:40:38 -0700 Subject: [PATCH 13/40] ... --- src/calibre/utils/icu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 74fbe182f5..7c2fd31f78 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -154,7 +154,7 @@ pêché''' for x in fs: print '\t', x.encode('utf-8') if fs != create(french_good): - print 'French failed' + print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)' return # }}} From f8920d18bb0b0825c0354abcaaebd91c51ba9a00 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 4 Dec 2010 12:26:13 +0000 Subject: [PATCH 14/40] Refactor to use new sort_keys function --- src/calibre/devices/usbms/books.py | 5 ++++- src/calibre/gui2/actions/edit_metadata.py | 4 ++-- src/calibre/gui2/book_details.py | 5 +++-- src/calibre/gui2/convert/metadata.py | 7 ++++--- src/calibre/gui2/custom_column_widgets.py | 11 ++++++----- src/calibre/gui2/dialogs/metadata_bulk.py | 7 ++++--- src/calibre/gui2/dialogs/metadata_single.py | 7 ++++--- src/calibre/gui2/dialogs/saved_search_editor.py | 3 ++- src/calibre/gui2/dialogs/search.py | 4 ++-- src/calibre/gui2/dialogs/tag_categories.py | 7 ++++--- src/calibre/gui2/dialogs/tag_editor.py | 16 +++++++--------- src/calibre/gui2/dialogs/tag_list_editor.py | 4 ++-- src/calibre/gui2/dialogs/user_profiles.py | 3 ++- src/calibre/gui2/library/delegates.py | 7 +++++-- src/calibre/gui2/library/models.py | 16 +++++++++------- src/calibre/gui2/preferences/behavior.py | 4 ++-- src/calibre/gui2/search_box.py | 3 ++- src/calibre/gui2/shortcuts.py | 4 ++-- src/calibre/gui2/tag_view.py | 16 +++++++++------- src/calibre/library/database2.py | 11 +++++++---- src/calibre/library/server/browse.py | 15 +++++---------- src/calibre/library/server/mobile.py | 4 ++-- src/calibre/library/server/opds.py | 9 ++++----- src/calibre/library/server/utils.py | 3 ++- src/calibre/library/server/xml.py | 4 ++-- src/calibre/library/sqlite.py | 5 +++++ src/calibre/utils/search_query_parser.py | 4 ++-- 27 files changed, 104 insertions(+), 84 deletions(-) diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index 5063daa29f..4a74f68349 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -13,6 +13,7 @@ from calibre.devices.interface import BookList as _BookList from calibre.constants import preferred_encoding from calibre import isbytestring from calibre.utils.config import prefs, tweaks +from calibre.utils.icu import sort_key class Book(Metadata): def __init__(self, prefix, lpath, size=None, other=None): @@ -230,14 +231,16 @@ class CollectionsBookList(BookList): x = xx[1] y = yy[1] if x is None and y is None: + # No sort_key needed here, because defaults are ascii return cmp(xx[2], yy[2]) if x is None: return 1 if y is None: return -1 - c = cmp(x, y) + c = cmp(sort_key(x), sort_key(y)) if c != 0: return c + # same as above -- no sort_key needed here return cmp(xx[2], yy[2]) for category, lpaths in collections.items(): diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 725bf35993..6e2a4054c8 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -16,6 +16,7 @@ from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.tag_list_editor import TagListEditor from calibre.gui2.actions import InterfaceAction +from calibre.utils.icu import sort_key class EditMetadataAction(InterfaceAction): @@ -363,8 +364,7 @@ class EditMetadataAction(InterfaceAction): def edit_device_collections(self, view, oncard=None): model = view.model() result = model.get_collections_with_ids() - compare = (lambda x,y:cmp(x.lower(), y.lower())) - d = TagListEditor(self.gui, tag_to_match=None, data=result, compare=compare) + d = TagListEditor(self.gui, tag_to_match=None, data=result, key=sort_key) d.exec_() if d.result() == d.Accepted: to_rename = d.to_rename # dict of new text to old ids diff --git a/src/calibre/gui2/book_details.py b/src/calibre/gui2/book_details.py index b101d4c44f..5214f1a1d5 100644 --- a/src/calibre/gui2/book_details.py +++ b/src/calibre/gui2/book_details.py @@ -19,6 +19,7 @@ from calibre.ebooks import BOOK_EXTENSIONS from calibre.constants import preferred_encoding from calibre.library.comments import comments_to_html from calibre.gui2 import config, open_local_file +from calibre.utils.icu import sort_key # render_rows(data) {{{ WEIGHTS = collections.defaultdict(lambda : 100) @@ -31,8 +32,8 @@ WEIGHTS[_('Tags')] = 4 def render_rows(data): keys = data.keys() # First sort by name. The WEIGHTS sort will preserve this sub-order - keys.sort(cmp=lambda x, y: cmp(x.lower(), y.lower())) - keys.sort(cmp=lambda x, y: cmp(WEIGHTS[x], WEIGHTS[y])) + keys.sort(key=sort_key) + keys.sort(key=lambda x: WEIGHTS[x]) rows = [] for key in keys: txt = data[key] diff --git a/src/calibre/gui2/convert/metadata.py b/src/calibre/gui2/convert/metadata.py index 7a02cf4429..d3744bb614 100644 --- a/src/calibre/gui2/convert/metadata.py +++ b/src/calibre/gui2/convert/metadata.py @@ -17,6 +17,7 @@ from calibre.ebooks.metadata import authors_to_string, string_to_authors, \ from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ptempfile import PersistentTemporaryFile from calibre.gui2.convert import Widget +from calibre.utils.icu import sort_key def create_opf_file(db, book_id): mi = db.get_metadata(book_id, index_is_id=True) @@ -102,7 +103,7 @@ class MetadataWidget(Widget, Ui_Form): def initalize_authors(self): all_authors = self.db.all_authors() - all_authors.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_authors.sort(key=lambda x : sort_key(x[1])) for i in all_authors: id, name = i @@ -117,7 +118,7 @@ class MetadataWidget(Widget, Ui_Form): def initialize_series(self): all_series = self.db.all_series() - all_series.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_series.sort(key=lambda x : sort_key(x[1])) for i in all_series: id, name = i @@ -126,7 +127,7 @@ class MetadataWidget(Widget, Ui_Form): def initialize_publisher(self): all_publishers = self.db.all_publishers() - all_publishers.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_publishers.sort(key=lambda x : sort_key(x[1])) for i in all_publishers: id, name = i diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py index 6b6669f4e0..8c41e3def1 100644 --- a/src/calibre/gui2/custom_column_widgets.py +++ b/src/calibre/gui2/custom_column_widgets.py @@ -17,6 +17,7 @@ from calibre.utils.date import qt_to_dt, now from calibre.gui2.widgets import TagsLineEdit, EnComboBox from calibre.gui2 import UNDEFINED_QDATE, error_dialog from calibre.utils.config import tweaks +from calibre.utils.icu import sort_key class Base(object): @@ -207,7 +208,7 @@ class Text(Base): def setup_ui(self, parent): values = self.all_values = list(self.db.all_custom(num=self.col_id)) - values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower())) + values.sort(key=sort_key) if self.col_metadata['is_multiple']: w = TagsLineEdit(parent, values) w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred) @@ -256,7 +257,7 @@ class Series(Base): def setup_ui(self, parent): values = self.all_values = list(self.db.all_custom(num=self.col_id)) - values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower())) + values.sort(key=sort_key) w = EnComboBox(parent) w.setSizeAdjustPolicy(w.AdjustToMinimumContentsLengthWithIcon) w.setMinimumContentsLength(25) @@ -369,7 +370,7 @@ def field_sort(y, z, x=None): m1, m2 = x[y], x[z] n1 = 'zzzzz' if m1['datatype'] == 'comments' else m1['name'] n2 = 'zzzzz' if m2['datatype'] == 'comments' else m2['name'] - return cmp(n1.lower(), n2.lower()) + return cmp(sort_key(n1), sort_key(n2)) def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, parent=None): def widget_factory(type, col): @@ -526,7 +527,7 @@ class BulkSeries(BulkBase): def setup_ui(self, parent): values = self.all_values = list(self.db.all_custom(num=self.col_id)) - values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower())) + values.sort(key=sort_key) w = EnComboBox(parent) w.setSizeAdjustPolicy(w.AdjustToMinimumContentsLengthWithIcon) w.setMinimumContentsLength(25) @@ -678,7 +679,7 @@ class BulkText(BulkBase): def setup_ui(self, parent): values = self.all_values = list(self.db.all_custom(num=self.col_id)) - values.sort(cmp = lambda x,y: cmp(x.lower(), y.lower())) + values.sort(key=sort_key) if self.col_metadata['is_multiple']: w = TagsLineEdit(parent, values) w.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Preferred) diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 7c6125d537..362091eb2d 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -17,6 +17,7 @@ from calibre.gui2 import error_dialog from calibre.gui2.progress_indicator import ProgressIndicator from calibre.utils.config import dynamic from calibre.utils.titlecase import titlecase +from calibre.utils.icu import sort_key class MyBlockingBusy(QDialog): @@ -594,7 +595,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): def initalize_authors(self): all_authors = self.db.all_authors() - all_authors.sort(cmp=lambda x, y : cmp(x[1].lower(), y[1].lower())) + all_authors.sort(key=lambda x : sort_key(x[1])) for i in all_authors: id, name = i @@ -604,7 +605,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): def initialize_series(self): all_series = self.db.all_series() - all_series.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_series.sort(key=lambda x : sort_key(x[1])) for i in all_series: id, name = i @@ -613,7 +614,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): def initialize_publisher(self): all_publishers = self.db.all_publishers() - all_publishers.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_publishers.sort(key=lambda x : sort_key(x[1])) for i in all_publishers: id, name = i diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index f50be281d7..8f068075cf 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -28,6 +28,7 @@ from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata import MetaInformation from calibre.utils.config import prefs, tweaks from calibre.utils.date import qt_to_dt, local_tz, utcfromtimestamp +from calibre.utils.icu import sort_key from calibre.customize.ui import run_plugins_on_import, get_isbndb_key from calibre.gui2.preferences.social import SocialMetadata from calibre.gui2.custom_column_widgets import populate_metadata_page @@ -660,7 +661,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): def initalize_authors(self): all_authors = self.db.all_authors() - all_authors.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_authors.sort(key=lambda x : sort_key(x[1])) for i in all_authors: id, name = i name = [name.strip().replace('|', ',') for n in name.split(',')] @@ -675,7 +676,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): def initialize_series(self): self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow) all_series = self.db.all_series() - all_series.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_series.sort(key=lambda x : sort_key(x[1])) series_id = self.db.series_id(self.row) idx, c = None, 0 for i in all_series: @@ -692,7 +693,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): def initialize_publisher(self): all_publishers = self.db.all_publishers() - all_publishers.sort(cmp=lambda x, y : cmp(x[1], y[1])) + all_publishers.sort(key=lambda x : sort_key(x[1])) publisher_id = self.db.publisher_id(self.row) idx, c = None, 0 for i in all_publishers: diff --git a/src/calibre/gui2/dialogs/saved_search_editor.py b/src/calibre/gui2/dialogs/saved_search_editor.py index 3f9f7ad437..1143a6f06a 100644 --- a/src/calibre/gui2/dialogs/saved_search_editor.py +++ b/src/calibre/gui2/dialogs/saved_search_editor.py @@ -8,6 +8,7 @@ from PyQt4.QtGui import QDialog from calibre.gui2.dialogs.saved_search_editor_ui import Ui_SavedSearchEditor from calibre.utils.search_query_parser import saved_searches +from calibre.utils.icu import sort_key from calibre.gui2.dialogs.confirm_delete import confirm class SavedSearchEditor(QDialog, Ui_SavedSearchEditor): @@ -34,7 +35,7 @@ class SavedSearchEditor(QDialog, Ui_SavedSearchEditor): def populate_search_list(self): self.search_name_box.clear() - for name in sorted(self.searches.keys()): + for name in sorted(self.searches.keys(), key=sort_key): self.search_name_box.addItem(name) def add_search(self): diff --git a/src/calibre/gui2/dialogs/search.py b/src/calibre/gui2/dialogs/search.py index ba09a34a68..8e8fd09652 100644 --- a/src/calibre/gui2/dialogs/search.py +++ b/src/calibre/gui2/dialogs/search.py @@ -8,6 +8,7 @@ from PyQt4.QtGui import QDialog, QDialogButtonBox from calibre.gui2.dialogs.search_ui import Ui_Dialog from calibre.library.caches import CONTAINS_MATCH, EQUALS_MATCH from calibre.gui2 import gprefs +from calibre.utils.icu import sort_key box_values = {} @@ -18,8 +19,7 @@ class SearchDialog(QDialog, Ui_Dialog): self.setupUi(self) self.mc = '' searchables = sorted(db.field_metadata.searchable_fields(), - lambda x, y: cmp(x if x[0] != '#' else x[1:], - y if y[0] != '#' else y[1:])) + key=lambda x: sort_key(x if x[0] != '#' else x[1:])) self.general_combo.addItems(searchables) self.box_last_values = copy.deepcopy(box_values) diff --git a/src/calibre/gui2/dialogs/tag_categories.py b/src/calibre/gui2/dialogs/tag_categories.py index 7a9660a655..210a2704bf 100644 --- a/src/calibre/gui2/dialogs/tag_categories.py +++ b/src/calibre/gui2/dialogs/tag_categories.py @@ -9,6 +9,7 @@ from PyQt4.QtGui import QDialog, QIcon, QListWidgetItem from calibre.gui2.dialogs.tag_categories_ui import Ui_TagCategories from calibre.gui2.dialogs.confirm_delete import confirm from calibre.constants import islinux +from calibre.utils.icu import sort_key class Item: def __init__(self, name, label, index, icon, exists): @@ -85,7 +86,7 @@ class TagCategories(QDialog, Ui_TagCategories): # remove any references to a category that no longer exists del self.categories[cat][item] - self.all_items_sorted = sorted(self.all_items, cmp=lambda x,y: cmp(x.name.lower(), y.name.lower())) + self.all_items_sorted = sorted(self.all_items, key=lambda x: sort_key(x.name)) self.display_filtered_categories(0) for v in category_names: @@ -135,7 +136,7 @@ class TagCategories(QDialog, Ui_TagCategories): index = self.all_items[node.data(Qt.UserRole).toPyObject()].index if index not in self.applied_items: self.applied_items.append(index) - self.applied_items.sort(cmp=lambda x, y:cmp(self.all_items[x].name.lower(), self.all_items[y].name.lower())) + self.applied_items.sort(key=lambda x:sort_key(self.all_items[x])) self.display_filtered_categories(None) def unapply_tags(self, node=None): @@ -198,5 +199,5 @@ class TagCategories(QDialog, Ui_TagCategories): self.categories[self.current_cat_name] = l def populate_category_list(self): - for n in sorted(self.categories.keys(), cmp=lambda x,y: cmp(x.lower(), y.lower())): + for n in sorted(self.categories.keys(), key=sort_key): self.category_box.addItem(n) diff --git a/src/calibre/gui2/dialogs/tag_editor.py b/src/calibre/gui2/dialogs/tag_editor.py index 34c61914fe..48a07c4b9e 100644 --- a/src/calibre/gui2/dialogs/tag_editor.py +++ b/src/calibre/gui2/dialogs/tag_editor.py @@ -6,12 +6,10 @@ from PyQt4.QtGui import QDialog from calibre.gui2.dialogs.tag_editor_ui import Ui_TagEditor from calibre.gui2 import question_dialog, error_dialog from calibre.constants import islinux +from calibre.utils.icu import sort_key class TagEditor(QDialog, Ui_TagEditor): - def tag_cmp(self, x, y): - return cmp(x.lower(), y.lower()) - def __init__(self, window, db, index=None): QDialog.__init__(self, window) Ui_TagEditor.__init__(self) @@ -25,7 +23,7 @@ class TagEditor(QDialog, Ui_TagEditor): tags = [] if tags: tags = [tag.strip() for tag in tags.split(',') if tag.strip()] - tags.sort(cmp=self.tag_cmp) + tags.sort(key=sort_key) for tag in tags: self.applied_tags.addItem(tag) else: @@ -35,7 +33,7 @@ class TagEditor(QDialog, Ui_TagEditor): all_tags = [tag for tag in self.db.all_tags()] all_tags = list(set(all_tags)) - all_tags.sort(cmp=self.tag_cmp) + all_tags.sort(key=sort_key) for tag in all_tags: if tag not in tags: self.available_tags.addItem(tag) @@ -82,7 +80,7 @@ class TagEditor(QDialog, Ui_TagEditor): self.tags.append(tag) self.available_tags.takeItem(self.available_tags.row(item)) - self.tags.sort(cmp=self.tag_cmp) + self.tags.sort(key=sort_key) self.applied_tags.clear() for tag in self.tags: self.applied_tags.addItem(tag) @@ -96,14 +94,14 @@ class TagEditor(QDialog, Ui_TagEditor): self.tags.remove(tag) self.available_tags.addItem(tag) - self.tags.sort(cmp=self.tag_cmp) + self.tags.sort(key=sort_key) self.applied_tags.clear() for tag in self.tags: self.applied_tags.addItem(tag) items = [unicode(self.available_tags.item(x).text()) for x in range(self.available_tags.count())] - items.sort(cmp=self.tag_cmp) + items.sort(key=sort_key) self.available_tags.clear() for item in items: self.available_tags.addItem(item) @@ -117,7 +115,7 @@ class TagEditor(QDialog, Ui_TagEditor): if tag not in self.tags: self.tags.append(tag) - self.tags.sort(cmp=self.tag_cmp) + self.tags.sort(key=sort_key) self.applied_tags.clear() for tag in self.tags: self.applied_tags.addItem(tag) diff --git a/src/calibre/gui2/dialogs/tag_list_editor.py b/src/calibre/gui2/dialogs/tag_list_editor.py index 7cdc0a089a..a7d6fe03e7 100644 --- a/src/calibre/gui2/dialogs/tag_list_editor.py +++ b/src/calibre/gui2/dialogs/tag_list_editor.py @@ -39,7 +39,7 @@ class ListWidgetItem(QListWidgetItem): class TagListEditor(QDialog, Ui_TagListEditor): - def __init__(self, window, tag_to_match, data, compare): + def __init__(self, window, tag_to_match, data, key): QDialog.__init__(self, window) Ui_TagListEditor.__init__(self) self.setupUi(self) @@ -54,7 +54,7 @@ class TagListEditor(QDialog, Ui_TagListEditor): for k,v in data: self.all_tags[v] = k - for tag in sorted(self.all_tags.keys(), cmp=compare): + for tag in sorted(self.all_tags.keys(), key=key): item = ListWidgetItem(tag) item.setData(Qt.UserRole, self.all_tags[tag]) self.available_tags.addItem(item) diff --git a/src/calibre/gui2/dialogs/user_profiles.py b/src/calibre/gui2/dialogs/user_profiles.py index 6901e13968..71c9ebcd04 100644 --- a/src/calibre/gui2/dialogs/user_profiles.py +++ b/src/calibre/gui2/dialogs/user_profiles.py @@ -13,6 +13,7 @@ from calibre.gui2 import error_dialog, question_dialog, open_url, \ choose_files, ResizableDialog, NONE from calibre.gui2.widgets import PythonHighlighter from calibre.ptempfile import PersistentTemporaryFile +from calibre.utils.icu import sort_key class CustomRecipeModel(QAbstractListModel): @@ -256,7 +257,7 @@ class %(classname)s(%(base_class)s): def add_builtin_recipe(self): from calibre.web.feeds.recipes.collection import \ get_builtin_recipe_by_title, get_builtin_recipe_titles - items = sorted(get_builtin_recipe_titles()) + items = sorted(get_builtin_recipe_titles(), key=sort_key) title, ok = QInputDialog.getItem(self, _('Pick recipe'), _('Pick the recipe to customize'), diff --git a/src/calibre/gui2/library/delegates.py b/src/calibre/gui2/library/delegates.py index 03309d1fba..8b6c2a8ae5 100644 --- a/src/calibre/gui2/library/delegates.py +++ b/src/calibre/gui2/library/delegates.py @@ -20,6 +20,7 @@ from calibre.gui2.widgets import EnLineEdit, TagsLineEdit from calibre.utils.date import now, format_date from calibre.utils.config import tweaks from calibre.utils.formatter import validation_formatter +from calibre.utils.icu import sort_key from calibre.gui2.dialogs.comments_dialog import CommentsDialog class RatingDelegate(QStyledItemDelegate): # {{{ @@ -173,7 +174,8 @@ class TagsDelegate(QStyledItemDelegate): # {{{ editor = TagsLineEdit(parent, self.db.all_tags()) else: editor = TagsLineEdit(parent, - sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col))))) + sorted(list(self.db.all_custom(label=self.db.field_metadata.key_to_label(col))), + key=sort_key)) return editor else: editor = EnLineEdit(parent) @@ -245,7 +247,8 @@ class CcTextDelegate(QStyledItemDelegate): # {{{ editor.setDecimals(2) else: editor = EnLineEdit(parent) - complete_items = sorted(list(m.db.all_custom(label=m.db.field_metadata.key_to_label(col)))) + complete_items = sorted(list(m.db.all_custom(label=m.db.field_metadata.key_to_label(col))), + key=sort_key) completer = QCompleter(complete_items, self) completer.setCaseSensitivity(Qt.CaseInsensitive) completer.setCompletionMode(QCompleter.PopupCompletion) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index e09f85dc6b..e854ffc1bc 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -18,6 +18,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import tweaks, prefs from calibre.utils.date import dt_factory, qt_to_dt, isoformat +from calibre.utils.icu import sort_key from calibre.ebooks.metadata.meta import set_metadata as _set_metadata from calibre.utils.search_query_parser import SearchQueryParser from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \ @@ -305,9 +306,10 @@ class BooksModel(QAbstractTableModel): # {{{ cdata = self.cover(idx) if cdata: data['cover'] = cdata - tags = self.db.tags(idx) + tags = list(self.db.get_tags(self.db.id(idx))) if tags: - tags = tags.replace(',', ', ') + tags.sort(key=sort_key) + tags = ', '.join(tags) else: tags = _('None') data[_('Tags')] = tags @@ -544,7 +546,7 @@ class BooksModel(QAbstractTableModel): # {{{ def tags(r, idx=-1): tags = self.db.data[r][idx] if tags: - return QVariant(', '.join(sorted(tags.split(',')))) + return QVariant(', '.join(sorted(tags.split(','), key=sort_key))) return None def series_type(r, idx=-1, siix=-1): @@ -595,7 +597,7 @@ class BooksModel(QAbstractTableModel): # {{{ def text_type(r, mult=False, idx=-1): text = self.db.data[r][idx] if text and mult: - return QVariant(', '.join(sorted(text.split('|')))) + return QVariant(', '.join(sorted(text.split('|'),key=sort_key))) return QVariant(text) def number_type(r, idx=-1): @@ -1033,8 +1035,8 @@ class DeviceBooksModel(BooksModel): # {{{ x, y = int(self.db[x].size), int(self.db[y].size) return cmp(x, y) def tagscmp(x, y): - x = ','.join(sorted(getattr(self.db[x], 'device_collections', []))).lower() - y = ','.join(sorted(getattr(self.db[y], 'device_collections', []))).lower() + x = ','.join(sorted(getattr(self.db[x], 'device_collections', []),key=sort_key)) + y = ','.join(sorted(getattr(self.db[y], 'device_collections', []),key=sort_key)) return cmp(x, y) def libcmp(x, y): x, y = self.db[x].in_library, self.db[y].in_library @@ -1211,7 +1213,7 @@ class DeviceBooksModel(BooksModel): # {{{ elif cname == 'collections': tags = self.db[self.map[row]].device_collections if tags: - tags.sort(cmp=lambda x,y: cmp(x.lower(), y.lower())) + tags.sort(key=sort_key) return QVariant(', '.join(tags)) elif DEBUG and cname == 'inlibrary': return QVariant(self.db[self.map[row]].in_library) diff --git a/src/calibre/gui2/preferences/behavior.py b/src/calibre/gui2/preferences/behavior.py index d6d6d7be23..169a2b76fe 100644 --- a/src/calibre/gui2/preferences/behavior.py +++ b/src/calibre/gui2/preferences/behavior.py @@ -19,6 +19,7 @@ from calibre.utils.search_query_parser import saved_searches from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks.oeb.iterator import is_supported from calibre.constants import iswindows +from calibre.utils.icu import sort_key class ConfigWidget(ConfigWidgetBase, Ui_Form): @@ -45,8 +46,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): choices = [(x.upper(), x) for x in output_formats] r('output_format', prefs, choices=choices) - restrictions = sorted(saved_searches().names(), - cmp=lambda x,y: cmp(x.lower(), y.lower())) + restrictions = sorted(saved_searches().names(), key=sort_key) choices = [('', '')] + [(x, x) for x in restrictions] r('gui_restriction', db.prefs, choices=choices) r('new_book_tags', prefs, setting=CommaSeparatedList) diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index c85dafc6d8..3d34d6c6e9 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -17,6 +17,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.saved_search_editor import SavedSearchEditor from calibre.gui2.dialogs.search import SearchDialog from calibre.utils.search_query_parser import saved_searches +from calibre.utils.icu import sort_key class SearchLineEdit(QLineEdit): # {{{ key_pressed = pyqtSignal(object) @@ -417,7 +418,7 @@ class SavedSearchBoxMixin(object): # {{{ b.setStatusTip(b.toolTip()) def saved_searches_changed(self): - p = sorted(saved_searches().names(), cmp=lambda x,y: cmp(x.lower(), y.lower())) + p = sorted(saved_searches().names(), key=sort_key) t = unicode(self.search_restriction.currentText()) # rebuild the restrictions combobox using current saved searches self.search_restriction.clear() diff --git a/src/calibre/gui2/shortcuts.py b/src/calibre/gui2/shortcuts.py index 24395a22b6..bdd699a69d 100644 --- a/src/calibre/gui2/shortcuts.py +++ b/src/calibre/gui2/shortcuts.py @@ -14,6 +14,7 @@ from PyQt4.Qt import QAbstractListModel, Qt, QKeySequence, QListView, \ from calibre.gui2 import NONE, error_dialog from calibre.utils.config import XMLConfig +from calibre.utils.icu import sort_key from calibre.gui2.shortcuts_ui import Ui_Frame DEFAULTS = Qt.UserRole @@ -175,8 +176,7 @@ class Shortcuts(QAbstractListModel): for k, v in shortcuts.items(): self.keys[k] = v[0] self.order = list(shortcuts) - self.order.sort(cmp=lambda x,y : cmp(self.descriptions[x], - self.descriptions[y])) + self.order.sort(key=lambda x : sort_key(self.descriptions[x])) self.sequences = {} for k, v in self.keys.items(): self.sequences[k] = [QKeySequence(x) for x in v] diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index 768b699ca9..fdae1bdbc9 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -18,6 +18,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, \ from calibre.ebooks.metadata import title_sort from calibre.gui2 import config, NONE from calibre.library.field_metadata import TagsIcons, category_icon_map +from calibre.utils.icu import sort_key from calibre.utils.search_query_parser import saved_searches from calibre.gui2 import error_dialog from calibre.gui2.dialogs.confirm_delete import confirm @@ -225,7 +226,7 @@ class TagsView(QTreeView): # {{{ partial(self.context_menu_handler, action='hide', category=category)) if self.hidden_categories: m = self.context_menu.addMenu(_('Show category')) - for col in sorted(self.hidden_categories, cmp=lambda x,y: cmp(x.lower(), y.lower())): + for col in sorted(self.hidden_categories, key=sort_key): m.addAction(col, partial(self.context_menu_handler, action='show', category=col)) @@ -599,7 +600,8 @@ class TagsModel(QAbstractItemModel): # {{{ # Reconstruct the user categories, putting them into metadata self.db.field_metadata.remove_dynamic_categories() tb_cats = self.db.field_metadata - for user_cat in sorted(self.db.prefs.get('user_categories', {}).keys()): + for user_cat in sorted(self.db.prefs.get('user_categories', {}).keys(), + key=sort_key): cat_name = user_cat+':' # add the ':' to avoid name collision tb_cats.add_user_category(label=cat_name, name=user_cat) if len(saved_searches().names()): @@ -878,13 +880,13 @@ class TagBrowserMixin(object): # {{{ db=self.library_view.model().db if category == 'tags': result = db.get_tags_with_ids() - compare = (lambda x,y:cmp(x.lower(), y.lower())) + key = sort_key elif category == 'series': result = db.get_series_with_ids() - compare = (lambda x,y:cmp(title_sort(x).lower(), title_sort(y).lower())) + key = lambda x:sort_key(title_sort(x)) elif category == 'publisher': result = db.get_publishers_with_ids() - compare = (lambda x,y:cmp(x.lower(), y.lower())) + key = sort_key else: # should be a custom field cc_label = None if category in db.field_metadata: @@ -892,9 +894,9 @@ class TagBrowserMixin(object): # {{{ result = db.get_custom_items_with_ids(label=cc_label) else: result = [] - compare = (lambda x,y:cmp(x.lower(), y.lower())) + key = sort_key - d = TagListEditor(self, tag_to_match=tag, data=result, compare=compare) + d = TagListEditor(self, tag_to_match=tag, data=result, key=key) d.exec_() if d.result() == d.Accepted: to_rename = d.to_rename # dict of new text to old id diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 4e05aa3a95..4efb5e6233 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -14,6 +14,7 @@ from operator import itemgetter from PyQt4.QtGui import QImage + from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.library.database import LibraryDatabase @@ -33,6 +34,7 @@ from calibre import isbytestring from calibre.utils.filenames import ascii_filename from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp from calibre.utils.config import prefs, tweaks +from calibre.utils.icu import sort_key from calibre.utils.search_query_parser import saved_searches, set_saved_searches from calibre.ebooks import BOOK_EXTENSIONS, check_ebook_format from calibre.utils.magick.draw import save_cover_data_to @@ -287,7 +289,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): # Assumption is that someone else will fix them if they change. self.field_metadata.remove_dynamic_categories() tb_cats = self.field_metadata - for user_cat in sorted(self.prefs.get('user_categories', {}).keys()): + for user_cat in sorted(self.prefs.get('user_categories', {}).keys(), key=sort_key): cat_name = user_cat+':' # add the ':' to avoid name collision tb_cats.add_user_category(label=cat_name, name=user_cat) if len(saved_searches().names()): @@ -1065,7 +1067,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if sort == 'popularity': query += ' ORDER BY count DESC, sort ASC' elif sort == 'name': - query += ' ORDER BY sort ASC' + query += ' ORDER BY sort COLLATE icucollate' else: query += ' ORDER BY avg_rating DESC, sort ASC' data = self.conn.get(query) @@ -1137,6 +1139,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): if sort == 'popularity': categories['formats'].sort(key=lambda x: x.count, reverse=True) else: # no ratings exist to sort on + # No need for ICU here. categories['formats'].sort(key = lambda x:x.name) #### Now do the user-defined categories. #### @@ -1151,7 +1154,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): for c in categories.keys(): taglist[c] = dict(map(lambda t:(t.name, t), categories[c])) - for user_cat in sorted(user_categories.keys()): + for user_cat in sorted(user_categories.keys(), key=sort_key): items = [] for (name,label,ign) in user_categories[user_cat]: if label in taglist and name in taglist[label]: @@ -1167,7 +1170,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): sorted(items, key=lambda x: x.count, reverse=True) elif sort == 'name': categories[cat_name] = \ - sorted(items, key=lambda x: x.sort.lower()) + sorted(items, key=lambda x: sort_key(x.sort)) else: categories[cat_name] = \ sorted(items, key=lambda x:x.avg_rating, reverse=True) diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py index b6dbde4c77..0a3a7f6fd2 100644 --- a/src/calibre/library/server/browse.py +++ b/src/calibre/library/server/browse.py @@ -16,6 +16,7 @@ from calibre import isbytestring, force_unicode, fit_image, \ from calibre.utils.ordered_dict import OrderedDict from calibre.utils.filenames import ascii_filename from calibre.utils.config import prefs +from calibre.utils.icu import sort_key from calibre.utils.magick import Image from calibre.library.comments import comments_to_html from calibre.library.server import custom_fields_to_display @@ -273,7 +274,7 @@ class BrowseServer(object): opts = ['<option %svalue="%s">%s</option>' % ( 'selected="selected" ' if k==sort else '', xml(k), xml(n), ) for k, n in - sorted(sort_opts, key=operator.itemgetter(1)) if k and n] + sorted(sort_opts, key=lambda x: sort_key(operator.itemgetter(1)(x))) if k and n] ans = ans.replace('{sort_select_options}', ('\n'+' '*20).join(opts)) lp = self.db.library_path if isbytestring(lp): @@ -337,8 +338,7 @@ class BrowseServer(object): return category_meta[x]['name'].lower() displayed_custom_fields = custom_fields_to_display(self.db) - for category in sorted(categories, - cmp=lambda x,y: cmp(getter(x), getter(y))): + for category in sorted(categories, key=lambda x: sort_key(getter(x))): if len(categories[category]) == 0: continue if category == 'formats': @@ -375,12 +375,7 @@ class BrowseServer(object): def browse_sort_categories(self, items, sort): if sort not in ('rating', 'name', 'popularity'): sort = 'name' - def sorter(x): - ans = getattr(x, 'sort', x.name) - if hasattr(ans, 'upper'): - ans = ans.upper() - return ans - items.sort(key=sorter) + items.sort(key=lambda x: sort_key(getattr(x, 'sort', x.name))) if sort == 'popularity': items.sort(key=operator.attrgetter('count'), reverse=True) elif sort == 'rating': @@ -703,7 +698,7 @@ class BrowseServer(object): args[field] fields.append((m['name'], r)) - fields.sort(key=lambda x: x[0].lower()) + fields.sort(key=lambda x: sort_key(x[0])) fields = [u'<div class="field">{0}</div>'.format(f[1]) for f in fields] fields = u'<div class="fields">%s</div>'%('\n\n'.join(fields)) diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index d38c2f206e..0992e6c30b 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -21,6 +21,7 @@ from calibre.constants import __appname__ from calibre import human_readable, isbytestring from calibre.utils.date import utcfromtimestamp from calibre.utils.filenames import ascii_filename +from calibre.utils.icu import sort_key def CLASS(*args, **kwargs): # class is a reserved word in Python kwargs['class'] = ' '.join(args) @@ -211,8 +212,7 @@ class MobileServer(object): CFM = self.db.field_metadata CKEYS = [key for key in sorted(custom_fields_to_display(self.db), - cmp=lambda x,y: cmp(CFM[x]['name'].lower(), - CFM[y]['name'].lower()))] + key=lambda x:sort_key(CFM[x]['name']))] # This method uses its own book dict, not the Metadata dict. The loop # below could be changed to use db.get_metadata instead of reading # info directly from the record made by the view, but it doesn't seem diff --git a/src/calibre/library/server/opds.py b/src/calibre/library/server/opds.py index 4b5db63ac3..af635ebf48 100644 --- a/src/calibre/library/server/opds.py +++ b/src/calibre/library/server/opds.py @@ -20,6 +20,7 @@ from calibre.library.comments import comments_to_html from calibre.library.server import custom_fields_to_display from calibre.library.server.utils import format_tag_string, Offsets from calibre import guess_type +from calibre.utils.icu import sort_key from calibre.utils.ordered_dict import OrderedDict BASE_HREFS = { @@ -279,8 +280,7 @@ class AcquisitionFeed(NavFeed): NavFeed.__init__(self, id_, updated, version, offsets, page_url, up_url) CFM = db.field_metadata CKEYS = [key for key in sorted(custom_fields_to_display(db), - cmp=lambda x,y: cmp(CFM[x]['name'].lower(), - CFM[y]['name'].lower()))] + key=lambda x: sort_key(CFM[x]['name']))] for item in items: self.root.append(ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix)) @@ -492,7 +492,7 @@ class OPDSServer(object): val = 'A' starts.add(val[0].upper()) category_groups = OrderedDict() - for x in sorted(starts, cmp=lambda x,y:cmp(x.lower(), y.lower())): + for x in sorted(starts, key=sort_key): category_groups[x] = len([y for y in items if getattr(y, 'sort', y.name).startswith(x)]) items = [Group(x, y) for x, y in category_groups.items()] @@ -571,8 +571,7 @@ class OPDSServer(object): ] def getter(x): return category_meta[x]['name'].lower() - for category in sorted(categories, - cmp=lambda x,y: cmp(getter(x), getter(y))): + for category in sorted(categories, key=lambda x: sort_key(getter(x))): if len(categories[category]) == 0: continue if category == 'formats': diff --git a/src/calibre/library/server/utils.py b/src/calibre/library/server/utils.py index 1407487db3..e58dd2f19b 100644 --- a/src/calibre/library/server/utils.py +++ b/src/calibre/library/server/utils.py @@ -13,6 +13,7 @@ import cherrypy from calibre import strftime as _strftime, prints, isbytestring from calibre.utils.date import now as nowf from calibre.utils.config import tweaks +from calibre.utils.icu import sort_key class Offsets(object): 'Calculate offsets for a paginated view' @@ -73,7 +74,7 @@ def format_tag_string(tags, sep, ignore_max=False, no_tag_count=False): tlist = [t.strip() for t in tags.split(sep)] else: tlist = [] - tlist.sort(cmp=lambda x,y:cmp(x.lower(), y.lower())) + tlist.sort(key=sort_key) if len(tlist) > MAX: tlist = tlist[:MAX]+['...'] if no_tag_count: diff --git a/src/calibre/library/server/xml.py b/src/calibre/library/server/xml.py index e99fc2839c..efbceb9771 100644 --- a/src/calibre/library/server/xml.py +++ b/src/calibre/library/server/xml.py @@ -17,6 +17,7 @@ from calibre.ebooks.metadata import fmt_sidx from calibre.constants import preferred_encoding from calibre import isbytestring from calibre.utils.filenames import ascii_filename +from calibre.utils.icu import sort_key E = ElementMaker() @@ -101,8 +102,7 @@ class XMLServer(object): CFM = self.db.field_metadata CKEYS = [key for key in sorted(custom_fields_to_display(self.db), - cmp=lambda x,y: cmp(CFM[x]['name'].lower(), - CFM[y]['name'].lower()))] + key=lambda x: sort_key(CFM[x]['name']))] custcols = [] for key in CKEYS: def concat(name, val): diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index 7a86447090..b4cad8061e 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -115,6 +115,9 @@ def pynocase(one, two, encoding='utf-8'): pass return cmp(one.lower(), two.lower()) +def icu_collator(s1, s2, func=None): + return cmp(func(unicode(s1)), func(unicode(s2))) + def load_c_extensions(conn, debug=DEBUG): try: conn.enable_load_extension(True) @@ -166,6 +169,8 @@ class DBThread(Thread): self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4())) # Dummy functions for dynamically created filters self.conn.create_function('books_list_filter', 1, lambda x: 1) + from calibre.utils.icu import sort_key + self.conn.create_collation('icucollate', partial(icu_collator, func=sort_key)) def run(self): try: diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py index 85a64956a8..db7c7bde5f 100644 --- a/src/calibre/utils/search_query_parser.py +++ b/src/calibre/utils/search_query_parser.py @@ -22,6 +22,7 @@ from calibre.utils.pyparsing import CaselessKeyword, Group, Forward, \ CharsNotIn, Suppress, OneOrMore, MatchFirst, CaselessLiteral, \ Optional, NoMatch, ParseException, QuotedString from calibre.constants import preferred_encoding +from calibre.utils.icu import sort_key @@ -65,8 +66,7 @@ class SavedSearchQueries(object): self.db.prefs[self.opt_name] = self.queries def names(self): - return sorted(self.queries.keys(), - cmp=lambda x,y: cmp(x.lower(), y.lower())) + return sorted(self.queries.keys(),key=sort_key) ''' Create a global instance of the saved searches. It is global so that the searches From 7f6c1712e14e04999ed4f2a7f9cf009491617d7b Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 4 Dec 2010 13:06:35 +0000 Subject: [PATCH 15/40] Provide a sort_order tweak --- resources/default_tweaks.py | 12 ++++++++++++ src/calibre/utils/icu.py | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 692349283f..1a371e5610 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -217,3 +217,15 @@ generate_cover_foot_font = None # open_viewer, do_nothing, edit_cell. Default: open_viewer. # Example: doubleclick_on_library_view = 'do_nothing' doubleclick_on_library_view = 'open_viewer' + + +# Language to use when sorting. Setting this tweak will force sorting to use the +# collating order for the specified language. This might be useful if you run +# calibre in English but want sorting to work in the language where you live. +# Set the tweak to the desired ISO 639-1 language code, in lower case. +# You can find the list of supported locales at +# http://publib.boulder.ibm.com/infocenter/iseries/v5r3/topic/nls/rbagsicusortsequencetables.htm +# Default: locale_for_sorting = '' -- use the language calibre displays in +# Example: locale_for_sorting = 'fr' -- sort using French rules. +# Example: locale_for_sorting = 'nb' -- sort using Norwegian rules. +locale_for_sorting = '' diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 7c2fd31f78..6b5fc0733a 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en' from functools import partial from calibre.constants import plugins +from calibre.utils.config import tweaks _icu = _collator = None @@ -32,7 +33,10 @@ def load_collator(): if _collator is None: icu = load_icu() if icu is not None: - _collator = icu.Collator(get_lang()) + if tweaks['locale_for_sorting']: + _collator = icu.Collator(tweaks['locale_for_sorting']) + else: + _collator = icu.Collator(get_lang()) return _collator From 9dd662ac0b21654a56e8909e18ee8769e6bdb2b3 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 4 Dec 2010 13:21:18 +0000 Subject: [PATCH 16/40] Fix #7787: saved search box is cleared whenever a saved search is selected. --- src/calibre/gui2/search_box.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/search_box.py b/src/calibre/gui2/search_box.py index 3d34d6c6e9..8849e2b2ec 100644 --- a/src/calibre/gui2/search_box.py +++ b/src/calibre/gui2/search_box.py @@ -205,7 +205,7 @@ class SearchBox2(QComboBox): # {{{ self.blockSignals(yes) self.line_edit.blockSignals(yes) - def set_search_string(self, txt, store_in_history=False): + def set_search_string(self, txt, store_in_history=False, emit_changed=True): self.setFocus(Qt.OtherFocusReason) if not txt: self.clear() @@ -213,7 +213,8 @@ class SearchBox2(QComboBox): # {{{ self.normalize_state() self.setEditText(txt) self.line_edit.end(False) - self.changed.emit() + if emit_changed: + self.changed.emit() self._do_search(store_in_history=store_in_history) self.focus_to_library.emit() @@ -293,7 +294,7 @@ class SavedSearchBox(QComboBox): # {{{ self.search_box.clear() self.setEditText(qname) return - self.search_box.set_search_string(u'search:"%s"' % qname) + self.search_box.set_search_string(u'search:"%s"' % qname, emit_changed=False) self.setEditText(qname) self.setToolTip(saved_searches().lookup(qname)) From d9cd3fd0b0b77185c7be38bf300815777e884be3 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 4 Dec 2010 14:10:35 +0000 Subject: [PATCH 17/40] Fix problem with sorting in collections --- src/calibre/devices/usbms/books.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index 4a74f68349..9e19e9c66d 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -237,7 +237,10 @@ class CollectionsBookList(BookList): return 1 if y is None: return -1 - c = cmp(sort_key(x), sort_key(y)) + if isinstance(x, unicode): + c = cmp(sort_key(x), sort_key(y)) + else: + c = cmp(x, y) if c != 0: return c # same as above -- no sort_key needed here From 0aab245a1df8f6b4c44b5f64df85230f0819fd0b Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 09:12:38 -0700 Subject: [PATCH 18/40] Fix memory leak in ICU sort_key --- src/calibre/utils/icu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index 1da14a0252..81f3ef4e9a 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -105,6 +105,7 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { if (buf == NULL) return PyErr_NoMemory(); u_strFromUTF8(buf, sz*4 + 1, &key_size, input, sz, &status); + PyMem_Free(input); if (U_SUCCESS(status)) { key_size = ucol_getSortKey(self->collator, buf, -1, NULL, 0); From d4c92f8d335c4fcbb652eb085db23b50c6caa739 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 11:39:05 -0500 Subject: [PATCH 19/40] TXT Output: Fix br conversion to space. --- src/calibre/ebooks/txt/txtml.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py index 48c94c2543..a3b4ed7afe 100644 --- a/src/calibre/ebooks/txt/txtml.py +++ b/src/calibre/ebooks/txt/txtml.py @@ -179,8 +179,7 @@ class TXTMLizer(object): text.append(u'\n\n') if tag in SPACE_TAGS: - if not end.endswith('u ') and hasattr(elem, 'text') and elem.text: - text.append(u' ') + text.append(u' ') # Process tags that contain text. if hasattr(elem, 'text') and elem.text: From 97bd4f161cb970b9aacf1e3b2a33721aaa4988a2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 09:45:45 -0700 Subject: [PATCH 20/40] Speed up ICU key generation by not preflighting in most cases --- src/calibre/utils/icu.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index 81f3ef4e9a..f981abe62e 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -108,11 +108,22 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { PyMem_Free(input); if (U_SUCCESS(status)) { - key_size = ucol_getSortKey(self->collator, buf, -1, NULL, 0); - buf2 = (uint8_t*)calloc(key_size + 1, sizeof(uint8_t)); + buf2 = (uint8_t*)calloc(7*sz+1, sizeof(uint8_t)); if (buf2 == NULL) return PyErr_NoMemory(); - ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1); - ans = PyBytes_FromString((char *)buf2); + + key_size = ucol_getSortKey(self->collator, buf, -1, buf2, 7*sz+1); + + if (key_size == 0) { + ans = PyBytes_FromString(""); + } else { + if (key_size >= 7*sz+1) { + free(buf2); + buf2 = (uint8_t*)calloc(key_size+1, sizeof(uint8_t)); + if (buf2 == NULL) return PyErr_NoMemory(); + ucol_getSortKey(self->collator, buf, -1, buf2, key_size+1); + } + ans = PyBytes_FromString((char *)buf2); + } free(buf2); } else ans = PyBytes_FromString(""); From 0e3a2fc39d82fdb1d44e946f6ac33c3d22a9aa4b Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 4 Dec 2010 16:52:27 +0000 Subject: [PATCH 21/40] Better sorting in collection management --- src/calibre/devices/usbms/books.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index 9e19e9c66d..e5a67463e7 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -216,14 +216,17 @@ class CollectionsBookList(BookList): elif is_series: if doing_dc: collections[cat_name][lpath] = \ - (book, book.get('series_index', sys.maxint), '') + (book, book.get('series_index', sys.maxint), + book.get('title_sort', 'zzzz')) else: collections[cat_name][lpath] = \ - (book, book.get(attr+'_index', sys.maxint), '') + (book, book.get(attr+'_index', sys.maxint), + book.get('title_sort', 'zzzz')) else: if lpath not in collections[cat_name]: collections[cat_name][lpath] = \ - (book, book.get('title_sort', 'zzzz'), '') + (book, book.get('title_sort', 'zzzz'), + book.get('title_sort', 'zzzz')) # Sort collections result = {} @@ -237,7 +240,7 @@ class CollectionsBookList(BookList): return 1 if y is None: return -1 - if isinstance(x, unicode): + if isinstance(x, (unicode, str)): c = cmp(sort_key(x), sort_key(y)) else: c = cmp(x, y) From 4aab356c6904a270c59c5956ec5e944bc6d65a3e Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 4 Dec 2010 17:21:44 +0000 Subject: [PATCH 22/40] Make sorting columns use a key function instead of cmp --- src/calibre/gui2/custom_column_widgets.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py index 8c41e3def1..5ab8bb6940 100644 --- a/src/calibre/gui2/custom_column_widgets.py +++ b/src/calibre/gui2/custom_column_widgets.py @@ -366,11 +366,10 @@ widgets = { 'enumeration': Enumeration } -def field_sort(y, z, x=None): - m1, m2 = x[y], x[z] +def field_sort_key(y, x=None): + m1 = x[y] n1 = 'zzzzz' if m1['datatype'] == 'comments' else m1['name'] - n2 = 'zzzzz' if m2['datatype'] == 'comments' else m2['name'] - return cmp(sort_key(n1), sort_key(n2)) + return sort_key(n1) def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, parent=None): def widget_factory(type, col): @@ -382,7 +381,7 @@ def populate_metadata_page(layout, db, book_id, bulk=False, two_column=False, pa return w x = db.custom_column_num_map cols = list(x) - cols.sort(cmp=partial(field_sort, x=x)) + cols.sort(key=partial(field_sort_key, x=x)) count_non_comment = len([c for c in cols if x[c]['datatype'] != 'comments']) layout.setColumnStretch(1, 10) From a6ad9f2c969c542b1e5b14a0d1fe8a378649045a Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 10:30:04 -0700 Subject: [PATCH 23/40] Add an ICU implementation of strcmp --- src/calibre/utils/icu.c | 36 ++++++++++++++++++++++++++++++++++++ src/calibre/utils/icu.py | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index f981abe62e..79a888f272 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -133,11 +133,47 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { return ans; } +// Collator.strcmp {{{ +static PyObject * +icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) { + char *a_, *b_; + size_t asz, bsz; + UChar *a, *b; + UErrorCode status = U_ZERO_ERROR; + UCollationResult res = UCOL_EQUAL; + + if (!PyArg_ParseTuple(args, "eses", "UTF-8", &a_, "UTF-8", &b_)) return NULL; + + asz = strlen(a_); bsz = strlen(b_); + + a = (UChar*)calloc(asz*4 + 1, sizeof(UChar)); + b = (UChar*)calloc(bsz*4 + 1, sizeof(UChar)); + + + if (a == NULL || b == NULL) return PyErr_NoMemory(); + + u_strFromUTF8(a, asz*4 + 1, NULL, a_, asz, &status); + u_strFromUTF8(b, bsz*4 + 1, NULL, b_, bsz, &status); + PyMem_Free(a_); PyMem_Free(b_); + + if (U_SUCCESS(status)) + res = ucol_strcoll(self->collator, a, -1, b, -1); + + free(a); free(b); + + return Py_BuildValue("i", res); +} + + static PyMethodDef icu_Collator_methods[] = { {"sort_key", (PyCFunction)icu_Collator_sort_key, METH_VARARGS, "sort_key(unicode object) -> Return a sort key for the given object as a bytestring. The idea is that these bytestring will sort using the builtin cmp function, just like the original unicode strings would sort in the current locale with ICU." }, + {"strcmp", (PyCFunction)icu_Collator_strcmp, METH_VARARGS, + "strcmp(unicode object, unicode object) -> strcmp(a, b) <=> cmp(sorty_key(a), sort_key(b)), but faster." + }, + {NULL} /* Sentinel */ }; diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 7c2fd31f78..5251380973 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -46,10 +46,35 @@ def icu_sort_key(collator, obj): return _none2 return collator.sort_key(obj.lower()) +def py_case_sensitive_sort_key(obj): + if not obj: + return _none + return obj + +def icu_case_sensitive_sort_key(collator, obj): + if not obj: + return _none2 + return collator.sort_key(obj) + +def icu_strcmp(collator, a, b): + return collator.strcmp(a.lower(), b.lower()) + +def py_strcmp(a, b): + return cmp(a.lower(), b.lower()) + +def icu_case_sensitive_strcmp(collator, a, b): + return collator.strcmp(a, b) + + load_icu() load_collator() -sort_key = py_sort_key if _icu is None or _collator is None else \ - partial(icu_sort_key, _collator) +_icu_not_ok = _icu is None or _collator is None + +sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator) +strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator) +case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \ + icu_case_sensitive_sort_key +case_sensitive_strcmp = cmp if _icu_not_ok else icu_case_sensitive_strcmp def test(): # {{{ @@ -137,6 +162,12 @@ pêché''' l = l.decode('utf-8').splitlines() return [x.strip() for x in l if x.strip()] + def test_strcmp(entries): + for x in entries: + for y in entries: + if strcmp(x, y) != cmp(sort_key(x), sort_key(y)): + print 'strcmp failed for %r, %r'%(x, y) + german = create(german) c = _icu.Collator('de') print 'Sorted german:: (%s)'%c.actual_locale @@ -156,5 +187,6 @@ pêché''' if fs != create(french_good): print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)' return + test_strcmp(german + french) # }}} From f424743046ec5905a5e4c561714ac048f344e3cd Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 11:29:08 -0700 Subject: [PATCH 24/40] News download: Localize the navigation bars in the downloaded news to the language the user has selected for their calibre interface --- src/calibre/web/feeds/templates.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index 6e8168be31..eefd897614 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -115,14 +115,14 @@ class FeedTemplate(Template): hr.tail = '| ' if f+1 < len(feeds): - link = A('Next section', href='../feed_%d/index.html'%(f+1)) + link = A(_('Next section'), href='../feed_%d/index.html'%(f+1)) link.tail = ' | ' navbar.append(link) - link = A('Main menu', href="../index.html") + link = A(_('Main menu'), href="../index.html") link.tail = ' | ' navbar.append(link) if f > 0: - link = A('Previous section', href='../feed_%d/index.html'%(f-1)) + link = A(_('Previous section'), href='../feed_%d/index.html'%(f-1)) link.tail = ' |' navbar.append(link) if top: @@ -208,17 +208,17 @@ class NavBarTemplate(Template): up = '../..' if art == number_of_articles_in_feed - 1 else '..' href = '%s%s/%s/index.html'%(prefix, up, next) navbar.text = '| ' - navbar.append(A('Next', href=href)) + navbar.append(A(_('Next'), href=href)) href = '%s../index.html#article_%d'%(prefix, art) navbar.iterchildren(reversed=True).next().tail = ' | ' - navbar.append(A('Section Menu', href=href)) + navbar.append(A(_('Section Menu'), href=href)) href = '%s../../index.html#feed_%d'%(prefix, feed) navbar.iterchildren(reversed=True).next().tail = ' | ' - navbar.append(A('Main Menu', href=href)) + navbar.append(A(_('Main Menu'), href=href)) if art > 0 and not bottom: href = '%s../article_%d/index.html'%(prefix, art-1) navbar.iterchildren(reversed=True).next().tail = ' | ' - navbar.append(A('Previous', href=href)) + navbar.append(A(_('Previous'), href=href)) navbar.iterchildren(reversed=True).next().tail = ' | ' if not bottom: navbar.append(HR()) @@ -300,7 +300,7 @@ class TouchscreenFeedTemplate(Template): navbar_tr.append(TD(CLASS('feed_prev'),link)) # Up to Sections - link = A('Sections', href="../index.html") + link = A(_('Sections'), href="../index.html") navbar_tr.append(TD(CLASS('feed_up'),link)) # Next Section @@ -381,16 +381,16 @@ class TouchscreenNavBarTemplate(Template): # | Previous if art > 0: - link = A(CLASS('article_link'),'Previous',href='%s../article_%d/index.html'%(prefix, art-1)) + link = A(CLASS('article_link'),_('Previous'),href='%s../article_%d/index.html'%(prefix, art-1)) navbar_tr.append(TD(CLASS('article_prev'),link)) else: navbar_tr.append(TD(CLASS('article_prev'),'')) # | Articles | Sections | - link = A(CLASS('articles_link'),'Articles', href='%s../index.html#article_%d'%(prefix, art)) + link = A(CLASS('articles_link'),_('Articles'), href='%s../index.html#article_%d'%(prefix, art)) navbar_tr.append(TD(CLASS('article_articles_list'),link)) - link = A(CLASS('sections_link'),'Sections', href='%s../../index.html#feed_%d'%(prefix, feed)) + link = A(CLASS('sections_link'),_('Sections'), href='%s../../index.html#feed_%d'%(prefix, feed)) navbar_tr.append(TD(CLASS('article_sections_list'),link)) # | Next @@ -398,7 +398,7 @@ class TouchscreenNavBarTemplate(Template): else 'article_%d'%(art+1) up = '../..' if art == number_of_articles_in_feed - 1 else '..' - link = A(CLASS('article_link'),'Next', href='%s%s/%s/index.html'%(prefix, up, next)) + link = A(CLASS('article_link'), _('Next'), href='%s%s/%s/index.html'%(prefix, up, next)) navbar_tr.append(TD(CLASS('article_next'),link)) navbar_t.append(navbar_tr) navbar.append(navbar_t) From b3868a5ef425054e400ada6a4de35cdc12cc8983 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 11:48:38 -0700 Subject: [PATCH 25/40] Add ICU case transform algorithms --- src/calibre/utils/icu.c | 154 ++++++++++++++++++++++++++++++++++++++- src/calibre/utils/icu.py | 26 ++++++- 2 files changed, 176 insertions(+), 4 deletions(-) diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index 79a888f272..51d9ac25ba 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -131,7 +131,7 @@ icu_Collator_sort_key(icu_Collator *self, PyObject *args, PyObject *kwargs) { if (ans == NULL) return PyErr_NoMemory(); return ans; -} +} // }}} // Collator.strcmp {{{ static PyObject * @@ -162,7 +162,8 @@ icu_Collator_strcmp(icu_Collator *self, PyObject *args, PyObject *kwargs) { free(a); free(b); return Py_BuildValue("i", res); -} +} // }}} + static PyMethodDef icu_Collator_methods[] = { @@ -242,7 +243,156 @@ static PyTypeObject icu_CollatorType = { // {{{ // Module initialization {{{ +// upper {{{ +static PyObject * +icu_upper(PyObject *self, PyObject *args) { + char *input, *ans, *buf3 = NULL; + const char *loc; + size_t sz; + UChar *buf, *buf2; + PyObject *ret; + UErrorCode status = U_ZERO_ERROR; + + + if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; + + sz = strlen(input); + + buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); + buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); + + + if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); + + u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); + u_strToUpper(buf2, sz*8, buf, -1, loc, &status); + + ans = input; + sz = u_strlen(buf2); + free(buf); + + if (U_SUCCESS(status) && sz > 0) { + buf3 = (char*)calloc(sz*5+1, sizeof(char)); + if (buf3 == NULL) return PyErr_NoMemory(); + u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); + if (U_SUCCESS(status)) ans = buf3; + } + + ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); + if (ret == NULL) return PyErr_NoMemory(); + + free(buf2); + if (buf3 != NULL) free(buf3); + PyMem_Free(input); + + return ret; +} + +// lower {{{ +static PyObject * +icu_lower(PyObject *self, PyObject *args) { + char *input, *ans, *buf3 = NULL; + const char *loc; + size_t sz; + UChar *buf, *buf2; + PyObject *ret; + UErrorCode status = U_ZERO_ERROR; + + + if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; + + sz = strlen(input); + + buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); + buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); + + + if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); + + u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); + u_strToLower(buf2, sz*8, buf, -1, loc, &status); + + ans = input; + sz = u_strlen(buf2); + free(buf); + + if (U_SUCCESS(status) && sz > 0) { + buf3 = (char*)calloc(sz*5+1, sizeof(char)); + if (buf3 == NULL) return PyErr_NoMemory(); + u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); + if (U_SUCCESS(status)) ans = buf3; + } + + ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); + if (ret == NULL) return PyErr_NoMemory(); + + free(buf2); + if (buf3 != NULL) free(buf3); + PyMem_Free(input); + + return ret; +} + +// title {{{ +static PyObject * +icu_title(PyObject *self, PyObject *args) { + char *input, *ans, *buf3 = NULL; + const char *loc; + size_t sz; + UChar *buf, *buf2; + PyObject *ret; + UErrorCode status = U_ZERO_ERROR; + + + if (!PyArg_ParseTuple(args, "ses", &loc, "UTF-8", &input)) return NULL; + + sz = strlen(input); + + buf = (UChar*)calloc(sz*4 + 1, sizeof(UChar)); + buf2 = (UChar*)calloc(sz*8 + 1, sizeof(UChar)); + + + if (buf == NULL || buf2 == NULL) return PyErr_NoMemory(); + + u_strFromUTF8(buf, sz*4, NULL, input, sz, &status); + u_strToTitle(buf2, sz*8, buf, -1, NULL, loc, &status); + + ans = input; + sz = u_strlen(buf2); + free(buf); + + if (U_SUCCESS(status) && sz > 0) { + buf3 = (char*)calloc(sz*5+1, sizeof(char)); + if (buf3 == NULL) return PyErr_NoMemory(); + u_strToUTF8(buf3, sz*5, NULL, buf2, -1, &status); + if (U_SUCCESS(status)) ans = buf3; + } + + ret = PyUnicode_DecodeUTF8(ans, strlen(ans), "replace"); + if (ret == NULL) return PyErr_NoMemory(); + + free(buf2); + if (buf3 != NULL) free(buf3); + PyMem_Free(input); + + return ret; +} + + + static PyMethodDef icu_methods[] = { + {"upper", icu_upper, METH_VARARGS, + "upper(locale, unicode object) -> upper cased unicode object using locale rules." + }, + + {"lower", icu_lower, METH_VARARGS, + "lower(locale, unicode object) -> lower cased unicode object using locale rules." + }, + + {"title", icu_title, METH_VARARGS, + "title(locale, unicode object) -> Title cased unicode object using locale rules." + }, + {NULL} /* Sentinel */ }; diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 5251380973..398d2fe75c 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -10,10 +10,18 @@ from functools import partial from calibre.constants import plugins _icu = _collator = None +_locale = None _none = u'' _none2 = b'' +def get_locale(): + global _locale + if _locale is None: + from calibre.utils.localization import get_lang + _locale = get_lang() + return _locale + def load_icu(): global _icu if _icu is None: @@ -28,11 +36,10 @@ def load_icu(): def load_collator(): global _collator - from calibre.utils.localization import get_lang if _collator is None: icu = load_icu() if icu is not None: - _collator = icu.Collator(get_lang()) + _collator = icu.Collator(get_locale()) return _collator @@ -76,6 +83,13 @@ case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \ icu_case_sensitive_sort_key case_sensitive_strcmp = cmp if _icu_not_ok else icu_case_sensitive_strcmp +upper = (lambda s: s.upper()) if _icu_not_ok else \ + partial(_icu.upper, get_locale()) +lower = (lambda s: s.lower()) if _icu_not_ok else \ + partial(_icu.lower, get_locale()) +title_case = (lambda s: s.title()) if _icu_not_ok else \ + partial(_icu.title, get_locale()) + def test(): # {{{ # Data {{{ @@ -188,5 +202,13 @@ pêché''' print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)' return test_strcmp(german + french) + + print '\nTesting case transforms in current locale' + for x in ('a', 'Alice\'s code'): + print 'Upper:', x, '->', 'py:', x.upper().encode('utf-8'), 'icu:', upper(x).encode('utf-8') + print 'Lower:', x, '->', 'py:', x.lower().encode('utf-8'), 'icu:', lower(x).encode('utf-8') + print 'Title:', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8') + print + # }}} From e021f02e3522958a64d28f67580fa57e07517901 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 11:49:56 -0700 Subject: [PATCH 26/40] ... --- src/calibre/utils/icu.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 398d2fe75c..83ada4e23e 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -77,19 +77,26 @@ load_icu() load_collator() _icu_not_ok = _icu is None or _collator is None + +################# The string functions ######################################## sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator) + strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator) + case_sensitive_sort_key = py_case_sensitive_sort_key if _icu_not_ok else \ icu_case_sensitive_sort_key + case_sensitive_strcmp = cmp if _icu_not_ok else icu_case_sensitive_strcmp upper = (lambda s: s.upper()) if _icu_not_ok else \ partial(_icu.upper, get_locale()) + lower = (lambda s: s.lower()) if _icu_not_ok else \ partial(_icu.lower, get_locale()) + title_case = (lambda s: s.title()) if _icu_not_ok else \ partial(_icu.title, get_locale()) - +################################################################################ def test(): # {{{ # Data {{{ From 8716c5611fbee4432a71b261b1da809258f05930 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 11:50:36 -0700 Subject: [PATCH 27/40] ... --- src/calibre/utils/icu.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 83ada4e23e..fed6e0b89b 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -5,6 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' +# Setup code {{{ from functools import partial from calibre.constants import plugins @@ -77,8 +78,10 @@ load_icu() load_collator() _icu_not_ok = _icu is None or _collator is None +# }}} ################# The string functions ######################################## + sort_key = py_sort_key if _icu_not_ok else partial(icu_sort_key, _collator) strcmp = py_strcmp if _icu_not_ok else partial(icu_strcmp, _collator) @@ -96,6 +99,7 @@ lower = (lambda s: s.lower()) if _icu_not_ok else \ title_case = (lambda s: s.title()) if _icu_not_ok else \ partial(_icu.title, get_locale()) + ################################################################################ def test(): # {{{ From 2fe5c21da81535f1a20f99f910a25c81ac759543 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sat, 4 Dec 2010 20:45:17 +0000 Subject: [PATCH 28/40] Make sort locale tweak work with new icu.py --- src/calibre/utils/icu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index eaf236083a..796eac9dbc 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en' from functools import partial from calibre.constants import plugins +from calibre.utils.config import tweaks _icu = _collator = None _locale = None From fc39c1ad11979b721830a6845a5f625229fc7f8f Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 16:57:13 -0500 Subject: [PATCH 29/40] FB2 Output: Rewrite and restructure. --- src/calibre/ebooks/fb2/fb2ml.py | 226 ++++++++++++++++---------------- 1 file changed, 114 insertions(+), 112 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 252453d25e..f27729ae8c 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -26,32 +26,6 @@ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES -TAG_MAP = { - 'b' : 'strong', - 'i' : 'emphasis', - 'p' : 'p', - 'li' : 'p', - 'div': 'p', -} - -TAG_SPACE = [] - -TAG_IMAGES = [ - 'img', -] - -TAG_LINKS = [ -] - -BLOCK = [ - 'p', -] - -STYLES = [ - ('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}), - ('font-style', {'italic' : 'emphasis'}), -] - class FB2MLizer(object): ''' Todo: * Include more FB2 specific tags in the conversion. @@ -63,24 +37,32 @@ class FB2MLizer(object): def __init__(self, log): self.log = log self.image_hrefs = {} + self.reset_state() + + def reset_state(self): # Used to ensure text and tags are always within <p> and </p> self.in_p = False + # Mapping of image names. OEB allows for images to have the same name but be stored + # in different directories. FB2 images are all in a flat layout so we rename all images + # into a sequential numbering system to ensure there are no collisions between image names. + self.image_hrefs = {} def extract_content(self, oeb_book, opts): self.log.info('Converting XHTML to FB2 markup...') self.oeb_book = oeb_book self.opts = opts + return self.fb2mlize_spine() def fb2mlize_spine(self): - self.image_hrefs = {} - self.link_hrefs = {} + self.reset_state() + output = [self.fb2_header()] output.append(self.get_text()) - output.append(self.fb2_body_footer()) output.append(self.fb2mlize_images()) output.append(self.fb2_footer()) output = self.clean_text(u''.join(output)) + if self.opts.pretty_print: return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) else: @@ -127,24 +109,21 @@ class FB2MLizer(object): '<document-info>' \ '<program-used>%s %s</program-used>' \ '</document-info>' \ - '</description><body>' % tuple(map(prepare_string_for_xml, (author_first, author_middle, author_last, + '</description>' % tuple(map(prepare_string_for_xml, (author_first, author_middle, author_last, self.oeb_book.metadata.title[0].value, __appname__, __version__))) + def fb2_footer(self): + return u'</FictionBook>' + def get_text(self): - text = [] + text = ['<body>'] for item in self.oeb_book.spine: self.log.debug('Converting %s to FictionBook2 XML' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) text.append('<section>') text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) text.append('</section>') - return ''.join(text) - - def fb2_body_footer(self): - return u'</body>' - - def fb2_footer(self): - return u'</FictionBook>' + return ''.join(text) + '</body>' def fb2mlize_images(self): images = [] @@ -178,24 +157,6 @@ class FB2MLizer(object): else: self.in_p = True return ['<p>'], ['p'] - - def insert_empty_line(self, tags): - if self.in_p: - text = [''] - closed_tags = [] - tags.reverse() - for t in tags: - text.append('</%s>' % t) - closed_tags.append(t) - if t == 'p': - break - text.append('<empty-line />') - closed_tags.reverse() - for t in closed_tags: - text.append('<%s>' % t) - return text - else: - return ['<empty-line />'] def close_open_p(self, tags): text = [''] @@ -219,84 +180,125 @@ class FB2MLizer(object): self.in_p = True return text, added_p + + def handle_simple_tag(self, tag, tags): + s_out = [] + s_tags = [] + if tag not in tags: + p_out, p_tags = self.ensure_p() + s_out += p_out + s_tags += p_tags + s_out.append('<%s>' % tag) + s_tags.append(tag) + return s_out, s_tags - def dump_text(self, elem, stylizer, page, tag_stack=[]): - if not isinstance(elem.tag, basestring) \ - or namespace(elem.tag) != XHTML_NS: + def dump_text(self, elem_tree, stylizer, page, tag_stack=[]): + ''' + This function is intended to be used in a recursive manner. dump_text will + run though all elements in the elem_tree and call itself on each element. + + self.image_hrefs will be populated by calling this function. + + @param elem_tree: etree representation of XHTML content to be transformed. + @param stylizer: Used to track the style of elements within the tree. + @param page: OEB page used to determine absolute urls. + @param tag_stack: List of open FB2 tags to take into account. + + @return: List of string representing the XHTML converted to FB2 markup. + ''' + # Ensure what we are converting is not a string and that the fist tag is part of the XHTML namespace. + if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS: return [] - style = stylizer.style(elem) - if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ - or style['visibility'] == 'hidden': + style = stylizer.style(elem_tree) + if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or style['visibility'] == 'hidden': return [] - fb2_text = [] + # FB2 generated output. + fb2_out = [] + # FB2 tags in the order they are opened. This will be used to close the tags. tags = [] + # First tag in tree + tag = barename(elem_tree.tag) - tag = barename(elem.tag) - - if tag in TAG_IMAGES: - if elem.attrib.get('src', None): - if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys(): - self.image_hrefs[page.abshref(elem.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys()) - p_txt, p_tag = self.ensure_p() - fb2_text += p_txt - tags += p_tag - fb2_text.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem.attrib['src'])]) - + # Process the XHTML tag if it needs to be converted to an FB2 tag. if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title: - fb2_text.append('<title>') + fb2_out.append('<title>') tags.append('title') - if tag == 'br': - fb2_text += self.insert_empty_line(tag_stack+tags) - - fb2_tag = TAG_MAP.get(tag, None) - if fb2_tag == 'p': + if tag == 'img': + # TODO: Check that the image is in the manifest and only write the tag if it is. + if elem_tree.attrib.get('src', None): + if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys(): + self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys()) + p_txt, p_tag = self.ensure_p() + fb2_out += p_txt + tags += p_tag + fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])]) + elif tag == 'br': + if self.in_p: + closed_tags = [] + open_tags = tag_stack+tags + open_tags.reverse() + for t in open_tags: + fb2_out.append('</%s>' % t) + closed_tags.append(t) + if t == 'p': + break + fb2_out.append('<empty-line />') + closed_tags.reverse() + for t in closed_tags: + fb2_out.append('<%s>' % t) + else: + fb2_out.append('<empty-line />') + elif tag in ('div', 'li', 'p'): p_text, added_p = self.close_open_p(tag_stack+tags) - fb2_text += p_text + fb2_out += p_text if added_p: tags.append('p') - elif fb2_tag and fb2_tag not in tag_stack+tags: - p_text, p_tags = self.ensure_p() - fb2_text += p_text - tags += p_tags - fb2_text.append('<%s>' % fb2_tag) - tags.append(fb2_tag) + elif tag == 'b': + s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags) + fb2_out += s_out + tags += s_tags + elif tag == 'i': + s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags) + fb2_out += s_out + tags += s_tags - # Processes style information - for s in STYLES: - style_tag = s[1].get(style[s[0]], None) - if style_tag and style_tag not in tag_stack+tags: - p_text, p_tags = self.ensure_p() - fb2_text += p_text - tags += p_tags - fb2_text.append('<%s>' % style_tag) - tags.append(style_tag) - - if tag in TAG_SPACE: - fb2_text.append(' ') - - if hasattr(elem, 'text') and elem.text: + # Processes style information. + if style['font-style'] == 'italic': + s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags) + fb2_out += s_out + tags += s_tags + elif style['font-weight'] in ('bold', 'bolder'): + s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags) + fb2_out += s_out + tags += s_tags + + # Process element text. + if hasattr(elem_tree, 'text') and elem_tree.text: if not self.in_p: - fb2_text.append('<p>') - fb2_text.append(prepare_string_for_xml(elem.text)) + fb2_out.append('<p>') + fb2_out.append(prepare_string_for_xml(elem_tree.text)) if not self.in_p: - fb2_text.append('</p>') + fb2_out.append('</p>') - for item in elem: - fb2_text += self.dump_text(item, stylizer, page, tag_stack+tags) + # Process sub-elements. + for item in elem_tree: + fb2_out += self.dump_text(item, stylizer, page, tag_stack+tags) + # Close open FB2 tags. tags.reverse() - fb2_text += self.close_tags(tags) + fb2_out += self.close_tags(tags) - if hasattr(elem, 'tail') and elem.tail: + # Process element text that comes after the close of the XHTML tag but before the next XHTML tag. + if hasattr(elem_tree, 'tail') and elem_tree.tail: if not self.in_p: - fb2_text.append('<p>') - fb2_text.append(prepare_string_for_xml(elem.tail)) + fb2_out.append('<p>') + fb2_out.append(prepare_string_for_xml(elem_tree.tail)) if not self.in_p: - fb2_text.append('</p>') + fb2_out.append('</p>') - return fb2_text + return fb2_out def close_tags(self, tags): text = [] From bc669a1f987d80523df4a4ac61b0c35039104ad7 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 19:47:16 -0500 Subject: [PATCH 30/40] FB2 Output: Produce correct metadata header. --- src/calibre/ebooks/fb2/fb2ml.py | 55 +++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index f27729ae8c..d83dc45a0f 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -10,6 +10,7 @@ Transform OEB content into FB2 markup import cStringIO from base64 import b64encode +from datetime import datetime import re try: @@ -79,38 +80,54 @@ class FB2MLizer(object): return text def fb2_header(self): - author_first = u'' - author_middle = u'' - author_last = u'' + metadata = {} + metadata['author_first'] = u'' + metadata['author_middle'] = u'' + metadata['author_last'] = u'' + metadata['title'] = self.oeb_book.metadata.title[0].value + metadata['appname'] = __appname__ + metadata['version'] = __version__ + metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) + metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' + author_parts = self.oeb_book.metadata.creator[0].value.split(' ') - if len(author_parts) == 1: - author_last = author_parts[0] + metadata['author_last'] = author_parts[0] elif len(author_parts) == 2: - author_first = author_parts[0] - author_last = author_parts[1] + metadata['author_first'] = author_parts[0] + metadata['author_last'] = author_parts[1] else: - author_first = author_parts[0] - author_middle = ' '.join(author_parts[1:-2]) - author_last = author_parts[-1] + metadata['author_first'] = author_parts[0] + metadata['author_middle'] = ' '.join(author_parts[1:-2]) + metadata['author_last'] = author_parts[-1] + + for key, value in metadata.items(): + metadata[key] = prepare_string_for_xml(value) return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \ '<description>' \ '<title-info>' \ - '<genre></genre>' \ + '<genre>antique</genre>' \ '<author>' \ - '<first-name>%s</first-name>' \ - '<middle-name>%s</middle-name>' \ - '<last-name>%s</last-name>' \ + '<first-name>%(author_first)s</first-name>' \ + '<middle-name>%(author_middle)s</middle-name>' \ + '<last-name>%(author_last)s</last-name>' \ '</author>' \ - '<book-title>%s</book-title>' \ - '<annotation><p/></annotation>' \ + '<book-title>%(title)s</book-title>' \ + '<lang>%(lang)s</lang>' \ '</title-info>' \ '<document-info>' \ - '<program-used>%s %s</program-used>' \ + '<author>' \ + '<first-name></first-name>' \ + '<middle-name></middle-name>' \ + '<last-name></last-name>' \ + '</author>' \ + '<program-used>%(appname)s %(version)s</program-used>' \ + '<date>%(date)s</date>' \ + '<id>1</id>' \ + '<version>1.0</version>' \ '</document-info>' \ - '</description>' % tuple(map(prepare_string_for_xml, (author_first, author_middle, author_last, - self.oeb_book.metadata.title[0].value, __appname__, __version__))) + '</description>' % metadata def fb2_footer(self): return u'</FictionBook>' From 1f7c291aeb3bae171e5bff44d0ea47ba3fdd5524 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 20:37:16 -0500 Subject: [PATCH 31/40] FB2 Output: Use random uuid for book id. --- src/calibre/ebooks/fb2/fb2ml.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index d83dc45a0f..d89570a44e 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -12,6 +12,7 @@ import cStringIO from base64 import b64encode from datetime import datetime import re +import uuid try: from PIL import Image @@ -88,7 +89,8 @@ class FB2MLizer(object): metadata['appname'] = __appname__ metadata['version'] = __version__ metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) - metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' + metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' + metadata['id'] = '%s' % uuid.uuid4() author_parts = self.oeb_book.metadata.creator[0].value.split(' ') if len(author_parts) == 1: @@ -124,7 +126,7 @@ class FB2MLizer(object): '</author>' \ '<program-used>%(appname)s %(version)s</program-used>' \ '<date>%(date)s</date>' \ - '<id>1</id>' \ + '<id>%(id)s</id>' \ '<version>1.0</version>' \ '</document-info>' \ '</description>' % metadata From 63727bc608ddc2683ceb4ceca84a0e0776fb0ceb Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 19:06:47 -0700 Subject: [PATCH 32/40] ... --- src/calibre/ebooks/oeb/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 2e480a9941..a077fb0225 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -775,6 +775,7 @@ class Manifest(object): return u'Item(id=%r, href=%r, media_type=%r)' \ % (self.id, self.href, self.media_type) + # Parsing {{{ def _parse_xml(self, data): data = xml_to_unicode(data, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)[0] @@ -1035,6 +1036,8 @@ class Manifest(object): data = item.data.cssText return ('utf-8', data) + # }}} + @dynamic_property def data(self): doc = """Provides MIME type sensitive access to the manifest From 0ea35abaf189ded4ba2e39a6201c545c21e8290e Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:04:37 -0500 Subject: [PATCH 33/40] FB2 Output: Check image is in document and manifest before referencing and writing. --- src/calibre/ebooks/fb2/fb2ml.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index d89570a44e..3020c002a5 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -145,8 +145,14 @@ class FB2MLizer(object): return ''.join(text) + '</body>' def fb2mlize_images(self): + ''' + This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function. + ''' images = [] for item in self.oeb_book.manifest: + # Don't write the image if it's not referenced in the document's text. + if item.href not in self.image_hrefs: + continue if item.media_type in OEB_RASTER_IMAGES: try: im = Image.open(cStringIO.StringIO(item.data)).convert('RGB') @@ -164,7 +170,7 @@ class FB2MLizer(object): col = 1 col += 1 data += char - images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '_0000.JPEG'), item.media_type, data)) + images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs[item.href], item.media_type, data)) except Exception as e: self.log.error('Error: Could not include file %s because ' \ '%s.' % (item.href, e)) @@ -245,14 +251,15 @@ class FB2MLizer(object): fb2_out.append('<title>') tags.append('title') if tag == 'img': - # TODO: Check that the image is in the manifest and only write the tag if it is. if elem_tree.attrib.get('src', None): - if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys(): - self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys()) - p_txt, p_tag = self.ensure_p() - fb2_out += p_txt - tags += p_tag - fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])]) + # Only write the image tag if it is in the manifest. + if page.abshref(elem_tree.attrib['src']) in self.oeb_book.manifest.hrefs.keys(): + if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys(): + self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys()) + p_txt, p_tag = self.ensure_p() + fb2_out += p_txt + tags += p_tag + fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])]) elif tag == 'br': if self.in_p: closed_tags = [] From 596c8b905bd7280093cc84eb57bef674326a63a0 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:14:30 -0500 Subject: [PATCH 34/40] FB2 Output: SVG rasterization. --- src/calibre/ebooks/fb2/output.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index 88508b83e0..33714c6e6e 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -29,6 +29,14 @@ class FB2Output(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): from calibre.ebooks.oeb.transforms.jacket import linearize_jacket + from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable + + try: + rasterizer = SVGRasterizer() + rasterizer(oeb_book, opts) + except Unavailable: + self.log.warn('SVG rasterizer unavailable, SVG will not be converted') + linearize_jacket(oeb_book) fb2mlizer = FB2MLizer(log) From cafe81f2e4528b596517108d501cd7a934864d15 Mon Sep 17 00:00:00 2001 From: Kovid Goyal <kovid@kovidgoyal.net> Date: Sat, 4 Dec 2010 20:20:20 -0700 Subject: [PATCH 35/40] EPUB Output: Mangle filenames to ensure they are unique, to support broken EPUB Readers like Aldiko and Stanza. Currently disabled pending further testing. --- src/calibre/ebooks/epub/output.py | 3 + .../ebooks/oeb/transforms/filenames.py | 130 ++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 src/calibre/ebooks/oeb/transforms/filenames.py diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index 17f6eb9f46..c5d11edc2b 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -142,6 +142,9 @@ class EPUBOutput(OutputFormatPlugin): def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb + #from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames + #UniqueFilenames()(oeb, opts) + self.workaround_ade_quirks() self.workaround_webkit_quirks() self.upshift_markup() diff --git a/src/calibre/ebooks/oeb/transforms/filenames.py b/src/calibre/ebooks/oeb/transforms/filenames.py new file mode 100644 index 0000000000..2b22474d30 --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/filenames.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +import posixpath +from urlparse import urldefrag + +from lxml import etree +import cssutils + +from calibre.ebooks.oeb.base import rewrite_links, urlnormalize + +class RenameFiles(object): + + ''' + Rename files and adjust all links pointing to them. Note that the spine + and manifest are not touched by this transform. + ''' + + def __init__(self, rename_map): + self.rename_map = rename_map + + def __call__(self, oeb, opts): + self.log = oeb.logger + self.opts = opts + self.oeb = oeb + + for item in oeb.manifest.items: + self.current_item = item + if etree.iselement(item.data): + rewrite_links(self.current_item.data, self.url_replacer) + elif hasattr(item.data, 'cssText'): + cssutils.replaceUrls(item.data, self.url_replacer) + + if self.oeb.guide: + for ref in self.oeb.guide.values(): + href = urlnormalize(ref.href) + href, frag = urldefrag(href) + replacement = self.rename_map.get(href, None) + if replacement is not None: + nhref = replacement + if frag: + nhref += '#' + frag + ref.href = nhref + + if self.oeb.toc: + self.fix_toc_entry(self.oeb.toc) + + + def fix_toc_entry(self, toc): + if toc.href: + href = urlnormalize(toc.href) + href, frag = urldefrag(href) + replacement = self.rename_map.get(href, None) + + if replacement is not None: + nhref = replacement + if frag: + nhref = '#'.join((nhref, frag)) + toc.href = nhref + + for x in toc: + self.fix_toc_entry(x) + + def url_replacer(self, orig_url): + url = urlnormalize(orig_url) + path, frag = urldefrag(url) + href = self.current_item.abshref(path) + replacement = self.rename_map.get(href, None) + if replacement is None: + return orig_url + replacement = self.current_item.relhref(replacement) + if frag: + replacement += '#' + frag + return replacement + +class UniqueFilenames(object): + + 'Ensure that every item in the manifest has a unique filename' + + def __call__(self, oeb, opts): + self.log = oeb.logger + self.opts = opts + self.oeb = oeb + + self.seen_filenames = set([]) + self.rename_map = {} + + for item in list(oeb.manifest.items): + fname = posixpath.basename(item.href) + if fname in self.seen_filenames: + suffix = self.unique_suffix(fname) + data = item.data + base, ext = posixpath.splitext(item.href) + nhref = base + suffix + ext + nhref = oeb.manifest.generate(href=nhref)[1] + nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data, + fallback=item.fallback) + self.seen_filenames.add(posixpath.basename(nhref)) + self.rename_map[item.href] = nhref + if item.spine_position is not None: + oeb.spine.insert(item.spine_position, nitem, item.linear) + oeb.spine.remove(item) + oeb.manifest.remove(item) + else: + self.seen_filenames.add(fname) + + if self.rename_map: + self.log('Found non-unique filenames, renaming to support broken' + ' EPUB readers like FBReader, Aldiko and Stanza...') + from pprint import pformat + self.log.debug(pformat(self.rename_map)) + + renamer = RenameFiles(self.rename_map) + renamer(oeb, opts) + + + def unique_suffix(self, fname): + base, ext = posixpath.splitext(fname) + c = 0 + while True: + c += 1 + suffix = '_u%d'%c + candidate = base + suffix + ext + if candidate not in self.seen_filenames: + return suffix + From f7d9571c4c51f8640841f3dfd45af7a71d3fff12 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:37:34 -0500 Subject: [PATCH 36/40] FB2 Output: Replace PIL with ImageMagick. Don't convert JPG images to JPG because it's unnecessary. --- src/calibre/ebooks/fb2/fb2ml.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 3020c002a5..b04cb50d46 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -8,18 +8,12 @@ __docformat__ = 'restructuredtext en' Transform OEB content into FB2 markup ''' -import cStringIO from base64 import b64encode from datetime import datetime +from mimetypes import types_map import re import uuid -try: - from PIL import Image - Image -except ImportError: - import Image - from lxml import etree from calibre import prepare_string_for_xml @@ -27,6 +21,7 @@ from calibre.constants import __appname__, __version__ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES +from calibre.utils.magick import Image class FB2MLizer(object): ''' @@ -155,11 +150,11 @@ class FB2MLizer(object): continue if item.media_type in OEB_RASTER_IMAGES: try: - im = Image.open(cStringIO.StringIO(item.data)).convert('RGB') - data = cStringIO.StringIO() - im.save(data, 'JPEG') - data = data.getvalue() - + if not item.media_type == types_map['.jpeg'] or not item.media_type == types_map['.jpg']: + im = Image() + im.load(item.data) + im.set_compression_quality(70) + data = im.export('jpg') raw_data = b64encode(data) # Don't put the encoded image on a single line. data = '' From 4d3e99af6733a7ed7bba89297971b8847d5483d9 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:59:37 -0500 Subject: [PATCH 37/40] FB2 Output: Fix writing incorrect mimetype. --- src/calibre/ebooks/fb2/fb2ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 479cd4d789..0748970c60 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -165,7 +165,7 @@ class FB2MLizer(object): col = 1 col += 1 data += char - images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs[item.href], item.media_type, data)) + images.append('<binary id="%s">%s\n</binary>' % (self.image_hrefs[item.href], data)) except Exception as e: self.log.error('Error: Could not include file %s because ' \ '%s.' % (item.href, e)) From fcd87f216c6172443f3245b1ad117435aca516c9 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 23:02:55 -0500 Subject: [PATCH 38/40] FB2 Output: Add image mimetype back and set to jpg because that is what is written. --- src/calibre/ebooks/fb2/fb2ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 0748970c60..46861357e6 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -165,7 +165,7 @@ class FB2MLizer(object): col = 1 col += 1 data += char - images.append('<binary id="%s">%s\n</binary>' % (self.image_hrefs[item.href], data)) + images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data)) except Exception as e: self.log.error('Error: Could not include file %s because ' \ '%s.' % (item.href, e)) From 37d51495d227b2ce2689477ff45109bbbd12c987 Mon Sep 17 00:00:00 2001 From: Sengian <sengian1@gmail.com> Date: Sun, 5 Dec 2010 15:28:41 +0100 Subject: [PATCH 39/40] Update of nicebooks --- src/calibre/ebooks/metadata/nicebooks.py | 113 ++++++++++------------- 1 file changed, 50 insertions(+), 63 deletions(-) diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py index 4d19e9611b..01e20261b3 100644 --- a/src/calibre/ebooks/metadata/nicebooks.py +++ b/src/calibre/ebooks/metadata/nicebooks.py @@ -10,7 +10,8 @@ from copy import deepcopy from lxml.html import soupparser -from calibre.utils.date import parse_date, utcnow +from calibre.utils.date import parse_date, utcnow, replace_months +from calibre.utils.cleantext import clean_ascii_char from calibre import browser, preferred_encoding from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata import MetaInformation, check_isbn, \ @@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload): traceback.format_exc(), self.name)) +class NiceBooksError(Exception): + pass + +class ISBNNotFound(NiceBooksError): + pass + def report(verbose): if verbose: - import traceback traceback.print_exc() -def replace_monthsfr(datefr): - # Replace french months by english equivalent for parse_date - frtoen = { - u'[jJ]anvier': u'jan', - u'[fF].vrier': u'feb', - u'[mM]ars': u'mar', - u'[aA]vril': u'apr', - u'[mM]ai': u'may', - u'[jJ]uin': u'jun', - u'[jJ]uillet': u'jul', - u'[aA]o.t': u'aug', - u'[sS]eptembre': u'sep', - u'[Oo]ctobre': u'oct', - u'[nN]ovembre': u'nov', - u'[dD].cembre': u'dec' } - for k in frtoen.iterkeys(): - tmp = re.sub(k, frtoen[k], datefr) - if tmp <> datefr: break - return tmp - class Query(object): BASE_URL = 'http://fr.nicebooks.com/' @@ -119,7 +105,7 @@ class Query(object): def __call__(self, browser, verbose, timeout = 5.): if verbose: - print 'Query:', self.BASE_URL+self.urldata + print _('Query: %s') % self.BASE_URL+self.urldata try: raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read() @@ -128,7 +114,9 @@ class Query(object): if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return - raise + if isinstance(getattr(e, 'args', [None])[0], socket.timeout): + raise NiceBooksError(_('Nicebooks timed out. Try again later.')) + raise NiceBooksError(_('Nicebooks encountered an error.')) if '<title>404 - ' in raw: return raw = xml_to_unicode(raw, strip_encoding_pats=True, @@ -136,7 +124,11 @@ class Query(object): try: feed = soupparser.fromstring(raw) except: - return + try: + #remove ASCII invalid chars + feed = soupparser.fromstring(clean_ascii_char(raw)) + except: + return None #nb of page to call try: @@ -161,7 +153,11 @@ class Query(object): try: feed = soupparser.fromstring(raw) except: - continue + try: + #remove ASCII invalid chars + feed = soupparser.fromstring(clean_ascii_char(raw)) + except: + continue pages.append(feed) results = [] @@ -180,14 +176,12 @@ class ResultList(list): self.reautclean = re.compile(u'\s*\(.*\)\s*') def get_title(self, entry): - # title = deepcopy(entry.find("div[@id='book-info']")) title = deepcopy(entry) title.remove(title.find("dl[@title='Informations sur le livre']")) title = ' '.join([i.text_content() for i in title.iterchildren()]) return unicode(title.replace('\n', '')) def get_authors(self, entry): - # author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']") author = entry.find("dl[@title='Informations sur le livre']") authortext = [] for x in author.getiterator('dt'): @@ -223,7 +217,7 @@ class ResultList(list): d = x.getnext().text_content() try: default = utcnow().replace(day=15) - d = replace_monthsfr(d) + d = replace_months(d, 'fr') d = parse_date(d, assume_utc=True, default=default) mi.pubdate = d except: @@ -234,11 +228,6 @@ class ResultList(list): mi = MetaInformation(title, authors) mi.author_sort = authors_to_sort_string(authors) mi.comments = self.get_description(entry, verbose) - # entry = entry.find("dl[@title='Informations sur le livre']") - # mi.publisher = self.get_publisher(entry) - # mi.pubdate = self.get_date(entry, verbose) - # mi.isbn = self.get_ISBN(entry) - # mi.language = self.get_language(entry) return self.get_book_info(entry, mi, verbose) def get_individual_metadata(self, browser, linkdata, verbose): @@ -249,7 +238,9 @@ class ResultList(list): if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return - raise + if isinstance(getattr(e, 'args', [None])[0], socket.timeout): + raise NiceBooksError(_('Nicebooks timed out. Try again later.')) + raise NiceBooksError(_('Nicebooks encountered an error.')) if '<title>404 - ' in raw: report(verbose) return @@ -258,7 +249,11 @@ class ResultList(list): try: feed = soupparser.fromstring(raw) except: - return + try: + #remove ASCII invalid chars + feed = soupparser.fromstring(clean_ascii_char(raw)) + except: + return None # get results return feed.xpath("//div[@id='container']")[0] @@ -292,13 +287,6 @@ class ResultList(list): continue self.append(self.fill_MI(entry, title, authors, verbose)) - -class NiceBooksError(Exception): - pass - -class ISBNNotFound(NiceBooksError): - pass - class Covers(object): def __init__(self, isbn = None): @@ -329,11 +317,10 @@ class Covers(object): return cover, ext if ext else 'jpg' except Exception, err: if isinstance(getattr(err, 'args', [None])[0], socket.timeout): - err = NiceBooksError(_('Nicebooks timed out. Try again later.')) - raise err + raise NiceBooksError(_('Nicebooks timed out. Try again later.')) if not len(self.urlimg): if not self.isbnf: - raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.')) + raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn) raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher')) @@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None, max_results=5, verbose=False, keywords=None): br = browser() entries = Query(title=title, author=author, isbn=isbn, publisher=publisher, - keywords=keywords, max_results=max_results)(br, verbose) + keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.) if entries is None or len(entries) == 0: - return + return None #List of entry ans = ResultList() @@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.): def option_parser(): parser = OptionParser(textwrap.dedent(\ - '''\ + _('''\ %prog [options] Fetch book metadata from Nicebooks. You must specify one of title, author, ISBN, publisher or keywords. Will fetch a maximum of 20 matches, so you should make your query as specific as possible. It can also get covers if the option is activated. - ''' + ''') )) - parser.add_option('-t', '--title', help='Book title') - parser.add_option('-a', '--author', help='Book author(s)') - parser.add_option('-p', '--publisher', help='Book publisher') - parser.add_option('-i', '--isbn', help='Book ISBN') - parser.add_option('-k', '--keywords', help='Keywords') + parser.add_option('-t', '--title', help=_('Book title')) + parser.add_option('-a', '--author', help=_('Book author(s)')) + parser.add_option('-p', '--publisher', help=_('Book publisher')) + parser.add_option('-i', '--isbn', help=_('Book ISBN')) + parser.add_option('-k', '--keywords', help=_('Keywords')) parser.add_option('-c', '--covers', default=0, - help='Covers: 1-Check/ 2-Download') + help=_('Covers: 1-Check/ 2-Download')) parser.add_option('-p', '--coverspath', default='', - help='Covers files path') + help=_('Covers files path')) parser.add_option('-m', '--max-results', default=20, - help='Maximum number of results to fetch') + help=_('Maximum number of results to fetch')) parser.add_option('-v', '--verbose', default=0, action='count', - help='Be more verbose about errors') + help=_('Be more verbose about errors')) return parser def main(args=sys.argv): @@ -400,15 +387,15 @@ def main(args=sys.argv): parser.print_help() return 1 if results is None or len(results) == 0: - print 'No result found for this search!' + print _('No result found for this search!') return 0 for result in results: print unicode(result).encode(preferred_encoding, 'replace') covact = int(opts.covers) if covact == 1: - textcover = 'No cover found!' + textcover = _('No cover found!') if check_for_cover(result.isbn): - textcover = 'A cover was found for this book' + textcover = _('A cover was found for this book') print textcover elif covact == 2: cover_data, ext = cover_from_isbn(result.isbn) @@ -417,7 +404,7 @@ def main(args=sys.argv): cpath = os.path.normpath(opts.coverspath + '/' + result.isbn) oname = os.path.abspath(cpath+'.'+ext) open(oname, 'wb').write(cover_data) - print 'Cover saved to file ', oname + print _('Cover saved to file '), oname print if __name__ == '__main__': From e610f16ca073fc0a4960143484c56031e8ac9069 Mon Sep 17 00:00:00 2001 From: Sengian <sengian1@gmail.com> Date: Sun, 5 Dec 2010 20:09:17 +0100 Subject: [PATCH 40/40] Update fictionwise.py (broken) --- src/calibre/ebooks/metadata/fictionwise.py | 146 +++++++++++++-------- 1 file changed, 93 insertions(+), 53 deletions(-) diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py index 828ea31c3a..e56c697e3c 100644 --- a/src/calibre/ebooks/metadata/fictionwise.py +++ b/src/calibre/ebooks/metadata/fictionwise.py @@ -3,12 +3,11 @@ __license__ = 'GPL 3' __copyright__ = '2010, sengian <sengian1@gmail.com>' __docformat__ = 'restructuredtext en' -import sys, textwrap, re +import sys, textwrap, re, traceback, socket from urllib import urlencode -from lxml import html, etree -from lxml.html import soupparser -from lxml.etree import tostring +from lxml import html +from lxml.html import soupparser, tostring from calibre import browser, preferred_encoding from calibre.ebooks.chardet import xml_to_unicode @@ -18,6 +17,7 @@ from calibre.library.comments import sanitize_comments_html from calibre.ebooks.metadata.fetch import MetadataSource from calibre.utils.config import OptionParser from calibre.utils.date import parse_date, utcnow +from calibre.utils.cleantext import clean_ascii_char class Fictionwise(MetadataSource): # {{{ @@ -37,10 +37,11 @@ class Fictionwise(MetadataSource): # {{{ # }}} +class FictionwiseError(Exception): + pass def report(verbose): if verbose: - import traceback traceback.print_exc() class Query(object): @@ -86,18 +87,20 @@ class Query(object): q = q.encode('utf-8') self.urldata = urlencode(q) - def __call__(self, browser, verbose): + def __call__(self, browser, verbose, timeout = 5.): if verbose: - print 'Query:', self.BASE_URL+self.urldata + print _('Query: %s') % self.BASE_URL+self.urldata try: - raw = browser.open_novisit(self.BASE_URL, self.urldata).read() + raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read() except Exception, e: report(verbose) if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return - raise + if isinstance(getattr(e, 'args', [None])[0], socket.timeout): + raise FictionwiseError(_('Fictionwise timed out. Try again later.')) + raise FictionwiseError(_('Fictionwise encountered an error.')) if '<title>404 - ' in raw: return raw = xml_to_unicode(raw, strip_encoding_pats=True, @@ -105,7 +108,11 @@ class Query(object): try: feed = soupparser.fromstring(raw) except: - return + try: + #remove ASCII invalid chars + feed = soupparser.fromstring(clean_ascii_char(raw)) + except: + return None # get list of results as links results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]") @@ -139,12 +146,41 @@ class ResultList(list): self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I) def strip_tags_etree(self, etreeobj, invalid_tags): - for itag in invalid_tags: - for elt in etreeobj.getiterator(itag): - elt.drop_tag() - return etreeobj + for (itag, rmv) in invalid_tags.iteritems(): + if rmv: + for elts in etreeobj.getiterator(itag): + elts.drop_tree() + else: + for elts in etreeobj.getiterator(itag): + elts.drop_tag() - def clean_entry(self, entry, + def clean_entry(self, entry, invalid_tags = {'script': True}, + invalid_id = (), invalid_class=(), invalid_xpath = ()): + #invalid_tags: remove tag and keep content if False else remove + #remove tags + if invalid_tags: + self.strip_tags_etree(entry, invalid_tags) + #remove xpath + if invalid_xpath: + for eltid in invalid_xpath: + elt = entry.xpath(eltid) + for el in elt: + el.drop_tree() + #remove id + if invalid_id: + for eltid in invalid_id: + elt = entry.get_element_by_id(eltid) + if elt is not None: + elt.drop_tree() + #remove class + if invalid_class: + for eltclass in invalid_class: + elts = entry.find_class(eltclass) + if elts is not None: + for elt in elts: + elt.drop_tree() + + def clean_entry_dffdfbdjbf(self, entry, invalid_tags = ('font', 'strong', 'b', 'ul', 'span', 'a'), remove_tags_trees = ('script',)): for it in entry[0].iterchildren(tag='table'): @@ -170,7 +206,6 @@ class ResultList(list): authortext = entry.find('./br').tail if not self.rechkauth.search(authortext): return [] - #TODO: parse all tag if necessary authortext = self.rechkauth.sub('', authortext) return [a.strip() for a in authortext.split('&')] @@ -185,7 +220,7 @@ class ResultList(list): float(image.get('height', default=0))) \ for image in entrytable.getiterator('img')) #ratings as x/5 - return 1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()) + return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues())) def get_description(self, entry): description = self.output_entry(entry.find('./p'),htmlrm="") @@ -221,7 +256,6 @@ class ResultList(list): self.resplitbr.split(date)) if not len(date): return None - #TODO: parse all tag if necessary try: d = self.redate.sub('', date[0]) if d: @@ -279,9 +313,14 @@ class ResultList(list): return feed.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td") def populate(self, entries, browser, verbose=False): - for x in entries: + inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False, + 'ul': False, 'span': False, 'table': True} + inv_xpath =('descendant-or-self::p[1]',) + #single entry + if len(entries) == 1 and not isinstance(entries[0], str): try: - entry = self.get_individual_metadata(browser, x, verbose) + entry = entries.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td") + self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath) entry = self.clean_entry(entry) title = self.get_title(entry) #ratings: get table for rating then drop @@ -292,28 +331,29 @@ class ResultList(list): authors = self.get_authors(entry) except Exception, e: if verbose: - print 'Failed to get all details for an entry' + print _('Failed to get all details for an entry') print e - continue + return self.append(self.fill_MI(entry, title, authors, ratings, verbose)) - - def populate_single(self, feed, verbose=False): - try: - entry = feed.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td") - entry = self.clean_entry(entry) - title = self.get_title(entry) - #ratings: get table for rating then drop - for elt in entry.getiterator('table'): - ratings = self.get_rating(elt, verbose) - elt.getprevious().drop_tree() - elt.drop_tree() - authors = self.get_authors(entry) - except Exception, e: - if verbose: - print 'Failed to get all details for an entry' - print e - return - self.append(self.fill_MI(entry, title, authors, ratings, verbose)) + else: + #multiple entries + for x in entries: + try: + entry = self.get_individual_metadata(browser, x, verbose) + self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath) + title = self.get_title(entry) + #ratings: get table for rating then drop + for elt in entry.getiterator('table'): + ratings = self.get_rating(elt, verbose) + elt.getprevious().drop_tree() + elt.drop_tree() + authors = self.get_authors(entry) + except Exception, e: + if verbose: + print _('Failed to get all details for an entry') + print e + continue + self.append(self.fill_MI(entry, title, authors, ratings, verbose)) def search(title=None, author=None, publisher=None, isbn=None, @@ -321,35 +361,32 @@ def search(title=None, author=None, publisher=None, isbn=None, keywords=None): br = browser() entries = Query(title=title, author=author, publisher=publisher, - keywords=keywords, max_results=max_results)(br, verbose) + keywords=keywords, max_results=max_results)(br, verbose, timeout = 10.) #List of entry ans = ResultList() - if len(entries) > 1: - ans.populate(entries, br, verbose) - else: - ans.populate_single(entries[0], verbose) + ans.populate(entries, br, verbose) return ans def option_parser(): parser = OptionParser(textwrap.dedent(\ - '''\ + _('''\ %prog [options] Fetch book metadata from Fictionwise. You must specify one of title, author, or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches, so you should make your query as specific as possible. - ''' + ''') )) - parser.add_option('-t', '--title', help='Book title') - parser.add_option('-a', '--author', help='Book author(s)') - parser.add_option('-p', '--publisher', help='Book publisher') - parser.add_option('-k', '--keywords', help='Keywords') + parser.add_option('-t', '--title', help=_('Book title')) + parser.add_option('-a', '--author', help=_('Book author(s)')) + parser.add_option('-p', '--publisher', help=_('Book publisher')) + parser.add_option('-k', '--keywords', help=_('Keywords')) parser.add_option('-m', '--max-results', default=20, - help='Maximum number of results to fetch') + help=_('Maximum number of results to fetch')) parser.add_option('-v', '--verbose', default=0, action='count', - help='Be more verbose about errors') + help=_('Be more verbose about errors')) return parser def main(args=sys.argv): @@ -362,6 +399,9 @@ def main(args=sys.argv): report(True) parser.print_help() return 1 + if results is None or len(results) == 0: + print _('No result found for this search!') + return 0 for result in results: print unicode(result).encode(preferred_encoding, 'replace') print