From 9daa9486a2a5d85af1425546a00bc841f64b0115 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 5 Dec 2010 11:37:25 +0000 Subject: [PATCH 1/7] Change instances of title() to use the ICU function. Change a few lower() as well. --- src/calibre/devices/apple/driver.py | 8 +++--- src/calibre/ebooks/markdown/mdx_toc.py | 5 ++-- src/calibre/ebooks/mobi/langcodes.py | 2 +- src/calibre/ebooks/oeb/base.py | 2 +- .../ebooks/oeb/transforms/manglecase.py | 6 ++--- src/calibre/gui2/dialogs/metadata_bulk.py | 4 +-- src/calibre/library/caches.py | 4 +-- src/calibre/library/catalog.py | 6 ++--- src/calibre/startup.py | 4 +++ src/calibre/utils/titlecase.py | 26 ++++++++++++------- 10 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index d42c51cedd..0f281ecc92 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -2637,7 +2637,7 @@ class ITUNES(DriverBase): lb_added.composer.set(metadata_x.uuid) lb_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S'))) lb_added.enabled.set(True) - lb_added.sort_artist.set(metadata_x.author_sort.title()) + lb_added.sort_artist.set(icu_title(metadata_x.author_sort)) lb_added.sort_name.set(metadata.title_sort) @@ -2648,7 +2648,7 @@ class ITUNES(DriverBase): db_added.composer.set(metadata_x.uuid) db_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S'))) db_added.enabled.set(True) - db_added.sort_artist.set(metadata_x.author_sort.title()) + db_added.sort_artist.set(icu_title(metadata_x.author_sort)) db_added.sort_name.set(metadata.title_sort) if metadata_x.comments: @@ -2729,7 +2729,7 @@ class ITUNES(DriverBase): lb_added.Composer = metadata_x.uuid lb_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S'))) lb_added.Enabled = True - lb_added.SortArtist = metadata_x.author_sort.title() + lb_added.SortArtist = 
icu_title(metadata_x.author_sort) lb_added.SortName = metadata.title_sort if db_added: @@ -2739,7 +2739,7 @@ class ITUNES(DriverBase): db_added.Composer = metadata_x.uuid db_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S'))) db_added.Enabled = True - db_added.SortArtist = metadata_x.author_sort.title() + db_added.SortArtist = icu_title(metadata_x.author_sort) db_added.SortName = metadata.title_sort if metadata_x.comments: diff --git a/src/calibre/ebooks/markdown/mdx_toc.py b/src/calibre/ebooks/markdown/mdx_toc.py index 15fd5061a0..322b820a4e 100644 --- a/src/calibre/ebooks/markdown/mdx_toc.py +++ b/src/calibre/ebooks/markdown/mdx_toc.py @@ -18,9 +18,10 @@ def extract_alphanumeric(in_str=None): """ # I'm sure this is really inefficient and # could be done with a lambda/map() - #x.strip().title().replace(' ', "") + #x.strip(). title().replace(' ', "") out_str=[] - for x in in_str.title(): + for x in in_str: + x = icu_title(x) if x.isalnum(): out_str.append(x) return ''.join(out_str) diff --git a/src/calibre/ebooks/mobi/langcodes.py b/src/calibre/ebooks/mobi/langcodes.py index 5d085906df..723b619590 100644 --- a/src/calibre/ebooks/mobi/langcodes.py +++ b/src/calibre/ebooks/mobi/langcodes.py @@ -322,7 +322,7 @@ def iana2mobi(icode): while len(subtags) > 0: subtag = subtags.pop(0) if subtag not in langdict: - subtag = subtag.title() + subtag = icu_title(subtag) if subtag not in langdict: subtag = subtag.upper() if subtag in langdict: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index a077fb0225..0f364b8030 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -607,7 +607,7 @@ class Metadata(object): key = barename(key) attrib[key] = prefixname(value, nsrmap) if namespace(self.term) == DC11_NS: - name = DC(barename(self.term).title()) + name = DC(icu_title(barename(self.term))) elem = element(dcmeta, name, attrib=attrib) elem.text = self.value else: diff --git 
a/src/calibre/ebooks/oeb/transforms/manglecase.py b/src/calibre/ebooks/oeb/transforms/manglecase.py index 04bf63ac1d..240f7e7726 100644 --- a/src/calibre/ebooks/oeb/transforms/manglecase.py +++ b/src/calibre/ebooks/oeb/transforms/manglecase.py @@ -50,11 +50,11 @@ class CaseMangler(object): def text_transform(self, transform, text): if transform == 'capitalize': - return text.title() + return icu_title(text) elif transform == 'uppercase': - return text.upper() + return icu_upper(text) elif transform == 'lowercase': - return text.lower() + return icu_lower(text) return text def split_text(self, text): diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 362091eb2d..4a44b0cefa 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -184,8 +184,8 @@ class MyBlockingBusy(QDialog): class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): s_r_functions = { '' : lambda x: x, - _('Lower Case') : lambda x: x.lower(), - _('Upper Case') : lambda x: x.upper(), + _('Lower Case') : lambda x: icu_lower(x), + _('Upper Case') : lambda x: icu_upper(x), _('Title Case') : lambda x: titlecase(x), } diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 7c1dea792c..5b6b79e3df 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -223,7 +223,7 @@ EQUALS_MATCH = 1 REGEXP_MATCH = 2 def _match(query, value, matchkind): for t in value: - t = t.lower() + t = icu_lower(t) try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished if ((matchkind == EQUALS_MATCH and query == t) or (matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored @@ -505,7 +505,7 @@ class ResultCache(SearchQueryParser): # {{{ query = query[1:] if matchkind != REGEXP_MATCH: # leave case in regexps because it can be significant e.g. 
\S \W \D - query = query.lower() + query = icu_lower(query) if not isinstance(query, unicode): query = query.decode('utf-8') diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index eed258a6b0..0b317d6a6e 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1476,20 +1476,20 @@ class EPUB_MOBI(CatalogPlugin): self.opts.log.warn(" '%s' != '%s'" % (author[1], current_author[1])) # New author, save the previous author/sort/count - unique_authors.append((current_author[0], current_author[1].title(), + unique_authors.append((current_author[0], icu_title(current_author[1]), books_by_current_author)) current_author = author books_by_current_author = 1 elif i==0 and len(authors) == 1: # Allow for single-book lists - unique_authors.append((current_author[0], current_author[1].title(), + unique_authors.append((current_author[0], icu_title(current_author[1]), books_by_current_author)) else: books_by_current_author += 1 else: # Add final author to list or single-author dataset if (current_author == author and len(authors) > 1) or not multiple_authors: - unique_authors.append((current_author[0], current_author[1].title(), + unique_authors.append((current_author[0], icu_title(current_author[1]), books_by_current_author)) if False and self.verbose: diff --git a/src/calibre/startup.py b/src/calibre/startup.py index 9c9c7651b7..41b20f3946 100644 --- a/src/calibre/startup.py +++ b/src/calibre/startup.py @@ -199,6 +199,10 @@ if not _run_once: __builtin__.__dict__['lopen'] = local_open + from calibre.utils.icu import title_case, lower as icu_lower, upper as icu_upper + __builtin__.__dict__['icu_lower'] = icu_lower + __builtin__.__dict__['icu_upper'] = icu_upper + __builtin__.__dict__['icu_title'] = title_case import mimetypes mimetypes.init([P('mime.types')]) diff --git a/src/calibre/utils/titlecase.py b/src/calibre/utils/titlecase.py index 3ead4848fd..cb0d840515 100755 --- a/src/calibre/utils/titlecase.py +++ 
b/src/calibre/utils/titlecase.py @@ -40,6 +40,12 @@ def titlecase(text): """ + def capitalize(w): + print 'in capitalize' + w = icu_lower(w) + w = w.replace(w[0], icu_upper(w[0])) + return w + all_caps = ALL_CAPS.match(text) words = re.split('\s', text) @@ -50,29 +56,29 @@ def titlecase(text): line.append(word) continue else: - word = word.lower() + word = icu_lower(word) if APOS_SECOND.match(word): - word = word.replace(word[0], word[0].upper()) - word = word.replace(word[2], word[2].upper()) + word = word.replace(word[0], icu_upper(word[0])) + word = word.replace(word[2], icu_upper(word[2])) line.append(word) continue if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word): line.append(word) continue if SMALL_WORDS.match(word): - line.append(word.lower()) + line.append(icu_lower(word)) continue match = MAC_MC.match(word) if match: - line.append("%s%s" % (match.group(1).capitalize(), - match.group(2).capitalize())) + line.append("%s%s" % (capitalize(match.group(1)), + capitalize(match.group(2)))) continue hyphenated = [] for item in word.split('-'): - hyphenated.append(CAPFIRST.sub(lambda m: m.group(0).upper(), item)) + hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item)) line.append("-".join(hyphenated)) @@ -80,14 +86,14 @@ def titlecase(text): result = SMALL_FIRST.sub(lambda m: '%s%s' % ( m.group(1), - m.group(2).capitalize() + capitalize(m.group(2)) ), result) - result = SMALL_LAST.sub(lambda m: m.group(0).capitalize(), result) + result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result) result = SUBPHRASE.sub(lambda m: '%s%s' % ( m.group(1), - m.group(2).capitalize() + capitalize(m.group(2)) ), result) return result From 37e7cb6ec20ec0ecd63061aa8ac735b4df6b8113 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 5 Dec 2010 08:24:30 -0500 Subject: [PATCH 2/7] FB2 Output: Use precomputed uuid when available.
--- src/calibre/ebooks/fb2/fb2ml.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 46861357e6..51bfaa7293 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -20,7 +20,7 @@ from calibre import prepare_string_for_xml from calibre.constants import __appname__, __version__ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.stylizer import Stylizer -from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES +from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF from calibre.utils.magick import Image class FB2MLizer(object): @@ -85,8 +85,8 @@ class FB2MLizer(object): metadata['version'] = __version__ metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' - metadata['id'] = '%s' % uuid.uuid4() - + metadata['id'] = None + author_parts = self.oeb_book.metadata.creator[0].value.split(' ') if len(author_parts) == 1: metadata['author_last'] = author_parts[0] @@ -98,6 +98,15 @@ class FB2MLizer(object): metadata['author_middle'] = ' '.join(author_parts[1:-2]) metadata['author_last'] = author_parts[-1] + identifiers = self.oeb_book.metadata['identifier'] + for x in identifiers: + if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'): + metadata['id'] = unicode(x).split(':')[-1] + break + if metadata['id'] is None: + self.log.warn('No UUID identifier found') + metadata['id'] = str(uuid.uuid4()) + for key, value in metadata.items(): metadata[key] = prepare_string_for_xml(value) From b87a738ef25896e13e08c72fafb3e499a4a98759 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 5 Dec 2010 15:42:17 +0000 Subject: [PATCH 3/7] Add some comments to tag_categories.py to explain the structure of the stored user categories --- 
src/calibre/gui2/dialogs/tag_categories.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/calibre/gui2/dialogs/tag_categories.py b/src/calibre/gui2/dialogs/tag_categories.py index 210a2704bf..60092e4bd2 100644 --- a/src/calibre/gui2/dialogs/tag_categories.py +++ b/src/calibre/gui2/dialogs/tag_categories.py @@ -22,6 +22,15 @@ class Item: return 'name=%s, label=%s, index=%s, exists='%(self.name, self.label, self.index, self.exists) class TagCategories(QDialog, Ui_TagCategories): + ''' + The structure of user_categories stored in preferences is + {cat_name: [ [name, category, v], [], []}, cat_name [ [name, cat, v] ...} + where name is the item name, category is where it came from (series, etc), + and v is a scratch area that this editor uses to keep track of categories. + + If you add a category, it is permissible to set v to zero. If you delete + a category, ensure that both the name and the category match. + ''' category_labels_orig = ['', 'authors', 'series', 'publisher', 'tags'] def __init__(self, window, db, on_category=None): From 741073f1823208b63adf56e88f8b6558a1e49b3f Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Sun, 5 Dec 2010 17:12:21 +0000 Subject: [PATCH 4/7] Forgot to change two instances of strcmp to icu_strcmp --- src/calibre/devices/usbms/books.py | 4 ++-- src/calibre/gui2/library/models.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py index e5a67463e7..23ce1716af 100644 --- a/src/calibre/devices/usbms/books.py +++ b/src/calibre/devices/usbms/books.py @@ -13,7 +13,7 @@ from calibre.devices.interface import BookList as _BookList from calibre.constants import preferred_encoding from calibre import isbytestring from calibre.utils.config import prefs, tweaks -from calibre.utils.icu import sort_key +from calibre.utils.icu import sort_key, strcmp as icu_strcmp class Book(Metadata): def __init__(self, prefix, lpath, size=None, 
other=None): @@ -241,7 +241,7 @@ class CollectionsBookList(BookList): if y is None: return -1 if isinstance(x, (unicode, str)): - c = cmp(sort_key(x), sort_key(y)) + c = strcmp(x, y) else: c = cmp(x, y) if c != 0: diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index e854ffc1bc..311cbaf369 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -18,7 +18,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import tweaks, prefs from calibre.utils.date import dt_factory, qt_to_dt, isoformat -from calibre.utils.icu import sort_key +from calibre.utils.icu import sort_key, strcmp as icu_strcmp from calibre.ebooks.metadata.meta import set_metadata as _set_metadata from calibre.utils.search_query_parser import SearchQueryParser from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \ @@ -1023,8 +1023,8 @@ class DeviceBooksModel(BooksModel): # {{{ x = '' if y == None: y = '' - x, y = x.strip().lower(), y.strip().lower() - return cmp(x, y) + x, y = icu_lower(x.strip()), icu_lower(y.strip()) + return icu_strcmp(x, y) return _strcmp def datecmp(x, y): x = self.db[x].datetime From 29f33524ffb3b9af36bc41fd9b14cd9b4e1f4aa0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 5 Dec 2010 17:42:49 -0700 Subject: [PATCH 5/7] Fix #7810 (Updated recipe for The New Yorker) --- resources/recipes/new_yorker.recipe | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/resources/recipes/new_yorker.recipe b/resources/recipes/new_yorker.recipe index 1a2091cd52..0c95aa358d 100644 --- a/resources/recipes/new_yorker.recipe +++ b/resources/recipes/new_yorker.recipe @@ -22,8 +22,19 @@ class NewYorker(BasicNewsRecipe): masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif' extra_css = """ body {font-family: "Times New Roman",Times,serif} - 
.articleauthor{color: #9F9F9F; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase} - .rubric{color: #CD0021; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase} + .articleauthor{color: #9F9F9F; + font-family: Arial, sans-serif; + font-size: small; + text-transform: uppercase} + .rubric,.dd,h6#credit{color: #CD0021; + font-family: Arial, sans-serif; + font-size: small; + text-transform: uppercase} + .descender:first-letter{display: inline; font-size: xx-large; font-weight: bold} + .dd,h6#credit{color: gray} + .c{display: block} + .caption,h2#articleintro{font-style: italic} + .caption{font-size: small} """ conversion_options = { @@ -39,7 +50,7 @@ class NewYorker(BasicNewsRecipe): ] remove_tags = [ dict(name=['meta','iframe','base','link','embed','object']) - ,dict(attrs={'class':['utils','articleRailLinks','icons'] }) + ,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] }) ,dict(attrs={'id':['show-header','show-footer'] }) ] remove_attributes = ['lang'] @@ -59,3 +70,13 @@ class NewYorker(BasicNewsRecipe): cover_url = 'http://www.newyorker.com' + cover_item['src'].strip() return cover_url + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + auth = soup.find(attrs={'id':'articleauthor'}) + if auth: + alink = auth.find('a') + if alink and alink.string is not None: + txt = alink.string + alink.replaceWith(txt) + return soup From 7ff5842e713d50772e0e7b7a99138b802f9ac1c9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 5 Dec 2010 18:04:26 -0700 Subject: [PATCH 6/7] Fix icu_collate sqlite function --- src/calibre/library/sqlite.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index b4cad8061e..ca6b0fc178 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -18,8 +18,9 @@ from functools import partial from calibre.ebooks.metadata import 
title_sort, author_to_author_sort from calibre.utils.config import tweaks from calibre.utils.date import parse_date, isoformat -from calibre import isbytestring +from calibre import isbytestring, force_unicode from calibre.constants import iswindows, DEBUG +from calibre.utils.icu import strcmp global_lock = RLock() @@ -115,8 +116,8 @@ def pynocase(one, two, encoding='utf-8'): pass return cmp(one.lower(), two.lower()) -def icu_collator(s1, s2, func=None): - return cmp(func(unicode(s1)), func(unicode(s2))) +def icu_collator(s1, s2): + return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8')) def load_c_extensions(conn, debug=DEBUG): try: @@ -169,8 +170,7 @@ class DBThread(Thread): self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4())) # Dummy functions for dynamically created filters self.conn.create_function('books_list_filter', 1, lambda x: 1) - from calibre.utils.icu import sort_key - self.conn.create_collation('icucollate', partial(icu_collator, func=sort_key)) + self.conn.create_collation('icucollate', icu_collator) def run(self): try: From 90fa43bf371bb65361c2d746986743cddb576e68 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 5 Dec 2010 22:03:42 -0700 Subject: [PATCH 7/7] Fix regression in 0.7.32 that broke opening formats in the ebook viewer from the edit metadata dialog --- src/calibre/gui2/actions/edit_metadata.py | 5 +++++ src/calibre/gui2/actions/view.py | 9 --------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 6e2a4054c8..4a527d94d8 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -147,8 +147,13 @@ class EditMetadataAction(InterfaceAction): d = MetadataSingleDialog(self.gui, row_list[current_row], db, prev=prev, next_=next_) + d.view_format.connect(lambda + fmt:self.gui.iactions['View'].view_format(row_list[current_row], + fmt)) if d.exec_() != d.Accepted: + 
d.view_format.disconnect() break + d.view_format.disconnect() changed.add(d.id) if d.row_delta == 0: break diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py index 0a26653771..0910745ac9 100644 --- a/src/calibre/gui2/actions/view.py +++ b/src/calibre/gui2/actions/view.py @@ -26,7 +26,6 @@ class ViewAction(InterfaceAction): def genesis(self): self.persistent_files = [] - self.metadata_view_id = None self.qaction.triggered.connect(self.view_book) self.view_menu = QMenu() self.view_menu.addAction(_('View'), partial(self.view_book, False)) @@ -51,14 +50,6 @@ class ViewAction(InterfaceAction): if fmt_path: self._view_file(fmt_path) - def metadata_view_format(self, fmt): - fmt_path = self.gui.library_view.model().db.\ - format_abspath(self.metadata_view_id, - fmt, index_is_id=True) - if fmt_path: - self._view_file(fmt_path) - - def book_downloaded_for_viewing(self, job): if job.failed: self.gui.device_job_exception(job)