Merge from trunk

2026-06-04 13:05:31 -04:00 · 2010-12-06 08:33:31 +01:00
parent 57e0e1820a 90fa43bf37
commit fcf3dd83f9
19 changed files with 100 additions and 54 deletions
@@ -22,8 +22,19 @@ class NewYorker(BasicNewsRecipe):
    masthead_url          = 'http://www.newyorker.com/css/i/hed/logo.gif'
    extra_css             = """
                                body {font-family: "Times New Roman",Times,serif}
-                                .articleauthor{color: #9F9F9F; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
-                                .rubric{color: #CD0021; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
+                                .articleauthor{color: #9F9F9F; 
+                                               font-family: Arial, sans-serif;
+                                               font-size: small; 
+                                               text-transform: uppercase}
+                                .rubric,.dd,h6#credit{color: #CD0021;
+                                        font-family: Arial, sans-serif;
+                                        font-size: small;
+                                        text-transform: uppercase}
+                                .descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
+                                .dd,h6#credit{color: gray}
+                                .c{display: block}
+                                .caption,h2#articleintro{font-style: italic}
+                                .caption{font-size: small}
                            """

    conversion_options = {
@@ -39,7 +50,7 @@ class NewYorker(BasicNewsRecipe):
                     ]
    remove_tags    = [
                         dict(name=['meta','iframe','base','link','embed','object'])
-                        ,dict(attrs={'class':['utils','articleRailLinks','icons'] })
+                        ,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] })
                        ,dict(attrs={'id':['show-header','show-footer'] })
                     ]
    remove_attributes = ['lang']
@@ -59,3 +70,13 @@ class NewYorker(BasicNewsRecipe):
           cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
        return cover_url

+    def preprocess_html(self, soup):
+        # Drop every inline style attribute found in the document.
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        byline = soup.find(attrs={'id':'articleauthor'})
+        anchor = byline.find('a') if byline else None
+        if anchor and anchor.string is not None:
+            # Swap the author link for its plain text content.
+            anchor.replaceWith(anchor.string)
+        return soup
@@ -1,5 +1,5 @@
 " Project wide builtins
-let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen"]
+let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title"]

 python << EOFPY
 import os
@@ -63,7 +63,8 @@ class Check(Command):

    description = 'Check for errors in the calibre source code'

-    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen']
+    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen', 'icu_lower',
+            'icu_upper', 'icu_title']
    CACHE = '.check-cache.pickle'

    def get_files(self, cache):
@@ -2637,7 +2637,7 @@ class ITUNES(DriverBase):
                lb_added.composer.set(metadata_x.uuid)
                lb_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                lb_added.enabled.set(True)
-                lb_added.sort_artist.set(metadata_x.author_sort.title())
+                lb_added.sort_artist.set(icu_title(metadata_x.author_sort))
                lb_added.sort_name.set(metadata.title_sort)


@@ -2648,7 +2648,7 @@ class ITUNES(DriverBase):
                db_added.composer.set(metadata_x.uuid)
                db_added.description.set("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                db_added.enabled.set(True)
-                db_added.sort_artist.set(metadata_x.author_sort.title())
+                db_added.sort_artist.set(icu_title(metadata_x.author_sort))
                db_added.sort_name.set(metadata.title_sort)

            if metadata_x.comments:
@@ -2729,7 +2729,7 @@ class ITUNES(DriverBase):
                lb_added.Composer = metadata_x.uuid
                lb_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                lb_added.Enabled = True
-                lb_added.SortArtist = metadata_x.author_sort.title()
+                lb_added.SortArtist = icu_title(metadata_x.author_sort)
                lb_added.SortName = metadata.title_sort

            if db_added:
@@ -2739,7 +2739,7 @@ class ITUNES(DriverBase):
                db_added.Composer = metadata_x.uuid
                db_added.Description = ("%s %s" % (self.description_prefix,strftime('%Y-%m-%d %H:%M:%S')))
                db_added.Enabled = True
-                db_added.SortArtist = metadata_x.author_sort.title()
+                db_added.SortArtist = icu_title(metadata_x.author_sort)
                db_added.SortName = metadata.title_sort

            if metadata_x.comments:
@@ -13,7 +13,7 @@ from calibre.devices.interface import BookList as _BookList
 from calibre.constants import preferred_encoding
 from calibre import isbytestring
 from calibre.utils.config import prefs, tweaks
-from calibre.utils.icu import sort_key
+from calibre.utils.icu import sort_key, strcmp as icu_strcmp

 class Book(Metadata):
    def __init__(self, prefix, lpath, size=None, other=None):
@@ -241,7 +241,7 @@ class CollectionsBookList(BookList):
            if y is None:
                return -1
            if isinstance(x, (unicode, str)):
-                c = cmp(sort_key(x), sort_key(y))
+                c = icu_strcmp(x, y)  # ICU strcmp is imported in this file under the alias icu_strcmp
            else:
                c = cmp(x, y)
            if c != 0:
@@ -20,7 +20,7 @@ from calibre import prepare_string_for_xml
 from calibre.constants import __appname__, __version__
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
-from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
+from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF
 from calibre.utils.magick import Image

 class FB2MLizer(object):
@@ -85,8 +85,8 @@ class FB2MLizer(object):
        metadata['version'] = __version__
        metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year)
        metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en'
-        metadata['id'] = '%s' % uuid.uuid4() 
-        
+        metadata['id'] = None
+
        author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
        if len(author_parts) == 1:
            metadata['author_last'] = author_parts[0]
@@ -98,6 +98,15 @@ class FB2MLizer(object):
            metadata['author_middle'] = ' '.join(author_parts[1:-2])
            metadata['author_last'] = author_parts[-1]

+        identifiers = self.oeb_book.metadata['identifier']
+        for x in identifiers:  # an identifier may lack the scheme attribute, so fall back to ''
+            if (x.get(OPF('scheme'), None) or '').lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
+                metadata['id'] = unicode(x).split(':')[-1]
+                break
+        if metadata['id'] is None:
+            self.log.warn('No UUID identifier found')
+            metadata['id'] = str(uuid.uuid4())
+
        for key, value in metadata.items():
            metadata[key] = prepare_string_for_xml(value)

@@ -18,9 +18,10 @@ def extract_alphanumeric(in_str=None):
    """
    # I'm sure this is really inefficient and
    # could be done with a lambda/map()
-    #x.strip().title().replace(' ', "")
+    #x.strip(). title().replace(' ', "")
    out_str=[]
-    for x in in_str.title():
+    # Title-case the whole string once; doing it per character would upper-case every letter.
+    for x in icu_title(in_str):
        if x.isalnum(): out_str.append(x)
    return ''.join(out_str)

@@ -607,7 +607,7 @@ class Metadata(object):
                    key = barename(key)
                attrib[key] = prefixname(value, nsrmap)
            if namespace(self.term) == DC11_NS:
-                name = DC(barename(self.term).title())
+                name = DC(icu_title(barename(self.term)))
                elem = element(dcmeta, name, attrib=attrib)
                elem.text = self.value
            else:
@@ -50,11 +50,11 @@ class CaseMangler(object):

    def text_transform(self, transform, text):
        if transform == 'capitalize':
-            return text.title()
+            return icu_title(text)
        elif transform == 'uppercase':
-            return text.upper()
+            return icu_upper(text)
        elif transform == 'lowercase':
-            return text.lower()
+            return icu_lower(text)
        return text

    def split_text(self, text):
@@ -147,8 +147,13 @@ class EditMetadataAction(InterfaceAction):

            d = MetadataSingleDialog(self.gui, row_list[current_row], db,
                    prev=prev, next_=next_)
+            d.view_format.connect(lambda
+                    fmt:self.gui.iactions['View'].view_format(row_list[current_row],
+                        fmt))
            if d.exec_() != d.Accepted:
+                d.view_format.disconnect()
                break
+            d.view_format.disconnect()
            changed.add(d.id)
            if d.row_delta == 0:
                break
@@ -26,7 +26,6 @@ class ViewAction(InterfaceAction):

    def genesis(self):
        self.persistent_files = []
-        self.metadata_view_id = None
        self.qaction.triggered.connect(self.view_book)
        self.view_menu = QMenu()
        self.view_menu.addAction(_('View'), partial(self.view_book, False))
@@ -51,14 +50,6 @@ class ViewAction(InterfaceAction):
        if fmt_path:
            self._view_file(fmt_path)

-    def metadata_view_format(self, fmt):
-        fmt_path = self.gui.library_view.model().db.\
-                format_abspath(self.metadata_view_id,
-                        fmt, index_is_id=True)
-        if fmt_path:
-            self._view_file(fmt_path)
-
-
    def book_downloaded_for_viewing(self, job):
        if job.failed:
            self.gui.device_job_exception(job)
@@ -184,8 +184,8 @@ class MyBlockingBusy(QDialog):
 class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):

    s_r_functions = {       ''              : lambda x: x,
-                            _('Lower Case') : lambda x: x.lower(),
-                            _('Upper Case') : lambda x: x.upper(),
+                            _('Lower Case') : lambda x: icu_lower(x),
+                            _('Upper Case') : lambda x: icu_upper(x),
                            _('Title Case') : lambda x: titlecase(x),
                    }

@@ -22,6 +22,15 @@ class Item:
        return 'name=%s, label=%s, index=%s, exists='%(self.name, self.label, self.index, self.exists)

 class TagCategories(QDialog, Ui_TagCategories):
+    '''
+    The structure of user_categories stored in preferences is
+      {cat_name: [[name, category, v], [...], ...], cat_name: [[name, category, v], ...], ...}
+    where name is the item name, category is where it came from (series, etc),
+    and v is a scratch area that this editor uses to keep track of categories.
+
+    If you add a category, it is permissible to set v to zero. If you delete
+    a category, ensure that both the name and the category match.
+    '''
    category_labels_orig =   ['', 'authors', 'series', 'publisher', 'tags']

    def __init__(self, window, db, on_category=None):
@@ -18,7 +18,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.config import tweaks, prefs
 from calibre.utils.date import dt_factory, qt_to_dt, isoformat
-from calibre.utils.icu import sort_key
+from calibre.utils.icu import sort_key, strcmp as icu_strcmp
 from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
@@ -1023,8 +1023,8 @@ class DeviceBooksModel(BooksModel): # {{{
                    x = ''
                if y == None:
                    y = ''
-                x, y = x.strip().lower(), y.strip().lower()
-                return cmp(x, y)
+                x, y = icu_lower(x.strip()), icu_lower(y.strip())
+                return icu_strcmp(x, y)
            return _strcmp
        def datecmp(x, y):
            x = self.db[x].datetime
@@ -223,7 +223,7 @@ EQUALS_MATCH   = 1
 REGEXP_MATCH   = 2
 def _match(query, value, matchkind):
    for t in value:
-        t = t.lower()
+        t = icu_lower(t)
        try:     ### ignore regexp exceptions, required because search-ahead tries before typing is finished
            if ((matchkind == EQUALS_MATCH and query == t) or
                (matchkind == REGEXP_MATCH and re.search(query, t, re.I)) or ### search unanchored
@@ -505,7 +505,7 @@ class ResultCache(SearchQueryParser): # {{{
                    query = query[1:]
            if matchkind != REGEXP_MATCH:
                # leave case in regexps because it can be significant e.g. \S \W \D
-                query = query.lower()
+                query = icu_lower(query)

            if not isinstance(query, unicode):
                query = query.decode('utf-8')
@@ -1476,20 +1476,20 @@ class EPUB_MOBI(CatalogPlugin):
                        self.opts.log.warn(" '%s' != '%s'" % (author[1], current_author[1]))

                    # New author, save the previous author/sort/count
-                    unique_authors.append((current_author[0], current_author[1].title(),
+                    unique_authors.append((current_author[0], icu_title(current_author[1]),
                                           books_by_current_author))
                    current_author = author
                    books_by_current_author = 1
                elif i==0 and len(authors) == 1:
                    # Allow for single-book lists
-                    unique_authors.append((current_author[0], current_author[1].title(),
+                    unique_authors.append((current_author[0], icu_title(current_author[1]),
                                           books_by_current_author))
                else:
                    books_by_current_author += 1
            else:
                # Add final author to list or single-author dataset
                if (current_author == author and len(authors) > 1) or not multiple_authors:
-                    unique_authors.append((current_author[0], current_author[1].title(),
+                    unique_authors.append((current_author[0], icu_title(current_author[1]),
                                           books_by_current_author))

            if False and self.verbose:
@@ -18,8 +18,9 @@ from functools import partial
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
 from calibre.utils.config import tweaks
 from calibre.utils.date import parse_date, isoformat
-from calibre import isbytestring
+from calibre import isbytestring, force_unicode
 from calibre.constants import iswindows, DEBUG
+from calibre.utils.icu import strcmp

 global_lock = RLock()

@@ -115,8 +116,8 @@ def pynocase(one, two, encoding='utf-8'):
            pass
    return cmp(one.lower(), two.lower())

-def icu_collator(s1, s2, func=None):
-    return cmp(func(unicode(s1)), func(unicode(s2)))
+def icu_collator(s1, s2):
+    return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8'))

 def load_c_extensions(conn, debug=DEBUG):
    try:
@@ -169,8 +170,7 @@ class DBThread(Thread):
        self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4()))
        # Dummy functions for dynamically created filters
        self.conn.create_function('books_list_filter', 1, lambda x: 1)
-        from calibre.utils.icu import sort_key
-        self.conn.create_collation('icucollate', partial(icu_collator, func=sort_key))
+        self.conn.create_collation('icucollate', icu_collator)

    def run(self):
        try:
@@ -199,6 +199,10 @@ if not _run_once:

    __builtin__.__dict__['lopen'] = local_open

+    from calibre.utils.icu import title_case, lower as icu_lower, upper as icu_upper
+    __builtin__.__dict__['icu_lower'] = icu_lower
+    __builtin__.__dict__['icu_upper'] = icu_upper
+    __builtin__.__dict__['icu_title'] = title_case

    import mimetypes
    mimetypes.init([P('mime.types')])
@@ -40,6 +40,11 @@ def titlecase(text):

    """

+    def capitalize(w):
+        # Lower-case w, then upper-case only its first character; slicing is safe on ''.
+        w = icu_lower(w)
+        return icu_upper(w[:1]) + w[1:]
+
    all_caps = ALL_CAPS.match(text)

    words = re.split('\s', text)
@@ -50,29 +55,29 @@ def titlecase(text):
                line.append(word)
                continue
            else:
-                word = word.lower()
+                word = icu_lower(word)

        if APOS_SECOND.match(word):
-            word = word.replace(word[0], word[0].upper())
-            word = word.replace(word[2], word[2].upper())
+            # Upper-case only positions 0 and 2 (e.g. o'neil -> O'Neil); str.replace hits every occurrence.
+            word = icu_upper(word[0]) + word[1] + icu_upper(word[2]) + word[3:]
            line.append(word)
            continue
        if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
            line.append(word)
            continue
        if SMALL_WORDS.match(word):
-            line.append(word.lower())
+            line.append(icu_lower(word))
            continue

        match = MAC_MC.match(word)
        if match:
-            line.append("%s%s" % (match.group(1).capitalize(),
-                                  match.group(2).capitalize()))
+            line.append("%s%s" % (capitalize(match.group(1)),
+                                  capitalize(match.group(2))))
            continue

        hyphenated = []
        for item in word.split('-'):
-            hyphenated.append(CAPFIRST.sub(lambda m: m.group(0).upper(), item))
+            hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
        line.append("-".join(hyphenated))


@@ -80,14 +85,14 @@ def titlecase(text):

    result = SMALL_FIRST.sub(lambda m: '%s%s' % (
        m.group(1),
-        m.group(2).capitalize()
+        capitalize(m.group(2))
    ), result)

-    result = SMALL_LAST.sub(lambda m: m.group(0).capitalize(), result)
+    result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result)

    result = SUBPHRASE.sub(lambda m: '%s%s' % (
        m.group(1),
-        m.group(2).capitalize()
+        capitalize(m.group(2))
    ), result)

    return result