Merge from trunk

Author: Sengian
Date:   2010-12-18 20:43:06 +01:00
Commit: a5baa3c1c6
11 changed files with 135 additions and 132 deletions

View File

@@ -11,7 +11,7 @@
   - title: "Page turn animations in the e-book viewer"
     type: major
     description: >
-      "Now when you use the Page Down/Page Up keys or the next/previous page buttons in the viewer, page turning will be animated. The duration of the animation can be controlled in the viewer preferences. Setting it to o disables the animation completely."
+      "Now when you use the Page Down/Page Up keys or the next/previous page buttons in the viewer, page turning will be animated. The duration of the animation can be controlled in the viewer preferences. Setting it to 0 disables the animation completely."

   - title: "Conversion pipeline: Add an option to set the minimum line height of all elemnts as a percentage of the computed font size. By default, calibre now sets the line height to 120% of the computed font size."

View File

@@ -41,6 +41,20 @@ series_index_auto_increment = 'next'
 # selecting 'manage authors', and pressing 'Recalculate all author sort values'.
 author_sort_copy_method = 'invert'

+# Set which author field to display in the tags pane (the list of authors,
+# series, publishers etc on the left hand side). The choices are author and
+# author_sort. This tweak affects only the tags pane, and only what is displayed
+# under the authors category. Please note that if you set this to author_sort,
+# it is very possible to see duplicate names in the list becasue although it is
+# guaranteed that author names are unique, there is no such guarantee for
+# author_sort values. Showing duplicates won't break anything, but it could
+# lead to some confusion. When using 'author_sort', the tooltip will show the
+# author's name.
+# Examples:
+# tags_pane_use_field_for_author_name = 'author'
+# tags_pane_use_field_for_author_name = 'author_sort'
+tags_pane_use_field_for_author_name = 'author'
+
 # Set whether boolean custom columns are two- or three-valued.
 # Two-values for true booleans
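An aside on the duplicate-name caveat in the new tweak's comment: author names are unique in the database, but two different authors can end up with the same author_sort string, so displaying the sort value can repeat entries. A minimal standalone sketch with hypothetical data (not calibre code):

    # Two distinct authors whose sort values happen to collide (hypothetical data).
    authors = [
        {'name': 'John Smith',    'sort': 'Smith, John'},
        {'name': 'John A. Smith', 'sort': 'Smith, John'},
    ]

    # tags_pane_use_field_for_author_name = 'author_sort' would show the sort values:
    print([a['sort'] for a in authors])   # ['Smith, John', 'Smith, John'] -- apparent duplicate
    # the default 'author' setting shows the unique names:
    print([a['name'] for a in authors])   # ['John Smith', 'John A. Smith']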

View File

@@ -12,16 +12,6 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
     language = 'en'
     no_stylesheets = True

-    #remove_tags_before = dict(name='div', attrs={'align':'center'})
-    #remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
-    remove_tags = [
-        dict(name='iframe'),
-        dict(name='div', attrs={'class':'subContent'}),
-        dict(name='div', attrs={'id':['contentFrame']}),
-        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
-        #dict(name='table', attrs={'align':'RIGHT'}),
-    ]

     # TO LOGIN
@@ -39,47 +29,47 @@ class JournalofHospitalMedicine(BasicNewsRecipe):
     #TO GET ARTICLE TOC
     def johm_get_index(self):
         return self.index_to_soup('http://www3.interscience.wiley.com/journal/111081937/home')

     # To parse artice toc
     def parse_index(self):
         parse_soup = self.johm_get_index()
         div = parse_soup.find(id='contentCell')
         current_section = None
         current_articles = []
         feeds = []
         for x in div.findAll(True):
             if x.name == 'h4':
                 # Section heading found
                 if current_articles and current_section:
                     feeds.append((current_section, current_articles))
                 current_section = self.tag_to_string(x)
                 current_articles = []
                 self.log('\tFound section:', current_section)
             if current_section is not None and x.name == 'strong':
                 title = self.tag_to_string(x)
                 p = x.parent.parent.find('a', href=lambda x: x and '/HTMLSTART' in x)
                 if p is None:
                     continue
                 url = p.get('href', False)
                 if not url or not title:
                     continue
                 if url.startswith('/'):
                     url = 'http://www3.interscience.wiley.com'+url
                 url = url.replace('/HTMLSTART', '/main.html,ftx_abs')
                 self.log('\t\tFound article:', title)
                 self.log('\t\t\t', url)
                 #if url.startswith('/'):
                 #    url = 'http://online.wsj.com'+url
                 current_articles.append({'title': title, 'url':url,
                     'description':'', 'date':''})
         if current_articles and current_section:
             feeds.append((current_section, current_articles))
         return feeds

     def preprocess_html(self, soup):
         for img in soup.findAll('img', src=True):

View File

@@ -11,16 +11,7 @@ class NYTimes(BasicNewsRecipe):
     language = 'en'
     no_stylesheets = True

-    remove_tags_before = dict(name='div', attrs={'align':'center'})
-    remove_tags_after = dict(name='ol', attrs={'compact':'COMPACT'})
-    remove_tags = [
-        dict(name='iframe'),
-        #dict(name='div', attrs={'class':'related-articles'}),
-        dict(name='div', attrs={'id':['sidebar']}),
-        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or author')"}),
-        dict(name='table', attrs={'align':'RIGHT'}),
-    ]
+    keep_only_tags = dict(id='content')

     #TO LOGIN
@@ -38,61 +29,50 @@ class NYTimes(BasicNewsRecipe):
     #TO GET ARTICLE TOC
     def nejm_get_index(self):
         return self.index_to_soup('http://content.nejm.org/current.dtl')

     # To parse artice toc
     def parse_index(self):
         parse_soup = self.nejm_get_index()

-        div = parse_soup.find(id='centerTOC')
-        current_section = None
-        current_articles = []
-        feeds = []
-        for x in div.findAll(True):
-            if x.name == 'img' and '/toc/' in x.get('src', '') and 'uarrow.gif' not in x.get('src', ''):
-                # Section heading found
-                if current_articles and current_section and 'Week in the' not in current_section:
-                    feeds.append((current_section, current_articles))
-                current_section = x.get('alt')
-                current_articles = []
-                self.log('\tFound section:', current_section)
-            if current_section is not None and x.name == 'strong':
-                title = self.tag_to_string(x)
-                a = x.parent.find('a', href=lambda x: x and '/full/' in x)
-                if a is None:
-                    continue
-                url = a.get('href', False)
-                if not url or not title:
-                    continue
-                if url.startswith('/'):
-                    url = 'http://content.nejm.org'+url
-                self.log('\t\tFound article:', title)
-                self.log('\t\t\t', url)
-                if url.startswith('/'):
-                    url = 'http://online.wsj.com'+url
-                current_articles.append({'title': title, 'url':url,
-                    'description':'', 'date':''})
-        if current_articles and current_section:
-            feeds.append((current_section, current_articles))
-        return feeds
-
-    def preprocess_html(self, soup):
-        for a in soup.findAll(text=lambda x: x and '[in this window]' in x):
-            a = a.findParent('a')
-            url = a.get('href', None)
-            if not url:
-                continue
-            if url.startswith('/'):
-                url = 'http://content.nejm.org'+url
-            isoup = self.index_to_soup(url)
-            img = isoup.find('img', src=lambda x: x and
-                x.startswith('/content/'))
-            if img is not None:
-                img.extract()
-                table = a.findParent('table')
-                table.replaceWith(img)
-        return soup
+        feeds = []
+        div = parse_soup.find(attrs={'class':'tocContent'})
+        for group in div.findAll(attrs={'class':'articleGrouping'}):
+            feed_title = group.find(attrs={'class':'articleType'})
+            if feed_title is None:
+                continue
+            feed_title = self.tag_to_string(feed_title)
+            articles = []
+            self.log('Found section:', feed_title)
+            for art in group.findAll(attrs={'class':lambda x: x and 'articleEntry'
+                in x}):
+                link = art.find(attrs={'class':lambda x:x and 'articleLink' in
+                    x})
+                if link is None:
+                    continue
+                a = link.find('a', href=True)
+                if a is None:
+                    continue
+                url = a.get('href')
+                if url.startswith('/'):
+                    url = 'http://www.nejm.org'+url
+                title = self.tag_to_string(a)
+                self.log.info('\tFound article:', title, 'at', url)
+                article = {'title':title, 'url':url, 'date':''}
+                au = art.find(attrs={'class':'articleAuthors'})
+                if au is not None:
+                    article['author'] = self.tag_to_string(au)
+                desc = art.find(attrs={'class':'hover_text'})
+                if desc is not None:
+                    desc = self.tag_to_string(desc)
+                    if 'author' in article:
+                        desc = ' by ' + article['author'] + ' ' +desc
+                    article['description'] = desc
+                articles.append(article)
+            if articles:
+                feeds.append((feed_title, articles))
+        return feeds

View File

@@ -62,9 +62,9 @@ class SWEEX(USBMS):
     # Ordered list of supported formats
     FORMATS = ['epub', 'prc', 'fb2', 'html', 'rtf', 'chm', 'pdf', 'txt']

-    VENDOR_ID = [0x0525]
-    PRODUCT_ID = [0xa4a5]
-    BCD = [0x0319]
+    VENDOR_ID = [0x0525, 0x177f]
+    PRODUCT_ID = [0xa4a5, 0x300]
+    BCD = [0x0319, 0x110]

     VENDOR_NAME = 'SWEEX'
     WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'EBOOKREADER'

View File

@@ -18,6 +18,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, \
 from calibre.ebooks.metadata import title_sort
 from calibre.gui2 import config, NONE
 from calibre.library.field_metadata import TagsIcons, category_icon_map
+from calibre.utils.config import tweaks
 from calibre.utils.icu import sort_key
 from calibre.utils.search_query_parser import saved_searches
 from calibre.gui2 import error_dialog
@@ -409,17 +410,31 @@ class TagTreeItem(object): # {{{
         return NONE

     def tag_data(self, role):
+        tag = self.tag
+        if tag.category == 'authors' and \
+                tweaks['tags_pane_use_field_for_author_name'] == 'author_sort':
+            name = tag.sort
+            tt_author = True
+        else:
+            name = tag.name
+            tt_author = False
         if role == Qt.DisplayRole:
-            if self.tag.count == 0:
-                return QVariant('%s'%(self.tag.name))
+            if tag.count == 0:
+                return QVariant('%s'%(name))
             else:
-                return QVariant('[%d] %s'%(self.tag.count, self.tag.name))
+                return QVariant('[%d] %s'%(tag.count, name))
         if role == Qt.EditRole:
-            return QVariant(self.tag.name)
+            return QVariant(tag.name)
         if role == Qt.DecorationRole:
-            return self.icon_state_map[self.tag.state]
-        if role == Qt.ToolTipRole and self.tag.tooltip is not None:
-            return QVariant(self.tag.tooltip)
+            return self.icon_state_map[tag.state]
+        if role == Qt.ToolTipRole:
+            if tt_author:
+                if tag.tooltip is not None:
+                    return QVariant('(%s) %s'%(tag.name, tag.tooltip))
+                else:
+                    return QVariant(tag.name)
+            if tag.tooltip is not None:
+                return QVariant(tag.tooltip)
         return NONE

     def toggle(self):

View File

@@ -1128,6 +1128,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 for l in list:
                     (id, val, sort_val) = (l[0], l[1], l[2])
                     tids[category][val] = (id, sort_val)
+            elif cat['datatype'] == 'rating':
+                for l in list:
+                    (id, val) = (l[0], l[1])
+                    tids[category][val] = (id, '{0:05.2f}'.format(val))
             else:
                 for l in list:
                     (id, val) = (l[0], l[1])
@@ -1256,12 +1260,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         # sort the list
         if sort == 'name':
-            def get_sort_key(x):
-                sk = x.s
-                if isinstance(sk, unicode):
-                    sk = sort_key(sk)
-                return sk
-            kf = get_sort_key
+            kf = lambda x :sort_key(x.s)
             reverse=False
         elif sort == 'popularity':
             kf = lambda x: x.c
@@ -1967,7 +1966,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
     @classmethod
     def cleanup_tags(cls, tags):
-        tags = [x.strip() for x in tags if x.strip()]
+        tags = [x.strip().replace(',', ';') for x in tags if x.strip()]
         tags = [x.decode(preferred_encoding, 'replace') \
                     if isbytestring(x) else x for x in tags]
         tags = [u' '.join(x.split()) for x in tags]
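A note on the rating branch added in the first hunk above: '{0:05.2f}' zero-pads the numeric rating to a fixed width, so the stored sort string orders the same way the numbers do. A quick standalone check (plain Python, not calibre code):

    # Fixed-width, zero-padded formatting keeps string order consistent with numeric order.
    ratings = [10.0, 2.0, 4.5]
    keys = ['{0:05.2f}'.format(r) for r in ratings]
    print(keys)          # ['10.00', '02.00', '04.50']
    print(sorted(keys))  # ['02.00', '04.50', '10.00'] -- matches sorted(ratings)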

View File

@@ -427,7 +427,9 @@ class SchemaUpgrade(object):
     def upgrade_version_15(self):
         'Remove commas from tags'
-        self.conn.execute("UPDATE tags SET name=REPLACE(name, ',', ';')")
+        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';')")
+        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';;')")
+        self.conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', '')")

     def upgrade_version_16(self):
         self.conn.executescript('''
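One plausible reading of the three UPDATE OR IGNORE passes above (my interpretation; the commit itself does not say): tags.name is unique, so a comma-to-semicolon rewrite can collide with an existing tag. OR IGNORE skips the colliding rows, and the later passes retry them with ';;' and finally with the comma simply dropped. A small standalone sqlite3 sketch of that behavior:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE tags (id INTEGER PRIMARY KEY, name TEXT UNIQUE)')
    conn.executemany('INSERT INTO tags(name) VALUES (?)', [('sf;fantasy',), ('sf,fantasy',)])

    # Pass 1: 'sf,fantasy' -> 'sf;fantasy' would collide with the existing tag, so it is skipped.
    conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';')")
    # Pass 2: the skipped row is retried with a different separator and now succeeds.
    conn.execute("UPDATE OR IGNORE tags SET name=REPLACE(name, ',', ';;')")
    print([r[0] for r in conn.execute('SELECT name FROM tags ORDER BY id')])
    # ['sf;fantasy', 'sf;;fantasy']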

View File

@@ -19,7 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx
 from calibre.library.comments import comments_to_html
 from calibre.library.server import custom_fields_to_display
 from calibre.library.server.utils import format_tag_string, Offsets
-from calibre import guess_type
+from calibre import guess_type, prepare_string_for_xml as xml
 from calibre.utils.icu import sort_key
 from calibre.utils.ordered_dict import OrderedDict
@@ -150,13 +150,13 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
         extra.append(_('RATING: %s<br />')%rating)
     tags = item[FM['tags']]
     if tags:
-        extra.append(_('TAGS: %s<br />')%format_tag_string(tags, ',',
+        extra.append(_('TAGS: %s<br />')%xml(format_tag_string(tags, ',',
                                            ignore_max=True,
-                                           no_tag_count=True))
+                                           no_tag_count=True)))
     series = item[FM['series']]
     if series:
         extra.append(_('SERIES: %s [%s]<br />')%\
-                (series,
+                (xml(series),
                 fmt_sidx(float(item[FM['series_index']]))))
     for key in CKEYS:
         mi = db.get_metadata(item[CFM['id']['rec_index']], index_is_id=True)
@@ -164,11 +164,11 @@ def ACQUISITION_ENTRY(item, version, db, updated, CFM, CKEYS, prefix):
         if val:
             datatype = CFM[key]['datatype']
             if datatype == 'text' and CFM[key]['is_multiple']:
-                extra.append('%s: %s<br />'%(name, format_tag_string(val, ',',
+                extra.append('%s: %s<br />'%(xml(name), xml(format_tag_string(val, ',',
                                              ignore_max=True,
-                                             no_tag_count=True)))
+                                             no_tag_count=True))))
             else:
-                extra.append('%s: %s<br />'%(name, val))
+                extra.append('%s: %s<br />'%(xml(name), xml(unicode(val))))
     comments = item[FM['comments']]
     if comments:
         comments = comments_to_html(comments)
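For context on why these fields are now routed through prepare_string_for_xml (imported as xml above): tag, series, and custom-column values are user data, and characters like '&' or '<' would otherwise land unescaped in the OPDS feed. A rough illustration using the standard library's escape rather than calibre's helper:

    from xml.sax.saxutils import escape

    series = 'Dungeons & Dragons <Core>'
    # Left unescaped, the '&' and '<' would make the generated entry invalid XML.
    print('SERIES: %s<br />' % escape(series))
    # SERIES: Dungeons &amp; Dragons &lt;Core&gt;<br />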

View File

@@ -541,7 +541,9 @@ Use the options to remove headers and footers to mitigate this issue. If the hea
 removed from the text it can throw off the paragraph unwrapping.

 Some limitations of PDF input is complex, multi-column, and image based documents are not supported.
-Extraction of vector images and tables from within the document is also not supported.
+Extraction of vector images and tables from within the document is also not supported. Some PDFs use special glyphs to
+represent double ll or doubfle ff or fi,etc. Conversion of these may or may not work depending on jusy how they are
+represented internally in the PDF.

 Comic Book Collections
 ~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -548,6 +548,7 @@ class BasicNewsRecipe(Recipe):
             }

         For an example, see the recipe for downloading `The Atlantic`.
+        In addition, you can add 'author' for the author of the article.
         '''
         raise NotImplementedError
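To make the docstring addition concrete, here is a hedged sketch of the structure a parse_index implementation is expected to return, including the optional 'author' key this change documents (the field names mirror the NEJM recipe rewrite earlier in this commit; the example values are invented):

    def parse_index(self):
        # One (feed_title, articles) tuple per section; each article is a dict.
        return [
            ('Original Articles', [
                {'title': 'An example study',
                 'url': 'http://www.nejm.org/doi/full/example',
                 'date': '', 'description': 'Short abstract text',
                 'author': 'A. Researcher'},   # the newly documented optional key
            ]),
        ]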