Merge from trunk

This commit is contained in:
Charles Haley 2010-09-30 09:00:50 +01:00
commit fa9c23031e
11 changed files with 341 additions and 7202 deletions

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 209 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.6 KiB

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -12,15 +12,18 @@ class PeterSchiff(BasicNewsRecipe):
description = 'Economic commentary' description = 'Economic commentary'
publisher = 'Euro Pacific capital' publisher = 'Euro Pacific capital'
category = 'news, politics, economy, USA' category = 'news, politics, economy, USA'
oldest_article = 15 oldest_article = 25
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'en' language = 'en'
country = 'US'
remove_empty_feeds = True remove_empty_feeds = True
extra_css = ' body{font-family: Verdana,Times,serif } h1{text-align: left} img{margin-bottom: 0.4em} ' extra_css = """
body{font-family: Verdana,Times,serif }
.field-field-commentary-writer-name{font-weight: bold}
.field-items{display: inline}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
@ -30,7 +33,15 @@ class PeterSchiff(BasicNewsRecipe):
, 'linearize_tables' : True , 'linearize_tables' : True
} }
keep_only_tags = [dict(name='tr',attrs={'style':'vertical-align: top;'})] keep_only_tags = [
dict(name='h2',attrs={'id':'page-title'})
,dict(name='div',attrs={'class':'node'})
]
remove_tags = [
dict(name=['meta','link','base','iframe','embed'])
,dict(attrs={'id':'text-zoom'})
]
remove_attributes=['track','linktype','lang']
feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')] feeds = [(u'Articles', u'http://feeds.feedburner.com/PeterSchiffsEconomicCommentary')]

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
'''
rmf24.pl
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RMF24_opinie(BasicNewsRecipe):
    """Calibre news recipe: blogs, interviews and commentary from rmf24.pl."""

    title = u'Rmf24.pl - Opinie'
    description = u'Blogi, wywiady i komentarze ze strony rmf24.pl'
    language = 'pl'
    __author__ = u'Tomasz D\u0142ugosz'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    feeds = [
        (u'Blogi', u'http://www.rmf24.pl/opinie/blogi/feed'),
        (u'Kontrwywiad', u'http://www.rmf24.pl/opinie/wywiady/kontrwywiad/feed'),
        (u'Przes\u0142uchanie', u'http://www.rmf24.pl/opinie/wywiady/przesluchanie/feed'),
        (u'Komentarze', u'http://www.rmf24.pl/opinie/komentarze/feed'),
    ]

    # Only the article container divs survive; everything else is dropped.
    keep_only_tags = [
        dict(name='div', attrs={'class': cls})
        for cls in (
            'box articleSingle print',
            'box articleSingle print singleCommentary',
            'box articleSingle print blogSingleEntry',
        )
    ]

    # Navigation/ad chrome stripped from inside the kept containers.
    remove_tags = [
        dict(name='div', attrs={'class': cls})
        for cls in ('toTop', 'category', 'REMOVE', 'embed embedAd')
    ]

    extra_css = '''
    h1 { font-size: 1.2em; }
    '''

    # thanks to Kovid Goyal
    def get_article_url(self, article):
        """Return the feed entry's link, or None to skip audio-only entries."""
        url = article.get('link')
        if 'audio' in url:
            return None
        return url

    # Pre-parse HTML rewrites: drop the photo heading, relabel embed wrappers
    # so remove_tags catches them, and strip the Facebook promo footer.
    preprocess_regexps = [
        (re.compile(pattern, re.IGNORECASE | re.DOTALL), handler)
        for pattern, handler in [
            (r'<h2>Zdj.cie</h2>', lambda match: ''),
            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
            (r'<a href="http://www.facebook.com/pages/RMF24pl/.*?>RMF24.pl</a> on Facebook</div>', lambda match: '</div>')
        ]
    ]

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.7.905' __version__ = '0.7.906'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re

View File

@ -47,29 +47,43 @@ class ISBNDBMetadata(Metadata):
def __init__(self, book): def __init__(self, book):
Metadata.__init__(self, None, []) Metadata.__init__(self, None, [])
def tostring(e):
if not hasattr(e, 'string'):
return None
ans = e.string
if ans is not None:
ans = unicode(ans).strip()
if not ans:
ans = None
return ans
self.isbn = unicode(book.get('isbn13', book.get('isbn'))) self.isbn = unicode(book.get('isbn13', book.get('isbn')))
self.title = unicode(book.find('titlelong').string) self.title = tostring(book.find('titlelong'))
if not self.title: if not self.title:
self.title = unicode(book.find('title').string) self.title = tostring(book.find('title'))
if not self.title:
self.title = _('Unknown')
self.title = unicode(self.title).strip() self.title = unicode(self.title).strip()
au = unicode(book.find('authorstext').string).strip()
temp = au.split(',')
self.authors = [] self.authors = []
for au in temp: au = tostring(book.find('authorstext'))
if not au: continue if au:
self.authors.extend([a.strip() for a in au.split('&amp;')]) au = au.strip()
temp = au.split(',')
for au in temp:
if not au: continue
self.authors.extend([a.strip() for a in au.split('&amp;')])
try: try:
self.author_sort = book.find('authors').find('person').string self.author_sort = tostring(book.find('authors').find('person'))
if self.authors and self.author_sort == self.authors[0]: if self.authors and self.author_sort == self.authors[0]:
self.author_sort = None self.author_sort = None
except: except:
pass pass
self.publisher = unicode(book.find('publishertext').string) self.publisher = tostring(book.find('publishertext'))
summ = book.find('summary') summ = tostring(book.find('summary'))
if summ and hasattr(summ, 'string') and summ.string: if summ:
self.comments = 'SUMMARY:\n'+unicode(summ.string) self.comments = 'SUMMARY:\n'+summ.string
def build_isbn(base_url, opts): def build_isbn(base_url, opts):

View File

@ -12,6 +12,7 @@ import mechanize
from calibre import browser, prints from calibre import browser, prints
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import strip_encoding_declarations
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
@ -110,6 +111,8 @@ def get_social_metadata(title, authors, publisher, isbn, username=None,
+isbn).read() +isbn).read()
if not raw: if not raw:
return mi return mi
raw = raw.decode('utf-8', 'replace')
raw = strip_encoding_declarations(raw)
root = html.fromstring(raw) root = html.fromstring(raw)
h1 = root.xpath('//div[@class="headsummary"]/h1') h1 = root.xpath('//div[@class="headsummary"]/h1')
if h1 and not mi.title: if h1 and not mi.title:

View File

@ -19,6 +19,7 @@ from calibre import prints
from calibre.constants import DEBUG from calibre.constants import DEBUG
class Worker(Thread): class Worker(Thread):
'Cover downloader'
def __init__(self): def __init__(self):
Thread.__init__(self) Thread.__init__(self)
@ -88,7 +89,7 @@ class DownloadMetadata(Thread):
if mi.isbn: if mi.isbn:
args['isbn'] = mi.isbn args['isbn'] = mi.isbn
else: else:
if not mi.title or mi.title == _('Unknown'): if mi.is_null('title'):
self.failures[id] = \ self.failures[id] = \
(str(id), _('Book has neither title nor ISBN')) (str(id), _('Book has neither title nor ISBN'))
continue continue

View File

@ -579,6 +579,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, # {{{
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
time.sleep(2) time.sleep(2)
if mb is not None:
mb.flush()
self.hide_windows() self.hide_windows()
return True return True

View File

@ -40,12 +40,14 @@ class MetadataBackup(Thread): # {{{
self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump) self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
self.clear_dirtied = FunctionDispatcher(db.clear_dirtied) self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
self.set_dirtied = FunctionDispatcher(db.dirtied) self.set_dirtied = FunctionDispatcher(db.dirtied)
self.in_limbo = None
def stop(self): def stop(self):
self.keep_running = False self.keep_running = False
def run(self): def run(self):
while self.keep_running: while self.keep_running:
self.in_limbo = None
try: try:
time.sleep(0.5) # Limit to two per second time.sleep(0.5) # Limit to two per second
id_ = self.db.dirtied_queue.get(True, 1.45) id_ = self.db.dirtied_queue.get(True, 1.45)
@ -72,6 +74,7 @@ class MetadataBackup(Thread): # {{{
if mi is None: if mi is None:
continue continue
self.in_limbo = id_
# Give the GUI thread a chance to do something. Python threads don't # Give the GUI thread a chance to do something. Python threads don't
# have priorities, so this thread would naturally keep the processor # have priorities, so this thread would naturally keep the processor
@ -98,6 +101,15 @@ class MetadataBackup(Thread): # {{{
prints('Failed to write backup metadata for id:', id_, prints('Failed to write backup metadata for id:', id_,
'again, giving up') 'again, giving up')
continue continue
self.in_limbo = None
def flush(self):
'Used during shutdown to ensure that a dirtied book is not missed'
if self.in_limbo is not None:
try:
self.db.dirtied([self.in_limbo])
except:
traceback.print_exc()
def write(self, path, raw): def write(self, path, raw):
with open(path, 'wb') as f: with open(path, 'wb') as f:

View File

@ -348,10 +348,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
setattr(self, 'title_sort', functools.partial(self.get_property, setattr(self, 'title_sort', functools.partial(self.get_property,
loc=self.FIELD_MAP['sort'])) loc=self.FIELD_MAP['sort']))
self.dirtied_cache = set()
d = self.conn.get('SELECT book FROM metadata_dirtied', all=True) d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
for x in d: for x in d:
self.dirtied_queue.put(x[0]) self.dirtied_queue.put(x[0])
self.dirtied_cache = set([x[0] for x in d])
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self) self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
self.refresh() self.refresh()
@ -616,9 +616,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.commit() self.conn.commit()
def dirtied(self, book_ids, commit=True): def dirtied(self, book_ids, commit=True):
for book in book_ids: for book in frozenset(book_ids) - self.dirtied_cache:
if book in self.dirtied_cache:
continue
try: try:
self.conn.execute( self.conn.execute(
'INSERT INTO metadata_dirtied (book) VALUES (?)', 'INSERT INTO metadata_dirtied (book) VALUES (?)',