Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-09-25 09:01:33 +02:00 · 2011-09-25 09:01:33 +02:00 · b2dceb1a7a
commit b2dceb1a7a
parent 4ece2a59e2 90bb4382e0
4 changed files with 63 additions and 25 deletions
--- a/recipes/dna.recipe
+++ b/recipes/dna.recipe
@ -1,6 +1,3 @@
 '''
 dnaindia.com
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
@ -12,6 +9,10 @@ class DNAIndia(BasicNewsRecipe):
    language = 'en_IN'
    encoding    = 'cp1252'
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True
    feeds       = [
                   ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
@ -22,15 +23,10 @@ class DNAIndia(BasicNewsRecipe):
                   ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
                   ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
                   ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
-                   ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
+                   ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
                   ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
                   ]
-    remove_tags = [{'id':['footer', 'lhs-col']}, {'class':['bottom', 'categoryHead',
+
-        'article_tools']}]
+    
    keep_only_tags = dict(id='middle-col')
    remove_tags_after=[dict(attrs={'id':'story'})]
    remove_attributes=['style']
    no_stylesheets = True
    def print_version(self, url):
        match = re.search(r'newsid=(\d+)', url)
--- a/recipes/toi.recipe
+++ b/recipes/toi.recipe
@ -9,11 +9,16 @@ class TimesOfIndia(BasicNewsRecipe):
    max_articles_per_feed = 25
    no_stylesheets = True
-    keep_only_tags = [{'class':['maintable12', 'prttabl']}]
+    remove_attributes = ['style']
    keep_only_tags = [
            {'class':re.compile(r'maintable12|prttabl')},
            {'id':['mod-article-header',
                'mod-a-body-after-first-para', 'mod-a-body-first-para']},
            ]
    remove_tags = [
-            dict(style=lambda x: x and 'float' in x),
+            {'class':re.compile('tabsintbgshow|prvnxtbg')},
-            {'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
+            {'id':['fbrecommend', 'relmaindiv']}
-    ]
+            ]
    feeds          = [
 ('Top Stories',
@ -41,6 +46,8 @@ class TimesOfIndia(BasicNewsRecipe):
 ]
    def get_article_url(self, article):
        # Times of India sometimes serves an ad page instead of the article;
        # this code detects and circumvents that.
        url = BasicNewsRecipe.get_article_url(self, article)
        if '/0Ltimesofindia' in url:
            url = url.partition('/0L')[-1]
@ -61,6 +68,3 @@ class TimesOfIndia(BasicNewsRecipe):
        return url
    def preprocess_html(self, soup):
        return soup
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -73,6 +73,20 @@ class Worker(Thread): # Get details {{{
            8: ['août'],
            9: ['sept'],
            12: ['déc'],
            },
            'es': {
                1: ['enero'],
                2: ['febrero'],
                3: ['marzo'],
                4: ['abril'],
                5: ['mayo'],
                6: ['junio'],
                7: ['julio'],
                8: ['agosto'],
                9: ['septiembre', 'setiembre'],
                10: ['octubre'],
                11: ['noviembre'],
                12: ['diciembre'],
            },
                'jp': {
            1: [u'1月'],
@ -101,13 +115,16 @@ class Worker(Thread): # Get details {{{
                 text()="Dettagli prodotto" or \
                 text()="Product details" or \
                 text()="Détails sur le produit" or \
                 text()="Detalles del producto" or \
                 text()="登録情報"]/../div[@class="content"]
            '''
        # Editor: is for Spanish
        self.publisher_xpath = '''
            descendant::*[starts-with(text(), "Publisher:") or \
                    starts-with(text(), "Verlag:") or \
                    starts-with(text(), "Editore:") or \
                    starts-with(text(), "Editeur") or \
                    starts-with(text(), "Editor:") or \
                    starts-with(text(), "出版社:")]
            '''
        self.language_xpath =    '''
@ -116,12 +133,14 @@ class Worker(Thread): # Get details {{{
                or text() = "Language" \
                or text() = "Sprache:" \
                or text() = "Lingua:" \
                or text() = "Idioma:" \
                or starts-with(text(), "Langue") \
                or starts-with(text(), "言語") \
                ]
            '''
        self.ratings_pat = re.compile(
-            r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち) ([\d\.]+)( (stars|Sternen|stelle)){0,1}')
+            r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち|de un máximo de) ([\d\.]+)( (stars|Sternen|stelle|estrellas)){0,1}')
        lm = {
                'eng': ('English', 'Englisch'),
@ -143,6 +162,7 @@ class Worker(Thread): # Get details {{{
        for i, vals in self.months.iteritems():
            for x in vals:
                ans = ans.replace(x, self.english_months[i])
        ans = ans.replace(' de ', ' ')
        return ans
    def run(self):
@ -422,6 +442,7 @@ class Amazon(Source):
            'uk' : _('UK'),
            'it' : _('Italy'),
            'jp' : _('Japan'),
            'es' : _('Spain'),
    }
    options = (
@ -789,6 +810,16 @@ if __name__ == '__main__': # tests {{{
            ),
    ] # }}}
    es_tests = [ # {{{
            (
                {'identifiers':{'isbn': '8483460831'}},
                [title_test('Tiempos Interesantes',
                    exact=True), authors_test(['Terry Pratchett'])
                 ]
            ),
    ] # }}}
    jp_tests = [ # {{{
            ( # isbn -> title, authors
                {'identifiers':{'isbn': '9784101302720' }},
@ -804,6 +835,6 @@ if __name__ == '__main__': # tests {{{
    ] # }}}
    test_identify_plugin(Amazon.name, com_tests)
-    #test_identify_plugin(Amazon.name, jp_tests)
+    #test_identify_plugin(Amazon.name, es_tests)
 # }}}
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@ -173,6 +173,10 @@ class PDFWriter(QObject): # {{{
                printer.setOutputFormat(QPrinter.NativeFormat)
            self.view.print_(printer)
            printer.abort()
        else:
            # The document is so corrupt that we can't render the page.
            self.loop.exit(0)
            raise Exception('Document cannot be rendered.')
        self._render_book()
    def _delete_tmpdir(self):
@ -207,11 +211,14 @@ class PDFWriter(QObject): # {{{
        try:
            outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
            for item in self.combine_queue:
-                with open(item, 'rb') as item_stream:
+                # The input PDF stream must remain open until the final PDF
-                    inputPDF = PdfFileReader(item_stream)
+                # is written to disk. PyPDF references pages added to the
-                    for page in inputPDF.pages:
+                # final PDF from the input PDF on disk. It does not store
-                        outPDF.addPage(page)
+                # the pages in memory so we can't close the input PDF.
-                        outPDF.write(self.out_stream)
+                inputPDF = PdfFileReader(open(item, 'rb'))
                for page in inputPDF.pages:
                    outPDF.addPage(page)
            outPDF.write(self.out_stream)
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)