Merge from trunk

2025-07-08 10:44:09 -04:00 · 2011-01-25 18:56:05 +00:00 · 2011-01-25 18:56:05 +00:00 · c47013ef46
commit c47013ef46
parent 8b62f6be45 05be08a9ee
11 changed files with 122 additions and 35 deletions
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@ -62,6 +62,18 @@ div.description {
 	text-indent: 1em;
 	}
 /*
 * 	Attempt to minimize widows and orphans by logically grouping chunks.
 * 	Enabling is recommended for iPad; the rule below is commented out by default.
 * 	Problems have been reported with Sony e-readers, presumably those using ADE engines.
 */
 /*
 div.logical_group {
 	display:inline-block;
 	width:100%;
 	}
 */
 p.date_index {
 	font-size:x-large;
 	text-align:center;
--- a/resources/recipes/20_minutos.recipe
+++ b/resources/recipes/20_minutos.recipe
@ -1,17 +1,67 @@
 # -*- coding: utf-8
 __license__   = 'GPL v3'
 __author__    = 'Luis Hernandez'
 __copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
 description   = 'Periódico gratuito en español - v0.5 - 25 Jan 2011'
 '''
 www.20minutos.es
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
-class AdvancedUserRecipe1295310874(BasicNewsRecipe):
+class AdvancedUserRecipe1294946868(BasicNewsRecipe):
-    title          = u'20 Minutos (Boletin)'
+
-    __author__            = 'Luis Hernandez'
+    title          = u'20 Minutos'
-    description           = 'Periódico gratuito en español'
+    publisher      = u'Grupo 20 Minutos'
    __author__            = u'Luis Hernández'
    description           = u'Periódico gratuito en español'
    cover_url     = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
    language      = 'es'
-    oldest_article = 2
+    oldest_article = 5
-    max_articles_per_feed = 50
+    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'ISO-8859-1'
    language              = 'es'
    timefmt        = '[%a, %d %b, %Y]'
    keep_only_tags     = [dict(name='div', attrs={'id':['content']})
                                  ,dict(name='div', attrs={'class':['boxed','description','lead','article-content']})
                                  ,dict(name='span', attrs={'class':['photo-bar']})
                                  ,dict(name='ul', attrs={'class':['article-author']})
                                ]
    remove_tags_before = dict(name='ul' , attrs={'class':['servicios-sub']})
    remove_tags_after  = dict(name='div' , attrs={'class':['related-news','col']})
    remove_tags = [
                     dict(name='ol', attrs={'class':['navigation',]})
                    ,dict(name='span', attrs={'class':['action']})
                    ,dict(name='div', attrs={'class':['twitter comments-list hidden','related-news','col']})
                    ,dict(name='div', attrs={'id':['twitter-destacados']})
                    ,dict(name='ul', attrs={'class':['article-user-actions','stripped-list']})
                                          ]
    feeds = [
              (u'Portada'              , u'http://www.20minutos.es/rss/')
             ,(u'Nacional'             , u'http://www.20minutos.es/rss/nacional/')
             ,(u'Internacional'       , u'http://www.20minutos.es/rss/internacional/')
             ,(u'Economia'           , u'http://www.20minutos.es/rss/economia/')
             ,(u'Deportes'            , u'http://www.20minutos.es/rss/deportes/')
             ,(u'Tecnologia'          , u'http://www.20minutos.es/rss/tecnologia/')
             ,(u'Gente - TV'         , u'http://www.20minutos.es/rss/gente-television/')
             ,(u'Motor'                 , u'http://www.20minutos.es/rss/motor/')
             ,(u'Salud'                 , u'http://www.20minutos.es/rss/belleza-y-salud/')
             ,(u'Viajes'                , u'http://www.20minutos.es/rss/viajes/')
             ,(u'Vivienda'             , u'http://www.20minutos.es/rss/vivienda/')
             ,(u'Empleo'              , u'http://www.20minutos.es/rss/empleo/')
             ,(u'Cine'                  , u'http://www.20minutos.es/rss/cine/')
             ,(u'Musica'               , u'http://www.20minutos.es/rss/musica/')
             ,(u'Comunidad20'     , u'http://www.20minutos.es/rss/zona20/')
            ]
    feeds          = [(u'VESPERTINO', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss')
                        , (u'DEPORTES', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss')
                        , (u'CULTURA', u'http://www.20minutos.es/rss/ocio/')
                        , (u'TV', u'http://20minutos.feedsportal.com/c/32489/f/490877/index.rss')
 ]
--- a/resources/recipes/new_yorker.recipe
+++ b/resources/recipes/new_yorker.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 newyorker.com
 '''
@ -54,10 +54,10 @@ class NewYorker(BasicNewsRecipe):
                        ,dict(attrs={'id':['show-header','show-footer'] })
                     ]
    remove_attributes = ['lang']
-    feeds             = [(u'The New Yorker', u'http://feeds.newyorker.com/services/rss/feeds/everything.xml')]
+    feeds             = [(u'The New Yorker', u'http://www.newyorker.com/services/rss/feeds/everything.xml')]
    def print_version(self, url):
-        return url + '?printable=true'
+        return 'http://www.newyorker.com' + url + '?printable=true'
    def image_url_processor(self, baseurl, url):
        '''
        Return ``url`` with leading and trailing whitespace removed.

        ``baseurl`` is accepted but not used by this implementation.
        '''
        return url.strip()
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -498,7 +498,7 @@ class NYTimes(BasicNewsRecipe):
                for lidiv in div.findAll('li'):
                    if not skipping:
                        self.handle_article(lidiv)
-            
+
        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
        return self.filter_ans(self.ans)
@ -609,7 +609,7 @@ class NYTimes(BasicNewsRecipe):
                if article_date < self.earliest_date:
                    self.log("Skipping article dated %s" % date_str)
                    return None
-                    
+
        #all articles are from today, no need to print the date on every page
        try:
            if not self.webEdition:
@ -631,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
                            refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
                            refend = reflinkstring.find(".html", refstart) + len(".html")
                            reflinkstring = reflinkstring[refstart:refend]
-                            
+
                            popuppage = self.browser.open(reflinkstring)
                            popuphtml = popuppage.read()
                            popuppage.close()
@ -640,7 +640,7 @@ class NYTimes(BasicNewsRecipe):
                                year = str(st.tm_year)
                                month = "%.2d" % st.tm_mon
                                day = "%.2d" % st.tm_mday
-                                imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/')                                
+                                imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/')
                                highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' +  month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
                                popupSoup = BeautifulSoup(popuphtml)
                                highResTag = popupSoup.find('img', {'src':highResImageLink})
@ -659,9 +659,9 @@ class NYTimes(BasicNewsRecipe):
                                            imageTag['height'] = newHeight
                                        except:
                                            self.log("Error setting the src width and height parameters")
-            except Exception as e:
+            except Exception:
                self.log("Error pulling high resolution images")
-                
+
            try:
                #remove "Related content" bar
                runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
@ -674,8 +674,8 @@ class NYTimes(BasicNewsRecipe):
                                hline.extract()
            except:
                self.log("Error removing related content bar")
-     
+
-                
+
            try:
                #in case pulling images failed, delete the enlarge this text
                enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -17,7 +17,7 @@ from lxml import etree
 import cssutils
 from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
-    DC_NSES, OPF
+    DC_NSES, OPF, xml2text
 from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
    PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
 from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, \
@ -423,7 +423,7 @@ class OEBReader(object):
            path, frag = urldefrag(href)
            if path not in self.oeb.manifest.hrefs:
                continue
-            title = ' '.join(xpath(anchor, './/text()'))
+            title = xml2text(anchor)
            title = COLLAPSE_RE.sub(' ', title.strip())
            if href not in titles:
                order.append(href)
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -550,6 +550,14 @@ def choose_dir(window, name, title, default_dir='~'):
    if dir:
        return dir[0]
 def choose_osx_app(window, name, title, default_dir='/Applications'):
    '''
    Show a FileDialog in ``QFileDialog.ExistingFile`` mode and return the
    selection.

    :param window: passed to FileDialog as ``parent``
    :param name: passed to FileDialog as ``name``
    :param title: passed to FileDialog as ``title``
    :param default_dir: passed to FileDialog as ``default_dir``
    :return: the result of ``fd.get_files()`` when it is truthy; otherwise
             the function falls off the end and returns None.

    NOTE(review): choose_dir returns ``dir[0]`` while this returns the whole
    ``get_files()`` result -- confirm callers expect that shape.
    '''
    fd = FileDialog(title=title, parent=window, name=name, mode=QFileDialog.ExistingFile,
            default_dir=default_dir)
    app = fd.get_files()
    # Detach the dialog from its parent window once the selection is read
    # (presumably so the dialog can be released -- TODO confirm).
    fd.setParent(None)
    if app:
        return app
 def choose_files(window, name, title,
                 filters=[], all_files=True, select_only_single_file=False):
    '''
--- a/src/calibre/gui2/actions/annotate.py
+++ b/src/calibre/gui2/actions/annotate.py
@ -9,7 +9,7 @@ import os, datetime
 from PyQt4.Qt import pyqtSignal, QModelIndex, QThread, Qt
-from calibre.gui2 import error_dialog, gprefs
+from calibre.gui2 import error_dialog
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
 from calibre import strftime
 from calibre.gui2.actions import InterfaceAction
@ -165,10 +165,12 @@ class FetchAnnotationsAction(InterfaceAction):
                ka_soup.insert(0,divTag)
                return ka_soup
            '''
            def mark_book_as_read(self,id):
                read_tag = gprefs.get('catalog_epub_mobi_read_tag')
                if read_tag:
                    self.db.set_tags(id, [read_tag], append=True)
            '''
            def canceled(self):
                self.pd.hide()
@ -201,10 +203,12 @@ class FetchAnnotationsAction(InterfaceAction):
                        # Update library comments
                        self.db.set_comment(id, mi.comments)
                        '''
                        # Update 'read' tag except for Catalogs/Clippings
                        if bm.value.percent_read >= self.FINISHED_READING_PCT_THRESHOLD:
                            if not set(mi.tags).intersection(ignore_tags):
                                self.mark_book_as_read(id)
                        '''
                        # Add bookmark file to id
                        self.db.add_format_with_hooks(id, bm.value.bookmark_extension,
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -335,7 +335,7 @@ class PluginWidget(QWidget,Ui_Form):
        '''
        return
-
+        '''
        if new_state == 0:
            # unchecked
            self.merge_source_field.setEnabled(False)
@ -348,6 +348,7 @@ class PluginWidget(QWidget,Ui_Form):
            self.merge_before.setEnabled(True)
            self.merge_after.setEnabled(True)
            self.include_hr.setEnabled(True)
        '''
    def header_note_source_field_changed(self,new_index):
        '''
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -638,8 +638,6 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
        except KeyboardInterrupt:
            pass
        time.sleep(2)
        if mb is not None:
            mb.flush()
        self.hide_windows()
        return True
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@ -42,6 +42,7 @@ class MetadataBackup(Thread): # {{{
    def stop(self):
        '''
        Signal the backup loop to exit, flush, and drop held references.

        Sets ``keep_running`` to False (the loop in this class checks that
        flag and breaks), calls ``flush()``, then sets the callback
        attributes and ``db`` to None.
        '''
        self.keep_running = False
        # flush() is called while self.db is still set; the assignment below
        # sets it to None.
        self.flush()
        # Break cycles so that this object doesn't hold references to db
        self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \
            self.set_dirtied = self.db = None
@ -57,7 +58,10 @@ class MetadataBackup(Thread): # {{{
            except:
                # Happens during interpreter shutdown
                break
            if not self.keep_running:
                break
            self.in_limbo = id_
            try:
                path, mi = self.get_metadata_for_dump(id_)
            except:
@ -72,10 +76,10 @@ class MetadataBackup(Thread): # {{{
                    continue
            # at this point the dirty indication is off
            if mi is None:
                continue
-            self.in_limbo = id_
+            if not self.keep_running:
                break
            # Give the GUI thread a chance to do something. Python threads don't
            # have priorities, so this thread would naturally keep the processor
@ -89,6 +93,9 @@ class MetadataBackup(Thread): # {{{
                traceback.print_exc()
                continue
            if not self.keep_running:
                break
            time.sleep(0.1) # Give the GUI thread a chance to do something
            try:
                self.do_write(path, raw)
@ -102,7 +109,8 @@ class MetadataBackup(Thread): # {{{
                    prints('Failed to write backup metadata for id:', id_,
                            'again, giving up')
                    continue
-        self.in_limbo = None
+
            self.in_limbo = None
    def flush(self):
        'Used during shutdown to ensure that a dirtied book is not missed'
@ -111,6 +119,7 @@ class MetadataBackup(Thread): # {{{
                self.db.dirtied([self.in_limbo])
            except:
                traceback.print_exc()
            self.in_limbo = None
    def write(self, path, raw):
        with lopen(path, 'wb') as f:
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1820,6 +1820,9 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                self.booksByTitle_noSeriesPrefix = nspt
            # Loop through the books by title
            # Generate one divRunningTag per initial letter for the purposes of
            # minimizing widows and orphans on readers that can handle large
            # <divs> styled as inline-block
            title_list = self.booksByTitle
            if not self.useSeriesPrefixInTitlesSection:
                title_list = self.booksByTitle_noSeriesPrefix
@ -1832,7 +1835,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                        divTag.insert(dtc, divRunningTag)
                        dtc += 1
                    divRunningTag = Tag(soup, 'div')
-                    divRunningTag['style'] = 'display:inline-block;width:100%'
+                    divRunningTag['class'] = "logical_group"
                    drtc = 0
                    current_letter = self.letter_or_symbol(book['title_sort'][0])
                    pIndexTag = Tag(soup, "p")
@ -1954,6 +1957,8 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
            drtc = 0
            # Loop through booksByAuthor
            # Each author/books group goes in an openingTag div (first) or
            # a runningTag div (subsequent)
            book_count = 0
            current_author = ''
            current_letter = ''
@ -1977,7 +1982,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                    current_letter = self.letter_or_symbol(book['author_sort'][0].upper())
                    author_count = 0
                    divOpeningTag = Tag(soup, 'div')
-                    divOpeningTag['style'] = 'display:inline-block;width:100%'
+                    divOpeningTag['class'] = "logical_group"
                    dotc = 0
                    pIndexTag = Tag(soup, "p")
                    pIndexTag['class'] = "letter_index"
@ -2001,7 +2006,7 @@ then rebuild the catalog.\n''').format(author[0],author[1],current_author[1])
                        # Create a divRunningTag for the rest of the authors in this letter
                        divRunningTag = Tag(soup, 'div')
-                        divRunningTag['style'] = 'display:inline-block;width:100%'
+                        divRunningTag['class'] = "logical_group"
                        drtc = 0
                    non_series_books = 0