commit 656c55debf
parent eb625d37c3

    Misc. fixes
@@ -24,7 +24,7 @@ class ANDROID(USBMS):
     ]
     PRODUCT_ID = [0x0c02]
     BCD        = [0x100]
-    EBOOK_DIR_MAIN = 'wordplayer/calibre'
+    EBOOK_DIR_MAIN = 'wordplayer/calibretransfer'
 
     VENDOR_NAME      = 'HTC'
     WINDOWS_MAIN_MEM = 'ANDROID_PHONE'
@@ -163,7 +163,7 @@ def fetch_scheduled_recipe(recipe, script):
                 OptionRecommendation.HIGH))
     lf = load_defaults('look_and_feel')
     if lf.get('base_font_size', 0.0) != 0.0:
-        recs.append(('base_font_size', ps['base_font_size'],
+        recs.append(('base_font_size', lf['base_font_size'],
                 OptionRecommendation.HIGH))
 
     args = [script, pt.name, recs]
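The one-line fix in this hunk points the recommendation at lf, the dict that load_defaults('look_and_feel') actually returned, instead of the stale ps name, so the user's saved font size reaches the scheduled-recipe conversion. A minimal self-contained sketch of the intended flow; the stubs below stand in for calibre's real classes and loader, which are not shown in this diff:

class OptionRecommendation:
    # Stand-in for calibre's recommendation-priority constant.
    HIGH = 3

def load_defaults(name):
    # Stand-in for calibre's saved-settings loader.
    return {'base_font_size': 12.0}

recs = []
lf = load_defaults('look_and_feel')
if lf.get('base_font_size', 0.0) != 0.0:
    # Read from lf itself, the dict just loaded, rather than an
    # unrelated variable that may hold different settings.
    recs.append(('base_font_size', lf['base_font_size'],
                 OptionRecommendation.HIGH))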
@@ -1015,7 +1015,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
 
     def books_in_series_of(self, index, index_is_id=False):
         '''
-        Return an ordered list of all books in the series that the book indetified by index belongs to.
+        Return an ordered list of all books in the series that the book identified by index belongs to.
         If the book does not belong to a series return an empty list. The list contains book ids.
         '''
         series_id = self.series_id(index, index_is_id=index_is_id)
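Besides the typo fix, the docstring pins down the contract: ids in, ids out, empty list when there is no series. A hedged usage sketch, assuming db is an open calibre library database and book_id is a database id:

# index_is_id=True means book_id is a database id, not a row index.
ids_in_series = db.books_in_series_of(book_id, index_is_id=True)
if not ids_in_series:
    pass  # the book belongs to no series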
@@ -16,7 +16,43 @@ class NYTimes(BasicNewsRecipe):
     __author__ = 'GRiker'
     language = _('English')
     description = 'Top Stories from the New York Times'
-    #max_articles_per_feed = 3
+
+    # List of sections typically included in Top Stories. Use a keyword from the
+    # right column in the excludeSectionKeywords[] list to skip downloading that section
+    sections = {
+         'arts'             : 'Arts',
+         'business'         : 'Business',
+         'diningwine'       : 'Dining & Wine',
+         'editorials'       : 'Editorials',
+         'health'           : 'Health',
+         'magazine'         : 'Magazine',
+         'mediaadvertising' : 'Media & Advertising',
+         'newyorkregion'    : 'New York/Region',
+         'oped'             : 'Op-Ed',
+         'politics'         : 'Politics',
+         'science'          : 'Science',
+         'sports'           : 'Sports',
+         'technology'       : 'Technology',
+         'topstories'       : 'Top Stories',
+         'travel'           : 'Travel',
+         'us'               : 'U.S.',
+         'world'            : 'World'
+         }
+
+    # By default, no sections are skipped.
+    excludeSectionKeywords = []
+
+    # Add section keywords from the right column above to skip that section
+    # For example, to skip sections containing the word 'Sports' or 'Dining', use:
+    # excludeSectionKeywords = ['Sports', 'Dining']
+    # Fetch only Business and Technology
+    #excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
+    # Fetch only Top Stories
+    #excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
+
+    # The maximum number of articles that will be downloaded
+    max_articles_per_feed = 50
+
     timefmt = ''
     needs_subscription = True
     remove_tags_after = dict(attrs={'id':['comments']})
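The new class-level sections map and excludeSectionKeywords list feed the filtering done later in parse_index(): the keywords are OR-joined into a single regex and matched against each section's display name. A self-contained illustration of that rule:

import re

sections = {'sports' : 'Sports', 'diningwine' : 'Dining & Wine'}
excludeSectionKeywords = ['Sports', 'Dining']

# One alternation regex built from the keywords; a section is skipped
# when any keyword matches its display name.
excluded = re.compile('|'.join(excludeSectionKeywords))
for section_id, name in sections.items():
    if excluded.search(name):
        print 'skipping section %s' % name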
@@ -31,17 +67,11 @@ class NYTimes(BasicNewsRecipe):
                   dict(name=['script', 'noscript', 'style','hr'])]
     encoding = 'cp1252'
     no_stylesheets = True
-    #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
     extra_css = '.headline {text-align:left;}\n\
                  .byline {font:monospace; margin-bottom:0px;}\n\
                  .source {align:left;}\n\
                  .credit {align:right;}\n'
 
-
-    flatPeriodical = True
-    feed = None
-    ans = []
-
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
@@ -54,14 +84,8 @@ class NYTimes(BasicNewsRecipe):
 
     def index_to_soup(self, url_or_raw, raw=False):
         '''
-        Convenience method that takes an URL to the index page and returns
-        a `BeautifulSoup <http://www.crummy.com/software/BeautifulSoup/documentation.html>`_
-        of it.
-
-        This is an OVERRIDE of the method provided in news.py to solve an encoding problem
-        with NYTimes index pages which seem to be encoded in a wonderful blend
-
-        `url_or_raw`: Either a URL or the downloaded index page as a string
+        OVERRIDE of class method
+        deals with various page encodings between index and articles
         '''
         def get_the_soup(docEncoding, url_or_raw, raw=False) :
             if re.match(r'\w+://', url_or_raw):
@@ -88,8 +112,6 @@ class NYTimes(BasicNewsRecipe):
             if docEncoding == '' :
                 docEncoding = self.encoding
 
-            if self.verbose :
-                self.log( " document encoding: '%s'" % docEncoding)
             if docEncoding != self.encoding :
                 soup = get_the_soup(docEncoding, url_or_raw)
 
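Together these two hunks slim the index_to_soup override down to its essential job: parse once with the recipe default, check what the page itself declares, and re-parse only on mismatch. A standalone sketch of that strategy under BeautifulSoup 3; the charset extraction below is an assumption for illustration, not the recipe's exact code:

import re
from BeautifulSoup import BeautifulSoup

def soup_with_declared_encoding(raw, default_encoding='cp1252'):
    # First pass: decode with the recipe default.
    soup = BeautifulSoup(raw, fromEncoding=default_encoding)
    # Read the charset the document itself declares, if any.
    declared = ''
    meta = soup.find('meta', {'http-equiv': 'Content-Type'})
    if meta is not None:
        m = re.search(r'charset=([^\s;"\']+)', str(meta))
        if m:
            declared = m.group(1)
    # Second pass only when the declared encoding disagrees.
    if declared and declared.lower() != default_encoding:
        soup = BeautifulSoup(raw, fromEncoding=declared)
    return soup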
@@ -97,49 +119,11 @@ class NYTimes(BasicNewsRecipe):
 
     def parse_index(self):
         articles = {}
+        ans = []
 
-        if self.flatPeriodical :
-            self.feed = key = 'All Top Stories'
-            articles[key] = []
-            self.ans.append(key)
-        else :
-            key = None
+        feed = key = 'All Top Stories'
+        articles[key] = []
+        ans.append(key)
 
-        '''
-        def feed_title(div):
-            return ''.join(div.findAll(text=True, recursive=False)).strip()
-        '''
-
-        sections = {
-             'arts'             : 'Arts',
-             'business'         : 'Business',
-             'editorials'       : 'Editorials',
-             'health'           : 'Health',
-             'magazine'         : 'Magazine',
-             'mediaadvertising' : 'Media & Advertising',
-             'newyorkregion'    : 'New York/Region',
-             'oped'             : 'Op-Ed',
-             'politics'         : 'Politics',
-             'science'          : 'Science',
-             'sports'           : 'Sports',
-             'technology'       : 'Technology',
-             'topstories'       : 'Top Stories',
-             'travel'           : 'Travel',
-             'us'               : 'U.S.',
-             'world'            : 'World'
-             }
-
-        '''
-        excludeSectionKeywords = ['Arts','Business','Editorials','Health','Magazine','Media',
-                                  'New York','Op-Ed','Politics','Science','Sports','Technology',
-                                  'Top Stories','Travel','U.S.','World']
-        '''
-        excludeSectionKeywords = ['Arts','Business','Editorials','Health','Magazine','Media',
-                                  'New York','Politics','Science','Sports','Technology',
-                                  'Top Stories','Travel','U.S.','World']
-
-        #excludeSectionKeywords = []
-
         soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
 
@@ -152,35 +136,25 @@ class NYTimes(BasicNewsRecipe):
         while True :
             table = table.find('table')
             if table.find(text=re.compile('top stories start')) :
-                if self.verbose > 2 : self.log( "*********** dropping one level deeper **************")
                 previousTable = table
                 continue
             else :
-                if self.verbose > 2 : self.log( "found table with top stories")
                 table = previousTable
-                if self.verbose > 2 : self.log( "lowest table containing 'top stories start:\n%s" % table)
                 break
 
         # There are multiple subtables, find the one containing the stories
         for block in table.findAll('table') :
             if block.find(text=re.compile('top stories start')) :
-                if self.verbose > 2 : self.log( "found subtable with top stories")
                 table = block
-                if self.verbose > 2 : self.log( "lowest subtable containing 'top stories start:\n%s" % table)
                 break
             else :
-                if self.verbose > 2 : self.log( "trying next subtable")
                 continue
 
         # Again there are multiple subtables, find the one containing the stories
         for storyblock in table.findAll('table') :
             if storyblock.find(text=re.compile('top stories start')) :
-                if self.verbose > 2 : self.log( "found subsubtable with top stories\n" )
-                # table = storyblock
-                if self.verbose > 2 : self.log( "\nlowest subsubtable containing 'top stories start:\n%s" % storyblock)
                 break
             else :
-                if self.verbose > 2 : self.log( "trying next subsubtable")
                 continue
 
         skipThisSection = False
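The descent logic that survives the logging cleanup can be read as one rule: keep stepping into nested tables while the 'top stories start' marker is still inside, then back up to the deepest table that held it. A compact restatement, illustrative rather than the recipe's exact code:

import re
from BeautifulSoup import BeautifulSoup

MARKER = re.compile('top stories start')

def deepest_table_with_marker(table):
    # Walk down the nested <table> tags; stop one level above the
    # first table that no longer contains the marker text.
    previous = table
    while True:
        table = table.find('table')
        if table is not None and table.find(text=MARKER):
            previous = table
            continue
        return previous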
@@ -192,7 +166,6 @@ class NYTimes(BasicNewsRecipe):
             sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
                                                         'times new roman,times, sans serif',
                                                         'times new roman, times, sans serif']})
-            if self.verbose > 2 : self.log( "----------- new tr ----------------")
             section = None
             bylines = []
             descriptions = []
@@ -205,26 +178,20 @@ class NYTimes(BasicNewsRecipe):
                 if ('Comment' in str(i.__class__)) :
                     if 'start(name=' in i :
                         section = i[i.find('=')+1:-2]
-                        if self.verbose > 2 : self.log( "sectionTitle: %s" % sections[section])
 
-                        if not sections.has_key(section) :
-                            self.log( "Unrecognized section id: %s, skipping" % section )
+                        if not self.sections.has_key(section) :
                             skipThisSection = True
                             break
 
                         # Check for excluded section
-                        if len(excludeSectionKeywords):
-                            key = sections[section]
-                            excluded = re.compile('|'.join(excludeSectionKeywords))
+                        if len(self.excludeSectionKeywords):
+                            key = self.sections[section]
+                            excluded = re.compile('|'.join(self.excludeSectionKeywords))
                             if excluded.search(key) or articles.has_key(key):
-                                if self.verbose > 2 : self.log("Skipping section %s" % key)
+                                if self.verbose : self.log("Skipping section %s" % key)
                                 skipThisSection = True
                                 break
 
-                            if not self.flatPeriodical :
-                                articles[key] = []
-                                self.ans.append(key)
-
         # Get the bylines and descriptions
         if not skipThisSection :
             for (x,i) in enumerate(sectionblock.contents) :
@@ -248,31 +215,26 @@ class NYTimes(BasicNewsRecipe):
                     #continue
                 url = re.sub(r'\?.*', '', a['href'])
                 url += '?pagewanted=all'
 
                 title = self.tag_to_string(a, use_alt=True)
-                if self.flatPeriodical :
-                    # prepend the section name
-                    title = sections[section] + " · " + title
+                # prepend the section name
+                title = self.sections[section] + " · " + title
                 if not isinstance(title, unicode):
                     title = title.decode('utf-8', 'replace')
 
                 description = descriptions[i]
 
                 if len(bylines) == articleCount :
                     author = bylines[i]
                 else :
                     author = None
 
-                if self.verbose > 2 : self.log( " title: %s" % title)
-                if self.verbose > 2 : self.log( " url: %s" % url)
-                if self.verbose > 2 : self.log( " author: %s" % author)
-                if self.verbose > 2 : self.log( "description: %s" % description)
-
-                if not self.flatPeriodical :
-                    self.feed = key
 
                 # Check for duplicates
                 duplicateFound = False
-                if self.flatPeriodical and len(articles[self.feed]) > 1:
-                    #print articles[self.feed]
-                    for article in articles[self.feed] :
+                if len(articles[feed]) > 1:
+                    #print articles[feed]
+                    for article in articles[feed] :
                         #print "comparing %s\n %s\n" % (url, article['url'])
                         if url == article['url'] :
                             duplicateFound = True
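With the flatPeriodical flag gone, the duplicate check reduces to a linear scan of the current feed's article list, keyed on the normalized URL:

def is_duplicate(url, feed_articles):
    # True when this URL was already collected for the feed.
    for article in feed_articles:
        if url == article['url']:
            return True
    return False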
@@ -280,23 +242,18 @@ class NYTimes(BasicNewsRecipe):
                                 #print
 
                 if duplicateFound:
-                    # Continue fetching, don't add this article
-                    print " skipping duplicate %s" % article['url']
                     continue
 
-                if not articles.has_key(self.feed):
-                    if self.verbose > 2 : self.log( "adding %s to articles[]" % self.feed)
-                    articles[self.feed] = []
-                if self.verbose > 2 : self.log( " adding: %s to articles[%s]\n" % (title, self.feed))
-                articles[self.feed].append(
+                if not articles.has_key(feed):
+                    articles[feed] = []
+                articles[feed].append(
                     dict(title=title, url=url, date=pubdate,
                         description=description, author=author, content=''))
 
-        self.ans = self.sort_index_by(self.ans, {'Top Stories':-1})
-        self.ans = [(key, articles[key]) for key in self.ans if articles.has_key(key)]
-        #sys.exit(1)
+        ans = self.sort_index_by(ans, {'Top Stories':-1})
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
 
-        return self.ans
+        return ans
 
     def preprocess_html(self, soup):
         refresh = soup.find('meta', {'http-equiv':'refresh'})
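With the self.ans and self.feed instance state gone, parse_index() builds and returns a plain local value in the shape calibre expects from a recipe: a list of (feed_title, articles) pairs, each article a dict like the one appended above. An illustrative value with dummy fields:

# Illustrative return value of parse_index(); all field values are dummies.
ans = [('All Top Stories', [
    dict(title='World · Example headline',
         url='http://www.nytimes.com/example.html?pagewanted=all',
         date='', description='Example description', author=None,
         content=''),
])]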
@@ -307,12 +264,9 @@ class NYTimes(BasicNewsRecipe):
             return BeautifulSoup(raw.decode('cp1252', 'replace'))
 
     def postprocess_html(self,soup, True):
-        if self.verbose > 2 : self.log(" ********** recipe.postprocess_html ********** ")
         # Change class="kicker" to <h3>
         kicker = soup.find(True, {'class':'kicker'})
         if kicker is not None :
-            print "changing kicker to <h3>"
-            print kicker
             h3Tag = Tag(soup, "h3")
             h3Tag.insert(0, kicker.contents[0])
             kicker.replaceWith(h3Tag)
@@ -345,13 +299,7 @@ class NYTimes(BasicNewsRecipe):
             tag = Tag(soup, "h3")
             tag.insert(0, masthead.contents[0])
             soup.h1.replaceWith(tag)
-        '''
-        # Change subheads to <h3>
-        for subhead in soup.findAll(True, {'class':'bold'}) :
-            h3Tag = Tag(soup, "h3")
-            h3Tag.insert(0, subhead.contents[0])
-            subhead.replaceWith(h3Tag)
-        '''
+
         # Change <span class="bold"> to <b>
         for subhead in soup.findAll(True, {'class':'bold'}) :
             bTag = Tag(soup, "b")
@@ -359,4 +307,3 @@ class NYTimes(BasicNewsRecipe):
             subhead.replaceWith(bTag)
 
         return soup
-