IGN:Tag release

2025-07-08 02:34:06 -04:00 · 2009-11-13 16:46:27 -07:00 · 2009-11-13 16:46:27 -07:00 · 6fd0a3100b
commit 6fd0a3100b
parent 507348e16c
2 changed files with 40 additions and 38 deletions
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 nytimes.com
 '''
 import re
 import time
 from calibre import entity_to_unicode
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
@ -17,7 +16,7 @@ class NYTimes(BasicNewsRecipe):
    __author__  = 'GRiker'
    language = _('English')
    description = 'Top Stories from the New York Times'
-    
+
    # List of sections typically included in Top Stories.  Use a keyword from the
    # right column in the excludeSectionKeywords[] list to skip downloading that section
    sections = {
@ -40,7 +39,7 @@ class NYTimes(BasicNewsRecipe):
                 'world'            :   'World'
               }
-    # By default, no sections are skipped.  
+    # By default, no sections are skipped.
    excludeSectionKeywords = []
    # Add section keywords from the right column above to skip that section
@ -50,7 +49,7 @@ class NYTimes(BasicNewsRecipe):
    # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
    # Fetch only Top Stories
    # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
-    
+
    # The maximum number of articles that will be downloaded
    max_articles_per_feed = 40
@ -64,7 +63,7 @@ class NYTimes(BasicNewsRecipe):
                                dict(attrs={   'id':['toolsRight','inlineBox','sidebarArticles',
                                                     'portfolioInline','articleInline','readerscomment',
                                                     'nytRating']}) ]
-        
+
    encoding = 'cp1252'
    no_stylesheets = True
    extra_css = '.headline      {text-align:    left;}\n    \
@ -114,13 +113,13 @@ class NYTimes(BasicNewsRecipe):
                _raw = url_or_raw
            if raw:
                return _raw
-                
+
            if not isinstance(_raw, unicode) and self.encoding:
                _raw = _raw.decode(docEncoding, 'replace')
            massage = list(BeautifulSoup.MARKUP_MASSAGE)
            massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
            return BeautifulSoup(_raw, markupMassage=massage)
-        
+
        # Entry point
        soup = get_the_soup( self.encoding, url_or_raw )
        contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
@ -131,7 +130,7 @@ class NYTimes(BasicNewsRecipe):
        if self.verbose > 2:
            self.log( "  document encoding: '%s'" % docEncoding)
        if docEncoding != self.encoding :
-            soup = get_the_soup(docEncoding, url_or_raw)         
+            soup = get_the_soup(docEncoding, url_or_raw)
        return soup
@ -142,7 +141,7 @@ class NYTimes(BasicNewsRecipe):
        feed = key = 'All Top Stories'
        articles[key] = []
        ans.append(key)
-        
+
        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
        # Fetch the outer table
@ -188,7 +187,7 @@ class NYTimes(BasicNewsRecipe):
                bylines = []
                descriptions = []
                pubdate = None
-        
+
                # Get the Section title
                for (x,i) in enumerate(sectionblock.contents) :
                    skipThisSection = False
@ -210,14 +209,14 @@ class NYTimes(BasicNewsRecipe):
                                break
                # Get the bylines and descriptions
-                if not skipThisSection :                    
+                if not skipThisSection :
                    lines = sectionblock.contents
                    contentStrings = []
-                    
+
                    for line in lines:
                        if not isinstance(line, Comment) and line.strip and line.strip() > "":
                            contentStrings.append(line.strip())
-                
+
                    # Gather the byline/description pairs
                    bylines = []
                    descriptions = []
@ -226,7 +225,7 @@ class NYTimes(BasicNewsRecipe):
                            bylines.append(contentString)
                        else:
                            descriptions.append(contentString)
-                                                
+
                    # Fetch the article titles and URLs
                    articleCount = len(sectionblock.findAll('span'))
                    for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) :
@ -241,7 +240,7 @@ class NYTimes(BasicNewsRecipe):
                        if not isinstance(title, unicode):
                            title = title.decode('utf-8', 'replace')
-                        # Allow for unattributed, undescribed entries "Editor's Note"                                                
+                        # Allow for unattributed, undescribed entries "Editor's Note"
                        if i >= len(descriptions) :
                            description = None
                        else :
@ -259,10 +258,10 @@ class NYTimes(BasicNewsRecipe):
                                if url == article['url'] :
                                    duplicateFound = True
                                    break
-                            
+
-                            if duplicateFound:        
+                            if duplicateFound:
                                # Continue fetching, don't add this article
-                                continue        
+                                continue
                        if not articles.has_key(feed):
                            articles[feed] = []
@ -271,7 +270,7 @@ class NYTimes(BasicNewsRecipe):
                                 description=description, author=author, content=''))
        ans = self.sort_index_by(ans, {'Top Stories':-1})
-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]        
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
    def strip_anchors(self,soup):
@ -287,7 +286,7 @@ class NYTimes(BasicNewsRecipe):
 #         refresh = soup.find('meta', {'http-equiv':'refresh'})
 #         if refresh is None:
 #             return self.strip_anchors(soup)
-# 
+#
 #         content = refresh.get('content').partition('=')[2]
 #         raw = self.browser.open('http://www.nytimes.com'+content).read()
 #         soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
@ -297,7 +296,7 @@ class NYTimes(BasicNewsRecipe):
            content = refresh.get('content').partition('=')[2]
            raw = self.browser.open('http://www.nytimes.com'+content).read()
            soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
-        
+
        soup = self.strip_anchors(soup)
        # Test for empty content
@ -308,7 +307,7 @@ class NYTimes(BasicNewsRecipe):
            return soup
        else:
            print "no allowed content found, removing article"
-            raise StringError
+            raise Exception
    def postprocess_html(self,soup, True):
@ -351,7 +350,7 @@ class NYTimes(BasicNewsRecipe):
            bTag = Tag(soup, "b")
            bTag.insert(0, subhead.contents[0])
            subhead.replaceWith(bTag)
-            
+
        # Synthesize a section header
        dsk = soup.find('meta', attrs={'name':'dsk'})
        if dsk is not None and dsk.has_key('content'):
@ -360,12 +359,12 @@ class NYTimes(BasicNewsRecipe):
            hTag.insert(0,NavigableString(dsk['content']))
            articleTag = soup.find(True, attrs={'id':'article'})
            articleTag.insert(0,hTag)
-            
+
        # Add class="articleBody" to <div> so we can format with CSS
        divTag = soup.find('div',attrs={'id':'articleBody'})
        if divTag is not None :
            divTag['class'] = divTag['id']
-        
+
        # Add class="authorId" to <div> so we can format with CSS
        divTag = soup.find('div',attrs={'id':'authorId'})
        if divTag is not None :
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
@ -4,9 +4,9 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: calibre 0.6.21\n"
+"Project-Id-Version: calibre 0.6.22\n"
-"POT-Creation-Date: 2009-11-13 15:53+MST\n"
+"POT-Creation-Date: 2009-11-13 16:05+MST\n"
-"PO-Revision-Date: 2009-11-13 15:53+MST\n"
+"PO-Revision-Date: 2009-11-13 16:05+MST\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@ -53,6 +53,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:894
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pdb.py:39
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pdf.py:21
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pml.py:18
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pml.py:40
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/topaz.py:29
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/txt.py:14
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:44
@ -177,30 +179,31 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:170
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:181
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:192
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:214
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:204
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:225
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:235
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:236
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:245
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:246
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:256
 msgid "Read metadata from %s files"
 msgstr ""
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:204
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:215
 msgid "Read metadata from ebooks in RAR archives"
 msgstr ""
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:256
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:267
 msgid "Read metadata from ebooks in ZIP archives"
 msgstr ""
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:267
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:278
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:277
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:288
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:287
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:298
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:309
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:320
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:331
 msgid "Set metadata in %s files"
 msgstr ""
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:298
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:309
 msgid "Set metadata from %s files"
 msgstr ""