diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe
index cb037928b4..89d5656741 100644
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 nytimes.com
 '''
 import re
-import time
 from calibre import entity_to_unicode
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
@@ -17,7 +16,7 @@ class NYTimes(BasicNewsRecipe):
     __author__ = 'GRiker'
     language = _('English')
     description = 'Top Stories from the New York Times'
-    
+
     # List of sections typically included in Top Stories. Use a keyword from the
     # right column in the excludeSectionKeywords[] list to skip downloading that section
     sections = {
@@ -40,7 +39,7 @@ class NYTimes(BasicNewsRecipe):
                  'world'        : 'World'
                 }
 
-    # By default, no sections are skipped. 
+    # By default, no sections are skipped.
     excludeSectionKeywords = []
 
     # Add section keywords from the right column above to skip that section
@@ -50,7 +49,7 @@ class NYTimes(BasicNewsRecipe):
     # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
     # Fetch only Top Stories
     # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
-    
+
     # The maximum number of articles that will be downloaded
     max_articles_per_feed = 40
 
@@ -64,7 +63,7 @@ class NYTimes(BasicNewsRecipe):
       dict(attrs={ 'id':['toolsRight','inlineBox','sidebarArticles',
                    'portfolioInline','articleInline','readerscomment',
                    'nytRating']}) ]
-    
+
     encoding = 'cp1252'
     no_stylesheets = True
     extra_css = '.headline {text-align: left;}\n \
@@ -114,13 +113,13 @@ class NYTimes(BasicNewsRecipe):
             _raw = url_or_raw
             if raw:
                 return _raw
-            
+
             if not isinstance(_raw, unicode) and self.encoding:
                 _raw = _raw.decode(docEncoding, 'replace')
             massage = list(BeautifulSoup.MARKUP_MASSAGE)
             massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
             return BeautifulSoup(_raw, markupMassage=massage)
-        
+
         # Entry point
         soup = get_the_soup( self.encoding, url_or_raw )
         contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
@@ -131,7 +130,7 @@ class NYTimes(BasicNewsRecipe):
         if self.verbose > 2:
             self.log( " document encoding: '%s'" % docEncoding)
         if docEncoding != self.encoding :
-            soup = get_the_soup(docEncoding, url_or_raw) 
+            soup = get_the_soup(docEncoding, url_or_raw)
 
         return soup
 
@@ -142,7 +141,7 @@ class NYTimes(BasicNewsRecipe):
         feed = key = 'All Top Stories'
         articles[key] = []
         ans.append(key)
-        
+
         soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
 
         # Fetch the outer table
@@ -188,7 +187,7 @@ class NYTimes(BasicNewsRecipe):
             bylines = []
             descriptions = []
             pubdate = None
-            
+
             # Get the Section title
             for (x,i) in enumerate(sectionblock.contents) :
                 skipThisSection = False
@@ -210,14 +209,14 @@ class NYTimes(BasicNewsRecipe):
                         break
 
             # Get the bylines and descriptions
-            if not skipThisSection : 
+            if not skipThisSection :
                 lines = sectionblock.contents
                 contentStrings = []
-                
+
                 for line in lines:
                     if not isinstance(line, Comment) and line.strip and line.strip() > "":
                         contentStrings.append(line.strip())
-                        
+
                 # Gather the byline/description pairs
                 bylines = []
                 descriptions = []
@@ -226,7 +225,7 @@ class NYTimes(BasicNewsRecipe):
                         bylines.append(contentString)
                     else:
                        descriptions.append(contentString)
-            
+
             # Fetch the article titles and URLs
             articleCount = len(sectionblock.findAll('span'))
             for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) :
@@ -241,7 +240,7 @@ class NYTimes(BasicNewsRecipe):
                 if not isinstance(title, unicode):
                     title = title.decode('utf-8', 'replace')
 
-                # Allow for unattributed, undescribed entries "Editor's Note" 
+                # Allow for unattributed, undescribed entries "Editor's Note"
                 if i >= len(descriptions) :
                     description = None
                 else :
@@ -259,10 +258,10 @@ class NYTimes(BasicNewsRecipe):
                         if url == article['url'] :
                             duplicateFound = True
                             break
-                    
-                    if duplicateFound: 
+
+                    if duplicateFound:
                         # Continue fetching, don't add this article
-                        continue 
+                        continue
 
                 if not articles.has_key(feed):
                     articles[feed] = []
@@ -271,7 +270,7 @@ class NYTimes(BasicNewsRecipe):
                             description=description, author=author, content=''))
 
         ans = self.sort_index_by(ans, {'Top Stories':-1})
-        ans = [(key, articles[key]) for key in ans if articles.has_key(key)] 
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
         return ans
 
     def strip_anchors(self,soup):
@@ -287,7 +286,7 @@ class NYTimes(BasicNewsRecipe):
#        refresh = soup.find('meta', {'http-equiv':'refresh'})
#        if refresh is None:
#            return self.strip_anchors(soup)
-# 
+#
#        content = refresh.get('content').partition('=')[2]
#        raw = self.browser.open('http://www.nytimes.com'+content).read()
#        soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
@@ -297,7 +296,7 @@ class NYTimes(BasicNewsRecipe):
             content = refresh.get('content').partition('=')[2]
             raw = self.browser.open('http://www.nytimes.com'+content).read()
             soup = BeautifulSoup(raw.decode('cp1252', 'replace'))
-        
+
         soup = self.strip_anchors(soup)
 
         # Test for empty content
@@ -308,7 +307,7 @@ class NYTimes(BasicNewsRecipe):
             return soup
         else:
             print "no allowed content found, removing article"
-            raise StringError
+            raise Exception
 
     def postprocess_html(self,soup, True):
 
@@ -351,7 +350,7 @@ class NYTimes(BasicNewsRecipe):
                 bTag = Tag(soup, "b")
                 bTag.insert(0, subhead.contents[0])
                 subhead.replaceWith(bTag)
-        
+
         # Synthesize a section header
         dsk = soup.find('meta', attrs={'name':'dsk'})
         if dsk is not None and dsk.has_key('content'):
@@ -360,12 +359,12 @@ class NYTimes(BasicNewsRecipe):
             hTag.insert(0,NavigableString(dsk['content']))
             articleTag = soup.find(True, attrs={'id':'article'})
             articleTag.insert(0,hTag)
-        
+
         # Add class="articleBody" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'articleBody'})
         if divTag is not None :
             divTag['class'] = divTag['id']
-        
+
         # Add class="authorId" to <div> so we can format with CSS
         divTag = soup.find('div',attrs={'id':'authorId'})
         if divTag is not None :
diff --git a/src/calibre/translations/calibre.pot b/src/calibre/translations/calibre.pot
index f989c04535..8f919a9b3b 100644
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
@@ -4,9 +4,9 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: calibre 0.6.21\n"
-"POT-Creation-Date: 2009-11-13 15:53+MST\n"
-"PO-Revision-Date: 2009-11-13 15:53+MST\n"
+"Project-Id-Version: calibre 0.6.22\n"
+"POT-Creation-Date: 2009-11-13 16:05+MST\n"
+"PO-Revision-Date: 2009-11-13 16:05+MST\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@@ -53,6 +53,8 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:894
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pdb.py:39
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pdf.py:21
+#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pml.py:18
+#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/pml.py:40
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/topaz.py:29
 #: /home/kovid/work/calibre/src/calibre/ebooks/metadata/txt.py:14
 #: /home/kovid/work/calibre/src/calibre/ebooks/mobi/reader.py:44
@@ -177,30 +179,31 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:170
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:181
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:192
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:214
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:204
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:225
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:235
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:245
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:236
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:246
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:256
 msgid "Read metadata from %s files"
 msgstr ""
 
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:204
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:215
 msgid "Read metadata from ebooks in RAR archives"
 msgstr ""
 
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:256
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:267
 msgid "Read metadata from ebooks in ZIP archives"
 msgstr ""
 
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:267
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:277
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:287
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:309
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:278
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:288
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:298
 #: /home/kovid/work/calibre/src/calibre/customize/builtins.py:320
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:331
 msgid "Set metadata in %s files"
 msgstr ""
 
-#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:298
+#: /home/kovid/work/calibre/src/calibre/customize/builtins.py:309
 msgid "Set metadata from %s files"
 msgstr ""