From d82ad7af94d1da174357ef84a401bf1dfa071cb1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 18 Mar 2010 21:13:07 +0530 Subject: [PATCH] Oil Price by Darko Miletic --- resources/recipes/nytimes_sub.recipe | 65 ++++++++++++++++------------ resources/recipes/oilprice.recipe | 34 +++++++++++++++ src/calibre/translations/sr.po | 2 +- 3 files changed, 73 insertions(+), 28 deletions(-) create mode 100644 resources/recipes/oilprice.recipe diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index fc3ffd1d2d..c126902899 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -27,15 +27,30 @@ class NYTimes(BasicNewsRecipe): 'Home','Styles','Sunday Business','Week In Review','Travel','Magazine', 'Book Review','Weddings','Real Estate','Automobiles',"T Men's Fashion", "T Women's Fashion"] - excludeSectionKeywords = ['Dining','Weddings'] - test_mode = False - if test_mode: - all = set(allSectionKeywords) - fetch_only = set(['The Front Page']) - excludeSectionKeywords = list(all ^ fetch_only) + # List of sections to exclude + # To add a section, copy the section name from the allSectionKeywords list above + # For example, to exclude 'Dining' and 'Weddings': + # excludeSectionKeywords = ['Dining','Weddings'] + excludeSectionKeywords = [] + + # List of sections to include (test and debug only) + # By default, any sections in today's paper that are not listed in excludeSectionKeywords + # are downloaded. fetch_only specifies that only certain sections are to be downloaded. + # This should only be used for testing and debugging. + # For example, to download only 'The Front Page' section: + # fetch_only = set(['The Front Page']) + fetch_only = set([]) + if fetch_only: + excludeSectionKeywords = list(set(allSectionKeywords) ^ fetch_only) + + # one_picture_per_article specifies that calibre should only use the first image + # from an article (if one exists). If one_picture_per_article = True, the image + # will be moved to a location between the headline and the byline. + # If one_picture_per_article = False, all images from the article will be included + # and shown in their original location. + one_picture_per_article = True - use_one_picture_per_article = True timefmt = '' needs_subscription = True remove_tags_before = dict(id='article') @@ -45,6 +60,7 @@ class NYTimes(BasicNewsRecipe): 'columnGroup doubleRule', 'columnGroup last', 'doubleRule', + 'dottedLine', 'entry-meta', 'icon enlargeThis', 'leftNavTabs', @@ -78,7 +94,7 @@ class NYTimes(BasicNewsRecipe): 'toolsRight', ]), dict(name=['script', 'noscript', 'style'])] - #encoding = 'utf-8' + masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' no_stylesheets = True extra_css = '.headline {text-align: left;}\n \ .byline {font-family: monospace; \ @@ -118,11 +134,6 @@ class NYTimes(BasicNewsRecipe): return br - masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' - - def get_masthead_title(self): - return 'NYTimes GR Version' - def get_cover_url(self): cover = None st = time.localtime() @@ -138,6 +149,9 @@ class NYTimes(BasicNewsRecipe): cover = None return cover + def get_masthead_title(self): + return 'NYTimes GR Version' + def dump_ans(self, ans): total_article_count = 0 for section in ans : @@ -163,9 +177,6 @@ class NYTimes(BasicNewsRecipe): print result def fixChars(self,string): - # Something's not right in the pipeline - # - # Replace lsquo (\x91) fixed = re.sub("\x91","‘",string) @@ -260,16 +271,6 @@ class NYTimes(BasicNewsRecipe): self.dump_ans(ans) return ans - def strip_anchors(self,soup): - paras = soup.findAll(True) - for para in paras: - aTags = para.findAll('a') - for a in aTags: - if a.img is None: - a.replaceWith(a.renderContents().decode('utf-8','replace')) - #a.replaceWith(a.renderContents().decode('cp1252','replace')) - return soup - def preprocess_html(self, soup): ''' refresh = soup.find('meta', {'http-equiv':'refresh'}) @@ -283,7 +284,7 @@ class NYTimes(BasicNewsRecipe): def postprocess_html(self,soup, True): - if self.use_one_picture_per_article: + if self.one_picture_per_article: # Remove all images after first largeImg = soup.find(True, {'class':'articleSpanImage'}) inlineImgs = soup.findAll(True, {'class':'inlineImage module'}) @@ -462,3 +463,13 @@ class NYTimes(BasicNewsRecipe): if article.description is None : article.description = extract_description(article.href) + def strip_anchors(self,soup): + paras = soup.findAll(True) + for para in paras: + aTags = para.findAll('a') + for a in aTags: + if a.img is None: + a.replaceWith(a.renderContents().decode('utf-8','replace')) + #a.replaceWith(a.renderContents().decode('cp1252','replace')) + return soup + diff --git a/resources/recipes/oilprice.recipe b/resources/recipes/oilprice.recipe new file mode 100644 index 0000000000..04505c2eec --- /dev/null +++ b/resources/recipes/oilprice.recipe @@ -0,0 +1,34 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +oilprice.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class OilPrice(BasicNewsRecipe): + title = 'Oil Price' + __author__ = 'Darko Miletic' + description = 'The nr. 1 source for Oil Price Information' + publisher = 'oilprice.com' + category = 'news, oil, politics, world, usa' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'cp1252' + use_embedded_content = False + language = 'en' + country = 'US' + extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + remove_tags = [dict(name='div',attrs={'class':'banner'})] + keep_only_tags = [dict(name='div',attrs={'id':'storyContent'})] + remove_tags_after = dict(attrs={'id':'KonaBody'}) + + feeds = [(u'Articles', u'http://www.oilprice.com/rss.xml')] diff --git a/src/calibre/translations/sr.po b/src/calibre/translations/sr.po index 904746f72c..8bf04ddadb 100644 --- a/src/calibre/translations/sr.po +++ b/src/calibre/translations/sr.po @@ -6521,7 +6521,7 @@ msgstr "

Za pomoć vidi: Korisničko uputstvo
" #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:226 msgid "%s: %s by Kovid Goyal %%(version)s
%%(device)s

" -msgstr "%s: %s autor Kovid Goyal %%(version)
%%(device)

" +msgstr "%s: %s autor Kovid Goyal %%(version)s
%%(device)s

" #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:249 msgid "Edit metadata individually"