From d82ad7af94d1da174357ef84a401bf1dfa071cb1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 18 Mar 2010 21:13:07 +0530
Subject: [PATCH] Oil Price by Darko Miletic
---
resources/recipes/nytimes_sub.recipe | 65 ++++++++++++++++------------
resources/recipes/oilprice.recipe | 34 +++++++++++++++
src/calibre/translations/sr.po | 2 +-
3 files changed, 73 insertions(+), 28 deletions(-)
create mode 100644 resources/recipes/oilprice.recipe
diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe
index fc3ffd1d2d..c126902899 100644
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@@ -27,15 +27,30 @@ class NYTimes(BasicNewsRecipe):
'Home','Styles','Sunday Business','Week In Review','Travel','Magazine',
'Book Review','Weddings','Real Estate','Automobiles',"T Men's Fashion",
"T Women's Fashion"]
- excludeSectionKeywords = ['Dining','Weddings']
- test_mode = False
- if test_mode:
- all = set(allSectionKeywords)
- fetch_only = set(['The Front Page'])
- excludeSectionKeywords = list(all ^ fetch_only)
+ # List of sections to exclude
+ # To add a section, copy the section name from the allSectionKeywords list above
+ # For example, to exclude 'Dining' and 'Weddings':
+ # excludeSectionKeywords = ['Dining','Weddings']
+ excludeSectionKeywords = []
+
+ # List of sections to include (test and debug only)
+ # By default, any sections in today's paper that are not listed in excludeSectionKeywords
+ # are downloaded. fetch_only specifies that only certain sections are to be downloaded.
+ # This should only be used for testing and debugging.
+ # For example, to download only 'The Front Page' section:
+ # fetch_only = set(['The Front Page'])
+ fetch_only = set([])
+ if fetch_only:
+ excludeSectionKeywords = list(set(allSectionKeywords) ^ fetch_only)
+
+ # one_picture_per_article specifies that calibre should only use the first image
+ # from an article (if one exists). If one_picture_per_article = True, the image
+ # will be moved to a location between the headline and the byline.
+ # If one_picture_per_article = False, all images from the article will be included
+ # and shown in their original location.
+ one_picture_per_article = True
- use_one_picture_per_article = True
timefmt = ''
needs_subscription = True
remove_tags_before = dict(id='article')
@@ -45,6 +60,7 @@ class NYTimes(BasicNewsRecipe):
'columnGroup doubleRule',
'columnGroup last',
'doubleRule',
+ 'dottedLine',
'entry-meta',
'icon enlargeThis',
'leftNavTabs',
@@ -78,7 +94,7 @@ class NYTimes(BasicNewsRecipe):
'toolsRight',
]),
dict(name=['script', 'noscript', 'style'])]
- #encoding = 'utf-8'
+ masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \
.byline {font-family: monospace; \
@@ -118,11 +134,6 @@ class NYTimes(BasicNewsRecipe):
return br
- masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
-
- def get_masthead_title(self):
- return 'NYTimes GR Version'
-
def get_cover_url(self):
cover = None
st = time.localtime()
@@ -138,6 +149,9 @@ class NYTimes(BasicNewsRecipe):
cover = None
return cover
+ def get_masthead_title(self):
+ return 'NYTimes GR Version'
+
def dump_ans(self, ans):
total_article_count = 0
for section in ans :
@@ -163,9 +177,6 @@ class NYTimes(BasicNewsRecipe):
print result
def fixChars(self,string):
- # Something's not right in the pipeline
- #
-
# Replace lsquo (\x91)
fixed = re.sub("\x91","‘",string)
@@ -260,16 +271,6 @@ class NYTimes(BasicNewsRecipe):
self.dump_ans(ans)
return ans
- def strip_anchors(self,soup):
- paras = soup.findAll(True)
- for para in paras:
- aTags = para.findAll('a')
- for a in aTags:
- if a.img is None:
- a.replaceWith(a.renderContents().decode('utf-8','replace'))
- #a.replaceWith(a.renderContents().decode('cp1252','replace'))
- return soup
-
def preprocess_html(self, soup):
'''
refresh = soup.find('meta', {'http-equiv':'refresh'})
@@ -283,7 +284,7 @@ class NYTimes(BasicNewsRecipe):
def postprocess_html(self,soup, True):
- if self.use_one_picture_per_article:
+ if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
@@ -462,3 +463,13 @@ class NYTimes(BasicNewsRecipe):
if article.description is None :
article.description = extract_description(article.href)
+ def strip_anchors(self,soup):
+ paras = soup.findAll(True)
+ for para in paras:
+ aTags = para.findAll('a')
+ for a in aTags:
+ if a.img is None:
+ a.replaceWith(a.renderContents().decode('utf-8','replace'))
+ #a.replaceWith(a.renderContents().decode('cp1252','replace'))
+ return soup
+
diff --git a/resources/recipes/oilprice.recipe b/resources/recipes/oilprice.recipe
new file mode 100644
index 0000000000..04505c2eec
--- /dev/null
+++ b/resources/recipes/oilprice.recipe
@@ -0,0 +1,34 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+oilprice.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class OilPrice(BasicNewsRecipe):
+ title = 'Oil Price'
+ __author__ = 'Darko Miletic'
+ description = 'The nr. 1 source for Oil Price Information'
+ publisher = 'oilprice.com'
+ category = 'news, oil, politics, world, usa'
+ oldest_article = 2
+ max_articles_per_feed = 200
+ no_stylesheets = True
+ encoding = 'cp1252'
+ use_embedded_content = False
+ language = 'en'
+ country = 'US'
+ extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher' : publisher
+ , 'language' : language
+ }
+ remove_tags = [dict(name='div',attrs={'class':'banner'})]
+ keep_only_tags = [dict(name='div',attrs={'id':'storyContent'})]
+ remove_tags_after = dict(attrs={'id':'KonaBody'})
+
+ feeds = [(u'Articles', u'http://www.oilprice.com/rss.xml')]
diff --git a/src/calibre/translations/sr.po b/src/calibre/translations/sr.po
index 904746f72c..8bf04ddadb 100644
--- a/src/calibre/translations/sr.po
+++ b/src/calibre/translations/sr.po
@@ -6521,7 +6521,7 @@ msgstr "Za pomoć vidi: Korisničko uputstvo
"
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:226
msgid "%s: %s by Kovid Goyal %%(version)s
%%(device)s
"
-msgstr "%s: %s autor Kovid Goyal %%(version)
%%(device)
"
+msgstr "%s: %s autor Kovid Goyal %%(version)s
%%(device)s"
#: /home/kovid/work/calibre/src/calibre/gui2/ui.py:249
msgid "Edit metadata individually"