diff --git a/Changelog.yaml b/Changelog.yaml
index 80b7630a16..7082c7416c 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -19,6 +19,90 @@
# new recipes:
# - title:
+- version: 0.8.16
+ date: 2011-08-26
+
+ new features:
+ - title: "News download: Add algorithms to automatically clean up downloaded HTML"
+ description: "Use the algorithms from the Readability project to automatically clean up downloaded HTML. You can turn this on in your own recipes by adding auto_cleanup=True to the recipe. It is turned on by default for basic recipes created via the GUI. This makes it a little easier to develop recipes for beginners."
+ type: major
+
+ - title: "Add an option to Preferences->Look and Feel->Cover Browser to show the cover browser full screen. When showing the cover browser in a separate window, you can make it fullscreen by pressing the F11 key."
+ tickets: [829855 ]
+
+ - title: "Show the languages currently used at the top of the drop down list of languages"
+
+ - title: "When automatically computing author sort from author's name, if the name contains certain words like Inc., Company, Team, etc. use the author name as the sort string directly. The list of such words can be controlled via Preferences->Tweaks."
+ tickets: [797895]
+
+ - title: "Add a search for individual tweaks to Preferences->Tweaks"
+
+ - title: "Drivers for a few new android phones"
+
+ bug fixes:
+ - title: "Fix line unwrapping algorithms to account for some central European characters as well."
+ tickets: [822744]
+
+ - title: "Switch to using more modern language names/translations from the iso-codes package"
+
+ - title: "Allow case insensitive entering of language names for convenience."
+ tickets: [832761]
+
+ - title: "When adding a text indent to paragraphs as part of the remove spacing between paragraphs transformation, do not add an indent to paragraphs that are directly centered or right aligned."
+ tickets: [830439]
+
+ - title: "Conversion pipeline: More robust handling of case insensitive tag and class css selectors"
+
+ - title: "MOBI Output: Add support for the start attribute on <ol> tags"
+
+ - title: "When adding books that have no language specified, do not automatically set the language to calibre's interface language."
+ tickets: [830092]
+
+ - title: "Fix use of tag browser to search for languages when calibre is translated to a non English language"
+ tickets: [830078]
+
+ - title: "When downloading news, set the language field correctly"
+
+ - title: "Fix languages field in the Edit metadata dialog too wide"
+ tickets: [829912]
+
+ - title: "Fix setting of languages that have commas in their names broken"
+
+ - title: "FB2 Input: When converting FB2 files, read the cover from the FB2 file correctly."
+ tickets: [829240]
+
+ improved recipes:
+ - Politifact
+ - Reuters
+ - Sueddeutsche
+ - CNN
+ - Financial Times UK
+ - MSDN Magazine
+ - Houston Chronicle
+ - Harvard Business Review
+
+ new recipes:
+ - title: CBN News and Fairbanks Daily
+ author: Roger
+
+ - title: Hacker News
+ author: Tom Scholl
+
+ - title: Various Turkish news sources
+ author: thomass
+
+ - title: Cvece Zla
+ author: Darko Miletic
+
+ - title: Various Polish news sources
+ author: fenuks
+
+ - title: Fluter
+ author: Armin Geller
+
+ - title: Brasil de Fato
+ author: Alex Mitrani
+
- version: 0.8.15
date: 2011-08-19
diff --git a/recipes/cbn.recipe b/recipes/cbn.recipe
new file mode 100644
index 0000000000..d2ce8dc885
--- /dev/null
+++ b/recipes/cbn.recipe
@@ -0,0 +1,73 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class CBN(BasicNewsRecipe):
+ title = u'CBN News'
+ __author__ = 'Roger'
+ # TODO: This recipe currently downloads 25+ articles, while
+ # the online site publishes at most 7 articles daily.
+ # This needs to be fixed so that it downloads at most 7 articles.
+ oldest_article = 7
+ max_articles_per_feed = 100
+
+ description = 'The Christian Broadcasting Network'
+ publisher = 'http://www.cbn.com/'
+ category = 'news, religion, spiritual, christian'
+ language = 'en'
+
+ # Make article titles, author and date bold, italic or small font.
+ # TODO: Could use a smaller title text
+ # TODO: Italicize Author and Publisher?
+ #
+ # http://www.cbn.com/App_Themes/Common/base.css,
+ # http://www.cbn.com/App_Themes/CBNNews/article.css,
+ # ... and many more style sheets.
+ #extra_css = '''
+ # .story_item_headline { font-size: medium; font-weight: bold; }
+ # .story_item_author { font-size: small; font-style:italic; }
+ # .signature_line { font-size: small; }
+ # '''
+
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ language = 'en'  # NOTE(review): duplicates the identical `language` assignment earlier in this class
+ encoding = 'iso-8859-1'
+ conversion_options = {'linearize_tables':True}
+
+ # TODO: No official masthead_url for CBN; using one I grepped from a news article.
+ # (There's a better/higher contrast blue on white background image, but
+ # can't get it or it's too big -- embedded into a larger jpeg?)
+ masthead_url = 'http://www.cbn.com/templates/images/cbn_com_logo.jpg'
+
+ keep_only_tags = [
+ dict(name='h1', attrs={'id':'articleTitle'}),
+ dict(name='div', attrs={'class':'articleAuthor'}),
+ dict(name='div', attrs={'class':'articleDate'}),
+ dict(name='div', attrs={'class':'articleText'}),
+ ]
+
+ remove_tags = [
+ # The article image is usually an Adobe Flash Player image.
+ # The snapshot .jpg image files of the video are found
+ # within a URL folder named "PageFiles_Files".
+ # Filter this out for now.
+ # (The majority of images seem to be Adobe Flash.)
+ dict(name='div', attrs={'class':'articleImage'}),
+ ]
+
+
+ # Comment out or uncomment any of the following RSS feeds according to your
+ # liking.
+ # A full list can be found here: http://www.cbn.com/rss.aspx
+
+ feeds = [
+ (u'World', u'http://www.cbn.com/cbnnews/world/feed/'),
+ (u'US', u'http://www.cbn.com/cbnnews/us/feed/'),
+ (u'Inside Israel', u'http://www.cbn.com/cbnnews/insideisrael/feed/'),
+ (u'Politics', u'http://www.cbn.com/cbnnews/politics/feed/'),
+ (u'Christian World News', u'http://www.cbn.com/cbnnews/shows/cwn/feed/'),
+ (u'Health and Science', u'http://www.cbn.com/cbnnews/healthscience/feed/'),
+ (u'Finance', u'http://www.cbn.com/cbnnews/finance/feed/'),
+ ]
+
diff --git a/recipes/fairbanks_daily.recipe b/recipes/fairbanks_daily.recipe
index fcf6873904..f263e74300 100644
--- a/recipes/fairbanks_daily.recipe
+++ b/recipes/fairbanks_daily.recipe
@@ -10,23 +10,23 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
publisher = 'http://www.newsminer.com/'
category = 'news, Alaska, Fairbanks'
language = 'en'
-
+
# Make article titles, author and date bold, italic or small font.
# http://assets.matchbin.com/sites/635/stylesheets/newsminer.com.css
# (signature_line contains date, views, comments)
- extra_css = '''
+ extra_css = '''
.story_item_headline { font-size: medium; font-weight: bold; }
.story_item_author { font-size: small; font-style:italic; }
.signature_line { font-size: small; }
'''
-
+
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'utf8'
conversion_options = {'linearize_tables':True}
-
+
# TODO: The News-miner cover image seems a bit small. Can this be enlarged by 10-30%?
masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'
@@ -49,11 +49,11 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
#preprocess_regexps = [(re.compile(r']*addthis_separator*>'), lambda match: '') ]
#preprocess_regexps = [(re.compile(r'span class="addthis_separator">|'), lambda match: '') ]
-
+
#preprocess_regexps = [
# (re.compile(r'.*?', re.IGNORECASE | re.DOTALL), lambda match : ''),
# ]
-
+
#def get_browser(self):
#def preprocess_html(soup, first_fetch):
# date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
@@ -77,10 +77,10 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
dict(name='img', attrs={'class':'dont_touch_me'}),
dict(name='span', attrs={'class':'number_recommendations'}),
#dict(name='div', attrs={'class':'signature_line'}),
-
+
# Removes div within
dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}),
-
+
dict(name='div', attrs={'class':'related_content'}),
dict(name='div', attrs={'id':'comments_container'})
]
@@ -100,10 +100,10 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
(u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
(u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
(u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
- (u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
+ #(u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
(u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
#(u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
- (u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
+ #(u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
(u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
(u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
#(u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
diff --git a/recipes/politifact.recipe b/recipes/politifact.recipe
index a0f0d786dd..a315a35bcc 100644
--- a/recipes/politifact.recipe
+++ b/recipes/politifact.recipe
@@ -26,4 +26,7 @@ class PolitiFactCom(BasicNewsRecipe):
(u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/')
]
-
+ def get_browser(self):
+ br = BasicNewsRecipe.get_browser(self)
+ br.set_handle_gzip(True)
+ return br
diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe
index 8cc638a760..313e6b8607 100644
--- a/recipes/reuters.recipe
+++ b/recipes/reuters.recipe
@@ -16,23 +16,25 @@ class Reuters(BasicNewsRecipe):
remove_javascript = True
extra_css = '''
- body{font-family:arial,helvetica,sans;}
+ body{font-family:arial,helvetica,sans;}
h1{ font-size:larger ; font-weight:bold; }
.byline{color:#006E97;font-size:x-small; font-weight:bold;}
.location{font-size:x-small; font-weight:bold;}
.timestamp{font-size:x-small; }
- '''
+ '''
keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
-
remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
dict(name='p', attrs={'class':['relatedTopics']}),
- dict(name='a', attrs={'id':['fullSizeLink']}),
- dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),]
+ dict(name='a', attrs={'id':['fullSizeLink']}),
+ dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),
+ # Remove the Tweet, Share this, Email and Print links below article title too!
+ dict(name='div', attrs={'class':['columnRight']}),
+ ]
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-[
+ [
##(r'.*?' , lambda match : ''),
(r'
.*?', lambda match : ''),
(r'
Share:
.*?', lambda match : '