# -*- coding: utf-8 -*-
#
# Written:      2012-01-28
# Last Edited:  2013-08-18
# Remark:       Version 1.2
#               Integration of former separated Blog-News
#
__license__ = 'GPL v3'
__copyright__ = '2013, Armin Geller'

'''
Fetch High Country News
'''
from calibre.web.feeds.news import BasicNewsRecipe


class HighCountryNews(BasicNewsRecipe):
    """Recipe for High Country News, built from the site's FeedBurner feeds.

    The article HTML is fetched directly (no print view). Multi-page
    articles are stitched together in ``preprocess_html`` by following the
    "next" pager link and appending each following page's article text.
    """

    title = u'High Country News'
    description = u'High Country News (RSS Version)'
    __author__ = 'Armin Geller'
    publisher = 'High Country News'
    category = 'news, politics'
    timefmt = ' [%a, %d %b %Y]'
    language = 'en'
    encoding = 'UTF-8'
    publication_type = 'newspaper'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    # Cleanup is done explicitly through keep_only_tags / remove_tags below.
    auto_cleanup = False
    remove_javascript = True
    remove_empty_feeds = True  # 2013-08-18 AGe add
    use_embedded_content = False

    masthead_url = 'http://www.hcn.org/logo.jpg'
    # Page that get_cover_url() scrapes for the cover image.
    cover_source = 'http://www.hcn.org'

    def get_cover_url(self):
        """Return the cover image URL scraped from ``cover_source``.

        Looks up the element carrying the homepage class string and returns
        the ``src`` of the first image inside its first ``div``.

        Returns:
            The image URL, or ``None`` when the expected markup is not
            found, so a layout change results in a missing cover rather
            than an exception.
        """
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(
            attrs={'class': ' portaltype-Plone Site content--hcn template-homepage_view'})
        try:
            return preview_image_div.div.img['src']
        except (AttributeError, KeyError, TypeError):
            # AttributeError: container or its <div> is missing (None.attr)
            # TypeError:      <img> is missing (None['src'])
            # KeyError:       <img> has no src attribute
            self.log.warning('High Country News: cover image not found on',
                             self.cover_source)
            return None

    feeds = [
        (u'Most recent', u'http://feeds.feedburner.com/hcn/most-recent?format=xml'),
        (u'Current Issue', u'http://feeds.feedburner.com/hcn/current-issue?format=xml'),

        (u'From the Blogs', u'http://feeds.feedburner.com/hcn/FromTheBlogs?format=xml'),  # 2013-07-23 AGe add
        (u'Heard around the West', u'http://feeds.feedburner.com/hcn/heard?format=xml'),  # 2013-07-23 AGe add
        (u'The GOAT Blog', u'http://feeds.feedburner.com/hcn/goat?format=xml'),  # 2013-07-23 AGe add
        (u'The Range', u'http://feeds.feedburner.com/hcn/range?format=xml'),  # 2013-07-23 AGe add
        (u'Writers on the Range', u'http://feeds.feedburner.com/hcn/wotr'),
        (u'High Country Views', u'http://feeds.feedburner.com/hcn/HighCountryViews'),
    ]

    # 2013-07-23 AGe New coding w/o using print_version
    keep_only_tags = [
        dict(name='div', attrs={'id': ['content']}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': [
            'documentActions supercedeDocumentActions editorialDocumentActions',
            'documentActions supercedeDocumentActions editorialDocumentActions editorialFooterDocumentActions',
            'article-sidebar',
            'image-viewer-controls nojs',
            'protectedArticleWrapper',
            'visualClear',
        ]}),
    ]

    # Prefix prepended to the pager's href when building the next-page URL.
    # Empty here, so the href is used as-is.
    # NOTE(review): presumably the site emits absolute URLs -- confirm.
    INDEX = ''

    def append_page(self, soup, appendtag, position):
        """Append the text of all following pages of an article.

        Finds the ``span.next`` pager in *soup*, fetches the page it links
        to, recursively pulls that page's own continuation into its
        ``div.article-text``, and inserts that div into *appendtag*.

        Args:
            soup: Parsed page in which to look for the "next" pager.
            appendtag: Tag that receives the next page's article text.
            position: Child index in *appendtag* at which to insert.

        Returns:
            None. *appendtag* is modified in place. Nothing happens when
            there is no next link or the next page has no article text.
        """
        pager = soup.find('span', attrs={'class': 'next'})
        if pager is None or pager.a is None:
            return
        href = pager.a.get('href')
        if not href:
            return

        nexturl = self.INDEX + href
        self.log.debug('Appending next page:', nexturl)
        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class': 'article-text'})
        if texttag is None:
            # Continuation page without the expected container: keep what
            # has been collected so far instead of failing the article.
            return

        # Resolve the rest of the chain first, appended at the end of this
        # page's text, so the pages end up in reading order.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Merge multi-page articles and strip the pager bar.

        Args:
            soup: Parsed article page.

        Returns:
            The result of ``adeify_images`` applied to the modified soup.
        """
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div', attrs={'class': 'listingBar listingBar-article'})
        if pager is not None:
            pager.extract()
        return self.adeify_images(soup)