diff --git a/resources/recipes/dominion.recipe b/resources/recipes/dominion.recipe
new file mode 100644
index 0000000000..e72bff6920
--- /dev/null
+++ b/resources/recipes/dominion.recipe
@@ -0,0 +1,50 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class stuffconz(BasicNewsRecipe):
+    '''Recipe that downloads the stuff.co.nz RSS feeds listed in ``feeds``.'''
+
+    title = u'stuff.co.nz'
+    language = 'en_NZ'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    #encoding = 'latin1'
+
+    # BasicNewsRecipe documents the stylesheet switch as ``no_stylesheets``;
+    # nzherald.recipe uses the same spelling.
+    no_stylesheets = True
+    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
+    remove_tags_after = dict(name='div', attrs={'id':'related_box'})
+    remove_tags = [
+        dict(name='iframe'),
+        dict(name='div', attrs={'class':['story_feature_title']}),
+        dict(name='div', attrs={'id':['toolbox', 'related_box', 'adSTORYBODY']}),
+        dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}),
+        #dict(name='ul', attrs={'class':'articleTools'}),
+    ]
+
+    feeds = [
+        ('Dominion Post', 'http://www.stuff.co.nz/rss/dominion-post'),
+        ('National', 'http://www.stuff.co.nz/rss/national'),
+        ('World', 'http://www.stuff.co.nz/rss/world'),
+        ('Business', 'http://www.stuff.co.nz/rss/business'),
+        ('Technology', 'http://www.stuff.co.nz/rss/technology'),
+        ('Sport', 'http://www.stuff.co.nz/rss/sport'),
+        ('Entertainment', 'http://www.stuff.co.nz/rss/entertainment'),
+        ('Life and Style', 'http://www.stuff.co.nz/rss/life-style'),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Return a fresh document whose body holds only the ``left_col`` div.
+
+        When the page has no ``<div id="left_col">`` it is returned unchanged.
+        '''
+        story = soup.find(name='div', attrs={'id':'left_col'})
+        if story is None:
+            return soup
+        # Explicit skeleton: without a <body> tag the lookup below yields None.
+        page = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
+        body = page.find(name='body')
+        body.insert(0, story)
+        return page
diff --git a/resources/recipes/nzherald.recipe b/resources/recipes/nzherald.recipe
index 1a962ffacb..3ac1e27c20 100644
--- a/resources/recipes/nzherald.recipe
+++ b/resources/recipes/nzherald.recipe
@@ -6,7 +6,7 @@ class NewZealandHerald(BasicNewsRecipe):
     __author__ = 'Krittika Goyal'
description = 'Daily news' timefmt = ' [%d %b, %Y]' - language = 'en_AU' + language = 'en_NZ' no_stylesheets = True remove_tags_before = dict(name='div', attrs={'class':'contentContainer left eight'}) diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index c29ccadef2..1ade012b1f 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -97,6 +97,7 @@ _extra_lang_codes = { 'zh_TW' : _('Traditional Chinese'), 'en' : _('English'), 'en_AU' : _('English (Australia)'), + 'en_NZ' : _('English (New Zealand)'), 'en_CA' : _('English (Canada)'), 'en_IN' : _('English (India)'), 'en_TH' : _('English (Thailand)'),