Split up English recipes by country

Kovid Goyal 2009-09-09 19:28:40 -06:00
parent 5ddd375a66
commit 000fc39116
16 changed files with 98 additions and 86 deletions
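The change replaces the generic language = 'en' attribute in the English-language news recipes with country-specific locale codes ('en_GB', 'en_AU', 'en_CA', 'en_IN') and registers the new codes in calibre's localization table. A minimal sketch of the recipe-side change, assuming the standard BasicNewsRecipe base class; the recipe name and feed URL below are illustrative placeholders, not part of this commit:

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleUKPaper(BasicNewsRecipe):
        # Hypothetical recipe shown only to illustrate the pattern: the sole
        # recipe-level change in this commit is swapping the generic 'en'
        # code for a country-specific one such as 'en_GB'.
        title    = u'Example UK Paper'
        language = 'en_GB'   # previously just 'en'
        feeds    = [(u'News', u'http://example.com/news.rss')]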

View File

@@ -5,8 +5,8 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: calibre 0.6.11\n"
-"POT-Creation-Date: 2009-09-09 17:15+MDT\n"
-"PO-Revision-Date: 2009-09-09 17:15+MDT\n"
+"POT-Creation-Date: 2009-09-09 19:27+MDT\n"
+"PO-Revision-Date: 2009-09-09 19:27+MDT\n"
 "Last-Translator: Automatically generated\n"
 "Language-Team: LANGUAGE\n"
 "MIME-Version: 1.0\n"
@@ -128,7 +128,7 @@ msgstr ""
 #: /home/kovid/work/calibre/src/calibre/library/database2.py:1534
 #: /home/kovid/work/calibre/src/calibre/library/server.py:419
 #: /home/kovid/work/calibre/src/calibre/library/server.py:517
-#: /home/kovid/work/calibre/src/calibre/utils/localization.py:100
+#: /home/kovid/work/calibre/src/calibre/utils/localization.py:103
 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:45
 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:63
 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:77
@@ -6254,7 +6254,19 @@ msgid "Traditional Chinese"
 msgstr ""
 #: /home/kovid/work/calibre/src/calibre/utils/localization.py:99
-msgid "English (US)"
+msgid "English"
+msgstr ""
+#: /home/kovid/work/calibre/src/calibre/utils/localization.py:100
+msgid "English (AU)"
+msgstr ""
+#: /home/kovid/work/calibre/src/calibre/utils/localization.py:101
+msgid "English (CA)"
+msgstr ""
+#: /home/kovid/work/calibre/src/calibre/utils/localization.py:102
+msgid "English (IND)"
 msgstr ""
 #: /home/kovid/work/calibre/src/calibre/utils/sftp.py:53
@@ -6388,12 +6400,12 @@ msgid "sr-Latn-RS"
 msgstr ""
 #:
-#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:84
+#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:81
 msgid "Skipping duplicated article: %s"
 msgstr ""
 #:
-#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:89
+#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:86
 msgid "Skipping filtered article: %s"
 msgstr ""

View File

@@ -96,7 +96,10 @@ _extra_lang_codes = {
     'zh_CN' : _('Simplified Chinese'),
     'zh_HK' : _('Chinese (HK)'),
     'zh_TW' : _('Traditional Chinese'),
-    'en'    : _('English (US)'),
+    'en'    : _('English'),
+    'en_AU' : _('English (AU)'),
+    'en_CA' : _('English (CA)'),
+    'en_IN' : _('English (IND)'),
     'und'   : _('Unknown')
 }
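The new entries simply extend the code-to-display-name table. A self-contained sketch of the lookup they enable (the dict and helper below are illustrative stand-ins; calibre's real table wraps each name in its translation function _()):

    # Hypothetical, plain-Python stand-in for the new _extra_lang_codes entries.
    extra_lang_codes = {
        'en'    : 'English',
        'en_AU' : 'English (AU)',
        'en_CA' : 'English (CA)',
        'en_IN' : 'English (IND)',
        'und'   : 'Unknown',
    }

    def display_name(code):
        # Codes not in the table fall back to the 'und' (undetermined) entry.
        return extra_lang_codes.get(code, extra_lang_codes['und'])

    print(display_name('en_AU'))  # -> English (AU)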

View File

@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheDailyMail(BasicNewsRecipe):
     title = u'The Daily Mail'
     oldest_article = 2
-    language = 'en'
+    language = 'en_GB'
     author = 'RufusA'
     simultaneous_downloads= 1
@@ -15,7 +15,7 @@ class TheDailyMail(BasicNewsRecipe):
     remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'})
     remove_tags_before = dict(name='div', attrs={'id':'content'})
     no_stylesheets = True
     feeds = [
         (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
         (u'News', u'http://www.dailymail.co.uk/news/index.rss'),

View File

@@ -9,7 +9,7 @@ class DNAIndia(BasicNewsRecipe):
     title = 'DNA India'
     description = 'Mumbai news, India news, World news, breaking news'
     __author__ = 'Kovid Goyal'
-    language = 'en'
+    language = 'en_IN'
     encoding = 'cp1252'

View File

@@ -7,9 +7,9 @@ class GlasgowHerald(BasicNewsRecipe):
     oldest_article = 1
     max_articles_per_feed = 100
     no_stylesheets = True
-    language = 'en'
+    language = 'en_GB'
     __author__ = 'McCande'
     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
         [

View File

@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class GlobeAndMail(BasicNewsRecipe):
     title = u'Globe and Mail'
-    language = 'en'
+    language = 'en_CA'
     __author__ = 'Kovid Goyal'
     oldest_article = 2

View File

@@ -13,7 +13,7 @@ class Guardian(BasicNewsRecipe):
     title = u'The Guardian'
     __author__ = 'Seabound and Sujata Raman'
-    language = 'en'
+    language = 'en_GB'
     oldest_article = 7
     max_articles_per_feed = 20

View File

@@ -7,42 +7,42 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheHindu(BasicNewsRecipe):
     title = u'The Hindu'
-    language = 'en'
+    language = 'en_IN'
     oldest_article = 7
     __author__ = _('Kovid Goyal')
     max_articles_per_feed = 100
     remove_tags_before = {'name':'font', 'class':'storyhead'}
     preprocess_regexps = [
         (re.compile(r'<!-- story ends -->.*', re.DOTALL),
          lambda match: '</body></html>'),
     ]
     feeds = [
         (u'Main - Font Page', u'http://www.hindu.com/rss/01hdline.xml'),
         (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
         (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
         (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
         (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
         (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
         (u'Main - Weather / Religion / Crossword / Cartoon',
          u'http://www.hindu.com/rss/10hdline.xml'),
         (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
         (u'Supplement - Literary Review',
          u'http://www.hindu.com/rss/lrhdline.xml'),
         (u'Supplement - Sunday Magazine',
          u'http://www.hindu.com/rss/maghdline.xml'),
         (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
         (u'Supplement - Business Review',
          u'http://www.hindu.com/rss/bizhdline.xml'),
         (u'Supplement - Book Review',
          u'http://www.hindu.com/rss/brhdline.xml'),
         (u'Supplement - Science & Technology',
          u'http://www.hindu.com/rss/setahdline.xml')
     ]
     def postprocess_html(self, soup, first_fetch):
         for t in soup.findAll(['table', 'tr', 'td']):
             t.name = 'div'
         return soup

View File

@@ -22,9 +22,6 @@ class LeMonde(BasicNewsRecipe):
     no_stylesheets = True
     cover_url='http://abonnes.lemonde.fr/titresdumonde/'+date.today().strftime("%y%m%d")+'/1.jpg'
-    html2lrf_options = ['--base-font-size', '10']
     feeds = [
         ('A la Une', 'http://www.lemonde.fr/rss/une.xml'),
         ('International', 'http://www.lemonde.fr/rss/sequence/0,2-3210,1-0,0.xml'),
@@ -43,13 +40,13 @@ class LeMonde(BasicNewsRecipe):
         ('Examens', 'http://www.lemonde.fr/rss/sequence/0,2-3404,1-0,0.xml'),
         ('Opinions', 'http://www.lemonde.fr/rss/sequence/0,2-3232,1-0,0.xml')
     ]
     remove_tags = [dict(name='img', attrs={'src':'http://medias.lemonde.fr/mmpub/img/lgo/lemondefr_pet.gif'}),
                    dict(name='div', attrs={'id':'xiti-logo-noscript'}),
                    dict(name='br', attrs={}),
                    dict(name='iframe', attrs={}),
                    ]
     extra_css = '.ar-tit {font-size: x-large;} \n .dt {font-size: x-small;}'
     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
@@ -61,7 +58,7 @@ class LeMonde(BasicNewsRecipe):
         (r'(<div class=desc><b>.*</b></div>).*</body>', lambda match : match.group(1)),
         ]
     ]
     article_match_regexps = [ (re.compile(i)) for i in
         [
             (r'http://www\.lemonde\.fr/\S+/article/.*'),
@@ -70,7 +67,7 @@ class LeMonde(BasicNewsRecipe):
             (r'http://\S+\.blog\.lemonde\.fr/.*'),
         ]
     ]
     def print_version(self, url):
         return re.sub('http://www\.lemonde\.fr/.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url)

View File

@@ -14,12 +14,12 @@ class LondonReviewOfBooks(BasicNewsRecipe):
     description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
     oldest_article = 7
     max_articles_per_feed = 100
-    language = 'en'
+    language = 'en_GB'
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'cp1252'
     remove_tags = [
         dict(name='div' , attrs={'id' :'otherarticles'})
        ,dict(name='div' , attrs={'class':'pagetools' })
@@ -28,13 +28,13 @@ class LondonReviewOfBooks(BasicNewsRecipe):
        ,dict(name='div' , attrs={'class':'nocss' })
        ,dict(name='span', attrs={'class':'inlineright' })
     ]
     feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
     def print_version(self, url):
         main, split, rest = url.rpartition('/')
         return main + '/print/' + rest
     def postprocess_html(self, soup, first_fetch):
         for t in soup.findAll(['table', 'tr', 'td']):
             t.name = 'div'

View File

@@ -12,7 +12,7 @@ class OutlookIndia(BasicNewsRecipe):
     description = 'Weekly news and current affairs in India'
     no_stylesheets = True
     encoding = 'utf-8'
-    language = 'en'
+    language = 'en_IN'
     recursions = 1
     extra_css = '''
extra_css = ''' extra_css = '''

View File

@@ -12,26 +12,26 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class SMH(BasicNewsRecipe):
     title = 'Sydney Morning Herald'
     description = 'Business News, World News and Breaking News in Australia'
     __author__ = 'Kovid Goyal'
-    language = 'en'
+    language = 'en_AU'
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         br.set_handle_refresh(False)
         return br
     def parse_index(self):
         soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read())
         feeds, articles = [], []
         feed = None
         for tag in soup.findAll(['h3', 'a']):
             if tag.name == 'h3':
                 if articles:
@@ -41,7 +41,7 @@ class SMH(BasicNewsRecipe):
             elif feed is not None and tag.has_key('href') and tag['href'].strip():
                 url = tag['href'].strip()
                 if url.startswith('/'):
                     url = 'http://www.smh.com.au' + url
                 title = self.tag_to_string(tag)
                 articles.append({
                     'title': title,
@@ -49,8 +49,8 @@ class SMH(BasicNewsRecipe):
                     'date' : strftime('%a, %d %b'),
                     'description' : '',
                     'content' : '',
                     })
         return feeds

View File

@@ -12,26 +12,26 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class TheAge(BasicNewsRecipe):
     title = 'The Age'
     description = 'Business News, World News and Breaking News in Melbourne, Australia'
     __author__ = 'Matthew Briggs'
-    language = 'en'
+    language = 'en_AU'
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         br.set_handle_refresh(False)
         return br
     def parse_index(self):
         soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/text/').read())
         feeds, articles = [], []
         feed = None
         for tag in soup.findAll(['h3', 'a']):
             if tag.name == 'h3':
                 if articles:
@@ -41,7 +41,7 @@ class TheAge(BasicNewsRecipe):
             elif feed is not None and tag.has_key('href') and tag['href'].strip():
                 url = tag['href'].strip()
                 if url.startswith('/'):
                     url = 'http://www.theage.com.au' + url
                 title = self.tag_to_string(tag)
                 articles.append({
                     'title': title,
@@ -49,9 +49,9 @@ class TheAge(BasicNewsRecipe):
                     'date' : strftime('%a, %d %b'),
                     'description' : '',
                     'content' : '',
                     })
         return feeds

View File

@@ -13,27 +13,27 @@ class DailyTelegraph(BasicNewsRecipe):
     title = u'The Australian'
     __author__ = u'Matthew Briggs'
     description = u'National broadsheet newspaper from down under - colloquially known as The Oz'
-    language = 'en'
+    language = 'en_AU'
     oldest_article = 2
     max_articles_per_feed = 10
     remove_javascript = True
     no_stylesheets = True
     encoding = 'utf8'
     html2lrf_options = [
           '--comment' , description
         , '--category' , 'news, Australia'
         , '--publisher' , title
     ]
     keep_only_tags = [
         dict(name='h1', attrs={'class':'section-heading'})
        ,dict(name='div', attrs={'id':'article'})
     ]
     remove_tags = [dict(name=['object','link'])]
     feeds = [
         (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
         (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),

View File

@@ -11,12 +11,12 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheScotsman(BasicNewsRecipe):
     title = u'The Scotsman'
     __author__ = 'Darko Miletic'
     description = 'News from Scotland'
     oldest_article = 7
     max_articles_per_feed = 100
     no_stylesheets = True
     use_embedded_content = False
-    language = 'en'
+    language = 'en_GB'
     simultaneous_downloads = 1
@@ -34,4 +34,4 @@ class TheScotsman(BasicNewsRecipe):
         ('Entertainment', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7010&format=rss'),
         ('Features', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6996&format=rss'),
         ('Opinion', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7074&format=rss'),
     ]

View File

@@ -22,7 +22,7 @@ class TheEconomicTimes(BasicNewsRecipe):
     simultaneous_downloads = 1
     encoding = 'utf-8'
     lang = 'en-IN'
-    language = 'en'
+    language = 'en_IN'
     html2lrf_options = [