Split up English recipes by country

This commit is contained in:
Kovid Goyal 2009-09-09 19:28:40 -06:00
parent 5ddd375a66
commit 000fc39116
16 changed files with 98 additions and 86 deletions

View File

@ -5,8 +5,8 @@
msgid ""
msgstr ""
"Project-Id-Version: calibre 0.6.11\n"
"POT-Creation-Date: 2009-09-09 17:15+MDT\n"
"PO-Revision-Date: 2009-09-09 17:15+MDT\n"
"POT-Creation-Date: 2009-09-09 19:27+MDT\n"
"PO-Revision-Date: 2009-09-09 19:27+MDT\n"
"Last-Translator: Automatically generated\n"
"Language-Team: LANGUAGE\n"
"MIME-Version: 1.0\n"
@ -128,7 +128,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/library/database2.py:1534
#: /home/kovid/work/calibre/src/calibre/library/server.py:419
#: /home/kovid/work/calibre/src/calibre/library/server.py:517
#: /home/kovid/work/calibre/src/calibre/utils/localization.py:100
#: /home/kovid/work/calibre/src/calibre/utils/localization.py:103
#: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:45
#: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:63
#: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:77
@ -6254,7 +6254,19 @@ msgid "Traditional Chinese"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/utils/localization.py:99
msgid "English (US)"
msgid "English"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/utils/localization.py:100
msgid "English (AU)"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/utils/localization.py:101
msgid "English (CA)"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/utils/localization.py:102
msgid "English (IND)"
msgstr ""
#: /home/kovid/work/calibre/src/calibre/utils/sftp.py:53
@ -6388,12 +6400,12 @@ msgid "sr-Latn-RS"
msgstr ""
#:
#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:84
#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:81
msgid "Skipping duplicated article: %s"
msgstr ""
#:
#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:89
#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:86
msgid "Skipping filtered article: %s"
msgstr ""

View File

@ -96,7 +96,10 @@ _extra_lang_codes = {
'zh_CN' : _('Simplified Chinese'),
'zh_HK' : _('Chinese (HK)'),
'zh_TW' : _('Traditional Chinese'),
'en' : _('English (US)'),
'en' : _('English'),
'en_AU' : _('English (AU)'),
'en_CA' : _('English (CA)'),
'en_IN' : _('English (IND)'),
'und' : _('Unknown')
}

View File

@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheDailyMail(BasicNewsRecipe):
title = u'The Daily Mail'
oldest_article = 2
language = 'en'
language = 'en_GB'
author = 'RufusA'
simultaneous_downloads= 1
@ -15,7 +15,7 @@ class TheDailyMail(BasicNewsRecipe):
remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'})
remove_tags_before = dict(name='div', attrs={'id':'content'})
no_stylesheets = True
feeds = [
(u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
(u'News', u'http://www.dailymail.co.uk/news/index.rss'),

View File

@ -9,7 +9,7 @@ class DNAIndia(BasicNewsRecipe):
title = 'DNA India'
description = 'Mumbai news, India news, World news, breaking news'
__author__ = 'Kovid Goyal'
language = 'en'
language = 'en_IN'
encoding = 'cp1252'

View File

@ -7,9 +7,9 @@ class GlasgowHerald(BasicNewsRecipe):
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
language = 'en'
language = 'en_GB'
__author__ = 'McCande'
__author__ = 'McCande'
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[

View File

@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class GlobeAndMail(BasicNewsRecipe):
title = u'Globe and Mail'
language = 'en'
language = 'en_CA'
__author__ = 'Kovid Goyal'
oldest_article = 2

View File

@ -13,7 +13,7 @@ class Guardian(BasicNewsRecipe):
title = u'The Guardian'
__author__ = 'Seabound and Sujata Raman'
language = 'en'
language = 'en_GB'
oldest_article = 7
max_articles_per_feed = 20

View File

@ -7,42 +7,42 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheHindu(BasicNewsRecipe):
title = u'The Hindu'
language = 'en'
language = 'en_IN'
oldest_article = 7
__author__ = _('Kovid Goyal')
max_articles_per_feed = 100
remove_tags_before = {'name':'font', 'class':'storyhead'}
preprocess_regexps = [
(re.compile(r'<!-- story ends -->.*', re.DOTALL),
lambda match: '</body></html>'),
(re.compile(r'<!-- story ends -->.*', re.DOTALL),
lambda match: '</body></html>'),
]
feeds = [
(u'Main - Font Page', u'http://www.hindu.com/rss/01hdline.xml'),
(u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
(u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
(u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
(u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
(u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
(u'Main - Weather / Religion / Crossword / Cartoon',
u'http://www.hindu.com/rss/10hdline.xml'),
(u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
(u'Supplement - Literary Review',
u'http://www.hindu.com/rss/lrhdline.xml'),
(u'Supplement - Sunday Magazine',
u'http://www.hindu.com/rss/maghdline.xml'),
(u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
(u'Supplement - Business Review',
u'http://www.hindu.com/rss/bizhdline.xml'),
(u'Supplement - Book Review',
u'http://www.hindu.com/rss/brhdline.xml'),
(u'Supplement - Science & Technology',
(u'Main - Font Page', u'http://www.hindu.com/rss/01hdline.xml'),
(u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'),
(u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'),
(u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'),
(u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'),
(u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'),
(u'Main - Weather / Religion / Crossword / Cartoon',
u'http://www.hindu.com/rss/10hdline.xml'),
(u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'),
(u'Supplement - Literary Review',
u'http://www.hindu.com/rss/lrhdline.xml'),
(u'Supplement - Sunday Magazine',
u'http://www.hindu.com/rss/maghdline.xml'),
(u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'),
(u'Supplement - Business Review',
u'http://www.hindu.com/rss/bizhdline.xml'),
(u'Supplement - Book Review',
u'http://www.hindu.com/rss/brhdline.xml'),
(u'Supplement - Science & Technology',
u'http://www.hindu.com/rss/setahdline.xml')
]
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
return soup
return soup

View File

@ -22,9 +22,6 @@ class LeMonde(BasicNewsRecipe):
no_stylesheets = True
cover_url='http://abonnes.lemonde.fr/titresdumonde/'+date.today().strftime("%y%m%d")+'/1.jpg'
html2lrf_options = ['--base-font-size', '10']
feeds = [
('A la Une', 'http://www.lemonde.fr/rss/une.xml'),
('International', 'http://www.lemonde.fr/rss/sequence/0,2-3210,1-0,0.xml'),
@ -43,13 +40,13 @@ class LeMonde(BasicNewsRecipe):
('Examens', 'http://www.lemonde.fr/rss/sequence/0,2-3404,1-0,0.xml'),
('Opinions', 'http://www.lemonde.fr/rss/sequence/0,2-3232,1-0,0.xml')
]
remove_tags = [dict(name='img', attrs={'src':'http://medias.lemonde.fr/mmpub/img/lgo/lemondefr_pet.gif'}),
dict(name='div', attrs={'id':'xiti-logo-noscript'}),
dict(name='br', attrs={}),
dict(name='iframe', attrs={}),
]
extra_css = '.ar-tit {font-size: x-large;} \n .dt {font-size: x-small;}'
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
@ -61,7 +58,7 @@ class LeMonde(BasicNewsRecipe):
(r'(<div class=desc><b>.*</b></div>).*</body>', lambda match : match.group(1)),
]
]
article_match_regexps = [ (re.compile(i)) for i in
[
(r'http://www\.lemonde\.fr/\S+/article/.*'),
@ -70,7 +67,7 @@ class LeMonde(BasicNewsRecipe):
(r'http://\S+\.blog\.lemonde\.fr/.*'),
]
]
def print_version(self, url):
return re.sub('http://www\.lemonde\.fr/.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url)

View File

@ -14,12 +14,12 @@ class LondonReviewOfBooks(BasicNewsRecipe):
description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
oldest_article = 7
max_articles_per_feed = 100
language = 'en'
language = 'en_GB'
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_tags = [
dict(name='div' , attrs={'id' :'otherarticles'})
,dict(name='div' , attrs={'class':'pagetools' })
@ -28,13 +28,13 @@ class LondonReviewOfBooks(BasicNewsRecipe):
,dict(name='div' , attrs={'class':'nocss' })
,dict(name='span', attrs={'class':'inlineright' })
]
feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
def print_version(self, url):
main, split, rest = url.rpartition('/')
return main + '/print/' + rest
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'

View File

@ -12,7 +12,7 @@ class OutlookIndia(BasicNewsRecipe):
description = 'Weekly news and current affairs in India'
no_stylesheets = True
encoding = 'utf-8'
language = 'en'
language = 'en_IN'
recursions = 1
extra_css = '''

View File

@ -12,26 +12,26 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
class SMH(BasicNewsRecipe):
title = 'Sydney Morning Herald'
description = 'Business News, World News and Breaking News in Australia'
__author__ = 'Kovid Goyal'
language = 'en'
language = 'en_AU'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.set_handle_refresh(False)
return br
def parse_index(self):
soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read())
feeds, articles = [], []
feed = None
for tag in soup.findAll(['h3', 'a']):
if tag.name == 'h3':
if articles:
@ -41,7 +41,7 @@ class SMH(BasicNewsRecipe):
elif feed is not None and tag.has_key('href') and tag['href'].strip():
url = tag['href'].strip()
if url.startswith('/'):
url = 'http://www.smh.com.au' + url
url = 'http://www.smh.com.au' + url
title = self.tag_to_string(tag)
articles.append({
'title': title,
@ -49,8 +49,8 @@ class SMH(BasicNewsRecipe):
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
return feeds
})
return feeds

View File

@ -12,26 +12,26 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
class TheAge(BasicNewsRecipe):
title = 'The Age'
description = 'Business News, World News and Breaking News in Melbourne, Australia'
__author__ = 'Matthew Briggs'
language = 'en'
language = 'en_AU'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.set_handle_refresh(False)
return br
def parse_index(self):
soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/text/').read())
feeds, articles = [], []
feed = None
for tag in soup.findAll(['h3', 'a']):
if tag.name == 'h3':
if articles:
@ -41,7 +41,7 @@ class TheAge(BasicNewsRecipe):
elif feed is not None and tag.has_key('href') and tag['href'].strip():
url = tag['href'].strip()
if url.startswith('/'):
url = 'http://www.theage.com.au' + url
url = 'http://www.theage.com.au' + url
title = self.tag_to_string(tag)
articles.append({
'title': title,
@ -49,9 +49,9 @@ class TheAge(BasicNewsRecipe):
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
})
return feeds

View File

@ -13,27 +13,27 @@ class DailyTelegraph(BasicNewsRecipe):
title = u'The Australian'
__author__ = u'Matthew Briggs'
description = u'National broadsheet newspaper from down under - colloquially known as The Oz'
language = 'en'
language = 'en_AU'
oldest_article = 2
max_articles_per_feed = 10
remove_javascript = True
no_stylesheets = True
encoding = 'utf8'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Australia'
, '--publisher' , title
]
keep_only_tags = [
dict(name='h1', attrs={'class':'section-heading'})
,dict(name='div', attrs={'id':'article'})
]
remove_tags = [dict(name=['object','link'])]
feeds = [
(u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
(u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),

View File

@ -11,12 +11,12 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheScotsman(BasicNewsRecipe):
title = u'The Scotsman'
__author__ = 'Darko Miletic'
description = 'News from Scotland'
description = 'News from Scotland'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = 'en'
language = 'en_GB'
simultaneous_downloads = 1
@ -34,4 +34,4 @@ class TheScotsman(BasicNewsRecipe):
('Entertainment', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7010&format=rss'),
('Features', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6996&format=rss'),
('Opinion', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7074&format=rss'),
]
]

View File

@ -22,7 +22,7 @@ class TheEconomicTimes(BasicNewsRecipe):
simultaneous_downloads = 1
encoding = 'utf-8'
lang = 'en-IN'
language = 'en'
language = 'en_IN'
html2lrf_options = [