From 000fc39116b787a72979ac66701092cc79b652bd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 9 Sep 2009 19:28:40 -0600 Subject: [PATCH] Split up English recipes by country --- src/calibre/translations/calibre.pot | 24 ++++++--- src/calibre/utils/localization.py | 5 +- .../web/feeds/recipes/recipe_daily_mail.py | 4 +- src/calibre/web/feeds/recipes/recipe_dna.py | 2 +- .../feeds/recipes/recipe_glasgow_herald.py | 4 +- .../feeds/recipes/recipe_globe_and_mail.py | 2 +- .../web/feeds/recipes/recipe_guardian.py | 2 +- src/calibre/web/feeds/recipes/recipe_hindu.py | 52 +++++++++---------- .../web/feeds/recipes/recipe_le_monde.py | 11 ++-- src/calibre/web/feeds/recipes/recipe_lrb.py | 8 +-- .../web/feeds/recipes/recipe_outlook_india.py | 2 +- src/calibre/web/feeds/recipes/recipe_smh.py | 26 +++++----- .../web/feeds/recipes/recipe_the_age.py | 24 ++++----- .../web/feeds/recipes/recipe_the_oz.py | 10 ++-- .../web/feeds/recipes/recipe_the_scotsman.py | 6 +-- .../recipes/recipe_theeconomictimes_india.py | 2 +- 16 files changed, 98 insertions(+), 86 deletions(-) diff --git a/src/calibre/translations/calibre.pot b/src/calibre/translations/calibre.pot index 3042e4f2f4..bd89de9246 100644 --- a/src/calibre/translations/calibre.pot +++ b/src/calibre/translations/calibre.pot @@ -5,8 +5,8 @@ msgid "" msgstr "" "Project-Id-Version: calibre 0.6.11\n" -"POT-Creation-Date: 2009-09-09 17:15+MDT\n" -"PO-Revision-Date: 2009-09-09 17:15+MDT\n" +"POT-Creation-Date: 2009-09-09 19:27+MDT\n" +"PO-Revision-Date: 2009-09-09 19:27+MDT\n" "Last-Translator: Automatically generated\n" "Language-Team: LANGUAGE\n" "MIME-Version: 1.0\n" @@ -128,7 +128,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/library/database2.py:1534 #: /home/kovid/work/calibre/src/calibre/library/server.py:419 #: /home/kovid/work/calibre/src/calibre/library/server.py:517 -#: /home/kovid/work/calibre/src/calibre/utils/localization.py:100 +#: /home/kovid/work/calibre/src/calibre/utils/localization.py:103 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:45 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:63 #: /home/kovid/work/calibre/src/calibre/utils/podofo/__init__.py:77 @@ -6254,7 +6254,19 @@ msgid "Traditional Chinese" msgstr "" #: /home/kovid/work/calibre/src/calibre/utils/localization.py:99 -msgid "English (US)" +msgid "English" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/utils/localization.py:100 +msgid "English (AU)" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/utils/localization.py:101 +msgid "English (CA)" +msgstr "" + +#: /home/kovid/work/calibre/src/calibre/utils/localization.py:102 +msgid "English (IND)" msgstr "" #: /home/kovid/work/calibre/src/calibre/utils/sftp.py:53 @@ -6388,12 +6400,12 @@ msgid "sr-Latn-RS" msgstr "" #: -#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:84 +#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:81 msgid "Skipping duplicated article: %s" msgstr "" #: -#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:89 +#: /home/kovid/work/calibre/src/calibre/web/feeds/recipes/recipe_le_monde.py:86 msgid "Skipping filtered article: %s" msgstr "" diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index 308adf8cea..6e4838ebe4 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -96,7 +96,10 @@ _extra_lang_codes = { 'zh_CN' : _('Simplified Chinese'), 'zh_HK' : _('Chinese (HK)'), 'zh_TW' : _('Traditional Chinese'), - 'en' : _('English (US)'), + 'en' : _('English'), + 'en_AU' : _('English (AU)'), + 'en_CA' : _('English (CA)'), + 'en_IN' : _('English (IND)'), 'und' : _('Unknown') } diff --git a/src/calibre/web/feeds/recipes/recipe_daily_mail.py b/src/calibre/web/feeds/recipes/recipe_daily_mail.py index 30dfa3f9d7..3dba0c8d4c 100644 --- a/src/calibre/web/feeds/recipes/recipe_daily_mail.py +++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class TheDailyMail(BasicNewsRecipe): title = u'The Daily Mail' oldest_article = 2 - language = 'en' + language = 'en_GB' author = 'RufusA' simultaneous_downloads= 1 @@ -15,7 +15,7 @@ class TheDailyMail(BasicNewsRecipe): remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'}) remove_tags_before = dict(name='div', attrs={'id':'content'}) no_stylesheets = True - + feeds = [ (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), (u'News', u'http://www.dailymail.co.uk/news/index.rss'), diff --git a/src/calibre/web/feeds/recipes/recipe_dna.py b/src/calibre/web/feeds/recipes/recipe_dna.py index 91cc207e24..c9bc5642c1 100644 --- a/src/calibre/web/feeds/recipes/recipe_dna.py +++ b/src/calibre/web/feeds/recipes/recipe_dna.py @@ -9,7 +9,7 @@ class DNAIndia(BasicNewsRecipe): title = 'DNA India' description = 'Mumbai news, India news, World news, breaking news' __author__ = 'Kovid Goyal' - language = 'en' + language = 'en_IN' encoding = 'cp1252' diff --git a/src/calibre/web/feeds/recipes/recipe_glasgow_herald.py b/src/calibre/web/feeds/recipes/recipe_glasgow_herald.py index 1ec73319be..2551c31c3d 100644 --- a/src/calibre/web/feeds/recipes/recipe_glasgow_herald.py +++ b/src/calibre/web/feeds/recipes/recipe_glasgow_herald.py @@ -7,9 +7,9 @@ class GlasgowHerald(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 100 no_stylesheets = True - language = 'en' + language = 'en_GB' - __author__ = 'McCande' + __author__ = 'McCande' preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [ diff --git a/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py b/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py index adc6689356..1e4fc08e39 100644 --- a/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py +++ b/src/calibre/web/feeds/recipes/recipe_globe_and_mail.py @@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class GlobeAndMail(BasicNewsRecipe): title = u'Globe and Mail' - language = 'en' + language = 'en_CA' __author__ = 'Kovid Goyal' oldest_article = 2 diff --git a/src/calibre/web/feeds/recipes/recipe_guardian.py b/src/calibre/web/feeds/recipes/recipe_guardian.py index c1f34f6a1f..aad217533b 100644 --- a/src/calibre/web/feeds/recipes/recipe_guardian.py +++ b/src/calibre/web/feeds/recipes/recipe_guardian.py @@ -13,7 +13,7 @@ class Guardian(BasicNewsRecipe): title = u'The Guardian' __author__ = 'Seabound and Sujata Raman' - language = 'en' + language = 'en_GB' oldest_article = 7 max_articles_per_feed = 20 diff --git a/src/calibre/web/feeds/recipes/recipe_hindu.py b/src/calibre/web/feeds/recipes/recipe_hindu.py index 3d51490662..5c84880ff9 100644 --- a/src/calibre/web/feeds/recipes/recipe_hindu.py +++ b/src/calibre/web/feeds/recipes/recipe_hindu.py @@ -7,42 +7,42 @@ from calibre.web.feeds.news import BasicNewsRecipe class TheHindu(BasicNewsRecipe): title = u'The Hindu' - language = 'en' + language = 'en_IN' oldest_article = 7 __author__ = _('Kovid Goyal') max_articles_per_feed = 100 - + remove_tags_before = {'name':'font', 'class':'storyhead'} preprocess_regexps = [ - (re.compile(r'.*', re.DOTALL), - lambda match: ''), + (re.compile(r'.*', re.DOTALL), + lambda match: ''), ] - + feeds = [ - (u'Main - Font Page', u'http://www.hindu.com/rss/01hdline.xml'), - (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'), - (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'), - (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'), - (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'), - (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'), - (u'Main - Weather / Religion / Crossword / Cartoon', - u'http://www.hindu.com/rss/10hdline.xml'), - (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'), - (u'Supplement - Literary Review', - u'http://www.hindu.com/rss/lrhdline.xml'), - (u'Supplement - Sunday Magazine', - u'http://www.hindu.com/rss/maghdline.xml'), - (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'), - (u'Supplement - Business Review', - u'http://www.hindu.com/rss/bizhdline.xml'), - (u'Supplement - Book Review', - u'http://www.hindu.com/rss/brhdline.xml'), - (u'Supplement - Science & Technology', + (u'Main - Font Page', u'http://www.hindu.com/rss/01hdline.xml'), + (u'Main - National', u'http://www.hindu.com/rss/02hdline.xml'), + (u'Main - International', u'http://www.hindu.com/rss/03hdline.xml'), + (u'Main - Opinion', u'http://www.hindu.com/rss/05hdline.xml'), + (u'Main - Business', u'http://www.hindu.com/rss/06hdline.xml'), + (u'Main - Sport', u'http://www.hindu.com/rss/07hdline.xml'), + (u'Main - Weather / Religion / Crossword / Cartoon', + u'http://www.hindu.com/rss/10hdline.xml'), + (u'Main - Engagements', u'http://www.hindu.com/rss/26hdline.xml'), + (u'Supplement - Literary Review', + u'http://www.hindu.com/rss/lrhdline.xml'), + (u'Supplement - Sunday Magazine', + u'http://www.hindu.com/rss/maghdline.xml'), + (u'Supplement - Open Page', u'http://www.hindu.com/rss/ophdline.xml'), + (u'Supplement - Business Review', + u'http://www.hindu.com/rss/bizhdline.xml'), + (u'Supplement - Book Review', + u'http://www.hindu.com/rss/brhdline.xml'), + (u'Supplement - Science & Technology', u'http://www.hindu.com/rss/setahdline.xml') ] - + def postprocess_html(self, soup, first_fetch): for t in soup.findAll(['table', 'tr', 'td']): t.name = 'div' - return soup \ No newline at end of file + return soup diff --git a/src/calibre/web/feeds/recipes/recipe_le_monde.py b/src/calibre/web/feeds/recipes/recipe_le_monde.py index cd49d4a6c4..c761233452 100644 --- a/src/calibre/web/feeds/recipes/recipe_le_monde.py +++ b/src/calibre/web/feeds/recipes/recipe_le_monde.py @@ -22,9 +22,6 @@ class LeMonde(BasicNewsRecipe): no_stylesheets = True cover_url='http://abonnes.lemonde.fr/titresdumonde/'+date.today().strftime("%y%m%d")+'/1.jpg' - - html2lrf_options = ['--base-font-size', '10'] - feeds = [ ('A la Une', 'http://www.lemonde.fr/rss/une.xml'), ('International', 'http://www.lemonde.fr/rss/sequence/0,2-3210,1-0,0.xml'), @@ -43,13 +40,13 @@ class LeMonde(BasicNewsRecipe): ('Examens', 'http://www.lemonde.fr/rss/sequence/0,2-3404,1-0,0.xml'), ('Opinions', 'http://www.lemonde.fr/rss/sequence/0,2-3232,1-0,0.xml') ] - + remove_tags = [dict(name='img', attrs={'src':'http://medias.lemonde.fr/mmpub/img/lgo/lemondefr_pet.gif'}), dict(name='div', attrs={'id':'xiti-logo-noscript'}), dict(name='br', attrs={}), dict(name='iframe', attrs={}), ] - + extra_css = '.ar-tit {font-size: x-large;} \n .dt {font-size: x-small;}' preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in @@ -61,7 +58,7 @@ class LeMonde(BasicNewsRecipe): (r'(
.*
).*', lambda match : match.group(1)), ] ] - + article_match_regexps = [ (re.compile(i)) for i in [ (r'http://www\.lemonde\.fr/\S+/article/.*'), @@ -70,7 +67,7 @@ class LeMonde(BasicNewsRecipe): (r'http://\S+\.blog\.lemonde\.fr/.*'), ] ] - + def print_version(self, url): return re.sub('http://www\.lemonde\.fr/.*_([0-9]+)_[0-9]+\.html.*','http://www.lemonde.fr/web/imprimer_element/0,40-0,50-\\1,0.html' ,url) diff --git a/src/calibre/web/feeds/recipes/recipe_lrb.py b/src/calibre/web/feeds/recipes/recipe_lrb.py index ea89fcd653..8c248b00f1 100644 --- a/src/calibre/web/feeds/recipes/recipe_lrb.py +++ b/src/calibre/web/feeds/recipes/recipe_lrb.py @@ -14,12 +14,12 @@ class LondonReviewOfBooks(BasicNewsRecipe): description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' oldest_article = 7 max_articles_per_feed = 100 - language = 'en' + language = 'en_GB' no_stylesheets = True use_embedded_content = False encoding = 'cp1252' - + remove_tags = [ dict(name='div' , attrs={'id' :'otherarticles'}) ,dict(name='div' , attrs={'class':'pagetools' }) @@ -28,13 +28,13 @@ class LondonReviewOfBooks(BasicNewsRecipe): ,dict(name='div' , attrs={'class':'nocss' }) ,dict(name='span', attrs={'class':'inlineright' }) ] - + feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')] def print_version(self, url): main, split, rest = url.rpartition('/') return main + '/print/' + rest - + def postprocess_html(self, soup, first_fetch): for t in soup.findAll(['table', 'tr', 'td']): t.name = 'div' diff --git a/src/calibre/web/feeds/recipes/recipe_outlook_india.py b/src/calibre/web/feeds/recipes/recipe_outlook_india.py index 9ce86ddec1..6bec491c75 100644 --- a/src/calibre/web/feeds/recipes/recipe_outlook_india.py +++ b/src/calibre/web/feeds/recipes/recipe_outlook_india.py @@ -12,7 +12,7 @@ class OutlookIndia(BasicNewsRecipe): description = 'Weekly news and current affairs in India' no_stylesheets = True encoding = 'utf-8' - language = 'en' + language = 'en_IN' recursions = 1 extra_css = ''' diff --git a/src/calibre/web/feeds/recipes/recipe_smh.py b/src/calibre/web/feeds/recipes/recipe_smh.py index 4b4d5dd047..0ea953170d 100644 --- a/src/calibre/web/feeds/recipes/recipe_smh.py +++ b/src/calibre/web/feeds/recipes/recipe_smh.py @@ -12,26 +12,26 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup class SMH(BasicNewsRecipe): - + title = 'Sydney Morning Herald' description = 'Business News, World News and Breaking News in Australia' __author__ = 'Kovid Goyal' - language = 'en' + language = 'en_AU' + - def get_browser(self): br = BasicNewsRecipe.get_browser() br.set_handle_refresh(False) return br - + def parse_index(self): - + soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read()) - + feeds, articles = [], [] feed = None - - + + for tag in soup.findAll(['h3', 'a']): if tag.name == 'h3': if articles: @@ -41,7 +41,7 @@ class SMH(BasicNewsRecipe): elif feed is not None and tag.has_key('href') and tag['href'].strip(): url = tag['href'].strip() if url.startswith('/'): - url = 'http://www.smh.com.au' + url + url = 'http://www.smh.com.au' + url title = self.tag_to_string(tag) articles.append({ 'title': title, @@ -49,8 +49,8 @@ class SMH(BasicNewsRecipe): 'date' : strftime('%a, %d %b'), 'description' : '', 'content' : '', - }) - - return feeds - + }) + + return feeds + diff --git a/src/calibre/web/feeds/recipes/recipe_the_age.py b/src/calibre/web/feeds/recipes/recipe_the_age.py index 996b832bda..8e4ae05575 100644 --- a/src/calibre/web/feeds/recipes/recipe_the_age.py +++ b/src/calibre/web/feeds/recipes/recipe_the_age.py @@ -12,26 +12,26 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup class TheAge(BasicNewsRecipe): - + title = 'The Age' description = 'Business News, World News and Breaking News in Melbourne, Australia' __author__ = 'Matthew Briggs' - language = 'en' + language = 'en_AU' + - def get_browser(self): br = BasicNewsRecipe.get_browser() br.set_handle_refresh(False) return br - + def parse_index(self): - + soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/text/').read()) - + feeds, articles = [], [] feed = None - - + + for tag in soup.findAll(['h3', 'a']): if tag.name == 'h3': if articles: @@ -41,7 +41,7 @@ class TheAge(BasicNewsRecipe): elif feed is not None and tag.has_key('href') and tag['href'].strip(): url = tag['href'].strip() if url.startswith('/'): - url = 'http://www.theage.com.au' + url + url = 'http://www.theage.com.au' + url title = self.tag_to_string(tag) articles.append({ 'title': title, @@ -49,9 +49,9 @@ class TheAge(BasicNewsRecipe): 'date' : strftime('%a, %d %b'), 'description' : '', 'content' : '', - }) - + }) + return feeds - + diff --git a/src/calibre/web/feeds/recipes/recipe_the_oz.py b/src/calibre/web/feeds/recipes/recipe_the_oz.py index 5c1d26f48b..6455a69a20 100644 --- a/src/calibre/web/feeds/recipes/recipe_the_oz.py +++ b/src/calibre/web/feeds/recipes/recipe_the_oz.py @@ -13,27 +13,27 @@ class DailyTelegraph(BasicNewsRecipe): title = u'The Australian' __author__ = u'Matthew Briggs' description = u'National broadsheet newspaper from down under - colloquially known as The Oz' - language = 'en' + language = 'en_AU' oldest_article = 2 max_articles_per_feed = 10 remove_javascript = True no_stylesheets = True encoding = 'utf8' - + html2lrf_options = [ '--comment' , description , '--category' , 'news, Australia' , '--publisher' , title ] - + keep_only_tags = [ dict(name='h1', attrs={'class':'section-heading'}) ,dict(name='div', attrs={'id':'article'}) ] - + remove_tags = [dict(name=['object','link'])] - + feeds = [ (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'), (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'), diff --git a/src/calibre/web/feeds/recipes/recipe_the_scotsman.py b/src/calibre/web/feeds/recipes/recipe_the_scotsman.py index 5c550511cb..b9dede1a96 100644 --- a/src/calibre/web/feeds/recipes/recipe_the_scotsman.py +++ b/src/calibre/web/feeds/recipes/recipe_the_scotsman.py @@ -11,12 +11,12 @@ from calibre.web.feeds.news import BasicNewsRecipe class TheScotsman(BasicNewsRecipe): title = u'The Scotsman' __author__ = 'Darko Miletic' - description = 'News from Scotland' + description = 'News from Scotland' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - language = 'en' + language = 'en_GB' simultaneous_downloads = 1 @@ -34,4 +34,4 @@ class TheScotsman(BasicNewsRecipe): ('Entertainment', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7010&format=rss'), ('Features', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=6996&format=rss'), ('Opinion', 'http://thescotsman.scotsman.com/getfeed.aspx?sectionid=7074&format=rss'), - ] \ No newline at end of file + ] diff --git a/src/calibre/web/feeds/recipes/recipe_theeconomictimes_india.py b/src/calibre/web/feeds/recipes/recipe_theeconomictimes_india.py index f7ae717c2b..8756e08e66 100644 --- a/src/calibre/web/feeds/recipes/recipe_theeconomictimes_india.py +++ b/src/calibre/web/feeds/recipes/recipe_theeconomictimes_india.py @@ -22,7 +22,7 @@ class TheEconomicTimes(BasicNewsRecipe): simultaneous_downloads = 1 encoding = 'utf-8' lang = 'en-IN' - language = 'en' + language = 'en_IN' html2lrf_options = [