diff --git a/resources/images/news/cnetjapan.png b/resources/images/news/cnetjapan.png new file mode 100644 index 0000000000..9a0dcc8f7f Binary files /dev/null and b/resources/images/news/cnetjapan.png differ diff --git a/resources/images/news/endgadget_ja.png b/resources/images/news/endgadget_ja.png new file mode 100644 index 0000000000..94e8f1219c Binary files /dev/null and b/resources/images/news/endgadget_ja.png differ diff --git a/resources/images/news/jijinews.png b/resources/images/news/jijinews.png new file mode 100644 index 0000000000..b87865fc34 Binary files /dev/null and b/resources/images/news/jijinews.png differ diff --git a/resources/images/news/msnsankei.png b/resources/images/news/msnsankei.png new file mode 100644 index 0000000000..7e92af7b20 Binary files /dev/null and b/resources/images/news/msnsankei.png differ diff --git a/resources/images/news/nikkei_free.png b/resources/images/news/nikkei_free.png new file mode 100644 index 0000000000..308f4b3085 Binary files /dev/null and b/resources/images/news/nikkei_free.png differ diff --git a/resources/images/news/nikkei_sub_economy.png b/resources/images/news/nikkei_sub_economy.png new file mode 100644 index 0000000000..308f4b3085 Binary files /dev/null and b/resources/images/news/nikkei_sub_economy.png differ diff --git a/resources/images/news/nikkei_sub_industry.png b/resources/images/news/nikkei_sub_industry.png new file mode 100644 index 0000000000..308f4b3085 Binary files /dev/null and b/resources/images/news/nikkei_sub_industry.png differ diff --git a/resources/images/news/nikkei_sub_life.png b/resources/images/news/nikkei_sub_life.png new file mode 100644 index 0000000000..308f4b3085 Binary files /dev/null and b/resources/images/news/nikkei_sub_life.png differ diff --git a/resources/images/news/nikkei_sub_main.png b/resources/images/news/nikkei_sub_main.png new file mode 100644 index 0000000000..308f4b3085 Binary files /dev/null and b/resources/images/news/nikkei_sub_main.png differ diff --git 
a/resources/images/news/nikkei_sub_sports.png b/resources/images/news/nikkei_sub_sports.png new file mode 100644 index 0000000000..308f4b3085 Binary files /dev/null and b/resources/images/news/nikkei_sub_sports.png differ diff --git a/resources/images/news/reuters.png b/resources/images/news/reuters.png new file mode 100644 index 0000000000..f13abce7b6 Binary files /dev/null and b/resources/images/news/reuters.png differ diff --git a/resources/images/news/reuters_ja.png b/resources/images/news/reuters_ja.png new file mode 100644 index 0000000000..f13abce7b6 Binary files /dev/null and b/resources/images/news/reuters_ja.png differ diff --git a/resources/recipes/avto-magazin.recipe b/resources/recipes/avto-magazin.recipe index 6464588acc..adaf74546e 100644 --- a/resources/recipes/avto-magazin.recipe +++ b/resources/recipes/avto-magazin.recipe @@ -13,6 +13,6 @@ class Dnevnik(BasicNewsRecipe): - labguage = 'sl' no_stylesheets = True use_embedded_content = False + language = 'sl' conversion_options = {'linearize_tables' : True} diff --git a/resources/recipes/cnetjapan.recipe b/resources/recipes/cnetjapan.recipe new file mode 100644 index 0000000000..e0178c1ff2 --- /dev/null +++ b/resources/recipes/cnetjapan.recipe @@ -0,0 +1,32 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapan(BasicNewsRecipe): + title = u'CNET Japan' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(name="h2") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, 
+ dict(name="div",attrs={'id':'bookreview'}), + ] + remove_tags_after = {'class':"block20"} + diff --git a/resources/recipes/endgadget_ja.recipe b/resources/recipes/endgadget_ja.recipe new file mode 100644 index 0000000000..443a85905d --- /dev/null +++ b/resources/recipes/endgadget_ja.recipe @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +japanese.engadget.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class EndgadgetJapan(BasicNewsRecipe): + '''Recipe for the Japanese edition of Engadget, read from its RSS feed.''' + title = u'Engadget\u65e5\u672c\u7248' + language = 'ja' + __author__ = 'Hiroshi Miura' + cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg' + masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')] diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe new file mode 100644 index 0000000000..f74864365d --- /dev/null +++ b/resources/recipes/jijinews.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.jiji.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class JijiDotCom(BasicNewsRecipe): + title = u'\u6642\u4e8b\u901a\u4fe1' + __author__ = 'Hiroshi Miura' + description = 'World News from Jiji Press' + publisher = 'Jiji Press Ltd.' 
+ category = 'news' + encoding = 'utf-8' + oldest_article = 6 + max_articles_per_feed = 100 + language = 'ja' + cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' + masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' + + feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] + remove_tags_after = dict(id="ad_google") + diff --git a/resources/recipes/mainichi.recipe b/resources/recipes/mainichi.recipe new file mode 100644 index 0000000000..47dc7d0ebc --- /dev/null +++ b/resources/recipes/mainichi.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.mainichi.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class MainichiDailyNews(BasicNewsRecipe): + title = u'\u6bce\u65e5\u65b0\u805e' + __author__ = 'Hiroshi Miura' + oldest_article = 2 + max_articles_per_feed = 20 + description = 'Japanese traditional newspaper Mainichi Daily News' + publisher = 'Mainichi Daily News' + category = 'news, japan' + language = 'ja' + + feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')] + + remove_tags_before = {'class':"NewsTitle"} + remove_tags = [{'class':"RelatedArticle"}] + remove_tags_after = {'class':"Credit"} + diff --git a/resources/recipes/mainichi_it_news.recipe b/resources/recipes/mainichi_it_news.recipe new file mode 100644 index 0000000000..8e15496e57 --- /dev/null +++ b/resources/recipes/mainichi_it_news.recipe @@ -0,0 +1,18 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class MainichiDailyITNews(BasicNewsRecipe): + title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)' + __author__ = 'Hiroshi Miura' + oldest_article = 2 + max_articles_per_feed = 100 + description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics' + publisher = 'Mainichi Daily News' + category = 'news, Japan, IT, Electronics' + language = 'ja' + + feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')] + + 
remove_tags_before = {'class':"NewsTitle"} + remove_tags = [{'class':"RelatedArticle"}] + remove_tags_after = {'class':"Credit"} + diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe new file mode 100644 index 0000000000..4c79771945 --- /dev/null +++ b/resources/recipes/msnsankei.recipe @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +sankei.jp.msn.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class MSNSankeiNewsProduct(BasicNewsRecipe): + title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)' + __author__ = 'Hiroshi Miura' + description = 'Products release from Japan' + oldest_article = 7 + max_articles_per_feed = 100 + encoding = 'Shift_JIS' + language = 'ja' + + feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')] + + remove_tags_before = dict(id="__r_article_title__") + remove_tags_after = dict(id="ajax_release_news") + remove_tags = [{'class':"parent chromeCustom6G"}] diff --git a/resources/recipes/nikkei_free.recipe b/resources/recipes/nikkei_free.recipe new file mode 100644 index 0000000000..d84aaa279b --- /dev/null +++ b/resources/recipes/nikkei_free.recipe @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.nikkei.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class NikkeiNet(BasicNewsRecipe): + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)' + __author__ = 'Hiroshi Miura' + description = 'News and current market affairs from Japan' + cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + oldest_article = 2 + max_articles_per_feed = 20 + language = 'ja' + + feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'), + (u'\u65e5\u7d4c\u88fd\u54c1', 
u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'), + (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'), + (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'), + (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'), + (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'), + (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'), + (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'), + (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'), + (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'), + (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'), + (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'), + (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'), + (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'), + (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'), + (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'), + (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'), + (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'), + (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'), + (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'), + (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'), + (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'), + (u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'), + (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'), + 
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'), + (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'), + (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'), + (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'), + (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'), + (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'), + (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research') + ] + + remove_tags_before = dict(id="CONTENTS") + remove_tags = [ + dict(name="form"), + {'class':"cmn-hide"}, + ] + remove_tags_after = {'class':"cmn-pr_list"} + diff --git a/resources/recipes/nikkei_sub_economy.recipe b/resources/recipes/nikkei_sub_economy.recipe new file mode 100644 index 0000000000..d762f505d1 --- /dev/null +++ b/resources/recipes/nikkei_sub_economy.recipe @@ -0,0 +1,109 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.nikkei.com +''' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +import mechanize +from calibre.ptempfile import PersistentTemporaryFile + +class NikkeiNet_sub_economy(BasicNewsRecipe): + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)' + __author__ = 'Hiroshi Miura' + description = 'News and current market affairs from Japan' + cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + needs_subscription = True + oldest_article = 2 + max_articles_per_feed = 20 + language = 'ja' + remove_javascript = False + temp_files = [] + + remove_tags_before = 
{'class':"cmn-section cmn-indent"} + remove_tags = [ + {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"}, + {'class':"cmn-article_keyword cmn-clearfix"}, + {'class':"cmn-print_headline cmn-clearfix"}, + ] + remove_tags_after = {'class':"cmn-pr_list"} + + feeds = [ (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'), + (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'), + (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'), + (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'), + (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'), + (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'), + (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'), + (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'), + ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + + cj = mechanize.LWPCookieJar() + br.set_cookiejar(cj) + + #br.set_debug_http(True) + #br.set_debug_redirects(True) + #br.set_debug_responses(True) + + if self.username is not None and self.password is not None: + #print "----------------------------get login form--------------------------------------------" + # open login form + br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam') + response = br.response() + #print "----------------------------get login form---------------------------------------------" + #print "----------------------------set login form---------------------------------------------" + # remove disabled input which brings error on mechanize + response.set_data(response.get_data().replace("", " -->")) + br.set_response(response) + br.select_form(name='LA0010Form01') + br['LA0010Form01:LA0010Email'] = self.username + br['LA0010Form01:LA0010Password'] = self.password + 
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True + br.submit() + br.response() + #print "----------------------------send login form---------------------------------------------" + #print "----------------------------open news main page-----------------------------------------" + # open news site + br.open('http://www.nikkei.com/') + br.response() + #print "----------------------------www.nikkei.com BODY --------------------------------------" + #print response2.get_data() + #print "-------------------------^^-got auto redirect form----^^--------------------------------" + # forced redirect in default + br.select_form(nr=0) + br.submit() + response3 = br.response() + # return some cookie which should be set by Javascript + #print response3.geturl() + raw = response3.get_data() + #print "---------------------------response to form --------------------------------------------" + # grab cookie from JS and set it + redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1) + br.select_form(nr=0) + + self.temp_files.append(PersistentTemporaryFile('_fa.html')) + self.temp_files[-1].write("#LWP-Cookies-2.0\n") + + self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n") + self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n") + self.temp_files[-1].close() + cj.load(self.temp_files[-1].name) + + br.submit() + + #br.set_debug_http(False) + #br.set_debug_redirects(False) + #br.set_debug_responses(False) + return br + + + + diff --git a/resources/recipes/nikkei_sub_industry.recipe b/resources/recipes/nikkei_sub_industry.recipe new file mode 100644 index 0000000000..da04bbb5f3 --- /dev/null +++ b/resources/recipes/nikkei_sub_industry.recipe @@ -0,0 +1,108 @@ +#!/usr/bin/env python + 
+__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.nikkei.com +''' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +import mechanize +from calibre.ptempfile import PersistentTemporaryFile + + +class NikkeiNet_sub_industory(BasicNewsRecipe): + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7523\u696d)' + __author__ = 'Hiroshi Miura' + description = 'News and current market affairs from Japan' + cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + needs_subscription = True + oldest_article = 2 + max_articles_per_feed = 20 + language = 'ja' + remove_javascript = False + temp_files = [] + + remove_tags_before = {'class':"cmn-section cmn-indent"} + remove_tags = [ + {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"}, + {'class':"cmn-article_keyword cmn-clearfix"}, + {'class':"cmn-print_headline cmn-clearfix"}, + ] + remove_tags_after = {'class':"cmn-pr_list"} + + feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'), + (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'), + (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'), + (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'), + (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'), + + ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + + cj = mechanize.LWPCookieJar() + br.set_cookiejar(cj) + + #br.set_debug_http(True) + #br.set_debug_redirects(True) + #br.set_debug_responses(True) + + if self.username is not None and self.password is not None: + #print "----------------------------get login form--------------------------------------------" + # open login form + br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam') + response = br.response() + #print 
"----------------------------get login form---------------------------------------------" + #print "----------------------------set login form---------------------------------------------" + # remove disabled input which brings error on mechanize + response.set_data(response.get_data().replace("", " -->")) + br.set_response(response) + br.select_form(name='LA0010Form01') + br['LA0010Form01:LA0010Email'] = self.username + br['LA0010Form01:LA0010Password'] = self.password + br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True + br.submit() + br.response() + #print "----------------------------send login form---------------------------------------------" + #print "----------------------------open news main page-----------------------------------------" + # open news site + br.open('http://www.nikkei.com/') + br.response() + #print "----------------------------www.nikkei.com BODY --------------------------------------" + #print response2.get_data() + #print "-------------------------^^-got auto redirect form----^^--------------------------------" + # forced redirect in default + br.select_form(nr=0) + br.submit() + response3 = br.response() + # return some cookie which should be set by Javascript + #print response3.geturl() + raw = response3.get_data() + #print "---------------------------response to form --------------------------------------------" + # grab cookie from JS and set it + redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1) + br.select_form(nr=0) + + self.temp_files.append(PersistentTemporaryFile('_fa.html')) + self.temp_files[-1].write("#LWP-Cookies-2.0\n") + + self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n") + self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 
05:07:59Z\"; version=0\n") + self.temp_files[-1].close() + cj.load(self.temp_files[-1].name) + + br.submit() + + #br.set_debug_http(False) + #br.set_debug_redirects(False) + #br.set_debug_responses(False) + return br + + + + diff --git a/resources/recipes/nikkei_sub_life.recipe b/resources/recipes/nikkei_sub_life.recipe new file mode 100644 index 0000000000..2da5b13834 --- /dev/null +++ b/resources/recipes/nikkei_sub_life.recipe @@ -0,0 +1,109 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.nikkei.com +''' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +import mechanize +from calibre.ptempfile import PersistentTemporaryFile + + +class NikkeiNet_sub_life(BasicNewsRecipe): + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)' + __author__ = 'Hiroshi Miura' + description = 'News and current market affairs from Japan' + cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + needs_subscription = True + oldest_article = 2 + max_articles_per_feed = 20 + language = 'ja' + remove_javascript = False + temp_files = [] + + remove_tags_before = {'class':"cmn-section cmn-indent"} + remove_tags = [ + {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"}, + {'class':"cmn-article_keyword cmn-clearfix"}, + {'class':"cmn-print_headline cmn-clearfix"}, + ] + remove_tags_after = {'class':"cmn-pr_list"} + + feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'), + (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'), + (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'), + (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'), + (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'), + (u'\u7279\u96c6', 
u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'), + (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking') + ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + + cj = mechanize.LWPCookieJar() + br.set_cookiejar(cj) + + #br.set_debug_http(True) + #br.set_debug_redirects(True) + #br.set_debug_responses(True) + + if self.username is not None and self.password is not None: + #print "----------------------------get login form--------------------------------------------" + # open login form + br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam') + response = br.response() + #print "----------------------------get login form---------------------------------------------" + #print "----------------------------set login form---------------------------------------------" + # remove disabled input which brings error on mechanize + response.set_data(response.get_data().replace("", " -->")) + br.set_response(response) + br.select_form(name='LA0010Form01') + br['LA0010Form01:LA0010Email'] = self.username + br['LA0010Form01:LA0010Password'] = self.password + br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True + br.submit() + br.response() + #print "----------------------------send login form---------------------------------------------" + #print "----------------------------open news main page-----------------------------------------" + # open news site + br.open('http://www.nikkei.com/') + br.response() + #print "----------------------------www.nikkei.com BODY --------------------------------------" + #print response2.get_data() + #print "-------------------------^^-got auto redirect form----^^--------------------------------" + # forced redirect in default + br.select_form(nr=0) + br.submit() + response3 = br.response() + # return some cookie which should be set by Javascript + #print response3.geturl() + raw = response3.get_data() + #print 
"---------------------------response to form --------------------------------------------" + # grab cookie from JS and set it + redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1) + br.select_form(nr=0) + + self.temp_files.append(PersistentTemporaryFile('_fa.html')) + self.temp_files[-1].write("#LWP-Cookies-2.0\n") + + self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n") + self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n") + self.temp_files[-1].close() + cj.load(self.temp_files[-1].name) + + br.submit() + + #br.set_debug_http(False) + #br.set_debug_redirects(False) + #br.set_debug_responses(False) + return br + + + + diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe new file mode 100644 index 0000000000..142edf624d --- /dev/null +++ b/resources/recipes/nikkei_sub_main.recipe @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.nikkei.com +''' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +import mechanize +from calibre.ptempfile import PersistentTemporaryFile + + +class NikkeiNet_sub_main(BasicNewsRecipe): + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)' + __author__ = 'Hiroshi Miura' + description = 'News and current market affairs from Japan' + cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + needs_subscription = True + oldest_article = 2 + max_articles_per_feed = 20 + language = 'ja' + remove_javascript = False + temp_files = [] + + remove_tags_before = {'class':"cmn-section cmn-indent"} + remove_tags = [ + {'class':"JSID_basePageMove 
JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"}, + {'class':"cmn-article_keyword cmn-clearfix"}, + {'class':"cmn-print_headline cmn-clearfix"}, + ] + remove_tags_after = {'class':"cmn-pr_list"} + + feeds = [ (u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + + cj = mechanize.LWPCookieJar() + br.set_cookiejar(cj) + + #br.set_debug_http(True) + #br.set_debug_redirects(True) + #br.set_debug_responses(True) + + if self.username is not None and self.password is not None: + #print "----------------------------get login form--------------------------------------------" + # open login form + br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam') + response = br.response() + #print "----------------------------get login form---------------------------------------------" + #print "----------------------------set login form---------------------------------------------" + # remove disabled input which brings error on mechanize + response.set_data(response.get_data().replace("", " -->")) + br.set_response(response) + br.select_form(name='LA0010Form01') + br['LA0010Form01:LA0010Email'] = self.username + br['LA0010Form01:LA0010Password'] = self.password + br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True + br.submit() + br.response() + #print "----------------------------send login form---------------------------------------------" + #print "----------------------------open news main page-----------------------------------------" + # open news site + br.open('http://www.nikkei.com/') + br.response() + #print "----------------------------www.nikkei.com BODY --------------------------------------" + #print response2.get_data() + #print "-------------------------^^-got auto redirect form----^^--------------------------------" + # forced redirect in default + br.select_form(nr=0) + br.submit() + response3 = br.response() + # return some 
cookie which should be set by Javascript + #print response3.geturl() + raw = response3.get_data() + #print "---------------------------response to form --------------------------------------------" + # grab cookie from JS and set it + redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1) + br.select_form(nr=0) + + self.temp_files.append(PersistentTemporaryFile('_fa.html')) + self.temp_files[-1].write("#LWP-Cookies-2.0\n") + + self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n") + self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n") + self.temp_files[-1].close() + cj.load(self.temp_files[-1].name) + + br.submit() + + #br.set_debug_http(False) + #br.set_debug_redirects(False) + #br.set_debug_responses(False) + return br + + + + diff --git a/resources/recipes/nikkei_sub_sports.recipe b/resources/recipes/nikkei_sub_sports.recipe new file mode 100644 index 0000000000..6e5a1c6bb2 --- /dev/null +++ b/resources/recipes/nikkei_sub_sports.recipe @@ -0,0 +1,109 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.nikkei.com +''' + +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +import mechanize +from calibre.ptempfile import PersistentTemporaryFile + + +class NikkeiNet_sub_sports(BasicNewsRecipe): + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)' + __author__ = 'Hiroshi Miura' + description = 'News and current market affairs from Japan' + cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' + needs_subscription = True + oldest_article = 2 + max_articles_per_feed = 20 + language = 'ja' + remove_javascript = False + temp_files = 
[]
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+    ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    feeds = [
+        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
+        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
+        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
+        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
+        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
+        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba')
+    ]
+
+    def get_browser(self):
+        """Return a browser, logged in to nikkei.com when credentials are set.
+
+        The browser comes from BasicNewsRecipe.get_browser() and is given its
+        own LWP cookie jar.  The login sequence below runs only when both
+        self.username and self.password are not None.
+        """
+        # NOTE(review): called on the class without an instance; presumably a
+        # classmethod in the targeted calibre version -- confirm.
+        br = BasicNewsRecipe.get_browser()
+
+        # Keep a handle on the jar: a cookie file is loaded into it further down.
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        if self.username is not None and self.password is not None:
+            # Step 1: fetch the login form.
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            # Step 2: patch the page before selecting the form; the intent is to
+            # comment out a disabled input that makes mechanize fail.
+            # NOTE(review): the search string below is empty, so " -->" is
+            # inserted between every character; the original pattern looks
+            # lost -- confirm against the upstream recipe.
+            response.set_data(response.get_data().replace("", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email'] = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            # Tick the auto-login checkbox, then send the login form.
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            # Step 3: open the news site; it is expected to answer with an
+            # auto-redirect form, which is submitted as-is.
+            br.open('http://www.nikkei.com/')
+            br.response()
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            raw = response3.get_data()
+            # Step 4: this response is expected to set a cookie from JavaScript,
+            # so the value is read out of the script text instead.
+            # NOTE(review): raises AttributeError when the pattern is absent.
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            # Write the cookies as an LWP cookie file and load it into the jar.
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            # Re-submit the form selected above now that the cookies are loaded.
+            br.submit()
+
+        return br
+
+
+
+
diff --git a/resources/recipes/now_toronto.recipe b/resources/recipes/now_toronto.recipe
index 41741dbccb..52a4619266 100644
--- a/resources/recipes/now_toronto.recipe
+++ b/resources/recipes/now_toronto.recipe
@@ -13,6 +13,7 @@ class NowToronto(BasicNewsRecipe):
     title = u'Now Toronto'
     description = u'Now Toronto'
     __author__ = 'Starson17'
+    language = 'en_CA'
     conversion_options = {
         'no_default_epub_cover' : True
     }
diff --git a/resources/recipes/reuters_ja.recipe b/resources/recipes/reuters_ja.recipe
new file mode 100644
index 0000000000..ffa084bc88
--- /dev/null
+++ b/resources/recipes/reuters_ja.recipe
@@ -0,0 +1,46 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class ReutersJa(BasicNewsRecipe):
+    """Recipe for the Japanese-language Reuters feeds listed in ``feeds``."""
+
+    title = 'Reuters(Japan)'
+    description = 'Global news in Japanese'
+    __author__ = 'Hiroshi Miura'
+    use_embedded_content = False
+    language = 'ja'
+    max_articles_per_feed = 10
+    remove_javascript = True
+
+    feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/JPTopNews?format=xml'),
+              ('World News', 'http://feeds.reuters.com/reuters/JPWorldNews?format=xml'),
+              ('Business News', 'http://feeds.reuters.com/reuters/JPBusinessNews?format=xml'),
+              ('Technology News', 'http://feeds.reuters.com/reuters/JPTechnologyNews?format=xml'),
+              ('Oddly Enough News', 'http://feeds.reuters.com/reuters/JPOddlyEnoughNews?format=xml')
+            ]
+
+    remove_tags_before = {'class':"article primaryContent"}
+    remove_tags = [ dict(id="banner"),
+                    dict(id="autilities"),
+                    dict(id="textSizer"),
+                    dict(id="shareFooter"),
+                    dict(id="relatedNews"),
+                    dict(id="editorsChoice"),
+                    dict(id="ecArticles"),
+                    {'class':"secondaryContent"},
+                    {'class':"module"},
+                  ]
+    remove_tags_after = {'class':"assetBuddy"}
+
+    def print_version(self, url):
+        """Return the print-version URL for the article at *url*.
+
+        The URL is cut right after its ``idJPJAPAN-<digits>`` part and
+        ``?sp=true`` is appended.  A URL without that part is returned
+        unchanged instead of raising AttributeError on the failed match.
+        """
+        m = re.search('(.*idJPJAPAN-[0-9]+)', url)
+        if m is None:
+            return url
+        return m.group(0)+'?sp=true'
+
+
diff --git a/resources/recipes/the_h.recipe b/resources/recipes/the_h.recipe
new file mode 100644
index 0000000000..dbfad7e32a
--- /dev/null
+++ b/resources/recipes/the_h.recipe
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+www.h-online.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheHeiseOnline(BasicNewsRecipe):
+    """Recipe for the single 'The H News Feed' Atom feed listed in ``feeds``."""
+    title = u'The H'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 3
+    description = 'In association with Heise Online'
+    publisher = 'Heise Media UK Ltd.'
+    category = 'news, technology, security'
+    max_articles_per_feed = 100
+    language = 'en'
+    encoding = 'utf-8'
+    conversion_options = {
+         'comment' : description
+        ,'tags' : category
+        ,'publisher': publisher
+        ,'language' : language
+    }
+    feeds = [
+        (u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
+    ]
+
+    def print_version(self, url):
+        """Return *url* with ``?view=print`` appended."""
+        return url + '?view=print'
+