diff --git a/resources/recipes/cnetjapan.recipe b/resources/recipes/cnetjapan.recipe index e0178c1ff2..1058b90401 100644 --- a/resources/recipes/cnetjapan.recipe +++ b/resources/recipes/cnetjapan.recipe @@ -7,7 +7,9 @@ class CNetJapan(BasicNewsRecipe): max_articles_per_feed = 30 __author__ = 'Hiroshi Miura' - feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')] + feeds = [(u'CNet News', u'http://feed.japan.cnet.com/rss/index.rdf'), + (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf') + ] language = 'ja' encoding = 'Shift_JIS' remove_javascript = True @@ -21,12 +23,29 @@ class CNetJapan(BasicNewsRecipe): lambda match: ''), ] - remove_tags_before = dict(name="h2") + remove_tags_before = dict(id="contents_l") remove_tags = [ {'class':"social_bkm_share"}, {'class':"social_bkm_print"}, {'class':"block20 clearfix"}, dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} ] remove_tags_after = {'class':"block20"} + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_digital.recipe b/resources/recipes/cnetjapan_digital.recipe new file mode 100644 index 0000000000..9028126af2 --- /dev/null +++ b/resources/recipes/cnetjapan_digital.recipe @@ -0,0 +1,49 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanDigital(BasicNewsRecipe): + title = u'CNET Japan Digital' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet digital',u'http://feed.japan.cnet.com/rss/digital/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_release.recipe b/resources/recipes/cnetjapan_release.recipe new file mode 100644 index 0000000000..e8d13ec99f --- /dev/null +++ b/resources/recipes/cnetjapan_release.recipe @@ -0,0 +1,48 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanRelease(BasicNewsRecipe): + title = u'CNET Japan release' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet Release', u'http://feed.japan.cnet.com/rss/release/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds