merge from source
BIN
resources/images/news/cnetjapan.png
Normal file
After Width: | Height: | Size: 892 B |
BIN
resources/images/news/endgadget_ja.png
Normal file
After Width: | Height: | Size: 698 B |
BIN
resources/images/news/jijinews.png
Normal file
After Width: | Height: | Size: 919 B |
BIN
resources/images/news/msnsankei.png
Normal file
After Width: | Height: | Size: 543 B |
BIN
resources/images/news/nikkei_free.png
Normal file
After Width: | Height: | Size: 948 B |
BIN
resources/images/news/nikkei_sub_economy.png
Normal file
After Width: | Height: | Size: 948 B |
BIN
resources/images/news/nikkei_sub_industory.png
Normal file
After Width: | Height: | Size: 948 B |
BIN
resources/images/news/nikkei_sub_life.png
Normal file
After Width: | Height: | Size: 948 B |
BIN
resources/images/news/nikkei_sub_main.png
Normal file
After Width: | Height: | Size: 948 B |
BIN
resources/images/news/nikkei_sub_sports.png
Normal file
After Width: | Height: | Size: 948 B |
BIN
resources/images/news/reuters.png
Normal file
After Width: | Height: | Size: 693 B |
BIN
resources/images/news/reuters_ja.png
Normal file
After Width: | Height: | Size: 693 B |
@ -13,6 +13,7 @@ class Dnevnik(BasicNewsRecipe):
|
|||||||
labguage = 'sl'
|
labguage = 'sl'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
language = 'sl'
|
||||||
|
|
||||||
conversion_options = {'linearize_tables' : True}
|
conversion_options = {'linearize_tables' : True}
|
||||||
|
|
||||||
|
@ -91,8 +91,8 @@ class BrandEins(BasicNewsRecipe):
|
|||||||
latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0]
|
latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0]
|
||||||
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-issue]
|
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-issue]
|
||||||
url = pre_latest_issue.get('href', False)
|
url = pre_latest_issue.get('href', False)
|
||||||
# Get the title for the magazin - build it out of the title of the cover - take the issue and year;
|
# Get month and year of the magazine issue - build it out of the title of the cover
|
||||||
self.title = "brand eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d)", pre_latest_issue.find('img').get('title', False)).group('date')
|
self.timefmt = " " + re.search(r"(?P<date>\d\d\/\d\d\d\d)", pre_latest_issue.find('img').get('title', False)).group('date')
|
||||||
url = 'http://brandeins.de/'+url
|
url = 'http://brandeins.de/'+url
|
||||||
|
|
||||||
# url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
|
# url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
|
||||||
|
32
resources/recipes/cnetjapan.recipe
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class CNetJapan(BasicNewsRecipe):
|
||||||
|
title = u'CNET Japan'
|
||||||
|
oldest_article = 3
|
||||||
|
max_articles_per_feed = 30
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
|
||||||
|
feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
|
||||||
|
language = 'ja'
|
||||||
|
encoding = 'Shift_JIS'
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(ur'<!--\u25B2contents_left END\u25B2-->.*</body>', re.DOTALL|re.IGNORECASE|re.UNICODE),
|
||||||
|
lambda match: '</body>'),
|
||||||
|
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '</body>'),
|
||||||
|
(re.compile(ur'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->', re.UNICODE),
|
||||||
|
lambda match: '<!-- removed -->'),
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags_before = dict(name="h2")
|
||||||
|
remove_tags = [
|
||||||
|
{'class':"social_bkm_share"},
|
||||||
|
{'class':"social_bkm_print"},
|
||||||
|
{'class':"block20 clearfix"},
|
||||||
|
dict(name="div",attrs={'id':'bookreview'}),
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"block20"}
|
||||||
|
|
22
resources/recipes/endgadget_ja.recipe
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
japan.engadget.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class EndgadgetJapan(BasicNewsRecipe):
|
||||||
|
title = u'Endgadget\u65e5\u672c\u7248'
|
||||||
|
language = 'ja'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
|
||||||
|
masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
no_stylesheets = True
|
||||||
|
language = 'ja'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
|
26
resources/recipes/jijinews.recipe
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.jiji.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class JijiDotCom(BasicNewsRecipe):
|
||||||
|
title = u'\u6642\u4e8b\u901a\u4fe1'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'World News from Jiji Press'
|
||||||
|
publisher = 'Jiji Press Ltd.'
|
||||||
|
category = 'news'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
oldest_article = 6
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'ja'
|
||||||
|
cover_url = 'http://www.jiji.com/img/top_header_logo2.gif'
|
||||||
|
masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif'
|
||||||
|
|
||||||
|
feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
|
||||||
|
remove_tags_after = dict(id="ad_google")
|
||||||
|
|
26
resources/recipes/mainichi.recipe
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.mainichi.jp
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class MainichiDailyNews(BasicNewsRecipe):
|
||||||
|
title = u'\u6bce\u65e5\u65b0\u805e'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
description = 'Japanese traditional newspaper Mainichi Daily News'
|
||||||
|
publisher = 'Mainichi Daily News'
|
||||||
|
category = 'news, japan'
|
||||||
|
language = 'ja'
|
||||||
|
|
||||||
|
feeds = [(u'daily news', u'http://mainichi.jp/rss/etc/flash.rss')]
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"NewsTitle"}
|
||||||
|
remove_tags = [{'class':"RelatedArticle"}]
|
||||||
|
remove_tags_after = {'class':"Credit"}
|
||||||
|
|
18
resources/recipes/mainichi_it_news.recipe
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class MainichiDailyITNews(BasicNewsRecipe):
|
||||||
|
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
description = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
|
||||||
|
publisher = 'Mainichi Daily News'
|
||||||
|
category = 'news, Japan, IT, Electronics'
|
||||||
|
language = 'ja'
|
||||||
|
|
||||||
|
feeds = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"NewsTitle"}
|
||||||
|
remove_tags = [{'class':"RelatedArticle"}]
|
||||||
|
remove_tags_after = {'class':"Credit"}
|
||||||
|
|
@ -3,13 +3,28 @@ __copyright__ = '2010, Eddie Lau'
|
|||||||
'''
|
'''
|
||||||
modified from Singtao Toronto calibre recipe by rty
|
modified from Singtao Toronto calibre recipe by rty
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
|
||||||
|
ordering of articles
|
||||||
|
2010/11/12: add news image and eco-news section
|
||||||
|
2010/11/08: add parsing of finance section
|
||||||
|
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
|
||||||
|
in section/article list.
|
||||||
2010/10/31: skip repeated articles in section pages
|
2010/10/31: skip repeated articles in section pages
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import datetime
|
import os, datetime, re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from contextlib import nested
|
||||||
|
|
||||||
class AdvancedUserRecipe1278063072(BasicNewsRecipe):
|
|
||||||
|
from calibre import __appname__, strftime
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.utils.date import now as nowf
|
||||||
|
|
||||||
|
class MPHKRecipe(BasicNewsRecipe):
|
||||||
title = 'Ming Pao - Hong Kong'
|
title = 'Ming Pao - Hong Kong'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -24,27 +39,131 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
|
|||||||
encoding = 'Big5-HKSCS'
|
encoding = 'Big5-HKSCS'
|
||||||
recursions = 0
|
recursions = 0
|
||||||
conversion_options = {'linearize_tables':True}
|
conversion_options = {'linearize_tables':True}
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;}'
|
||||||
|
#extra_css = 'img {float:right; margin:4px;}'
|
||||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||||
keep_only_tags = [dict(name='h1'),
|
keep_only_tags = [dict(name='h1'),
|
||||||
|
#dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page
|
||||||
|
dict(attrs={'class':['photo']}),
|
||||||
|
dict(attrs={'id':['newscontent']}),
|
||||||
dict(attrs={'id':['newscontent01','newscontent02']})]
|
dict(attrs={'id':['newscontent01','newscontent02']})]
|
||||||
|
remove_tags = [dict(name='style'),
|
||||||
|
dict(attrs={'id':['newscontent135']})] # for the finance page
|
||||||
|
remove_attributes = ['width']
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '<h1>'),
|
||||||
|
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '</h1>'),
|
||||||
|
]
|
||||||
|
|
||||||
|
def image_url_processor(cls, baseurl, url):
|
||||||
|
# trick: break the url at the first occurance of digit, add an additional
|
||||||
|
# '_' at the front
|
||||||
|
# not working, may need to move this to preprocess_html() method
|
||||||
|
#minIdx = 10000
|
||||||
|
#i0 = url.find('0')
|
||||||
|
#if i0 >= 0 and i0 < minIdx:
|
||||||
|
# minIdx = i0
|
||||||
|
#i1 = url.find('1')
|
||||||
|
#if i1 >= 0 and i1 < minIdx:
|
||||||
|
# minIdx = i1
|
||||||
|
#i2 = url.find('2')
|
||||||
|
#if i2 >= 0 and i2 < minIdx:
|
||||||
|
# minIdx = i2
|
||||||
|
#i3 = url.find('3')
|
||||||
|
#if i3 >= 0 and i0 < minIdx:
|
||||||
|
# minIdx = i3
|
||||||
|
#i4 = url.find('4')
|
||||||
|
#if i4 >= 0 and i4 < minIdx:
|
||||||
|
# minIdx = i4
|
||||||
|
#i5 = url.find('5')
|
||||||
|
#if i5 >= 0 and i5 < minIdx:
|
||||||
|
# minIdx = i5
|
||||||
|
#i6 = url.find('6')
|
||||||
|
#if i6 >= 0 and i6 < minIdx:
|
||||||
|
# minIdx = i6
|
||||||
|
#i7 = url.find('7')
|
||||||
|
#if i7 >= 0 and i7 < minIdx:
|
||||||
|
# minIdx = i7
|
||||||
|
#i8 = url.find('8')
|
||||||
|
#if i8 >= 0 and i8 < minIdx:
|
||||||
|
# minIdx = i8
|
||||||
|
#i9 = url.find('9')
|
||||||
|
#if i9 >= 0 and i9 < minIdx:
|
||||||
|
# minIdx = i9
|
||||||
|
#return url[0:minIdx] + '_' + url[minIdx+1:]
|
||||||
|
return url
|
||||||
|
|
||||||
def get_fetchdate(self):
|
def get_fetchdate(self):
|
||||||
dt_utc = datetime.datetime.utcnow()
|
dt_utc = datetime.datetime.utcnow()
|
||||||
# convert UTC to local hk time - at around HKT 5.30am, all news are available
|
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||||
dt_local = dt_utc - datetime.timedelta(-2.5/24)
|
dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
||||||
return dt_local.strftime("%Y%m%d")
|
return dt_local.strftime("%Y%m%d")
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
dateStr = self.get_fetchdate()
|
dateStr = self.get_fetchdate()
|
||||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
|
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||||
articles = self.parse_section(url)
|
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
||||||
if articles:
|
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||||
feeds.append((title, articles))
|
(u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
|
||||||
|
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||||
|
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||||
|
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
|
||||||
|
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||||
|
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
|
||||||
|
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||||
|
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||||
|
articles = self.parse_section(url)
|
||||||
|
if articles:
|
||||||
|
feeds.append((title, articles))
|
||||||
|
# special - finance
|
||||||
|
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||||
|
if fin_articles:
|
||||||
|
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||||
|
# special - eco-friendly
|
||||||
|
# eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
|
||||||
|
# if eco_articles:
|
||||||
|
# feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
|
||||||
|
# special - entertainment
|
||||||
|
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||||
|
#if ent_articles:
|
||||||
|
# feeds.append(('Entertainment', ent_articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_section(self, url):
|
def parse_section(self, url):
|
||||||
|
dateStr = self.get_fetchdate()
|
||||||
|
soup = self.index_to_soup(url)
|
||||||
|
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
|
||||||
|
current_articles = []
|
||||||
|
included_urls = []
|
||||||
|
divs.reverse()
|
||||||
|
for i in divs:
|
||||||
|
a = i.find('a', href = True)
|
||||||
|
title = self.tag_to_string(a)
|
||||||
|
url = a.get('href', False)
|
||||||
|
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||||
|
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||||
|
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||||
|
included_urls.append(url)
|
||||||
|
current_articles.reverse()
|
||||||
|
return current_articles
|
||||||
|
|
||||||
|
def parse_fin_section(self, url):
|
||||||
dateStr = self.get_fetchdate()
|
dateStr = self.get_fetchdate()
|
||||||
|
soup = self.index_to_soup(url)
|
||||||
|
a = soup.findAll('a', href= True)
|
||||||
|
current_articles = []
|
||||||
|
for i in a:
|
||||||
|
url = i.get('href', False)
|
||||||
|
if not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
|
||||||
|
title = self.tag_to_string(i)
|
||||||
|
url = 'http://www.mpfinance.com/cfm/' +url
|
||||||
|
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||||
|
return current_articles
|
||||||
|
|
||||||
|
def parse_eco_section(self, url):
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
divs = soup.findAll(attrs={'class': ['bullet']})
|
divs = soup.findAll(attrs={'class': ['bullet']})
|
||||||
current_articles = []
|
current_articles = []
|
||||||
@ -53,9 +172,162 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
|
|||||||
a = i.find('a', href = True)
|
a = i.find('a', href = True)
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
url = a.get('href', False)
|
url = a.get('href', False)
|
||||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
|
||||||
if url not in included_urls:
|
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||||
included_urls.append(url)
|
included_urls.append(url)
|
||||||
return current_articles
|
return current_articles
|
||||||
|
|
||||||
|
#def parse_ent_section(self, url):
|
||||||
|
# dateStr = self.get_fetchdate()
|
||||||
|
# soup = self.index_to_soup(url)
|
||||||
|
# a = soup.findAll('a', href=True)
|
||||||
|
# current_articles = []
|
||||||
|
# included_urls = []
|
||||||
|
# for i in a:
|
||||||
|
# title = self.tag_to_string(i)
|
||||||
|
# url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
|
||||||
|
# if url not in included_urls and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1 and not title == '':
|
||||||
|
# current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||||
|
# return current_articles
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['width']
|
||||||
|
for item in soup.findAll(stype=True):
|
||||||
|
del item['absmiddle']
|
||||||
|
return soup
|
||||||
|
|
||||||
|
def create_opf(self, feeds, dir=None):
|
||||||
|
#super(MPHKRecipe,self).create_opf(feeds, dir)
|
||||||
|
if dir is None:
|
||||||
|
dir = self.output_dir
|
||||||
|
title = self.short_title()
|
||||||
|
if self.output_profile.periodical_date_in_title:
|
||||||
|
title += strftime(self.timefmt)
|
||||||
|
mi = MetaInformation(title, [__appname__])
|
||||||
|
mi.publisher = __appname__
|
||||||
|
mi.author_sort = __appname__
|
||||||
|
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||||
|
mi.timestamp = nowf()
|
||||||
|
mi.comments = self.description
|
||||||
|
if not isinstance(mi.comments, unicode):
|
||||||
|
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||||
|
mi.pubdate = nowf()
|
||||||
|
opf_path = os.path.join(dir, 'index.opf')
|
||||||
|
ncx_path = os.path.join(dir, 'index.ncx')
|
||||||
|
opf = OPFCreator(dir, mi)
|
||||||
|
# Add mastheadImage entry to <guide> section
|
||||||
|
mp = getattr(self, 'masthead_path', None)
|
||||||
|
if mp is not None and os.access(mp, os.R_OK):
|
||||||
|
from calibre.ebooks.metadata.opf2 import Guide
|
||||||
|
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||||
|
ref.type = 'masthead'
|
||||||
|
ref.title = 'Masthead Image'
|
||||||
|
opf.guide.append(ref)
|
||||||
|
|
||||||
|
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||||
|
manifest.append(os.path.join(dir, 'index.html'))
|
||||||
|
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||||
|
|
||||||
|
# Get cover
|
||||||
|
cpath = getattr(self, 'cover_path', None)
|
||||||
|
if cpath is None:
|
||||||
|
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||||
|
if self.default_cover(pf):
|
||||||
|
cpath = pf.name
|
||||||
|
if cpath is not None and os.access(cpath, os.R_OK):
|
||||||
|
opf.cover = cpath
|
||||||
|
manifest.append(cpath)
|
||||||
|
|
||||||
|
# Get masthead
|
||||||
|
mpath = getattr(self, 'masthead_path', None)
|
||||||
|
if mpath is not None and os.access(mpath, os.R_OK):
|
||||||
|
manifest.append(mpath)
|
||||||
|
|
||||||
|
opf.create_manifest_from_files_in(manifest)
|
||||||
|
for mani in opf.manifest:
|
||||||
|
if mani.path.endswith('.ncx'):
|
||||||
|
mani.id = 'ncx'
|
||||||
|
if mani.path.endswith('mastheadImage.jpg'):
|
||||||
|
mani.id = 'masthead-image'
|
||||||
|
entries = ['index.html']
|
||||||
|
toc = TOC(base_path=dir)
|
||||||
|
self.play_order_counter = 0
|
||||||
|
self.play_order_map = {}
|
||||||
|
|
||||||
|
def feed_index(num, parent):
|
||||||
|
f = feeds[num]
|
||||||
|
for j, a in enumerate(f):
|
||||||
|
if getattr(a, 'downloaded', False):
|
||||||
|
adir = 'feed_%d/article_%d/'%(num, j)
|
||||||
|
auth = a.author
|
||||||
|
if not auth:
|
||||||
|
auth = None
|
||||||
|
desc = a.text_summary
|
||||||
|
if not desc:
|
||||||
|
desc = None
|
||||||
|
else:
|
||||||
|
desc = self.description_limiter(desc)
|
||||||
|
entries.append('%sindex.html'%adir)
|
||||||
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
|
if po is None:
|
||||||
|
self.play_order_counter += 1
|
||||||
|
po = self.play_order_counter
|
||||||
|
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||||
|
play_order=po, author=auth, description=desc)
|
||||||
|
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||||
|
for sp in a.sub_pages:
|
||||||
|
prefix = os.path.commonprefix([opf_path, sp])
|
||||||
|
relp = sp[len(prefix):]
|
||||||
|
entries.append(relp.replace(os.sep, '/'))
|
||||||
|
last = sp
|
||||||
|
|
||||||
|
if os.path.exists(last):
|
||||||
|
with open(last, 'rb') as fi:
|
||||||
|
src = fi.read().decode('utf-8')
|
||||||
|
soup = BeautifulSoup(src)
|
||||||
|
body = soup.find('body')
|
||||||
|
if body is not None:
|
||||||
|
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||||
|
templ = self.navbar.generate(True, num, j, len(f),
|
||||||
|
not self.has_single_feed,
|
||||||
|
a.orig_url, __appname__, prefix=prefix,
|
||||||
|
center=self.center_navbar)
|
||||||
|
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||||
|
body.insert(len(body.contents), elem)
|
||||||
|
with open(last, 'wb') as fi:
|
||||||
|
fi.write(unicode(soup).encode('utf-8'))
|
||||||
|
if len(feeds) == 0:
|
||||||
|
raise Exception('All feeds are empty, aborting.')
|
||||||
|
|
||||||
|
if len(feeds) > 1:
|
||||||
|
for i, f in enumerate(feeds):
|
||||||
|
entries.append('feed_%d/index.html'%i)
|
||||||
|
po = self.play_order_map.get(entries[-1], None)
|
||||||
|
if po is None:
|
||||||
|
self.play_order_counter += 1
|
||||||
|
po = self.play_order_counter
|
||||||
|
auth = getattr(f, 'author', None)
|
||||||
|
if not auth:
|
||||||
|
auth = None
|
||||||
|
desc = getattr(f, 'description', None)
|
||||||
|
if not desc:
|
||||||
|
desc = None
|
||||||
|
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||||
|
f.title, play_order=po, description=desc, author=auth))
|
||||||
|
|
||||||
|
else:
|
||||||
|
entries.append('feed_%d/index.html'%0)
|
||||||
|
feed_index(0, toc)
|
||||||
|
|
||||||
|
for i, p in enumerate(entries):
|
||||||
|
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||||
|
opf.create_spine(entries)
|
||||||
|
opf.set_toc(toc)
|
||||||
|
|
||||||
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
24
resources/recipes/msnsankei.recipe
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
sankei.jp.msn.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class MSNSankeiNewsProduct(BasicNewsRecipe):
|
||||||
|
title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'Products release from Japan'
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
encoding = 'Shift_JIS'
|
||||||
|
language = 'ja'
|
||||||
|
|
||||||
|
feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
|
||||||
|
|
||||||
|
remove_tags_before = dict(id="__r_article_title__")
|
||||||
|
remove_tags_after = dict(id="ajax_release_news")
|
||||||
|
remove_tags = [{'class':"parent chromeCustom6G"}]
|
60
resources/recipes/nikkei_free.recipe
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.nikkei.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class NikkeiNet(BasicNewsRecipe):
|
||||||
|
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'News and current market affairs from Japan'
|
||||||
|
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
language = 'ja'
|
||||||
|
|
||||||
|
feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
|
||||||
|
(u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
|
||||||
|
(u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
|
||||||
|
(u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
|
||||||
|
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
|
||||||
|
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
|
||||||
|
(u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
|
||||||
|
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
|
||||||
|
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
|
||||||
|
(u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
|
||||||
|
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
|
||||||
|
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
|
||||||
|
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
||||||
|
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
|
||||||
|
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
|
||||||
|
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
|
||||||
|
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
|
||||||
|
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
|
||||||
|
(u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
|
||||||
|
(u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
|
||||||
|
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
|
||||||
|
(u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
|
||||||
|
(u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
|
||||||
|
(u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags_before = dict(id="CONTENTS")
|
||||||
|
remove_tags = [
|
||||||
|
dict(name="form"),
|
||||||
|
{'class':"cmn-hide"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
109
resources/recipes/nikkei_sub_economy.recipe
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.nikkei.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
import mechanize
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
class NikkeiNet_sub_economy(BasicNewsRecipe):
|
||||||
|
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'News and current market affairs from Japan'
|
||||||
|
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
needs_subscription = True
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
language = 'ja'
|
||||||
|
remove_javascript = False
|
||||||
|
temp_files = []
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"cmn-section cmn-indent"}
|
||||||
|
remove_tags = [
|
||||||
|
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
|
||||||
|
{'class':"cmn-article_keyword cmn-clearfix"},
|
||||||
|
{'class':"cmn-print_headline cmn-clearfix"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
||||||
|
feeds = [ (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
|
||||||
|
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
|
||||||
|
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
|
||||||
|
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
|
||||||
|
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
|
||||||
|
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
|
||||||
|
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
|
||||||
|
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
|
||||||
|
cj = mechanize.LWPCookieJar()
|
||||||
|
br.set_cookiejar(cj)
|
||||||
|
|
||||||
|
#br.set_debug_http(True)
|
||||||
|
#br.set_debug_redirects(True)
|
||||||
|
#br.set_debug_responses(True)
|
||||||
|
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
#print "----------------------------get login form--------------------------------------------"
|
||||||
|
# open login form
|
||||||
|
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
|
||||||
|
response = br.response()
|
||||||
|
#print "----------------------------get login form---------------------------------------------"
|
||||||
|
#print "----------------------------set login form---------------------------------------------"
|
||||||
|
# remove disabled input which brings error on mechanize
|
||||||
|
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
|
||||||
|
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
|
||||||
|
br.set_response(response)
|
||||||
|
br.select_form(name='LA0010Form01')
|
||||||
|
br['LA0010Form01:LA0010Email'] = self.username
|
||||||
|
br['LA0010Form01:LA0010Password'] = self.password
|
||||||
|
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||||
|
br.submit()
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------send login form---------------------------------------------"
|
||||||
|
#print "----------------------------open news main page-----------------------------------------"
|
||||||
|
# open news site
|
||||||
|
br.open('http://www.nikkei.com/')
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||||
|
#print response2.get_data()
|
||||||
|
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||||
|
# forced redirect in default
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br.submit()
|
||||||
|
response3 = br.response()
|
||||||
|
# return some cookie which should be set by Javascript
|
||||||
|
#print response3.geturl()
|
||||||
|
raw = response3.get_data()
|
||||||
|
#print "---------------------------response to form --------------------------------------------"
|
||||||
|
# grab cookie from JS and set it
|
||||||
|
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
|
||||||
|
br.select_form(nr=0)
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
|
||||||
|
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
cj.load(self.temp_files[-1].name)
|
||||||
|
|
||||||
|
br.submit()
|
||||||
|
|
||||||
|
#br.set_debug_http(False)
|
||||||
|
#br.set_debug_redirects(False)
|
||||||
|
#br.set_debug_responses(False)
|
||||||
|
return br
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
108
resources/recipes/nikkei_sub_industry.recipe
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.nikkei.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
import mechanize
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
|
class NikkeiNet_sub_industory(BasicNewsRecipe):
|
||||||
|
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7523\u696d)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'News and current market affairs from Japan'
|
||||||
|
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
needs_subscription = True
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
language = 'ja'
|
||||||
|
remove_javascript = False
|
||||||
|
temp_files = []
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"cmn-section cmn-indent"}
|
||||||
|
remove_tags = [
|
||||||
|
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
|
||||||
|
{'class':"cmn-article_keyword cmn-clearfix"},
|
||||||
|
{'class':"cmn-print_headline cmn-clearfix"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
||||||
|
feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
|
||||||
|
(u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
|
||||||
|
(u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
|
||||||
|
(u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
|
||||||
|
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
|
||||||
|
cj = mechanize.LWPCookieJar()
|
||||||
|
br.set_cookiejar(cj)
|
||||||
|
|
||||||
|
#br.set_debug_http(True)
|
||||||
|
#br.set_debug_redirects(True)
|
||||||
|
#br.set_debug_responses(True)
|
||||||
|
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
#print "----------------------------get login form--------------------------------------------"
|
||||||
|
# open login form
|
||||||
|
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
|
||||||
|
response = br.response()
|
||||||
|
#print "----------------------------get login form---------------------------------------------"
|
||||||
|
#print "----------------------------set login form---------------------------------------------"
|
||||||
|
# remove disabled input which brings error on mechanize
|
||||||
|
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
|
||||||
|
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
|
||||||
|
br.set_response(response)
|
||||||
|
br.select_form(name='LA0010Form01')
|
||||||
|
br['LA0010Form01:LA0010Email'] = self.username
|
||||||
|
br['LA0010Form01:LA0010Password'] = self.password
|
||||||
|
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||||
|
br.submit()
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------send login form---------------------------------------------"
|
||||||
|
#print "----------------------------open news main page-----------------------------------------"
|
||||||
|
# open news site
|
||||||
|
br.open('http://www.nikkei.com/')
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||||
|
#print response2.get_data()
|
||||||
|
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||||
|
# forced redirect in default
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br.submit()
|
||||||
|
response3 = br.response()
|
||||||
|
# return some cookie which should be set by Javascript
|
||||||
|
#print response3.geturl()
|
||||||
|
raw = response3.get_data()
|
||||||
|
#print "---------------------------response to form --------------------------------------------"
|
||||||
|
# grab cookie from JS and set it
|
||||||
|
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
|
||||||
|
br.select_form(nr=0)
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
|
||||||
|
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
cj.load(self.temp_files[-1].name)
|
||||||
|
|
||||||
|
br.submit()
|
||||||
|
|
||||||
|
#br.set_debug_http(False)
|
||||||
|
#br.set_debug_redirects(False)
|
||||||
|
#br.set_debug_responses(False)
|
||||||
|
return br
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
109
resources/recipes/nikkei_sub_life.recipe
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.nikkei.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
import mechanize
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
|
class NikkeiNet_sub_life(BasicNewsRecipe):
|
||||||
|
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'News and current market affairs from Japan'
|
||||||
|
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
needs_subscription = True
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
language = 'ja'
|
||||||
|
remove_javascript = False
|
||||||
|
temp_files = []
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"cmn-section cmn-indent"}
|
||||||
|
remove_tags = [
|
||||||
|
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
|
||||||
|
{'class':"cmn-article_keyword cmn-clearfix"},
|
||||||
|
{'class':"cmn-print_headline cmn-clearfix"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
||||||
|
feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
|
||||||
|
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
|
||||||
|
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
|
||||||
|
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
||||||
|
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
|
||||||
|
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
|
||||||
|
cj = mechanize.LWPCookieJar()
|
||||||
|
br.set_cookiejar(cj)
|
||||||
|
|
||||||
|
#br.set_debug_http(True)
|
||||||
|
#br.set_debug_redirects(True)
|
||||||
|
#br.set_debug_responses(True)
|
||||||
|
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
#print "----------------------------get login form--------------------------------------------"
|
||||||
|
# open login form
|
||||||
|
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
|
||||||
|
response = br.response()
|
||||||
|
#print "----------------------------get login form---------------------------------------------"
|
||||||
|
#print "----------------------------set login form---------------------------------------------"
|
||||||
|
# remove disabled input which brings error on mechanize
|
||||||
|
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
|
||||||
|
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
|
||||||
|
br.set_response(response)
|
||||||
|
br.select_form(name='LA0010Form01')
|
||||||
|
br['LA0010Form01:LA0010Email'] = self.username
|
||||||
|
br['LA0010Form01:LA0010Password'] = self.password
|
||||||
|
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||||
|
br.submit()
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------send login form---------------------------------------------"
|
||||||
|
#print "----------------------------open news main page-----------------------------------------"
|
||||||
|
# open news site
|
||||||
|
br.open('http://www.nikkei.com/')
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||||
|
#print response2.get_data()
|
||||||
|
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||||
|
# forced redirect in default
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br.submit()
|
||||||
|
response3 = br.response()
|
||||||
|
# return some cookie which should be set by Javascript
|
||||||
|
#print response3.geturl()
|
||||||
|
raw = response3.get_data()
|
||||||
|
#print "---------------------------response to form --------------------------------------------"
|
||||||
|
# grab cookie from JS and set it
|
||||||
|
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
|
||||||
|
br.select_form(nr=0)
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
|
||||||
|
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
cj.load(self.temp_files[-1].name)
|
||||||
|
|
||||||
|
br.submit()
|
||||||
|
|
||||||
|
#br.set_debug_http(False)
|
||||||
|
#br.set_debug_redirects(False)
|
||||||
|
#br.set_debug_responses(False)
|
||||||
|
return br
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
102
resources/recipes/nikkei_sub_main.recipe
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.nikkei.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
import mechanize
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
|
class NikkeiNet_sub_main(BasicNewsRecipe):
|
||||||
|
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'News and current market affairs from Japan'
|
||||||
|
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
needs_subscription = True
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
language = 'ja'
|
||||||
|
remove_javascript = False
|
||||||
|
temp_files = []
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"cmn-section cmn-indent"}
|
||||||
|
remove_tags = [
|
||||||
|
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
|
||||||
|
{'class':"cmn-article_keyword cmn-clearfix"},
|
||||||
|
{'class':"cmn-print_headline cmn-clearfix"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
||||||
|
feeds = [ (u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
|
||||||
|
cj = mechanize.LWPCookieJar()
|
||||||
|
br.set_cookiejar(cj)
|
||||||
|
|
||||||
|
#br.set_debug_http(True)
|
||||||
|
#br.set_debug_redirects(True)
|
||||||
|
#br.set_debug_responses(True)
|
||||||
|
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
#print "----------------------------get login form--------------------------------------------"
|
||||||
|
# open login form
|
||||||
|
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
|
||||||
|
response = br.response()
|
||||||
|
#print "----------------------------get login form---------------------------------------------"
|
||||||
|
#print "----------------------------set login form---------------------------------------------"
|
||||||
|
# remove disabled input which brings error on mechanize
|
||||||
|
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
|
||||||
|
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
|
||||||
|
br.set_response(response)
|
||||||
|
br.select_form(name='LA0010Form01')
|
||||||
|
br['LA0010Form01:LA0010Email'] = self.username
|
||||||
|
br['LA0010Form01:LA0010Password'] = self.password
|
||||||
|
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||||
|
br.submit()
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------send login form---------------------------------------------"
|
||||||
|
#print "----------------------------open news main page-----------------------------------------"
|
||||||
|
# open news site
|
||||||
|
br.open('http://www.nikkei.com/')
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||||
|
#print response2.get_data()
|
||||||
|
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||||
|
# forced redirect in default
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br.submit()
|
||||||
|
response3 = br.response()
|
||||||
|
# return some cookie which should be set by Javascript
|
||||||
|
#print response3.geturl()
|
||||||
|
raw = response3.get_data()
|
||||||
|
#print "---------------------------response to form --------------------------------------------"
|
||||||
|
# grab cookie from JS and set it
|
||||||
|
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
|
||||||
|
br.select_form(nr=0)
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
|
||||||
|
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
cj.load(self.temp_files[-1].name)
|
||||||
|
|
||||||
|
br.submit()
|
||||||
|
|
||||||
|
#br.set_debug_http(False)
|
||||||
|
#br.set_debug_redirects(False)
|
||||||
|
#br.set_debug_responses(False)
|
||||||
|
return br
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
109
resources/recipes/nikkei_sub_sports.recipe
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.nikkei.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
import mechanize
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
|
class NikkeiNet_sub_sports(BasicNewsRecipe):
|
||||||
|
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'News and current market affairs from Japan'
|
||||||
|
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
needs_subscription = True
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
language = 'ja'
|
||||||
|
remove_javascript = False
|
||||||
|
temp_files = []
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"cmn-section cmn-indent"}
|
||||||
|
remove_tags = [
|
||||||
|
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
|
||||||
|
{'class':"cmn-article_keyword cmn-clearfix"},
|
||||||
|
{'class':"cmn-print_headline cmn-clearfix"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
|
||||||
|
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba')
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
|
||||||
|
cj = mechanize.LWPCookieJar()
|
||||||
|
br.set_cookiejar(cj)
|
||||||
|
|
||||||
|
#br.set_debug_http(True)
|
||||||
|
#br.set_debug_redirects(True)
|
||||||
|
#br.set_debug_responses(True)
|
||||||
|
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
#print "----------------------------get login form--------------------------------------------"
|
||||||
|
# open login form
|
||||||
|
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
|
||||||
|
response = br.response()
|
||||||
|
#print "----------------------------get login form---------------------------------------------"
|
||||||
|
#print "----------------------------set login form---------------------------------------------"
|
||||||
|
# remove disabled input which brings error on mechanize
|
||||||
|
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
|
||||||
|
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
|
||||||
|
br.set_response(response)
|
||||||
|
br.select_form(name='LA0010Form01')
|
||||||
|
br['LA0010Form01:LA0010Email'] = self.username
|
||||||
|
br['LA0010Form01:LA0010Password'] = self.password
|
||||||
|
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||||
|
br.submit()
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------send login form---------------------------------------------"
|
||||||
|
#print "----------------------------open news main page-----------------------------------------"
|
||||||
|
# open news site
|
||||||
|
br.open('http://www.nikkei.com/')
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||||
|
#print response2.get_data()
|
||||||
|
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||||
|
# forced redirect in default
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br.submit()
|
||||||
|
response3 = br.response()
|
||||||
|
# return some cookie which should be set by Javascript
|
||||||
|
#print response3.geturl()
|
||||||
|
raw = response3.get_data()
|
||||||
|
#print "---------------------------response to form --------------------------------------------"
|
||||||
|
# grab cookie from JS and set it
|
||||||
|
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
|
||||||
|
br.select_form(nr=0)
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
|
||||||
|
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
cj.load(self.temp_files[-1].name)
|
||||||
|
|
||||||
|
br.submit()
|
||||||
|
|
||||||
|
#br.set_debug_http(False)
|
||||||
|
#br.set_debug_redirects(False)
|
||||||
|
#br.set_debug_responses(False)
|
||||||
|
return br
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -13,6 +13,7 @@ class NowToronto(BasicNewsRecipe):
|
|||||||
title = u'Now Toronto'
|
title = u'Now Toronto'
|
||||||
description = u'Now Toronto'
|
description = u'Now Toronto'
|
||||||
__author__ = 'Starson17'
|
__author__ = 'Starson17'
|
||||||
|
language = 'en_CA'
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'no_default_epub_cover' : True
|
'no_default_epub_cover' : True
|
||||||
}
|
}
|
||||||
|
37
resources/recipes/reuters_ja.recipe
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
class ReutersJa(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = 'Reuters(Japan)'
|
||||||
|
description = 'Global news in Japanese'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
use_embedded_content = False
|
||||||
|
language = 'ja'
|
||||||
|
max_articles_per_feed = 10
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/JPTopNews?format=xml'),
|
||||||
|
('World News', 'http://feeds.reuters.com/reuters/JPWorldNews?format=xml'),
|
||||||
|
('Business News', 'http://feeds.reuters.com/reuters/JPBusinessNews?format=xml'),
|
||||||
|
('Technology News', 'http://feeds.reuters.com/reuters/JPTechnologyNews?format=xml'),
|
||||||
|
('Oddly Enough News', 'http://feeds.reuters.com/reuters/JPOddlyEnoughNews?format=xml')
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"article primaryContent"}
|
||||||
|
remove_tags = [ dict(id="banner"),
|
||||||
|
dict(id="autilities"),
|
||||||
|
dict(id="textSizer"),
|
||||||
|
dict(id="shareFooter"),
|
||||||
|
dict(id="relatedNews"),
|
||||||
|
dict(id="editorsChoice"),
|
||||||
|
dict(id="ecArticles"),
|
||||||
|
{'class':"secondaryContent"},
|
||||||
|
{'class':"module"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"assetBuddy"}
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
m = re.search('(.*idJPJAPAN-[0-9]+)', url)
|
||||||
|
return m.group(0)+'?sp=true'
|
||||||
|
|
33
resources/recipes/the_h.recipe
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.h-online.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TheHeiseOnline(BasicNewsRecipe):
|
||||||
|
title = u'The H'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
oldest_article = 3
|
||||||
|
description = 'In association with Heise Online'
|
||||||
|
publisher = 'Heise Media UK Ltd.'
|
||||||
|
category = 'news, technology, security'
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
language = 'en'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
conversion_options = {
|
||||||
|
'comment' : description
|
||||||
|
,'tags' : category
|
||||||
|
,'publisher': publisher
|
||||||
|
,'language' : language
|
||||||
|
}
|
||||||
|
feeds = [
|
||||||
|
(u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
|
||||||
|
]
|
||||||
|
|
||||||
|
def print_version(self, url):
|
||||||
|
return url + '?view=print'
|
||||||
|
|
@ -132,9 +132,9 @@ class CheckIntegrity(QProgressDialog):
|
|||||||
titles = [self.db.title(x, index_is_id=True) for x in bad]
|
titles = [self.db.title(x, index_is_id=True) for x in bad]
|
||||||
det_msg = '\n'.join(titles)
|
det_msg = '\n'.join(titles)
|
||||||
warning_dialog(self, _('Some inconsistencies found'),
|
warning_dialog(self, _('Some inconsistencies found'),
|
||||||
_('The following books had formats listed in the '
|
_('The following books had formats or covers listed in the '
|
||||||
'database that are not actually available. '
|
'database that are not actually available. '
|
||||||
'The entries for the formats have been removed. '
|
'The entries for the formats/covers have been removed. '
|
||||||
'You should check them manually. This can '
|
'You should check them manually. This can '
|
||||||
'happen if you manipulate the files in the '
|
'happen if you manipulate the files in the '
|
||||||
'library folder directly.'), det_msg=det_msg, show=True)
|
'library folder directly.'), det_msg=det_msg, show=True)
|
||||||
|
@ -670,7 +670,6 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
for id in ids:
|
for id in ids:
|
||||||
try:
|
try:
|
||||||
self._data[id] = db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0]
|
self._data[id] = db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0]
|
||||||
self._data[id].append(db.has_cover(id, index_is_id=True))
|
|
||||||
self._data[id].append(db.book_on_device_string(id))
|
self._data[id].append(db.book_on_device_string(id))
|
||||||
self._data[id].append(None)
|
self._data[id].append(None)
|
||||||
if len(self.composites) > 0:
|
if len(self.composites) > 0:
|
||||||
@ -691,7 +690,6 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
self._data.extend(repeat(None, max(ids)-len(self._data)+2))
|
self._data.extend(repeat(None, max(ids)-len(self._data)+2))
|
||||||
for id in ids:
|
for id in ids:
|
||||||
self._data[id] = db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0]
|
self._data[id] = db.conn.get('SELECT * from meta2 WHERE id=?', (id,))[0]
|
||||||
self._data[id].append(db.has_cover(id, index_is_id=True))
|
|
||||||
self._data[id].append(db.book_on_device_string(id))
|
self._data[id].append(db.book_on_device_string(id))
|
||||||
self._data[id].append(None)
|
self._data[id].append(None)
|
||||||
if len(self.composites) > 0:
|
if len(self.composites) > 0:
|
||||||
@ -721,7 +719,6 @@ class ResultCache(SearchQueryParser): # {{{
|
|||||||
self._data[r[0]] = r
|
self._data[r[0]] = r
|
||||||
for item in self._data:
|
for item in self._data:
|
||||||
if item is not None:
|
if item is not None:
|
||||||
item.append(db.has_cover(item[0], index_is_id=True))
|
|
||||||
item.append(db.book_on_device_string(item[0]))
|
item.append(db.book_on_device_string(item[0]))
|
||||||
item.append(None)
|
item.append(None)
|
||||||
if len(self.composites) > 0:
|
if len(self.composites) > 0:
|
||||||
|
@ -226,7 +226,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
'lccn',
|
'lccn',
|
||||||
'pubdate',
|
'pubdate',
|
||||||
'flags',
|
'flags',
|
||||||
'uuid'
|
'uuid',
|
||||||
|
'has_cover'
|
||||||
]
|
]
|
||||||
lines = []
|
lines = []
|
||||||
for col in columns:
|
for col in columns:
|
||||||
@ -245,7 +246,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
|
'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
|
||||||
'publisher':9, 'series_index':10,
|
'publisher':9, 'series_index':10,
|
||||||
'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
|
'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
|
||||||
'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19}
|
'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20}
|
||||||
|
|
||||||
for k,v in self.FIELD_MAP.iteritems():
|
for k,v in self.FIELD_MAP.iteritems():
|
||||||
self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
|
self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
|
||||||
@ -267,12 +268,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
base,
|
base,
|
||||||
prefer_custom=True)
|
prefer_custom=True)
|
||||||
|
|
||||||
self.FIELD_MAP['cover'] = base+1
|
self.field_metadata.set_field_record_index('cover',
|
||||||
self.field_metadata.set_field_record_index('cover', base+1, prefer_custom=False)
|
self.FIELD_MAP['cover'], prefer_custom=False)
|
||||||
self.FIELD_MAP['ondevice'] = base+2
|
self.FIELD_MAP['ondevice'] = base+1
|
||||||
self.field_metadata.set_field_record_index('ondevice', base+2, prefer_custom=False)
|
self.field_metadata.set_field_record_index('ondevice', base+1, prefer_custom=False)
|
||||||
self.FIELD_MAP['all_metadata'] = base+3
|
self.FIELD_MAP['all_metadata'] = base+2
|
||||||
self.field_metadata.set_field_record_index('all_metadata', base+3, prefer_custom=False)
|
self.field_metadata.set_field_record_index('all_metadata', base+2, prefer_custom=False)
|
||||||
|
|
||||||
script = '''
|
script = '''
|
||||||
DROP VIEW IF EXISTS meta2;
|
DROP VIEW IF EXISTS meta2;
|
||||||
@ -332,7 +333,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
self.dirtied_cache = set([x[0] for x in d])
|
self.dirtied_cache = set([x[0] for x in d])
|
||||||
|
|
||||||
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
|
self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
|
||||||
|
st = time.time()
|
||||||
self.refresh()
|
self.refresh()
|
||||||
|
print 'refresh time:', time.time() - st
|
||||||
self.last_update_check = self.last_modified()
|
self.last_update_check = self.last_modified()
|
||||||
|
|
||||||
|
|
||||||
@ -763,17 +766,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
identical_book_ids.add(book_id)
|
identical_book_ids.add(book_id)
|
||||||
return identical_book_ids
|
return identical_book_ids
|
||||||
|
|
||||||
def has_cover(self, index, index_is_id=False):
|
def remove_cover(self, id, notify=True, commit=True):
|
||||||
id = index if index_is_id else self.id(index)
|
|
||||||
try:
|
|
||||||
path = os.path.join(self.abspath(id, index_is_id=True,
|
|
||||||
create_dirs=False), 'cover.jpg')
|
|
||||||
except:
|
|
||||||
# Can happen if path has not yet been set
|
|
||||||
return False
|
|
||||||
return os.path.exists(path)
|
|
||||||
|
|
||||||
def remove_cover(self, id, notify=True):
|
|
||||||
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
|
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
try:
|
try:
|
||||||
@ -781,11 +774,14 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
except (IOError, OSError):
|
except (IOError, OSError):
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
os.remove(path)
|
os.remove(path)
|
||||||
|
self.conn.execute('UPDATE books SET has_cover=0 WHERE id=?', (id,))
|
||||||
|
if commit:
|
||||||
|
self.conn.commit()
|
||||||
self.data.set(id, self.FIELD_MAP['cover'], False, row_is_id=True)
|
self.data.set(id, self.FIELD_MAP['cover'], False, row_is_id=True)
|
||||||
if notify:
|
if notify:
|
||||||
self.notify('cover', [id])
|
self.notify('cover', [id])
|
||||||
|
|
||||||
def set_cover(self, id, data, notify=True):
|
def set_cover(self, id, data, notify=True, commit=True):
|
||||||
'''
|
'''
|
||||||
Set the cover for this book.
|
Set the cover for this book.
|
||||||
|
|
||||||
@ -802,6 +798,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
except (IOError, OSError):
|
except (IOError, OSError):
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
save_cover_data_to(data, path)
|
save_cover_data_to(data, path)
|
||||||
|
self.conn.execute('UPDATE books SET has_cover=1 WHERE id=?', (id,))
|
||||||
|
if commit:
|
||||||
|
self.conn.commit()
|
||||||
self.data.set(id, self.FIELD_MAP['cover'], True, row_is_id=True)
|
self.data.set(id, self.FIELD_MAP['cover'], True, row_is_id=True)
|
||||||
if notify:
|
if notify:
|
||||||
self.notify('cover', [id])
|
self.notify('cover', [id])
|
||||||
@ -1273,11 +1272,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
if mi.series:
|
if mi.series:
|
||||||
doit(self.set_series, id, mi.series, notify=False, commit=False)
|
doit(self.set_series, id, mi.series, notify=False, commit=False)
|
||||||
if mi.cover_data[1] is not None:
|
if mi.cover_data[1] is not None:
|
||||||
doit(self.set_cover, id, mi.cover_data[1]) # doesn't use commit
|
doit(self.set_cover, id, mi.cover_data[1], commit=False)
|
||||||
elif mi.cover is not None:
|
elif mi.cover is not None:
|
||||||
if os.access(mi.cover, os.R_OK):
|
if os.access(mi.cover, os.R_OK):
|
||||||
with lopen(mi.cover, 'rb') as f:
|
with lopen(mi.cover, 'rb') as f:
|
||||||
doit(self.set_cover, id, f)
|
doit(self.set_cover, id, f, commit=False)
|
||||||
if mi.tags:
|
if mi.tags:
|
||||||
doit(self.set_tags, id, mi.tags, notify=False, commit=False)
|
doit(self.set_tags, id, mi.tags, notify=False, commit=False)
|
||||||
if mi.comments:
|
if mi.comments:
|
||||||
@ -2291,7 +2290,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
x['tags'] = [i.replace('|', ',').strip() for i in x['tags'].split(',')] if x['tags'] else []
|
x['tags'] = [i.replace('|', ',').strip() for i in x['tags'].split(',')] if x['tags'] else []
|
||||||
path = os.path.join(prefix, self.path(record[self.FIELD_MAP['id']], index_is_id=True))
|
path = os.path.join(prefix, self.path(record[self.FIELD_MAP['id']], index_is_id=True))
|
||||||
x['cover'] = os.path.join(path, 'cover.jpg')
|
x['cover'] = os.path.join(path, 'cover.jpg')
|
||||||
if not self.has_cover(x['id'], index_is_id=True):
|
if not record[self.FIELD_MAP['cover']]:
|
||||||
x['cover'] = None
|
x['cover'] = None
|
||||||
formats = self.formats(record[self.FIELD_MAP['id']], index_is_id=True)
|
formats = self.formats(record[self.FIELD_MAP['id']], index_is_id=True)
|
||||||
if formats:
|
if formats:
|
||||||
@ -2510,11 +2509,20 @@ books_series_link feeds
|
|||||||
if id not in bad:
|
if id not in bad:
|
||||||
bad[id] = []
|
bad[id] = []
|
||||||
bad[id].append(fmt)
|
bad[id].append(fmt)
|
||||||
|
has_cover = self.data.get(id, self.FIELD_MAP['cover'],
|
||||||
|
row_is_id=True)
|
||||||
|
if has_cover and self.cover(id, index_is_id=True, as_path=True) is None:
|
||||||
|
if id not in bad:
|
||||||
|
bad[id] = []
|
||||||
|
bad[id].append('COVER')
|
||||||
callback(0.1+0.9*(1+i)/total, _('Checked id') + ' %d'%id)
|
callback(0.1+0.9*(1+i)/total, _('Checked id') + ' %d'%id)
|
||||||
|
|
||||||
for id in bad:
|
for id in bad:
|
||||||
for fmt in bad[id]:
|
for fmt in bad[id]:
|
||||||
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, fmt.upper()))
|
if fmt != 'COVER':
|
||||||
|
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, fmt.upper()))
|
||||||
|
else:
|
||||||
|
self.conn.execute('UPDATE books SET has_cover=0 WHERE id=?', (id,))
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
self.refresh_ids(list(bad.keys()))
|
self.refresh_ids(list(bad.keys()))
|
||||||
|
|
||||||
|
@ -6,6 +6,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
class SchemaUpgrade(object):
|
class SchemaUpgrade(object):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -409,3 +411,17 @@ class SchemaUpgrade(object):
|
|||||||
'''
|
'''
|
||||||
self.conn.executescript(script)
|
self.conn.executescript(script)
|
||||||
|
|
||||||
|
def upgrade_version_14(self):
|
||||||
|
'Cache has_cover'
|
||||||
|
self.conn.execute('ALTER TABLE books ADD COLUMN has_cover BOOL DEFAULT 0')
|
||||||
|
data = self.conn.get('SELECT id,path FROM books', all=True)
|
||||||
|
def has_cover(path):
|
||||||
|
if path:
|
||||||
|
path = os.path.join(self.library_path, path.replace('/', os.sep),
|
||||||
|
'cover.jpg')
|
||||||
|
return os.path.exists(path)
|
||||||
|
return False
|
||||||
|
|
||||||
|
ids = [(x[0],) for x in data if has_cover(x[1])]
|
||||||
|
self.conn.executemany('UPDATE books SET has_cover=1 WHERE id=?', ids)
|
||||||
|
|
||||||
|
@ -34,10 +34,11 @@ sqlite.register_adapter(datetime, adapt_datetime)
|
|||||||
sqlite.register_converter('timestamp', convert_timestamp)
|
sqlite.register_converter('timestamp', convert_timestamp)
|
||||||
|
|
||||||
def convert_bool(val):
|
def convert_bool(val):
|
||||||
return bool(int(val))
|
return val != '0'
|
||||||
|
|
||||||
sqlite.register_adapter(bool, lambda x : 1 if x else 0)
|
sqlite.register_adapter(bool, lambda x : 1 if x else 0)
|
||||||
sqlite.register_converter('bool', convert_bool)
|
sqlite.register_converter('bool', convert_bool)
|
||||||
|
sqlite.register_converter('BOOL', convert_bool)
|
||||||
|
|
||||||
class DynamicFilter(object):
|
class DynamicFilter(object):
|
||||||
|
|
||||||
|