Sync to trunk.

Commit cc4531a1de by John Schember, 2011-05-12 18:32:48 -04:00
47 changed files with 1396 additions and 618 deletions

@@ -93,7 +93,7 @@ class Arcamax(BasicNewsRecipe):
             for page in pages:
                 page_soup = self.index_to_soup(url)
                 if page_soup:
-                    title = page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0]
+                    title = self.tag_to_string(page_soup.find(name='div', attrs={'class':'comics-header'}).h1.contents[0])
                     page_url = url
                     # orig prev_page_url = 'http://www.arcamax.com' + page_soup.find('a', attrs={'class':'prev'}, text='Previous').parent['href']
                     prev_page_url = 'http://www.arcamax.com' + page_soup.find('span', text='Previous').parent.parent['href']
@@ -127,4 +127,3 @@ class Arcamax(BasicNewsRecipe):
         p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
         body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
         '''
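
Note on the title fix above: h1.contents[0] returns the first child node, which can be a Tag rather than text, so the recipe title could end up holding markup. tag_to_string, a BasicNewsRecipe helper, flattens a node to plain text. A minimal sketch (illustrative HTML, not the live page):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    html = '<div class="comics-header"><h1><a href="/g">Garfield</a></h1></div>'  # illustrative
    soup = BeautifulSoup(html)
    node = soup.find('div', attrs={'class':'comics-header'}).h1.contents[0]
    # node is the <a> Tag here; self.tag_to_string(node) inside a recipe
    # reduces it to the plain text u'Garfield', which is what the title needs.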

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'中時電子報'
    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),
             (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),
             (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),
             (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),
             (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),
             (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),
             (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),
             (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),
             (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),
             (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),
             #(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'),
             #(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'),  # broken links
             #(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss')  # broken links
            ]
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'China Times Group'
    description = 'China Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
    keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})]
    remove_tags = [dict(name='div', attrs={'class':['focus-news']})]

recipes/divahair.recipe (new file, 53 lines)

@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
divahair.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class DivaHair(BasicNewsRecipe):
    title = u'Diva Hair'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Coafuri, frizuri, tunsori ..'
    publisher = u'Diva Hair'
    category = u'Ziare,Stiri,Coafuri,Femei'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.divahair.ro/imgs/logo.jpg'
    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }
    keep_only_tags = [
                       dict(name='td', attrs={'class':'spatiuart'})
                     , dict(name='div', attrs={'class':'spatiuart'})
                     ]
    remove_tags = [
                    dict(name='div', attrs={'class':'categorie'})
                  , dict(name='div', attrs={'class':'gri gri2 detaliiart'})
                  , dict(name='div', attrs={'class':'articol_box_bottom'})
                  ]
    remove_tags_after = [
                          dict(name='div', attrs={'class':'articol_box_bottom'})
                        ]
    feeds = [ (u'\u0218tiri', u'http://www.divahair.ro/feed') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)
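
The preprocess_html/adeify_images pairing above recurs in several of the new Romanian recipes. adeify_images is the stock BasicNewsRecipe helper that rewrites <img> tags to avoid rendering problems in Adobe Digital Editions based readers. A sketch of how the hook can stack further cleanup (the style-stripping line is a hypothetical addition, not part of this recipe):

    def preprocess_html(self, soup):
        for tag in soup.findAll(style=True):
            del tag['style']             # hypothetical extra step: drop inline styles
        return self.adeify_images(soup)  # calibre helper used by the recipes above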

recipes/icons/divahair.png (new binary file, 675 B)
recipes/icons/mayra.png (new binary file, 620 B)
(two further new binary files, 243 B and 837 B; names not shown)

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    title = u'自由電子報'
    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),
             (u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),
             (u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),
             (u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),
             (u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),
             (u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),
             (u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),
             (u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),
             (u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),
             (u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),
             (u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),
             (u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),
             (u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),
             (u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),
             (u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),
            ]
    extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.1'
    language = 'zh'
    publisher = 'Liberty Times Group'
    description = 'Liberty Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
    cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
    keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})]

recipes/mayra.recipe (new file, 51 lines)

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
mayra.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Mayra(BasicNewsRecipe):
    title = u'Mayra'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Traieste urban, cool, sexy'
    publisher = 'Mayra'
    category = 'Ziare,Stiri,Reviste'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://img.konkurs.ro/img/concursuri-cu-premii/147/14672_front.jpg'
    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }
    keep_only_tags = [
                       dict(name='div', attrs={'id':'article_details'})
                     ]
    remove_tags = [
                    dict(name='div', attrs={'id':'LikePluginPagelet'})
                  , dict(name='p', attrs={'id':'tags'})
                  , dict(name='span', attrs={'id':'tweet-button'})
                  ]
    remove_tags_after = [
                          dict(name='div', attrs={'id':'LikePluginPagelet'})
                        ]
    feeds = [ (u'\u0218tiri', u'http://www.mayra.ro/rss') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

@@ -1,15 +1,18 @@
+# -*- coding: utf-8 -*-
 __license__ = 'GPL v3'
 __copyright__ = '2010-2011, Eddie Lau'
-# Users of Kindle 3 (with limited system-level CJK support)
+# Users of Kindle 3 with limited system-level CJK support
 # please replace the following "True" with "False".
 __MakePeriodical__ = True
-# Turn it to True if your device supports display of CJK titles
+# Turn below to true if your device supports display of CJK titles
 __UseChineseTitle__ = False
+# Turn below to true if you wish to use life.mingpao.com as the main article source
+__UseLife__ = True
 '''
 Change Log:
+2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
 2011/03/06: add new articles for finance section, also a new section "Columns"
 2011/02/28: rearrange the sections
 [Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
@@ -32,41 +32,43 @@ import os, datetime, re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation

 class MPHKRecipe(BasicNewsRecipe):
     title = 'Ming Pao - Hong Kong'
     oldest_article = 1
     max_articles_per_feed = 100
     __author__ = 'Eddie Lau'
     description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
     publisher = 'MingPao'
     category = 'Chinese, News, Hong Kong'
     remove_javascript = True
     use_embedded_content = False
     no_stylesheets = True
     language = 'zh'
     encoding = 'Big5-HKSCS'
     recursions = 0
     conversion_options = {'linearize_tables':True}
     timefmt = ''
     extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
                       dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
                       dict(name='font', attrs={'color':['AA0000']}), # for column articles title
                       dict(attrs={'id':['newscontent']}), # entertainment and column page content
                       dict(attrs={'id':['newscontent01','newscontent02']}),
-                      dict(attrs={'class':['photo']})
+                      dict(attrs={'class':['photo']}),
+                      dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
                      ]
     remove_tags = [dict(name='style'),
-                   dict(attrs={'id':['newscontent135']}), # for the finance page
+                   dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
                    dict(name='table')] # for content fetched from life.mingpao.com
     remove_attributes = ['width']
     preprocess_regexps = [
                           (re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
                           lambda match: '<h1>'),
                           (re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
@@ -80,10 +85,10 @@ class MPHKRecipe(BasicNewsRecipe):
                           lambda match: "</b>")
                          ]

     def image_url_processor(cls, baseurl, url):
         # trick: break the url at the first occurrence of digit, add an additional
         # '_' at the front
         # not working, may need to move this to preprocess_html() method
         # minIdx = 10000
         # i0 = url.find('0')
         # if i0 >= 0 and i0 < minIdx:
@@ -115,314 +120,357 @@ class MPHKRecipe(BasicNewsRecipe):
        # i9 = url.find('9')
        # if i9 >= 0 and i9 < minIdx:
        #     minIdx = i9
        return url

    def get_dtlocal(self):
        dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return dt_local

    def get_fetchdate(self):
        return self.get_dtlocal().strftime("%Y%m%d")

    def get_fetchformatteddate(self):
        return self.get_dtlocal().strftime("%Y-%m-%d")

    def get_fetchday(self):
        # dt_utc = datetime.datetime.utcnow()
        # convert UTC to local hk time - at around HKT 6.00am, all news are available
        # dt_local = dt_utc - datetime.timedelta(-2.0/24)
        return self.get_dtlocal().strftime("%d")

    def get_cover_url(self):
        cover = 'http://news.mingpao.com/' + self.get_fetchdate() + '/' + self.get_fetchdate() + '_' + self.get_fetchday() + 'gacov.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except:
            cover = None
        return cover

    def parse_index(self):
        feeds = []
        dateStr = self.get_fetchdate()

        if __UseLife__:
            for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
                                       (u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
                                       (u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
                                       (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
                                       (u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
                                       (u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
                                       (u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
                                       (u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
                                       (u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
                                       (u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
                                       (u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
                articles = self.parse_section2(url, keystr)
                if articles:
                    feeds.append((title, articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))
        else:
            for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                               (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special- editorial
            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
            if ed_articles:
                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

            for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                               (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
                               (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - finance
            #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
            if fin_articles:
                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special - entertainment
            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
            if ent_articles:
                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

            for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                               (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                articles = self.parse_section(url)
                if articles:
                    feeds.append((title, articles))

            # special- columns
            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
            if col_articles:
                feeds.append((u'\u5c08\u6b04 Columns', col_articles))

        return feeds

    # parse from news.mingpao.com
    def parse_section(self, url):
        dateStr = self.get_fetchdate()
        soup = self.index_to_soup(url)
        divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
        current_articles = []
        included_urls = []
        divs.reverse()
        for i in divs:
            a = i.find('a', href = True)
            title = self.tag_to_string(a)
            url = a.get('href', False)
            url = 'http://news.mingpao.com/' + dateStr + '/' +url
            if url not in included_urls and url.rfind('Redirect') == -1:
                current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    # parse from life.mingpao.com
    def parse_section2(self, url, keystr):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_ed_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('nal') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_fin_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href= True)
        current_articles = []
        included_urls = []
        for i in a:
            #url = 'http://www.mpfinance.com/cfm/' + i.get('href', False)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            #if url not in included_urls and not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
            if url not in included_urls and (not url.rfind('txt') == -1) and (not url.rfind('nal') == -1):
                title = self.tag_to_string(i)
                current_articles.append({'title': title, 'url': url, 'description':''})
                included_urls.append(url)
        return current_articles

    def parse_ent_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('star') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def parse_col_section(self, url):
        self.get_fetchdate()
        soup = self.index_to_soup(url)
        a = soup.findAll('a', href=True)
        a.reverse()
        current_articles = []
        included_urls = []
        for i in a:
            title = self.tag_to_string(i)
            url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
            if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind('ncl') == -1):
                current_articles.append({'title': title, 'url': url, 'description': ''})
                included_urls.append(url)
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(style=True):
            del item['width']
        for item in soup.findAll(stype=True):
            del item['absmiddle']
        return soup

    def create_opf(self, feeds, dir=None):
        if dir is None:
            dir = self.output_dir
        if __UseChineseTitle__ == True:
            title = u'\u660e\u5831 (\u9999\u6e2f)'
        else:
            title = self.short_title()
        # if not generating a periodical, force date to apply in title
        if __MakePeriodical__ == False:
            title = title + ' ' + self.get_fetchformatteddate()
        if True:
            mi = MetaInformation(title, [self.publisher])
            mi.publisher = self.publisher
            mi.author_sort = self.publisher
            if __MakePeriodical__ == True:
                mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
            else:
                mi.publication_type = self.publication_type+':'+self.short_title()
            #mi.timestamp = nowf()
            mi.timestamp = self.get_dtlocal()
            mi.comments = self.description
            if not isinstance(mi.comments, unicode):
                mi.comments = mi.comments.decode('utf-8', 'replace')
            #mi.pubdate = nowf()
            mi.pubdate = self.get_dtlocal()
            opf_path = os.path.join(dir, 'index.opf')
            ncx_path = os.path.join(dir, 'index.ncx')
            opf = OPFCreator(dir, mi)
            # Add mastheadImage entry to <guide> section
            mp = getattr(self, 'masthead_path', None)
            if mp is not None and os.access(mp, os.R_OK):
                from calibre.ebooks.metadata.opf2 import Guide
                ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
                ref.type = 'masthead'
                ref.title = 'Masthead Image'
                opf.guide.append(ref)

        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
        manifest.append(os.path.join(dir, 'index.html'))
        manifest.append(os.path.join(dir, 'index.ncx'))

        # Get cover
        cpath = getattr(self, 'cover_path', None)
        if cpath is None:
            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
            if self.default_cover(pf):
                cpath = pf.name
        if cpath is not None and os.access(cpath, os.R_OK):
            opf.cover = cpath
            manifest.append(cpath)

        # Get masthead
        mpath = getattr(self, 'masthead_path', None)
        if mpath is not None and os.access(mpath, os.R_OK):
            manifest.append(mpath)

        opf.create_manifest_from_files_in(manifest)
        for mani in opf.manifest:
            if mani.path.endswith('.ncx'):
                mani.id = 'ncx'
            if mani.path.endswith('mastheadImage.jpg'):
                mani.id = 'masthead-image'
        entries = ['index.html']
        toc = TOC(base_path=dir)
        self.play_order_counter = 0
        self.play_order_map = {}

        def feed_index(num, parent):
            f = feeds[num]
            for j, a in enumerate(f):
                if getattr(a, 'downloaded', False):
                    adir = 'feed_%d/article_%d/'%(num, j)
                    auth = a.author
                    if not auth:
                        auth = None
                    desc = a.text_summary
                    if not desc:
                        desc = None
                    else:
                        desc = self.description_limiter(desc)
                    entries.append('%sindex.html'%adir)
                    po = self.play_order_map.get(entries[-1], None)
                    if po is None:
                        self.play_order_counter += 1
                        po = self.play_order_counter
                    parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                    play_order=po, author=auth, description=desc)
                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                    for sp in a.sub_pages:
                        prefix = os.path.commonprefix([opf_path, sp])
                        relp = sp[len(prefix):]
                        entries.append(relp.replace(os.sep, '/'))
                        last = sp
                    if os.path.exists(last):
                        with open(last, 'rb') as fi:
                            src = fi.read().decode('utf-8')
                        soup = BeautifulSoup(src)
                        body = soup.find('body')
                        if body is not None:
                            prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
                            templ = self.navbar.generate(True, num, j, len(f),
                                                         not self.has_single_feed,
                                                         a.orig_url, self.publisher, prefix=prefix,
                                                         center=self.center_navbar)
                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                            body.insert(len(body.contents), elem)
                            with open(last, 'wb') as fi:
                                fi.write(unicode(soup).encode('utf-8'))

        if len(feeds) == 0:
            raise Exception('All feeds are empty, aborting.')
        if len(feeds) > 1:
            for i, f in enumerate(feeds):
                entries.append('feed_%d/index.html'%i)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                auth = getattr(f, 'author', None)
                if not auth:
                    auth = None
                desc = getattr(f, 'description', None)
                if not desc:
                    desc = None
                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                                           f.title, play_order=po, description=desc, author=auth))
        else:
            entries.append('feed_%d/index.html'%0)
            feed_index(0, toc)
        for i, p in enumerate(entries):
            entries[i] = os.path.join(dir, p.replace('/', os.sep))
        opf.create_spine(entries)
        opf.set_toc(toc)

        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
            opf.render(opf_file, ncx_file)
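
Worked example of the two-hour shift in get_dtlocal() above (a sketch; the dates are illustrative): the day's Ming Pao pages are complete at about 06:00 HKT, which is 22:00 UTC of the previous day, so adding two hours to UTC makes the derived date string roll over at exactly that moment rather than at UTC midnight:

    import datetime

    for utc in (datetime.datetime(2011, 5, 12, 21, 59),   # 05:59 HKT on May 13
                datetime.datetime(2011, 5, 12, 22, 0)):   # 06:00 HKT on May 13
        dt_local = utc - datetime.timedelta(-2.0/24)      # the recipe's shift: +2 hours
        print utc.strftime("%H:%M UTC"), '->', dt_local.strftime("%Y%m%d")
    # 21:59 UTC -> 20110512 (yesterday's edition is still the latest)
    # 22:00 UTC -> 20110513 (the new edition is treated as available)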

recipes/moldovaazi.recipe (new file, 50 lines)

@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
azi.md
'''

from calibre.web.feeds.news import BasicNewsRecipe

class MoldovaAzi(BasicNewsRecipe):
    title = u'Moldova Azi'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Moldova pe internet'
    publisher = 'Moldova Azi'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.azi.md/images/logo.gif'
    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }
    keep_only_tags = [ dict(name='div', attrs={'id':'in'})
                     ]
    remove_tags = [
                    dict(name='div', attrs={'class':'in-more-stories'})
                  ]
    remove_tags_after = [
                          dict(name='div', attrs={'id':'comment_wrapper'})
                        , dict(name='div', attrs={'class':'box-title4'})
                        ]
    feeds = [ (u'\u0218tiri', u'http://www.azi.md/ro/feeds/0/rss201') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
newsmoldova.md
'''

from calibre.web.feeds.news import BasicNewsRecipe

class NewsMoldova(BasicNewsRecipe):
    title = u'Agen\u0163ia de \u015ftiri Moldova'
    language = 'ro'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Agen\u0163ia de \u015ftiri Moldova'
    publisher = 'Moldova'
    category = 'Ziare,Stiri,Moldova'
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True
    cover_url = 'http://www.newsmoldova.md/i/logo_top_md.gif'
    conversion_options = {
                          'comments'  : description
                         ,'tags'      : category
                         ,'language'  : language
                         ,'publisher' : publisher
                         }
    keep_only_tags = [ dict(name='div', attrs={'class':'main-article-index article'})
                     ]
    remove_tags = [
                    dict(name='div', attrs={'id':'actions'})
                  , dict(name='li', attrs={'class':'invisible'})
                  ]
    remove_tags_after = [
                          dict(name='div', attrs={'id':'actions'})
                        ]
    feeds = [ (u'\u0218tiri', u'http://newsmoldova.md/export/rss2/archive/index.xml') ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

@@ -3,7 +3,6 @@ __license__ = 'GPL v3'
 '''
 '''
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.web.feeds import Feed

 class ReadersDigest(BasicNewsRecipe):
@@ -38,151 +37,20 @@ class ReadersDigest(BasicNewsRecipe):
 '''

-    remove_tags = [
-        dict(name='h4', attrs={'class':'close'}),
-        dict(name='div', attrs={'class':'fromLine'}),
-        dict(name='img', attrs={'class':'colorTag'}),
-        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
-        dict(name='div', attrs={'class':'horizontalAd'}),
-        dict(name='div', attrs={'id':'imageCounterLeft'}),
-        dict(name='div', attrs={'id':'commentsPrint'})
-    ]
-
     feeds = [
-        ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
-        ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
-        ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
-        ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
+        ('Food', 'http://www.rd.com/food/feed'),
+        ('Health', 'http://www.rd.com/health/feed'),
+        ('Home', 'http://www.rd.com/home/feed'),
+        ('Family', 'http://www.rd.com/family/feed'),
+        ('Money', 'http://www.rd.com/money/feed'),
+        ('Travel', 'http://www.rd.com/travel/feed'),
     ]

     cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
+    keep_only_tags = dict(id='main-content')
+    remove_tags = [
+        {'class':['post-categories']},
+    ]

-    #-------------------------------------------------------------------------------------------------
-    def print_version(self, url):
-        # Get the identity number of the current article and append it to the root print URL
-        if url.find('/article') > 0:
-            ident = url[url.find('/article')+8:url.find('.html?')-4]
-            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
-        elif url.find('/post') > 0:
-            # in this case, have to get the page itself to derive the Print page.
-            soup = self.index_to_soup(url)
-            newsoup = soup.find('ul',attrs={'class':'printBlock'})
-            url = 'http://www.rd.com' + newsoup('a')[0]['href']
-            url = url[0:url.find('&Keep')]
-        return url
-
-    #-------------------------------------------------------------------------------------------------
-    def parse_index(self):
-        pages = [
-            ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
-            # useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
-            ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
-        ]
-        feeds = []
-        for page in pages:
-            section, url, divider, attrList = page
-            newArticles = self.page_parse(url, divider, attrList)
-            feeds.append((section,newArticles))
-        # after the pages of the site have been processed, parse several RSS feeds for additional sections
-        newfeeds = Feed()
-        newfeeds = self.parse_rss()
-        # The utility code in parse_rss returns a Feed object. Convert each feed/article combination into a form suitable
-        # for this module (parse_index).
-        for feed in newfeeds:
-            newArticles = []
-            for article in feed.articles:
-                newArt = {
-                    'title' : article.title,
-                    'url' : article.url,
-                    'date' : article.date,
-                    'description' : article.text_summary
-                }
-                newArticles.append(newArt)
-            # New and Blogs should be the first two feeds.
-            if feed.title == 'New in RD':
-                feeds.insert(0,(feed.title,newArticles))
-            elif feed.title == 'Blogs':
-                feeds.insert(1,(feed.title,newArticles))
-            else:
-                feeds.append((feed.title,newArticles))
-        return feeds
-
-    #-------------------------------------------------------------------------------------------------
-    def page_parse(self, mainurl, divider, attrList):
-        articles = []
-        mainsoup = self.index_to_soup(mainurl)
-        for item in mainsoup.findAll(attrs=attrList):
-            newArticle = {
-                'title' : item('img')[0]['alt'],
-                'url' : 'http://www.rd.com'+item('a')[0]['href'],
-                'date' : '',
-                'description' : ''
-            }
-            articles.append(newArticle)
-        return articles
-
-    #-------------------------------------------------------------------------------------------------
-    def parse_rss (self):
-        # Do the "official" parse_feeds first
-        feeds = BasicNewsRecipe.parse_feeds(self)
-        # Loop thru the articles in all feeds to find articles with "recipe" in it
-        recipeArticles = []
-        for curfeed in feeds:
-            delList = []
-            for a,curarticle in enumerate(curfeed.articles):
-                if curarticle.title.upper().find('RECIPE') >= 0:
-                    recipeArticles.append(curarticle)
-                    delList.append(curarticle)
-            if len(delList)>0:
-                for d in delList:
-                    index = curfeed.articles.index(d)
-                    curfeed.articles[index:index+1] = []
-        # If there are any recipes found, create a new Feed object and append.
-        if len(recipeArticles) > 0:
-            pfeed = Feed()
-            pfeed.title = 'Recipes'
-            pfeed.descrition = 'Recipe Feed (Virtual)'
-            pfeed.image_url = None
-            pfeed.oldest_article = 30
-            pfeed.id_counter = len(recipeArticles)
-            # Create a new Feed, add the recipe articles, and then append
-            # to "official" list of feeds
-            pfeed.articles = recipeArticles[:]
-            feeds.append(pfeed)
-        return feeds

@@ -33,7 +33,7 @@ class StrategyBusinessRecipe(BasicNewsRecipe):
             elif c.name.endswith('_password'):
                 br[c.name] = self.password
         raw = br.submit().read()
-        if '>Logout' not in raw:
+        if 'You have been logged in' not in raw:
             raise ValueError('Failed to login, check your username and password')
         return br
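
The fix above only swaps the marker string that proves a successful login: '>Logout' stopped appearing in the post-submit page, so the check now looks for 'You have been logged in'. The surrounding pattern, sketched standalone with mechanize (the form URL and field names below are hypothetical; only the marker string comes from the actual fix):

    import mechanize

    def logged_in_browser(username, password):
        br = mechanize.Browser()
        br.open('http://www.strategy-business.com/login')  # hypothetical URL
        br.select_form(nr=0)                               # assume the first form is the login form
        br['username'] = username                          # hypothetical field names
        br['password'] = password
        raw = br.submit().read()
        # Check a marker that only appears on success, and fail loudly otherwise.
        if 'You have been logged in' not in raw:
            raise ValueError('Failed to login, check your username and password')
        return br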

@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe

class UnitedDaily(BasicNewsRecipe):
    title = u'聯合新聞網'
    oldest_article = 1
    max_articles_per_feed = 100
    feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
             (u'政治', u'http://udn.com/udnrss/politics.xml'),
             (u'社會', u'http://udn.com/udnrss/social.xml'),
             (u'生活', u'http://udn.com/udnrss/life.xml'),
             (u'綜合', u'http://udn.com/udnrss/education.xml'),
             (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
             (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
             (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
             (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
             (u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
             (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
             (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
             (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
             (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
             (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
             (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
             (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
             (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
             (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
             (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
             (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
             (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
             (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
             (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
             (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
             (u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
             (u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
             (u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
             (u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
             (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
             (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
             (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
             (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
             (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
             (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
             (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
             (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
             (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
            ]
    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}'''
    __author__ = 'Eddie Lau'
    __version__ = '1.0'
    language = 'zh'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'big5'
    conversion_options = {'linearize_tables':True}
    masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})]
    remove_tags = [dict(name='div', attrs={'id':['mvouter']})]

@@ -11,7 +11,7 @@ __all__ = [
         'build', 'build_pdf2xml', 'server',
         'gui',
         'develop', 'install',
-        'resources',
+        'kakasi', 'resources',
         'check',
         'sdist',
         'manual', 'tag_release',
@@ -49,8 +49,9 @@ gui = GUI()
 from setup.check import Check
 check = Check()
-from setup.resources import Resources
+from setup.resources import Resources, Kakasi
 resources = Resources()
+kakasi = Kakasi()

 from setup.publish import Manual, TagRelease, Stage1, Stage2, \
         Stage3, Stage4, Publish

@@ -32,6 +32,7 @@ class Win32(VMInstaller):
     FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
     INSTALLER_EXT = 'msi'
     SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
+    BUILD_BUILD = ['python setup.py kakasi',] + VMInstaller.BUILD_BUILD

     def download_installer(self):
         installer = self.installer()

@@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
 from setup.installer.windows.wix import WixMixIn

 OPENSSL_DIR = r'Q:\openssl'
-QT_DIR = 'Q:\\Qt\\4.7.2'
+QT_DIR = 'Q:\\Qt\\4.7.3'
 QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
 LIBUSB_DIR = 'C:\\libusb'
 LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'

@@ -11,9 +11,6 @@
                 SummaryCodepage='1252' />
     <Media Id="1" Cabinet="{app}.cab" CompressionLevel="{compression}" EmbedCab="yes" />
-    <!-- The following line is needed because of the patch to QtCore4.dll. You can remove this line
-    after you update Qt beyond 4.7.2. 'emus' means re-install even if version is the same not just if it is older. -->
-    <Property Id='REINSTALLMODE' Value='emus'/>
     <Upgrade Id="{upgrade_code}">
         <UpgradeVersion Maximum="{version}"

@@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 import os, cPickle, re, anydbm, shutil, marshal, zipfile, glob
 from zlib import compress

-from setup import Command, basenames, __appname__
+from setup import Command, basenames, __appname__, iswindows

 def get_opts_from_parser(parser):
     def do_opt(opt):
@@ -23,13 +23,119 @@ def get_opts_from_parser(parser):
         for o in g.option_list:
             for x in do_opt(o): yield x

-class Resources(Command):
+class Kakasi(Command):

-    description = 'Compile various needed calibre resources'
+    description = 'Compile resources for unihandecode'

     KAKASI_PATH = os.path.join(Command.SRC, __appname__,
             'ebooks', 'unihandecode', 'pykakasi')

+    def run(self, opts):
+        self.records = {}
+        src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
+        dest = self.j(self.RESOURCES, 'localization',
+                'pykakasi','kanwadict2.db')
+        base = os.path.dirname(dest)
+        if not os.path.exists(base):
+            os.makedirs(base)
+        if self.newer(dest, src) or iswindows:
+            self.info('\tGenerating Kanwadict')
+            for line in open(src, "r"):
+                self.parsekdict(line)
+            self.kanwaout(dest)
+
+        src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
+        dest = self.j(self.RESOURCES, 'localization',
+                'pykakasi','itaijidict2.pickle')
+        if self.newer(dest, src) or iswindows:
+            self.info('\tGenerating Itaijidict')
+            self.mkitaiji(src, dest)
+
+        src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
+        dest = self.j(self.RESOURCES, 'localization',
+                'pykakasi','kanadict2.pickle')
+        if self.newer(dest, src) or iswindows:
+            self.info('\tGenerating kanadict')
+            self.mkkanadict(src, dest)
+        return
+
+    def mkitaiji(self, src, dst):
+        dic = {}
+        for line in open(src, "r"):
+            line = line.decode("utf-8").strip()
+            if line.startswith(';;'): # skip comment
+                continue
+            if re.match(r"^$",line):
+                continue
+            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
+            dic[pair[0]] = pair[1]
+        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+
+    def mkkanadict(self, src, dst):
+        dic = {}
+        for line in open(src, "r"):
+            line = line.decode("utf-8").strip()
+            if line.startswith(';;'): # skip comment
+                continue
+            if re.match(r"^$",line):
+                continue
+            (alpha, kana) = line.split(' ')
+            dic[kana] = alpha
+        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
+
+    def parsekdict(self, line):
+        line = line.decode("utf-8").strip()
+        if line.startswith(';;'): # skip comment
+            return
+        (yomi, kanji) = line.split(' ')
+        if ord(yomi[-1:]) <= ord('z'):
+            tail = yomi[-1:]
+            yomi = yomi[:-1]
+        else:
+            tail = ''
+        self.updaterec(kanji, yomi, tail)
+
+    def updaterec(self, kanji, yomi, tail):
+        key = "%04x"%ord(kanji[0])
+        if key in self.records:
+            if kanji in self.records[key]:
+                rec = self.records[key][kanji]
+                rec.append((yomi,tail))
+                self.records[key].update( {kanji: rec} )
+            else:
+                self.records[key][kanji]=[(yomi, tail)]
+        else:
+            self.records[key] = {}
+            self.records[key][kanji]=[(yomi, tail)]
+
+    def kanwaout(self, out):
+        try:
+            # Needed as otherwise anydbm tries to create a gdbm db when the db
+            # created on Unix is found
+            os.remove(out)
+        except:
+            pass
+        dic = anydbm.open(out, 'n')
+        for (k, v) in self.records.iteritems():
+            dic[k] = compress(marshal.dumps(v))
+        dic.close()
+
+    def clean(self):
+        kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
+        if os.path.exists(kakasi):
+            shutil.rmtree(kakasi)
+
+class Resources(Command):
+
+    description = 'Compile various needed calibre resources'
+    sub_commands = ['kakasi']
+
     def run(self, opts):
         scripts = {}
         for x in ('console', 'gui'):
@@ -117,108 +223,13 @@ class Resources(Command):
         import json
         json.dump(function_dict, open(dest, 'wb'), indent=4)

-        self.run_kakasi(opts)
-
-    def run_kakasi(self, opts):
-        self.records = {}
-        src = self.j(self.KAKASI_PATH, 'kakasidict.utf8')
-        dest = self.j(self.RESOURCES, 'localization',
-                'pykakasi','kanwadict2.db')
-        base = os.path.dirname(dest)
-        if not os.path.exists(base):
-            os.makedirs(base)
-        if self.newer(dest, src):
-            self.info('\tGenerating Kanwadict')
-            for line in open(src, "r"):
-                self.parsekdict(line)
-            self.kanwaout(dest)
-
-        src = self.j(self.KAKASI_PATH, 'itaijidict.utf8')
-        dest = self.j(self.RESOURCES, 'localization',
-                'pykakasi','itaijidict2.pickle')
-        if self.newer(dest, src):
-            self.info('\tGenerating Itaijidict')
-            self.mkitaiji(src, dest)
-
-        src = self.j(self.KAKASI_PATH, 'kanadict.utf8')
-        dest = self.j(self.RESOURCES, 'localization',
-                'pykakasi','kanadict2.pickle')
-        if self.newer(dest, src):
-            self.info('\tGenerating kanadict')
-            self.mkkanadict(src, dest)
-        return
-
-    def mkitaiji(self, src, dst):
-        dic = {}
-        for line in open(src, "r"):
-            line = line.decode("utf-8").strip()
-            if line.startswith(';;'): # skip comment
-                continue
-            if re.match(r"^$",line):
-                continue
-            pair = re.sub(r'\\u([0-9a-fA-F]{4})', lambda x:unichr(int(x.group(1),16)), line)
-            dic[pair[0]] = pair[1]
-        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
-
-    def mkkanadict(self, src, dst):
-        dic = {}
-        for line in open(src, "r"):
-            line = line.decode("utf-8").strip()
-            if line.startswith(';;'): # skip comment
-                continue
-            if re.match(r"^$",line):
-                continue
-            (alpha, kana) = line.split(' ')
-            dic[kana] = alpha
-        cPickle.dump(dic, open(dst, 'w'), protocol=-1) #pickle
-
-    def parsekdict(self, line):
-        line = line.decode("utf-8").strip()
-        if line.startswith(';;'): # skip comment
-            return
-        (yomi, kanji) = line.split(' ')
-        if ord(yomi[-1:]) <= ord('z'):
-            tail = yomi[-1:]
-            yomi = yomi[:-1]
-        else:
-            tail = ''
-        self.updaterec(kanji, yomi, tail)
-
-    def updaterec(self, kanji, yomi, tail):
-        key = "%04x"%ord(kanji[0])
-        if key in self.records:
-            if kanji in self.records[key]:
-                rec = self.records[key][kanji]
-                rec.append((yomi,tail))
-                self.records[key].update( {kanji: rec} )
-            else:
-                self.records[key][kanji]=[(yomi, tail)]
-        else:
-            self.records[key] = {}
-            self.records[key][kanji]=[(yomi, tail)]
-
-    def kanwaout(self, out):
-        dic = anydbm.open(out, 'c')
-        for (k, v) in self.records.iteritems():
-            dic[k] = compress(marshal.dumps(v))
-        dic.close()
-
     def clean(self):
         for x in ('scripts', 'recipes', 'ebook-convert-complete'):
             x = self.j(self.RESOURCES, x+'.pickle')
             if os.path.exists(x):
                 os.remove(x)
-        kakasi = self.j(self.RESOURCES, 'localization', 'pykakasi')
-        if os.path.exists(kakasi):
-            shutil.rmtree(kakasi)
+        from setup.commands import kakasi
+        kakasi.clean()
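
The new Kakasi.kanwaout() above stores, under each key, a zlib-compressed marshal dump of a {kanji: [(yomi, tail), ...]} dict inside an anydbm database. A sketch of the matching read path (the function and its arguments are assumptions inferred from the build code, not part of this commit):

    import anydbm, marshal
    from zlib import decompress

    def kanwa_lookup(db_path, kanji):
        # Keys are "%04x" of the first character's code point, mirroring updaterec().
        db = anydbm.open(db_path, 'r')
        try:
            key = "%04x" % ord(kanji[0])
            if not db.has_key(key):  # Python 2 dbm objects expose has_key()
                return {}
            return marshal.loads(decompress(db[key]))
        finally:
            db.close()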

View File

@@ -1096,6 +1096,11 @@ class StoreAmazonKindleStore(StoreBase):
     description = _('Kindle books from Amazon')
     actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'

+class StoreAmazonDEKindleStore(StoreBase):
+    name = 'Amazon DE Kindle'
+    description = _('Kindle eBooks')
+    actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
+
 class StoreAmazonUKKindleStore(StoreBase):
     name = 'Amazon UK Kindle'
     description = _('Kindle books from Amazon.uk')
@@ -1111,6 +1116,11 @@ class StoreBNStore(StoreBase):
     description = _('Books, Textbooks, eBooks, Toys, Games and More.')
     actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'

+class StoreBeamEBooksDEStore(StoreBase):
+    name = 'Beam EBooks DE'
+    description = _('der eBook Shop')
+    actual_plugin = 'calibre.gui2.store.beam_ebooks_de_plugin:BeamEBooksDEStore'
+
 class StoreBeWriteStore(StoreBase):
     name = 'BeWrite Books'
     description = _('Publishers of fine books.')
@@ -1126,7 +1136,12 @@ class StoreEbookscomStore(StoreBase):
     description = _('The digital bookstore.')
     actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'

-class StoreEHarlequinStoretore(StoreBase):
+class StoreEPubBuyDEStore(StoreBase):
+    name = 'EPUBBuy DE'
+    description = _('EPUBReaders eBook Shop')
+    actual_plugin = 'calibre.gui2.store.epubbuy_de_plugin:EPubBuyDEStore'
+
+class StoreEHarlequinStore(StoreBase):
     name = 'eHarlequin'
     description = _('entertain, enrich, inspire.')
     actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
@@ -1136,6 +1151,11 @@ class StoreFeedbooksStore(StoreBase):
     description = _('Read anywhere.')
     actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'

+class StoreFoylesUKStore(StoreBase):
+    name = 'Foyles UK'
+    description = _('Foyles of London, online')
+    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
+
 class StoreGutenbergStore(StoreBase):
     name = 'Project Gutenberg'
     description = _('The first producer of free ebooks.')
@@ -1171,22 +1191,23 @@ class StoreWaterstonesUKStore(StoreBase):
     description = _('Feel every word')
     actual_plugin = 'calibre.gui2.store.waterstones_uk_plugin:WaterstonesUKStore'

-class StoreFoylesUKStore(StoreBase):
-    name = 'Foyles UK'
-    description = _('Foyles of London, online')
-    actual_plugin = 'calibre.gui2.store.foyles_uk_plugin:FoylesUKStore'
+class StoreWeightlessBooksStore(StoreBase):
+    name = 'Weightless Books'
+    description = '(e)Books That Don\'t Weigh You Down'
+    actual_plugin = 'calibre.gui2.store.weightless_books_plugin:WeightlessBooksStore'

-class AmazonDEKindleStore(StoreBase):
-    name = 'Amazon DE Kindle'
-    description = _('Kindle eBooks')
-    actual_plugin = 'calibre.gui2.store.amazon_de_plugin:AmazonDEKindleStore'
+class StoreWizardsTowerBooksStore(StoreBase):
+    name = 'Wizards Tower Books'
+    description = 'Wizard\'s Tower Press'
+    actual_plugin = 'calibre.gui2.store.wizards_tower_books_plugin:WizardsTowerBooksStore'

-plugins += [StoreAmazonKindleStore, AmazonDEKindleStore, StoreAmazonUKKindleStore,
+plugins += [StoreAmazonKindleStore, StoreAmazonDEKindleStore, StoreAmazonUKKindleStore,
     StoreBaenWebScriptionStore, StoreBNStore,
-    StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
-    StoreEHarlequinStoretore, StoreFeedbooksStore,
+    StoreBeamEBooksDEStore, StoreBeWriteStore,
+    StoreDieselEbooksStore, StoreEbookscomStore, StoreEPubBuyDEStore,
+    StoreEHarlequinStore, StoreFeedbooksStore,
     StoreFoylesUKStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
     StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore,
-    StoreWaterstonesUKStore]
+    StoreWaterstonesUKStore, StoreWeightlessBooksStore, StoreWizardsTowerBooksStore]
 # }}}


@@ -109,7 +109,7 @@ class ANDROID(USBMS):
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
             '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
-            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB']
+            'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD']


@@ -103,10 +103,11 @@ class EPUBInput(InputFormatPlugin):
             t.set('href', guide_cover)
             t.set('title', 'Title Page')
             from calibre.ebooks import render_html_svg_workaround
-            renderer = render_html_svg_workaround(guide_cover, log)
-            if renderer is not None:
-                open('calibre_raster_cover.jpg', 'wb').write(
-                    renderer)
+            if os.path.exists(guide_cover):
+                renderer = render_html_svg_workaround(guide_cover, log)
+                if renderer is not None:
+                    open('calibre_raster_cover.jpg', 'wb').write(
+                        renderer)

     def find_opf(self):
         def attr(n, attr):


@@ -112,10 +112,15 @@ class Metadata(object):
         Be careful with numeric fields since this will return True for zero as
         well as None.
+
+        Also returns True if the field does not exist.
         '''
-        null_val = NULL_VALUES.get(field, None)
-        val = getattr(self, field, None)
-        return not val or val == null_val
+        try:
+            null_val = NULL_VALUES.get(field, None)
+            val = getattr(self, field, None)
+            return not val or val == null_val
+        except:
+            return True

     def __getattribute__(self, field):
         _data = object.__getattribute__(self, '_data')
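
A quick illustration of the is_null() semantics documented above (a sketch, assuming the usual calibre import path for Metadata):

from calibre.ebooks.metadata.book.base import Metadata

mi = Metadata('A Title')
mi.rating = 0
print(mi.is_null('rating'))         # True: zero compares as null for numeric fields
print(mi.is_null('no_such_field'))  # True: with the try/except, a missing field no longer raises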


@@ -372,6 +372,18 @@ def identify(log, abort, # {{{
     longest, lp = -1, ''
     for plugin, presults in results.iteritems():
         presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+
+        # Throw away lower priority results from the same source that have exactly the same
+        # title and authors as a higher priority result
+        filter_results = set()
+        filtered_results = []
+        for r in presults:
+            key = (r.title, tuple(r.authors))
+            if key not in filter_results:
+                filtered_results.append(r)
+                filter_results.add(key)
+        results[plugin] = presults = filtered_results
+
     plog = logs[plugin].getvalue().strip()
     log('\n'+'*'*30, plugin.name, '*'*30)
     log('Request extra headers:', plugin.browser.addheaders)
@@ -479,7 +491,7 @@ if __name__ == '__main__': # tests {{{
     (
         {'title':'Magykal Papers',
             'authors':['Sage']},
-        [title_test('The Magykal Papers', exact=True)],
+        [title_test('Septimus Heap: The Magykal Papers', exact=True)],
     ),
@@ -506,12 +518,6 @@ if __name__ == '__main__': # tests {{{
             exact=True), authors_test(['Dan Brown'])]
     ),

-    ( # No ISBN
-        {'title':'Justine', 'authors':['Durrel']},
-        [title_test('Justine', exact=True),
-            authors_test(['Lawrence Durrel'])]
-    ),
-
     ( # A newer book
         {'identifiers':{'isbn': '9780316044981'}},
         [title_test('The Heroes', exact=True),


@@ -86,7 +86,7 @@ class RTFInput(InputFormatPlugin):
                 run_lev = 4
                 self.log('Running RTFParser in debug mode')
             except:
-                pass
+                self.log.warn('Impossible to run RTFParser in debug mode')
         parser = ParseRtf(
             in_file = stream,
             out_file = ofile,


@@ -197,8 +197,8 @@ class ProcessTokens:
         # character info => ci
         'b' : ('ci', 'bold______', self.bool_st_func),
         'blue' : ('ci', 'blue______', self.color_func),
         'caps' : ('ci', 'caps______', self.bool_st_func),
-        'cf' : ('ci', 'font-color', self.default_func),
+        'cf' : ('ci', 'font-color', self.colorz_func),
         'chftn' : ('ci', 'footnot-mk', self.bool_st_func),
         'dn' : ('ci', 'font-down_', self.divide_by_2),
         'embo' : ('ci', 'emboss____', self.bool_st_func),
@@ -624,6 +624,11 @@ class ProcessTokens:
             num = 'true'
         return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

+    def colorz_func(self, pre, token, num):
+        if num is None:
+            num = '0'
+        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)
+
     def __list_type_func(self, pre, token, num):
         type = 'arabic'
         if num is None:


@@ -620,7 +620,11 @@ class Application(QApplication):
         self.original_font = QFont(QApplication.font())
         fi = gprefs['font']
         if fi is not None:
-            QApplication.setFont(QFont(*fi))
+            font = QFont(*(fi[:4]))
+            s = gprefs.get('font_stretch', None)
+            if s is not None:
+                font.setStretch(s)
+            QApplication.setFont(font)

     def _send_file_open_events(self):
         with self._file_open_lock:


@@ -478,6 +478,10 @@ class EditMetadataAction(InterfaceAction):
             try:
                 set_title = not mi.is_null('title')
                 set_authors = not mi.is_null('authors')
+                idents = db.get_identifiers(i, index_is_id=True)
+                if mi.identifiers:
+                    idents.update(mi.identifiers)
+                mi.identifiers = idents
                 db.set_metadata(i, mi, commit=False, set_title=set_title,
                         set_authors=set_authors, notify=False)
                 self.applied_ids.append(i)


@@ -10,6 +10,7 @@ from functools import partial

 from PyQt4.Qt import QMenu

+from calibre.gui2 import error_dialog
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2.dialogs.confirm_delete import confirm
@@ -25,18 +26,75 @@ class StoreAction(InterfaceAction):

     def load_menu(self):
         self.store_menu.clear()
-        self.store_menu.addAction(_('Search'), self.search)
+        self.store_menu.addAction(_('Search for ebooks'), self.search)
+        self.store_menu.addAction(_('Search for this author'), self.search_author)
+        self.store_menu.addAction(_('Search for this title'), self.search_title)
+        self.store_menu.addAction(_('Search for this book'), self.search_author_title)
         self.store_menu.addSeparator()
-        for n, p in self.gui.istores.items():
-            self.store_menu.addAction(n, partial(self.open_store, p))
+        self.store_list_menu = self.store_menu.addMenu(_('Stores'))
+        for n, p in sorted(self.gui.istores.items(), key=lambda x: x[0].lower()):
+            self.store_list_menu.addAction(n, partial(self.open_store, p))
         self.qaction.setMenu(self.store_menu)

-    def search(self):
+    def search(self, query=''):
         self.show_disclaimer()
         from calibre.gui2.store.search.search import SearchDialog
-        sd = SearchDialog(self.gui.istores, self.gui)
+        sd = SearchDialog(self.gui.istores, self.gui, query)
         sd.exec_()

+    def _get_selected_row(self):
+        rows = self.gui.current_view().selectionModel().selectedRows()
+        if not rows or len(rows) == 0:
+            return None
+        return rows[0].row()
+
+    def _get_author(self, row):
+        author = ''
+        if self.gui.current_view() is self.gui.library_view:
+            author = self.gui.library_view.model().authors(row)
+        else:
+            mi = self.gui.current_view().model().get_book_display_info(row)
+            author = ' & '.join(mi.authors)
+        return author
+
+    def search_author(self):
+        row = self._get_selected_row()
+        if row == None:
+            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
+            return
+
+        query = 'author:"%s"' % self._get_author(row)
+        self.search(query)
+
+    def _get_title(self, row):
+        title = ''
+        if self.gui.current_view() is self.gui.library_view:
+            title = self.gui.library_view.model().title(row)
+        else:
+            mi = self.gui.current_view().model().get_book_display_info(row)
+            title = mi.title
+        return title
+
+    def search_title(self):
+        row = self._get_selected_row()
+        if row == None:
+            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
+            return
+
+        query = 'title:"%s"' % self._get_title(row)
+        self.search(query)
+
+    def search_author_title(self):
+        row = self._get_selected_row()
+        if row == None:
+            error_dialog(self.gui, _('Cannot search'), _('No book selected'), show=True)
+            return
+
+        query = 'author:"%s" title:"%s"' % (self._get_author(row), self._get_title(row))
+        self.search(query)
+
     def open_store(self, store_plugin):
         self.show_disclaimer()
         store_plugin.open(self.gui)


@@ -506,6 +506,9 @@ class BooksModel(QAbstractTableModel): # {{{
     def id(self, row):
         return self.db.id(getattr(row, 'row', lambda:row)())

+    def authors(self, row_number):
+        return self.db.authors(row_number)
+
     def title(self, row_number):
         return self.db.title(row_number)


@@ -336,7 +336,9 @@ class MetadataSingleDialogBase(ResizableDialog):
         if not mi.is_null('tags'):
             self.tags.current_val = mi.tags
         if not mi.is_null('identifiers'):
-            self.identifiers.current_val = mi.identifiers
+            current = self.identifiers.current_val
+            current.update(mi.identifiers)
+            self.identifiers.current_val = current
         if not mi.is_null('pubdate'):
             self.pubdate.current_val = mi.pubdate
         if not mi.is_null('series') and mi.series.strip():
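
Both identifier changes in this commit follow the same merge-rather-than-replace pattern; in isolation (values are illustrative):

current = {'isbn': '9780316044981'}    # what the record already has
downloaded = {'google': 'abc123xyz'}   # what the metadata download returned
current.update(downloaded)
# Both keys survive; a plain assignment would have discarded the existing ISBN.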


@@ -161,7 +161,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):

     def initialize(self):
         ConfigWidgetBase.initialize(self)
-        self.current_font = self.initial_font = gprefs['font']
+        font = gprefs['font']
+        if font is not None:
+            font = list(font)
+            font.append(gprefs.get('font_stretch', QFont.Unstretched))
+        self.current_font = self.initial_font = font
         self.update_font_display()
         self.display_model.initialize()
@@ -178,7 +182,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
     def build_font_obj(self):
         font_info = self.current_font
         if font_info is not None:
-            font = QFont(*font_info)
+            font = QFont(*(font_info[:4]))
+            font.setStretch(font_info[4])
         else:
             font = qt_app.original_font
         return font
@@ -215,15 +220,18 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         if fd.exec_() == fd.Accepted:
             font = fd.selectedFont()
             fi = QFontInfo(font)
-            self.current_font = (unicode(fi.family()), fi.pointSize(),
-                    fi.weight(), fi.italic())
+            self.current_font = [unicode(fi.family()), fi.pointSize(),
+                    fi.weight(), fi.italic(), font.stretch()]
             self.update_font_display()
             self.changed_signal.emit()

     def commit(self, *args):
         rr = ConfigWidgetBase.commit(self, *args)
         if self.current_font != self.initial_font:
-            gprefs['font'] = self.current_font
+            gprefs['font'] = (self.current_font[:4] if self.current_font else
+                    None)
+            gprefs['font_stretch'] = (self.current_font[4] if self.current_font
+                    is not None else QFont.Unstretched)
             QApplication.setFont(self.font_display.font())
             rr = True
         self.display_model.commit()
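
After this change the font preference is persisted under two keys, roughly as below (values illustrative; the stretch is kept separate, presumably so existing four-element 'font' values keep working):

from PyQt4.Qt import QFont

font_prefs = {
    'font': ('Liberation Sans', 10, 50, False),  # family, point size, weight, italic
    'font_stretch': QFont.Unstretched,
}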


@@ -75,6 +75,8 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{

     def find(self, query):
         query = query.strip()
+        if not query:
+            return QModelIndex()
         matches = self.parse(query)
         if not matches:
             return QModelIndex()
@@ -87,6 +89,8 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{

     def find_next(self, idx, query, backwards=False):
         query = query.strip()
+        if not query:
+            return idx
         matches = self.parse(query)
         if not matches:
             return idx


@@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'

 class StorePlugin(object): # {{{
     '''
     A plugin representing an online ebook repository (store). The store can
-    be a comercial store that sells ebooks or a source of free downloadable
+    be a commercial store that sells ebooks or a source of free downloadable
     ebooks.

     Note that this class is the base class for these plugins, however, to
@@ -43,6 +43,8 @@ class StorePlugin(object): # {{{
     The easiest way to handle affiliate money payouts is to randomly select
     between the author's affiliate id and calibre's affiliate id so that
     70% of the time the author's id is used.
+
+    See declined.txt for a list of stores that do not want to be included.
     '''

     def __init__(self, gui, name):
View File

@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib2
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class BeamEBooksDEStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://klick.affiliwelt.net/klick.php?bannerid=10072&pid=32307&prid=908'
url_details = ('http://klick.affiliwelt.net/klick.php?'
'bannerid=10730&pid=32307&prid=908&prodid={0}')
if external or self.config.get('open_external', False):
if detail_item:
url = url_details.format(detail_item)
open_url(QUrl(url))
else:
detail_url = None
if detail_item:
detail_url = url_details.format(detail_item)
d = WebStoreDialog(self.gui, url, parent, detail_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://www.beam-ebooks.de/suchergebnis.php?Type=&sw=' + urllib2.quote(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//table[tr/td/div[@class="stil2"]]'):
if counter <= 0:
break
id = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/@href')).strip()
if not id:
continue
id = id[7:]
cover_url = ''.join(data.xpath('./tr/td[1]/a/img/@src'))
if cover_url:
cover_url = 'http://www.beam-ebooks.de' + cover_url
title = ''.join(data.xpath('./tr/td/div[@class="stil2"]/a/b/text()'))
author = ' '.join(data.xpath('./tr/td/div[@class="stil2"]/'
'child::b/text()'
'|'
'./tr/td/div[@class="stil2"]/'
'child::strong/text()'))
price = ''.join(data.xpath('./tr/td[3]/text()'))
pdf = data.xpath(
'boolean(./tr/td[3]/a/img[contains(@alt, "PDF")]/@alt)')
epub = data.xpath(
'boolean(./tr/td[3]/a/img[contains(@alt, "ePub")]/@alt)')
mobi = data.xpath(
'boolean(./tr/td[3]/a/img[contains(@alt, "Mobipocket")]/@alt)')
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price
s.drm = SearchResult.DRM_UNLOCKED
s.detail_item = id
formats = []
if epub:
formats.append('ePub')
if pdf:
formats.append('PDF')
if mobi:
formats.append('MOBI')
s.formats = ', '.join(formats)
yield s


@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'

 import random
 import re
-import urllib2
+import urllib
 from contextlib import closing

 from lxml import html
@@ -48,7 +48,7 @@ class BNStore(BasicStoreConfig, StorePlugin):

     def search(self, query, max_results=10, timeout=60):
         url = 'http://productsearch.barnesandnoble.com/search/results.aspx?STORE=EBOOK&SZE=%s&WRD=' % max_results
-        url += urllib2.quote(query)
+        url += urllib.quote_plus(query)

         br = browser()

@@ -0,0 +1,5 @@
This is a list of stores that objected, declined
or asked not to be included in the store integration.
* Borders (http://www.borders.com/)
* WH Smith (http://www.whsmith.co.uk/)


@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib2
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class EPubBuyDEStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://klick.affiliwelt.net/klick.php?bannerid=47653&pid=32307&prid=2627'
url_details = ('http://klick.affiliwelt.net/klick.php?bannerid=47653'
'&pid=32307&prid=2627&prodid={0}')
if external or self.config.get('open_external', False):
if detail_item:
url = url_details.format(detail_item)
open_url(QUrl(url))
else:
detail_url = None
if detail_item:
detail_url = url_details.format(detail_item)
d = WebStoreDialog(self.gui, url, parent, detail_url)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://www.epubbuy.com/search.php?search_query=' + urllib2.quote(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//li[contains(@class, "ajax_block_product")]'):
if counter <= 0:
break
id = ''.join(data.xpath('./div[@class="center_block"]'
'/p[contains(text(), "artnr:")]/text()')).strip()
if not id:
continue
id = id[6:].strip()
if not id:
continue
cover_url = ''.join(data.xpath('./div[@class="center_block"]'
'/a[@class="product_img_link"]/img/@src'))
if cover_url:
cover_url = 'http://www.epubbuy.com' + cover_url
title = ''.join(data.xpath('./div[@class="center_block"]'
'/a[@class="product_img_link"]/@title'))
author = ''.join(data.xpath('./div[@class="center_block"]/a[2]/text()'))
price = ''.join(data.xpath('.//span[@class="price"]/text()'))
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price
s.drm = SearchResult.DRM_UNLOCKED
s.detail_item = id
s.formats = 'ePub'
yield s


@@ -73,6 +73,6 @@ class FoylesUKStore(BasicStoreConfig, StorePlugin):
                 s.price = price
                 s.detail_item = id
                 s.drm = SearchResult.DRM_LOCKED
-                s.formats = 'EPUB'
+                s.formats = 'ePub'

                 yield s


@@ -12,6 +12,7 @@ from threading import Thread
 from Queue import Queue

 from calibre import browser
+from calibre.constants import DEBUG
 from calibre.utils.magick.draw import thumbnail

 class GenericDownloadThreadPool(object):
@@ -119,7 +120,8 @@ class SearchThread(Thread):
                     self.results.put((res, store_plugin))
                 self.tasks.task_done()
             except:
-                traceback.print_exc()
+                if DEBUG:
+                    traceback.print_exc()

 class CoverThreadPool(GenericDownloadThreadPool):
@@ -157,7 +159,8 @@ class CoverThread(Thread):
                         callback()
                 self.tasks.task_done()
             except:
-                continue
+                if DEBUG:
+                    traceback.print_exc()

 class DetailsThreadPool(GenericDownloadThreadPool):
@@ -191,7 +194,8 @@ class DetailsThread(Thread):
                     callback(result)
                 self.tasks.task_done()
             except:
-                continue
+                if DEBUG:
+                    traceback.print_exc()

 class CacheUpdateThreadPool(GenericDownloadThreadPool):
@@ -221,4 +225,5 @@ class CacheUpdateThread(Thread):
                 store_plugin, timeout = self.tasks.get()
                 store_plugin.update_cache(timeout=timeout, suppress_progress=True)
             except:
-                traceback.print_exc()
+                if DEBUG:
+                    traceback.print_exc()


@@ -23,8 +23,8 @@ TIMEOUT = 75 # seconds

 class SearchDialog(QDialog, Ui_Dialog):

-    def __init__(self, istores, *args):
-        QDialog.__init__(self, *args)
+    def __init__(self, istores, parent=None, query=''):
+        QDialog.__init__(self, parent)
         self.setupUi(self)

         self.config = JSONConfig('store/search')
@@ -47,13 +47,16 @@ class SearchDialog(QDialog, Ui_Dialog):
         # per search basis.
         stores_group_layout = QVBoxLayout()
         self.stores_group.setLayout(stores_group_layout)
-        for x in self.store_plugins:
+        for x in sorted(self.store_plugins.keys(), key=lambda x: x.lower()):
             cbox = QCheckBox(x)
             cbox.setChecked(True)
             stores_group_layout.addWidget(cbox)
             setattr(self, 'store_check_' + x, cbox)
         stores_group_layout.addStretch()

+        # Set the search query
+        self.search_edit.setText(query)
+
         # Create and add the progress indicator
         self.pi = ProgressIndicator(self, 24)
         self.top_layout.addWidget(self.pi)
@@ -93,7 +96,7 @@ class SearchDialog(QDialog, Ui_Dialog):
         # Store / Formats
         self.results_view.setColumnWidth(4, int(total*.25))

-    def do_search(self, checked=False):
+    def do_search(self):
         # Stop all running threads.
         self.checker.stop()
         self.search_pool.abort()
@@ -136,14 +139,17 @@ class SearchDialog(QDialog, Ui_Dialog):
         query = query.replace('>', '')
         query = query.replace('<', '')
         # Remove the prefix.
-        for loc in ( 'all', 'author', 'authors', 'title'):
-            query = re.sub(r'%s:"?(?P<a>[^\s"]+)"?' % loc, '\g<a>', query)
+        for loc in ('all', 'author', 'authors', 'title'):
+            query = re.sub(r'%s:"(?P<a>[^\s"]+)"' % loc, '\g<a>', query)
+            query = query.replace('%s:' % loc, '')
         # Remove the prefix and search text.
         for loc in ('cover', 'drm', 'format', 'formats', 'price', 'store'):
             query = re.sub(r'%s:"[^"]"' % loc, '', query)
             query = re.sub(r'%s:[^\s]*' % loc, '', query)
         # Remove logic.
-        query = re.sub(r'(^|\s)(and|not|or)(\s|$)', ' ', query)
+        query = re.sub(r'(^|\s)(and|not|or|a|the|is|of)(\s|$)', ' ', query)
+        # Remove "
+        query = query.replace('"', '')
         # Remove excess whitespace.
         query = re.sub(r'\s{2,}', ' ', query)
         query = query.strip()
@@ -253,3 +259,8 @@ class SearchDialog(QDialog, Ui_Dialog):
             self.cache_pool.abort()
         self.save_state()

+    def exec_(self):
+        if unicode(self.search_edit.text()).strip():
+            self.do_search()
+        return QDialog.exec_(self)
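
How the new entry points compose, as a hypothetical call (gui stands in for the running calibre GUI object):

from calibre.gui2.store.search.search import SearchDialog

sd = SearchDialog(gui.istores, gui, query='author:"Dan Brown"')
sd.exec_()  # a non-empty query now triggers do_search() before the dialog is shown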


@@ -76,7 +76,7 @@ class WaterstonesUKStore(BasicStoreConfig, StorePlugin):
                 s.detail_item = id
                 formats = []
                 if epub:
-                    formats.append('EPUB')
+                    formats.append('ePub')
                 if pdf:
                     formats.append('PDF')
                 s.formats = ', '.join(formats)


@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WeightlessBooksStore(BasicStoreConfig, StorePlugin):
def open(self, parent=None, detail_item=None, external=False):
url = 'http://weightlessbooks.com/'
if external or self.config.get('open_external', False):
open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
else:
d = WebStoreDialog(self.gui, url, parent, detail_item)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://weightlessbooks.com/?s=' + urllib.quote_plus(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//li[@id="product"]'):
if counter <= 0:
break
id = ''.join(data.xpath('.//div[@class="cover"]/a/@href'))
if not id:
continue
cover_url = ''.join(data.xpath('.//div[@class="cover"]/a/img/@src'))
price = ''.join(data.xpath('.//div[@class="buy_buttons"]/b[1]/text()'))
if not price:
continue
formats = ', '.join(data.xpath('.//select[@class="eStore_variation"]//option//text()'))
formats = formats.upper()
title = ''.join(data.xpath('.//h3/a/text()'))
author = ''.join(data.xpath('.//h3//text()'))
author = author.replace(title, '')
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
s.formats = formats
yield s


@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class WizardsTowerBooksStore(BasicStoreConfig, StorePlugin):
url = 'http://www.wizardstowerbooks.com/'
def open(self, parent=None, detail_item=None, external=False):
if detail_item:
detail_item = self.url + detail_item
if external or self.config.get('open_external', False):
            # url is a class attribute here, so it must be self.url (a bare
            # "url" would be an undefined name in this method)
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else self.url)))
else:
d = WebStoreDialog(self.gui, self.url, parent, detail_item)
d.setWindowTitle(self.name)
d.set_tags(self.config.get('tags', ''))
d.exec_()
def search(self, query, max_results=10, timeout=60):
url = 'http://www.wizardstowerbooks.com/search.html?for=' + urllib.quote(query)
br = browser()
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//table[@class="gridp"]//td'):
if counter <= 0:
break
id = ''.join(data.xpath('.//span[@class="prti"]/a/@href'))
id = id.strip()
if not id:
continue
cover_url = ''.join(data.xpath('.//div[@class="prim"]/a/img/@src'))
cover_url = url_slash_cleaner(self.url + cover_url.strip())
price = ''.join(data.xpath('.//font[@class="selling_price"]//text()'))
price = price.strip()
if not price:
continue
title = ''.join(data.xpath('.//span[@class="prti"]/a/b/text()'))
author = ''.join(data.xpath('.//p[@class="last"]/text()'))
a, b, author = author.partition(' by ')
counter -= 1
s = SearchResult()
s.cover_url = cover_url
s.title = title.strip()
s.author = author.strip()
s.price = price.strip()
s.detail_item = id.strip()
s.drm = SearchResult.DRM_UNLOCKED
yield s
def get_details(self, search_result, timeout):
br = browser()
with closing(br.open(url_slash_cleaner(self.url + search_result.detail_item), timeout=timeout)) as nf:
idata = html.fromstring(nf.read())
formats = ', '.join(idata.xpath('//select[@id="N1_"]//option//text()'))
search_result.formats = formats.upper()
return True


@@ -435,7 +435,7 @@ class DevicePage(QWizardPage, DeviceUI):
         self.registerField("device", self.device_view)

     def initializePage(self):
-        self.label.setText(_('Choose you e-book device. If your device is'
+        self.label.setText(_('Choose your e-book device. If your device is'
             ' not in the list, choose a "%s" device.')%Device.manufacturer)
         self.man_model = ManufacturerModel()
         self.manufacturer_view.setModel(self.man_model)


@@ -1415,18 +1415,34 @@ ol, ul { padding-left: 2em; }
         self.writedata()
         c = attrs.get( (TEXTNS,'style-name'), None)
         htmlattrs = {}
+        # Changed by Kovid to handle inline special styles defined on <text:span> tags.
+        # Apparently LibreOffice does this.
+        special = 'span'
         if c:
             c = c.replace(".","_")
             special = special_styles.get("S-"+c)
-            if special is None and self.generate_css:
-                htmlattrs['class'] = "S-%s" % c
-        self.opentag('span', htmlattrs)
+            if special is None:
+                special = 'span'
+                if self.generate_css:
+                    htmlattrs['class'] = "S-%s" % c
+
+        self.opentag(special, htmlattrs)
         self.purgedata()

     def e_text_span(self, tag, attrs):
         """ End the <text:span> """
         self.writedata()
-        self.closetag('span', False)
+        c = attrs.get( (TEXTNS,'style-name'), None)
+        # Changed by Kovid to handle inline special styles defined on <text:span> tags.
+        # Apparently LibreOffice does this.
+        special = 'span'
+        if c:
+            c = c.replace(".","_")
+            special = special_styles.get("S-"+c)
+            if special is None:
+                special = 'span'
+
+        self.closetag(special, False)
         self.purgedata()

     def s_text_tab(self, tag, attrs):