mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to tunk.
This commit is contained in:
commit
4dd794083f
@ -4,6 +4,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
|||||||
www.mainichi.jp
|
www.mainichi.jp
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class MainichiDailyNews(BasicNewsRecipe):
|
class MainichiDailyNews(BasicNewsRecipe):
|
||||||
@ -22,3 +23,18 @@ class MainichiDailyNews(BasicNewsRecipe):
|
|||||||
remove_tags = [{'class':"RelatedArticle"}]
|
remove_tags = [{'class':"RelatedArticle"}]
|
||||||
remove_tags_after = {'class':"Credit"}
|
remove_tags_after = {'class':"Credit"}
|
||||||
|
|
||||||
|
def parse_feeds(self):
|
||||||
|
|
||||||
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
|
||||||
|
for curfeed in feeds:
|
||||||
|
delList = []
|
||||||
|
for a,curarticle in enumerate(curfeed.articles):
|
||||||
|
if re.search(r'pheedo.jp', curarticle.url):
|
||||||
|
delList.append(curarticle)
|
||||||
|
if len(delList)>0:
|
||||||
|
for d in delList:
|
||||||
|
index = curfeed.articles.index(d)
|
||||||
|
curfeed.articles[index:index+1] = []
|
||||||
|
|
||||||
|
return feeds
|
||||||
|
@ -14,5 +14,19 @@ class MainichiDailyITNews(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags_before = {'class':"NewsTitle"}
|
remove_tags_before = {'class':"NewsTitle"}
|
||||||
remove_tags = [{'class':"RelatedArticle"}]
|
remove_tags = [{'class':"RelatedArticle"}]
|
||||||
remove_tags_after = {'class':"Credit"}
|
|
||||||
|
|
||||||
|
def parse_feeds(self):
|
||||||
|
|
||||||
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
|
|
||||||
|
for curfeed in feeds:
|
||||||
|
delList = []
|
||||||
|
for a,curarticle in enumerate(curfeed.articles):
|
||||||
|
if re.search(r'pheedo.jp', curarticle.url):
|
||||||
|
delList.append(curarticle)
|
||||||
|
if len(delList)>0:
|
||||||
|
for d in delList:
|
||||||
|
index = curfeed.articles.index(d)
|
||||||
|
curfeed.articles[index:index+1] = []
|
||||||
|
|
||||||
|
return feeds remove_tags_after = {'class':"Credit"}
|
||||||
|
@ -22,8 +22,19 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
|
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
|
||||||
extra_css = """
|
extra_css = """
|
||||||
body {font-family: "Times New Roman",Times,serif}
|
body {font-family: "Times New Roman",Times,serif}
|
||||||
.articleauthor{color: #9F9F9F; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
|
.articleauthor{color: #9F9F9F;
|
||||||
.rubric{color: #CD0021; font-family: Arial, sans-serif; font-size: small; text-transform: uppercase}
|
font-family: Arial, sans-serif;
|
||||||
|
font-size: small;
|
||||||
|
text-transform: uppercase}
|
||||||
|
.rubric,.dd,h6#credit{color: #CD0021;
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
font-size: small;
|
||||||
|
text-transform: uppercase}
|
||||||
|
.descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
|
||||||
|
.dd,h6#credit{color: gray}
|
||||||
|
.c{display: block}
|
||||||
|
.caption,h2#articleintro{font-style: italic}
|
||||||
|
.caption{font-size: small}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
@ -39,7 +50,7 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['meta','iframe','base','link','embed','object'])
|
dict(name=['meta','iframe','base','link','embed','object'])
|
||||||
,dict(attrs={'class':['utils','articleRailLinks','icons'] })
|
,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] })
|
||||||
,dict(attrs={'id':['show-header','show-footer'] })
|
,dict(attrs={'id':['show-header','show-footer'] })
|
||||||
]
|
]
|
||||||
remove_attributes = ['lang']
|
remove_attributes = ['lang']
|
||||||
@ -59,3 +70,13 @@ class NewYorker(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
|
cover_url = 'http://www.newyorker.com' + cover_item['src'].strip()
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
for item in soup.findAll(style=True):
|
||||||
|
del item['style']
|
||||||
|
auth = soup.find(attrs={'id':'articleauthor'})
|
||||||
|
if auth:
|
||||||
|
alink = auth.find('a')
|
||||||
|
if alink and alink.string is not None:
|
||||||
|
txt = alink.string
|
||||||
|
alink.replaceWith(txt)
|
||||||
|
return soup
|
||||||
|
@ -32,12 +32,9 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
|
|||||||
remove_tags_after = {'class':"cmn-pr_list"}
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
||||||
feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
||||||
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
|
|
||||||
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
|
|
||||||
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
|
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
|
||||||
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
||||||
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
|
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special')
|
||||||
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
|
102
resources/recipes/nikkei_sub_shakai.recipe
Normal file
102
resources/recipes/nikkei_sub_shakai.recipe
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||||
|
'''
|
||||||
|
www.nikkei.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
import mechanize
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
|
class NikkeiNet_sub_life(BasicNewsRecipe):
|
||||||
|
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
|
||||||
|
__author__ = 'Hiroshi Miura'
|
||||||
|
description = 'News and current market affairs from Japan'
|
||||||
|
cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
|
||||||
|
needs_subscription = True
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 20
|
||||||
|
language = 'ja'
|
||||||
|
remove_javascript = False
|
||||||
|
temp_files = []
|
||||||
|
|
||||||
|
remove_tags_before = {'class':"cmn-section cmn-indent"}
|
||||||
|
remove_tags = [
|
||||||
|
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
|
||||||
|
{'class':"cmn-article_keyword cmn-clearfix"},
|
||||||
|
{'class':"cmn-print_headline cmn-clearfix"},
|
||||||
|
]
|
||||||
|
remove_tags_after = {'class':"cmn-pr_list"}
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_browser(self):
|
||||||
|
br = BasicNewsRecipe.get_browser()
|
||||||
|
|
||||||
|
cj = mechanize.LWPCookieJar()
|
||||||
|
br.set_cookiejar(cj)
|
||||||
|
|
||||||
|
#br.set_debug_http(True)
|
||||||
|
#br.set_debug_redirects(True)
|
||||||
|
#br.set_debug_responses(True)
|
||||||
|
|
||||||
|
if self.username is not None and self.password is not None:
|
||||||
|
#print "----------------------------get login form--------------------------------------------"
|
||||||
|
# open login form
|
||||||
|
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
|
||||||
|
response = br.response()
|
||||||
|
#print "----------------------------get login form---------------------------------------------"
|
||||||
|
#print "----------------------------set login form---------------------------------------------"
|
||||||
|
# remove disabled input which brings error on mechanize
|
||||||
|
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
|
||||||
|
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
|
||||||
|
br.set_response(response)
|
||||||
|
br.select_form(name='LA0010Form01')
|
||||||
|
br['LA0010Form01:LA0010Email'] = self.username
|
||||||
|
br['LA0010Form01:LA0010Password'] = self.password
|
||||||
|
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||||
|
br.submit()
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------send login form---------------------------------------------"
|
||||||
|
#print "----------------------------open news main page-----------------------------------------"
|
||||||
|
# open news site
|
||||||
|
br.open('http://www.nikkei.com/')
|
||||||
|
br.response()
|
||||||
|
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||||
|
#print response2.get_data()
|
||||||
|
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||||
|
# forced redirect in default
|
||||||
|
br.select_form(nr=0)
|
||||||
|
br.submit()
|
||||||
|
response3 = br.response()
|
||||||
|
# return some cookie which should be set by Javascript
|
||||||
|
#print response3.geturl()
|
||||||
|
raw = response3.get_data()
|
||||||
|
#print "---------------------------response to form --------------------------------------------"
|
||||||
|
# grab cookie from JS and set it
|
||||||
|
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
|
||||||
|
br.select_form(nr=0)
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
|
||||||
|
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
cj.load(self.temp_files[-1].name)
|
||||||
|
|
||||||
|
br.submit()
|
||||||
|
|
||||||
|
#br.set_debug_http(False)
|
||||||
|
#br.set_debug_redirects(False)
|
||||||
|
#br.set_debug_responses(False)
|
||||||
|
return br
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
70
resources/recipes/st_louis_post_dispatch.recipe
Normal file
70
resources/recipes/st_louis_post_dispatch.recipe
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AdvancedUserRecipe1282093204(BasicNewsRecipe):
|
||||||
|
title = u'St Louis Post-Dispatch'
|
||||||
|
__author__ = 'cisaak'
|
||||||
|
language = 'en'
|
||||||
|
|
||||||
|
oldest_article = 1
|
||||||
|
max_articles_per_feed = 15
|
||||||
|
masthead_url = 'http://farm5.static.flickr.com/4118/4929686950_0e22e2c88a.jpg'
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
(u'News-Bill McClellan', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fbill-mclellan&f=rss&t=article'),
|
||||||
|
(u'News-Columns', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcolumns*&l=50&f=rss&t=article'),
|
||||||
|
(u'News-Crime & Courtshttp://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcrime-and-courts&l=50&f=rss&t=article'),
|
||||||
|
(u'News-Deb Peterson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fdeb-peterson&f=rss&t=article'),
|
||||||
|
(u'News-Education', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2feducation&f=rss&t=article'),
|
||||||
|
(u'News-Government & Politics', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fgovt-and-politics&f=rss&t=article'),
|
||||||
|
(u'News-Local', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal&f=rss&t=article'),
|
||||||
|
(u'News-Metro', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fmetro&f=rss&t=article'),
|
||||||
|
(u'News-Metro East', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fillinois&f=rss&t=article'),
|
||||||
|
(u'News-Missouri Out State', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fstate-and-regional%2FMissouri&l=50&f=rss&t=article'),
|
||||||
|
(u'Opinion-Colleen Carroll Campbell', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2fcolumns%2Fcolleen-carroll-campbell&f=rss&t=article'),
|
||||||
|
(u'Opinion-Editorial', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2feditorial&f=rss&t=article'),
|
||||||
|
(u'Opinion-Kevin Horrigan', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2fcolumns%2Fkevin-horrigan&f=rss&t=article'),
|
||||||
|
(u'Opinion-Mailbag', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Fopinion%2fmailbag&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Savvy Consumer', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fsavvy-consumer&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Lager Heads', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Flager-heads&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Job Watch', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fjob-watch&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Steve Geigerich', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fsteve-giegerich&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-David Nicklaus', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fdavid-nicklaus&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Jim Gallagher', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fjim-gallagher&l=100&f=rss&t=article'),
|
||||||
|
(u'Business Columns-Building Blocks', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fcolumns%2Fbuilding-blocks&l=100&f=rss&t=article'),
|
||||||
|
(u'Business', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business*l&l=100&f=rss&t=article'),
|
||||||
|
(u'Business-Technology', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Ftechnology&l=50&f=rss&t=article'),
|
||||||
|
(u'Business-National', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=business%2Fnational-and-international&l=50&f=rss&t=article'),
|
||||||
|
(u'Travel', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=travel*&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports*&f=rss&t=article'),
|
||||||
|
(u'Sports-Baseball', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fbaseball%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Bernie Miklasz', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fbernie-miklasz&l=50&f=rss&t=article'),
|
||||||
|
(u'Sports-Bryan Burwell', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fbryan-burwell&l=50&f=rss&t=article'),
|
||||||
|
(u'Sports-College', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcollege*&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Dan Caesar', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fdan-caesar&l=50&f=rss&t=article'),
|
||||||
|
(u'Sports-Football', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Ffootball%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Hockey', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fhockey%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Illini', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcollege%2Fillini&l=100&f=rss&t=article'),
|
||||||
|
(u'Sports-Jeff Gordon', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fcolumns%2Fjeff-gordon&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Debra Bass', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Ffashion-and-style%2Fdebra-bass&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Food and Cooking', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Ffood-and-cooking&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Health/Medicine/Fitness', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Fhealth-med-fit&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Joe Holleman', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Fcolumns%2Fjoe-holleman&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Steals-and-Deals', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Fcolumns%2Fsteals-and-deals&l=100&f=rss&t=article'),
|
||||||
|
(u'Life & Style-Tim Townsend', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=lifestyles%2Ffaith-and-values%2Ftim-townsend&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Arts & Theatre', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Farts-and-theatre&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Books & Literature', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fbooks-and-literature&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Dining', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=sports%2Fhockey%2Fprofessional&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Events Calendar', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fevents-calendar&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Gail Pennington', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Ftelevision%2Fgail-pennington&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Hip Hops', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fdining%2Fbars-and-clubs-other%2Fhip-hops&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-House-O-Fun', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fhouse-o-fun&l=100&f=rss&t=article'),
|
||||||
|
(u'Entertainment-Kevin C. Johnson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fmusic%2Fkevin-johnson&l=100&f=rss&t=article')
|
||||||
|
]
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_tags = [dict(name='div', attrs={'id':'blox-logo'}),dict(name='a')]
|
||||||
|
keep_only_tags = [dict(name='h1'), dict(name='p', attrs={'class':'byline'}), dict(name="div", attrs={'id':'blox-story-text'})]
|
||||||
|
extra_css = 'p {text-align: left;}'
|
||||||
|
|
||||||
|
|
@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
masthead_title = u'YOMIURI ONLINE'
|
masthead_title = u'YOMIURI ONLINE'
|
||||||
|
|
||||||
remove_tags_before = {'class':"article-def"}
|
keep_only_tags = [{'class':"article-def"}]
|
||||||
remove_tags = [{'class':"RelatedArticle"},
|
remove_tags = [{'class':"RelatedArticle"},
|
||||||
{'class':"sbtns"}
|
{'class':"sbtns"}
|
||||||
]
|
]
|
||||||
|
@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
masthead_title = u"YOMIURI ONLINE"
|
masthead_title = u"YOMIURI ONLINE"
|
||||||
|
|
||||||
remove_tags_before = {'class':"article-def"}
|
keep_only_tags = [{'class':"article-def"}]
|
||||||
remove_tags = [{'class':"RelatedArticle"},
|
remove_tags = [{'class':"RelatedArticle"},
|
||||||
{'class':"sbtns"}
|
{'class':"sbtns"}
|
||||||
]
|
]
|
||||||
|
@ -21,7 +21,7 @@ class ANDROID(USBMS):
|
|||||||
# HTC
|
# HTC
|
||||||
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
|
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
|
||||||
: [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
|
: [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
|
||||||
0xc92 : [0x100]},
|
0xc92 : [0x100], 0xc97: [0x226]},
|
||||||
|
|
||||||
# Eken
|
# Eken
|
||||||
0x040d : { 0x8510 : [0x0001] },
|
0x040d : { 0x8510 : [0x0001] },
|
||||||
@ -63,7 +63,7 @@ class ANDROID(USBMS):
|
|||||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||||
'SCH-I500_CARD']
|
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810']
|
||||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']
|
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']
|
||||||
|
|
||||||
|
@ -11,9 +11,9 @@ from calibre.ebooks.metadata.book.base import Metadata
|
|||||||
from calibre.devices.mime import mime_type_ext
|
from calibre.devices.mime import mime_type_ext
|
||||||
from calibre.devices.interface import BookList as _BookList
|
from calibre.devices.interface import BookList as _BookList
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre import isbytestring
|
from calibre import isbytestring, force_unicode
|
||||||
from calibre.utils.config import prefs, tweaks
|
from calibre.utils.config import prefs, tweaks
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import strcmp
|
||||||
|
|
||||||
class Book(Metadata):
|
class Book(Metadata):
|
||||||
def __init__(self, prefix, lpath, size=None, other=None):
|
def __init__(self, prefix, lpath, size=None, other=None):
|
||||||
@ -241,7 +241,7 @@ class CollectionsBookList(BookList):
|
|||||||
if y is None:
|
if y is None:
|
||||||
return -1
|
return -1
|
||||||
if isinstance(x, (unicode, str)):
|
if isinstance(x, (unicode, str)):
|
||||||
c = cmp(sort_key(x), sort_key(y))
|
c = strcmp(force_unicode(x), force_unicode(y))
|
||||||
else:
|
else:
|
||||||
c = cmp(x, y)
|
c = cmp(x, y)
|
||||||
if c != 0:
|
if c != 0:
|
||||||
|
516
src/calibre/ebooks/metadata/amazonfr.py
Normal file
516
src/calibre/ebooks/metadata/amazonfr.py
Normal file
@ -0,0 +1,516 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, sengian <sengian1@gmail.com>'
|
||||||
|
|
||||||
|
import sys, textwrap, re, traceback
|
||||||
|
from urllib import urlencode
|
||||||
|
from math import ceil
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
from lxml.html import soupparser
|
||||||
|
|
||||||
|
from calibre.utils.date import parse_date, utcnow, replace_months
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
from calibre import browser, preferred_encoding
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
||||||
|
authors_to_sort_string
|
||||||
|
from calibre.ebooks.metadata.fetch import MetadataSource
|
||||||
|
from calibre.utils.config import OptionParser
|
||||||
|
from calibre.library.comments import sanitize_comments_html
|
||||||
|
|
||||||
|
|
||||||
|
class AmazonFr(MetadataSource):
|
||||||
|
|
||||||
|
name = 'Amazon French'
|
||||||
|
description = _('Downloads metadata from amazon.fr')
|
||||||
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
author = 'Sengian'
|
||||||
|
version = (1, 0, 0)
|
||||||
|
has_html_comments = True
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
try:
|
||||||
|
self.results = search(self.title, self.book_author, self.publisher,
|
||||||
|
self.isbn, max_results=10, verbose=self.verbose, lang='fr')
|
||||||
|
except Exception, e:
|
||||||
|
self.exception = e
|
||||||
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
class AmazonEs(MetadataSource):
|
||||||
|
|
||||||
|
name = 'Amazon Spanish'
|
||||||
|
description = _('Downloads metadata from amazon.com in spanish')
|
||||||
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
author = 'Sengian'
|
||||||
|
version = (1, 0, 0)
|
||||||
|
has_html_comments = True
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
try:
|
||||||
|
self.results = search(self.title, self.book_author, self.publisher,
|
||||||
|
self.isbn, max_results=10, verbose=self.verbose, lang='es')
|
||||||
|
except Exception, e:
|
||||||
|
self.exception = e
|
||||||
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
class AmazonEn(MetadataSource):
|
||||||
|
|
||||||
|
name = 'Amazon English'
|
||||||
|
description = _('Downloads metadata from amazon.com in english')
|
||||||
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
author = 'Sengian'
|
||||||
|
version = (1, 0, 0)
|
||||||
|
has_html_comments = True
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
try:
|
||||||
|
self.results = search(self.title, self.book_author, self.publisher,
|
||||||
|
self.isbn, max_results=10, verbose=self.verbose, lang='en')
|
||||||
|
except Exception, e:
|
||||||
|
self.exception = e
|
||||||
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
class AmazonDe(MetadataSource):
|
||||||
|
|
||||||
|
name = 'Amazon German'
|
||||||
|
description = _('Downloads metadata from amazon.de')
|
||||||
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
author = 'Sengian'
|
||||||
|
version = (1, 0, 0)
|
||||||
|
has_html_comments = True
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
try:
|
||||||
|
self.results = search(self.title, self.book_author, self.publisher,
|
||||||
|
self.isbn, max_results=10, verbose=self.verbose, lang='de')
|
||||||
|
except Exception, e:
|
||||||
|
self.exception = e
|
||||||
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
class Amazon(MetadataSource):
|
||||||
|
|
||||||
|
name = 'Amazon'
|
||||||
|
description = _('Downloads metadata from amazon.com')
|
||||||
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
author = 'Kovid Goyal & Sengian'
|
||||||
|
version = (1, 1, 0)
|
||||||
|
has_html_comments = True
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
# if not self.site_customization:
|
||||||
|
# return
|
||||||
|
try:
|
||||||
|
self.results = search(self.title, self.book_author, self.publisher,
|
||||||
|
self.isbn, max_results=10, verbose=self.verbose, lang='all')
|
||||||
|
except Exception, e:
|
||||||
|
self.exception = e
|
||||||
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
# @property
|
||||||
|
# def string_customization_help(self):
|
||||||
|
# return _('You can select here the language for metadata search with amazon.com')
|
||||||
|
|
||||||
|
|
||||||
|
def report(verbose):
|
||||||
|
if verbose:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
|
||||||
|
class Query(object):
|
||||||
|
|
||||||
|
BASE_URL_ALL = 'http://www.amazon.com'
|
||||||
|
BASE_URL_FR = 'http://www.amazon.fr'
|
||||||
|
BASE_URL_DE = 'http://www.amazon.de'
|
||||||
|
|
||||||
|
def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
|
||||||
|
max_results=20, rlang='all'):
|
||||||
|
assert not(title is None and author is None and publisher is None \
|
||||||
|
and isbn is None and keywords is None)
|
||||||
|
assert (max_results < 21)
|
||||||
|
|
||||||
|
self.max_results = int(max_results)
|
||||||
|
self.renbres = re.compile(u'\s*(\d+)\s*')
|
||||||
|
|
||||||
|
q = { 'search-alias' : 'stripbooks' ,
|
||||||
|
'unfiltered' : '1',
|
||||||
|
'field-keywords' : '',
|
||||||
|
'field-author' : '',
|
||||||
|
'field-title' : '',
|
||||||
|
'field-isbn' : '',
|
||||||
|
'field-publisher' : ''
|
||||||
|
#get to amazon detailed search page to get all options
|
||||||
|
# 'node' : '',
|
||||||
|
# 'field-binding' : '',
|
||||||
|
#before, during, after
|
||||||
|
# 'field-dateop' : '',
|
||||||
|
#month as number
|
||||||
|
# 'field-datemod' : '',
|
||||||
|
# 'field-dateyear' : '',
|
||||||
|
#french only
|
||||||
|
# 'field-collection' : '',
|
||||||
|
#many options available
|
||||||
|
}
|
||||||
|
|
||||||
|
if rlang =='all':
|
||||||
|
q['sort'] = 'relevanceexprank'
|
||||||
|
self.urldata = self.BASE_URL_ALL
|
||||||
|
elif rlang =='es':
|
||||||
|
q['sort'] = 'relevanceexprank'
|
||||||
|
q['field-language'] = 'Spanish'
|
||||||
|
self.urldata = self.BASE_URL_ALL
|
||||||
|
elif rlang =='en':
|
||||||
|
q['sort'] = 'relevanceexprank'
|
||||||
|
q['field-language'] = 'English'
|
||||||
|
self.urldata = self.BASE_URL_ALL
|
||||||
|
elif rlang =='fr':
|
||||||
|
q['sort'] = 'relevancerank'
|
||||||
|
self.urldata = self.BASE_URL_FR
|
||||||
|
elif rlang =='de':
|
||||||
|
q['sort'] = 'relevancerank'
|
||||||
|
self.urldata = self.BASE_URL_DE
|
||||||
|
self.baseurl = self.urldata
|
||||||
|
|
||||||
|
if isbn is not None:
|
||||||
|
q['field-isbn'] = isbn.replace('-', '')
|
||||||
|
else:
|
||||||
|
if title is not None:
|
||||||
|
q['field-title'] = title
|
||||||
|
if author is not None:
|
||||||
|
q['field-author'] = author
|
||||||
|
if publisher is not None:
|
||||||
|
q['field-publisher'] = publisher
|
||||||
|
if keywords is not None:
|
||||||
|
q['field-keywords'] = keywords
|
||||||
|
|
||||||
|
if isinstance(q, unicode):
|
||||||
|
q = q.encode('utf-8')
|
||||||
|
self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
|
||||||
|
|
||||||
|
def __call__(self, browser, verbose, timeout = 5.):
|
||||||
|
if verbose:
|
||||||
|
print 'Query:', self.urldata
|
||||||
|
|
||||||
|
try:
|
||||||
|
raw = browser.open_novisit(self.urldata, timeout=timeout).read()
|
||||||
|
except Exception, e:
|
||||||
|
report(verbose)
|
||||||
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
|
e.getcode() == 404:
|
||||||
|
return
|
||||||
|
raise
|
||||||
|
if '<title>404 - ' in raw:
|
||||||
|
return
|
||||||
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
|
resolve_entities=True)[0]
|
||||||
|
|
||||||
|
try:
|
||||||
|
feed = soupparser.fromstring(raw)
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
return soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
return None, self.urldata
|
||||||
|
|
||||||
|
#nb of page
|
||||||
|
try:
|
||||||
|
nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
|
||||||
|
except:
|
||||||
|
return None, self.urldata
|
||||||
|
|
||||||
|
pages =[feed]
|
||||||
|
if len(nbresults) > 1:
|
||||||
|
nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
|
||||||
|
for i in xrange(2, nbpagetoquery + 1):
|
||||||
|
try:
|
||||||
|
urldata = self.urldata + '&page=' + str(i)
|
||||||
|
raw = browser.open_novisit(urldata, timeout=timeout).read()
|
||||||
|
except Exception, e:
|
||||||
|
continue
|
||||||
|
if '<title>404 - ' in raw:
|
||||||
|
continue
|
||||||
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
|
resolve_entities=True)[0]
|
||||||
|
try:
|
||||||
|
feed = soupparser.fromstring(raw)
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
return soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
pages.append(feed)
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for x in pages:
|
||||||
|
results.extend([i.getparent().get('href') \
|
||||||
|
for i in x.xpath("//a/span[@class='srTitle']")])
|
||||||
|
return results[:self.max_results], self.baseurl
|
||||||
|
|
||||||
|
class ResultList(list):
|
||||||
|
|
||||||
|
def __init__(self, baseurl, lang = 'all'):
|
||||||
|
self.baseurl = baseurl
|
||||||
|
self.lang = lang
|
||||||
|
self.repub = re.compile(u'\((.*)\)')
|
||||||
|
self.rerat = re.compile(u'([0-9.]+)')
|
||||||
|
self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
|
||||||
|
self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
|
||||||
|
self.recom = re.compile(r'(?s)<!--.*?-->')
|
||||||
|
self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
|
||||||
|
self.reisbn = re.compile(u'(ISBN-10|ISBN-10|ASIN)', re.I)
|
||||||
|
self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
|
||||||
|
self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
|
||||||
|
self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
|
||||||
|
|
||||||
|
def strip_tags_etree(self, etreeobj, invalid_tags):
|
||||||
|
for (itag, rmv) in invalid_tags.iteritems():
|
||||||
|
if rmv:
|
||||||
|
for elts in etreeobj.getiterator(itag):
|
||||||
|
elts.drop_tree()
|
||||||
|
else:
|
||||||
|
for elts in etreeobj.getiterator(itag):
|
||||||
|
elts.drop_tag()
|
||||||
|
|
||||||
|
def clean_entry(self, entry, invalid_tags = {'script': True},
|
||||||
|
invalid_id = (), invalid_class=()):
|
||||||
|
#invalid_tags: remove tag and keep content if False else remove
|
||||||
|
#remove tags
|
||||||
|
if invalid_tags:
|
||||||
|
self.strip_tags_etree(entry, invalid_tags)
|
||||||
|
#remove id
|
||||||
|
if invalid_id:
|
||||||
|
for eltid in invalid_id:
|
||||||
|
elt = entry.get_element_by_id(eltid)
|
||||||
|
if elt is not None:
|
||||||
|
elt.drop_tree()
|
||||||
|
#remove class
|
||||||
|
if invalid_class:
|
||||||
|
for eltclass in invalid_class:
|
||||||
|
elts = entry.find_class(eltclass)
|
||||||
|
if elts is not None:
|
||||||
|
for elt in elts:
|
||||||
|
elt.drop_tree()
|
||||||
|
|
||||||
|
def get_title(self, entry):
|
||||||
|
title = entry.get_element_by_id('btAsinTitle')
|
||||||
|
if title is not None:
|
||||||
|
title = title.text
|
||||||
|
return unicode(title.replace('\n', '').strip())
|
||||||
|
|
||||||
|
def get_authors(self, entry):
|
||||||
|
author = entry.get_element_by_id('btAsinTitle')
|
||||||
|
while author.getparent().tag != 'div':
|
||||||
|
author = author.getparent()
|
||||||
|
author = author.getparent()
|
||||||
|
authortext = []
|
||||||
|
for x in author.getiterator('a'):
|
||||||
|
authortext.append(unicode(x.text_content().strip()))
|
||||||
|
return authortext
|
||||||
|
|
||||||
|
def get_description(self, entry, verbose):
|
||||||
|
try:
|
||||||
|
description = entry.get_element_by_id("productDescription").find("div[@class='content']")
|
||||||
|
inv_class = ('seeAll', 'emptyClear')
|
||||||
|
inv_tags ={'img': True, 'a': False}
|
||||||
|
self.clean_entry(description, invalid_tags=inv_tags, invalid_class=inv_class)
|
||||||
|
description = html.tostring(description, method='html', encoding=unicode).strip()
|
||||||
|
# remove all attributes from tags
|
||||||
|
description = self.reattr.sub(r'<\1>', description)
|
||||||
|
# Remove the notice about text referring to out of print editions
|
||||||
|
description = self.reoutp.sub('', description)
|
||||||
|
# Remove comments
|
||||||
|
description = self.recom.sub('', description)
|
||||||
|
return unicode(sanitize_comments_html(description))
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_tags(self, entry, browser, verbose):
|
||||||
|
try:
|
||||||
|
tags = entry.get_element_by_id('tagContentHolder')
|
||||||
|
testptag = tags.find_class('see-all')
|
||||||
|
if testptag:
|
||||||
|
for x in testptag:
|
||||||
|
alink = x.xpath('descendant-or-self::a')
|
||||||
|
if alink:
|
||||||
|
if alink[0].get('class') == 'tgJsActive':
|
||||||
|
continue
|
||||||
|
link = self.baseurl + alink[0].get('href')
|
||||||
|
entry = self.get_individual_metadata(browser, link, verbose)
|
||||||
|
tags = entry.get_element_by_id('tagContentHolder')
|
||||||
|
break
|
||||||
|
tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
tags = []
|
||||||
|
return tags
|
||||||
|
|
||||||
|
def get_book_info(self, entry, mi, verbose):
|
||||||
|
try:
|
||||||
|
entry = entry.get_element_by_id('SalesRank').getparent()
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
for z in entry.getiterator('h2'):
|
||||||
|
if self.reprod.search(z.text_content()):
|
||||||
|
entry = z.getparent().find("div[@class='content']/ul")
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
return mi
|
||||||
|
elts = entry.findall('li')
|
||||||
|
#pub & date
|
||||||
|
elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
|
||||||
|
if elt:
|
||||||
|
pub = elt[0].find('b').tail
|
||||||
|
mi.publisher = unicode(self.repub.sub('', pub).strip())
|
||||||
|
d = self.repub.search(pub)
|
||||||
|
if d is not None:
|
||||||
|
d = d.group(1)
|
||||||
|
try:
|
||||||
|
default = utcnow().replace(day=15)
|
||||||
|
if self.lang != 'all':
|
||||||
|
d = replace_months(d, self.lang)
|
||||||
|
d = parse_date(d, assume_utc=True, default=default)
|
||||||
|
mi.pubdate = d
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
#ISBN
|
||||||
|
elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
|
||||||
|
if elt:
|
||||||
|
isbn = elt[0].find('b').tail.replace('-', '').strip()
|
||||||
|
if check_isbn(isbn):
|
||||||
|
mi.isbn = unicode(isbn)
|
||||||
|
elif len(elt) > 1:
|
||||||
|
isbn = elt[1].find('b').tail.replace('-', '').strip()
|
||||||
|
if check_isbn(isbn):
|
||||||
|
mi.isbn = unicode(isbn)
|
||||||
|
#Langue
|
||||||
|
elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
|
||||||
|
if elt:
|
||||||
|
langue = elt[0].find('b').tail.strip()
|
||||||
|
if langue:
|
||||||
|
mi.language = unicode(langue)
|
||||||
|
#ratings
|
||||||
|
elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
|
||||||
|
if elt:
|
||||||
|
ratings = elt[0].find_class('swSprite')
|
||||||
|
if ratings:
|
||||||
|
ratings = self.rerat.findall(ratings[0].get('title'))
|
||||||
|
if len(ratings) == 2:
|
||||||
|
mi.rating = float(ratings[0])/float(ratings[1]) * 5
|
||||||
|
return mi
|
||||||
|
|
||||||
|
def fill_MI(self, entry, title, authors, browser, verbose):
|
||||||
|
mi = MetaInformation(title, authors)
|
||||||
|
mi.author_sort = authors_to_sort_string(authors)
|
||||||
|
mi.comments = self.get_description(entry, verbose)
|
||||||
|
mi = self.get_book_info(entry, mi, verbose)
|
||||||
|
mi.tags = self.get_tags(entry, browser, verbose)
|
||||||
|
return mi
|
||||||
|
|
||||||
|
def get_individual_metadata(self, browser, linkdata, verbose):
|
||||||
|
try:
|
||||||
|
raw = browser.open_novisit(linkdata).read()
|
||||||
|
except Exception, e:
|
||||||
|
report(verbose)
|
||||||
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
|
e.getcode() == 404:
|
||||||
|
return
|
||||||
|
raise
|
||||||
|
if '<title>404 - ' in raw:
|
||||||
|
report(verbose)
|
||||||
|
return
|
||||||
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
|
resolve_entities=True)[0]
|
||||||
|
try:
|
||||||
|
return soupparser.fromstring(raw)
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
return soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
return
|
||||||
|
|
||||||
|
def populate(self, entries, browser, verbose=False):
|
||||||
|
for x in entries:
|
||||||
|
try:
|
||||||
|
entry = self.get_individual_metadata(browser, x, verbose)
|
||||||
|
# clean results
|
||||||
|
# inv_ids = ('divsinglecolumnminwidth', 'sims.purchase', 'AutoBuyXGetY', 'A9AdsMiddleBoxTop')
|
||||||
|
# inv_class = ('buyingDetailsGrid', 'productImageGrid')
|
||||||
|
# inv_tags ={'script': True, 'style': True, 'form': False}
|
||||||
|
# self.clean_entry(entry, invalid_id=inv_ids)
|
||||||
|
title = self.get_title(entry)
|
||||||
|
authors = self.get_authors(entry)
|
||||||
|
except Exception, e:
|
||||||
|
if verbose:
|
||||||
|
print 'Failed to get all details for an entry'
|
||||||
|
print e
|
||||||
|
print 'URL who failed:', x
|
||||||
|
report(verbose)
|
||||||
|
continue
|
||||||
|
self.append(self.fill_MI(entry, title, authors, browser, verbose))
|
||||||
|
|
||||||
|
|
||||||
|
def search(title=None, author=None, publisher=None, isbn=None,
           max_results=5, verbose=False, keywords=None, lang='all'):
    """Search Amazon and return a populated ResultList, or None when the
    query produced no entries."""
    br = browser()
    query = Query(title=title, author=author, isbn=isbn, publisher=publisher,
            keywords=keywords, max_results=max_results, rlang=lang)
    entries, baseurl = query(br, verbose)

    if entries is None or len(entries) == 0:
        return

    # Build the list of MetaInformation entries from the result links.
    ans = ResultList(baseurl, lang)
    ans.populate(entries, br, verbose)
    return ans
|
||||||
|
|
||||||
|
def option_parser():
    """Build the command-line parser for the Amazon metadata tool."""
    usage = textwrap.dedent(_('''\
        %prog [options]

        Fetch book metadata from Amazon. You must specify one of title, author,
        ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
        so you should make your query as specific as possible.
        You can chose the language for metadata retrieval:
        All & english & french & german & spanish
        '''
        ))
    parser = OptionParser(usage)
    parser.add_option('-t', '--title', help='Book title')
    parser.add_option('-a', '--author', help='Book author(s)')
    parser.add_option('-p', '--publisher', help='Book publisher')
    parser.add_option('-i', '--isbn', help='Book ISBN')
    parser.add_option('-k', '--keywords', help='Keywords')
    parser.add_option('-m', '--max-results', default=10,
                      help='Maximum number of results to fetch')
    parser.add_option('-l', '--lang', default='all',
                      help='Chosen language for metadata search (all, en, fr, es, de)')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help='Be more verbose about errors')
    return parser
|
||||||
|
|
||||||
|
def main(args=sys.argv):
|
||||||
|
parser = option_parser()
|
||||||
|
opts, args = parser.parse_args(args)
|
||||||
|
try:
|
||||||
|
results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
|
||||||
|
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
|
||||||
|
lang=opts.lang)
|
||||||
|
except AssertionError:
|
||||||
|
report(True)
|
||||||
|
parser.print_help()
|
||||||
|
return 1
|
||||||
|
if results is None or len(results) == 0:
|
||||||
|
print 'No result found for this search!'
|
||||||
|
return 0
|
||||||
|
for result in results:
|
||||||
|
print unicode(result).encode(preferred_encoding, 'replace')
|
||||||
|
print
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
390
src/calibre/ebooks/metadata/fictionwise.py
Normal file
390
src/calibre/ebooks/metadata/fictionwise.py
Normal file
@ -0,0 +1,390 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, sengian <sengian1@gmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, textwrap, re, traceback, socket
|
||||||
|
from urllib import urlencode
|
||||||
|
|
||||||
|
from lxml.html import soupparser, tostring
|
||||||
|
|
||||||
|
from calibre import browser, preferred_encoding
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
||||||
|
authors_to_sort_string
|
||||||
|
from calibre.library.comments import sanitize_comments_html
|
||||||
|
from calibre.ebooks.metadata.fetch import MetadataSource
|
||||||
|
from calibre.utils.config import OptionParser
|
||||||
|
from calibre.utils.date import parse_date, utcnow
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
|
|
||||||
|
class Fictionwise(MetadataSource): # {{{
|
||||||
|
|
||||||
|
author = 'Sengian'
|
||||||
|
name = 'Fictionwise'
|
||||||
|
description = _('Downloads metadata from Fictionwise')
|
||||||
|
|
||||||
|
has_html_comments = True
|
||||||
|
|
||||||
|
def fetch(self):
|
||||||
|
try:
|
||||||
|
self.results = search(self.title, self.book_author, self.publisher,
|
||||||
|
self.isbn, max_results=10, verbose=self.verbose)
|
||||||
|
except Exception, e:
|
||||||
|
self.exception = e
|
||||||
|
self.tb = traceback.format_exc()
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
class FictionwiseError(Exception):
    """Raised when Fictionwise cannot be reached or returns an error."""
    pass
|
||||||
|
|
||||||
|
def report(verbose):
    """Print the current exception traceback when verbose output is on."""
    if not verbose:
        return
    traceback.print_exc()
|
||||||
|
|
||||||
|
class Query(object):
|
||||||
|
|
||||||
|
BASE_URL = 'http://www.fictionwise.com/servlet/mw'
|
||||||
|
|
||||||
|
def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
|
||||||
|
assert not(title is None and author is None and publisher is None and keywords is None)
|
||||||
|
assert (max_results < 21)
|
||||||
|
|
||||||
|
self.max_results = int(max_results)
|
||||||
|
q = { 'template' : 'searchresults_adv.htm' ,
|
||||||
|
'searchtitle' : '',
|
||||||
|
'searchauthor' : '',
|
||||||
|
'searchpublisher' : '',
|
||||||
|
'searchkeyword' : '',
|
||||||
|
#possibilities startoflast, fullname, lastfirst
|
||||||
|
'searchauthortype' : 'startoflast',
|
||||||
|
'searchcategory' : '',
|
||||||
|
'searchcategory2' : '',
|
||||||
|
'searchprice_s' : '0',
|
||||||
|
'searchprice_e' : 'ANY',
|
||||||
|
'searchformat' : '',
|
||||||
|
'searchgeo' : 'US',
|
||||||
|
'searchfwdatetype' : '',
|
||||||
|
#maybe use dates fields if needed?
|
||||||
|
#'sortorder' : 'DESC',
|
||||||
|
#many options available: b.SortTitle, a.SortName,
|
||||||
|
#b.DateFirstPublished, b.FWPublishDate
|
||||||
|
'sortby' : 'b.SortTitle'
|
||||||
|
}
|
||||||
|
if title is not None:
|
||||||
|
q['searchtitle'] = title
|
||||||
|
if author is not None:
|
||||||
|
q['searchauthor'] = author
|
||||||
|
if publisher is not None:
|
||||||
|
q['searchpublisher'] = publisher
|
||||||
|
if keywords is not None:
|
||||||
|
q['searchkeyword'] = keywords
|
||||||
|
|
||||||
|
if isinstance(q, unicode):
|
||||||
|
q = q.encode('utf-8')
|
||||||
|
self.urldata = urlencode(q)
|
||||||
|
|
||||||
|
def __call__(self, browser, verbose, timeout = 5.):
|
||||||
|
if verbose:
|
||||||
|
print _('Query: %s') % self.BASE_URL+self.urldata
|
||||||
|
|
||||||
|
try:
|
||||||
|
raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
|
||||||
|
except Exception, e:
|
||||||
|
report(verbose)
|
||||||
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
|
e.getcode() == 404:
|
||||||
|
return
|
||||||
|
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
||||||
|
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
|
||||||
|
raise FictionwiseError(_('Fictionwise encountered an error.'))
|
||||||
|
if '<title>404 - ' in raw:
|
||||||
|
return
|
||||||
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
|
resolve_entities=True)[0]
|
||||||
|
try:
|
||||||
|
feed = soupparser.fromstring(raw)
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
feed = soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# get list of results as links
|
||||||
|
results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
|
||||||
|
results = results[:self.max_results]
|
||||||
|
results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
|
||||||
|
#return feed if no links ie normally a single book or nothing
|
||||||
|
if not results:
|
||||||
|
results = [feed]
|
||||||
|
return results
|
||||||
|
|
||||||
|
class ResultList(list):
|
||||||
|
|
||||||
|
BASE_URL = 'http://www.fictionwise.com'
|
||||||
|
COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.retitle = re.compile(r'\[[^\[\]]+\]')
|
||||||
|
self.rechkauth = re.compile(r'.*book\s*by', re.I)
|
||||||
|
self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
|
||||||
|
self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
|
||||||
|
self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
|
||||||
|
self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
|
||||||
|
self.resplitbr = re.compile(r'<br[^>]*>', re.I)
|
||||||
|
self.recomment = re.compile(r'(?s)<!--.*?-->')
|
||||||
|
self.reimg = re.compile(r'<img[^>]*>', re.I)
|
||||||
|
self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
|
||||||
|
self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
|
||||||
|
self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
|
||||||
|
self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
|
||||||
|
self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
|
||||||
|
|
||||||
|
def strip_tags_etree(self, etreeobj, invalid_tags):
|
||||||
|
for (itag, rmv) in invalid_tags.iteritems():
|
||||||
|
if rmv:
|
||||||
|
for elts in etreeobj.getiterator(itag):
|
||||||
|
elts.drop_tree()
|
||||||
|
else:
|
||||||
|
for elts in etreeobj.getiterator(itag):
|
||||||
|
elts.drop_tag()
|
||||||
|
|
||||||
|
def clean_entry(self, entry, invalid_tags = {'script': True},
|
||||||
|
invalid_id = (), invalid_class=(), invalid_xpath = ()):
|
||||||
|
#invalid_tags: remove tag and keep content if False else remove
|
||||||
|
#remove tags
|
||||||
|
if invalid_tags:
|
||||||
|
self.strip_tags_etree(entry, invalid_tags)
|
||||||
|
#remove xpath
|
||||||
|
if invalid_xpath:
|
||||||
|
for eltid in invalid_xpath:
|
||||||
|
elt = entry.xpath(eltid)
|
||||||
|
for el in elt:
|
||||||
|
el.drop_tree()
|
||||||
|
#remove id
|
||||||
|
if invalid_id:
|
||||||
|
for eltid in invalid_id:
|
||||||
|
elt = entry.get_element_by_id(eltid)
|
||||||
|
if elt is not None:
|
||||||
|
elt.drop_tree()
|
||||||
|
#remove class
|
||||||
|
if invalid_class:
|
||||||
|
for eltclass in invalid_class:
|
||||||
|
elts = entry.find_class(eltclass)
|
||||||
|
if elts is not None:
|
||||||
|
for elt in elts:
|
||||||
|
elt.drop_tree()
|
||||||
|
|
||||||
|
def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
|
||||||
|
out = tostring(entry, pretty_print=prettyout)
|
||||||
|
#try to work around tostring to remove this encoding for exemle
|
||||||
|
reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
|
||||||
|
return reclean.sub('', out)
|
||||||
|
|
||||||
|
def get_title(self, entry):
|
||||||
|
title = entry.findtext('./')
|
||||||
|
return self.retitle.sub('', title).strip()
|
||||||
|
|
||||||
|
def get_authors(self, entry):
|
||||||
|
authortext = entry.find('./br').tail
|
||||||
|
if not self.rechkauth.search(authortext):
|
||||||
|
return []
|
||||||
|
authortext = self.rechkauth.sub('', authortext)
|
||||||
|
return [a.strip() for a in authortext.split('&')]
|
||||||
|
|
||||||
|
def get_rating(self, entrytable, verbose):
|
||||||
|
nbcomment = tostring(entrytable.getprevious())
|
||||||
|
try:
|
||||||
|
nbcomment = self.renbcom.search(nbcomment).group("nbcom")
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
return None
|
||||||
|
hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
|
||||||
|
float(image.get('height', default=0))) \
|
||||||
|
for image in entrytable.getiterator('img'))
|
||||||
|
#ratings as x/5
|
||||||
|
return float(1.25*sum(k*v for (k, v) in hval.iteritems())/sum(hval.itervalues()))
|
||||||
|
|
||||||
|
def get_description(self, entry):
|
||||||
|
description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
|
||||||
|
description = self.redesc.search(description)
|
||||||
|
if not description or not description.group("desc"):
|
||||||
|
return None
|
||||||
|
#remove invalid tags
|
||||||
|
description = self.reimg.sub('', description.group("desc"))
|
||||||
|
description = self.recomment.sub('', description)
|
||||||
|
description = self.resanitize.sub('', sanitize_comments_html(description))
|
||||||
|
return _('SUMMARY:\n %s') % re.sub(r'\n\s+</p>','\n</p>', description)
|
||||||
|
|
||||||
|
def get_publisher(self, entry):
|
||||||
|
publisher = self.output_entry(entry.xpath('./p')[1])
|
||||||
|
publisher = filter(lambda x: self.repub.search(x) is not None,
|
||||||
|
self.resplitbr.split(publisher))
|
||||||
|
if not len(publisher):
|
||||||
|
return None
|
||||||
|
publisher = self.repub.sub('', publisher[0])
|
||||||
|
return publisher.split(',')[0].strip()
|
||||||
|
|
||||||
|
def get_tags(self, entry):
|
||||||
|
tag = self.output_entry(entry.xpath('./p')[1])
|
||||||
|
tag = filter(lambda x: self.retag.search(x) is not None,
|
||||||
|
self.resplitbr.split(tag))
|
||||||
|
if not len(tag):
|
||||||
|
return []
|
||||||
|
return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))
|
||||||
|
|
||||||
|
def get_date(self, entry, verbose):
|
||||||
|
date = self.output_entry(entry.xpath('./p')[1])
|
||||||
|
date = filter(lambda x: self.redate.search(x) is not None,
|
||||||
|
self.resplitbr.split(date))
|
||||||
|
if not len(date):
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
d = self.redate.sub('', date[0])
|
||||||
|
if d:
|
||||||
|
default = utcnow().replace(day=15)
|
||||||
|
d = parse_date(d, assume_utc=True, default=default)
|
||||||
|
else:
|
||||||
|
d = None
|
||||||
|
except:
|
||||||
|
report(verbose)
|
||||||
|
d = None
|
||||||
|
return d
|
||||||
|
|
||||||
|
def get_ISBN(self, entry):
|
||||||
|
isbns = self.output_entry(entry.xpath('./p')[2])
|
||||||
|
isbns = filter(lambda x: self.reisbn.search(x) is not None,
|
||||||
|
self.resplitbrdiv.split(isbns))
|
||||||
|
if not len(isbns):
|
||||||
|
return None
|
||||||
|
isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
|
||||||
|
return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
|
||||||
|
|
||||||
|
def fill_MI(self, entry, title, authors, ratings, verbose):
|
||||||
|
mi = MetaInformation(title, authors)
|
||||||
|
mi.rating = ratings
|
||||||
|
mi.comments = self.get_description(entry)
|
||||||
|
mi.publisher = self.get_publisher(entry)
|
||||||
|
mi.tags = self.get_tags(entry)
|
||||||
|
mi.pubdate = self.get_date(entry, verbose)
|
||||||
|
mi.isbn = self.get_ISBN(entry)
|
||||||
|
mi.author_sort = authors_to_sort_string(authors)
|
||||||
|
return mi
|
||||||
|
|
||||||
|
def get_individual_metadata(self, browser, linkdata, verbose):
|
||||||
|
try:
|
||||||
|
raw = browser.open_novisit(self.BASE_URL + linkdata).read()
|
||||||
|
except Exception, e:
|
||||||
|
report(verbose)
|
||||||
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
|
e.getcode() == 404:
|
||||||
|
return
|
||||||
|
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
||||||
|
raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
|
||||||
|
raise FictionwiseError(_('Fictionwise encountered an error.'))
|
||||||
|
if '<title>404 - ' in raw:
|
||||||
|
report(verbose)
|
||||||
|
return
|
||||||
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
|
resolve_entities=True)[0]
|
||||||
|
try:
|
||||||
|
return soupparser.fromstring(raw)
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
return soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def populate(self, entries, browser, verbose=False):
|
||||||
|
inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
|
||||||
|
'ul': False, 'span': False}
|
||||||
|
inv_xpath =('./table',)
|
||||||
|
#single entry
|
||||||
|
if len(entries) == 1 and not isinstance(entries[0], str):
|
||||||
|
try:
|
||||||
|
entry = entries.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
|
||||||
|
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
|
||||||
|
title = self.get_title(entry)
|
||||||
|
#maybe strenghten the search
|
||||||
|
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
|
||||||
|
authors = self.get_authors(entry)
|
||||||
|
except Exception, e:
|
||||||
|
if verbose:
|
||||||
|
print _('Failed to get all details for an entry')
|
||||||
|
print e
|
||||||
|
return
|
||||||
|
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
|
||||||
|
else:
|
||||||
|
#multiple entries
|
||||||
|
for x in entries:
|
||||||
|
try:
|
||||||
|
entry = self.get_individual_metadata(browser, x, verbose)
|
||||||
|
entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
|
||||||
|
self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
|
||||||
|
title = self.get_title(entry)
|
||||||
|
#maybe strenghten the search
|
||||||
|
ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
|
||||||
|
authors = self.get_authors(entry)
|
||||||
|
except Exception, e:
|
||||||
|
if verbose:
|
||||||
|
print _('Failed to get all details for an entry')
|
||||||
|
print e
|
||||||
|
continue
|
||||||
|
self.append(self.fill_MI(entry, title, authors, ratings, verbose))
|
||||||
|
|
||||||
|
|
||||||
|
def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=5,
           keywords=None):
    """Search Fictionwise and return a populated ResultList, or None.

    isbn and min_viewability are accepted for interface compatibility with
    the other metadata back ends but are not used by the Fictionwise query.
    """
    br = browser()
    entries = Query(title=title, author=author, publisher=publisher,
        keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)

    # BUGFIX: the query returns None on a 404 or parse failure; guard it
    # so populate() does not crash on len(None).
    if not entries:
        return None

    #List of entry
    ans = ResultList()
    ans.populate(entries, br, verbose)
    return ans
|
||||||
|
|
||||||
|
|
||||||
|
def option_parser():
    """Build the command-line parser for the Fictionwise metadata tool."""
    usage = textwrap.dedent(_('''\
        %prog [options]

        Fetch book metadata from Fictionwise. You must specify one of title, author,
        or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
        so you should make your query as specific as possible.
    '''))
    parser = OptionParser(usage)
    parser.add_option('-t', '--title', help=_('Book title'))
    parser.add_option('-a', '--author', help=_('Book author(s)'))
    parser.add_option('-p', '--publisher', help=_('Book publisher'))
    parser.add_option('-k', '--keywords', help=_('Keywords'))
    parser.add_option('-m', '--max-results', default=20,
                      help=_('Maximum number of results to fetch'))
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=_('Be more verbose about errors'))
    return parser
|
||||||
|
|
||||||
|
def main(args=sys.argv):
|
||||||
|
parser = option_parser()
|
||||||
|
opts, args = parser.parse_args(args)
|
||||||
|
try:
|
||||||
|
results = search(opts.title, opts.author, publisher=opts.publisher,
|
||||||
|
keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
|
||||||
|
except AssertionError:
|
||||||
|
report(True)
|
||||||
|
parser.print_help()
|
||||||
|
return 1
|
||||||
|
if results is None or len(results) == 0:
|
||||||
|
print _('No result found for this search!')
|
||||||
|
return 0
|
||||||
|
for result in results:
|
||||||
|
print unicode(result).encode(preferred_encoding, 'replace')
|
||||||
|
print
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
@ -10,7 +10,8 @@ from copy import deepcopy
|
|||||||
|
|
||||||
from lxml.html import soupparser
|
from lxml.html import soupparser
|
||||||
|
|
||||||
from calibre.utils.date import parse_date, utcnow
|
from calibre.utils.date import parse_date, utcnow, replace_months
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars
|
||||||
from calibre import browser, preferred_encoding
|
from calibre import browser, preferred_encoding
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
|
||||||
@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload):
|
|||||||
traceback.format_exc(), self.name))
|
traceback.format_exc(), self.name))
|
||||||
|
|
||||||
|
|
||||||
|
class NiceBooksError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class ISBNNotFound(NiceBooksError):
|
||||||
|
pass
|
||||||
|
|
||||||
def report(verbose):
|
def report(verbose):
|
||||||
if verbose:
|
if verbose:
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
def replace_monthsfr(datefr):
    """Replace a French month name in *datefr* with its English
    three-letter abbreviation so that parse_date() can handle it.

    Returns the (possibly unchanged) date string.
    """
    # Accented letters are matched with '.' so the patterns work
    # regardless of the source string's encoding.
    frtoen = {
        u'[jJ]anvier': u'jan',
        u'[fF].vrier': u'feb',
        u'[mM]ars': u'mar',
        u'[aA]vril': u'apr',
        u'[mM]ai': u'may',
        u'[jJ]uin': u'jun',
        u'[jJ]uillet': u'jul',
        u'[aA]o.t': u'aug',
        u'[sS]eptembre': u'sep',
        u'[Oo]ctobre': u'oct',
        u'[nN]ovembre': u'nov',
        u'[dD].cembre': u'dec' }
    # BUGFIX: initialize tmp so the function is safe even if no pattern
    # is ever tried; also modernized '<>' to '!=' and iterate items().
    tmp = datefr
    for pattern, month in frtoen.items():
        tmp = re.sub(pattern, month, datefr)
        if tmp != datefr:
            # A month matched; a date contains at most one month name.
            break
    return tmp
|
|
||||||
|
|
||||||
class Query(object):
|
class Query(object):
|
||||||
|
|
||||||
BASE_URL = 'http://fr.nicebooks.com/'
|
BASE_URL = 'http://fr.nicebooks.com/'
|
||||||
@ -119,7 +105,7 @@ class Query(object):
|
|||||||
|
|
||||||
def __call__(self, browser, verbose, timeout = 5.):
|
def __call__(self, browser, verbose, timeout = 5.):
|
||||||
if verbose:
|
if verbose:
|
||||||
print 'Query:', self.BASE_URL+self.urldata
|
print _('Query: %s') % self.BASE_URL+self.urldata
|
||||||
|
|
||||||
try:
|
try:
|
||||||
raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
|
raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
|
||||||
@ -128,7 +114,9 @@ class Query(object):
|
|||||||
if callable(getattr(e, 'getcode', None)) and \
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
e.getcode() == 404:
|
e.getcode() == 404:
|
||||||
return
|
return
|
||||||
raise
|
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
||||||
|
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
||||||
|
raise NiceBooksError(_('Nicebooks encountered an error.'))
|
||||||
if '<title>404 - ' in raw:
|
if '<title>404 - ' in raw:
|
||||||
return
|
return
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
raw = xml_to_unicode(raw, strip_encoding_pats=True,
|
||||||
@ -136,7 +124,11 @@ class Query(object):
|
|||||||
try:
|
try:
|
||||||
feed = soupparser.fromstring(raw)
|
feed = soupparser.fromstring(raw)
|
||||||
except:
|
except:
|
||||||
return
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
feed = soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
return None
|
||||||
|
|
||||||
#nb of page to call
|
#nb of page to call
|
||||||
try:
|
try:
|
||||||
@ -161,7 +153,11 @@ class Query(object):
|
|||||||
try:
|
try:
|
||||||
feed = soupparser.fromstring(raw)
|
feed = soupparser.fromstring(raw)
|
||||||
except:
|
except:
|
||||||
continue
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
feed = soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
continue
|
||||||
pages.append(feed)
|
pages.append(feed)
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
@ -180,14 +176,12 @@ class ResultList(list):
|
|||||||
self.reautclean = re.compile(u'\s*\(.*\)\s*')
|
self.reautclean = re.compile(u'\s*\(.*\)\s*')
|
||||||
|
|
||||||
def get_title(self, entry):
|
def get_title(self, entry):
|
||||||
# title = deepcopy(entry.find("div[@id='book-info']"))
|
|
||||||
title = deepcopy(entry)
|
title = deepcopy(entry)
|
||||||
title.remove(title.find("dl[@title='Informations sur le livre']"))
|
title.remove(title.find("dl[@title='Informations sur le livre']"))
|
||||||
title = ' '.join([i.text_content() for i in title.iterchildren()])
|
title = ' '.join([i.text_content() for i in title.iterchildren()])
|
||||||
return unicode(title.replace('\n', ''))
|
return unicode(title.replace('\n', ''))
|
||||||
|
|
||||||
def get_authors(self, entry):
|
def get_authors(self, entry):
|
||||||
# author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
|
|
||||||
author = entry.find("dl[@title='Informations sur le livre']")
|
author = entry.find("dl[@title='Informations sur le livre']")
|
||||||
authortext = []
|
authortext = []
|
||||||
for x in author.getiterator('dt'):
|
for x in author.getiterator('dt'):
|
||||||
@ -223,7 +217,7 @@ class ResultList(list):
|
|||||||
d = x.getnext().text_content()
|
d = x.getnext().text_content()
|
||||||
try:
|
try:
|
||||||
default = utcnow().replace(day=15)
|
default = utcnow().replace(day=15)
|
||||||
d = replace_monthsfr(d)
|
d = replace_months(d, 'fr')
|
||||||
d = parse_date(d, assume_utc=True, default=default)
|
d = parse_date(d, assume_utc=True, default=default)
|
||||||
mi.pubdate = d
|
mi.pubdate = d
|
||||||
except:
|
except:
|
||||||
@ -234,11 +228,6 @@ class ResultList(list):
|
|||||||
mi = MetaInformation(title, authors)
|
mi = MetaInformation(title, authors)
|
||||||
mi.author_sort = authors_to_sort_string(authors)
|
mi.author_sort = authors_to_sort_string(authors)
|
||||||
mi.comments = self.get_description(entry, verbose)
|
mi.comments = self.get_description(entry, verbose)
|
||||||
# entry = entry.find("dl[@title='Informations sur le livre']")
|
|
||||||
# mi.publisher = self.get_publisher(entry)
|
|
||||||
# mi.pubdate = self.get_date(entry, verbose)
|
|
||||||
# mi.isbn = self.get_ISBN(entry)
|
|
||||||
# mi.language = self.get_language(entry)
|
|
||||||
return self.get_book_info(entry, mi, verbose)
|
return self.get_book_info(entry, mi, verbose)
|
||||||
|
|
||||||
def get_individual_metadata(self, browser, linkdata, verbose):
|
def get_individual_metadata(self, browser, linkdata, verbose):
|
||||||
@ -249,7 +238,9 @@ class ResultList(list):
|
|||||||
if callable(getattr(e, 'getcode', None)) and \
|
if callable(getattr(e, 'getcode', None)) and \
|
||||||
e.getcode() == 404:
|
e.getcode() == 404:
|
||||||
return
|
return
|
||||||
raise
|
if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
|
||||||
|
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
||||||
|
raise NiceBooksError(_('Nicebooks encountered an error.'))
|
||||||
if '<title>404 - ' in raw:
|
if '<title>404 - ' in raw:
|
||||||
report(verbose)
|
report(verbose)
|
||||||
return
|
return
|
||||||
@ -258,7 +249,11 @@ class ResultList(list):
|
|||||||
try:
|
try:
|
||||||
feed = soupparser.fromstring(raw)
|
feed = soupparser.fromstring(raw)
|
||||||
except:
|
except:
|
||||||
return
|
try:
|
||||||
|
#remove ASCII invalid chars
|
||||||
|
feed = soupparser.fromstring(clean_ascii_chars(raw))
|
||||||
|
except:
|
||||||
|
return None
|
||||||
|
|
||||||
# get results
|
# get results
|
||||||
return feed.xpath("//div[@id='container']")[0]
|
return feed.xpath("//div[@id='container']")[0]
|
||||||
@ -292,13 +287,6 @@ class ResultList(list):
|
|||||||
continue
|
continue
|
||||||
self.append(self.fill_MI(entry, title, authors, verbose))
|
self.append(self.fill_MI(entry, title, authors, verbose))
|
||||||
|
|
||||||
|
|
||||||
class NiceBooksError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class ISBNNotFound(NiceBooksError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class Covers(object):
|
class Covers(object):
|
||||||
|
|
||||||
def __init__(self, isbn = None):
|
def __init__(self, isbn = None):
|
||||||
@ -329,11 +317,10 @@ class Covers(object):
|
|||||||
return cover, ext if ext else 'jpg'
|
return cover, ext if ext else 'jpg'
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
||||||
err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
|
||||||
raise err
|
|
||||||
if not len(self.urlimg):
|
if not len(self.urlimg):
|
||||||
if not self.isbnf:
|
if not self.isbnf:
|
||||||
raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.'))
|
raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
|
||||||
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
|
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
|
||||||
|
|
||||||
|
|
||||||
@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None,
|
|||||||
max_results=5, verbose=False, keywords=None):
|
max_results=5, verbose=False, keywords=None):
|
||||||
br = browser()
|
br = browser()
|
||||||
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
|
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
|
||||||
keywords=keywords, max_results=max_results)(br, verbose)
|
keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
|
||||||
|
|
||||||
if entries is None or len(entries) == 0:
|
if entries is None or len(entries) == 0:
|
||||||
return
|
return None
|
||||||
|
|
||||||
#List of entry
|
#List of entry
|
||||||
ans = ResultList()
|
ans = ResultList()
|
||||||
@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.):
|
|||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
parser = OptionParser(textwrap.dedent(\
|
parser = OptionParser(textwrap.dedent(\
|
||||||
'''\
|
_('''\
|
||||||
%prog [options]
|
%prog [options]
|
||||||
|
|
||||||
Fetch book metadata from Nicebooks. You must specify one of title, author,
|
Fetch book metadata from Nicebooks. You must specify one of title, author,
|
||||||
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
|
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
|
||||||
so you should make your query as specific as possible.
|
so you should make your query as specific as possible.
|
||||||
It can also get covers if the option is activated.
|
It can also get covers if the option is activated.
|
||||||
'''
|
''')
|
||||||
))
|
))
|
||||||
parser.add_option('-t', '--title', help='Book title')
|
parser.add_option('-t', '--title', help=_('Book title'))
|
||||||
parser.add_option('-a', '--author', help='Book author(s)')
|
parser.add_option('-a', '--author', help=_('Book author(s)'))
|
||||||
parser.add_option('-p', '--publisher', help='Book publisher')
|
parser.add_option('-p', '--publisher', help=_('Book publisher'))
|
||||||
parser.add_option('-i', '--isbn', help='Book ISBN')
|
parser.add_option('-i', '--isbn', help=_('Book ISBN'))
|
||||||
parser.add_option('-k', '--keywords', help='Keywords')
|
parser.add_option('-k', '--keywords', help=_('Keywords'))
|
||||||
parser.add_option('-c', '--covers', default=0,
|
parser.add_option('-c', '--covers', default=0,
|
||||||
help='Covers: 1-Check/ 2-Download')
|
help=_('Covers: 1-Check/ 2-Download'))
|
||||||
parser.add_option('-p', '--coverspath', default='',
|
parser.add_option('-p', '--coverspath', default='',
|
||||||
help='Covers files path')
|
help=_('Covers files path'))
|
||||||
parser.add_option('-m', '--max-results', default=20,
|
parser.add_option('-m', '--max-results', default=20,
|
||||||
help='Maximum number of results to fetch')
|
help=_('Maximum number of results to fetch'))
|
||||||
parser.add_option('-v', '--verbose', default=0, action='count',
|
parser.add_option('-v', '--verbose', default=0, action='count',
|
||||||
help='Be more verbose about errors')
|
help=_('Be more verbose about errors'))
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
@ -400,15 +387,15 @@ def main(args=sys.argv):
|
|||||||
parser.print_help()
|
parser.print_help()
|
||||||
return 1
|
return 1
|
||||||
if results is None or len(results) == 0:
|
if results is None or len(results) == 0:
|
||||||
print 'No result found for this search!'
|
print _('No result found for this search!')
|
||||||
return 0
|
return 0
|
||||||
for result in results:
|
for result in results:
|
||||||
print unicode(result).encode(preferred_encoding, 'replace')
|
print unicode(result).encode(preferred_encoding, 'replace')
|
||||||
covact = int(opts.covers)
|
covact = int(opts.covers)
|
||||||
if covact == 1:
|
if covact == 1:
|
||||||
textcover = 'No cover found!'
|
textcover = _('No cover found!')
|
||||||
if check_for_cover(result.isbn):
|
if check_for_cover(result.isbn):
|
||||||
textcover = 'A cover was found for this book'
|
textcover = _('A cover was found for this book')
|
||||||
print textcover
|
print textcover
|
||||||
elif covact == 2:
|
elif covact == 2:
|
||||||
cover_data, ext = cover_from_isbn(result.isbn)
|
cover_data, ext = cover_from_isbn(result.isbn)
|
||||||
@ -417,7 +404,7 @@ def main(args=sys.argv):
|
|||||||
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
|
||||||
oname = os.path.abspath(cpath+'.'+ext)
|
oname = os.path.abspath(cpath+'.'+ext)
|
||||||
open(oname, 'wb').write(cover_data)
|
open(oname, 'wb').write(cover_data)
|
||||||
print 'Cover saved to file ', oname
|
print _('Cover saved to file '), oname
|
||||||
print
|
print
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -8,12 +8,12 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from Queue import Empty
|
from Queue import Empty
|
||||||
import os, time, sys, shutil
|
import os, time, sys, shutil, json
|
||||||
|
|
||||||
from calibre.utils.ipc.job import ParallelJob
|
from calibre.utils.ipc.job import ParallelJob
|
||||||
from calibre.utils.ipc.server import Server
|
from calibre.utils.ipc.server import Server
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
|
||||||
from calibre import prints
|
from calibre import prints, isbytestring
|
||||||
from calibre.constants import filesystem_encoding
|
from calibre.constants import filesystem_encoding
|
||||||
|
|
||||||
|
|
||||||
@ -194,14 +194,42 @@ class SaveWorker(Thread):
|
|||||||
self.daemon = True
|
self.daemon = True
|
||||||
self.path, self.opts = path, opts
|
self.path, self.opts = path, opts
|
||||||
self.ids = ids
|
self.ids = ids
|
||||||
self.library_path = db.library_path
|
self.db = db
|
||||||
self.canceled = False
|
self.canceled = False
|
||||||
self.result_queue = result_queue
|
self.result_queue = result_queue
|
||||||
self.error = None
|
self.error = None
|
||||||
self.spare_server = spare_server
|
self.spare_server = spare_server
|
||||||
self.start()
|
self.start()
|
||||||
|
|
||||||
|
def collect_data(self, ids):
|
||||||
|
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||||
|
data = {}
|
||||||
|
for i in set(ids):
|
||||||
|
mi = self.db.get_metadata(i, index_is_id=True, get_cover=True)
|
||||||
|
opf = metadata_to_opf(mi)
|
||||||
|
if isbytestring(opf):
|
||||||
|
opf = opf.decode('utf-8')
|
||||||
|
cpath = None
|
||||||
|
if mi.cover:
|
||||||
|
cpath = mi.cover
|
||||||
|
if isbytestring(cpath):
|
||||||
|
cpath = cpath.decode(filesystem_encoding)
|
||||||
|
formats = {}
|
||||||
|
if mi.formats:
|
||||||
|
for fmt in mi.formats:
|
||||||
|
fpath = self.db.format_abspath(i, fmt, index_is_id=True)
|
||||||
|
if fpath is not None:
|
||||||
|
if isbytestring(fpath):
|
||||||
|
fpath = fpath.decode(filesystem_encoding)
|
||||||
|
formats[fmt.lower()] = fpath
|
||||||
|
data[i] = [opf, cpath, formats]
|
||||||
|
return data
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
with TemporaryDirectory('save_to_disk_data') as tdir:
|
||||||
|
self._run(tdir)
|
||||||
|
|
||||||
|
def _run(self, tdir):
|
||||||
from calibre.library.save_to_disk import config
|
from calibre.library.save_to_disk import config
|
||||||
server = Server() if self.spare_server is None else self.spare_server
|
server = Server() if self.spare_server is None else self.spare_server
|
||||||
ids = set(self.ids)
|
ids = set(self.ids)
|
||||||
@ -212,12 +240,19 @@ class SaveWorker(Thread):
|
|||||||
for pref in c.preferences:
|
for pref in c.preferences:
|
||||||
recs[pref.name] = getattr(self.opts, pref.name)
|
recs[pref.name] = getattr(self.opts, pref.name)
|
||||||
|
|
||||||
|
plugboards = self.db.prefs.get('plugboards', {})
|
||||||
|
|
||||||
for i, task in enumerate(tasks):
|
for i, task in enumerate(tasks):
|
||||||
tids = [x[-1] for x in task]
|
tids = [x[-1] for x in task]
|
||||||
|
data = self.collect_data(tids)
|
||||||
|
dpath = os.path.join(tdir, '%d.json'%i)
|
||||||
|
with open(dpath, 'wb') as f:
|
||||||
|
f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))
|
||||||
|
|
||||||
job = ParallelJob('save_book',
|
job = ParallelJob('save_book',
|
||||||
'Save books (%d of %d)'%(i, len(tasks)),
|
'Save books (%d of %d)'%(i, len(tasks)),
|
||||||
lambda x,y:x,
|
lambda x,y:x,
|
||||||
args=[tids, self.library_path, self.path, recs])
|
args=[tids, dpath, plugboards, self.path, recs])
|
||||||
jobs.add(job)
|
jobs.add(job)
|
||||||
server.add_job(job)
|
server.add_job(job)
|
||||||
|
|
||||||
@ -226,21 +261,21 @@ class SaveWorker(Thread):
|
|||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
running = False
|
running = False
|
||||||
for job in jobs:
|
for job in jobs:
|
||||||
job.update(consume_notifications=False)
|
self.get_notifications(job, ids)
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
id, title, ok, tb = job.notifications.get_nowait()[0]
|
|
||||||
if id in ids:
|
|
||||||
self.result_queue.put((id, title, ok, tb))
|
|
||||||
ids.remove(id)
|
|
||||||
except Empty:
|
|
||||||
break
|
|
||||||
if not job.is_finished:
|
if not job.is_finished:
|
||||||
running = True
|
running = True
|
||||||
|
|
||||||
if not running:
|
if not running:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
for job in jobs:
|
||||||
|
if not job.result:
|
||||||
|
continue
|
||||||
|
for id_, title, ok, tb in job.result:
|
||||||
|
if id_ in ids:
|
||||||
|
self.result_queue.put((id_, title, ok, tb))
|
||||||
|
ids.remove(id_)
|
||||||
|
|
||||||
server.close()
|
server.close()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
@ -257,21 +292,39 @@ class SaveWorker(Thread):
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def get_notifications(self, job, ids):
|
||||||
|
job.update(consume_notifications=False)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
id, title, ok, tb = job.notifications.get_nowait()[0]
|
||||||
|
if id in ids:
|
||||||
|
self.result_queue.put((id, title, ok, tb))
|
||||||
|
ids.remove(id)
|
||||||
|
except Empty:
|
||||||
|
break
|
||||||
|
|
||||||
def save_book(task, library_path, path, recs, notification=lambda x,y:x):
|
|
||||||
from calibre.library.database2 import LibraryDatabase2
|
def save_book(ids, dpath, plugboards, path, recs, notification=lambda x,y:x):
|
||||||
db = LibraryDatabase2(library_path)
|
from calibre.library.save_to_disk import config, save_serialized_to_disk
|
||||||
from calibre.library.save_to_disk import config, save_to_disk
|
|
||||||
from calibre.customize.ui import apply_null_metadata
|
from calibre.customize.ui import apply_null_metadata
|
||||||
opts = config().parse()
|
opts = config().parse()
|
||||||
for name in recs:
|
for name in recs:
|
||||||
setattr(opts, name, recs[name])
|
setattr(opts, name, recs[name])
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
def callback(id, title, failed, tb):
|
def callback(id, title, failed, tb):
|
||||||
|
results.append((id, title, not failed, tb))
|
||||||
notification((id, title, not failed, tb))
|
notification((id, title, not failed, tb))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
with apply_null_metadata:
|
data_ = json.loads(open(dpath, 'rb').read().decode('utf-8'))
|
||||||
save_to_disk(db, task, path, opts, callback)
|
data = {}
|
||||||
|
for k, v in data_.iteritems():
|
||||||
|
data[int(k)] = v
|
||||||
|
|
||||||
|
with apply_null_metadata:
|
||||||
|
save_serialized_to_disk(ids, data, plugboards, path, opts, callback)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
@ -544,7 +544,7 @@ class OEBReader(object):
|
|||||||
data = render_html_svg_workaround(path, self.logger)
|
data = render_html_svg_workaround(path, self.logger)
|
||||||
if not data:
|
if not data:
|
||||||
data = ''
|
data = ''
|
||||||
id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
|
id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
|
||||||
item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
|
item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
@ -123,6 +123,8 @@ def _config():
|
|||||||
help=_('Download social metadata (tags/rating/etc.)'))
|
help=_('Download social metadata (tags/rating/etc.)'))
|
||||||
c.add_opt('overwrite_author_title_metadata', default=True,
|
c.add_opt('overwrite_author_title_metadata', default=True,
|
||||||
help=_('Overwrite author and title with new metadata'))
|
help=_('Overwrite author and title with new metadata'))
|
||||||
|
c.add_opt('auto_download_cover', default=False,
|
||||||
|
help=_('Automatically download the cover, if available'))
|
||||||
c.add_opt('enforce_cpu_limit', default=True,
|
c.add_opt('enforce_cpu_limit', default=True,
|
||||||
help=_('Limit max simultaneous jobs to number of CPUs'))
|
help=_('Limit max simultaneous jobs to number of CPUs'))
|
||||||
c.add_opt('tag_browser_hidden_categories', default=set(),
|
c.add_opt('tag_browser_hidden_categories', default=set(),
|
||||||
|
@ -61,6 +61,7 @@ class AddAction(InterfaceAction):
|
|||||||
self._adder = Adder(self.gui,
|
self._adder = Adder(self.gui,
|
||||||
self.gui.library_view.model().db,
|
self.gui.library_view.model().db,
|
||||||
self.Dispatcher(self._files_added), spare_server=self.gui.spare_server)
|
self.Dispatcher(self._files_added), spare_server=self.gui.spare_server)
|
||||||
|
self.gui.tags_view.disable_recounting = True
|
||||||
self._adder.add_recursive(root, single)
|
self._adder.add_recursive(root, single)
|
||||||
|
|
||||||
def add_recursive_single(self, *args):
|
def add_recursive_single(self, *args):
|
||||||
@ -201,9 +202,11 @@ class AddAction(InterfaceAction):
|
|||||||
self._adder = Adder(self.gui,
|
self._adder = Adder(self.gui,
|
||||||
None if to_device else self.gui.library_view.model().db,
|
None if to_device else self.gui.library_view.model().db,
|
||||||
self.Dispatcher(self.__adder_func), spare_server=self.gui.spare_server)
|
self.Dispatcher(self.__adder_func), spare_server=self.gui.spare_server)
|
||||||
|
self.gui.tags_view.disable_recounting = True
|
||||||
self._adder.add(paths)
|
self._adder.add(paths)
|
||||||
|
|
||||||
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
|
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
|
||||||
|
self.gui.tags_view.disable_recounting = False
|
||||||
if paths:
|
if paths:
|
||||||
self.gui.upload_books(paths,
|
self.gui.upload_books(paths,
|
||||||
list(map(ascii_filename, names)),
|
list(map(ascii_filename, names)),
|
||||||
@ -214,6 +217,7 @@ class AddAction(InterfaceAction):
|
|||||||
self.gui.library_view.model().books_added(self._adder.number_of_books_added)
|
self.gui.library_view.model().books_added(self._adder.number_of_books_added)
|
||||||
if hasattr(self.gui, 'db_images'):
|
if hasattr(self.gui, 'db_images'):
|
||||||
self.gui.db_images.reset()
|
self.gui.db_images.reset()
|
||||||
|
self.gui.tags_view.recount()
|
||||||
if getattr(self._adder, 'merged_books', False):
|
if getattr(self._adder, 'merged_books', False):
|
||||||
books = u'\n'.join([x if isinstance(x, unicode) else
|
books = u'\n'.join([x if isinstance(x, unicode) else
|
||||||
x.decode(preferred_encoding, 'replace') for x in
|
x.decode(preferred_encoding, 'replace') for x in
|
||||||
|
@ -147,8 +147,13 @@ class EditMetadataAction(InterfaceAction):
|
|||||||
|
|
||||||
d = MetadataSingleDialog(self.gui, row_list[current_row], db,
|
d = MetadataSingleDialog(self.gui, row_list[current_row], db,
|
||||||
prev=prev, next_=next_)
|
prev=prev, next_=next_)
|
||||||
|
d.view_format.connect(lambda
|
||||||
|
fmt:self.gui.iactions['View'].view_format(row_list[current_row],
|
||||||
|
fmt))
|
||||||
if d.exec_() != d.Accepted:
|
if d.exec_() != d.Accepted:
|
||||||
|
d.view_format.disconnect()
|
||||||
break
|
break
|
||||||
|
d.view_format.disconnect()
|
||||||
changed.add(d.id)
|
changed.add(d.id)
|
||||||
if d.row_delta == 0:
|
if d.row_delta == 0:
|
||||||
break
|
break
|
||||||
|
@ -26,7 +26,6 @@ class ViewAction(InterfaceAction):
|
|||||||
|
|
||||||
def genesis(self):
|
def genesis(self):
|
||||||
self.persistent_files = []
|
self.persistent_files = []
|
||||||
self.metadata_view_id = None
|
|
||||||
self.qaction.triggered.connect(self.view_book)
|
self.qaction.triggered.connect(self.view_book)
|
||||||
self.view_menu = QMenu()
|
self.view_menu = QMenu()
|
||||||
self.view_menu.addAction(_('View'), partial(self.view_book, False))
|
self.view_menu.addAction(_('View'), partial(self.view_book, False))
|
||||||
@ -51,14 +50,6 @@ class ViewAction(InterfaceAction):
|
|||||||
if fmt_path:
|
if fmt_path:
|
||||||
self._view_file(fmt_path)
|
self._view_file(fmt_path)
|
||||||
|
|
||||||
def metadata_view_format(self, fmt):
|
|
||||||
fmt_path = self.gui.library_view.model().db.\
|
|
||||||
format_abspath(self.metadata_view_id,
|
|
||||||
fmt, index_is_id=True)
|
|
||||||
if fmt_path:
|
|
||||||
self._view_file(fmt_path)
|
|
||||||
|
|
||||||
|
|
||||||
def book_downloaded_for_viewing(self, job):
|
def book_downloaded_for_viewing(self, job):
|
||||||
if job.failed:
|
if job.failed:
|
||||||
self.gui.device_job_exception(job)
|
self.gui.device_job_exception(job)
|
||||||
|
@ -3,41 +3,55 @@ UI for adding books to the database and saving books to disk
|
|||||||
'''
|
'''
|
||||||
import os, shutil, time
|
import os, shutil, time
|
||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
from threading import Thread
|
from functools import partial
|
||||||
|
|
||||||
from PyQt4.Qt import QThread, SIGNAL, QObject, QTimer, Qt, \
|
from PyQt4.Qt import QThread, QObject, Qt, QProgressDialog, pyqtSignal, QTimer
|
||||||
QProgressDialog
|
|
||||||
|
|
||||||
from calibre.gui2.dialogs.progress import ProgressDialog
|
from calibre.gui2.dialogs.progress import ProgressDialog
|
||||||
from calibre.gui2 import question_dialog, error_dialog, info_dialog
|
from calibre.gui2 import question_dialog, error_dialog, info_dialog
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.constants import preferred_encoding, filesystem_encoding
|
from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
|
||||||
from calibre.utils.config import prefs
|
from calibre.utils.config import prefs
|
||||||
|
from calibre import prints
|
||||||
|
|
||||||
|
single_shot = partial(QTimer.singleShot, 75)
|
||||||
|
|
||||||
|
class DuplicatesAdder(QObject): # {{{
|
||||||
|
|
||||||
|
added = pyqtSignal(object)
|
||||||
|
adding_done = pyqtSignal()
|
||||||
|
|
||||||
class DuplicatesAdder(QThread): # {{{
|
|
||||||
# Add duplicate books
|
|
||||||
def __init__(self, parent, db, duplicates, db_adder):
|
def __init__(self, parent, db, duplicates, db_adder):
|
||||||
QThread.__init__(self, parent)
|
QObject.__init__(self, parent)
|
||||||
self.db, self.db_adder = db, db_adder
|
self.db, self.db_adder = db, db_adder
|
||||||
self.duplicates = duplicates
|
self.duplicates = list(duplicates)
|
||||||
|
self.count = 0
|
||||||
|
single_shot(self.add_one)
|
||||||
|
|
||||||
|
def add_one(self):
|
||||||
|
if not self.duplicates:
|
||||||
|
self.adding_done.emit()
|
||||||
|
return
|
||||||
|
|
||||||
|
mi, cover, formats = self.duplicates.pop()
|
||||||
|
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
||||||
|
id = self.db.create_book_entry(mi, cover=cover,
|
||||||
|
add_duplicates=True)
|
||||||
|
# here we add all the formats for dupe book record created above
|
||||||
|
self.db_adder.add_formats(id, formats)
|
||||||
|
self.db_adder.number_of_books_added += 1
|
||||||
|
self.count += 1
|
||||||
|
self.added.emit(self.count)
|
||||||
|
single_shot(self.add_one)
|
||||||
|
|
||||||
def run(self):
|
|
||||||
count = 1
|
|
||||||
for mi, cover, formats in self.duplicates:
|
|
||||||
formats = [f for f in formats if not f.lower().endswith('.opf')]
|
|
||||||
id = self.db.create_book_entry(mi, cover=cover,
|
|
||||||
add_duplicates=True)
|
|
||||||
# here we add all the formats for dupe book record created above
|
|
||||||
self.db_adder.add_formats(id, formats)
|
|
||||||
self.db_adder.number_of_books_added += 1
|
|
||||||
self.emit(SIGNAL('added(PyQt_PyObject)'), count)
|
|
||||||
count += 1
|
|
||||||
self.emit(SIGNAL('adding_done()'))
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class RecursiveFind(QThread): # {{{
|
class RecursiveFind(QThread): # {{{
|
||||||
|
|
||||||
|
update = pyqtSignal(object)
|
||||||
|
found = pyqtSignal(object)
|
||||||
|
|
||||||
def __init__(self, parent, db, root, single):
|
def __init__(self, parent, db, root, single):
|
||||||
QThread.__init__(self, parent)
|
QThread.__init__(self, parent)
|
||||||
self.db = db
|
self.db = db
|
||||||
@ -50,8 +64,8 @@ class RecursiveFind(QThread): # {{{
|
|||||||
for dirpath in os.walk(root):
|
for dirpath in os.walk(root):
|
||||||
if self.canceled:
|
if self.canceled:
|
||||||
return
|
return
|
||||||
self.emit(SIGNAL('update(PyQt_PyObject)'),
|
self.update.emit(
|
||||||
_('Searching in')+' '+dirpath[0])
|
_('Searching in')+' '+dirpath[0])
|
||||||
self.books += list(self.db.find_books_in_directory(dirpath[0],
|
self.books += list(self.db.find_books_in_directory(dirpath[0],
|
||||||
self.single_book_per_directory))
|
self.single_book_per_directory))
|
||||||
|
|
||||||
@ -71,46 +85,55 @@ class RecursiveFind(QThread): # {{{
|
|||||||
msg = unicode(err)
|
msg = unicode(err)
|
||||||
except:
|
except:
|
||||||
msg = repr(err)
|
msg = repr(err)
|
||||||
self.emit(SIGNAL('found(PyQt_PyObject)'), msg)
|
self.found.emit(msg)
|
||||||
return
|
return
|
||||||
|
|
||||||
self.books = [formats for formats in self.books if formats]
|
self.books = [formats for formats in self.books if formats]
|
||||||
|
|
||||||
if not self.canceled:
|
if not self.canceled:
|
||||||
self.emit(SIGNAL('found(PyQt_PyObject)'), self.books)
|
self.found.emit(self.books)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class DBAdder(Thread): # {{{
|
class DBAdder(QObject): # {{{
|
||||||
|
|
||||||
|
def __init__(self, parent, db, ids, nmap):
|
||||||
|
QObject.__init__(self, parent)
|
||||||
|
|
||||||
def __init__(self, db, ids, nmap):
|
|
||||||
self.db, self.ids, self.nmap = db, dict(**ids), dict(**nmap)
|
self.db, self.ids, self.nmap = db, dict(**ids), dict(**nmap)
|
||||||
self.end = False
|
|
||||||
self.critical = {}
|
self.critical = {}
|
||||||
self.number_of_books_added = 0
|
self.number_of_books_added = 0
|
||||||
self.duplicates = []
|
self.duplicates = []
|
||||||
self.names, self.paths, self.infos = [], [], []
|
self.names, self.paths, self.infos = [], [], []
|
||||||
Thread.__init__(self)
|
|
||||||
self.daemon = True
|
|
||||||
self.input_queue = Queue()
|
self.input_queue = Queue()
|
||||||
self.output_queue = Queue()
|
self.output_queue = Queue()
|
||||||
self.merged_books = set([])
|
self.merged_books = set([])
|
||||||
|
|
||||||
def run(self):
|
def end(self):
|
||||||
while not self.end:
|
self.input_queue.put((None, None, None))
|
||||||
try:
|
|
||||||
id, opf, cover = self.input_queue.get(True, 0.2)
|
def start(self):
|
||||||
except Empty:
|
try:
|
||||||
continue
|
id, opf, cover = self.input_queue.get_nowait()
|
||||||
name = self.nmap.pop(id)
|
except Empty:
|
||||||
title = None
|
single_shot(self.start)
|
||||||
try:
|
return
|
||||||
title = self.add(id, opf, cover, name)
|
if id is None and opf is None and cover is None:
|
||||||
except:
|
return
|
||||||
import traceback
|
name = self.nmap.pop(id)
|
||||||
self.critical[name] = traceback.format_exc()
|
title = None
|
||||||
title = name
|
if DEBUG:
|
||||||
self.output_queue.put(title)
|
st = time.time()
|
||||||
|
try:
|
||||||
|
title = self.add(id, opf, cover, name)
|
||||||
|
except:
|
||||||
|
import traceback
|
||||||
|
self.critical[name] = traceback.format_exc()
|
||||||
|
title = name
|
||||||
|
self.output_queue.put(title)
|
||||||
|
if DEBUG:
|
||||||
|
prints('Added', title, 'to db in:', time.time() - st, 'seconds')
|
||||||
|
single_shot(self.start)
|
||||||
|
|
||||||
def process_formats(self, opf, formats):
|
def process_formats(self, opf, formats):
|
||||||
imp = opf[:-4]+'.import'
|
imp = opf[:-4]+'.import'
|
||||||
@ -201,10 +224,10 @@ class Adder(QObject): # {{{
|
|||||||
self.pd.setModal(True)
|
self.pd.setModal(True)
|
||||||
self.pd.show()
|
self.pd.show()
|
||||||
self._parent = parent
|
self._parent = parent
|
||||||
self.rfind = self.worker = self.timer = None
|
self.rfind = self.worker = None
|
||||||
self.callback = callback
|
self.callback = callback
|
||||||
self.callback_called = False
|
self.callback_called = False
|
||||||
self.connect(self.pd, SIGNAL('canceled()'), self.canceled)
|
self.pd.canceled_signal.connect(self.canceled)
|
||||||
|
|
||||||
def add_recursive(self, root, single=True):
|
def add_recursive(self, root, single=True):
|
||||||
self.path = root
|
self.path = root
|
||||||
@ -213,10 +236,8 @@ class Adder(QObject): # {{{
|
|||||||
self.pd.set_max(0)
|
self.pd.set_max(0)
|
||||||
self.pd.value = 0
|
self.pd.value = 0
|
||||||
self.rfind = RecursiveFind(self, self.db, root, single)
|
self.rfind = RecursiveFind(self, self.db, root, single)
|
||||||
self.connect(self.rfind, SIGNAL('update(PyQt_PyObject)'),
|
self.rfind.update.connect(self.pd.set_msg, type=Qt.QueuedConnection)
|
||||||
self.pd.set_msg, Qt.QueuedConnection)
|
self.rfind.found.connect(self.add, type=Qt.QueuedConnection)
|
||||||
self.connect(self.rfind, SIGNAL('found(PyQt_PyObject)'),
|
|
||||||
self.add, Qt.QueuedConnection)
|
|
||||||
self.rfind.start()
|
self.rfind.start()
|
||||||
|
|
||||||
def add(self, books):
|
def add(self, books):
|
||||||
@ -246,12 +267,12 @@ class Adder(QObject): # {{{
|
|||||||
self.pd.set_min(0)
|
self.pd.set_min(0)
|
||||||
self.pd.set_max(len(self.ids))
|
self.pd.set_max(len(self.ids))
|
||||||
self.pd.value = 0
|
self.pd.value = 0
|
||||||
self.db_adder = DBAdder(self.db, self.ids, self.nmap)
|
self.db_adder = DBAdder(self, self.db, self.ids, self.nmap)
|
||||||
self.db_adder.start()
|
self.db_adder.start()
|
||||||
self.last_added_at = time.time()
|
self.last_added_at = time.time()
|
||||||
self.entry_count = len(self.ids)
|
self.entry_count = len(self.ids)
|
||||||
self.continue_updating = True
|
self.continue_updating = True
|
||||||
QTimer.singleShot(200, self.update)
|
single_shot(self.update)
|
||||||
|
|
||||||
def canceled(self):
|
def canceled(self):
|
||||||
self.continue_updating = False
|
self.continue_updating = False
|
||||||
@ -260,14 +281,14 @@ class Adder(QObject): # {{{
|
|||||||
if self.worker is not None:
|
if self.worker is not None:
|
||||||
self.worker.canceled = True
|
self.worker.canceled = True
|
||||||
if hasattr(self, 'db_adder'):
|
if hasattr(self, 'db_adder'):
|
||||||
self.db_adder.end = True
|
self.db_adder.end()
|
||||||
self.pd.hide()
|
self.pd.hide()
|
||||||
if not self.callback_called:
|
if not self.callback_called:
|
||||||
self.callback(self.paths, self.names, self.infos)
|
self.callback(self.paths, self.names, self.infos)
|
||||||
self.callback_called = True
|
self.callback_called = True
|
||||||
|
|
||||||
def duplicates_processed(self):
|
def duplicates_processed(self):
|
||||||
self.db_adder.end = True
|
self.db_adder.end()
|
||||||
if not self.callback_called:
|
if not self.callback_called:
|
||||||
self.callback(self.paths, self.names, self.infos)
|
self.callback(self.paths, self.names, self.infos)
|
||||||
self.callback_called = True
|
self.callback_called = True
|
||||||
@ -300,7 +321,7 @@ class Adder(QObject): # {{{
|
|||||||
if (time.time() - self.last_added_at) > self.ADD_TIMEOUT:
|
if (time.time() - self.last_added_at) > self.ADD_TIMEOUT:
|
||||||
self.continue_updating = False
|
self.continue_updating = False
|
||||||
self.pd.hide()
|
self.pd.hide()
|
||||||
self.db_adder.end = True
|
self.db_adder.end()
|
||||||
if not self.callback_called:
|
if not self.callback_called:
|
||||||
self.callback([], [], [])
|
self.callback([], [], [])
|
||||||
self.callback_called = True
|
self.callback_called = True
|
||||||
@ -311,7 +332,7 @@ class Adder(QObject): # {{{
|
|||||||
'find the problem book.'), show=True)
|
'find the problem book.'), show=True)
|
||||||
|
|
||||||
if self.continue_updating:
|
if self.continue_updating:
|
||||||
QTimer.singleShot(200, self.update)
|
single_shot(self.update)
|
||||||
|
|
||||||
|
|
||||||
def process_duplicates(self):
|
def process_duplicates(self):
|
||||||
@ -332,11 +353,8 @@ class Adder(QObject): # {{{
|
|||||||
self.__p_d = pd
|
self.__p_d = pd
|
||||||
self.__d_a = DuplicatesAdder(self._parent, self.db, duplicates,
|
self.__d_a = DuplicatesAdder(self._parent, self.db, duplicates,
|
||||||
self.db_adder)
|
self.db_adder)
|
||||||
self.connect(self.__d_a, SIGNAL('added(PyQt_PyObject)'),
|
self.__d_a.added.connect(pd.setValue)
|
||||||
pd.setValue)
|
self.__d_a.adding_done.connect(self.duplicates_processed)
|
||||||
self.connect(self.__d_a, SIGNAL('adding_done()'),
|
|
||||||
self.duplicates_processed)
|
|
||||||
self.__d_a.start()
|
|
||||||
else:
|
else:
|
||||||
return self.duplicates_processed()
|
return self.duplicates_processed()
|
||||||
|
|
||||||
@ -407,14 +425,12 @@ class Saver(QObject): # {{{
|
|||||||
self.worker = SaveWorker(self.rq, db, self.ids, path, self.opts,
|
self.worker = SaveWorker(self.rq, db, self.ids, path, self.opts,
|
||||||
spare_server=self.spare_server)
|
spare_server=self.spare_server)
|
||||||
self.pd.canceled_signal.connect(self.canceled)
|
self.pd.canceled_signal.connect(self.canceled)
|
||||||
self.timer = QTimer(self)
|
self.continue_updating = True
|
||||||
self.connect(self.timer, SIGNAL('timeout()'), self.update)
|
single_shot(self.update)
|
||||||
self.timer.start(200)
|
|
||||||
|
|
||||||
|
|
||||||
def canceled(self):
|
def canceled(self):
|
||||||
if self.timer is not None:
|
self.continue_updating = False
|
||||||
self.timer.stop()
|
|
||||||
if self.worker is not None:
|
if self.worker is not None:
|
||||||
self.worker.canceled = True
|
self.worker.canceled = True
|
||||||
self.pd.hide()
|
self.pd.hide()
|
||||||
@ -424,14 +440,38 @@ class Saver(QObject): # {{{
|
|||||||
|
|
||||||
|
|
||||||
def update(self):
|
def update(self):
|
||||||
if not self.ids or not self.worker.is_alive():
|
if not self.continue_updating:
|
||||||
self.timer.stop()
|
return
|
||||||
|
if not self.worker.is_alive():
|
||||||
|
# Check that all ids were processed
|
||||||
|
while self.ids:
|
||||||
|
# Get all queued results since worker is dead
|
||||||
|
before = len(self.ids)
|
||||||
|
self.get_result()
|
||||||
|
if before == len(self.ids):
|
||||||
|
# No results available => worker died unexpectedly
|
||||||
|
for i in list(self.ids):
|
||||||
|
self.failures.add(('id:%d'%i, 'Unknown error'))
|
||||||
|
self.ids.remove(i)
|
||||||
|
|
||||||
|
if not self.ids:
|
||||||
|
self.continue_updating = False
|
||||||
self.pd.hide()
|
self.pd.hide()
|
||||||
if not self.callback_called:
|
if not self.callback_called:
|
||||||
self.callback(self.worker.path, self.failures, self.worker.error)
|
try:
|
||||||
|
# Give the worker time to clean up and set worker.error
|
||||||
|
self.worker.join(2)
|
||||||
|
except:
|
||||||
|
pass # The worker was not yet started
|
||||||
self.callback_called = True
|
self.callback_called = True
|
||||||
return
|
self.callback(self.worker.path, self.failures, self.worker.error)
|
||||||
|
|
||||||
|
if self.continue_updating:
|
||||||
|
self.get_result()
|
||||||
|
single_shot(self.update)
|
||||||
|
|
||||||
|
|
||||||
|
def get_result(self):
|
||||||
try:
|
try:
|
||||||
id, title, ok, tb = self.rq.get_nowait()
|
id, title, ok, tb = self.rq.get_nowait()
|
||||||
except Empty:
|
except Empty:
|
||||||
@ -441,6 +481,7 @@ class Saver(QObject): # {{{
|
|||||||
if not isinstance(title, unicode):
|
if not isinstance(title, unicode):
|
||||||
title = str(title).decode(preferred_encoding, 'replace')
|
title = str(title).decode(preferred_encoding, 'replace')
|
||||||
self.pd.set_msg(_('Saved')+' '+title)
|
self.pd.set_msg(_('Saved')+' '+title)
|
||||||
|
|
||||||
if not ok:
|
if not ok:
|
||||||
self.failures.add((title, tb))
|
self.failures.add((title, tb))
|
||||||
# }}}
|
# }}}
|
||||||
|
@ -9,7 +9,7 @@ from threading import Thread
|
|||||||
|
|
||||||
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, pyqtSignal, \
|
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, pyqtSignal, \
|
||||||
QAbstractTableModel, QCoreApplication, QTimer
|
QAbstractTableModel, QCoreApplication, QTimer
|
||||||
from PyQt4.QtGui import QDialog, QItemSelectionModel
|
from PyQt4.QtGui import QDialog, QItemSelectionModel, QIcon
|
||||||
|
|
||||||
from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
|
from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
|
||||||
from calibre.gui2 import error_dialog, NONE, info_dialog, config
|
from calibre.gui2 import error_dialog, NONE, info_dialog, config
|
||||||
@ -42,13 +42,14 @@ class Matches(QAbstractTableModel):
|
|||||||
|
|
||||||
def __init__(self, matches):
|
def __init__(self, matches):
|
||||||
self.matches = matches
|
self.matches = matches
|
||||||
|
self.yes_icon = QVariant(QIcon(I('ok.png')))
|
||||||
QAbstractTableModel.__init__(self)
|
QAbstractTableModel.__init__(self)
|
||||||
|
|
||||||
def rowCount(self, *args):
|
def rowCount(self, *args):
|
||||||
return len(self.matches)
|
return len(self.matches)
|
||||||
|
|
||||||
def columnCount(self, *args):
|
def columnCount(self, *args):
|
||||||
return 6
|
return 8
|
||||||
|
|
||||||
def headerData(self, section, orientation, role):
|
def headerData(self, section, orientation, role):
|
||||||
if role != Qt.DisplayRole:
|
if role != Qt.DisplayRole:
|
||||||
@ -61,6 +62,8 @@ class Matches(QAbstractTableModel):
|
|||||||
elif section == 3: text = _("Publisher")
|
elif section == 3: text = _("Publisher")
|
||||||
elif section == 4: text = _("ISBN")
|
elif section == 4: text = _("ISBN")
|
||||||
elif section == 5: text = _("Published")
|
elif section == 5: text = _("Published")
|
||||||
|
elif section == 6: text = _("Has Cover")
|
||||||
|
elif section == 7: text = _("Has Summary")
|
||||||
|
|
||||||
return QVariant(text)
|
return QVariant(text)
|
||||||
else:
|
else:
|
||||||
@ -71,8 +74,8 @@ class Matches(QAbstractTableModel):
|
|||||||
|
|
||||||
def data(self, index, role):
|
def data(self, index, role):
|
||||||
row, col = index.row(), index.column()
|
row, col = index.row(), index.column()
|
||||||
|
book = self.matches[row]
|
||||||
if role == Qt.DisplayRole:
|
if role == Qt.DisplayRole:
|
||||||
book = self.matches[row]
|
|
||||||
res = None
|
res = None
|
||||||
if col == 0:
|
if col == 0:
|
||||||
res = book.title
|
res = book.title
|
||||||
@ -90,6 +93,11 @@ class Matches(QAbstractTableModel):
|
|||||||
if not res:
|
if not res:
|
||||||
return NONE
|
return NONE
|
||||||
return QVariant(res)
|
return QVariant(res)
|
||||||
|
elif role == Qt.DecorationRole:
|
||||||
|
if col == 6 and book.has_cover:
|
||||||
|
return self.yes_icon
|
||||||
|
if col == 7 and book.comments:
|
||||||
|
return self.yes_icon
|
||||||
return NONE
|
return NONE
|
||||||
|
|
||||||
class FetchMetadata(QDialog, Ui_FetchMetadata):
|
class FetchMetadata(QDialog, Ui_FetchMetadata):
|
||||||
@ -131,7 +139,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
|
|||||||
self.fetch_metadata()
|
self.fetch_metadata()
|
||||||
self.opt_get_social_metadata.setChecked(config['get_social_metadata'])
|
self.opt_get_social_metadata.setChecked(config['get_social_metadata'])
|
||||||
self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata'])
|
self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata'])
|
||||||
|
self.opt_auto_download_cover.setChecked(config['auto_download_cover'])
|
||||||
|
|
||||||
def show_summary(self, current, *args):
|
def show_summary(self, current, *args):
|
||||||
row = current.row()
|
row = current.row()
|
||||||
@ -213,6 +221,12 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
|
|||||||
_hung_fetchers.add(self.fetcher)
|
_hung_fetchers.add(self.fetcher)
|
||||||
if hasattr(self, '_hangcheck') and self._hangcheck.isActive():
|
if hasattr(self, '_hangcheck') and self._hangcheck.isActive():
|
||||||
self._hangcheck.stop()
|
self._hangcheck.stop()
|
||||||
|
# Save value of auto_download_cover, since this is the only place it can
|
||||||
|
# be set. The values of the other options can be set in
|
||||||
|
# Preferences->Behavior and should not be set here as they affect bulk
|
||||||
|
# downloading as well.
|
||||||
|
if self.opt_auto_download_cover.isChecked() != config['auto_download_cover']:
|
||||||
|
config.set('auto_download_cover', self.opt_auto_download_cover.isChecked())
|
||||||
|
|
||||||
def __enter__(self, *args):
|
def __enter__(self, *args):
|
||||||
return self
|
return self
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
<rect>
|
<rect>
|
||||||
<x>0</x>
|
<x>0</x>
|
||||||
<y>0</y>
|
<y>0</y>
|
||||||
<width>830</width>
|
<width>890</width>
|
||||||
<height>642</height>
|
<height>642</height>
|
||||||
</rect>
|
</rect>
|
||||||
</property>
|
</property>
|
||||||
@ -109,6 +109,13 @@
|
|||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
|
||||||
|
<property name="text">
|
||||||
|
<string>Overwrite author and title with author and title of selected book</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="opt_get_social_metadata">
|
<widget class="QCheckBox" name="opt_get_social_metadata">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
@ -117,9 +124,9 @@
|
|||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="opt_overwrite_author_title_metadata">
|
<widget class="QCheckBox" name="opt_auto_download_cover">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Overwrite author and title with author and title of selected book</string>
|
<string>Automatically download the cover, if available</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
@ -17,7 +17,7 @@ from calibre.gui2 import error_dialog
|
|||||||
from calibre.gui2.progress_indicator import ProgressIndicator
|
from calibre.gui2.progress_indicator import ProgressIndicator
|
||||||
from calibre.utils.config import dynamic
|
from calibre.utils.config import dynamic
|
||||||
from calibre.utils.titlecase import titlecase
|
from calibre.utils.titlecase import titlecase
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key, capitalize
|
||||||
|
|
||||||
class MyBlockingBusy(QDialog):
|
class MyBlockingBusy(QDialog):
|
||||||
|
|
||||||
@ -187,6 +187,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
|
|||||||
_('Lower Case') : lambda x: icu_lower(x),
|
_('Lower Case') : lambda x: icu_lower(x),
|
||||||
_('Upper Case') : lambda x: icu_upper(x),
|
_('Upper Case') : lambda x: icu_upper(x),
|
||||||
_('Title Case') : lambda x: titlecase(x),
|
_('Title Case') : lambda x: titlecase(x),
|
||||||
|
_('Capitalize') : lambda x: capitalize(x),
|
||||||
}
|
}
|
||||||
|
|
||||||
s_r_match_modes = [ _('Character match'),
|
s_r_match_modes = [ _('Character match'),
|
||||||
|
@ -8,8 +8,9 @@ add/remove formats
|
|||||||
|
|
||||||
import os, re, time, traceback, textwrap
|
import os, re, time, traceback, textwrap
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QThread, QDate, \
|
from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QDate, \
|
||||||
QPixmap, QListWidgetItem, QDialog, pyqtSignal, QMessageBox, QIcon, \
|
QPixmap, QListWidgetItem, QDialog, pyqtSignal, QMessageBox, QIcon, \
|
||||||
QPushButton
|
QPushButton
|
||||||
|
|
||||||
@ -34,9 +35,12 @@ from calibre.gui2.preferences.social import SocialMetadata
|
|||||||
from calibre.gui2.custom_column_widgets import populate_metadata_page
|
from calibre.gui2.custom_column_widgets import populate_metadata_page
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
|
||||||
class CoverFetcher(QThread): # {{{
|
class CoverFetcher(Thread): # {{{
|
||||||
|
|
||||||
def __init__(self, username, password, isbn, timeout, title, author):
|
def __init__(self, username, password, isbn, timeout, title, author):
|
||||||
|
Thread.__init__(self)
|
||||||
|
self.daemon = True
|
||||||
|
|
||||||
self.username = username.strip() if username else username
|
self.username = username.strip() if username else username
|
||||||
self.password = password.strip() if password else password
|
self.password = password.strip() if password else password
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
@ -44,8 +48,7 @@ class CoverFetcher(QThread): # {{{
|
|||||||
self.title = title
|
self.title = title
|
||||||
self.needs_isbn = False
|
self.needs_isbn = False
|
||||||
self.author = author
|
self.author = author
|
||||||
QThread.__init__(self)
|
self.exception = self.traceback = self.cover_data = self.errors = None
|
||||||
self.exception = self.traceback = self.cover_data = None
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
try:
|
try:
|
||||||
@ -238,20 +241,20 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
|||||||
self.timeout, title, author)
|
self.timeout, title, author)
|
||||||
self.cover_fetcher.start()
|
self.cover_fetcher.start()
|
||||||
self._hangcheck = QTimer(self)
|
self._hangcheck = QTimer(self)
|
||||||
self.connect(self._hangcheck, SIGNAL('timeout()'), self.hangcheck)
|
self._hangcheck.timeout.connect(self.hangcheck,
|
||||||
|
type=Qt.QueuedConnection)
|
||||||
self.cf_start_time = time.time()
|
self.cf_start_time = time.time()
|
||||||
self.pi.start(_('Downloading cover...'))
|
self.pi.start(_('Downloading cover...'))
|
||||||
self._hangcheck.start(100)
|
self._hangcheck.start(100)
|
||||||
|
|
||||||
def hangcheck(self):
|
def hangcheck(self):
|
||||||
if not self.cover_fetcher.isFinished() and \
|
if self.cover_fetcher.is_alive() and \
|
||||||
time.time()-self.cf_start_time < self.COVER_FETCH_TIMEOUT:
|
time.time()-self.cf_start_time < self.COVER_FETCH_TIMEOUT:
|
||||||
return
|
return
|
||||||
|
|
||||||
self._hangcheck.stop()
|
self._hangcheck.stop()
|
||||||
try:
|
try:
|
||||||
if self.cover_fetcher.isRunning():
|
if self.cover_fetcher.is_alive():
|
||||||
self.cover_fetcher.terminate()
|
|
||||||
error_dialog(self, _('Cannot fetch cover'),
|
error_dialog(self, _('Cannot fetch cover'),
|
||||||
_('<b>Could not fetch cover.</b><br/>')+
|
_('<b>Could not fetch cover.</b><br/>')+
|
||||||
_('The download timed out.')).exec_()
|
_('The download timed out.')).exec_()
|
||||||
@ -760,8 +763,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
|||||||
if book.publisher: self.publisher.setEditText(book.publisher)
|
if book.publisher: self.publisher.setEditText(book.publisher)
|
||||||
if book.isbn: self.isbn.setText(book.isbn)
|
if book.isbn: self.isbn.setText(book.isbn)
|
||||||
if book.pubdate:
|
if book.pubdate:
|
||||||
d = book.pubdate
|
dt = book.pubdate
|
||||||
self.pubdate.setDate(QDate(d.year, d.month, d.day))
|
self.pubdate.setDate(QDate(dt.year, dt.month, dt.day))
|
||||||
summ = book.comments
|
summ = book.comments
|
||||||
if summ:
|
if summ:
|
||||||
prefix = unicode(self.comments.toPlainText())
|
prefix = unicode(self.comments.toPlainText())
|
||||||
@ -777,8 +780,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
|||||||
self.series.setText(book.series)
|
self.series.setText(book.series)
|
||||||
if book.series_index is not None:
|
if book.series_index is not None:
|
||||||
self.series_index.setValue(book.series_index)
|
self.series_index.setValue(book.series_index)
|
||||||
# Needed because of Qt focus bug on OS X
|
if book.has_cover:
|
||||||
self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
|
if d.opt_auto_download_cover.isChecked() and book.has_cover:
|
||||||
|
self.fetch_cover()
|
||||||
|
else:
|
||||||
|
self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
|
||||||
else:
|
else:
|
||||||
error_dialog(self, _('Cannot fetch metadata'),
|
error_dialog(self, _('Cannot fetch metadata'),
|
||||||
_('You must specify at least one of ISBN, Title, '
|
_('You must specify at least one of ISBN, Title, '
|
||||||
|
@ -18,7 +18,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
|
|||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.utils.config import tweaks, prefs
|
from calibre.utils.config import tweaks, prefs
|
||||||
from calibre.utils.date import dt_factory, qt_to_dt, isoformat
|
from calibre.utils.date import dt_factory, qt_to_dt, isoformat
|
||||||
from calibre.utils.icu import sort_key
|
from calibre.utils.icu import sort_key, strcmp as icu_strcmp
|
||||||
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
|
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
|
||||||
from calibre.utils.search_query_parser import SearchQueryParser
|
from calibre.utils.search_query_parser import SearchQueryParser
|
||||||
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
|
from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
|
||||||
@ -1023,8 +1023,7 @@ class DeviceBooksModel(BooksModel): # {{{
|
|||||||
x = ''
|
x = ''
|
||||||
if y == None:
|
if y == None:
|
||||||
y = ''
|
y = ''
|
||||||
x, y = x.strip().lower(), y.strip().lower()
|
return icu_strcmp(x.strip(), y.strip())
|
||||||
return cmp(x, y)
|
|
||||||
return _strcmp
|
return _strcmp
|
||||||
def datecmp(x, y):
|
def datecmp(x, y):
|
||||||
x = self.db[x].datetime
|
x = self.db[x].datetime
|
||||||
|
@ -151,6 +151,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
|
|||||||
self._plugin_model.populate()
|
self._plugin_model.populate()
|
||||||
self._plugin_model.reset()
|
self._plugin_model.reset()
|
||||||
self.changed_signal.emit()
|
self.changed_signal.emit()
|
||||||
|
self.plugin_path.setText('')
|
||||||
else:
|
else:
|
||||||
error_dialog(self, _('No valid plugin path'),
|
error_dialog(self, _('No valid plugin path'),
|
||||||
_('%s is not a valid plugin path')%path).exec_()
|
_('%s is not a valid plugin path')%path).exec_()
|
||||||
|
@ -73,6 +73,7 @@ class TagsView(QTreeView): # {{{
|
|||||||
def __init__(self, parent=None):
|
def __init__(self, parent=None):
|
||||||
QTreeView.__init__(self, parent=None)
|
QTreeView.__init__(self, parent=None)
|
||||||
self.tag_match = None
|
self.tag_match = None
|
||||||
|
self.disable_recounting = False
|
||||||
self.setUniformRowHeights(True)
|
self.setUniformRowHeights(True)
|
||||||
self.setCursor(Qt.PointingHandCursor)
|
self.setCursor(Qt.PointingHandCursor)
|
||||||
self.setIconSize(QSize(30, 30))
|
self.setIconSize(QSize(30, 30))
|
||||||
@ -299,6 +300,8 @@ class TagsView(QTreeView): # {{{
|
|||||||
return self.isExpanded(idx)
|
return self.isExpanded(idx)
|
||||||
|
|
||||||
def recount(self, *args):
|
def recount(self, *args):
|
||||||
|
if self.disable_recounting:
|
||||||
|
return
|
||||||
self.refresh_signal_processed = True
|
self.refresh_signal_processed = True
|
||||||
ci = self.currentIndex()
|
ci = self.currentIndex()
|
||||||
if not ci.isValid():
|
if not ci.isValid():
|
||||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, traceback, cStringIO, re
|
import os, traceback, cStringIO, re, shutil
|
||||||
|
|
||||||
from calibre.constants import DEBUG
|
from calibre.constants import DEBUG
|
||||||
from calibre.utils.config import Config, StringConfig, tweaks
|
from calibre.utils.config import Config, StringConfig, tweaks
|
||||||
@ -203,31 +203,49 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
|
|||||||
return shorten_components_to(length, components)
|
return shorten_components_to(length, components)
|
||||||
|
|
||||||
|
|
||||||
def save_book_to_disk(id, db, root, opts, length):
|
def save_book_to_disk(id_, db, root, opts, length):
|
||||||
mi = db.get_metadata(id, index_is_id=True)
|
mi = db.get_metadata(id_, index_is_id=True)
|
||||||
|
cover = db.cover(id_, index_is_id=True, as_path=True)
|
||||||
|
plugboards = db.prefs.get('plugboards', {})
|
||||||
|
|
||||||
available_formats = db.formats(id, index_is_id=True)
|
available_formats = db.formats(id_, index_is_id=True)
|
||||||
if not available_formats:
|
if not available_formats:
|
||||||
available_formats = []
|
available_formats = []
|
||||||
else:
|
else:
|
||||||
available_formats = [x.lower().strip() for x in
|
available_formats = [x.lower().strip() for x in
|
||||||
available_formats.split(',')]
|
available_formats.split(',')]
|
||||||
|
formats = {}
|
||||||
|
fmts = db.formats(id_, index_is_id=True, verify_formats=False)
|
||||||
|
if fmts:
|
||||||
|
fmts = fmts.split(',')
|
||||||
|
for fmt in fmts:
|
||||||
|
fpath = db.format_abspath(id_, fmt, index_is_id=True)
|
||||||
|
if fpath is not None:
|
||||||
|
formats[fmt.lower()] = fpath
|
||||||
|
|
||||||
|
return do_save_book_to_disk(id_, mi, cover, plugboards,
|
||||||
|
formats, root, opts, length)
|
||||||
|
|
||||||
|
|
||||||
|
def do_save_book_to_disk(id_, mi, cover, plugboards,
|
||||||
|
format_map, root, opts, length):
|
||||||
|
available_formats = [x.lower().strip() for x in format_map.keys()]
|
||||||
if opts.formats == 'all':
|
if opts.formats == 'all':
|
||||||
asked_formats = available_formats
|
asked_formats = available_formats
|
||||||
else:
|
else:
|
||||||
asked_formats = [x.lower().strip() for x in opts.formats.split(',')]
|
asked_formats = [x.lower().strip() for x in opts.formats.split(',')]
|
||||||
formats = set(available_formats).intersection(set(asked_formats))
|
formats = set(available_formats).intersection(set(asked_formats))
|
||||||
if not formats:
|
if not formats:
|
||||||
return True, id, mi.title
|
return True, id_, mi.title
|
||||||
|
|
||||||
components = get_components(opts.template, mi, id, opts.timefmt, length,
|
components = get_components(opts.template, mi, id_, opts.timefmt, length,
|
||||||
ascii_filename if opts.asciiize else sanitize_file_name,
|
ascii_filename if opts.asciiize else sanitize_file_name,
|
||||||
to_lowercase=opts.to_lowercase,
|
to_lowercase=opts.to_lowercase,
|
||||||
replace_whitespace=opts.replace_whitespace)
|
replace_whitespace=opts.replace_whitespace)
|
||||||
base_path = os.path.join(root, *components)
|
base_path = os.path.join(root, *components)
|
||||||
base_name = os.path.basename(base_path)
|
base_name = os.path.basename(base_path)
|
||||||
dirpath = os.path.dirname(base_path)
|
dirpath = os.path.dirname(base_path)
|
||||||
# Don't test for existence first are the test could fail but
|
# Don't test for existence first as the test could fail but
|
||||||
# another worker process could create the directory before
|
# another worker process could create the directory before
|
||||||
# the call to makedirs
|
# the call to makedirs
|
||||||
try:
|
try:
|
||||||
@ -236,29 +254,23 @@ def save_book_to_disk(id, db, root, opts, length):
|
|||||||
if not os.path.exists(dirpath):
|
if not os.path.exists(dirpath):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
cdata = db.cover(id, index_is_id=True)
|
if opts.save_cover and cover and os.access(cover, os.R_OK):
|
||||||
if opts.save_cover:
|
with open(base_path+'.jpg', 'wb') as f:
|
||||||
if cdata is not None:
|
with open(cover, 'rb') as s:
|
||||||
with open(base_path+'.jpg', 'wb') as f:
|
shutil.copyfileobj(s, f)
|
||||||
f.write(cdata)
|
mi.cover = base_name+'.jpg'
|
||||||
mi.cover = base_name+'.jpg'
|
else:
|
||||||
else:
|
mi.cover = None
|
||||||
mi.cover = None
|
|
||||||
|
|
||||||
if opts.write_opf:
|
if opts.write_opf:
|
||||||
opf = metadata_to_opf(mi)
|
opf = metadata_to_opf(mi)
|
||||||
with open(base_path+'.opf', 'wb') as f:
|
with open(base_path+'.opf', 'wb') as f:
|
||||||
f.write(opf)
|
f.write(opf)
|
||||||
|
|
||||||
if cdata is not None:
|
|
||||||
mi.cover_data = ('jpg', cdata)
|
|
||||||
mi.cover = None
|
|
||||||
|
|
||||||
written = False
|
written = False
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
global plugboard_save_to_disk_value, plugboard_any_format_value
|
global plugboard_save_to_disk_value, plugboard_any_format_value
|
||||||
dev_name = plugboard_save_to_disk_value
|
dev_name = plugboard_save_to_disk_value
|
||||||
plugboards = db.prefs.get('plugboards', {})
|
|
||||||
cpb = None
|
cpb = None
|
||||||
if fmt in plugboards:
|
if fmt in plugboards:
|
||||||
cpb = plugboards[fmt]
|
cpb = plugboards[fmt]
|
||||||
@ -275,11 +287,12 @@ def save_book_to_disk(id, db, root, opts, length):
|
|||||||
# Leave this here for a while, in case problems arise.
|
# Leave this here for a while, in case problems arise.
|
||||||
if cpb is not None:
|
if cpb is not None:
|
||||||
prints('Save-to-disk using plugboard:', fmt, cpb)
|
prints('Save-to-disk using plugboard:', fmt, cpb)
|
||||||
data = db.format(id, fmt, index_is_id=True)
|
fp = format_map.get(fmt, None)
|
||||||
if data is None:
|
if fp is None:
|
||||||
continue
|
continue
|
||||||
else:
|
with open(fp, 'rb') as f:
|
||||||
written = True
|
data = f.read()
|
||||||
|
written = True
|
||||||
if opts.update_metadata:
|
if opts.update_metadata:
|
||||||
stream = cStringIO.StringIO()
|
stream = cStringIO.StringIO()
|
||||||
stream.write(data)
|
stream.write(data)
|
||||||
@ -300,9 +313,21 @@ def save_book_to_disk(id, db, root, opts, length):
|
|||||||
with open(fmt_path, 'wb') as f:
|
with open(fmt_path, 'wb') as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
|
|
||||||
return not written, id, mi.title
|
return not written, id_, mi.title
|
||||||
|
|
||||||
|
def _sanitize_args(root, opts):
|
||||||
|
if opts is None:
|
||||||
|
opts = config().parse()
|
||||||
|
if isinstance(root, unicode):
|
||||||
|
root = root.encode(filesystem_encoding)
|
||||||
|
root = os.path.abspath(root)
|
||||||
|
|
||||||
|
opts.template = preprocess_template(opts.template)
|
||||||
|
length = 1000 if supports_long_names(root) else 250
|
||||||
|
length -= len(root)
|
||||||
|
if length < 5:
|
||||||
|
raise ValueError('%r is too long.'%root)
|
||||||
|
return root, opts, length
|
||||||
|
|
||||||
def save_to_disk(db, ids, root, opts=None, callback=None):
|
def save_to_disk(db, ids, root, opts=None, callback=None):
|
||||||
'''
|
'''
|
||||||
@ -316,17 +341,7 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
|
|||||||
:return: A list of failures. Each element of the list is a tuple
|
:return: A list of failures. Each element of the list is a tuple
|
||||||
(id, title, traceback)
|
(id, title, traceback)
|
||||||
'''
|
'''
|
||||||
if opts is None:
|
root, opts, length = _sanitize_args(root, opts)
|
||||||
opts = config().parse()
|
|
||||||
if isinstance(root, unicode):
|
|
||||||
root = root.encode(filesystem_encoding)
|
|
||||||
root = os.path.abspath(root)
|
|
||||||
|
|
||||||
opts.template = preprocess_template(opts.template)
|
|
||||||
length = 1000 if supports_long_names(root) else 250
|
|
||||||
length -= len(root)
|
|
||||||
if length < 5:
|
|
||||||
raise ValueError('%r is too long.'%root)
|
|
||||||
failures = []
|
failures = []
|
||||||
for x in ids:
|
for x in ids:
|
||||||
tb = ''
|
tb = ''
|
||||||
@ -343,4 +358,28 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
|
|||||||
break
|
break
|
||||||
return failures
|
return failures
|
||||||
|
|
||||||
|
def save_serialized_to_disk(ids, data, plugboards, root, opts, callback):
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
|
root, opts, length = _sanitize_args(root, opts)
|
||||||
|
failures = []
|
||||||
|
for x in ids:
|
||||||
|
opf, cover, format_map = data[x]
|
||||||
|
if isinstance(opf, unicode):
|
||||||
|
opf = opf.encode('utf-8')
|
||||||
|
mi = OPF(cStringIO.StringIO(opf)).to_book_metadata()
|
||||||
|
tb = ''
|
||||||
|
try:
|
||||||
|
failed, id, title = do_save_book_to_disk(x, mi, cover, plugboards,
|
||||||
|
format_map, root, opts, length)
|
||||||
|
tb = _('Requested formats not available')
|
||||||
|
except:
|
||||||
|
failed, id, title = True, x, mi.title
|
||||||
|
tb = traceback.format_exc()
|
||||||
|
if failed:
|
||||||
|
failures.append((id, title, tb))
|
||||||
|
if callable(callback):
|
||||||
|
if not callback(int(id), title, failed, tb):
|
||||||
|
break
|
||||||
|
|
||||||
|
return failures
|
||||||
|
|
||||||
|
@ -18,8 +18,9 @@ from functools import partial
|
|||||||
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
||||||
from calibre.utils.config import tweaks
|
from calibre.utils.config import tweaks
|
||||||
from calibre.utils.date import parse_date, isoformat
|
from calibre.utils.date import parse_date, isoformat
|
||||||
from calibre import isbytestring
|
from calibre import isbytestring, force_unicode
|
||||||
from calibre.constants import iswindows, DEBUG
|
from calibre.constants import iswindows, DEBUG
|
||||||
|
from calibre.utils.icu import strcmp
|
||||||
|
|
||||||
global_lock = RLock()
|
global_lock = RLock()
|
||||||
|
|
||||||
@ -115,8 +116,8 @@ def pynocase(one, two, encoding='utf-8'):
|
|||||||
pass
|
pass
|
||||||
return cmp(one.lower(), two.lower())
|
return cmp(one.lower(), two.lower())
|
||||||
|
|
||||||
def icu_collator(s1, s2, func=None):
|
def icu_collator(s1, s2):
|
||||||
return cmp(func(unicode(s1)), func(unicode(s2)))
|
return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8'))
|
||||||
|
|
||||||
def load_c_extensions(conn, debug=DEBUG):
|
def load_c_extensions(conn, debug=DEBUG):
|
||||||
try:
|
try:
|
||||||
@ -169,8 +170,7 @@ class DBThread(Thread):
|
|||||||
self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4()))
|
self.conn.create_function('uuid4', 0, lambda : str(uuid.uuid4()))
|
||||||
# Dummy functions for dynamically created filters
|
# Dummy functions for dynamically created filters
|
||||||
self.conn.create_function('books_list_filter', 1, lambda x: 1)
|
self.conn.create_function('books_list_filter', 1, lambda x: 1)
|
||||||
from calibre.utils.icu import sort_key
|
self.conn.create_collation('icucollate', icu_collator)
|
||||||
self.conn.create_collation('icucollate', partial(icu_collator, func=sort_key))
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
try:
|
try:
|
||||||
|
@ -46,7 +46,6 @@ and if a book does not have a series::
|
|||||||
|
|
||||||
(|app| automatically removes multiple slashes and leading or trailing spaces).
|
(|app| automatically removes multiple slashes and leading or trailing spaces).
|
||||||
|
|
||||||
|
|
||||||
Advanced formatting
|
Advanced formatting
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
@ -80,6 +79,9 @@ For trailing zeros, use::
|
|||||||
|
|
||||||
{series_index:0<3s} - Three digits with trailing zeros
|
{series_index:0<3s} - Three digits with trailing zeros
|
||||||
|
|
||||||
|
If you use series indices with sub values (e.g., 1.1), you might want to ensure that the decimal points line up. For example, you might want the indices 1 and 2.5 to appear as 01.00 and 02.50 so that they will sort correctly. To do this, use::
|
||||||
|
|
||||||
|
{series_index:0>5.2f} - Five characters, consisting of two digits with leading zeros, a decimal point, then 2 digits after the decimal point
|
||||||
|
|
||||||
If you want only the first two letters of the data, use::
|
If you want only the first two letters of the data, use::
|
||||||
|
|
||||||
@ -115,15 +117,15 @@ The functions available are:
|
|||||||
* ``lowercase()`` -- return value of the field in lower case.
|
* ``lowercase()`` -- return value of the field in lower case.
|
||||||
* ``uppercase()`` -- return the value of the field in upper case.
|
* ``uppercase()`` -- return the value of the field in upper case.
|
||||||
* ``titlecase()`` -- return the value of the field in title case.
|
* ``titlecase()`` -- return the value of the field in title case.
|
||||||
* ``capitalize()`` -- return the value as capitalized.
|
* ``capitalize()`` -- return the value with the first letter upper case and the rest lower case.
|
||||||
* ``ifempty(text)`` -- if the field is not empty, return the value of the field. Otherwise return `text`.
|
|
||||||
* ``test(text if not empty, text if empty)`` -- return `text if not empty` if the field is not empty, otherwise return `text if empty`.
|
|
||||||
* ``contains(pattern, text if match, text if not match`` -- checks if field contains matches for the regular expression `pattern`. Returns `text if match` if matches are found, otherwise it returns `text if no match`.
|
* ``contains(pattern, text if match, text if not match`` -- checks if field contains matches for the regular expression `pattern`. Returns `text if match` if matches are found, otherwise it returns `text if no match`.
|
||||||
* ``count(separator)`` -- interprets the value as a list of items separated by `separator`, returning the number of items in the list. Most lists use a comma as the separator, but authors uses an ampersand. Examples: `{tags:count(,)}`, `{authors:count(&)}`
|
* ``count(separator)`` -- interprets the value as a list of items separated by `separator`, returning the number of items in the list. Most lists use a comma as the separator, but authors uses an ampersand. Examples: `{tags:count(,)}`, `{authors:count(&)}`
|
||||||
|
* ``ifempty(text)`` -- if the field is not empty, return the value of the field. Otherwise return `text`.
|
||||||
* ``lookup(pattern, field, pattern, field, ..., else_field)`` -- like switch, except the arguments are field (metadata) names, not text. The value of the appropriate field will be fetched and used. Note that because composite columns are fields, you can use this function in one composite field to use the value of some other composite field. This is extremely useful when constructing variable save paths (more later).
|
* ``lookup(pattern, field, pattern, field, ..., else_field)`` -- like switch, except the arguments are field (metadata) names, not text. The value of the appropriate field will be fetched and used. Note that because composite columns are fields, you can use this function in one composite field to use the value of some other composite field. This is extremely useful when constructing variable save paths (more later).
|
||||||
* ``re(pattern, replacement)`` -- return the field after applying the regular expression. All instances of `pattern` are replaced with `replacement`. As in all of |app|, these are python-compatible regular expressions.
|
* ``re(pattern, replacement)`` -- return the field after applying the regular expression. All instances of `pattern` are replaced with `replacement`. As in all of |app|, these are python-compatible regular expressions.
|
||||||
* ``shorten(left chars, middle text, right chars)`` -- Return a shortened version of the field, consisting of `left chars` characters from the beginning of the field, followed by `middle text`, followed by `right chars` characters from the end of the string. `Left chars` and `right chars` must be integers. For example, assume the title of the book is `Ancient English Laws in the Times of Ivanhoe`, and you want it to fit in a space of at most 15 characters. If you use ``{title:shorten(9,-,5)}``, the result will be `Ancient E-nhoe`. If the field's length is less than ``left chars`` + ``right chars`` + the length of ``middle text``, then the field will be used intact. For example, the title `The Dome` would not be changed.
|
* ``shorten(left chars, middle text, right chars)`` -- Return a shortened version of the field, consisting of `left chars` characters from the beginning of the field, followed by `middle text`, followed by `right chars` characters from the end of the string. `Left chars` and `right chars` must be integers. For example, assume the title of the book is `Ancient English Laws in the Times of Ivanhoe`, and you want it to fit in a space of at most 15 characters. If you use ``{title:shorten(9,-,5)}``, the result will be `Ancient E-nhoe`. If the field's length is less than ``left chars`` + ``right chars`` + the length of ``middle text``, then the field will be used intact. For example, the title `The Dome` would not be changed.
|
||||||
* ``switch(pattern, value, pattern, value, ..., else_value)`` -- for each ``pattern, value`` pair, checks if the field matches the regular expression ``pattern`` and if so, returns that ``value``. If no ``pattern`` matches, then ``else_value`` is returned. You can have as many ``pattern, value`` pairs as you want.
|
* ``switch(pattern, value, pattern, value, ..., else_value)`` -- for each ``pattern, value`` pair, checks if the field matches the regular expression ``pattern`` and if so, returns that ``value``. If no ``pattern`` matches, then ``else_value`` is returned. You can have as many ``pattern, value`` pairs as you want.
|
||||||
|
* ``test(text if not empty, text if empty)`` -- return `text if not empty` if the field is not empty, otherwise return `text if empty`.
|
||||||
|
|
||||||
|
|
||||||
Now, about using functions and formatting in the same field. Suppose you have an integer custom column called ``#myint`` that you want to see with leading zeros, as in ``003``. To do this, you would use a format of ``0>3s``. However, by default, if a number (integer or float) equals zero then the field produces the empty value, so zero values will produce nothing, not ``000``. If you really want to see ``000`` values, then you use both the format string and the ``ifempty`` function to change the empty value back to a zero. The field reference would be::
|
Now, about using functions and formatting in the same field. Suppose you have an integer custom column called ``#myint`` that you want to see with leading zeros, as in ``003``. To do this, you would use a format of ``0>3s``. However, by default, if a number (integer or float) equals zero then the field produces the empty value, so zero values will produce nothing, not ``000``. If you really want to see ``000`` values, then you use both the format string and the ``ifempty`` function to change the empty value back to a zero. The field reference would be::
|
||||||
|
23
src/calibre/utils/cleantext.py
Normal file
23
src/calibre/utils/cleantext.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, sengian <sengian1@gmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
_ascii_pat = None
|
||||||
|
|
||||||
|
def clean_ascii_chars(txt, charlist=None):
|
||||||
|
'remove ASCII invalid chars : 0 to 8 and 11-14 to 24-26-27 by default'
|
||||||
|
global _ascii_pat
|
||||||
|
if _ascii_pat is None:
|
||||||
|
chars = list(range(8)) + [0x0B, 0x0E, 0x0F] + list(range(0x10, 0x19)) \
|
||||||
|
+ [0x1A, 0x1B]
|
||||||
|
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
|
||||||
|
|
||||||
|
if charlist is None:
|
||||||
|
pat = _ascii_pat
|
||||||
|
else:
|
||||||
|
pat = re.compile(u'|'.join(map(unichr, charlist)))
|
||||||
|
return pat.sub('', txt)
|
||||||
|
|
@ -151,3 +151,45 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
|
|||||||
format = re.sub('d{1,4}', format_day, format)
|
format = re.sub('d{1,4}', format_day, format)
|
||||||
format = re.sub('M{1,4}', format_month, format)
|
format = re.sub('M{1,4}', format_month, format)
|
||||||
return re.sub('yyyy|yy', format_year, format)
|
return re.sub('yyyy|yy', format_year, format)
|
||||||
|
|
||||||
|
def replace_months(datestr, clang):
|
||||||
|
# Replace months by english equivalent for parse_date
|
||||||
|
frtoen = {
|
||||||
|
u'[jJ]anvier': u'jan',
|
||||||
|
u'[fF].vrier': u'feb',
|
||||||
|
u'[mM]ars': u'mar',
|
||||||
|
u'[aA]vril': u'apr',
|
||||||
|
u'[mM]ai': u'may',
|
||||||
|
u'[jJ]uin': u'jun',
|
||||||
|
u'[jJ]uillet': u'jul',
|
||||||
|
u'[aA]o.t': u'aug',
|
||||||
|
u'[sS]eptembre': u'sep',
|
||||||
|
u'[Oo]ctobre': u'oct',
|
||||||
|
u'[nN]ovembre': u'nov',
|
||||||
|
u'[dD].cembre': u'dec' }
|
||||||
|
detoen = {
|
||||||
|
u'[jJ]anuar': u'jan',
|
||||||
|
u'[fF]ebruar': u'feb',
|
||||||
|
u'[mM].rz': u'mar',
|
||||||
|
u'[aA]pril': u'apr',
|
||||||
|
u'[mM]ai': u'may',
|
||||||
|
u'[jJ]uni': u'jun',
|
||||||
|
u'[jJ]uli': u'jul',
|
||||||
|
u'[aA]ugust': u'aug',
|
||||||
|
u'[sS]eptember': u'sep',
|
||||||
|
u'[Oo]ktober': u'oct',
|
||||||
|
u'[nN]ovember': u'nov',
|
||||||
|
u'[dD]ezember': u'dec' }
|
||||||
|
|
||||||
|
if clang == 'fr':
|
||||||
|
dictoen = frtoen
|
||||||
|
elif clang == 'de':
|
||||||
|
dictoen = detoen
|
||||||
|
else:
|
||||||
|
return datestr
|
||||||
|
|
||||||
|
for k in dictoen.iterkeys():
|
||||||
|
tmp = re.sub(k, dictoen[k], datestr)
|
||||||
|
if tmp != datestr: break
|
||||||
|
return tmp
|
||||||
|
|
||||||
|
@ -8,12 +8,15 @@ import re, string, traceback
|
|||||||
|
|
||||||
from calibre.constants import DEBUG
|
from calibre.constants import DEBUG
|
||||||
from calibre.utils.titlecase import titlecase
|
from calibre.utils.titlecase import titlecase
|
||||||
|
from calibre.utils.icu import capitalize
|
||||||
|
|
||||||
class TemplateFormatter(string.Formatter):
|
class TemplateFormatter(string.Formatter):
|
||||||
'''
|
'''
|
||||||
Provides a format function that substitutes '' for any missing value
|
Provides a format function that substitutes '' for any missing value
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
_validation_string = 'This Is Some Text THAT SHOULD be LONG Enough.%^&*'
|
||||||
|
|
||||||
# Dict to do recursion detection. It is up the the individual get_value
|
# Dict to do recursion detection. It is up the the individual get_value
|
||||||
# method to use it. It is cleared when starting to format a template
|
# method to use it. It is cleared when starting to format a template
|
||||||
composite_values = {}
|
composite_values = {}
|
||||||
@ -86,7 +89,7 @@ class TemplateFormatter(string.Formatter):
|
|||||||
'uppercase' : (0, lambda s,x: x.upper()),
|
'uppercase' : (0, lambda s,x: x.upper()),
|
||||||
'lowercase' : (0, lambda s,x: x.lower()),
|
'lowercase' : (0, lambda s,x: x.lower()),
|
||||||
'titlecase' : (0, lambda s,x: titlecase(x)),
|
'titlecase' : (0, lambda s,x: titlecase(x)),
|
||||||
'capitalize' : (0, lambda s,x: x.capitalize()),
|
'capitalize' : (0, lambda s,x: capitalize(x)),
|
||||||
'contains' : (3, _contains),
|
'contains' : (3, _contains),
|
||||||
'ifempty' : (1, _ifempty),
|
'ifempty' : (1, _ifempty),
|
||||||
'lookup' : (-1, _lookup),
|
'lookup' : (-1, _lookup),
|
||||||
@ -97,19 +100,29 @@ class TemplateFormatter(string.Formatter):
|
|||||||
'count' : (1, _count),
|
'count' : (1, _count),
|
||||||
}
|
}
|
||||||
|
|
||||||
format_string_re = re.compile(r'^(.*)\|(.*)\|(.*)$')
|
def _do_format(self, val, fmt):
|
||||||
compress_spaces = re.compile(r'\s+')
|
if not fmt or not val:
|
||||||
backslash_comma_to_comma = re.compile(r'\\,')
|
return val
|
||||||
|
if val == self._validation_string:
|
||||||
arg_parser = re.Scanner([
|
val = '0'
|
||||||
(r',', lambda x,t: ''),
|
typ = fmt[-1]
|
||||||
(r'.*?((?<!\\),)', lambda x,t: t[:-1]),
|
if typ == 's':
|
||||||
(r'.*?\)', lambda x,t: t[:-1]),
|
pass
|
||||||
])
|
elif 'bcdoxXn'.find(typ) >= 0:
|
||||||
|
try:
|
||||||
def get_value(self, key, args, kwargs):
|
val = int(val)
|
||||||
raise Exception('get_value must be implemented in the subclass')
|
except:
|
||||||
|
raise ValueError(
|
||||||
|
_('format: type {0} requires an integer value, got {1}').format(typ, val))
|
||||||
|
elif 'eEfFgGn%'.find(typ) >= 0:
|
||||||
|
try:
|
||||||
|
val = float(val)
|
||||||
|
except:
|
||||||
|
raise ValueError(
|
||||||
|
_('format: type {0} requires a decimal (float) value, got {1}').format(typ, val))
|
||||||
|
else:
|
||||||
|
raise ValueError(_('format: unknown format type letter {0}').format(typ))
|
||||||
|
return unicode(('{0:'+fmt+'}').format(val))
|
||||||
|
|
||||||
def _explode_format_string(self, fmt):
|
def _explode_format_string(self, fmt):
|
||||||
try:
|
try:
|
||||||
@ -122,6 +135,21 @@ class TemplateFormatter(string.Formatter):
|
|||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return fmt, '', ''
|
return fmt, '', ''
|
||||||
|
|
||||||
|
format_string_re = re.compile(r'^(.*)\|(.*)\|(.*)$')
|
||||||
|
compress_spaces = re.compile(r'\s+')
|
||||||
|
backslash_comma_to_comma = re.compile(r'\\,')
|
||||||
|
|
||||||
|
arg_parser = re.Scanner([
|
||||||
|
(r',', lambda x,t: ''),
|
||||||
|
(r'.*?((?<!\\),)', lambda x,t: t[:-1]),
|
||||||
|
(r'.*?\)', lambda x,t: t[:-1]),
|
||||||
|
])
|
||||||
|
|
||||||
|
################## Override parent classes methods #####################
|
||||||
|
|
||||||
|
def get_value(self, key, args, kwargs):
|
||||||
|
raise Exception('get_value must be implemented in the subclass')
|
||||||
|
|
||||||
def format_field(self, val, fmt):
|
def format_field(self, val, fmt):
|
||||||
# Handle conditional text
|
# Handle conditional text
|
||||||
fmt, prefix, suffix = self._explode_format_string(fmt)
|
fmt, prefix, suffix = self._explode_format_string(fmt)
|
||||||
@ -155,7 +183,7 @@ class TemplateFormatter(string.Formatter):
|
|||||||
else:
|
else:
|
||||||
val = func[1](self, val, *args).strip()
|
val = func[1](self, val, *args).strip()
|
||||||
if val:
|
if val:
|
||||||
val = string.Formatter.format_field(self, val, dispfmt)
|
val = self._do_format(val, dispfmt)
|
||||||
if not val:
|
if not val:
|
||||||
return ''
|
return ''
|
||||||
return prefix + val + suffix
|
return prefix + val + suffix
|
||||||
@ -164,6 +192,8 @@ class TemplateFormatter(string.Formatter):
|
|||||||
ans = string.Formatter.vformat(self, fmt, args, kwargs)
|
ans = string.Formatter.vformat(self, fmt, args, kwargs)
|
||||||
return self.compress_spaces.sub(' ', ans).strip()
|
return self.compress_spaces.sub(' ', ans).strip()
|
||||||
|
|
||||||
|
########## a formatter guaranteed not to throw and exception ############
|
||||||
|
|
||||||
def safe_format(self, fmt, kwargs, error_value, book):
|
def safe_format(self, fmt, kwargs, error_value, book):
|
||||||
self.kwargs = kwargs
|
self.kwargs = kwargs
|
||||||
self.book = book
|
self.book = book
|
||||||
@ -181,7 +211,7 @@ class ValidateFormat(TemplateFormatter):
|
|||||||
Provides a format function that substitutes '' for any missing value
|
Provides a format function that substitutes '' for any missing value
|
||||||
'''
|
'''
|
||||||
def get_value(self, key, args, kwargs):
|
def get_value(self, key, args, kwargs):
|
||||||
return 'this is some text that should be long enough'
|
return self._validation_string
|
||||||
|
|
||||||
def validate(self, x):
|
def validate(self, x):
|
||||||
return self.vformat(x, [], {})
|
return self.vformat(x, [], {})
|
||||||
|
@ -237,8 +237,6 @@ static PyTypeObject icu_CollatorType = { // {{{
|
|||||||
// }}
|
// }}
|
||||||
|
|
||||||
|
|
||||||
// }}}
|
|
||||||
|
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
// Module initialization {{{
|
// Module initialization {{{
|
||||||
@ -286,7 +284,7 @@ icu_upper(PyObject *self, PyObject *args) {
|
|||||||
PyMem_Free(input);
|
PyMem_Free(input);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
} // }}}
|
||||||
|
|
||||||
// lower {{{
|
// lower {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -56,7 +56,7 @@ def py_sort_key(obj):
|
|||||||
def icu_sort_key(collator, obj):
|
def icu_sort_key(collator, obj):
|
||||||
if not obj:
|
if not obj:
|
||||||
return _none2
|
return _none2
|
||||||
return collator.sort_key(obj.lower())
|
return collator.sort_key(lower(obj))
|
||||||
|
|
||||||
def py_case_sensitive_sort_key(obj):
|
def py_case_sensitive_sort_key(obj):
|
||||||
if not obj:
|
if not obj:
|
||||||
@ -69,7 +69,7 @@ def icu_case_sensitive_sort_key(collator, obj):
|
|||||||
return collator.sort_key(obj)
|
return collator.sort_key(obj)
|
||||||
|
|
||||||
def icu_strcmp(collator, a, b):
|
def icu_strcmp(collator, a, b):
|
||||||
return collator.strcmp(a.lower(), b.lower())
|
return collator.strcmp(lower(a), lower(b))
|
||||||
|
|
||||||
def py_strcmp(a, b):
|
def py_strcmp(a, b):
|
||||||
return cmp(a.lower(), b.lower())
|
return cmp(a.lower(), b.lower())
|
||||||
@ -104,6 +104,13 @@ lower = (lambda s: s.lower()) if _icu_not_ok else \
|
|||||||
title_case = (lambda s: s.title()) if _icu_not_ok else \
|
title_case = (lambda s: s.title()) if _icu_not_ok else \
|
||||||
partial(_icu.title, get_locale())
|
partial(_icu.title, get_locale())
|
||||||
|
|
||||||
|
def icu_capitalize(s):
|
||||||
|
s = lower(s)
|
||||||
|
return s.replace(s[0], upper(s[0]))
|
||||||
|
|
||||||
|
capitalize = (lambda s: s.capitalize()) if _icu_not_ok else \
|
||||||
|
(lambda s: icu_capitalize(s))
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
def test(): # {{{
|
def test(): # {{{
|
||||||
@ -215,14 +222,15 @@ pêché'''
|
|||||||
print '\t', x.encode('utf-8')
|
print '\t', x.encode('utf-8')
|
||||||
if fs != create(french_good):
|
if fs != create(french_good):
|
||||||
print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)'
|
print 'French failed (note that French fails with icu < 4.6 i.e. on windows and OS X)'
|
||||||
return
|
# return
|
||||||
test_strcmp(german + french)
|
test_strcmp(german + french)
|
||||||
|
|
||||||
print '\nTesting case transforms in current locale'
|
print '\nTesting case transforms in current locale'
|
||||||
for x in ('a', 'Alice\'s code'):
|
for x in ('a', 'Alice\'s code'):
|
||||||
print 'Upper:', x, '->', 'py:', x.upper().encode('utf-8'), 'icu:', upper(x).encode('utf-8')
|
print 'Upper: ', x, '->', 'py:', x.upper().encode('utf-8'), 'icu:', upper(x).encode('utf-8')
|
||||||
print 'Lower:', x, '->', 'py:', x.lower().encode('utf-8'), 'icu:', lower(x).encode('utf-8')
|
print 'Lower: ', x, '->', 'py:', x.lower().encode('utf-8'), 'icu:', lower(x).encode('utf-8')
|
||||||
print 'Title:', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8')
|
print 'Title: ', x, '->', 'py:', x.title().encode('utf-8'), 'icu:', title_case(x).encode('utf-8')
|
||||||
|
print 'Capitalize:', x, '->', 'py:', x.capitalize().encode('utf-8'), 'icu:', capitalize(x).encode('utf-8')
|
||||||
print
|
print
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
@ -292,12 +292,12 @@ class Server(Thread):
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
for worker in self.workers:
|
for worker in list(self.workers):
|
||||||
try:
|
try:
|
||||||
worker.kill()
|
worker.kill()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
for worker in self.pool:
|
for worker in list(self.pool):
|
||||||
try:
|
try:
|
||||||
worker.kill()
|
worker.kill()
|
||||||
except:
|
except:
|
||||||
|
@ -9,6 +9,8 @@ License: http://www.opensource.org/licenses/mit-license.php
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from calibre.utils.icu import capitalize
|
||||||
|
|
||||||
__all__ = ['titlecase']
|
__all__ = ['titlecase']
|
||||||
__version__ = '0.5'
|
__version__ = '0.5'
|
||||||
|
|
||||||
@ -40,11 +42,6 @@ def titlecase(text):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def capitalize(w):
|
|
||||||
w = icu_lower(w)
|
|
||||||
w = w.replace(w[0], icu_upper(w[0]))
|
|
||||||
return w
|
|
||||||
|
|
||||||
all_caps = ALL_CAPS.match(text)
|
all_caps = ALL_CAPS.match(text)
|
||||||
|
|
||||||
words = re.split('\s', text)
|
words = re.split('\s', text)
|
||||||
|
@ -1227,7 +1227,7 @@ class ZipFile:
|
|||||||
self.fp.flush()
|
self.fp.flush()
|
||||||
if zinfo.flag_bits & 0x08:
|
if zinfo.flag_bits & 0x08:
|
||||||
# Write CRC and file sizes after the file data
|
# Write CRC and file sizes after the file data
|
||||||
self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
|
self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
|
||||||
zinfo.file_size))
|
zinfo.file_size))
|
||||||
self.filelist.append(zinfo)
|
self.filelist.append(zinfo)
|
||||||
self.NameToInfo[zinfo.filename] = zinfo
|
self.NameToInfo[zinfo.filename] = zinfo
|
||||||
|
@ -166,7 +166,7 @@ class Feed(object):
|
|||||||
self.articles.append(article)
|
self.articles.append(article)
|
||||||
else:
|
else:
|
||||||
t = strftime(u'%a, %d %b, %Y %H:%M', article.localtime.timetuple())
|
t = strftime(u'%a, %d %b, %Y %H:%M', article.localtime.timetuple())
|
||||||
self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%
|
self.logger.debug(u'Skipping article %s (%s) from feed %s as it is too old.'%
|
||||||
(title, t, self.title))
|
(title, t, self.title))
|
||||||
d = item.get('date', '')
|
d = item.get('date', '')
|
||||||
article.formatted_date = d
|
article.formatted_date = d
|
||||||
|
Loading…
x
Reference in New Issue
Block a user