diff --git a/resources/recipes/mainichi.recipe b/resources/recipes/mainichi.recipe
index 2a44fa0980..baa7f409ec 100644
--- a/resources/recipes/mainichi.recipe
+++ b/resources/recipes/mainichi.recipe
@@ -4,6 +4,7 @@ __copyright__ = '2010, Hiroshi Miura '
www.mainichi.jp
'''
+import re
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyNews(BasicNewsRecipe):
@@ -22,3 +23,18 @@ class MainichiDailyNews(BasicNewsRecipe):
remove_tags = [{'class':"RelatedArticle"}]
remove_tags_after = {'class':"Credit"}
+    def parse_feeds(self):
+        # Return the feeds produced by BasicNewsRecipe.parse_feeds() with
+        # every article whose URL contains "pheedo.jp" removed.
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        # The dot is escaped so that only a literal "pheedo.jp" matches.
+        unwanted = re.compile(r'pheedo\.jp')
+        for curfeed in feeds:
+            # Slice assignment filters in place, so the feed keeps the same
+            # list object the original index-based deletion modified.
+            curfeed.articles[:] = [article for article in curfeed.articles
+                                   if not unwanted.search(article.url)]
+
+        return feeds
diff --git a/resources/recipes/mainichi_it_news.recipe b/resources/recipes/mainichi_it_news.recipe
index 8e15496e57..4c285a2c01 100644
--- a/resources/recipes/mainichi_it_news.recipe
+++ b/resources/recipes/mainichi_it_news.recipe
@@ -14,5 +14,19 @@ class MainichiDailyITNews(BasicNewsRecipe):
remove_tags_before = {'class':"NewsTitle"}
remove_tags = [{'class':"RelatedArticle"}]
- remove_tags_after = {'class':"Credit"}
+    def parse_feeds(self):
+        # Return the feeds produced by BasicNewsRecipe.parse_feeds() with
+        # every article whose URL contains "pheedo.jp" removed.
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            # A plain substring test matches the literal host name and needs
+            # no `re` import (none is added for this file in this patch).
+            curfeed.articles[:] = [article for article in curfeed.articles
+                                   if 'pheedo.jp' not in article.url]
+
+        return feeds
+
+    # Class attribute; it was fused onto the `return feeds` line above.
+    remove_tags_after = {'class':"Credit"}
diff --git a/resources/recipes/nikkei_sub_life.recipe b/resources/recipes/nikkei_sub_life.recipe
index 1bfa08a55f..60e5b170ca 100644
--- a/resources/recipes/nikkei_sub_life.recipe
+++ b/resources/recipes/nikkei_sub_life.recipe
@@ -32,12 +32,9 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
remove_tags_after = {'class':"cmn-pr_list"}
feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
- (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
- (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
- (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
- (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
+ (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special')
]
def get_browser(self):
diff --git a/resources/recipes/nikkei_sub_shakai.recipe b/resources/recipes/nikkei_sub_shakai.recipe
new file mode 100644
index 0000000000..ed86493265
--- /dev/null
+++ b/resources/recipes/nikkei_sub_shakai.recipe
@@ -0,0 +1,102 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+www.nikkei.com
+'''
+
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+import mechanize
+from calibre.ptempfile import PersistentTemporaryFile
+
+
+class NikkeiNet_sub_life(BasicNewsRecipe):
+    # Recipe for the single feed listed in `feeds` (\u793e\u4f1a) of
+    # www.nikkei.com. The site needs a subscription; get_browser() logs in.
+    # NOTE(review): the class name still says "life" although this file is
+    # nikkei_sub_shakai.recipe; it is kept unchanged for compatibility.
+
+    # The title suffix now names the section actually fetched (\u793e\u4f1a);
+    # it previously carried the "life" suffix (\u751f\u6d3b).
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u793e\u4f1a)'
+    __author__ = 'Hiroshi Miura'
+    description = 'News and current market affairs from Japan'
+    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
+    needs_subscription = True
+    oldest_article = 2
+    max_articles_per_feed = 20
+    language = 'ja'
+    remove_javascript = False
+    temp_files = []
+
+    remove_tags_before = {'class':"cmn-section cmn-indent"}
+    remove_tags = [
+        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
+        {'class':"cmn-article_keyword cmn-clearfix"},
+        {'class':"cmn-print_headline cmn-clearfix"},
+        ]
+    remove_tags_after = {'class':"cmn-pr_list"}
+
+    feeds = [
+        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
+        ]
+
+    def get_browser(self):
+        # Return a mechanize browser with its own cookie jar; when username
+        # and password are set, walk through the nikkei.com login first.
+        br = BasicNewsRecipe.get_browser()
+
+        cj = mechanize.LWPCookieJar()
+        br.set_cookiejar(cj)
+
+        if self.username is not None and self.password is not None:
+            # open login form
+            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
+            response = br.response()
+            # remove disabled input which brings error on mechanize
+            # NOTE(review): replacing "" inserts " -->" between all
+            # characters of the page; the search string (and probably a
+            # first replace call) looks lost - confirm against upstream.
+            response.set_data(response.get_data().replace("", " -->"))
+            br.set_response(response)
+            br.select_form(name='LA0010Form01')
+            br['LA0010Form01:LA0010Email'] = self.username
+            br['LA0010Form01:LA0010Password'] = self.password
+            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
+            br.submit()
+            br.response()
+            # open news site
+            br.open('http://www.nikkei.com/')
+            br.response()
+            # forced redirect in default
+            br.select_form(nr=0)
+            br.submit()
+            response3 = br.response()
+            # return some cookie which should be set by Javascript
+            raw = response3.get_data()
+            # grab cookie from JS and set it
+            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
+            br.select_form(nr=0)
+
+            # The cookie jar is fed through a temporary LWP cookie file that
+            # carries the redirectFlag value found in the page script.
+            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
+
+            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
+            self.temp_files[-1].close()
+            cj.load(self.temp_files[-1].name)
+
+            br.submit()
+
+        return br
+
+
+
+
diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe
index d30aa9066f..fb17bb1210 100644
--- a/resources/recipes/yomiuri.recipe
+++ b/resources/recipes/yomiuri.recipe
@@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
remove_javascript = True
masthead_title = u'YOMIURI ONLINE'
- remove_tags_before = {'class':"article-def"}
+ keep_only_tags = [{'class':"article-def"}]
remove_tags = [{'class':"RelatedArticle"},
{'class':"sbtns"}
]
diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe
index f5f21c4aab..41ee4fd23d 100644
--- a/resources/recipes/yomiuri_world.recipe
+++ b/resources/recipes/yomiuri_world.recipe
@@ -21,7 +21,7 @@ class YOLNews(BasicNewsRecipe):
remove_javascript = True
masthead_title = u"YOMIURI ONLINE"
- remove_tags_before = {'class':"article-def"}
+ keep_only_tags = [{'class':"article-def"}]
remove_tags = [{'class':"RelatedArticle"},
{'class':"sbtns"}
]
diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 0deef5eb92..9c37b6ff59 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -21,7 +21,7 @@ class ANDROID(USBMS):
# HTC
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
: [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
- 0xc92 : [0x100]},
+ 0xc92 : [0x100], 0xc97: [0x226]},
# Eken
0x040d : { 0x8510 : [0x0001] },
@@ -63,7 +63,7 @@ class ANDROID(USBMS):
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
- 'SCH-I500_CARD']
+ 'SCH-I500_CARD', 'SPH-D700_CARD']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID']
diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py
index 23ce1716af..7a5e8c49b3 100644
--- a/src/calibre/devices/usbms/books.py
+++ b/src/calibre/devices/usbms/books.py
@@ -11,9 +11,9 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.devices.mime import mime_type_ext
from calibre.devices.interface import BookList as _BookList
from calibre.constants import preferred_encoding
-from calibre import isbytestring
+from calibre import isbytestring, force_unicode
from calibre.utils.config import prefs, tweaks
-from calibre.utils.icu import sort_key, strcmp as icu_strcmp
+from calibre.utils.icu import strcmp
class Book(Metadata):
def __init__(self, prefix, lpath, size=None, other=None):
@@ -241,7 +241,7 @@ class CollectionsBookList(BookList):
if y is None:
return -1
if isinstance(x, (unicode, str)):
- c = strcmp(x, y)
+ c = strcmp(force_unicode(x), force_unicode(y))
else:
c = cmp(x, y)
if c != 0:
diff --git a/src/calibre/ebooks/metadata/amazonfr.py b/src/calibre/ebooks/metadata/amazonfr.py
new file mode 100644
index 0000000000..156fff3d75
--- /dev/null
+++ b/src/calibre/ebooks/metadata/amazonfr.py
@@ -0,0 +1,516 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian '
+
+import sys, textwrap, re, traceback
+from urllib import urlencode
+from math import ceil
+
+from lxml import html
+from lxml.html import soupparser
+
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+ authors_to_sort_string
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.utils.config import OptionParser
+from calibre.library.comments import sanitize_comments_html
+
+
+class AmazonFr(MetadataSource):
+    # Metadata plugin that runs the module-level search() with lang='fr'.
+
+    name = 'Amazon French'
+    description = _('Downloads metadata from amazon.fr')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        # Store the matches in self.results; on failure keep the exception
+        # and its formatted traceback on self instead of raising.
+        try:
+            wanted = (self.title, self.book_author, self.publisher, self.isbn)
+            options = dict(max_results=10, verbose=self.verbose, lang='fr')
+            self.results = search(*wanted, **options)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonEs(MetadataSource):
+    # Metadata plugin that runs the module-level search() with lang='es'.
+
+    name = 'Amazon Spanish'
+    description = _('Downloads metadata from amazon.com in spanish')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        # Store the matches in self.results; on failure keep the exception
+        # and its formatted traceback on self instead of raising.
+        try:
+            wanted = (self.title, self.book_author, self.publisher, self.isbn)
+            options = dict(max_results=10, verbose=self.verbose, lang='es')
+            self.results = search(*wanted, **options)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonEn(MetadataSource):
+    # Metadata plugin that runs the module-level search() with lang='en'.
+
+    name = 'Amazon English'
+    description = _('Downloads metadata from amazon.com in english')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        # Store the matches in self.results; on failure keep the exception
+        # and its formatted traceback on self instead of raising.
+        try:
+            wanted = (self.title, self.book_author, self.publisher, self.isbn)
+            options = dict(max_results=10, verbose=self.verbose, lang='en')
+            self.results = search(*wanted, **options)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class AmazonDe(MetadataSource):
+    # Metadata plugin that runs the module-level search() with lang='de'.
+
+    name = 'Amazon German'
+    description = _('Downloads metadata from amazon.de')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        # Store the matches in self.results; on failure keep the exception
+        # and its formatted traceback on self instead of raising.
+        try:
+            wanted = (self.title, self.book_author, self.publisher, self.isbn)
+            options = dict(max_results=10, verbose=self.verbose, lang='de')
+            self.results = search(*wanted, **options)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+class Amazon(MetadataSource):
+    # Metadata plugin that runs the module-level search() with lang='all'.
+
+    name = 'Amazon'
+    description = _('Downloads metadata from amazon.com')
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Kovid Goyal & Sengian'
+    version = (1, 1, 0)
+    has_html_comments = True
+
+    def fetch(self):
+        # Store the matches in self.results; on failure keep the exception
+        # and its formatted traceback on self instead of raising.
+        # if not self.site_customization:
+            # return
+        try:
+            wanted = (self.title, self.book_author, self.publisher, self.isbn)
+            options = dict(max_results=10, verbose=self.verbose, lang='all')
+            self.results = search(*wanted, **options)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    # @property
+    # def string_customization_help(self):
+        # return _('You can select here the language for metadata search with amazon.com')
+
+
+def report(verbose):
+    # Print the traceback of the exception being handled, verbose mode only.
+    if not verbose:
+        return
+    traceback.print_exc()
+
+
+class Query(object):
+
+ BASE_URL_ALL = 'http://www.amazon.com'
+ BASE_URL_FR = 'http://www.amazon.fr'
+ BASE_URL_DE = 'http://www.amazon.de'
+
+    def __init__(self, title=None, author=None, publisher=None, isbn=None, keywords=None,
+        max_results=20, rlang='all'):
+        # Build the advanced-search URL for the Amazon site selected by rlang.
+        # Sets self.max_results, self.renbres, self.baseurl (site root) and
+        # self.urldata (full query URL). At least one search criterion is
+        # required and max_results must be below 21 (AssertionError).
+        assert not(title is None and author is None and publisher is None \
+            and isbn is None and keywords is None)
+        # Convert before comparing: the command line hands max_results over
+        # as a string, and in Python 2 a str never compares lower than an int.
+        try:
+            max_results = int(max_results)
+        except (TypeError, ValueError):
+            raise AssertionError('max_results must be an integer')
+        assert (max_results < 21)
+
+        self.max_results = max_results
+        self.renbres = re.compile(u'\s*(\d+)\s*')
+
+        q = {   'search-alias' : 'stripbooks' ,
+                'unfiltered' : '1',
+                'field-keywords' : '',
+                'field-author' : '',
+                'field-title' : '',
+                'field-isbn' : '',
+                'field-publisher' : ''
+                #get to amazon detailed search page to get all options
+                # 'node' : '',
+                # 'field-binding' : '',
+                #before, during, after
+                # 'field-dateop' : '',
+                #month as number
+                # 'field-datemod' : '',
+                # 'field-dateyear' : '',
+                #french only
+                # 'field-collection' : '',
+                #many options available
+            }
+
+        # rlang -> (sort order, language filter or None, site root)
+        sites = {
+            'all': ('relevanceexprank', None, self.BASE_URL_ALL),
+            'es': ('relevanceexprank', 'Spanish', self.BASE_URL_ALL),
+            'en': ('relevanceexprank', 'English', self.BASE_URL_ALL),
+            'fr': ('relevancerank', None, self.BASE_URL_FR),
+            'de': ('relevancerank', None, self.BASE_URL_DE),
+        }
+        # An unknown language used to fall through every branch and fail
+        # later with an AttributeError on self.urldata.
+        assert rlang in sites, 'Unsupported language: %r' % (rlang,)
+        q['sort'], language, self.urldata = sites[rlang]
+        if language is not None:
+            q['field-language'] = language
+        self.baseurl = self.urldata
+
+        # An ISBN takes precedence over every other criterion.
+        if isbn is not None:
+            q['field-isbn'] = isbn.replace('-', '')
+        else:
+            if title is not None:
+                q['field-title'] = title
+            if author is not None:
+                q['field-author'] = author
+            if publisher is not None:
+                q['field-publisher'] = publisher
+            if keywords is not None:
+                q['field-keywords'] = keywords
+
+        # q is a dict, so the unicode check has to be made per value:
+        # urlencode() calls str() on each one, which fails on non-ASCII text.
+        q = dict((k, v.encode('utf-8') if isinstance(v, unicode) else v)
+                    for (k, v) in q.iteritems())
+        self.urldata += '/gp/search/ref=sr_adv_b/?' + urlencode(q)
+
+    def __call__(self, browser, verbose, timeout = 5.):
+        # Run the search and collect the links of the results.
+        # Returns (hrefs, self.baseurl) with at most self.max_results hrefs,
+        # or (None, self.urldata) when the page is missing or unparsable.
+        # Errors other than a 404 while opening the first page are re-raised.
+        if verbose:
+            print 'Query:', self.urldata
+
+        try:
+            raw = browser.open_novisit(self.urldata, timeout=timeout).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                # Always hand back a pair: search() unpacks two values.
+                return None, self.urldata
+            raise
+        if '404 - ' in raw:
+            return None, self.urldata
+        feed = self._parse_page(raw)
+        if feed is None:
+            return None, self.urldata
+
+        #nb of page
+        try:
+            nbresults = self.renbres.findall(feed.xpath("//*[@class='resultCount']")[0].text)
+        except Exception:
+            return None, self.urldata
+
+        pages =[feed]
+        # nbresults[1] and nbresults[2] are both read below, so at least
+        # three numbers are needed before extra pages can be computed.
+        if len(nbresults) > 2:
+            nbpagetoquery = int(ceil(float(min(int(nbresults[2]), self.max_results))/ int(nbresults[1])))
+            for i in xrange(2, nbpagetoquery + 1):
+                try:
+                    urldata = self.urldata + '&page=' + str(i)
+                    raw = browser.open_novisit(urldata, timeout=timeout).read()
+                except Exception:
+                    continue
+                if '404 - ' in raw:
+                    continue
+                # A page that cannot be parsed is skipped; a successfully
+                # re-parsed page is added like any other (it used to be
+                # returned as the result of the whole call).
+                feed = self._parse_page(raw)
+                if feed is not None:
+                    pages.append(feed)
+
+        results = []
+        for x in pages:
+            results.extend([i.getparent().get('href') \
+                for i in x.xpath("//a/span[@class='srTitle']")])
+        return results[:self.max_results], self.baseurl
+
+    def _parse_page(self, raw):
+        # Decode raw and parse it with soupparser, retrying once with
+        # clean_ascii_chars() applied; returns None when both attempts fail.
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(raw)
+        except Exception:
+            try:
+                #remove ASCII invalid chars
+                return soupparser.fromstring(clean_ascii_chars(raw))
+            except Exception:
+                return None
+
+class ResultList(list):
+
+    def __init__(self, baseurl, lang = 'all'):
+        # Remember the site root and language, and pre-compile the patterns
+        # used by the get_* methods.
+        self.baseurl = baseurl
+        self.lang = lang
+        self.repub = re.compile(u'\((.*)\)')
+        self.rerat = re.compile(u'([0-9.]+)')
+        self.reattr = re.compile(r'<([a-zA-Z0-9]+)\s[^>]+>')
+        # NOTE(review): the <em>...</em> delimiters were missing from the
+        # pattern (its trailing lazy ".*?" matched nothing); restored on the
+        # assumption that the notice is wrapped in <em> - confirm.
+        self.reoutp = re.compile(r'(?s)<em>--This text ref.*?</em>')
+        # Matches HTML comments; the pattern was empty, which made the
+        # "Remove comments" substitution in get_description() a no-op.
+        self.recom = re.compile(r'(?s)<!--.*?-->')
+        self.republi = re.compile(u'(Editeur|Publisher|Verlag)', re.I)
+        # ISBN-10 was listed twice; ISBN-13 is presumably what was meant.
+        self.reisbn = re.compile(u'(ISBN-13|ISBN-10|ASIN)', re.I)
+        self.relang = re.compile(u'(Language|Langue|Sprache)', re.I)
+        self.reratelt = re.compile(u'(Average\s*Customer\s*Review|Moyenne\s*des\s*commentaires\s*client|Durchschnittliche\s*Kundenbewertung)', re.I)
+        self.reprod = re.compile(u'(Product\s*Details|D.tails\s*sur\s*le\s*produit|Produktinformation)', re.I)
+
+    def strip_tags_etree(self, etreeobj, invalid_tags):
+        # invalid_tags maps a tag name to a flag: a true flag removes the
+        # matching elements with drop_tree(), a false one with drop_tag().
+        for (itag, rmv) in invalid_tags.iteritems():
+            # Take a snapshot first: dropping elements while the tree is
+            # still being walked can make the iterator skip matches.
+            for elt in list(etreeobj.getiterator(itag)):
+                if rmv:
+                    elt.drop_tree()
+                else:
+                    elt.drop_tag()
+
+    def clean_entry(self, entry, invalid_tags = {'script': True},
+                invalid_id = (), invalid_class=()):
+        # Remove unwanted parts of entry in place.
+        # invalid_tags: remove tag and keep content if False else remove
+        # invalid_id / invalid_class: ids and class names whose elements
+        # are removed together with their content.
+        #remove tags
+        if invalid_tags:
+            self.strip_tags_etree(entry, invalid_tags)
+        #remove id
+        for eltid in invalid_id:
+            # Without a default get_element_by_id() raises KeyError for a
+            # missing id, so the None check below could never apply.
+            elt = entry.get_element_by_id(eltid, None)
+            if elt is not None:
+                elt.drop_tree()
+        #remove class
+        for eltclass in invalid_class:
+            for elt in entry.find_class(eltclass) or ():
+                elt.drop_tree()
+
+    def get_title(self, entry):
+        # Text of the element with id 'btAsinTitle', newlines removed and
+        # surrounding whitespace stripped, as unicode.
+        node = entry.get_element_by_id('btAsinTitle')
+        text = node if node is None else node.text
+        return unicode(text.replace('\n', '').strip())
+
+    def get_authors(self, entry):
+        # Climb from the title element to the first ancestor that sits
+        # directly inside a <div>, step into that div and return the
+        # stripped text of every link found there.
+        node = entry.get_element_by_id('btAsinTitle')
+        while node.getparent().tag != 'div':
+            node = node.getparent()
+        container = node.getparent()
+        return [unicode(link.text_content().strip())
+                    for link in container.getiterator('a')]
+
+    def get_description(self, entry, verbose):
+        # Return the sanitized HTML of the product description, or None
+        # (after reporting) when anything goes wrong.
+        try:
+            block = entry.get_element_by_id("productDescription").find("div[@class='content']")
+            self.clean_entry(block, invalid_tags={'img': True, 'a': False},
+                    invalid_class=('seeAll', 'emptyClear'))
+            text = html.tostring(block, method='html', encoding=unicode).strip()
+            cleanups = (
+                # remove all attributes from tags
+                (self.reattr, r'<\1>'),
+                # Remove the notice about text referring to out of print editions
+                (self.reoutp, ''),
+                # Remove comments
+                (self.recom, ''),
+            )
+            for pattern, replacement in cleanups:
+                text = pattern.sub(replacement, text)
+            return unicode(sanitize_comments_html(text))
+        except:
+            report(verbose)
+            return None
+
+ def get_tags(self, entry, browser, verbose):
+ # Return the text of every <a rel="tag"> link below the element with id
+ # 'tagContentHolder'. Any exception is reported and yields [].
+ try:
+ tags = entry.get_element_by_id('tagContentHolder')
+ testptag = tags.find_class('see-all')
+ if testptag:
+ for x in testptag:
+ alink = x.xpath('descendant-or-self::a')
+ if alink:
+ # links carrying the class 'tgJsActive' are skipped
+ if alink[0].get('class') == 'tgJsActive':
+ continue
+ # the first remaining link is followed and the tags are read
+ # from the page it points to instead
+ link = self.baseurl + alink[0].get('href')
+ entry = self.get_individual_metadata(browser, link, verbose)
+ tags = entry.get_element_by_id('tagContentHolder')
+ break
+ tags = [a.text for a in tags.getiterator('a') if a.get('rel') == 'tag']
+ except:
+ report(verbose)
+ tags = []
+ return tags
+
+ def get_book_info(self, entry, mi, verbose):
+ # Fill mi.publisher, mi.pubdate, mi.isbn, mi.language and mi.rating from
+ # the <li> items of the product details list and return mi.
+ try:
+ # preferred anchor: the parent of the element with id 'SalesRank'
+ entry = entry.get_element_by_id('SalesRank').getparent()
+ except:
+ try:
+ # fallback: the list under the <h2> whose text matches self.reprod
+ for z in entry.getiterator('h2'):
+ if self.reprod.search(z.text_content()):
+ entry = z.getparent().find("div[@class='content']/ul")
+ break
+ except:
+ report(verbose)
+ return mi
+ elts = entry.findall('li')
+ # NOTE(review): each filter below reads x.find('b').text, so every <li>
+ # is assumed to contain a <b> element - confirm.
+ #pub & date
+ elt = filter(lambda x: self.republi.search(x.find('b').text), elts)
+ if elt:
+ pub = elt[0].find('b').tail
+ # the publisher is the text with the parenthesised part removed; the
+ # parenthesised part itself is parsed as the publication date
+ mi.publisher = unicode(self.repub.sub('', pub).strip())
+ d = self.repub.search(pub)
+ if d is not None:
+ d = d.group(1)
+ try:
+ default = utcnow().replace(day=15)
+ if self.lang != 'all':
+ d = replace_months(d, self.lang)
+ d = parse_date(d, assume_utc=True, default=default)
+ mi.pubdate = d
+ except:
+ report(verbose)
+ #ISBN
+ elt = filter(lambda x: self.reisbn.search(x.find('b').text), elts)
+ if elt:
+ isbn = elt[0].find('b').tail.replace('-', '').strip()
+ if check_isbn(isbn):
+ mi.isbn = unicode(isbn)
+ # the second matching item is tried only when the first one is rejected
+ elif len(elt) > 1:
+ isbn = elt[1].find('b').tail.replace('-', '').strip()
+ if check_isbn(isbn):
+ mi.isbn = unicode(isbn)
+ #Language
+ elt = filter(lambda x: self.relang.search(x.find('b').text), elts)
+ if elt:
+ langue = elt[0].find('b').tail.strip()
+ if langue:
+ mi.language = unicode(langue)
+ #ratings
+ elt = filter(lambda x: self.reratelt.search(x.find('b').text), elts)
+ if elt:
+ ratings = elt[0].find_class('swSprite')
+ if ratings:
+ # exactly two numbers are expected in the title attribute; their
+ # ratio is scaled by 5
+ ratings = self.rerat.findall(ratings[0].get('title'))
+ if len(ratings) == 2:
+ mi.rating = float(ratings[0])/float(ratings[1]) * 5
+ return mi
+
+    def fill_MI(self, entry, title, authors, browser, verbose):
+        # Build the MetaInformation record of one result page: sort string,
+        # comments, book details and tags, filled in that order.
+        record = MetaInformation(title, authors)
+        record.author_sort = authors_to_sort_string(authors)
+        record.comments = self.get_description(entry, verbose)
+        record = self.get_book_info(entry, record, verbose)
+        record.tags = self.get_tags(entry, browser, verbose)
+        return record
+
+    def get_individual_metadata(self, browser, linkdata, verbose):
+        # Download linkdata and return the page parsed by soupparser.
+        # Returns None for a 404 or an unparsable page; any other download
+        # error is re-raised.
+        try:
+            page = browser.open_novisit(linkdata).read()
+        except Exception, err:
+            report(verbose)
+            getcode = getattr(err, 'getcode', None)
+            if callable(getcode) and err.getcode() == 404:
+                return
+            raise
+        if '404 - ' in page:
+            report(verbose)
+            return
+        page = xml_to_unicode(page, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            return soupparser.fromstring(page)
+        except:
+            pass
+        try:
+            #remove ASCII invalid chars
+            return soupparser.fromstring(clean_ascii_chars(page))
+        except:
+            report(verbose)
+            return
+
+    def populate(self, entries, browser, verbose=False):
+        # Fetch every result URL and append one metadata record per page
+        # whose title and authors could be read; failures are skipped.
+        for url in entries:
+            try:
+                entry = self.get_individual_metadata(browser, url, verbose)
+                title = self.get_title(entry)
+                authors = self.get_authors(entry)
+            except Exception, e:
+                if verbose:
+                    print 'Failed to get all details for an entry'
+                    print e
+                    print 'URL who failed:', url
+                report(verbose)
+            else:
+                self.append(self.fill_MI(entry, title, authors, browser, verbose))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+           max_results=5, verbose=False, keywords=None, lang='all'):
+    # Query Amazon and return a ResultList of metadata records, or None
+    # when the query produced no result links.
+    br = browser()
+    query = Query(title=title, author=author, isbn=isbn, publisher=publisher,
+        keywords=keywords, max_results=max_results,rlang=lang)
+    entries, baseurl = query(br, verbose)
+
+    # covers both None and an empty list
+    if not entries:
+        return
+
+    #List of entry
+    found = ResultList(baseurl, lang)
+    found.populate(entries, br, verbose)
+    return found
+
+def option_parser():
+ # Build the command line parser: one option per search criterion plus
+ # the result limit, the search language and a verbosity counter.
+ parser = OptionParser(textwrap.dedent(\
+ _('''\
+ %prog [options]
+
+ Fetch book metadata from Amazon. You must specify one of title, author,
+ ISBN, publisher or keywords. Will fetch a maximum of 10 matches,
+ so you should make your query as specific as possible.
+ You can chose the language for metadata retrieval:
+ All & english & french & german & spanish
+ '''
+ )))
+ parser.add_option('-t', '--title', help='Book title')
+ parser.add_option('-a', '--author', help='Book author(s)')
+ parser.add_option('-p', '--publisher', help='Book publisher')
+ parser.add_option('-i', '--isbn', help='Book ISBN')
+ parser.add_option('-k', '--keywords', help='Keywords')
+ # no type is given here, so a value typed on the command line stays a string
+ parser.add_option('-m', '--max-results', default=10,
+ help='Maximum number of results to fetch')
+ parser.add_option('-l', '--lang', default='all',
+ help='Chosen language for metadata search (all, en, fr, es, de)')
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help='Be more verbose about errors')
+ return parser
+
+def main(args=sys.argv):
+    # Command line entry point: run one search and print every result.
+    # Returns 1 after a failed assertion, 0 when nothing was found and
+    # None after printing results.
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    try:
+        results = search(opts.title, opts.author, isbn=opts.isbn, publisher=opts.publisher,
+            keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results,
+                lang=opts.lang)
+    except AssertionError:
+        report(True)
+        parser.print_help()
+        return 1
+    # covers both None and an empty result list
+    if not results:
+        print 'No result found for this search!'
+        return 0
+    for record in results:
+        print unicode(record).encode(preferred_encoding, 'replace')
+        print
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/src/calibre/ebooks/metadata/fictionwise.py b/src/calibre/ebooks/metadata/fictionwise.py
new file mode 100644
index 0000000000..b780f2b39d
--- /dev/null
+++ b/src/calibre/ebooks/metadata/fictionwise.py
@@ -0,0 +1,390 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian '
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap, re, traceback, socket
+from urllib import urlencode
+
+from lxml.html import soupparser, tostring
+
+from calibre import browser, preferred_encoding
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata import MetaInformation, check_isbn, \
+ authors_to_sort_string
+from calibre.library.comments import sanitize_comments_html
+from calibre.ebooks.metadata.fetch import MetadataSource
+from calibre.utils.config import OptionParser
+from calibre.utils.date import parse_date, utcnow
+from calibre.utils.cleantext import clean_ascii_chars
+
+class Fictionwise(MetadataSource): # {{{
+    # Metadata plugin that runs the module-level search() for Fictionwise.
+
+    author = 'Sengian'
+    name = 'Fictionwise'
+    description = _('Downloads metadata from Fictionwise')
+
+    has_html_comments = True
+
+    def fetch(self):
+        # Store the matches in self.results; on failure keep the exception
+        # and its formatted traceback on self instead of raising.
+        try:
+            wanted = (self.title, self.book_author, self.publisher, self.isbn)
+            options = dict(max_results=10, verbose=self.verbose)
+            self.results = search(*wanted, **options)
+        except Exception, e:
+            self.exception = e
+            self.tb = traceback.format_exc()
+
+    # }}}
+
+class FictionwiseError(Exception):
+    # Raised by Query.__call__ when the request times out or fails.
+    pass
+
+def report(verbose):
+    # Print the traceback of the exception being handled, verbose mode only.
+    if not verbose:
+        return
+    traceback.print_exc()
+
+class Query(object):
+
+ BASE_URL = 'http://www.fictionwise.com/servlet/mw'
+
+    def __init__(self, title=None, author=None, publisher=None, keywords=None, max_results=20):
+        # Build the url-encoded form data (self.urldata) of an advanced
+        # search. At least one criterion is required and max_results must
+        # be below 21 (AssertionError).
+        assert not(title is None and author is None and publisher is None and keywords is None)
+        # Convert before comparing so numeric strings are accepted: in
+        # Python 2 a str never compares lower than an int.
+        try:
+            max_results = int(max_results)
+        except (TypeError, ValueError):
+            raise AssertionError('max_results must be an integer')
+        assert (max_results < 21)
+
+        self.max_results = max_results
+        q = {   'template' : 'searchresults_adv.htm' ,
+                'searchtitle' : '',
+                'searchauthor' : '',
+                'searchpublisher' : '',
+                'searchkeyword' : '',
+                #possibilities startoflast, fullname, lastfirst
+                'searchauthortype' : 'startoflast',
+                'searchcategory' : '',
+                'searchcategory2' : '',
+                'searchprice_s' : '0',
+                'searchprice_e' : 'ANY',
+                'searchformat' : '',
+                'searchgeo' : 'US',
+                'searchfwdatetype' : '',
+                #maybe use dates fields if needed?
+                #'sortorder' : 'DESC',
+                #many options available: b.SortTitle, a.SortName,
+                #b.DateFirstPublished, b.FWPublishDate
+                'sortby' : 'b.SortTitle'
+            }
+        if title is not None:
+            q['searchtitle'] = title
+        if author is not None:
+            q['searchauthor'] = author
+        if publisher is not None:
+            q['searchpublisher'] = publisher
+        if keywords is not None:
+            q['searchkeyword'] = keywords
+
+        # q is a dict, so the unicode check has to be made per value:
+        # urlencode() calls str() on each one, which fails on non-ASCII text.
+        q = dict((k, v.encode('utf-8') if isinstance(v, unicode) else v)
+                    for (k, v) in q.iteritems())
+        self.urldata = urlencode(q)
+
+    def __call__(self, browser, verbose, timeout = 5.):
+        # Post the query and return the links of the results.
+        # Returns a list of hrefs, [feed] (the parsed page itself) when no
+        # result row is found, or None for a 404 or an unparsable page.
+        # Raises FictionwiseError for any other download failure.
+        if verbose:
+            # parenthesised: "%" binds tighter than "+"
+            print _('Query: %s') % (self.BASE_URL+self.urldata)
+
+        try:
+            raw = browser.open_novisit(self.BASE_URL, self.urldata, timeout=timeout).read()
+        except Exception, e:
+            report(verbose)
+            if callable(getattr(e, 'getcode', None)) and \
+                    e.getcode() == 404:
+                return
+            # "or" also covers an empty args tuple, which has no item 0
+            first_arg = (getattr(e, 'args', None) or [None])[0]
+            if isinstance(first_arg, socket.timeout):
+                raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+            raise FictionwiseError(_('Fictionwise encountered an error.'))
+        if '404 - ' in raw:
+            return
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                resolve_entities=True)[0]
+        try:
+            feed = soupparser.fromstring(raw)
+        except Exception:
+            try:
+                #remove ASCII invalid chars
+                feed = soupparser.fromstring(clean_ascii_chars(raw))
+            except Exception:
+                return None
+
+        # get list of results as links
+        results = feed.xpath("//table[3]/tr/td[2]/table/tr/td/p/table[2]/tr[@valign]")
+        results = results[:self.max_results]
+        results = [i.xpath('descendant-or-self::a')[0].get('href') for i in results]
+        #return feed if no links ie normally a single book or nothing
+        if not results:
+            results = [feed]
+        return results
+
+class ResultList(list):
+
+ BASE_URL = 'http://www.fictionwise.com'
+ COLOR_VALUES = {'BLUE': 4, 'GREEN': 3, 'YELLOW': 2, 'RED': 1, 'NA': 0}
+
+    def __init__(self):
+        # Pre-compile the patterns used by the get_* methods. Several
+        # literals had lost their HTML tags and named groups; the group
+        # names are the ones read through .group() in get_description()
+        # and get_rating().
+        self.retitle = re.compile(r'\[[^\[\]]+\]')
+        self.rechkauth = re.compile(r'.*book\s*by', re.I)
+        self.redesc = re.compile(r'book\s*description\s*:\s*(<br[^>]+>)*(?P<desc>.*)<br[^>]*>.{,15}publisher\s*:', re.I)
+        self.repub = re.compile(r'.*publisher\s*:\s*', re.I)
+        self.redate = re.compile(r'.*release\s*date\s*:\s*', re.I)
+        self.retag = re.compile(r'.*book\s*category\s*:\s*', re.I)
+        # splits a block on its <br> tags
+        self.resplitbr = re.compile(r'<br[^>]*>', re.I)
+        # HTML comments
+        self.recomment = re.compile(r'(?s)<!--.*?-->')
+        # NOTE(review): tag name restored from the attribute name "reimg"
+        self.reimg = re.compile(r'<img[^>]*>', re.I)
+        self.resanitize = re.compile(r'\[HTML_REMOVED\]\s*', re.I)
+        self.renbcom = re.compile('(?P<nbcom>\d+)\s*Reader Ratings:')
+        self.recolor = re.compile('(?P<ncolor>[^/]+).gif')
+        # splits on <br> tags and on opening or closing <div> tags
+        self.resplitbrdiv = re.compile(r'(<br[^>]+>|</?div[^>]*>)', re.I)
+        self.reisbn = re.compile(r'.*ISBN\s*:\s*', re.I)
+
+    def strip_tags_etree(self, etreeobj, invalid_tags):
+        # invalid_tags maps a tag name to a flag: a true flag removes the
+        # matching elements with drop_tree(), a false one with drop_tag().
+        for (itag, rmv) in invalid_tags.iteritems():
+            # Take a snapshot first: dropping elements while the tree is
+            # still being walked can make the iterator skip matches.
+            for elt in list(etreeobj.getiterator(itag)):
+                if rmv:
+                    elt.drop_tree()
+                else:
+                    elt.drop_tag()
+
+    def clean_entry(self, entry, invalid_tags = {'script': True},
+                invalid_id = (), invalid_class=(), invalid_xpath = ()):
+        # Remove unwanted parts of entry in place.
+        # invalid_tags: remove tag and keep content if False else remove
+        # invalid_xpath / invalid_id / invalid_class: XPath expressions, ids
+        # and class names whose elements are removed with their content.
+        #remove tags
+        if invalid_tags:
+            self.strip_tags_etree(entry, invalid_tags)
+        #remove xpath
+        for expr in invalid_xpath:
+            for el in entry.xpath(expr):
+                el.drop_tree()
+        #remove id
+        for eltid in invalid_id:
+            # Without a default get_element_by_id() raises KeyError for a
+            # missing id, so the None check below could never apply.
+            elt = entry.get_element_by_id(eltid, None)
+            if elt is not None:
+                elt.drop_tree()
+        #remove class
+        for eltclass in invalid_class:
+            for elt in entry.find_class(eltclass) or ():
+                elt.drop_tree()
+
+    def output_entry(self, entry, prettyout = True, htmlrm="\d+"):
+        # Serialize entry with tostring() and strip newlines, tabs, carriage
+        # returns and the character references selected by htmlrm.
+        out = tostring(entry, pretty_print=prettyout)
+        #try to work around tostring to remove this encoding for example
+        # NOTE(review): the "&#" prefix was missing from the pattern, which
+        # then deleted any digits followed by ";" (or every ";" when htmlrm
+        # is empty); restored on the assumption that numeric character
+        # references (&#NNN;) are the target - confirm.
+        reclean = re.compile('(\n+|\t+|\r+|&#'+htmlrm+';)')
+        return reclean.sub('', out)
+
+    def get_title(self, entry):
+        # Title text with every [bracketed] part removed and the result
+        # stripped.
+        raw_title = entry.findtext('./')
+        cleaned = self.retitle.sub('', raw_title)
+        return cleaned.strip()
+
+    def get_authors(self, entry):
+        # Author names taken from the text following the first <br>; the
+        # text must match self.rechkauth, otherwise [] is returned.
+        byline = entry.find('./br').tail
+        if self.rechkauth.search(byline):
+            names = self.rechkauth.sub('', byline)
+            return [name.strip() for name in names.split('&')]
+        return []
+
+    def get_rating(self, entrytable, verbose):
+        # Compute a rating on a 0-5 scale from the images of entrytable:
+        # each image contributes its colour value (COLOR_VALUES) weighted by
+        # its height. Returns None when the "Reader Ratings" count is not
+        # found in the preceding element or when there is nothing to weigh.
+        nbcomment = tostring(entrytable.getprevious())
+        try:
+            nbcomment = self.renbcom.search(nbcomment).group("nbcom")
+        except:
+            report(verbose)
+            return None
+        hval = dict((self.COLOR_VALUES[self.recolor.search(image.get('src', default='NA.gif')).group("ncolor")],
+                    float(image.get('height', default=0))) \
+                        for image in entrytable.getiterator('img'))
+        total = sum(hval.itervalues())
+        if not total:
+            # no image, or only zero heights: avoid dividing by zero
+            return None
+        #ratings as x/5
+        return float(1.25*sum(k*v for (k, v) in hval.iteritems())/total)
+
+ def get_description(self, entry):
+ description = self.output_entry(entry.xpath('./p')[1],htmlrm="")
+ description = self.redesc.search(description)
+ if not description or not description.group("desc"):
+ return None
+ #remove invalid tags
+ description = self.reimg.sub('', description.group("desc"))
+ description = self.recomment.sub('', description)
+ description = self.resanitize.sub('', sanitize_comments_html(description))
+ return _('SUMMARY:\n %s') % re.sub(r'\n\s+
','\n', description)
+
+ def get_publisher(self, entry):
+ publisher = self.output_entry(entry.xpath('./p')[1])
+ publisher = filter(lambda x: self.repub.search(x) is not None,
+ self.resplitbr.split(publisher))
+ if not len(publisher):
+ return None
+ publisher = self.repub.sub('', publisher[0])
+ return publisher.split(',')[0].strip()
+
+ def get_tags(self, entry):
+ tag = self.output_entry(entry.xpath('./p')[1])
+ tag = filter(lambda x: self.retag.search(x) is not None,
+ self.resplitbr.split(tag))
+ if not len(tag):
+ return []
+ return map(lambda x: x.strip(), self.retag.sub('', tag[0]).split('/'))
+
+ def get_date(self, entry, verbose):
+ date = self.output_entry(entry.xpath('./p')[1])
+ date = filter(lambda x: self.redate.search(x) is not None,
+ self.resplitbr.split(date))
+ if not len(date):
+ return None
+ try:
+ d = self.redate.sub('', date[0])
+ if d:
+ default = utcnow().replace(day=15)
+ d = parse_date(d, assume_utc=True, default=default)
+ else:
+ d = None
+ except:
+ report(verbose)
+ d = None
+ return d
+
+ def get_ISBN(self, entry):
+ isbns = self.output_entry(entry.xpath('./p')[2])
+ isbns = filter(lambda x: self.reisbn.search(x) is not None,
+ self.resplitbrdiv.split(isbns))
+ if not len(isbns):
+ return None
+ isbns = [self.reisbn.sub('', x) for x in isbns if check_isbn(self.reisbn.sub('', x))]
+ return sorted(isbns, cmp=lambda x,y:cmp(len(x), len(y)))[-1]
+
+ def fill_MI(self, entry, title, authors, ratings, verbose):
+ mi = MetaInformation(title, authors)
+ mi.rating = ratings
+ mi.comments = self.get_description(entry)
+ mi.publisher = self.get_publisher(entry)
+ mi.tags = self.get_tags(entry)
+ mi.pubdate = self.get_date(entry, verbose)
+ mi.isbn = self.get_ISBN(entry)
+ mi.author_sort = authors_to_sort_string(authors)
+ return mi
+
+ def get_individual_metadata(self, browser, linkdata, verbose):
+ try:
+ raw = browser.open_novisit(self.BASE_URL + linkdata).read()
+ except Exception, e:
+ report(verbose)
+ if callable(getattr(e, 'getcode', None)) and \
+ e.getcode() == 404:
+ return
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise FictionwiseError(_('Fictionwise timed out. Try again later.'))
+ raise FictionwiseError(_('Fictionwise encountered an error.'))
+ if '404 - ' in raw:
+ report(verbose)
+ return
+ raw = xml_to_unicode(raw, strip_encoding_pats=True,
+ resolve_entities=True)[0]
+ try:
+ return soupparser.fromstring(raw)
+ except:
+ try:
+ #remove ASCII invalid chars
+ return soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ return None
+
+ def populate(self, entries, browser, verbose=False):
+ inv_tags ={'script': True, 'a': False, 'font': False, 'strong': False, 'b': False,
+ 'ul': False, 'span': False}
+ inv_xpath =('./table',)
+ #single entry
+ if len(entries) == 1 and not isinstance(entries[0], str):
+ try:
+ entry = entries.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")
+ self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
+ title = self.get_title(entry)
+ #maybe strenghten the search
+ ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print _('Failed to get all details for an entry')
+ print e
+ return
+ self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+ else:
+ #multiple entries
+ for x in entries:
+ try:
+ entry = self.get_individual_metadata(browser, x, verbose)
+ entry = entry.xpath("//table[3]/tr/td[2]/table[1]/tr/td/font/table/tr/td")[0]
+ self.clean_entry(entry, invalid_tags=inv_tags, invalid_xpath=inv_xpath)
+ title = self.get_title(entry)
+ #maybe strenghten the search
+ ratings = self.get_rating(entry.xpath("./p/table")[1], verbose)
+ authors = self.get_authors(entry)
+ except Exception, e:
+ if verbose:
+ print _('Failed to get all details for an entry')
+ print e
+ continue
+ self.append(self.fill_MI(entry, title, authors, ratings, verbose))
+
+
+def search(title=None, author=None, publisher=None, isbn=None,
+ min_viewability='none', verbose=False, max_results=5,
+ keywords=None):
+ br = browser()
+ entries = Query(title=title, author=author, publisher=publisher,
+ keywords=keywords, max_results=max_results)(br, verbose, timeout = 15.)
+
+ #List of entry
+ ans = ResultList()
+ ans.populate(entries, br, verbose)
+ return ans
+
+
+def option_parser():
+ parser = OptionParser(textwrap.dedent(\
+ _('''\
+ %prog [options]
+
+ Fetch book metadata from Fictionwise. You must specify one of title, author,
+ or keywords. No ISBN specification possible. Will fetch a maximum of 20 matches,
+ so you should make your query as specific as possible.
+ ''')
+ ))
+ parser.add_option('-t', '--title', help=_('Book title'))
+ parser.add_option('-a', '--author', help=_('Book author(s)'))
+ parser.add_option('-p', '--publisher', help=_('Book publisher'))
+ parser.add_option('-k', '--keywords', help=_('Keywords'))
+ parser.add_option('-m', '--max-results', default=20,
+ help=_('Maximum number of results to fetch'))
+ parser.add_option('-v', '--verbose', default=0, action='count',
+ help=_('Be more verbose about errors'))
+ return parser
+
+def main(args=sys.argv):
+ parser = option_parser()
+ opts, args = parser.parse_args(args)
+ try:
+ results = search(opts.title, opts.author, publisher=opts.publisher,
+ keywords=opts.keywords, verbose=opts.verbose, max_results=opts.max_results)
+ except AssertionError:
+ report(True)
+ parser.print_help()
+ return 1
+ if results is None or len(results) == 0:
+ print _('No result found for this search!')
+ return 0
+ for result in results:
+ print unicode(result).encode(preferred_encoding, 'replace')
+ print
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py
index 4d19e9611b..8914e2d985 100644
--- a/src/calibre/ebooks/metadata/nicebooks.py
+++ b/src/calibre/ebooks/metadata/nicebooks.py
@@ -10,7 +10,8 @@ from copy import deepcopy
from lxml.html import soupparser
-from calibre.utils.date import parse_date, utcnow
+from calibre.utils.date import parse_date, utcnow, replace_months
+from calibre.utils.cleantext import clean_ascii_chars
from calibre import browser, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import MetaInformation, check_isbn, \
@@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload):
traceback.format_exc(), self.name))
+class NiceBooksError(Exception):
+ pass
+
+class ISBNNotFound(NiceBooksError):
+ pass
+
def report(verbose):
if verbose:
- import traceback
traceback.print_exc()
-def replace_monthsfr(datefr):
- # Replace french months by english equivalent for parse_date
- frtoen = {
- u'[jJ]anvier': u'jan',
- u'[fF].vrier': u'feb',
- u'[mM]ars': u'mar',
- u'[aA]vril': u'apr',
- u'[mM]ai': u'may',
- u'[jJ]uin': u'jun',
- u'[jJ]uillet': u'jul',
- u'[aA]o.t': u'aug',
- u'[sS]eptembre': u'sep',
- u'[Oo]ctobre': u'oct',
- u'[nN]ovembre': u'nov',
- u'[dD].cembre': u'dec' }
- for k in frtoen.iterkeys():
- tmp = re.sub(k, frtoen[k], datefr)
- if tmp <> datefr: break
- return tmp
-
class Query(object):
BASE_URL = 'http://fr.nicebooks.com/'
@@ -119,7 +105,7 @@ class Query(object):
def __call__(self, browser, verbose, timeout = 5.):
if verbose:
- print 'Query:', self.BASE_URL+self.urldata
+ print _('Query: %s') % self.BASE_URL+self.urldata
try:
raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read()
@@ -128,7 +114,9 @@ class Query(object):
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
- raise
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+ raise NiceBooksError(_('Nicebooks encountered an error.'))
if '404 - ' in raw:
return
raw = xml_to_unicode(raw, strip_encoding_pats=True,
@@ -136,7 +124,11 @@ class Query(object):
try:
feed = soupparser.fromstring(raw)
except:
- return
+ try:
+ #remove ASCII invalid chars
+ feed = soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ return None
#nb of page to call
try:
@@ -161,7 +153,11 @@ class Query(object):
try:
feed = soupparser.fromstring(raw)
except:
- continue
+ try:
+ #remove ASCII invalid chars
+ feed = soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ continue
pages.append(feed)
results = []
@@ -180,14 +176,12 @@ class ResultList(list):
self.reautclean = re.compile(u'\s*\(.*\)\s*')
def get_title(self, entry):
- # title = deepcopy(entry.find("div[@id='book-info']"))
title = deepcopy(entry)
title.remove(title.find("dl[@title='Informations sur le livre']"))
title = ' '.join([i.text_content() for i in title.iterchildren()])
return unicode(title.replace('\n', ''))
def get_authors(self, entry):
- # author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']")
author = entry.find("dl[@title='Informations sur le livre']")
authortext = []
for x in author.getiterator('dt'):
@@ -223,7 +217,7 @@ class ResultList(list):
d = x.getnext().text_content()
try:
default = utcnow().replace(day=15)
- d = replace_monthsfr(d)
+ d = replace_months(d, 'fr')
d = parse_date(d, assume_utc=True, default=default)
mi.pubdate = d
except:
@@ -234,11 +228,6 @@ class ResultList(list):
mi = MetaInformation(title, authors)
mi.author_sort = authors_to_sort_string(authors)
mi.comments = self.get_description(entry, verbose)
- # entry = entry.find("dl[@title='Informations sur le livre']")
- # mi.publisher = self.get_publisher(entry)
- # mi.pubdate = self.get_date(entry, verbose)
- # mi.isbn = self.get_ISBN(entry)
- # mi.language = self.get_language(entry)
return self.get_book_info(entry, mi, verbose)
def get_individual_metadata(self, browser, linkdata, verbose):
@@ -249,7 +238,9 @@ class ResultList(list):
if callable(getattr(e, 'getcode', None)) and \
e.getcode() == 404:
return
- raise
+ if isinstance(getattr(e, 'args', [None])[0], socket.timeout):
+ raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
+ raise NiceBooksError(_('Nicebooks encountered an error.'))
if '404 - ' in raw:
report(verbose)
return
@@ -258,7 +249,11 @@ class ResultList(list):
try:
feed = soupparser.fromstring(raw)
except:
- return
+ try:
+ #remove ASCII invalid chars
+ feed = soupparser.fromstring(clean_ascii_chars(raw))
+ except:
+ return None
# get results
return feed.xpath("//div[@id='container']")[0]
@@ -292,13 +287,6 @@ class ResultList(list):
continue
self.append(self.fill_MI(entry, title, authors, verbose))
-
-class NiceBooksError(Exception):
- pass
-
-class ISBNNotFound(NiceBooksError):
- pass
-
class Covers(object):
def __init__(self, isbn = None):
@@ -329,11 +317,10 @@ class Covers(object):
return cover, ext if ext else 'jpg'
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
- err = NiceBooksError(_('Nicebooks timed out. Try again later.'))
- raise err
+ raise NiceBooksError(_('Nicebooks timed out. Try again later.'))
if not len(self.urlimg):
if not self.isbnf:
- raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.'))
+ raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn)
raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher'))
@@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None,
max_results=5, verbose=False, keywords=None):
br = browser()
entries = Query(title=title, author=author, isbn=isbn, publisher=publisher,
- keywords=keywords, max_results=max_results)(br, verbose)
+ keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.)
if entries is None or len(entries) == 0:
- return
+ return None
#List of entry
ans = ResultList()
@@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.):
def option_parser():
parser = OptionParser(textwrap.dedent(\
- '''\
+ _('''\
%prog [options]
Fetch book metadata from Nicebooks. You must specify one of title, author,
ISBN, publisher or keywords. Will fetch a maximum of 20 matches,
so you should make your query as specific as possible.
It can also get covers if the option is activated.
- '''
+ ''')
))
- parser.add_option('-t', '--title', help='Book title')
- parser.add_option('-a', '--author', help='Book author(s)')
- parser.add_option('-p', '--publisher', help='Book publisher')
- parser.add_option('-i', '--isbn', help='Book ISBN')
- parser.add_option('-k', '--keywords', help='Keywords')
+ parser.add_option('-t', '--title', help=_('Book title'))
+ parser.add_option('-a', '--author', help=_('Book author(s)'))
+ parser.add_option('-p', '--publisher', help=_('Book publisher'))
+ parser.add_option('-i', '--isbn', help=_('Book ISBN'))
+ parser.add_option('-k', '--keywords', help=_('Keywords'))
parser.add_option('-c', '--covers', default=0,
- help='Covers: 1-Check/ 2-Download')
+ help=_('Covers: 1-Check/ 2-Download'))
parser.add_option('-p', '--coverspath', default='',
- help='Covers files path')
+ help=_('Covers files path'))
parser.add_option('-m', '--max-results', default=20,
- help='Maximum number of results to fetch')
+ help=_('Maximum number of results to fetch'))
parser.add_option('-v', '--verbose', default=0, action='count',
- help='Be more verbose about errors')
+ help=_('Be more verbose about errors'))
return parser
def main(args=sys.argv):
@@ -400,15 +387,15 @@ def main(args=sys.argv):
parser.print_help()
return 1
if results is None or len(results) == 0:
- print 'No result found for this search!'
+ print _('No result found for this search!')
return 0
for result in results:
print unicode(result).encode(preferred_encoding, 'replace')
covact = int(opts.covers)
if covact == 1:
- textcover = 'No cover found!'
+ textcover = _('No cover found!')
if check_for_cover(result.isbn):
- textcover = 'A cover was found for this book'
+ textcover = _('A cover was found for this book')
print textcover
elif covact == 2:
cover_data, ext = cover_from_isbn(result.isbn)
@@ -417,7 +404,7 @@ def main(args=sys.argv):
cpath = os.path.normpath(opts.coverspath + '/' + result.isbn)
oname = os.path.abspath(cpath+'.'+ext)
open(oname, 'wb').write(cover_data)
- print 'Cover saved to file ', oname
+ print _('Cover saved to file '), oname
print
if __name__ == '__main__':
diff --git a/src/calibre/ebooks/metadata/worker.py b/src/calibre/ebooks/metadata/worker.py
index 247050856d..d059d7e34c 100644
--- a/src/calibre/ebooks/metadata/worker.py
+++ b/src/calibre/ebooks/metadata/worker.py
@@ -8,12 +8,12 @@ __docformat__ = 'restructuredtext en'
from threading import Thread
from Queue import Empty
-import os, time, sys, shutil
+import os, time, sys, shutil, json
from calibre.utils.ipc.job import ParallelJob
from calibre.utils.ipc.server import Server
from calibre.ptempfile import PersistentTemporaryDirectory, TemporaryDirectory
-from calibre import prints
+from calibre import prints, isbytestring
from calibre.constants import filesystem_encoding
@@ -194,14 +194,42 @@ class SaveWorker(Thread):
self.daemon = True
self.path, self.opts = path, opts
self.ids = ids
- self.library_path = db.library_path
+ self.db = db
self.canceled = False
self.result_queue = result_queue
self.error = None
self.spare_server = spare_server
self.start()
+ def collect_data(self, ids):
+ from calibre.ebooks.metadata.opf2 import metadata_to_opf
+ data = {}
+ for i in set(ids):
+ mi = self.db.get_metadata(i, index_is_id=True, get_cover=True)
+ opf = metadata_to_opf(mi)
+ if isbytestring(opf):
+ opf = opf.decode('utf-8')
+ cpath = None
+ if mi.cover:
+ cpath = mi.cover
+ if isbytestring(cpath):
+ cpath = cpath.decode(filesystem_encoding)
+ formats = {}
+ if mi.formats:
+ for fmt in mi.formats:
+ fpath = self.db.format_abspath(i, fmt, index_is_id=True)
+ if fpath is not None:
+ if isbytestring(fpath):
+ fpath = fpath.decode(filesystem_encoding)
+ formats[fmt.lower()] = fpath
+ data[i] = [opf, cpath, formats]
+ return data
+
def run(self):
+ with TemporaryDirectory('save_to_disk_data') as tdir:
+ self._run(tdir)
+
+ def _run(self, tdir):
from calibre.library.save_to_disk import config
server = Server() if self.spare_server is None else self.spare_server
ids = set(self.ids)
@@ -212,12 +240,19 @@ class SaveWorker(Thread):
for pref in c.preferences:
recs[pref.name] = getattr(self.opts, pref.name)
+ plugboards = self.db.prefs.get('plugboards', {})
+
for i, task in enumerate(tasks):
tids = [x[-1] for x in task]
+ data = self.collect_data(tids)
+ dpath = os.path.join(tdir, '%d.json'%i)
+ with open(dpath, 'wb') as f:
+ f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))
+
job = ParallelJob('save_book',
'Save books (%d of %d)'%(i, len(tasks)),
lambda x,y:x,
- args=[tids, self.library_path, self.path, recs])
+ args=[tids, dpath, plugboards, self.path, recs])
jobs.add(job)
server.add_job(job)
@@ -226,21 +261,21 @@ class SaveWorker(Thread):
time.sleep(0.2)
running = False
for job in jobs:
- job.update(consume_notifications=False)
- while True:
- try:
- id, title, ok, tb = job.notifications.get_nowait()[0]
- if id in ids:
- self.result_queue.put((id, title, ok, tb))
- ids.remove(id)
- except Empty:
- break
+ self.get_notifications(job, ids)
if not job.is_finished:
running = True
if not running:
break
+ for job in jobs:
+ if not job.result:
+ continue
+ for id_, title, ok, tb in job.result:
+ if id_ in ids:
+ self.result_queue.put((id_, title, ok, tb))
+ ids.remove(id_)
+
server.close()
time.sleep(1)
@@ -257,21 +292,39 @@ class SaveWorker(Thread):
except:
pass
+ def get_notifications(self, job, ids):
+ job.update(consume_notifications=False)
+ while True:
+ try:
+ id, title, ok, tb = job.notifications.get_nowait()[0]
+ if id in ids:
+ self.result_queue.put((id, title, ok, tb))
+ ids.remove(id)
+ except Empty:
+ break
-def save_book(task, library_path, path, recs, notification=lambda x,y:x):
- from calibre.library.database2 import LibraryDatabase2
- db = LibraryDatabase2(library_path)
- from calibre.library.save_to_disk import config, save_to_disk
+
+def save_book(ids, dpath, plugboards, path, recs, notification=lambda x,y:x):
+ from calibre.library.save_to_disk import config, save_serialized_to_disk
from calibre.customize.ui import apply_null_metadata
opts = config().parse()
for name in recs:
setattr(opts, name, recs[name])
+ results = []
def callback(id, title, failed, tb):
+ results.append((id, title, not failed, tb))
notification((id, title, not failed, tb))
return True
- with apply_null_metadata:
- save_to_disk(db, task, path, opts, callback)
+ data_ = json.loads(open(dpath, 'rb').read().decode('utf-8'))
+ data = {}
+ for k, v in data_.iteritems():
+ data[int(k)] = v
+
+ with apply_null_metadata:
+ save_serialized_to_disk(ids, data, plugboards, path, opts, callback)
+
+ return results
diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py
index 57b914877d..57ca2a1880 100644
--- a/src/calibre/gui2/__init__.py
+++ b/src/calibre/gui2/__init__.py
@@ -123,6 +123,8 @@ def _config():
help=_('Download social metadata (tags/rating/etc.)'))
c.add_opt('overwrite_author_title_metadata', default=True,
help=_('Overwrite author and title with new metadata'))
+ c.add_opt('auto_download_cover', default=False,
+ help=_('Automatically download the cover, if available'))
c.add_opt('enforce_cpu_limit', default=True,
help=_('Limit max simultaneous jobs to number of CPUs'))
c.add_opt('tag_browser_hidden_categories', default=set(),
diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py
index 1339070446..5f41f3a8e0 100644
--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@@ -427,11 +427,27 @@ class Saver(QObject): # {{{
if not self.ids or not self.worker.is_alive():
self.timer.stop()
self.pd.hide()
+ while self.ids:
+ before = len(self.ids)
+ self.get_result()
+ if before == len(self.ids):
+ for i in list(self.ids):
+ self.failures.add(('id:%d'%i, 'Unknown error'))
+ self.ids.remove(i)
+ break
if not self.callback_called:
+ try:
+ self.worker.join(1.5)
+ except:
+ pass # The worker was not yet started
self.callback(self.worker.path, self.failures, self.worker.error)
self.callback_called = True
return
+ self.get_result()
+
+
+ def get_result(self):
try:
id, title, ok, tb = self.rq.get_nowait()
except Empty:
@@ -441,6 +457,7 @@ class Saver(QObject): # {{{
if not isinstance(title, unicode):
title = str(title).decode(preferred_encoding, 'replace')
self.pd.set_msg(_('Saved')+' '+title)
+
if not ok:
self.failures.add((title, tb))
# }}}
diff --git a/src/calibre/gui2/dialogs/fetch_metadata.py b/src/calibre/gui2/dialogs/fetch_metadata.py
index 2c64219464..3da0e67e3d 100644
--- a/src/calibre/gui2/dialogs/fetch_metadata.py
+++ b/src/calibre/gui2/dialogs/fetch_metadata.py
@@ -9,7 +9,7 @@ from threading import Thread
from PyQt4.QtCore import Qt, QObject, SIGNAL, QVariant, pyqtSignal, \
QAbstractTableModel, QCoreApplication, QTimer
-from PyQt4.QtGui import QDialog, QItemSelectionModel
+from PyQt4.QtGui import QDialog, QItemSelectionModel, QIcon
from calibre.gui2.dialogs.fetch_metadata_ui import Ui_FetchMetadata
from calibre.gui2 import error_dialog, NONE, info_dialog, config
@@ -42,13 +42,14 @@ class Matches(QAbstractTableModel):
def __init__(self, matches):
self.matches = matches
+ self.yes_icon = QVariant(QIcon(I('ok.png')))
QAbstractTableModel.__init__(self)
def rowCount(self, *args):
return len(self.matches)
def columnCount(self, *args):
- return 6
+ return 8
def headerData(self, section, orientation, role):
if role != Qt.DisplayRole:
@@ -61,6 +62,8 @@ class Matches(QAbstractTableModel):
elif section == 3: text = _("Publisher")
elif section == 4: text = _("ISBN")
elif section == 5: text = _("Published")
+ elif section == 6: text = _("Has Cover")
+ elif section == 7: text = _("Has Summary")
return QVariant(text)
else:
@@ -71,8 +74,8 @@ class Matches(QAbstractTableModel):
def data(self, index, role):
row, col = index.row(), index.column()
+ book = self.matches[row]
if role == Qt.DisplayRole:
- book = self.matches[row]
res = None
if col == 0:
res = book.title
@@ -90,6 +93,11 @@ class Matches(QAbstractTableModel):
if not res:
return NONE
return QVariant(res)
+ elif role == Qt.DecorationRole:
+ if col == 6 and book.has_cover:
+ return self.yes_icon
+ if col == 7 and book.comments:
+ return self.yes_icon
return NONE
class FetchMetadata(QDialog, Ui_FetchMetadata):
@@ -131,7 +139,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
self.fetch_metadata()
self.opt_get_social_metadata.setChecked(config['get_social_metadata'])
self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata'])
-
+ self.opt_auto_download_cover.setChecked(config['auto_download_cover'])
def show_summary(self, current, *args):
row = current.row()
@@ -213,6 +221,12 @@ class FetchMetadata(QDialog, Ui_FetchMetadata):
_hung_fetchers.add(self.fetcher)
if hasattr(self, '_hangcheck') and self._hangcheck.isActive():
self._hangcheck.stop()
+ # Save value of auto_download_cover, since this is the only place it can
+ # be set. The values of the other options can be set in
+ # Preferences->Behavior and should not be set here as they affect bulk
+ # downloading as well.
+ if self.opt_auto_download_cover.isChecked() != config['auto_download_cover']:
+ config.set('auto_download_cover', self.opt_auto_download_cover.isChecked())
def __enter__(self, *args):
return self
diff --git a/src/calibre/gui2/dialogs/fetch_metadata.ui b/src/calibre/gui2/dialogs/fetch_metadata.ui
index 03a362096c..b140fa158d 100644
--- a/src/calibre/gui2/dialogs/fetch_metadata.ui
+++ b/src/calibre/gui2/dialogs/fetch_metadata.ui
@@ -1,172 +1,179 @@
-
-
- FetchMetadata
-
-
- Qt::WindowModal
-
-
-
- 0
- 0
- 830
- 642
-
-
-
- Fetch metadata
-
-
-
- :/images/metadata.png:/images/metadata.png
-
-
- -
-
-
- <p>calibre can find metadata for your books from two locations: <b>Google Books</b> and <b>isbndb.com</b>. <p>To use isbndb.com you must sign up for a <a href="http://www.isbndb.com">free account</a> and enter your access key below.
-
-
- Qt::AlignCenter
-
-
- true
-
-
- true
-
-
-
- -
-
-
-
-
-
- &Access Key:
-
-
- key
-
-
-
- -
-
-
- -
-
-
- Fetch
-
-
-
-
-
- -
-
-
-
-
-
- true
-
-
-
- -
-
-
- Matches
-
-
-
-
-
-
- Select the book that most closely matches your copy from the list below
-
-
-
- -
-
-
-
- 0
- 1
-
-
-
- true
-
-
- QAbstractItemView::SingleSelection
-
-
- QAbstractItemView::SelectRows
-
-
-
- -
-
-
-
-
-
- -
-
-
- Download &social metadata (tags/rating/etc.) for the selected book
-
-
-
- -
-
-
- Overwrite author and title with author and title of selected book
-
-
-
- -
-
-
- QDialogButtonBox::Cancel|QDialogButtonBox::Ok
-
-
-
-
-
-
-
-
-
-
- buttonBox
- accepted()
- FetchMetadata
- accept()
-
-
- 460
- 599
-
-
- 657
- 530
-
-
-
-
- buttonBox
- rejected()
- FetchMetadata
- reject()
-
-
- 417
- 599
-
-
- 0
- 491
-
-
-
-
-
+
+
+ FetchMetadata
+
+
+ Qt::WindowModal
+
+
+
+ 0
+ 0
+ 890
+ 642
+
+
+
+ Fetch metadata
+
+
+
+ :/images/metadata.png:/images/metadata.png
+
+
+ -
+
+
+ <p>calibre can find metadata for your books from two locations: <b>Google Books</b> and <b>isbndb.com</b>. <p>To use isbndb.com you must sign up for a <a href="http://www.isbndb.com">free account</a> and enter your access key below.
+
+
+ Qt::AlignCenter
+
+
+ true
+
+
+ true
+
+
+
+ -
+
+
-
+
+
+ &Access Key:
+
+
+ key
+
+
+
+ -
+
+
+ -
+
+
+ Fetch
+
+
+
+
+
+ -
+
+
+
+
+
+ true
+
+
+
+ -
+
+
+ Matches
+
+
+
-
+
+
+ Select the book that most closely matches your copy from the list below
+
+
+
+ -
+
+
+
+ 0
+ 1
+
+
+
+ true
+
+
+ QAbstractItemView::SingleSelection
+
+
+ QAbstractItemView::SelectRows
+
+
+
+ -
+
+
+
+
+
+ -
+
+
+ Overwrite author and title with author and title of selected book
+
+
+
+ -
+
+
+ Download &social metadata (tags/rating/etc.) for the selected book
+
+
+
+ -
+
+
+ Automatically download the cover, if available
+
+
+
+ -
+
+
+ QDialogButtonBox::Cancel|QDialogButtonBox::Ok
+
+
+
+
+
+
+
+
+
+
+ buttonBox
+ accepted()
+ FetchMetadata
+ accept()
+
+
+ 460
+ 599
+
+
+ 657
+ 530
+
+
+
+
+ buttonBox
+ rejected()
+ FetchMetadata
+ reject()
+
+
+ 417
+ 599
+
+
+ 0
+ 491
+
+
+
+
+
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 8f068075cf..fec58a74f6 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -760,8 +760,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
if book.publisher: self.publisher.setEditText(book.publisher)
if book.isbn: self.isbn.setText(book.isbn)
if book.pubdate:
- d = book.pubdate
- self.pubdate.setDate(QDate(d.year, d.month, d.day))
+ dt = book.pubdate
+ self.pubdate.setDate(QDate(dt.year, dt.month, dt.day))
summ = book.comments
if summ:
prefix = unicode(self.comments.toPlainText())
@@ -777,8 +777,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.series.setText(book.series)
if book.series_index is not None:
self.series_index.setValue(book.series_index)
- # Needed because of Qt focus bug on OS X
- self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
+ if book.has_cover:
+ if d.opt_auto_download_cover.isChecked() and book.has_cover:
+ self.fetch_cover()
+ else:
+ self.fetch_cover_button.setFocus(Qt.OtherFocusReason)
else:
error_dialog(self, _('Cannot fetch metadata'),
_('You must specify at least one of ISBN, Title, '
diff --git a/src/calibre/library/save_to_disk.py b/src/calibre/library/save_to_disk.py
index c6cc12a978..af57d563ac 100644
--- a/src/calibre/library/save_to_disk.py
+++ b/src/calibre/library/save_to_disk.py
@@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
-import os, traceback, cStringIO, re
+import os, traceback, cStringIO, re, shutil
from calibre.constants import DEBUG
from calibre.utils.config import Config, StringConfig, tweaks
@@ -203,31 +203,49 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
return shorten_components_to(length, components)
-def save_book_to_disk(id, db, root, opts, length):
- mi = db.get_metadata(id, index_is_id=True)
+def save_book_to_disk(id_, db, root, opts, length):
+ mi = db.get_metadata(id_, index_is_id=True)
+ cover = db.cover(id_, index_is_id=True, as_path=True)
+ plugboards = db.prefs.get('plugboards', {})
- available_formats = db.formats(id, index_is_id=True)
+ available_formats = db.formats(id_, index_is_id=True)
if not available_formats:
available_formats = []
else:
available_formats = [x.lower().strip() for x in
available_formats.split(',')]
+ formats = {}
+ fmts = db.formats(id_, index_is_id=True, verify_formats=False)
+ if fmts:
+ fmts = fmts.split(',')
+ for fmt in fmts:
+ fpath = db.format_abspath(id_, fmt, index_is_id=True)
+ if fpath is not None:
+ formats[fmt.lower()] = fpath
+
+ return do_save_book_to_disk(id_, mi, cover, plugboards,
+ formats, root, opts, length)
+
+
+def do_save_book_to_disk(id_, mi, cover, plugboards,
+ format_map, root, opts, length):
+ available_formats = [x.lower().strip() for x in format_map.keys()]
if opts.formats == 'all':
asked_formats = available_formats
else:
asked_formats = [x.lower().strip() for x in opts.formats.split(',')]
formats = set(available_formats).intersection(set(asked_formats))
if not formats:
- return True, id, mi.title
+ return True, id_, mi.title
- components = get_components(opts.template, mi, id, opts.timefmt, length,
+ components = get_components(opts.template, mi, id_, opts.timefmt, length,
ascii_filename if opts.asciiize else sanitize_file_name,
to_lowercase=opts.to_lowercase,
replace_whitespace=opts.replace_whitespace)
base_path = os.path.join(root, *components)
base_name = os.path.basename(base_path)
dirpath = os.path.dirname(base_path)
- # Don't test for existence first are the test could fail but
+ # Don't test for existence first as the test could fail but
# another worker process could create the directory before
# the call to makedirs
try:
@@ -236,29 +254,23 @@ def save_book_to_disk(id, db, root, opts, length):
if not os.path.exists(dirpath):
raise
- cdata = db.cover(id, index_is_id=True)
- if opts.save_cover:
- if cdata is not None:
- with open(base_path+'.jpg', 'wb') as f:
- f.write(cdata)
- mi.cover = base_name+'.jpg'
- else:
- mi.cover = None
+ if opts.save_cover and cover and os.access(cover, os.R_OK):
+ with open(base_path+'.jpg', 'wb') as f:
+ with open(cover, 'rb') as s:
+ shutil.copyfileobj(s, f)
+ mi.cover = base_name+'.jpg'
+ else:
+ mi.cover = None
if opts.write_opf:
opf = metadata_to_opf(mi)
with open(base_path+'.opf', 'wb') as f:
f.write(opf)
- if cdata is not None:
- mi.cover_data = ('jpg', cdata)
- mi.cover = None
-
written = False
for fmt in formats:
global plugboard_save_to_disk_value, plugboard_any_format_value
dev_name = plugboard_save_to_disk_value
- plugboards = db.prefs.get('plugboards', {})
cpb = None
if fmt in plugboards:
cpb = plugboards[fmt]
@@ -275,11 +287,12 @@ def save_book_to_disk(id, db, root, opts, length):
# Leave this here for a while, in case problems arise.
if cpb is not None:
prints('Save-to-disk using plugboard:', fmt, cpb)
- data = db.format(id, fmt, index_is_id=True)
- if data is None:
+ fp = format_map.get(fmt, None)
+ if fp is None:
continue
- else:
- written = True
+ with open(fp, 'rb') as f:
+ data = f.read()
+ written = True
if opts.update_metadata:
stream = cStringIO.StringIO()
stream.write(data)
@@ -300,9 +313,21 @@ def save_book_to_disk(id, db, root, opts, length):
with open(fmt_path, 'wb') as f:
f.write(data)
- return not written, id, mi.title
+ return not written, id_, mi.title
+def _sanitize_args(root, opts):
+    '''
+    Normalize the arguments shared by the save to disk entry points.
+
+    :param root: Destination directory. A unicode path is encoded with
+                 filesystem_encoding and the result is made absolute.
+    :param opts: Save options, or None to use ``config().parse()``. Note
+                 that ``opts.template`` is replaced, in place, by its
+                 preprocessed form.
+    :return: The tuple ``(root, opts, length)`` where length is the number
+             of characters still available for the path below root.
+    :raises ValueError: If fewer than 5 characters remain below root.
+    '''
+    opts = config().parse() if opts is None else opts
+    if isinstance(root, unicode):
+        root = root.encode(filesystem_encoding)
+    root = os.path.abspath(root)
+    opts.template = preprocess_template(opts.template)
+    # The length of root itself is charged against the path length budget
+    length = (1000 if supports_long_names(root) else 250) - len(root)
+    if length < 5:
+        raise ValueError('%r is too long.'%root)
+    return root, opts, length
def save_to_disk(db, ids, root, opts=None, callback=None):
'''
@@ -316,17 +341,7 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
:return: A list of failures. Each element of the list is a tuple
(id, title, traceback)
'''
- if opts is None:
- opts = config().parse()
- if isinstance(root, unicode):
- root = root.encode(filesystem_encoding)
- root = os.path.abspath(root)
-
- opts.template = preprocess_template(opts.template)
- length = 1000 if supports_long_names(root) else 250
- length -= len(root)
- if length < 5:
- raise ValueError('%r is too long.'%root)
+ root, opts, length = _sanitize_args(root, opts)
failures = []
for x in ids:
tb = ''
@@ -343,4 +358,28 @@ def save_to_disk(db, ids, root, opts=None, callback=None):
break
return failures
+def save_serialized_to_disk(ids, data, plugboards, root, opts, callback):
+    '''
+    Save books to disk from serialized data instead of a database object.
+
+    :param ids: Iterable of book ids, each one must be a key of data
+    :param data: Maps a book id to the tuple ``(opf, cover, format_map)``.
+                 opf is the OPF metadata of the book, as a unicode or an
+                 encoded string.
+    :param plugboards: Passed on unchanged to do_save_book_to_disk
+    :param root: Destination directory, see _sanitize_args
+    :param opts: Save options, see _sanitize_args
+    :param callback: If callable, it is called after every book as
+                     ``callback(id, title, failed, traceback)``. Saving
+                     stops as soon as it returns a false value.
+    :return: A list of failures. Each element of the list is a tuple
+             (id, title, traceback)
+    '''
+    from calibre.ebooks.metadata.opf2 import OPF
+    root, opts, length = _sanitize_args(root, opts)
+    notify = callback if callable(callback) else None
+    failures = []
+    for book_id in ids:
+        opf, cover, format_map = data[book_id]
+        raw = opf.encode('utf-8') if isinstance(opf, unicode) else opf
+        mi = OPF(cStringIO.StringIO(raw)).to_book_metadata()
+        try:
+            failed, saved_id, title = do_save_book_to_disk(book_id, mi,
+                    cover, plugboards, format_map, root, opts, length)
+            # Only reported when failed is True without any exception
+            tb = _('Requested formats not available')
+        except:
+            # Deliberately broad: every error is recorded as a failure of
+            # this one book and the remaining books are still processed
+            failed, saved_id, title = True, book_id, mi.title
+            tb = traceback.format_exc()
+        if failed:
+            failures.append((saved_id, title, tb))
+        if notify is not None and not notify(int(saved_id), title, failed, tb):
+            break
+
+    return failures
diff --git a/src/calibre/utils/cleantext.py b/src/calibre/utils/cleantext.py
new file mode 100644
index 0000000000..b4afe7576d
--- /dev/null
+++ b/src/calibre/utils/cleantext.py
@@ -0,0 +1,23 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, sengian '
+__docformat__ = 'restructuredtext en'
+
+import re
+
+_ascii_pat = None
+
+def clean_ascii_chars(txt, charlist=None):
+    '''
+    Remove unwanted ASCII control characters from txt.
+
+    By default the code points 0 to 8, 11, 14 to 24, 26 and 27 are
+    removed. Tab (9), newline (10) and carriage return (13) are kept.
+
+    :param txt: The text to clean
+    :param charlist: Optional iterable of integer code points to remove
+                     instead of the default set. An empty iterable leaves
+                     txt unchanged.
+    :return: txt with every occurrence of the listed characters deleted
+    '''
+    global _ascii_pat
+
+    def compile_pat(codes):
+        # u'%c' % code turns a code point into a one character unicode
+        # string. Every character is escaped, so that the code points of
+        # regexp metacharacters such as '|' or '.' are matched literally
+        # instead of being interpreted as regexp syntax.
+        return re.compile(u'|'.join(re.escape(u'%c' % c) for c in codes))
+
+    if charlist is not None:
+        return compile_pat(charlist).sub('', txt)
+
+    if _ascii_pat is None:
+        # range(9) and not range(8): backspace (0x08) has to be removed
+        # as well. The default pattern is compiled once and then cached.
+        codes = list(range(9)) + [0x0B, 0x0E, 0x0F] + \
+                list(range(0x10, 0x19)) + [0x1A, 0x1B]
+        _ascii_pat = compile_pat(codes)
+    return _ascii_pat.sub('', txt)
+
diff --git a/src/calibre/utils/date.py b/src/calibre/utils/date.py
index ec58c49628..f025a0c9bf 100644
--- a/src/calibre/utils/date.py
+++ b/src/calibre/utils/date.py
@@ -151,3 +151,45 @@ def format_date(dt, format, assume_utc=False, as_utc=False):
format = re.sub('d{1,4}', format_day, format)
format = re.sub('M{1,4}', format_month, format)
return re.sub('yyyy|yy', format_year, format)
+
+def replace_months(datestr, clang):
+    '''
+    Replace a French or German month name in datestr by its three letter
+    English abbreviation, so that the result can be used with parse_date.
+
+    :param datestr: The date string to convert
+    :param clang: 'fr' or 'de'. For any other value datestr is returned
+                  unchanged.
+    :return: datestr after applying the first month pattern that changes
+             it, or a string equal to datestr if no pattern matches.
+    '''
+    french = {
+        u'[jJ]anvier': u'jan',
+        u'[fF].vrier': u'feb',
+        u'[mM]ars': u'mar',
+        u'[aA]vril': u'apr',
+        u'[mM]ai': u'may',
+        u'[jJ]uin': u'jun',
+        u'[jJ]uillet': u'jul',
+        u'[aA]o.t': u'aug',
+        u'[sS]eptembre': u'sep',
+        u'[Oo]ctobre': u'oct',
+        u'[nN]ovembre': u'nov',
+        u'[dD].cembre': u'dec' }
+    german = {
+        u'[jJ]anuar': u'jan',
+        u'[fF]ebruar': u'feb',
+        u'[mM].rz': u'mar',
+        u'[aA]pril': u'apr',
+        u'[mM]ai': u'may',
+        u'[jJ]uni': u'jun',
+        u'[jJ]uli': u'jul',
+        u'[aA]ugust': u'aug',
+        u'[sS]eptember': u'sep',
+        u'[Oo]ktober': u'oct',
+        u'[nN]ovember': u'nov',
+        u'[dD]ezember': u'dec' }
+
+    if clang not in ('fr', 'de'):
+        return datestr
+    table = french if clang == 'fr' else german
+
+    result = datestr
+    for pattern, english in table.items():
+        result = re.sub(pattern, english, datestr)
+        if result != datestr:
+            # A month name was replaced, the other patterns are not tried
+            break
+    return result
+
diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c
index 51d9ac25ba..7ec94c32ff 100644
--- a/src/calibre/utils/icu.c
+++ b/src/calibre/utils/icu.c
@@ -237,8 +237,6 @@ static PyTypeObject icu_CollatorType = { // {{{
// }}
-// }}}
-
// }}}
// Module initialization {{{
@@ -286,7 +284,7 @@ icu_upper(PyObject *self, PyObject *args) {
PyMem_Free(input);
return ret;
-}
+} // }}}
// lower {{{
static PyObject *
diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py
index 6ae7398fb4..4b0f6d4821 100644
--- a/src/calibre/utils/icu.py
+++ b/src/calibre/utils/icu.py
@@ -56,7 +56,7 @@ def py_sort_key(obj):
def icu_sort_key(collator, obj):
if not obj:
return _none2
- return collator.sort_key(obj.lower())
+ return collator.sort_key(lower(obj))
def py_case_sensitive_sort_key(obj):
if not obj:
diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py
index dbcc125274..5c19444bd6 100644
--- a/src/calibre/utils/zipfile.py
+++ b/src/calibre/utils/zipfile.py
@@ -1227,7 +1227,7 @@ class ZipFile:
self.fp.flush()
if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
- self.fp.write(struct.pack("