diff --git a/src/calibre/ebooks/metadata/nicebooks.py b/src/calibre/ebooks/metadata/nicebooks.py index 4d19e9611b..01e20261b3 100644 --- a/src/calibre/ebooks/metadata/nicebooks.py +++ b/src/calibre/ebooks/metadata/nicebooks.py @@ -10,7 +10,8 @@ from copy import deepcopy from lxml.html import soupparser -from calibre.utils.date import parse_date, utcnow +from calibre.utils.date import parse_date, utcnow, replace_months +from calibre.utils.cleantext import clean_ascii_char from calibre import browser, preferred_encoding from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata import MetaInformation, check_isbn, \ @@ -71,31 +72,16 @@ class NiceBooksCovers(CoverDownload): traceback.format_exc(), self.name)) +class NiceBooksError(Exception): + pass + +class ISBNNotFound(NiceBooksError): + pass + def report(verbose): if verbose: - import traceback traceback.print_exc() -def replace_monthsfr(datefr): - # Replace french months by english equivalent for parse_date - frtoen = { - u'[jJ]anvier': u'jan', - u'[fF].vrier': u'feb', - u'[mM]ars': u'mar', - u'[aA]vril': u'apr', - u'[mM]ai': u'may', - u'[jJ]uin': u'jun', - u'[jJ]uillet': u'jul', - u'[aA]o.t': u'aug', - u'[sS]eptembre': u'sep', - u'[Oo]ctobre': u'oct', - u'[nN]ovembre': u'nov', - u'[dD].cembre': u'dec' } - for k in frtoen.iterkeys(): - tmp = re.sub(k, frtoen[k], datefr) - if tmp <> datefr: break - return tmp - class Query(object): BASE_URL = 'http://fr.nicebooks.com/' @@ -119,7 +105,7 @@ class Query(object): def __call__(self, browser, verbose, timeout = 5.): if verbose: - print 'Query:', self.BASE_URL+self.urldata + print _('Query: %s') % self.BASE_URL+self.urldata try: raw = browser.open_novisit(self.BASE_URL+self.urldata, timeout=timeout).read() @@ -128,7 +114,9 @@ class Query(object): if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return - raise + if isinstance(getattr(e, 'args', [None])[0], socket.timeout): + raise NiceBooksError(_('Nicebooks timed out. Try again later.')) + raise NiceBooksError(_('Nicebooks encountered an error.')) if '404 - ' in raw: return raw = xml_to_unicode(raw, strip_encoding_pats=True, @@ -136,7 +124,11 @@ class Query(object): try: feed = soupparser.fromstring(raw) except: - return + try: + #remove ASCII invalid chars + feed = soupparser.fromstring(clean_ascii_char(raw)) + except: + return None #nb of page to call try: @@ -161,7 +153,11 @@ class Query(object): try: feed = soupparser.fromstring(raw) except: - continue + try: + #remove ASCII invalid chars + feed = soupparser.fromstring(clean_ascii_char(raw)) + except: + continue pages.append(feed) results = [] @@ -180,14 +176,12 @@ class ResultList(list): self.reautclean = re.compile(u'\s*\(.*\)\s*') def get_title(self, entry): - # title = deepcopy(entry.find("div[@id='book-info']")) title = deepcopy(entry) title.remove(title.find("dl[@title='Informations sur le livre']")) title = ' '.join([i.text_content() for i in title.iterchildren()]) return unicode(title.replace('\n', '')) def get_authors(self, entry): - # author = entry.find("div[@id='book-info']/dl[@title='Informations sur le livre']") author = entry.find("dl[@title='Informations sur le livre']") authortext = [] for x in author.getiterator('dt'): @@ -223,7 +217,7 @@ class ResultList(list): d = x.getnext().text_content() try: default = utcnow().replace(day=15) - d = replace_monthsfr(d) + d = replace_months(d, 'fr') d = parse_date(d, assume_utc=True, default=default) mi.pubdate = d except: @@ -234,11 +228,6 @@ class ResultList(list): mi = MetaInformation(title, authors) mi.author_sort = authors_to_sort_string(authors) mi.comments = self.get_description(entry, verbose) - # entry = entry.find("dl[@title='Informations sur le livre']") - # mi.publisher = self.get_publisher(entry) - # mi.pubdate = self.get_date(entry, verbose) - # mi.isbn = self.get_ISBN(entry) - # mi.language = self.get_language(entry) return self.get_book_info(entry, mi, verbose) def get_individual_metadata(self, browser, linkdata, verbose): @@ -249,7 +238,9 @@ class ResultList(list): if callable(getattr(e, 'getcode', None)) and \ e.getcode() == 404: return - raise + if isinstance(getattr(e, 'args', [None])[0], socket.timeout): + raise NiceBooksError(_('Nicebooks timed out. Try again later.')) + raise NiceBooksError(_('Nicebooks encountered an error.')) if '<title>404 - ' in raw: report(verbose) return @@ -258,7 +249,11 @@ class ResultList(list): try: feed = soupparser.fromstring(raw) except: - return + try: + #remove ASCII invalid chars + feed = soupparser.fromstring(clean_ascii_char(raw)) + except: + return None # get results return feed.xpath("//div[@id='container']")[0] @@ -292,13 +287,6 @@ class ResultList(list): continue self.append(self.fill_MI(entry, title, authors, verbose)) - -class NiceBooksError(Exception): - pass - -class ISBNNotFound(NiceBooksError): - pass - class Covers(object): def __init__(self, isbn = None): @@ -329,11 +317,10 @@ class Covers(object): return cover, ext if ext else 'jpg' except Exception, err: if isinstance(getattr(err, 'args', [None])[0], socket.timeout): - err = NiceBooksError(_('Nicebooks timed out. Try again later.')) - raise err + raise NiceBooksError(_('Nicebooks timed out. Try again later.')) if not len(self.urlimg): if not self.isbnf: - raise ISBNNotFound('ISBN: '+self.isbn+_(' not found.')) + raise ISBNNotFound(_('ISBN: %s not found.') % self.isbn) raise NiceBooksError(_('An errror occured with Nicebooks cover fetcher')) @@ -341,10 +328,10 @@ def search(title=None, author=None, publisher=None, isbn=None, max_results=5, verbose=False, keywords=None): br = browser() entries = Query(title=title, author=author, isbn=isbn, publisher=publisher, - keywords=keywords, max_results=max_results)(br, verbose) + keywords=keywords, max_results=max_results)(br, verbose,timeout = 10.) if entries is None or len(entries) == 0: - return + return None #List of entry ans = ResultList() @@ -364,28 +351,28 @@ def cover_from_isbn(isbn, timeout = 5.): def option_parser(): parser = OptionParser(textwrap.dedent(\ - '''\ + _('''\ %prog [options] Fetch book metadata from Nicebooks. You must specify one of title, author, ISBN, publisher or keywords. Will fetch a maximum of 20 matches, so you should make your query as specific as possible. It can also get covers if the option is activated. - ''' + ''') )) - parser.add_option('-t', '--title', help='Book title') - parser.add_option('-a', '--author', help='Book author(s)') - parser.add_option('-p', '--publisher', help='Book publisher') - parser.add_option('-i', '--isbn', help='Book ISBN') - parser.add_option('-k', '--keywords', help='Keywords') + parser.add_option('-t', '--title', help=_('Book title')) + parser.add_option('-a', '--author', help=_('Book author(s)')) + parser.add_option('-p', '--publisher', help=_('Book publisher')) + parser.add_option('-i', '--isbn', help=_('Book ISBN')) + parser.add_option('-k', '--keywords', help=_('Keywords')) parser.add_option('-c', '--covers', default=0, - help='Covers: 1-Check/ 2-Download') + help=_('Covers: 1-Check/ 2-Download')) parser.add_option('-p', '--coverspath', default='', - help='Covers files path') + help=_('Covers files path')) parser.add_option('-m', '--max-results', default=20, - help='Maximum number of results to fetch') + help=_('Maximum number of results to fetch')) parser.add_option('-v', '--verbose', default=0, action='count', - help='Be more verbose about errors') + help=_('Be more verbose about errors')) return parser def main(args=sys.argv): @@ -400,15 +387,15 @@ def main(args=sys.argv): parser.print_help() return 1 if results is None or len(results) == 0: - print 'No result found for this search!' + print _('No result found for this search!') return 0 for result in results: print unicode(result).encode(preferred_encoding, 'replace') covact = int(opts.covers) if covact == 1: - textcover = 'No cover found!' + textcover = _('No cover found!') if check_for_cover(result.isbn): - textcover = 'A cover was found for this book' + textcover = _('A cover was found for this book') print textcover elif covact == 2: cover_data, ext = cover_from_isbn(result.isbn) @@ -417,7 +404,7 @@ def main(args=sys.argv): cpath = os.path.normpath(opts.coverspath + '/' + result.isbn) oname = os.path.abspath(cpath+'.'+ext) open(oname, 'wb').write(cover_data) - print 'Cover saved to file ', oname + print _('Cover saved to file '), oname print if __name__ == '__main__':