diff --git a/INSTALL b/INSTALL index cb8261eff6..93b119b2e1 100644 --- a/INSTALL +++ b/INSTALL @@ -1,6 +1,9 @@ calibre supports installation from source, only on Linux. -On Windows and OS X use the provided installers and use -the facilities of the calibre-debug command to hack on the calibre source. + +Note that you *do not* need to install from source to hack on +the calibre source code. To get started with calibre development, +use a normal calibre install and follow the instructions at +http://calibre-ebook.com/user_manual/develop.html On Linux, there are two kinds of installation from source possible. Note that both kinds require lots of dependencies as well as a @@ -45,3 +48,4 @@ This type of install can be run with the command:: sudo python setup.py develop Use the -h flag for help on the develop command. + diff --git a/README b/README index 2c916fc7d7..b518e977c8 100644 --- a/README +++ b/README @@ -7,7 +7,7 @@ reading. It is cross platform, running on Linux, Windows and OS X. For screenshots: https://calibre-ebook.com/demo For installation/usage instructions please see -http://calibre-ebook.com +http://calibre-ebook.com/user_manual For source code access: bzr branch lp:calibre diff --git a/recipes/developpez.recipe b/recipes/developpez.recipe new file mode 100644 index 0000000000..707e702c0a --- /dev/null +++ b/recipes/developpez.recipe @@ -0,0 +1,21 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1301849956(BasicNewsRecipe): + title = u'Developpez.com' + description = u'Toutes les news du site Developpez.com' + publisher = u'Developpez.com' + timefmt = ' [%a, %d %b, %Y]' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'ISO-8859-1' + language = 'fr' + __author__ = 'louhike' + remove_javascript = True + keep_only_tags = [dict(name='div', attrs={'class':'content'})] + + feeds = [(u'Tous les articles', u'http://www.developpez.com/index/rss')] + + def get_cover_url(self): + return 
'http://javascript.developpez.com/template/images/logo.gif' + diff --git a/recipes/f_secure.recipe b/recipes/f_secure.recipe new file mode 100644 index 0000000000..f276a4961a --- /dev/null +++ b/recipes/f_secure.recipe @@ -0,0 +1,22 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1301860159(BasicNewsRecipe): + title = u'F-Secure Weblog' + language = 'en' + __author__ = 'louhike' + description = u'All the news from the weblog of F-Secure' + publisher = u'F-Secure' + timefmt = ' [%a, %d %b, %Y]' + encoding = 'ISO-8859-1' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'en_EN' + remove_javascript = True + keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})] + remove_tags = [dict(name='a'),dict(name='hr')] + + feeds = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')] + def get_cover_url(self): + return 'http://www.f-secure.com/weblog/archives/images/company_logo.png' diff --git a/recipes/kommersant.recipe b/recipes/kommersant.recipe index f24a5da909..09fb8f8ad8 100644 --- a/recipes/kommersant.recipe +++ b/recipes/kommersant.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' www.kommersant.ru ''' @@ -20,7 +20,13 @@ class Kommersant_ru(BasicNewsRecipe): language = 'ru' publication_type = 'newspaper' masthead_url = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial, sans1, sans-serif} span#ctl00_ContentPlaceHolderStyle_LabelSubTitle{margin-bottom: 1em; display: block} .author{margin-bottom: 1em; display: block} .paragraph{margin-bottom: 1em; display: block} .vvodka{font-weight: bold; margin-bottom: 1em} ' + extra_css = """ + @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} + body{font-family: Tahoma, 
Arial, Helvetica, sans1, sans-serif} + .title{font-size: x-large; font-weight: bold; margin-bottom: 1em} + .subtitle{font-size: large; margin-bottom: 1em} + .document_vvodka{font-weight: bold; margin-bottom: 1em} + """ conversion_options = { 'comment' : description @@ -29,14 +35,11 @@ class Kommersant_ru(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [ - dict(attrs={'id':'ctl00_ContentPlaceHolderStyle_PanelHeader'}) - ,dict(attrs={'class':['vvodka','paragraph','author']}) - ] - remove_tags = [dict(name=['iframe','object','link','img','base'])] + keep_only_tags = [dict(attrs={'class':['document','document_vvodka','document_text','document_authors vblock']})] + remove_tags = [dict(name=['iframe','object','link','img','base','meta'])] feeds = [(u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')] - def print_version(self, url): - return url.replace('doc-rss.aspx','doc.aspx') + '&print=true' - + def print_version(self, url): + return url.replace('/doc-rss/','/Doc/') + '/Print' + \ No newline at end of file diff --git a/recipes/perfil.recipe b/recipes/perfil.recipe index 7db86f9d4a..1104202318 100644 --- a/recipes/perfil.recipe +++ b/recipes/perfil.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' perfil.com ''' @@ -39,9 +39,9 @@ class Perfil(BasicNewsRecipe): dict(name=['iframe','embed','object','base','meta','link']) ,dict(name='a', attrs={'href':'#comentarios'}) ,dict(name='div', attrs={'class':'foto3'}) - ,dict(name='img', attrs={'alt':'ampliar'}) + ,dict(name='img', attrs={'alt':['ampliar','Ampliar']}) ] - keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})] + keep_only_tags=[dict(attrs={'class':['articulo','cuerpoSuperior']})] remove_attributes=['onload','lang','width','height','border'] feeds = [ diff --git a/recipes/toi.recipe b/recipes/toi.recipe index 643d120a36..8a772b6f9d 100644 --- a/recipes/toi.recipe +++ b/recipes/toi.recipe 
@@ -1,3 +1,4 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe class TimesOfIndia(BasicNewsRecipe): @@ -8,10 +9,10 @@ class TimesOfIndia(BasicNewsRecipe): max_articles_per_feed = 25 no_stylesheets = True - keep_only_tags = [dict(attrs={'class':'maintable12'})] + keep_only_tags = [{'class':['maintable12', 'prttabl']}] remove_tags = [ dict(style=lambda x: x and 'float' in x), - dict(attrs={'class':'prvnxtbg'}), + {'class':['prvnxtbg', 'footbdrin', 'bcclftr']}, ] feeds = [ @@ -38,8 +39,28 @@ class TimesOfIndia(BasicNewsRecipe): ('Most Read', 'http://timesofindia.indiatimes.com/rssfeedmostread.cms') ] - def print_version(self, url): - return url + '?prtpage=1' + + def get_article_url(self, article): + url = BasicNewsRecipe.get_article_url(self, article) + if '/0Ltimesofindia' in url: + url = url.partition('/0L')[-1] + url = url.replace('0B', '.').replace('0N', '.com').replace('0C', + '/').replace('0E', '-') + url = 'http://' + url.rpartition('/')[0] + match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url) + if match is not None: + num = match.group(1) + num = re.sub(r'[^0-9]', '', num) + return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' % + num) + else: + cms = re.search(r'/(\d+)\.cms', url) + if cms is not None: + return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' % + cms.group(1)) + + return url + def preprocess_html(self, soup): return soup diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index c3aca457ad..2f457bf2bc 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -217,14 +217,25 @@ def filename_to_utf8(name): return name.decode(codec, 'replace').encode('utf8') def extract(path, dir): - ext = os.path.splitext(path)[1][1:].lower() extractor = None - if ext in ['zip', 'cbz', 'epub', 'oebzip']: - from calibre.libunzip import extract as zipextract - extractor = zipextract - elif ext in ['cbr', 'rar']: + # First use the file header to identify its type + with open(path, 'rb') as 
f: + id_ = f.read(3) + if id_ == b'Rar': from calibre.libunrar import extract as rarextract extractor = rarextract + elif id_.startswith(b'PK'): + from calibre.libunzip import extract as zipextract + extractor = zipextract + if extractor is None: + # Fallback to file extension + ext = os.path.splitext(path)[1][1:].lower() + if ext in ['zip', 'cbz', 'epub', 'oebzip']: + from calibre.libunzip import extract as zipextract + extractor = zipextract + elif ext in ['cbr', 'rar']: + from calibre.libunrar import extract as rarextract + extractor = rarextract if extractor is None: raise Exception('Unknown archive type') extractor(path, dir) @@ -281,16 +292,17 @@ def get_parsed_proxy(typ='http', debug=True): def random_user_agent(): choices = [ - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)', + 'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1', + 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en; rv:1.8.1.14) Gecko/20080409 Camino/1.6 (like Firefox/2.0.0.14)', - 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.0.1) Gecko/20060118 Camino/1.0b2+', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)', ] + #return choices[-1] return 
choices[random.randint(0, len(choices)-1)] diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 1e40a8e5ff..298799daa5 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -10,6 +10,7 @@ from calibre.constants import numeric_version from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre.ebooks.oeb.base import OEB_IMAGES +from calibre.utils.config import test_eight_code # To archive plugins {{{ class HTML2ZIP(FileTypePlugin): @@ -166,6 +167,14 @@ class ComicMetadataReader(MetadataReaderPlugin): description = _('Extract cover from comic files') def get_metadata(self, stream, ftype): + if hasattr(stream, 'seek') and hasattr(stream, 'tell'): + pos = stream.tell() + id_ = stream.read(3) + stream.seek(pos) + if id_ == b'Rar': + ftype = 'cbr' + elif id_.startswith(b'PK'): + ftype = 'cbz' if ftype == 'cbr': from calibre.libunrar import extract_first_alphabetically as extract_first extract_first @@ -604,20 +613,34 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.kobo.driver import KOBO from calibre.devices.bambook.driver import BAMBOOK -from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ - KentDistrictLibrary -from calibre.ebooks.metadata.douban import DoubanBooks -from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers -from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ - AmazonCovers, DoubanCovers from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX from calibre.ebooks.epub.fix.unmanifested import Unmanifested from calibre.ebooks.epub.fix.epubcheck import Epubcheck -plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, - KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, - Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, - 
NiceBooksCovers] +plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested, + Epubcheck, ] + +if test_eight_code: +# New metadata download plugins {{{ + from calibre.ebooks.metadata.sources.google import GoogleBooks + from calibre.ebooks.metadata.sources.amazon import Amazon + from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary + + plugins += [GoogleBooks, Amazon, OpenLibrary] + +# }}} +else: + from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \ + KentDistrictLibrary + from calibre.ebooks.metadata.douban import DoubanBooks + from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers + from calibre.ebooks.metadata.covers import OpenLibraryCovers, \ + AmazonCovers, DoubanCovers + + plugins += [GoogleBooks, ISBNDB, Amazon, + OpenLibraryCovers, AmazonCovers, DoubanCovers, + NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks] + plugins += [ ComicInput, EPUBInput, @@ -1055,11 +1078,4 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions, #}}} -# New metadata download plugins {{{ -from calibre.ebooks.metadata.sources.google import GoogleBooks -from calibre.ebooks.metadata.sources.amazon import Amazon -from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary -plugins += [GoogleBooks, Amazon, OpenLibrary] - -# }}} diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 54e4979524..7702a7caf0 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -36,7 +36,9 @@ class ANDROID(USBMS): # Motorola 0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100], 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216], - 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216] }, + 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216], + 0x7086 : [0x0226], + }, # Sony Ericsson 0xfce : { 0xd12e : [0x0100]}, @@ -101,7 +103,8 @@ class ANDROID(USBMS): 'SCH-I500_CARD', 
'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H', 'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD', - '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2'] + '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2', + 'MB860'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7'] diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py index 5374c6c4e2..01277980db 100644 --- a/src/calibre/devices/eb600/driver.py +++ b/src/calibre/devices/eb600/driver.py @@ -244,7 +244,8 @@ class POCKETBOOK602(USBMS): BCD = [0x0324] VENDOR_NAME = '' - WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', 'PB903'] + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', + 'PB903', 'PB'] class POCKETBOOK701(USBMS): diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py index b8a6362a99..b3bd9d7782 100644 --- a/src/calibre/ebooks/htmlz/oeb2html.py +++ b/src/calibre/ebooks/htmlz/oeb2html.py @@ -96,7 +96,7 @@ class OEB2HTML(object): href, id = urldefrag(href) if href in self.base_hrefs: self.get_link_id(href, id) - + def rewrite_link(self, url, page=None): if not page: return url @@ -106,7 +106,7 @@ class OEB2HTML(object): if abs_url in self.links: return self.links[abs_url] return url - + def rewrite_ids(self, root, page): for el in root.iter(): try: diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 328ab7be26..ff22cd3608 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -125,7 +125,10 @@ class Metadata(object): _data = object.__getattribute__(self, '_data') if field in TOP_LEVEL_IDENTIFIERS: field, val = self._clean_identifier(field, val) - _data['identifiers'].update({field: val}) + identifiers = 
_data['identifiers'] + identifiers.pop(field, None) + if val: + identifiers[field] = val elif field == 'identifiers': if not val: val = copy.copy(NULL_VALUES.get('identifiers', None)) @@ -224,8 +227,7 @@ class Metadata(object): identifiers = object.__getattribute__(self, '_data')['identifiers'] - if not val and typ in identifiers: - identifiers.pop(typ) + identifiers.pop(typ, None) if val: identifiers[typ] = val @@ -647,7 +649,7 @@ class Metadata(object): fmt('Tags', u', '.join([unicode(t) for t in self.tags])) if self.series: fmt('Series', self.series + ' #%s'%self.format_series_index()) - if self.language: + if not self.is_null('language'): fmt('Language', self.language) if self.rating is not None: fmt('Rating', self.rating) diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py index 5a5e09234e..2e52bf020d 100644 --- a/src/calibre/ebooks/metadata/google_books.py +++ b/src/calibre/ebooks/metadata/google_books.py @@ -193,6 +193,7 @@ class ResultList(list): def search(title=None, author=None, publisher=None, isbn=None, min_viewability='none', verbose=False, max_results=40): br = browser() + br.set_handle_gzip(True) start, entries = 1, [] while start > 0 and len(entries) <= max_results: new, start = Query(title=title, author=author, publisher=publisher, diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index cfa2b09ea8..d48f502c29 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -23,7 +23,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.library.comments import sanitize_comments_html from calibre.utils.date import parse_date -class Worker(Thread): # {{{ +class Worker(Thread): # Get details {{{ ''' Get book details from amazons book page in a separate thread @@ -64,7 +64,7 @@ class Worker(Thread): # {{{ raw = xml_to_unicode(raw, strip_encoding_pats=True, 
resolve_entities=True)[0] - # open('/t/t.html', 'wb').write(raw) + #open('/t/t.html', 'wb').write(raw) if '404 - ' in raw: self.log.error('URL malformed: %r'%self.url) @@ -218,6 +218,9 @@ class Worker(Thread): # {{{ ' @class="emptyClear" or @href]'): c.getparent().remove(c) desc = tostring(desc, method='html', encoding=unicode).strip() + # Encoding bug in Amazon data U+fffd (replacement char) + # in some examples it is present in place of ' + desc = desc.replace('\ufffd', "'") # remove all attributes from tags desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc) # Collapse whitespace @@ -276,12 +279,14 @@ class Worker(Thread): # {{{ class Amazon(Source): - name = 'Amazon' + name = 'Amazon Metadata' description = _('Downloads metadata from Amazon') capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'identifier:amazon', 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate']) + has_html_comments = True + supports_gzip_transfer_encoding = True AMAZON_DOMAINS = { 'com': _('US'), @@ -408,6 +413,18 @@ class Amazon(Source): if 'bulk pack' not in title: matches.append(a.get('href')) break + if not matches: + # This can happen for some user agents that Amazon thinks are + # mobile/less capable + log('Trying alternate results page markup') + for td in root.xpath( + r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'): + for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'): + title = tostring(a, method='text', encoding=unicode).lower() + if 'bulk pack' not in title: + matches.append(a.get('href')) + break + # Keep only the top 5 matches as the matches are sorted by relevance by # Amazon so lower matches are not likely to be very relevant @@ -476,9 +493,10 @@ class Amazon(Source): if abort.is_set(): return br = self.browser + log('Downloading cover from:', cached_url) try: cdata = br.open_novisit(cached_url, timeout=timeout).read() - 
result_queue.put(cdata) + result_queue.put((self, cdata)) except: log.exception('Failed to download cover from:', cached_url) # }}} diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 7cc4ed3518..faa7420081 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -15,9 +15,20 @@ from calibre.customize import Plugin from calibre.utils.logging import ThreadSafeLog, FileStream from calibre.utils.config import JSONConfig from calibre.utils.titlecase import titlecase +from calibre.utils.icu import capitalize, lower from calibre.ebooks.metadata import check_isbn -msprefs = JSONConfig('metadata_sources.json') +msprefs = JSONConfig('metadata_sources/global.json') +msprefs.defaults['txt_comments'] = False +msprefs.defaults['ignore_fields'] = [] +msprefs.defaults['max_tags'] = 20 +msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds +msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds + +# Google covers are often poor quality (scans/errors) but they have high +# resolution, so they trump covers from better sources. So make sure they +# are only used if no other covers are found. +msprefs.defaults['cover_priorities'] = {'Google':2} def create_log(ostream=None): log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) @@ -89,6 +100,39 @@ class InternalMetadataCompareKeyGen(object): # }}} +def get_cached_cover_urls(mi): + from calibre.customize.ui import metadata_plugins + plugins = list(metadata_plugins(['identify'])) + for p in plugins: + url = p.get_cached_cover_url(mi.identifiers) + if url: + yield (p, url) + +def cap_author_token(token): + lt = lower(token) + if lt in ('von', 'de', 'el', 'van', 'le'): + return lt + if re.match(r'([a-z]\.){2,}$', lt) is not None: + # Normalize tokens of the form J.K. to J. K. + parts = token.split('.') + return '. 
'.join(map(capitalize, parts)).strip() + return capitalize(token) + +def fixauthors(authors): + if not authors: + return authors + ans = [] + for x in authors: + ans.append(' '.join(map(cap_author_token, x.split()))) + return ans + +def fixcase(x): + if x: + x = titlecase(x) + return x + + + class Source(Plugin): type = _('Metadata source') @@ -104,6 +148,15 @@ class Source(Plugin): #: during the identify phase touched_fields = frozenset() + #: Set this to True if your plugin return HTML formatted comments + has_html_comments = False + + #: Setting this to True means that the browser object will add + #: Accept-Encoding: gzip to all requests. This can speedup downloads + #: but make sure that the source actually supports gzip transfer encoding + #: correctly first + supports_gzip_transfer_encoding = False + def __init__(self, *args, **kwargs): Plugin.__init__(self, *args, **kwargs) self._isbn_to_identifier_cache = {} @@ -127,6 +180,8 @@ class Source(Plugin): def browser(self): if self._browser is None: self._browser = browser(user_agent=random_user_agent()) + if self.supports_gzip_transfer_encoding: + self._browser.set_handle_gzip(True) return self._browser.clone_browser() # }}} @@ -229,13 +284,9 @@ class Source(Plugin): before putting the Metadata object into result_queue. You can of course, use a custom algorithm suited to your metadata source. ''' - def fixcase(x): - if x: - x = titlecase(x) - return x if mi.title: mi.title = fixcase(mi.title) - mi.authors = list(map(fixcase, mi.authors)) + mi.authors = fixauthors(mi.authors) mi.tags = list(map(fixcase, mi.tags)) mi.isbn = check_isbn(mi.isbn) @@ -316,7 +367,8 @@ class Source(Plugin): title=None, authors=None, identifiers={}, timeout=30): ''' Download a cover and put it into result_queue. The parameters all have - the same meaning as for :meth:`identify`. + the same meaning as for :meth:`identify`. Put (self, cover_data) into + result_queue. 
This method should use cached cover URLs for efficiency whenever possible. When cached data is not present, most plugins simply call diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py new file mode 100644 index 0000000000..cb422f939d --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/cli.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +import sys, textwrap +from io import BytesIO +from threading import Event + +from calibre import prints +from calibre.utils.config import OptionParser +from calibre.utils.magick.draw import save_cover_data_to +from calibre.ebooks.metadata import string_to_authors +from calibre.ebooks.metadata.opf2 import metadata_to_opf +from calibre.ebooks.metadata.sources.base import create_log +from calibre.ebooks.metadata.sources.identify import identify +from calibre.ebooks.metadata.sources.covers import download_cover +from calibre.utils.config import test_eight_code + +def option_parser(): + if not test_eight_code: + from calibre.ebooks.metadata.fetch import option_parser + return option_parser() + + parser = OptionParser(textwrap.dedent( + '''\ + %prog [options] + + Fetch book metadata from online sources. You must specify at least one + of title, authors or ISBN. + ''' + )) + parser.add_option('-t', '--title', help='Book title') + parser.add_option('-a', '--authors', help='Book author(s)') + parser.add_option('-i', '--isbn', help='Book ISBN') + parser.add_option('-v', '--verbose', default=False, action='store_true', + help='Print the log to the console (stderr)') + parser.add_option('-o', '--opf', help='Output the metadata in OPF format') + parser.add_option('-c', '--cover', + help='Specify a filename. 
The cover, if available, will be saved to it') + parser.add_option('-d', '--timeout', default='30', + help='Timeout in seconds. Default is 30') + + return parser + +def main(args=sys.argv): + if not test_eight_code: + from calibre.ebooks.metadata.fetch import main + return main(args) + parser = option_parser() + opts, args = parser.parse_args(args) + + buf = BytesIO() + log = create_log(buf) + abort = Event() + + authors = [] + if opts.authors: + authors = string_to_authors(opts.authors) + + identifiers = {} + if opts.isbn: + identifiers['isbn'] = opts.isbn + + results = identify(log, abort, title=opts.title, authors=authors, + identifiers=identifiers, timeout=int(opts.timeout)) + + if not results: + print (buf.getvalue(), file=sys.stderr) + prints('No results found', file=sys.stderr) + raise SystemExit(1) + result = results[0] + + cf = None + if opts.cover and results: + cover = download_cover(log, title=opts.title, authors=authors, + identifiers=result.identifiers, timeout=int(opts.timeout)) + if cover is None: + prints('No cover found', file=sys.stderr) + else: + save_cover_data_to(cover[-1], opts.cover) + result.cover = cf = opts.cover + + + log = buf.getvalue() + + + result = (metadata_to_opf(result) if opts.opf else + unicode(result).encode('utf-8')) + + if opts.verbose: + print (log, file=sys.stderr) + + print (result) + if not opts.opf and opts.cover: + prints('Cover :', cf) + + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py new file mode 100644 index 0000000000..46b278397c --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/covers.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +import 
time +from Queue import Queue, Empty +from threading import Thread, Event +from io import BytesIO + +from calibre.customize.ui import metadata_plugins +from calibre.ebooks.metadata.sources.base import msprefs, create_log +from calibre.utils.magick.draw import Image, save_cover_data_to + +class Worker(Thread): + + def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq): + Thread.__init__(self) + self.daemon = True + + self.plugin = plugin + self.abort = abort + self.buf = BytesIO() + self.log = create_log(self.buf) + self.title, self.authors, self.identifiers = (title, authors, + identifiers) + self.timeout, self.rq = timeout, rq + self.time_spent = None + + def run(self): + start_time = time.time() + if not self.abort.is_set(): + try: + self.plugin.download_cover(self.log, self.rq, self.abort, + title=self.title, authors=self.authors, + identifiers=self.identifiers, timeout=self.timeout) + except: + self.log.exception('Failed to download cover from', + self.plugin.name) + self.time_spent = time.time() - start_time + +def is_worker_alive(workers): + for w in workers: + if w.is_alive(): + return True + return False + +def process_result(log, result): + plugin, data = result + try: + im = Image() + im.load(data) + im.trim(10) + width, height = im.size + fmt = im.format + + if width < 50 or height < 50: + raise ValueError('Image too small') + data = save_cover_data_to(im, '/cover.jpg', return_data=True) + except: + log.exception('Invalid cover from', plugin.name) + return None + return (plugin, width, height, fmt, data) + +def run_download(log, results, abort, + title=None, authors=None, identifiers={}, timeout=30): + ''' + Run the cover download, putting results into the queue :param:`results`. 
+ + Each result is a tuple of the form: + + (plugin, width, height, fmt, bytes) + + ''' + plugins = list(metadata_plugins(['cover'])) + + rq = Queue() + workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p + in plugins] + for w in workers: + w.start() + + first_result_at = None + wait_time = msprefs['wait_after_first_cover_result'] + found_results = {} + + while True: + time.sleep(0.1) + try: + x = rq.get_nowait() + result = process_result(log, x) + if result is not None: + results.put(result) + found_results[result[0]] = result + if first_result_at is None: + first_result_at = time.time() + except Empty: + pass + + if not is_worker_alive(workers): + break + + if first_result_at is not None and time.time() - first_result_at > wait_time: + log('Not waiting for any more results') + abort.set() + + if abort.is_set(): + break + + while True: + try: + x = rq.get_nowait() + result = process_result(log, x) + if result is not None: + results.put(result) + found_results[result[0]] = result + except Empty: + break + + for w in workers: + wlog = w.buf.getvalue().strip() + log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30) + log('Request extra headers:', w.plugin.browser.addheaders) + if w.plugin in found_results: + result = found_results[w.plugin] + log('Downloaded cover:', '%dx%d'%(result[1], result[2])) + else: + log('Failed to download valid cover') + if w.time_spent is None: + log('Download aborted') + else: + log('Took', w.time_spent, 'seconds') + if wlog: + log(wlog) + log('\n'+'*'*80) + + +def download_cover(log, + title=None, authors=None, identifiers={}, timeout=30): + ''' + Synchronous cover download. Returns the "best" cover as per user + prefs/cover resolution. + + Return cover is a tuple: (plugin, width, height, fmt, data) + + Returns None if no cover is found. 
+ ''' + rq = Queue() + abort = Event() + + run_download(log, rq, abort, title=title, authors=authors, + identifiers=identifiers, timeout=timeout) + + results = [] + + while True: + try: + results.append(rq.get_nowait()) + except Empty: + break + + cp = msprefs['cover_priorities'] + + def keygen(result): + plugin, width, height, fmt, data = result + return (cp.get(plugin.name, 1), 1/(width*height)) + + results.sort(key=keygen) + + return results[0] if results else None + + + + diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 989320f710..47cfb823bb 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -145,21 +145,25 @@ def to_metadata(browser, log, entry_, timeout): # {{{ log.exception('Failed to parse rating') # Cover - mi.has_google_cover = len(extra.xpath( - '//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0 + mi.has_google_cover = None + for x in extra.xpath( + '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'): + mi.has_google_cover = x.get('href') + break return mi # }}} class GoogleBooks(Source): - name = 'Google Books' + name = 'Google' description = _('Downloads metadata from Google Books') capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating', 'identifier:google']) # language currently disabled + supports_gzip_transfer_encoding = True GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1' @@ -212,7 +216,7 @@ class GoogleBooks(Source): results.sort(key=self.identify_results_keygen( title=title, authors=authors, identifiers=identifiers)) for mi in results: - cached_url = self.cover_url_from_identifiers(mi.identifiers) + cached_url = self.get_cached_cover_url(mi.identifiers) if cached_url is not None: break if cached_url is None: @@ -222,9 +226,10 @@ class 
GoogleBooks(Source): if abort.is_set(): return br = self.browser + log('Downloading cover from:', cached_url) try: cdata = br.open_novisit(cached_url, timeout=timeout).read() - result_queue.put(cdata) + result_queue.put((self, cdata)) except: log.exception('Failed to download cover from:', cached_url) @@ -253,9 +258,9 @@ class GoogleBooks(Source): goog = ans.identifiers['google'] for isbn in getattr(ans, 'all_isbns', []): self.cache_isbn_to_identifier(isbn, goog) - if ans.has_google_cover: - self.cache_identifier_to_cover_url(goog, - self.GOOGLE_COVER%goog) + if ans.has_google_cover: + self.cache_identifier_to_cover_url(goog, + self.GOOGLE_COVER%goog) self.clean_downloaded_metadata(ans) result_queue.put(ans) except: @@ -270,6 +275,9 @@ class GoogleBooks(Source): identifiers={}, timeout=30): query = self.create_query(log, title=title, authors=authors, identifiers=identifiers) + if not query: + log.error('Insufficient metadata to construct query') + return br = self.browser try: raw = br.open_novisit(query, timeout=timeout).read() diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 1d4d8840e8..cbc12b6167 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -8,17 +8,21 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' import time +from datetime import datetime from Queue import Queue, Empty from threading import Thread from io import BytesIO +from operator import attrgetter from calibre.customize.ui import metadata_plugins -from calibre.ebooks.metadata.sources.base import create_log +from calibre.ebooks.metadata.sources.base import create_log, msprefs from calibre.ebooks.metadata.xisbn import xisbn +from calibre.ebooks.metadata.book.base import Metadata +from calibre.utils.date import utc_tz +from calibre.utils.html2text import html2text +from calibre.utils.icu import lower -# How long to 
# Merge results from different sources {{{

class ISBNMerge(object):
    '''
    Collects identify results, groups the ones that have an ISBN into pools
    of related ISBNs (as reported by xisbn) and merges each pool into a single
    Metadata object. Results without an ISBN are only used when no pool holds
    any result.
    '''

    def __init__(self):
        # Maps a frozenset of ISBNs to a (min_year, [results]) tuple
        self.pools = {}
        # Results that have no ISBN
        self.isbnless_results = []

    def isbn_in_pool(self, isbn):
        ' Return the pool whose set of ISBNs contains isbn, or None '
        if isbn:
            for isbns, pool in self.pools.items():
                if isbn in isbns:
                    return pool
        return None

    def pool_has_result_from_same_source(self, pool, result):
        ' True iff pool already has a result from the plugin that made result '
        return any(r.identify_plugin is result.identify_plugin
                for r in pool[1])

    def add_result(self, result):
        '''
        File result into the pool its ISBN belongs to, creating the pool if
        needed. Only the first result from each plugin is kept per pool.
        '''
        isbn = result.isbn
        if not isbn:
            self.isbnless_results.append(result)
            return

        pool = self.isbn_in_pool(isbn)
        if pool is None:
            isbns, min_year = xisbn.get_isbn_pool(isbn)
            if not isbns:
                # xisbn knows nothing, the pool is just this ISBN
                isbns = frozenset([isbn])
            self.pools[isbns] = pool = (min_year, [])

        if not self.pool_has_result_from_same_source(pool, result):
            pool[1].append(result)

    def finalize(self):
        '''
        Merge everything added so far. Returns the list of merged results,
        which is also available as self.results.
        '''
        # A pool is a (min_year, results) tuple and a two element tuple is
        # always true, so the list inside the tuple has to be checked.
        has_isbn_result = any(results for min_year, results in
                self.pools.values())
        self.has_isbn_result = has_isbn_result

        if has_isbn_result:
            self.merge_isbn_results()
        else:
            results = sorted(self.isbnless_results,
                    key=attrgetter('relevance_in_source'))
            # Pick only the most relevant result from each source
            self.results = []
            seen = set()
            for result in results:
                if result.identify_plugin not in seen:
                    seen.add(result.identify_plugin)
                    self.results.append(result)
                    result.average_source_relevance = \
                        result.relevance_in_source

        # NOTE(review): the indentation of this call could not be recovered
        # from the patch, confirm it is meant to run for ISBN results as well
        self.merge_metadata_results()

        return self.results

    def merge_metadata_results(self):
        ' Merge results with identical title and authors '
        groups = {}
        for result in self.results:
            title = lower(result.title if result.title else '')
            key = (title, tuple([lower(x) for x in result.authors]))
            groups.setdefault(key, []).append(result)

        # Fewer groups than results means at least two results collide
        if len(groups) != len(self.results):
            self.results = []
            for rgroup in groups.values():
                rel = [r.average_source_relevance for r in rgroup]
                if len(rgroup) > 1:
                    result = self.merge(rgroup, None, do_asr=False)
                    result.average_source_relevance = sum(rel)/len(rel)
                else:
                    result = rgroup[0]
                self.results.append(result)

        self.results.sort(key=attrgetter('average_source_relevance'))

    def merge_isbn_results(self):
        ' Merge every non empty pool into one result, most relevant first '
        self.results = []
        for min_year, results in self.pools.values():
            if results:
                self.results.append(self.merge(results, min_year))

        self.results.sort(key=attrgetter('average_source_relevance'))

    def length_merge(self, attr, results, null_value=None, shortest=True):
        '''
        Return the shortest (or longest, if shortest is False) non empty value
        of attr among results, or null_value if there is none. On ties the
        value from the earliest result wins.
        '''
        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
        values = [x for x in values if len(x) > 0]
        if not values:
            return null_value
        pick = min if shortest else max
        return pick(values, key=len)

    def random_merge(self, attr, results, null_value=None):
        ' Return the first non null value of attr, or null_value '
        for x in results:
            if not x.is_null(attr):
                return getattr(x, attr)
        return null_value

    def merge(self, results, min_year, do_asr=True):
        '''
        Combine results into a new Metadata object.

        :param min_year: If true, the publication date is set to the 2nd of
            January of that year, otherwise the earliest pubdate among
            results is used.
        :param do_asr: If True, set average_source_relevance on the answer to
            the mean relevance_in_source of results.
        '''
        ans = Metadata(_('Unknown'))

        # We assume the shortest title has the least cruft in it
        ans.title = self.length_merge('title', results, null_value=ans.title)

        # No harm in having extra authors, maybe something useful like an
        # editor or translator
        ans.authors = self.length_merge('authors', results,
                null_value=ans.authors, shortest=False)

        # We assume the shortest publisher has the least cruft in it
        ans.publisher = self.length_merge('publisher', results,
                null_value=ans.publisher)

        # We assume the smallest set of tags has the least cruft in it
        ans.tags = self.length_merge('tags', results,
                null_value=ans.tags)

        # We assume the longest series has the most info in it
        ans.series = self.length_merge('series', results,
                null_value=ans.series, shortest=False)
        for r in results:
            if r.series and r.series == ans.series:
                ans.series_index = r.series_index
                break

        # Average the rating over all sources
        ratings = [r.rating for r in results
                if r.rating and r.rating > 0 and r.rating <= 5]
        if ratings:
            ans.rating = sum(ratings)/len(ratings)

        # Smallest language is likely to be valid
        ans.language = self.length_merge('language', results,
                null_value=ans.language)

        # Choose longest comments
        ans.comments = self.length_merge('comments', results,
                null_value=ans.comments, shortest=False)

        # Published date
        if min_year:
            min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
            ans.pubdate = min_date
        else:
            # Year 3001 is a sentinel meaning no source had a pubdate
            min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
            for r in results:
                if r.pubdate is not None and r.pubdate < min_date:
                    min_date = r.pubdate
            if min_date.year < 3000:
                ans.pubdate = min_date

        # Identifiers
        for r in results:
            ans.identifiers.update(r.identifiers)

        # Merge any other fields with no special handling (random merge)
        touched_fields = set()
        for r in results:
            if hasattr(r, 'identify_plugin'):
                touched_fields |= r.identify_plugin.touched_fields

        for f in touched_fields:
            if f.startswith('identifier:') or not ans.is_null(f):
                continue
            setattr(ans, f, self.random_merge(f, results,
                null_value=getattr(ans, f)))

        if do_asr:
            avg = [x.relevance_in_source for x in results]
            avg = sum(avg)/len(avg)
            ans.average_source_relevance = avg

        return ans


def merge_identify_results(result_map, log):
    '''
    Merge the per plugin result lists in result_map (plugin -> list of
    results) and return the list of merged results. log is currently unused.
    '''
    isbn_merge = ISBNMerge()
    for results in result_map.values():
        for result in results:
            isbn_merge.add_result(result)

    return isbn_merge.finalize()

# }}}
results.sort(key=plugin.identify_results_keygen(**sort_kwargs)) - plog = plugin.buf.getvalue().strip() + longest, lp = -1, '' + for plugin, presults in results.iteritems(): + presults.sort(key=plugin.identify_results_keygen(**sort_kwargs)) + plog = logs[plugin].getvalue().strip() + log('\n'+'*'*30, plugin.name, '*'*30) + log('Request extra headers:', plugin.browser.addheaders) + log('Found %d results'%len(presults)) + time_spent = getattr(plugin, 'dl_time_spent', None) + if time_spent is None: + log('Downloading was aborted') + longest, lp = -1, plugin.name + else: + log('Downloading from', plugin.name, 'took', time_spent) + if time_spent > longest: + longest, lp = time_spent, plugin.name + for r in presults: + log('\n\n---') + log(unicode(r)) if plog: - log('\n'+'*'*35, plugin.name, '*'*35) - log('Found %d results'%len(results)) log(plog) - log('\n'+'*'*80) + log('\n'+'*'*80) - for i, result in enumerate(results): + for i, result in enumerate(presults): result.relevance_in_source = i result.has_cached_cover_url = \ plugin.get_cached_cover_url(result.identifiers) is not None result.identify_plugin = plugin log('The identify phase took %.2f seconds'%(time.time() - start_time)) + log('The longest time (%f) was taken by:'%longest, lp) log('Merging results from different sources and finding earliest', 'publication dates') start_time = time.time() - merged_results = merge_identify_results(results, log) + results = merge_identify_results(results, log) log('We have %d merged results, merging took: %.2f seconds' % - (len(merged_results), time.time() - start_time)) + (len(results), time.time() - start_time)) -class ISBNMerge(object): - - def __init__(self): - self.pools = {} - - def isbn_in_pool(self, isbn): - if isbn: - for p in self.pools: - if isbn in p: - return p - return None - - def pool_has_result_from_same_source(self, pool, result): - results = self.pools[pool][1] + if msprefs['txt_comments']: for r in results: - if r.identify_plugin is result.identify_plugin: - 
return True - return False + if r.plugin.has_html_comments and r.comments: + r.comments = html2text(r.comments) - def add_result(self, result, isbn): - pool = self.isbn_in_pool(isbn) - if pool is None: - isbns, min_year = xisbn.get_isbn_pool(isbn) - if not isbns: - isbns = frozenset([isbn]) - self.pool[isbns] = pool = (min_year, []) + dummy = Metadata(_('Unknown')) + max_tags = msprefs['max_tags'] + for r in results: + for f in msprefs['ignore_fields']: + setattr(r, f, getattr(dummy, f)) + r.tags = r.tags[:max_tags] - if not self.pool_has_result_from_same_source(pool, result): - pool[1].append(result) + return results +# }}} -def merge_identify_results(result_map, log): - for plugin, results in result_map.iteritems(): - for result in results: - isbn = result.isbn - if isbn: - isbns, min_year = xisbn.get_isbn_pool(isbn) +if __name__ == '__main__': # tests {{{ + # To run these test use: calibre-debug -e + # src/calibre/ebooks/metadata/sources/identify.py + from calibre.ebooks.metadata.sources.test import (test_identify, + title_test, authors_test) + tests = [ + ( # An e-book ISBN not on Amazon, one of the authors is + # unknown to Amazon + {'identifiers':{'isbn': '9780307459671'}, + 'title':'Invisible Gorilla', 'authors':['Christopher Chabris']}, + [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us', + exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])] + + ), + + ( # Test absence of identifiers + {'title':'Learning Python', + 'authors':['Lutz']}, + [title_test('Learning Python', + exact=True), authors_test(['Mark Lutz']) + ] + + ), + + ( # Sophisticated comment formatting + {'identifiers':{'isbn': '9781416580829'}}, + [title_test('Angels & Demons', + exact=True), authors_test(['Dan Brown'])] + ), + + ( # No ISBN + {'title':'Justine', 'authors':['Durrel']}, + [title_test('Justine', exact=True), + authors_test(['Lawrence Durrel'])] + ), + + ( # A newer book + {'identifiers':{'isbn': '9780316044981'}}, + [title_test('The 
class ISBNDB(Source):
    '''
    Metadata source plugin for isbndb.com. At this stage it only declares its
    capabilities and loads the access key from the plugin preferences.
    '''

    name = 'ISBNDB'
    description = _('Downloads metadata from isbndb.com')

    capabilities = frozenset(['identify'])
    touched_fields = frozenset(['title', 'authors',
        'identifier:isbn', 'comments', 'publisher'])
    supports_gzip_transfer_encoding = True

    def __init__(self, *args, **kwargs):
        Source.__init__(self, *args, **kwargs)

        prefs = self.prefs
        prefs.defaults['key_migrated'] = False
        prefs.defaults['isbndb_key'] = None

        if not prefs['key_migrated']:
            # Flag first, so the migration is attempted only once even if it
            # fails
            prefs['key_migrated'] = True
            try:
                from calibre.customize.ui import config
                key = config['plugin_customization']['IsbnDB']
                prefs['isbndb_key'] = key
            except Exception:
                # Best effort: no stored customization means there is nothing
                # to migrate. Exception rather than a bare except, so that
                # KeyboardInterrupt and SystemExit still propagate.
                pass

        self.isbndb_key = prefs['isbndb_key']
def init_test(tdir_name):
    '''
    Create the log file, log object and abort event used by a test run.

    :param tdir_name: Name used (with spaces removed) to build the log file
        name inside the system temporary directory.
    :return: (tdir, lf, log, abort) where lf is the path to the log file
    '''
    tdir = tempfile.gettempdir()
    lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
    # The file is handed to the log and has to stay open while log is in use
    log = create_log(open(lf, 'wb'))
    abort = Event()
    return tdir, lf, log, abort

def test_identify(tests): # {{{
    '''
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.

    Exits with status 1 if no result passes all the test functions or if the
    most relevant result is not the first one that passes.
    '''
    from calibre.ebooks.metadata.sources.identify import identify

    tdir, lf, log, abort = init_test('Full Identify')
    prints('Log saved to', lf)

    times = []

    for kwargs, test_funcs in tests:
        log('#'*80)
        log('### Running test with:', kwargs)
        log('#'*80)
        prints('Running test with:', kwargs)
        # identify() sets abort when it stops waiting for slow sources, so
        # the flag has to be reset or every later test starts out aborted
        abort.clear()
        args = (log, abort)
        start_time = time.time()
        results = identify(*args, **kwargs)
        total_time = time.time() - start_time
        times.append(total_time)
        if not results:
            prints('identify failed to find any results')
            break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URLs :',
                    [x[0].name for x in get_cached_cover_urls(mi)])
            prints('*'*75, '\n\n')

        # Results that satisfy every test function, in relevance order
        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        log('\n\n')

    # No queries were run if tests is empty, do not divide by zero
    if times:
        prints('Average time per query', sum(times)/len(times))

    prints('Full log is at:', lf)

# }}}
Each two tuple is of the form (args, @@ -61,11 +134,9 @@ def test_identify_plugin(name, tests): plugin = x break prints('Testing the identify function of', plugin.name) + prints('Using extra headers:', plugin.browser.addheaders) - tdir = tempfile.gettempdir() - lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt') - log = create_log(open(lf, 'wb')) - abort = Event() + tdir, lf, log, abort = init_test(plugin.name) prints('Log saved to', lf) times = [] @@ -159,4 +230,5 @@ def test_identify_plugin(name, tests): if os.stat(lf).st_size > 10: prints('There were some errors/warnings, see log', lf) +# }}} diff --git a/src/calibre/ebooks/metadata/xisbn.py b/src/calibre/ebooks/metadata/xisbn.py index 69cc3f7cb3..56156c034e 100644 --- a/src/calibre/ebooks/metadata/xisbn.py +++ b/src/calibre/ebooks/metadata/xisbn.py @@ -73,7 +73,11 @@ class xISBN(object): def get_isbn_pool(self, isbn): data = self.get_data(isbn) - isbns = frozenset([x.get('isbn') for x in data if 'isbn' in x]) + raw = tuple(x.get('isbn') for x in data if 'isbn' in x) + isbns = [] + for x in raw: + isbns += x + isbns = frozenset(isbns) min_year = 100000 for x in data: try: diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index fccaad8811..5f4c47cdf3 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -282,8 +282,8 @@ class Serializer(object): buffer.write('="') self.serialize_text(val, quot=True) buffer.write('"') + buffer.write('>') if elem.text or len(elem) > 0: - buffer.write('>') if elem.text: self.anchor_offset = None self.serialize_text(elem.text) @@ -292,9 +292,7 @@ class Serializer(object): if child.tail: self.anchor_offset = None self.serialize_text(child.tail) - buffer.write('</%s>' % tag) - else: - buffer.write('/>') + buffer.write('</%s>' % tag) def serialize_text(self, text, quot=False): text = text.replace('&', '&') diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 
0cd17387fe..42974be355 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -17,6 +17,8 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ from cssutils import profile as cssprofiles from lxml import etree from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError + +from calibre import force_unicode from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize from calibre.ebooks.oeb.profile import PROFILES @@ -140,13 +142,22 @@ class Stylizer(object): log=logging.getLogger('calibre.css')) self.font_face_rules = [] for elem in head: - if elem.tag == XHTML('style') and elem.text \ - and elem.get('type', CSS_MIME) in OEB_STYLES: - text = XHTML_CSS_NAMESPACE + elem.text - text = oeb.css_preprocessor(text) - stylesheet = parser.parseString(text, href=cssname) - stylesheet.namespaces['h'] = XHTML_NS - stylesheets.append(stylesheet) + if (elem.tag == XHTML('style') and + elem.get('type', CSS_MIME) in OEB_STYLES): + text = elem.text if elem.text else u'' + for x in elem: + t = getattr(x, 'text', None) + if t: + text += u'\n\n' + force_unicode(t, u'utf-8') + t = getattr(x, 'tail', None) + if t: + text += u'\n\n' + force_unicode(t, u'utf-8') + if text: + text = XHTML_CSS_NAMESPACE + elem.text + text = oeb.css_preprocessor(text) + stylesheet = parser.parseString(text, href=cssname) + stylesheet.namespaces['h'] = XHTML_NS + stylesheets.append(stylesheet) elif elem.tag == XHTML('link') and elem.get('href') \ and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \ and elem.get('type', CSS_MIME).lower() in OEB_STYLES: diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp index 99ab7517c1..c3a709869e 100644 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ b/src/calibre/ebooks/pdf/fonts.cpp @@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) : size(size-1), 
line_size(-1.0), italic(false), bold(false), font_name(font_name), font_family(NULL), color(rgb) { + if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY); this->font_family = family_name(this->font_name); if (strcasestr(font_name->c_str(), "bold")) this->bold = true; @@ -134,7 +135,12 @@ Fonts::size_type Fonts::add_font(XMLFont *f) { } Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { - XMLFont *f = new XMLFont(font_name, size, rgb); + XMLFont *f = NULL; + if (font_name == NULL) + font_name = new string("Unknown"); + // font_name must not be deleted + f = new XMLFont(font_name, size, rgb); + return this->add_font(f); } diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 3f053e5223..c3ceb27e7e 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -17,6 +17,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.tag_list_editor import TagListEditor from calibre.gui2.actions import InterfaceAction from calibre.utils.icu import sort_key +from calibre.utils.config import test_eight_code class EditMetadataAction(InterfaceAction): @@ -133,8 +134,6 @@ class EditMetadataAction(InterfaceAction): row_list = [r.row() for r in rows] current_row = 0 - changed = set([]) - db = self.gui.library_view.model().db if len(row_list) == 1: cr = row_list[0] @@ -142,6 +141,24 @@ class EditMetadataAction(InterfaceAction): list(range(self.gui.library_view.model().rowCount(QModelIndex()))) current_row = row_list.index(cr) + if test_eight_code: + changed = self.do_edit_metadata(row_list, current_row) + else: + changed = self.do_edit_metadata_old(row_list, current_row) + + if changed: + self.gui.library_view.model().refresh_ids(list(changed)) + current = self.gui.library_view.currentIndex() + m = self.gui.library_view.model() + if self.gui.cover_flow: + self.gui.cover_flow.dataChanged() + 
m.current_changed(current, previous) + self.gui.tags_view.recount() + + def do_edit_metadata_old(self, row_list, current_row): + changed = set([]) + db = self.gui.library_view.model().db + while True: prev = next_ = None if current_row > 0: @@ -167,15 +184,28 @@ class EditMetadataAction(InterfaceAction): self.gui.library_view.set_current_row(current_row) self.gui.library_view.scroll_to_row(current_row) + def do_edit_metadata(self, row_list, current_row): + from calibre.gui2.metadata.single import edit_metadata + db = self.gui.library_view.model().db + changed, rows_to_refresh = edit_metadata(db, row_list, current_row, + parent=self.gui, view_slot=self.view_format_callback, + set_current_callback=self.set_current_callback) + return changed + + def set_current_callback(self, id_): + db = self.gui.library_view.model().db + current_row = db.row(id_) + self.gui.library_view.set_current_row(current_row) + self.gui.library_view.scroll_to_row(current_row) + + def view_format_callback(self, id_, fmt): + view = self.gui.iactions['View'] + if id_ is None: + view._view_file(fmt) + else: + db = self.gui.library_view.model().db + view.view_format(db.row(id_), fmt) - if changed: - self.gui.library_view.model().refresh_ids(list(changed)) - current = self.gui.library_view.currentIndex() - m = self.gui.library_view.model() - if self.gui.cover_flow: - self.gui.cover_flow.dataChanged() - m.current_changed(current, previous) - self.gui.tags_view.recount() def edit_bulk_metadata(self, checked): ''' diff --git a/src/calibre/gui2/dialogs/book_info.ui b/src/calibre/gui2/dialogs/book_info.ui index 412126a610..9e9e71eda0 100644 --- a/src/calibre/gui2/dialogs/book_info.ui +++ b/src/calibre/gui2/dialogs/book_info.ui @@ -7,15 +7,25 @@ <x>0</x> <y>0</y> <width>917</width> - <height>480</height> + <height>492</height> </rect> </property> <property name="windowTitle"> <string>Dialog</string> </property> + <property name="windowIcon"> + <iconset resource="../../../../resources/images.qrc"> + 
<normaloff>:/images/metadata.png</normaloff>:/images/metadata.png</iconset> + </property> <layout class="QGridLayout" name="gridLayout"> <item row="0" column="0" colspan="2"> <widget class="QLabel" name="title"> + <property name="font"> + <font> + <weight>75</weight> + <bold>true</bold> + </font> + </property> <property name="text"> <string>TextLabel</string> </property> @@ -24,86 +34,104 @@ </property> </widget> </item> - <item row="1" column="0"> + <item row="1" column="0" rowspan="3"> <widget class="CoverView" name="cover"/> </item> <item row="1" column="1"> - <layout class="QVBoxLayout" name="verticalLayout"> - <item> - <widget class="QLabel" name="text"> - <property name="text"> - <string>TextLabel</string> - </property> - <property name="alignment"> - <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set> - </property> - <property name="wordWrap"> - <bool>true</bool> - </property> - </widget> - </item> - <item> - <widget class="QGroupBox" name="groupBox"> - <property name="title"> - <string>Comments</string> - </property> - <layout class="QVBoxLayout" name="verticalLayout_2"> - <item> - <widget class="QWebView" name="comments"> - <property name="sizePolicy"> - <sizepolicy hsizetype="Preferred" vsizetype="Expanding"> - <horstretch>0</horstretch> - <verstretch>0</verstretch> - </sizepolicy> - </property> - <property name="maximumSize"> - <size> - <width>350</width> - <height>16777215</height> - </size> - </property> - <property name="url"> - <url> - <string>about:blank</string> - </url> - </property> - </widget> - </item> - </layout> - </widget> - </item> - <item> - <widget class="QCheckBox" name="fit_cover"> - <property name="text"> - <string>Fit &cover within view</string> - </property> - </widget> - </item> - <item> - <layout class="QHBoxLayout" name="horizontalLayout"> + <widget class="QScrollArea" name="scrollArea"> + <property name="frameShape"> + <enum>QFrame::NoFrame</enum> + </property> + <property name="widgetResizable"> + <bool>true</bool> + 
</property> + <widget class="QWidget" name="scrollAreaWidgetContents"> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>435</width> + <height>670</height> + </rect> + </property> + <layout class="QVBoxLayout" name="verticalLayout"> <item> - <widget class="QPushButton" name="previous_button"> + <widget class="QLabel" name="text"> <property name="text"> - <string>&Previous</string> + <string>TextLabel</string> </property> - <property name="icon"> - <iconset resource="../../../../resources/images.qrc"> - <normaloff>:/images/previous.png</normaloff>:/images/previous.png</iconset> + <property name="alignment"> + <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set> + </property> + <property name="wordWrap"> + <bool>true</bool> </property> </widget> </item> <item> - <widget class="QPushButton" name="next_button"> - <property name="text"> - <string>&Next</string> - </property> - <property name="icon"> - <iconset resource="../../../../resources/images.qrc"> - <normaloff>:/images/next.png</normaloff>:/images/next.png</iconset> + <widget class="QGroupBox" name="groupBox"> + <property name="title"> + <string>Comments</string> </property> + <layout class="QVBoxLayout" name="verticalLayout_2"> + <item> + <widget class="QWebView" name="comments"> + <property name="sizePolicy"> + <sizepolicy hsizetype="Preferred" vsizetype="Expanding"> + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + </property> + <property name="maximumSize"> + <size> + <width>350</width> + <height>16777215</height> + </size> + </property> + <property name="url"> + <url> + <string>about:blank</string> + </url> + </property> + </widget> + </item> + </layout> </widget> </item> </layout> + </widget> + </widget> + </item> + <item row="2" column="1"> + <widget class="QCheckBox" name="fit_cover"> + <property name="text"> + <string>Fit &cover within view</string> + </property> + </widget> + </item> + <item row="3" column="1"> + <layout class="QHBoxLayout" 
name="horizontalLayout"> + <item> + <widget class="QPushButton" name="previous_button"> + <property name="text"> + <string>&Previous</string> + </property> + <property name="icon"> + <iconset resource="../../../../resources/images.qrc"> + <normaloff>:/images/previous.png</normaloff>:/images/previous.png</iconset> + </property> + </widget> + </item> + <item> + <widget class="QPushButton" name="next_button"> + <property name="text"> + <string>&Next</string> + </property> + <property name="icon"> + <iconset resource="../../../../resources/images.qrc"> + <normaloff>:/images/next.png</normaloff>:/images/next.png</iconset> + </property> + </widget> </item> </layout> </item> diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index 635a037482..b0b7115ca1 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' @@ -7,10 +9,10 @@ __docformat__ = 'restructuredtext en' import textwrap, re, os -from PyQt4.Qt import Qt, QDateEdit, QDate, \ - QIcon, QToolButton, QWidget, QLabel, QGridLayout, \ - QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \ - QPushButton, QSpinBox, QLineEdit +from PyQt4.Qt import (Qt, QDateEdit, QDate, + QIcon, QToolButton, QWidget, QLabel, QGridLayout, + QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, + QPushButton, QSpinBox, QLineEdit, QSizePolicy) from calibre.gui2.widgets import EnLineEdit, FormatList, ImageView from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox @@ -22,7 +24,7 @@ from calibre.ebooks.metadata.meta import get_metadata from calibre.gui2 import file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE, \ choose_files, error_dialog, choose_images, question_dialog from 
calibre.utils.date import local_tz, qt_to_dt -from calibre import strftime +from calibre import strftime, fit_image from calibre.ebooks import BOOK_EXTENSIONS from calibre.customize.ui import run_plugins_on_import from calibre.utils.date import utcfromtimestamp @@ -426,7 +428,7 @@ class Format(QListWidgetItem): # {{{ if timestamp is not None: ts = timestamp.astimezone(local_tz) t = strftime('%a, %d %b %Y [%H:%M:%S]', ts.timetuple()) - text = _('Last modified: %s')%t + text = _('Last modified: %s\n\nDouble click to view')%t self.setToolTip(text) self.setStatusTip(text) @@ -480,6 +482,7 @@ class FormatsManager(QWidget): # {{{ def initialize(self, db, id_): self.changed = False + self.formats.clear() exts = db.formats(id_, index_is_id=True) self.original_val = set([]) if exts: @@ -574,8 +577,7 @@ class FormatsManager(QWidget): # {{{ self.changed = True def show_format(self, item, *args): - fmt = item.ext - self.dialog.view_format.emit(fmt) + self.dialog.do_view_format(item.path, item.ext) def get_selected_format_metadata(self, db, id_): old = prefs['read_file_metadata'] @@ -638,6 +640,23 @@ class Cover(ImageView): # {{{ self.trim_cover_button, self.download_cover_button, self.generate_cover_button] + self.frame_size = (300, 400) + self.setSizePolicy(QSizePolicy(QSizePolicy.Preferred, + QSizePolicy.Preferred)) + + def frame_resized(self, ev): + sz = ev.size() + self.frame_size = (sz.width()//3, sz.height()) + + def sizeHint(self): + sz = ImageView.sizeHint(self) + w, h = sz.width(), sz.height() + resized, nw, nh = fit_image(w, h, self.frame_size[0], + self.frame_size[1]) + if resized: + sz = QSize(nw, nh) + return sz + def select_cover(self, *args): files = choose_images(self, 'change cover dialog', _('Choose cover for ') + @@ -882,8 +901,11 @@ class TagsEdit(MultiCompleteLineEdit): # {{{ # }}} -class ISBNEdit(QLineEdit): # {{{ - LABEL = _('IS&BN:') +class IdentifiersEdit(QLineEdit): # {{{ + LABEL = _('I&ds:') + BASE_TT = _('Edit the identifiers for this book. 
' + 'For example: \n\n%s')%( + 'isbn:1565927249, doi:10.1000/182, amazon:1565927249') def __init__(self, parent): QLineEdit.__init__(self, parent) @@ -893,32 +915,44 @@ class ISBNEdit(QLineEdit): # {{{ @dynamic_property def current_val(self): def fget(self): - return self.pat.sub('', unicode(self.text()).strip()) + raw = unicode(self.text()).strip() + parts = [x.strip() for x in raw.split(',')] + ans = {} + for x in parts: + c = x.split(':') + if len(c) == 2: + ans[c[0]] = c[1] + return ans def fset(self, val): if not val: - val = '' - self.setText(val.strip()) + val = {} + txt = ', '.join(['%s:%s'%(k, v) for k, v in val.iteritems()]) + self.setText(txt.strip()) return property(fget=fget, fset=fset) def initialize(self, db, id_): - self.current_val = db.isbn(id_, index_is_id=True) + self.current_val = db.get_identifiers(id_, index_is_id=True) self.original_val = self.current_val def commit(self, db, id_): - db.set_isbn(id_, self.current_val, notify=False, commit=False) + if self.original_val != self.current_val: + db.set_identifiers(id_, self.current_val, notify=False, commit=False) return True def validate(self, *args): - isbn = self.current_val - tt = _('This ISBN number is valid') + identifiers = self.current_val + isbn = identifiers.get('isbn', '') + tt = self.BASE_TT + extra = '' if not isbn: col = 'rgba(0,255,0,0%)' elif check_isbn(isbn) is not None: col = 'rgba(0,255,0,20%)' + extra = '\n\n'+_('This ISBN number is valid') else: col = 'rgba(255,0,0,20%)' - tt = _('This ISBN number is invalid') - self.setToolTip(tt) + extra = '\n\n' + _('This ISBN number is invalid') + self.setToolTip(tt+extra) self.setStyleSheet('QLineEdit { background-color: %s }'%col) # }}} diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index 5b17b454e7..4f66e0d2ba 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from 
__future__ import (unicode_literals, division, absolute_import, + print_function) __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' @@ -8,31 +10,31 @@ __docformat__ = 'restructuredtext en' import os from functools import partial -from PyQt4.Qt import Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, \ - QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, \ - QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, \ - QSizePolicy, QPalette, QFrame, QSize, QKeySequence +from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, + QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, + QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, + QSizePolicy, QPalette, QFrame, QSize, QKeySequence) from calibre.ebooks.metadata import authors_to_string, string_to_authors from calibre.gui2 import ResizableDialog, error_dialog, gprefs -from calibre.gui2.metadata.basic_widgets import TitleEdit, AuthorsEdit, \ - AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, ISBNEdit, \ - RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit, \ - BuddyLabel, DateEdit, PubdateEdit +from calibre.gui2.metadata.basic_widgets import (TitleEdit, AuthorsEdit, + AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, IdentifiersEdit, + RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit, + BuddyLabel, DateEdit, PubdateEdit) from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.utils.config import tweaks class MetadataSingleDialogBase(ResizableDialog): - view_format = pyqtSignal(object) + view_format = pyqtSignal(object, object) cc_two_column = tweaks['metadata_single_use_2_cols_for_custom_fields'] one_line_comments_toolbar = False def __init__(self, db, parent=None): self.db = db - self.changed = set([]) - self.books_to_refresh = set([]) - self.rows_to_refresh = set([]) + self.changed = set() + self.books_to_refresh = set() + 
self.rows_to_refresh = set() ResizableDialog.__init__(self, parent) def setupUi(self, *args): # {{{ @@ -145,8 +147,8 @@ class MetadataSingleDialogBase(ResizableDialog): self.tags_editor_button.clicked.connect(self.tags_editor) self.basic_metadata_widgets.append(self.tags) - self.isbn = ISBNEdit(self) - self.basic_metadata_widgets.append(self.isbn) + self.identifiers = IdentifiersEdit(self) + self.basic_metadata_widgets.append(self.identifiers) self.publisher = PublisherEdit(self) self.basic_metadata_widgets.append(self.publisher) @@ -192,6 +194,13 @@ class MetadataSingleDialogBase(ResizableDialog): pass # Do something # }}} + def do_view_format(self, path, fmt): + if path: + self.view_format.emit(None, path) + else: + self.view_format.emit(self.book_id, fmt) + + def do_layout(self): raise NotImplementedError() @@ -202,6 +211,8 @@ class MetadataSingleDialogBase(ResizableDialog): widget.initialize(self.db, id_) for widget in getattr(self, 'custom_metadata_widgets', []): widget.initialize(id_) + if callable(self.set_current_callback): + self.set_current_callback(id_) # Commented out as it doesn't play nice with Next, Prev buttons #self.fetch_metadata_button.setFocus(Qt.OtherFocusReason) @@ -280,8 +291,8 @@ class MetadataSingleDialogBase(ResizableDialog): self.publisher.current_val = mi.publisher if not mi.is_null('tags'): self.tags.current_val = mi.tags - if not mi.is_null('isbn'): - self.isbn.current_val = mi.isbn + if not mi.is_null('identifiers'): + self.identifiers.current_val = mi.identifiers if not mi.is_null('pubdate'): self.pubdate.current_val = mi.pubdate if not mi.is_null('series') and mi.series.strip(): @@ -337,11 +348,13 @@ class MetadataSingleDialogBase(ResizableDialog): gprefs['metasingle_window_geometry3'] = bytearray(self.saveGeometry()) # Dialog use methods {{{ - def start(self, row_list, current_row, view_slot=None): + def start(self, row_list, current_row, view_slot=None, + set_current_callback=None): self.row_list = row_list self.current_row = 
current_row if view_slot is not None: self.view_format.connect(view_slot) + self.set_current_callback = set_current_callback self.do_one(apply_changes=False) ret = self.exec_() self.break_cycles() @@ -373,6 +386,7 @@ class MetadataSingleDialogBase(ResizableDialog): def break_cycles(self): # Break any reference cycles that could prevent python # from garbage collecting this dialog + self.set_current_callback = self.db = None def disconnect(signal): try: signal.disconnect() @@ -385,6 +399,14 @@ class MetadataSingleDialogBase(ResizableDialog): disconnect(x.clicked) # }}} +class Splitter(QSplitter): + + frame_resized = pyqtSignal(object) + + def resizeEvent(self, ev): + self.frame_resized.emit(ev) + return QSplitter.resizeEvent(self, ev) + class MetadataSingleDialog(MetadataSingleDialogBase): # {{{ def do_layout(self): @@ -437,8 +459,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{ tl.addWidget(self.formats_manager, 0, 6, 3, 1) - self.splitter = QSplitter(Qt.Horizontal, self) + self.splitter = Splitter(Qt.Horizontal, self) self.splitter.addWidget(self.cover) + self.splitter.frame_resized.connect(self.cover.frame_resized) l.addWidget(self.splitter) self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self) gb.l = l = QGridLayout() @@ -475,9 +498,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{ create_row2(1, self.rating) sto(self.rating, self.tags) create_row2(2, self.tags, self.tags_editor_button) - sto(self.tags_editor_button, self.isbn) - create_row2(3, self.isbn) - sto(self.isbn, self.timestamp) + sto(self.tags_editor_button, self.identifiers) + create_row2(3, self.identifiers) + sto(self.identifiers, self.timestamp) create_row2(4, self.timestamp, self.timestamp.clear_button) sto(self.timestamp.clear_button, self.pubdate) create_row2(5, self.pubdate, self.pubdate.clear_button) @@ -562,9 +585,9 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{ create_row(8, self.pubdate, self.publisher, button=self.pubdate.clear_button, 
icon='trash.png') create_row(9, self.publisher, self.timestamp) - create_row(10, self.timestamp, self.isbn, + create_row(10, self.timestamp, self.identifiers, button=self.timestamp.clear_button, icon='trash.png') - create_row(11, self.isbn, self.comments) + create_row(11, self.identifiers, self.comments) tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding), 12, 1, 1 ,1) @@ -580,7 +603,7 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{ sr.setWidget(w) gbl.addWidget(sr) self.tabs[0].l.addWidget(gb, 0, 1, 1, 1) - sto(self.isbn, gb) + sto(self.identifiers, gb) w = QGroupBox(_('&Comments'), tab0) sp = QSizePolicy() @@ -632,9 +655,11 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{ # }}} -def edit_metadata(db, row_list, current_row, parent=None, view_slot=None): +def edit_metadata(db, row_list, current_row, parent=None, view_slot=None, + set_current_callback=None): d = MetadataSingleDialog(db, parent) - d.start(row_list, current_row, view_slot=view_slot) + d.start(row_list, current_row, view_slot=view_slot, + set_current_callback=set_current_callback) return d.changed, d.rows_to_refresh if __name__ == '__main__': diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py new file mode 100644 index 0000000000..ace4133d7a --- /dev/null +++ b/src/calibre/gui2/metadata/single_download.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, + QStyle, QApplication) + +class RichTextDelegate(QStyledItemDelegate): # {{{ + + def __init__(self, parent=None): + QStyledItemDelegate.__init__(self, parent) + + def to_doc(self, index): + doc = QTextDocument() + 
doc.setHtml(index.data().toString()) + return doc + + def sizeHint(self, option, index): + ans = self.to_doc(index).size().toSize() + ans.setHeight(ans.height()+10) + return ans + + def paint(self, painter, option, index): + painter.save() + painter.setClipRect(QRectF(option.rect)) + if hasattr(QStyle, 'CE_ItemViewItem'): + QApplication.style().drawControl(QStyle.CE_ItemViewItem, option, painter) + elif option.state & QStyle.State_Selected: + painter.fillRect(option.rect, option.palette.highlight()) + painter.translate(option.rect.topLeft()) + self.to_doc(index).drawContents(painter) + painter.restore() +# }}} + diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index f86e261443..73f423981a 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -985,6 +985,7 @@ class TagsModel(QAbstractItemModel): # {{{ def do_drop_from_library(self, md, action, row, column, parent): idx = parent if idx.isValid(): + self.tags_view.setCurrentIndex(idx) node = self.data(idx, Qt.UserRole) if node.type == TagTreeItem.TAG: fm = self.db.metadata_for_field(node.tag.category) diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index e5f1c94342..ea0d2570e5 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -312,6 +312,7 @@ class ImageView(QWidget, ImageDropMixin): p.setPen(pen) if self.draw_border: p.drawRect(target) + #p.drawRect(self.rect()) p.end() class CoverView(QGraphicsView, ImageDropMixin): diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index dba6abbfa5..eea28469a9 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -24,6 +24,8 @@ from calibre.library.server.xml import XMLServer from calibre.library.server.opds import OPDSServer from calibre.library.server.cache import Cache from calibre.library.server.browse import BrowseServer +from calibre.utils.search_query_parser import saved_searches +from calibre import 
prints class DispatchController(object): # {{{ @@ -178,7 +180,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, def set_search_restriction(self, restriction): self.search_restriction_name = restriction if restriction: - self.search_restriction = 'search:"%s"'%restriction + if restriction not in saved_searches().names(): + prints('WARNING: Content server: search restriction ', + restriction, ' does not exist') + self.search_restriction = '' + else: + self.search_restriction = 'search:"%s"'%restriction else: self.search_restriction = '' self.reset_caches() diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 64c363b8ba..5c80df20df 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -30,7 +30,7 @@ entry_points = { 'calibre-customize = calibre.customize.ui:main', 'calibre-complete = calibre.utils.complete:main', 'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main', - 'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main', + 'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main', 'epub-fix = calibre.ebooks.epub.fix.main:main', 'calibre-smtp = calibre.utils.smtp:main', ], @@ -183,7 +183,7 @@ class PostInstall: from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop from calibre.gui2.viewer.main import option_parser as viewer_op - from calibre.ebooks.metadata.fetch import option_parser as fem_op + from calibre.ebooks.metadata.sources.cli import option_parser as fem_op from calibre.gui2.main import option_parser as guiop from calibre.utils.smtp import option_parser as smtp_op from calibre.library.server.main import option_parser as serv_op diff --git a/src/calibre/manual/conf.py b/src/calibre/manual/conf.py index fc8962bcfd..d2b3a91d8d 100644 --- a/src/calibre/manual/conf.py +++ b/src/calibre/manual/conf.py @@ -126,7 +126,7 @@ html_use_modindex = False html_use_index = False # If true, the reST sources are included 
in the HTML build as _sources/<name>. -html_copy_source = False +html_copy_source = True # Output file base name for HTML help builder. htmlhelp_basename = 'calibredoc' diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 97ef32e9d4..f8b257fd75 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -99,7 +99,8 @@ We just need some information from you: device. Once you send us the output for a particular operating system, support for the device in that operating system -will appear in the next release of |app|. +will appear in the next release of |app|. To send us the output, open a bug report and attach the output to it. +See `calibre bugs <http://calibre-ebook.com/bugs>`_. My device is not being detected by |app|? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index 9307ff30f6..7b6e60c93a 100644 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -71,7 +71,7 @@ Edit metadata |emii| The :guilabel:`Edit metadata` action has six variations, which can be accessed by clicking the down arrow on the right side of the button. - 1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. For more detail see :ref:`metadata`. + 1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. 2. **Edit metadata in bulk**: This allows you to edit common metadata fields for large numbers of books simulataneously. It operates on all the books you have selected in the :ref:`Library view <search_sort>`. 3. 
**Download metadata and covers**: Downloads metadata and covers (if available), for the books that are selected in the book list. 4. **Download only metadata**: Downloads only metadata (if available), for the books that are selected in the book list. @@ -79,6 +79,7 @@ Edit metadata 6. **Download only social metadata**: Downloads only social metadata such as tags and reviews (if available), for the books that are selected in the book list. 7. **Merge Book Records**: Gives you the capability of merging the metadata and formats of two or more book records together. You can choose to either delete or keep the records that were not clicked first. +For more details see :ref:`metadata`. .. _convert_ebooks: diff --git a/src/calibre/manual/index.rst b/src/calibre/manual/index.rst index 996a1de382..e54882dda0 100644 --- a/src/calibre/manual/index.rst +++ b/src/calibre/manual/index.rst @@ -70,7 +70,7 @@ Customizing |app|'s e-book conversion .. toctree:: :maxdepth: 2 - viewer + conversion Editing e-book metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -78,7 +78,7 @@ Editing e-book metadata .. 
toctree:: :maxdepth: 2 - viewer + metadata Frequently Asked Questions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py index 2f77ede6b3..6f8703ab49 100644 --- a/src/calibre/utils/browser.py +++ b/src/calibre/utils/browser.py @@ -38,10 +38,10 @@ class Browser(B): self._clone_actions['set_handle_equiv'] = ('set_handle_equiv', args, kwargs) - def set_handle_gzip(self, *args, **kwargs): - B.set_handle_gzip(self, *args, **kwargs) + def set_handle_gzip(self, handle): + B._set_handler(self, '_gzip', handle) self._clone_actions['set_handle_gzip'] = ('set_handle_gzip', - args, kwargs) + (handle,), {}) def set_debug_redirect(self, *args, **kwargs): B.set_debug_redirect(self, *args, **kwargs) diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index d5a489acf1..66316d051b 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -784,6 +784,7 @@ def write_tweaks(raw): tweaks = read_tweaks() +test_eight_code = tweaks.get('test_eight_code', False) def migrate(): if hasattr(os, 'geteuid') and os.geteuid() == 0: diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py index 42659d70cc..fdce30177a 100644 --- a/src/calibre/utils/magick/draw.py +++ b/src/calibre/utils/magick/draw.py @@ -92,12 +92,12 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None, ret = None if return_data: ret = data - if changed: + if changed or isinstance(ret, Image): if hasattr(img, 'set_compression_quality') and fmt == 'jpg': img.set_compression_quality(compression_quality) ret = img.export(fmt) else: - if changed: + if changed or isinstance(ret, Image): if hasattr(img, 'set_compression_quality') and fmt == 'jpg': img.set_compression_quality(compression_quality) img.save(path) diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index 390d407d16..26da9d9905 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -1386,12 +1386,19 @@ ol, ul { padding-left: 2em; } 
self.purgedata() def s_text_s(self, tag, attrs): - """ Generate a number of spaces. ODF has an element; HTML uses   - We use   so we can send the output through an XML parser if we desire to + # Changed by Kovid to fix non breaking spaces being prepended to + # element instead of being part of the text flow. + # We don't use an entity for the nbsp as the contents of self.data will + # be escaped on writeout. + """ Generate a number of spaces. We use the non breaking space for + the text:s ODF element. """ - c = attrs.get( (TEXTNS,'c'),"1") - for x in xrange(int(c)): - self.writeout(' ') + try: + c = int(attrs.get((TEXTNS, 'c'), 1)) + except: + c = 0 + if c > 0: + self.data.append(u'\u00a0'*c) def s_text_span(self, tag, attrs): """ The <text:span> element matches the <span> element in HTML. It is