From 9f1091c9a2f394d1e94b75d2c6440f076f22192f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 17 Aug 2011 20:32:10 -0600 Subject: [PATCH] Enable downloading of language metadata from the various metadata sources --- src/calibre/ebooks/metadata/sources/amazon.py | 19 ++++++++++++------- src/calibre/ebooks/metadata/sources/google.py | 7 +++++-- .../ebooks/metadata/sources/identify.py | 1 + .../ebooks/metadata/sources/overdrive.py | 8 +++++--- src/calibre/gui2/metadata/single.py | 2 ++ .../gui2/preferences/metadata_sources.py | 2 +- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 6220f29020..aaa13d5769 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -22,6 +22,7 @@ from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.metadata.book.base import Metadata from calibre.library.comments import sanitize_comments_html from calibre.utils.date import parse_date +from calibre.utils.localization import canonicalize_lang class Worker(Thread): # Get details {{{ @@ -106,10 +107,11 @@ class Worker(Thread): # Get details {{{ r'([0-9.]+) (out of|von|su|étoiles sur) (\d+)( (stars|Sternen|stelle)){0,1}') lm = { - 'en': ('English', 'Englisch'), - 'fr': ('French', 'Français'), - 'it': ('Italian', 'Italiano'), - 'de': ('German', 'Deutsch'), + 'eng': ('English', 'Englisch'), + 'fra': ('French', 'Français'), + 'ita': ('Italian', 'Italiano'), + 'deu': ('German', 'Deutsch'), + 'spa': ('Spanish', 'Espa\xf1ol', 'Espaniol'), } self.lang_map = {} for code, names in lm.iteritems(): @@ -374,8 +376,11 @@ class Worker(Thread): # Get details {{{ def parse_language(self, pd): for x in reversed(pd.xpath(self.language_xpath)): if x.tail: - ans = x.tail.strip() - ans = self.lang_map.get(ans, None) + raw = x.tail.strip() + ans = self.lang_map.get(raw, None) + if ans: + return ans + ans = 
canonicalize_lang(raw) if ans: return ans # }}} @@ -388,7 +393,7 @@ class Amazon(Source): capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'identifier:amazon', 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate', - 'language']) + 'languages']) has_html_comments = True supports_gzip_transfer_encoding = True diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index bd1043b774..f9c43d86cc 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -20,6 +20,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.date import parse_date, utcnow from calibre.utils.cleantext import clean_ascii_chars +from calibre.utils.localization import canonicalize_lang from calibre import as_unicode NAMESPACES = { @@ -95,7 +96,9 @@ def to_metadata(browser, log, entry_, timeout): # {{{ return mi mi.comments = get_text(extra, description) - #mi.language = get_text(extra, language) + lang = canonicalize_lang(get_text(extra, language)) + if lang: + mi.language = lang mi.publisher = get_text(extra, publisher) # ISBN @@ -162,7 +165,7 @@ class GoogleBooks(Source): capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating', - 'identifier:google']) # language currently disabled + 'identifier:google', 'languages']) supports_gzip_transfer_encoding = True cached_cover_url_is_reliable = False diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index a7bcbc5a89..97fbae4727 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -484,6 +484,7 @@ def identify(log, abort, # {{{ 'publication dates') start_time = time.time() results = 
merge_identify_results(results, log) + log('We have %d merged results, merging took: %.2f seconds' % (len(results), time.time() - start_time)) diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index f52b1f423b..2e63a2e267 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -35,7 +35,7 @@ class OverDrive(Source): capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', 'comments', 'publisher', 'identifier:isbn', 'series', 'series_index', - 'language', 'identifier:overdrive']) + 'languages', 'identifier:overdrive']) has_html_comments = True supports_gzip_transfer_encoding = False cached_cover_url_is_reliable = True @@ -421,8 +421,10 @@ class OverDrive(Source): pass if lang: lang = lang[0].strip().lower() - mi.language = {'english':'en', 'french':'fr', 'german':'de', - 'spanish':'es'}.get(lang, None) + lang = {'english':'eng', 'french':'fra', 'german':'deu', + 'spanish':'spa'}.get(lang, None) + if lang: + mi.language = lang if ebook_isbn: #print "ebook isbn is "+str(ebook_isbn[0]) diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index bfe80f983f..54744b682a 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -354,6 +354,8 @@ class MetadataSingleDialogBase(ResizableDialog): self.series.current_val = mi.series if mi.series_index is not None: self.series_index.current_val = float(mi.series_index) + if not mi.is_null('languages'): + self.languages.lang_codes = mi.languages if mi.comments and mi.comments.strip(): self.comments.current_val = mi.comments diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py index d9dd64af6c..541da2e203 100644 --- a/src/calibre/gui2/preferences/metadata_sources.py +++ b/src/calibre/gui2/preferences/metadata_sources.py @@ 
-161,7 +161,7 @@ class FieldsModel(QAbstractListModel): # {{{ 'tags' : _('Tags'), 'title': _('Title'), 'series': _('Series'), - 'language': _('Language'), + 'languages': _('Languages'), } self.overrides = {} self.exclude = frozenset(['series_index'])