diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index 2ca67e526d..f744bb7f41 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -33,7 +33,7 @@ class MetaInformation(object): for attr in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', 'isbn', 'tags', 'cover_data', 'application_id', - 'manifest', 'spine', 'toc', 'cover'): + 'manifest', 'spine', 'toc', 'cover', 'language'): if hasattr(mi, attr): setattr(ans, attr, getattr(mi, attr)) @@ -63,6 +63,7 @@ class MetaInformation(object): self.rating = None if not mi else mi.rating self.isbn = None if not mi else mi.isbn self.tags = [] if not mi else mi.tags + self.language = None if not mi else mi.language # Typically a string describing the language #: mi.cover_data = (ext, data) self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None) self.application_id = mi.application_id if (mi and hasattr(mi, 'application_id')) else None @@ -84,7 +85,8 @@ class MetaInformation(object): for attr in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', - 'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover'): + 'isbn', 'application_id', 'manifest', 'spine', 'toc', + 'cover', 'language'): if hasattr(mi, attr): val = getattr(mi, attr) if val is not None: @@ -115,6 +117,8 @@ class MetaInformation(object): ans += u'Tags : ' +unicode(self.tags) + '\n' if self.series: ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index + if self.language: + ans += u'Language : ' + unicode(self.language) + u'\n' return ans.strip() def __nonzero__(self): diff --git a/src/calibre/ebooks/metadata/opf.xml b/src/calibre/ebooks/metadata/opf.xml index f2c719d7f6..ed54c10a32 100644 --- a/src/calibre/ebooks/metadata/opf.xml +++ b/src/calibre/ebooks/metadata/opf.xml @@ -9,7 +9,8 @@ ${mi.title} ${author} 
${mi.application_id} - + + ${mi.language if mi.language else 'Unknown'} ${mi.category} ${mi.comments} ${mi.publisher} @@ -17,7 +18,9 @@ ${mi.series} ${mi.series_index} ${mi.rating} - ${tag} + + ${tag} + 
diff --git a/src/calibre/ebooks/mobi/langcodes.py b/src/calibre/ebooks/mobi/langcodes.py
new file mode 100644
--- /dev/null
+++ b/src/calibre/ebooks/mobi/langcodes.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+# Name tables for the language code of a MOBI header. The reader splits that
+# code into a main language id and a sub-language id and looks each one up.
+# NOTE(review): the ids look like the Windows LANGID primary/sub-language
+# constants -- confirm against the MOBI format notes.
+
+# Main language id -> name.
+main_language = {
+    0 : "NEUTRAL",
+    54 : "AFRIKAANS",
+    28 : "ALBANIAN",
+    1 : "ARABIC",
+    43 : "ARMENIAN",
+    77 : "ASSAMESE",
+    44 : "AZERI",
+    45 : "BASQUE",
+    35 : "BELARUSIAN",
+    69 : "BENGALI",
+    2 : "BULGARIAN",
+    3 : "CATALAN",
+    4 : "CHINESE",
+    5 : "CZECH",
+    6 : "DANISH",
+    19 : "DUTCH",
+    9 : "ENGLISH",
+    37 : "ESTONIAN",
+    56 : "FAEROESE",
+    41 : "FARSI",
+    11 : "FINNISH",
+    12 : "FRENCH",
+    55 : "GEORGIAN",
+    7 : "GERMAN",
+    8 : "GREEK",
+    71 : "GUJARATI",
+    13 : "HEBREW",
+    57 : "HINDI",
+    14 : "HUNGARIAN",
+    15 : "ICELANDIC",
+    33 : "INDONESIAN",
+    16 : "ITALIAN",
+    17 : "JAPANESE",
+    75 : "KANNADA",
+    63 : "KAZAK",
+    87 : "KONKANI",
+    18 : "KOREAN",
+    38 : "LATVIAN",
+    39 : "LITHUANIAN",
+    47 : "MACEDONIAN",
+    62 : "MALAY",
+    76 : "MALAYALAM",
+    58 : "MALTESE",
+    78 : "MARATHI",
+    97 : "NEPALI",
+    20 : "NORWEGIAN",
+    72 : "ORIYA",
+    21 : "POLISH",
+    22 : "PORTUGUESE",
+    70 : "PUNJABI",
+    23 : "RHAETOROMANIC",
+    24 : "ROMANIAN",
+    25 : "RUSSIAN",
+    59 : "SAMI",
+    79 : "SANSKRIT",
+    # Croatian and Serbian both use id 26. A dict literal keeps only the last
+    # value given for a key, so a second 26 entry would be dead; "SERBIAN" is
+    # kept here and Croatian is told apart by its sub-language, (26, 1).
+    26 : "SERBIAN",
+    27 : "SLOVAK",
+    36 : "SLOVENIAN",
+    46 : "SORBIAN",
+    10 : "SPANISH",
+    48 : "SUTU",
+    65 : "SWAHILI",
+    29 : "SWEDISH",
+    73 : "TAMIL",
+    68 : "TATAR",
+    74 : "TELUGU",
+    30 : "THAI",
+    49 : "TSONGA",
+    50 : "TSWANA",
+    31 : "TURKISH",
+    34 : "UKRAINIAN",
+    32 : "URDU",
+    67 : "UZBEK",
+    42 : "VIETNAMESE",
+    52 : "XHOSA",
+    53 : "ZULU",
+}
+
+# Sub-language names, keyed by (main language id, sub-language id).
+# Sub-language ids are only unique within one main language (1 is used by
+# Arabic, French, German, ...). Keying by the bare sub id would make every
+# later entry replace the earlier ones in the dict literal, so id 1 would
+# always be "UZBEK_LATIN". The bare key 0 is kept so sub_language.get(0) gives
+# "NEUTRAL"; any other bare integer misses and yields the caller's default.
+sub_language = {
+    0 : "NEUTRAL",
+    # ARABIC (main id 1)
+    (1, 1) : "ARABIC_SAUDI_ARABIA",
+    (1, 2) : "ARABIC_IRAQ",
+    (1, 3) : "ARABIC_EGYPT",
+    (1, 4) : "ARABIC_LIBYA",
+    (1, 5) : "ARABIC_ALGERIA",
+    (1, 6) : "ARABIC_MOROCCO",
+    (1, 7) : "ARABIC_TUNISIA",
+    (1, 8) : "ARABIC_OMAN",
+    (1, 9) : "ARABIC_YEMEN",
+    (1, 10) : "ARABIC_SYRIA",
+    (1, 11) : "ARABIC_JORDAN",
+    (1, 12) : "ARABIC_LEBANON",
+    (1, 13) : "ARABIC_KUWAIT",
+    (1, 14) : "ARABIC_UAE",
+    (1, 15) : "ARABIC_BAHRAIN",
+    (1, 16) : "ARABIC_QATAR",
+    # AZERI (main id 44)
+    (44, 1) : "AZERI_LATIN",
+    (44, 2) : "AZERI_CYRILLIC",
+    # CHINESE (main id 4)
+    (4, 1) : "CHINESE_TRADITIONAL",
+    (4, 2) : "CHINESE_SIMPLIFIED",
+    (4, 3) : "CHINESE_HONGKONG",
+    (4, 4) : "CHINESE_SINGAPORE",
+    # DUTCH (main id 19)
+    (19, 1) : "DUTCH",
+    (19, 2) : "DUTCH_BELGIAN",
+    # FRENCH (main id 12)
+    (12, 1) : "FRENCH",
+    (12, 2) : "FRENCH_BELGIAN",
+    (12, 3) : "FRENCH_CANADIAN",
+    (12, 4) : "FRENCH_SWISS",
+    (12, 5) : "FRENCH_LUXEMBOURG",
+    (12, 6) : "FRENCH_MONACO",
+    # GERMAN (main id 7)
+    (7, 1) : "GERMAN",
+    (7, 2) : "GERMAN_SWISS",
+    (7, 3) : "GERMAN_AUSTRIAN",
+    (7, 4) : "GERMAN_LUXEMBOURG",
+    (7, 5) : "GERMAN_LIECHTENSTEIN",
+    # ITALIAN (main id 16)
+    (16, 1) : "ITALIAN",
+    (16, 2) : "ITALIAN_SWISS",
+    # KOREAN (main id 18)
+    (18, 1) : "KOREAN",
+    # LITHUANIAN (main id 39)
+    (39, 1) : "LITHUANIAN",
+    # MALAY (main id 62)
+    (62, 1) : "MALAY_MALAYSIA",
+    (62, 2) : "MALAY_BRUNEI_DARUSSALAM",
+    # NORWEGIAN (main id 20)
+    (20, 1) : "NORWEGIAN_BOKMAL",
+    (20, 2) : "NORWEGIAN_NYNORSK",
+    # PORTUGUESE (main id 22)
+    (22, 2) : "PORTUGUESE",
+    (22, 1) : "PORTUGUESE_BRAZILIAN",
+    # CROATIAN / SERBIAN (shared main id 26)
+    # NOTE(review): sub id 1 for Croatian is taken from the Windows tables.
+    (26, 1) : "CROATIAN",
+    (26, 2) : "SERBIAN_LATIN",
+    (26, 3) : "SERBIAN_CYRILLIC",
+    # SPANISH (main id 10)
+    (10, 1) : "SPANISH",
+    (10, 2) : "SPANISH_MEXICAN",
+    (10, 4) : "SPANISH_GUATEMALA",
+    (10, 5) : "SPANISH_COSTA_RICA",
+    (10, 6) : "SPANISH_PANAMA",
+    (10, 7) : "SPANISH_DOMINICAN_REPUBLIC",
+    (10, 8) : "SPANISH_VENEZUELA",
+    (10, 9) : "SPANISH_COLOMBIA",
+    (10, 10) : "SPANISH_PERU",
+    (10, 11) : "SPANISH_ARGENTINA",
+    (10, 12) : "SPANISH_ECUADOR",
+    (10, 13) : "SPANISH_CHILE",
+    (10, 14) : "SPANISH_URUGUAY",
+    (10, 15) : "SPANISH_PARAGUAY",
+    (10, 16) : "SPANISH_BOLIVIA",
+    (10, 17) : "SPANISH_EL_SALVADOR",
+    (10, 18) : "SPANISH_HONDURAS",
+    (10, 19) : "SPANISH_NICARAGUA",
+    (10, 20) : "SPANISH_PUERTO_RICO",
+    # SWEDISH (main id 29)
+    (29, 1) : "SWEDISH",
+    (29, 2) : "SWEDISH_FINLAND",
+    # UZBEK (main id 67)
+    (67, 1) : "UZBEK_LATIN",
+    (67, 2) : "UZBEK_CYRILLIC",
+}
+
+
+def sub_language_name(langid, sublangid, default='NEUTRAL'):
+    '''
+    Return the name of sub-language `sublangid` of main language `langid`.
+
+    `default` is returned when the pair is not in :data:`sub_language`.
+    '''
+    return sub_language.get((langid, sublangid), default)
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index b89fcc2324..8f314026f8 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -17,6 +17,7 @@ from calibre.ebooks.BeautifulSoup import 
BeautifulSoup from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi.huffcdic import HuffReader from calibre.ebooks.mobi.palmdoc import decompress_doc +from calibre.ebooks.mobi.langcodes import main_language, sub_language from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf import OPFCreator @@ -51,18 +52,13 @@ class EXTHHeader(object): def process_metadata(self, id, content, codec): if id == 100: - aus = content.split(',') - if len(aus) > 0: - self.mi.author_sort = aus[0].decode(codec, 'ignore').strip() - self.mi.authors = [aus[1].decode(codec, 'ignore').strip()] - else: - self.mi.authors = [aus[0].decode(codec, 'ignore').strip()] + self.mi.authors = [content.decode(codec, 'ignore').strip()] elif id == 101: self.mi.publisher = content.decode(codec, 'ignore').strip() elif id == 103: - self.mi.comments = content.decode(codec, 'ignore') + self.mi.comments = content.decode(codec, 'ignore') elif id == 104: - self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '') + self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '') elif id == 105: if not self.mi.tags: self.mi.tags = [] @@ -76,6 +72,7 @@ class BookHeader(object): self.compression_type = raw[:2] self.records, self.records_size = struct.unpack('>HH', raw[8:12]) self.encryption_type, = struct.unpack('>H', raw[12:14]) + self.doctype = raw[16:20] self.length, self.type, self.codepage, self.unique_id, self.version = \ struct.unpack('>LLLLL', raw[20:40]) @@ -100,11 +97,18 @@ class BookHeader(object): if self.compression_type == 'DH': self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78]) + langcode = struct.unpack('!L', raw[0x5C:0x60])[0] + langid = langcode & 0xFF + sublangid = (langcode >> 10) & 0xFF + self.language = main_language.get(langid, 'ENGLISH') + self.sublanguage = sub_language.get(sublangid, 'NEUTRAL') + self.exth_flag, = struct.unpack('>L', raw[0x80:0x84]) self.exth = None if self.exth_flag & 0x40: self.exth = 
EXTHHeader(raw[16+self.length:], self.codec) self.exth.mi.uid = self.unique_id + self.exth.mi.language = self.language class MobiReader(object):