diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index 2ca67e526d..f744bb7f41 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -33,7 +33,7 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'cover_data', 'application_id',
- 'manifest', 'spine', 'toc', 'cover'):
+ 'manifest', 'spine', 'toc', 'cover', 'language'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
@@ -63,6 +63,7 @@ class MetaInformation(object):
self.rating = None if not mi else mi.rating
self.isbn = None if not mi else mi.isbn
self.tags = [] if not mi else mi.tags
+ self.language = None if not mi else mi.language # Typically a string describing the language
#: mi.cover_data = (ext, data)
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
self.application_id = mi.application_id if (mi and hasattr(mi, 'application_id')) else None
@@ -84,7 +85,8 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
- 'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover'):
+ 'isbn', 'application_id', 'manifest', 'spine', 'toc',
+ 'cover', 'language'):
if hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
@@ -115,6 +117,8 @@ class MetaInformation(object):
ans += u'Tags : ' +unicode(self.tags) + '\n'
if self.series:
ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index
+ if self.language:
+ ans += u'Language : ' + unicode(self.language) + u'\n'
return ans.strip()
def __nonzero__(self):
diff --git a/src/calibre/ebooks/metadata/opf.xml b/src/calibre/ebooks/metadata/opf.xml
index f2c719d7f6..ed54c10a32 100644
--- a/src/calibre/ebooks/metadata/opf.xml
+++ b/src/calibre/ebooks/metadata/opf.xml
@@ -9,7 +9,8 @@
${mi.title}
${author}
${mi.application_id}
-
+
+ ${mi.language if mi.language else 'Unknown'}
${mi.category}
${mi.comments}
${mi.publisher}
@@ -17,7 +18,9 @@
${mi.series}
${mi.series_index}
${mi.rating}
- ${tag}
+
+ ${tag}
+
diff --git a/src/calibre/ebooks/mobi/langcodes.py b/src/calibre/ebooks/mobi/langcodes.py
new file mode 100644
index 0000000000..c0884e55a8
--- /dev/null
+++ b/src/calibre/ebooks/mobi/langcodes.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+main_language = {  # Maps an integer language id to an upper-case language name; reader.py looks up (langcode & 0xFF) here and falls back to 'ENGLISH'.
+ 0 : "NEUTRAL",
+ 54 : "AFRIKAANS",
+ 28 : "ALBANIAN",
+ 1 : "ARABIC",
+ 43 : "ARMENIAN",
+ 77 : "ASSAMESE",
+ 44 : "AZERI",
+ 45 : "BASQUE",
+ 35 : "BELARUSIAN",
+ 69 : "BENGALI",
+ 2 : "BULGARIAN",
+ 3 : "CATALAN",
+ 4 : "CHINESE",
+ 26 : "CROATIAN",  # NOTE(review): key 26 is repeated further down for "SERBIAN"; the later entry wins in a dict literal, so this value is never visible.
+ 5 : "CZECH",
+ 6 : "DANISH",
+ 19 : "DUTCH",
+ 9 : "ENGLISH",
+ 37 : "ESTONIAN",
+ 56 : "FAEROESE",
+ 41 : "FARSI",
+ 11 : "FINNISH",
+ 12 : "FRENCH",
+ 55 : "GEORGIAN",
+ 7 : "GERMAN",
+ 8 : "GREEK",
+ 71 : "GUJARATI",
+ 13 : "HEBREW",
+ 57 : "HINDI",
+ 14 : "HUNGARIAN",
+ 15 : "ICELANDIC",
+ 33 : "INDONESIAN",
+ 16 : "ITALIAN",
+ 17 : "JAPANESE",
+ 75 : "KANNADA",
+ 63 : "KAZAK",
+ 87 : "KONKANI",
+ 18 : "KOREAN",
+ 38 : "LATVIAN",
+ 39 : "LITHUANIAN",
+ 47 : "MACEDONIAN",
+ 62 : "MALAY",
+ 76 : "MALAYALAM",
+ 58 : "MALTESE",
+ 78 : "MARATHI",
+ 97 : "NEPALI",
+ 20 : "NORWEGIAN",
+ 72 : "ORIYA",
+ 21 : "POLISH",
+ 22 : "PORTUGUESE",
+ 70 : "PUNJABI",
+ 23 : "RHAETOROMANIC",
+ 24 : "ROMANIAN",
+ 25 : "RUSSIAN",
+ 59 : "SAMI",
+ 79 : "SANSKRIT",
+ 26 : "SERBIAN",  # Effective value for key 26: overrides the earlier "CROATIAN" entry.
+ 27 : "SLOVAK",
+ 36 : "SLOVENIAN",
+ 46 : "SORBIAN",
+ 10 : "SPANISH",
+ 48 : "SUTU",
+ 65 : "SWAHILI",
+ 29 : "SWEDISH",
+ 73 : "TAMIL",
+ 68 : "TATAR",
+ 74 : "TELUGU",
+ 30 : "THAI",
+ 49 : "TSONGA",
+ 50 : "TSWANA",
+ 31 : "TURKISH",
+ 34 : "UKRAINIAN",
+ 32 : "URDU",
+ 67 : "UZBEK",
+ 42 : "VIETNAMESE",
+ 52 : "XHOSA",
+ 53 : "ZULU",
+}
+
+sub_language = {  # Maps an integer sub-language id to a name; reader.py looks up ((langcode >> 10) & 0xFF) here and falls back to 'NEUTRAL'.
+ 0 : "NEUTRAL",
+ 1 : "ARABIC_SAUDI_ARABIA",  # NOTE(review): ids 1-16 of this group are all reused by later groups, so none of the ARABIC_* names survive.
+ 2 : "ARABIC_IRAQ",
+ 3 : "ARABIC_EGYPT",
+ 4 : "ARABIC_LIBYA",
+ 5 : "ARABIC_ALGERIA",
+ 6 : "ARABIC_MOROCCO",
+ 7 : "ARABIC_TUNISIA",
+ 8 : "ARABIC_OMAN",
+ 9 : "ARABIC_YEMEN",
+ 10 : "ARABIC_SYRIA",
+ 11 : "ARABIC_JORDAN",
+ 12 : "ARABIC_LEBANON",
+ 13 : "ARABIC_KUWAIT",
+ 14 : "ARABIC_UAE",
+ 15 : "ARABIC_BAHRAIN",
+ 16 : "ARABIC_QATAR",
+ 1 : "AZERI_LATIN",  # NOTE(review): from here on every language group restarts its ids at 1 or 2 inside this one flat dict; each repeat overwrites the earlier value.
+ 2 : "AZERI_CYRILLIC",
+ 1 : "CHINESE_TRADITIONAL",
+ 2 : "CHINESE_SIMPLIFIED",
+ 3 : "CHINESE_HONGKONG",
+ 4 : "CHINESE_SINGAPORE",
+ 1 : "DUTCH",
+ 2 : "DUTCH_BELGIAN",
+ 1 : "FRENCH",
+ 2 : "FRENCH_BELGIAN",
+ 3 : "FRENCH_CANADIAN",
+ 4 : "FRENCH_SWISS",
+ 5 : "FRENCH_LUXEMBOURG",
+ 6 : "FRENCH_MONACO",
+ 1 : "GERMAN",
+ 2 : "GERMAN_SWISS",
+ 3 : "GERMAN_AUSTRIAN",
+ 4 : "GERMAN_LUXEMBOURG",
+ 5 : "GERMAN_LIECHTENSTEIN",
+ 1 : "ITALIAN",
+ 2 : "ITALIAN_SWISS",
+ 1 : "KOREAN",
+ 1 : "LITHUANIAN",
+ 1 : "MALAY_MALAYSIA",
+ 2 : "MALAY_BRUNEI_DARUSSALAM",
+ 1 : "NORWEGIAN_BOKMAL",
+ 2 : "NORWEGIAN_NYNORSK",
+ 2 : "PORTUGUESE",
+ 1 : "PORTUGUESE_BRAZILIAN",
+ 2 : "SERBIAN_LATIN",
+ 3 : "SERBIAN_CYRILLIC",  # Last entry for key 3, so this is the value a lookup of 3 returns.
+ 1 : "SPANISH",
+ 2 : "SPANISH_MEXICAN",
+ 4 : "SPANISH_GUATEMALA",  # Keys 4-20 are not repeated after this group, so the SPANISH_* names are what lookups of 4-20 return.
+ 5 : "SPANISH_COSTA_RICA",
+ 6 : "SPANISH_PANAMA",
+ 7 : "SPANISH_DOMINICAN_REPUBLIC",
+ 8 : "SPANISH_VENEZUELA",
+ 9 : "SPANISH_COLOMBIA",
+ 10 : "SPANISH_PERU",
+ 11 : "SPANISH_ARGENTINA",
+ 12 : "SPANISH_ECUADOR",
+ 13 : "SPANISH_CHILE",
+ 14 : "SPANISH_URUGUAY",
+ 15 : "SPANISH_PARAGUAY",
+ 16 : "SPANISH_BOLIVIA",
+ 17 : "SPANISH_EL_SALVADOR",
+ 18 : "SPANISH_HONDURAS",
+ 19 : "SPANISH_NICARAGUA",
+ 20 : "SPANISH_PUERTO_RICO",
+ 1 : "SWEDISH",
+ 2 : "SWEDISH_FINLAND",
+ 1 : "UZBEK_LATIN",  # Last entry for key 1, so this is the value a lookup of 1 returns.
+ 2 : "UZBEK_CYRILLIC",  # Last entry for key 2, so this is the value a lookup of 2 returns.
+# NOTE(review): effective content is 0 NEUTRAL, 1 UZBEK_LATIN, 2 UZBEK_CYRILLIC, 3 SERBIAN_CYRILLIC, 4-20 SPANISH_*. Presumably this should be keyed per main language -- TODO confirm and fix together with the lookup in reader.py.
+ }
\ No newline at end of file
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index b89fcc2324..8f314026f8 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -17,6 +17,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
+from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
@@ -51,18 +52,13 @@ class EXTHHeader(object):
def process_metadata(self, id, content, codec):
if id == 100:
- aus = content.split(',')
- if len(aus) > 0:
- self.mi.author_sort = aus[0].decode(codec, 'ignore').strip()
- self.mi.authors = [aus[1].decode(codec, 'ignore').strip()]
- else:
- self.mi.authors = [aus[0].decode(codec, 'ignore').strip()]
+ self.mi.authors = [content.decode(codec, 'ignore').strip()]
elif id == 101:
self.mi.publisher = content.decode(codec, 'ignore').strip()
elif id == 103:
- self.mi.comments = content.decode(codec, 'ignore')
+ self.mi.comments = content.decode(codec, 'ignore')
elif id == 104:
- self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
+ self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
elif id == 105:
if not self.mi.tags:
self.mi.tags = []
@@ -76,6 +72,7 @@ class BookHeader(object):
self.compression_type = raw[:2]
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
self.encryption_type, = struct.unpack('>H', raw[12:14])
+
self.doctype = raw[16:20]
self.length, self.type, self.codepage, self.unique_id, self.version = \
struct.unpack('>LLLLL', raw[20:40])
@@ -100,11 +97,18 @@ class BookHeader(object):
if self.compression_type == 'DH':
self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
+ langcode = struct.unpack('!L', raw[0x5C:0x60])[0]
+ langid = langcode & 0xFF
+ sublangid = (langcode >> 10) & 0xFF
+ self.language = main_language.get(langid, 'ENGLISH')
+ self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
+
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None
if self.exth_flag & 0x40:
self.exth = EXTHHeader(raw[16+self.length:], self.codec)
self.exth.mi.uid = self.unique_id
+ self.exth.mi.language = self.language
class MobiReader(object):