Add support for language codes to mobi2oeb

This commit is contained in:
Kovid Goyal 2008-06-20 15:42:44 -07:00
parent 520328c196
commit d3691a7be2
4 changed files with 182 additions and 12 deletions

View File

@ -33,7 +33,7 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'cover_data', 'application_id',
'manifest', 'spine', 'toc', 'cover'):
'manifest', 'spine', 'toc', 'cover', 'language'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
@ -63,6 +63,7 @@ class MetaInformation(object):
self.rating = None if not mi else mi.rating
self.isbn = None if not mi else mi.isbn
self.tags = [] if not mi else mi.tags
self.language = None if not mi else mi.language # Typically a string describing the language
#: mi.cover_data = (ext, data)
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
self.application_id = mi.application_id if (mi and hasattr(mi, 'application_id')) else None
@ -84,7 +85,8 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover'):
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language'):
if hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
@ -115,6 +117,8 @@ class MetaInformation(object):
ans += u'Tags : ' +unicode(self.tags) + '\n'
if self.series:
ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index
if self.language:
ans += u'Language : ' + unicode(self.language) + u'\n'
return ans.strip()
def __nonzero__(self):

View File

@ -10,6 +10,7 @@
<dc:creator opf:role="aut" py:for="i, author in enumerate(mi.authors)" py:with="attrs={'file-as':mi.author_sort if i==0 else None}" py:attrs="attrs">${author}</dc:creator>
<dc:identifier scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
<dc:language>${mi.language if mi.language else 'Unknown'}</dc:language>
<dc:type py:if="mi.category">${mi.category}</dc:type>
<dc:description py:if="mi.comments">${mi.comments}</dc:description>
<dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
@ -17,7 +18,9 @@
<series py:if="mi.series">${mi.series}</series>
<series-index py:if="mi.series_index is not None">${mi.series_index}</series-index>
<rating py:if="mi.rating is not None">${mi.rating}</rating>
<dc:subject py:if="mi.tags is not None" py:for="tag in mi.tags">${tag}</dc:subject>
<py:for each="tag in mi.tags">
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
</py:for>
</metadata>
<guide>

View File

@ -0,0 +1,159 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
# Primary language names keyed by numeric language id.
#
# The MOBI reader looks entries up with the low byte of the language code
# stored in the book header, falling back to 'ENGLISH' for unknown ids.
# NOTE(review): the numbering looks like the Windows primary language
# identifiers -- confirm against the format documentation.
#
# Id 26 was listed twice in the original table (first as "CROATIAN", then as
# "SERBIAN").  A dict holds a single value per key and the later "SERBIAN"
# entry always won, so only that entry is kept here; the effective mapping is
# unchanged.  Telling the two apart presumably needs the sub-language id as
# well (TODO confirm).
main_language = {
    0: "NEUTRAL",
    54: "AFRIKAANS",
    28: "ALBANIAN",
    1: "ARABIC",
    43: "ARMENIAN",
    77: "ASSAMESE",
    44: "AZERI",
    45: "BASQUE",
    35: "BELARUSIAN",
    69: "BENGALI",
    2: "BULGARIAN",
    3: "CATALAN",
    4: "CHINESE",
    # 26: "CROATIAN" -- shares its id with SERBIAN below, see note above.
    5: "CZECH",
    6: "DANISH",
    19: "DUTCH",
    9: "ENGLISH",
    37: "ESTONIAN",
    56: "FAEROESE",
    41: "FARSI",
    11: "FINNISH",
    12: "FRENCH",
    55: "GEORGIAN",
    7: "GERMAN",
    8: "GREEK",
    71: "GUJARATI",
    13: "HEBREW",
    57: "HINDI",
    14: "HUNGARIAN",
    15: "ICELANDIC",
    33: "INDONESIAN",
    16: "ITALIAN",
    17: "JAPANESE",
    75: "KANNADA",
    63: "KAZAK",
    87: "KONKANI",
    18: "KOREAN",
    38: "LATVIAN",
    39: "LITHUANIAN",
    47: "MACEDONIAN",
    62: "MALAY",
    76: "MALAYALAM",
    58: "MALTESE",
    78: "MARATHI",
    97: "NEPALI",
    20: "NORWEGIAN",
    72: "ORIYA",
    21: "POLISH",
    22: "PORTUGUESE",
    70: "PUNJABI",
    23: "RHAETOROMANIC",
    24: "ROMANIAN",
    25: "RUSSIAN",
    59: "SAMI",
    79: "SANSKRIT",
    26: "SERBIAN",
    27: "SLOVAK",
    36: "SLOVENIAN",
    46: "SORBIAN",
    10: "SPANISH",
    48: "SUTU",
    65: "SWAHILI",
    29: "SWEDISH",
    73: "TAMIL",
    68: "TATAR",
    74: "TELUGU",
    30: "THAI",
    49: "TSONGA",
    50: "TSWANA",
    31: "TURKISH",
    34: "UKRAINIAN",
    32: "URDU",
    67: "UZBEK",
    42: "VIETNAMESE",
    52: "XHOSA",
    53: "ZULU",
}
# Sub-language (regional / script variant) names.
#
# Sub-language ids are only unique *within* a primary language, so a single
# flat ``{id: name}`` dict cannot hold them: in the original literal every
# repeated id overwrote the previous one and most names were lost (id 1, for
# example, ended up as "UZBEK_LATIN").  The names are therefore stored per
# primary language below.  The grouping follows the name prefixes, and the
# primary ids are the matching keys of ``main_language``.
#
# Each item is ``(primary language id, {sub-language id: name})``.  The
# order of the groups is the order of the original table; it matters only
# for rebuilding the legacy flat ``sub_language`` dict further down.
_SUB_LANGUAGE_GROUPS = (
    (1, {  # ARABIC
        1: "ARABIC_SAUDI_ARABIA",
        2: "ARABIC_IRAQ",
        3: "ARABIC_EGYPT",
        4: "ARABIC_LIBYA",
        5: "ARABIC_ALGERIA",
        6: "ARABIC_MOROCCO",
        7: "ARABIC_TUNISIA",
        8: "ARABIC_OMAN",
        9: "ARABIC_YEMEN",
        10: "ARABIC_SYRIA",
        11: "ARABIC_JORDAN",
        12: "ARABIC_LEBANON",
        13: "ARABIC_KUWAIT",
        14: "ARABIC_UAE",
        15: "ARABIC_BAHRAIN",
        16: "ARABIC_QATAR",
    }),
    (44, {  # AZERI
        1: "AZERI_LATIN",
        2: "AZERI_CYRILLIC",
    }),
    (4, {  # CHINESE
        1: "CHINESE_TRADITIONAL",
        2: "CHINESE_SIMPLIFIED",
        3: "CHINESE_HONGKONG",
        4: "CHINESE_SINGAPORE",
    }),
    (19, {  # DUTCH
        1: "DUTCH",
        2: "DUTCH_BELGIAN",
    }),
    (12, {  # FRENCH
        1: "FRENCH",
        2: "FRENCH_BELGIAN",
        3: "FRENCH_CANADIAN",
        4: "FRENCH_SWISS",
        5: "FRENCH_LUXEMBOURG",
        6: "FRENCH_MONACO",
    }),
    (7, {  # GERMAN
        1: "GERMAN",
        2: "GERMAN_SWISS",
        3: "GERMAN_AUSTRIAN",
        4: "GERMAN_LUXEMBOURG",
        5: "GERMAN_LIECHTENSTEIN",
    }),
    (16, {  # ITALIAN
        1: "ITALIAN",
        2: "ITALIAN_SWISS",
    }),
    (18, {  # KOREAN
        1: "KOREAN",
    }),
    (39, {  # LITHUANIAN
        1: "LITHUANIAN",
    }),
    (62, {  # MALAY
        1: "MALAY_MALAYSIA",
        2: "MALAY_BRUNEI_DARUSSALAM",
    }),
    (20, {  # NORWEGIAN
        1: "NORWEGIAN_BOKMAL",
        2: "NORWEGIAN_NYNORSK",
    }),
    (22, {  # PORTUGUESE
        2: "PORTUGUESE",
        1: "PORTUGUESE_BRAZILIAN",
    }),
    (26, {  # SERBIAN
        2: "SERBIAN_LATIN",
        3: "SERBIAN_CYRILLIC",
    }),
    (10, {  # SPANISH (the original table has no entry for id 3)
        1: "SPANISH",
        2: "SPANISH_MEXICAN",
        4: "SPANISH_GUATEMALA",
        5: "SPANISH_COSTA_RICA",
        6: "SPANISH_PANAMA",
        7: "SPANISH_DOMINICAN_REPUBLIC",
        8: "SPANISH_VENEZUELA",
        9: "SPANISH_COLOMBIA",
        10: "SPANISH_PERU",
        11: "SPANISH_ARGENTINA",
        12: "SPANISH_ECUADOR",
        13: "SPANISH_CHILE",
        14: "SPANISH_URUGUAY",
        15: "SPANISH_PARAGUAY",
        16: "SPANISH_BOLIVIA",
        17: "SPANISH_EL_SALVADOR",
        18: "SPANISH_HONDURAS",
        19: "SPANISH_NICARAGUA",
        20: "SPANISH_PUERTO_RICO",
    }),
    (29, {  # SWEDISH
        1: "SWEDISH",
        2: "SWEDISH_FINLAND",
    }),
    (67, {  # UZBEK
        1: "UZBEK_LATIN",
        2: "UZBEK_CYRILLIC",
    }),
)

# Lossless table: ``sub_language_by_main[primary id][sub id] -> name``.
sub_language_by_main = dict(_SUB_LANGUAGE_GROUPS)

# Legacy flat table, kept so existing ``sub_language.get(sublangid, ...)``
# callers see exactly the values they always did.  It is built by merging
# the groups in their original order, so a later group overrides an earlier
# one for the same id -- the behaviour of the original dict literal.  It is
# lossy by construction; prefer ``lookup_sub_language`` in new code.
sub_language = {0: "NEUTRAL"}
for _langid, _names in _SUB_LANGUAGE_GROUPS:
    sub_language.update(_names)
del _langid, _names


def lookup_sub_language(langid, sublangid, default="NEUTRAL"):
    """
    Return the sub-language name for a primary/sub language id pair.

    :param langid: primary language id (a key of ``main_language``).
    :param sublangid: sub-language id within that primary language.
    :param default: value returned when the pair is not in the table.
    """
    return sub_language_by_main.get(langid, {}).get(sublangid, default)

View File

@ -17,6 +17,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
@ -51,12 +52,7 @@ class EXTHHeader(object):
def process_metadata(self, id, content, codec):
if id == 100:
aus = content.split(',')
if len(aus) > 0:
self.mi.author_sort = aus[0].decode(codec, 'ignore').strip()
self.mi.authors = [aus[1].decode(codec, 'ignore').strip()]
else:
self.mi.authors = [aus[0].decode(codec, 'ignore').strip()]
self.mi.authors = [content.decode(codec, 'ignore').strip()]
elif id == 101:
self.mi.publisher = content.decode(codec, 'ignore').strip()
elif id == 103:
@ -76,6 +72,7 @@ class BookHeader(object):
self.compression_type = raw[:2]
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
self.encryption_type, = struct.unpack('>H', raw[12:14])
self.doctype = raw[16:20]
self.length, self.type, self.codepage, self.unique_id, self.version = \
struct.unpack('>LLLLL', raw[20:40])
@ -100,11 +97,18 @@ class BookHeader(object):
if self.compression_type == 'DH':
self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
langcode = struct.unpack('!L', raw[0x5C:0x60])[0]
langid = langcode & 0xFF
sublangid = (langcode >> 10) & 0xFF
self.language = main_language.get(langid, 'ENGLISH')
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None
if self.exth_flag & 0x40:
self.exth = EXTHHeader(raw[16+self.length:], self.codec)
self.exth.mi.uid = self.unique_id
self.exth.mi.language = self.language
class MobiReader(object):