mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add support for language codes to mobi2oeb
This commit is contained in:
parent
520328c196
commit
d3691a7be2
@ -33,7 +33,7 @@ class MetaInformation(object):
|
|||||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn', 'tags', 'cover_data', 'application_id',
|
'isbn', 'tags', 'cover_data', 'application_id',
|
||||||
'manifest', 'spine', 'toc', 'cover'):
|
'manifest', 'spine', 'toc', 'cover', 'language'):
|
||||||
if hasattr(mi, attr):
|
if hasattr(mi, attr):
|
||||||
setattr(ans, attr, getattr(mi, attr))
|
setattr(ans, attr, getattr(mi, attr))
|
||||||
|
|
||||||
@ -63,6 +63,7 @@ class MetaInformation(object):
|
|||||||
self.rating = None if not mi else mi.rating
|
self.rating = None if not mi else mi.rating
|
||||||
self.isbn = None if not mi else mi.isbn
|
self.isbn = None if not mi else mi.isbn
|
||||||
self.tags = [] if not mi else mi.tags
|
self.tags = [] if not mi else mi.tags
|
||||||
|
self.language = None if not mi else mi.language # Typically a string describing the language
|
||||||
#: mi.cover_data = (ext, data)
|
#: mi.cover_data = (ext, data)
|
||||||
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
|
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
|
||||||
self.application_id = mi.application_id if (mi and hasattr(mi, 'application_id')) else None
|
self.application_id = mi.application_id if (mi and hasattr(mi, 'application_id')) else None
|
||||||
@ -84,7 +85,8 @@ class MetaInformation(object):
|
|||||||
|
|
||||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||||
'publisher', 'series', 'series_index', 'rating',
|
'publisher', 'series', 'series_index', 'rating',
|
||||||
'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover'):
|
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
||||||
|
'cover', 'language'):
|
||||||
if hasattr(mi, attr):
|
if hasattr(mi, attr):
|
||||||
val = getattr(mi, attr)
|
val = getattr(mi, attr)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
@ -115,6 +117,8 @@ class MetaInformation(object):
|
|||||||
ans += u'Tags : ' +unicode(self.tags) + '\n'
|
ans += u'Tags : ' +unicode(self.tags) + '\n'
|
||||||
if self.series:
|
if self.series:
|
||||||
ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index
|
ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index
|
||||||
|
if self.language:
|
||||||
|
ans += u'Language : ' + unicode(self.language) + u'\n'
|
||||||
return ans.strip()
|
return ans.strip()
|
||||||
|
|
||||||
def __nonzero__(self):
|
def __nonzero__(self):
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
<dc:creator opf:role="aut" py:for="i, author in enumerate(mi.authors)" py:with="attrs={'file-as':mi.author_sort if i==0 else None}" py:attrs="attrs">${author}</dc:creator>
|
<dc:creator opf:role="aut" py:for="i, author in enumerate(mi.authors)" py:with="attrs={'file-as':mi.author_sort if i==0 else None}" py:attrs="attrs">${author}</dc:creator>
|
||||||
<dc:identifier scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
|
<dc:identifier scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
|
||||||
|
|
||||||
|
<dc:language>${mi.language if mi.language else 'Unknown'}</dc:language>
|
||||||
<dc:type py:if="mi.category">${mi.category}</dc:type>
|
<dc:type py:if="mi.category">${mi.category}</dc:type>
|
||||||
<dc:description py:if="mi.comments">${mi.comments}</dc:description>
|
<dc:description py:if="mi.comments">${mi.comments}</dc:description>
|
||||||
<dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
|
<dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
|
||||||
@ -17,7 +18,9 @@
|
|||||||
<series py:if="mi.series">${mi.series}</series>
|
<series py:if="mi.series">${mi.series}</series>
|
||||||
<series-index py:if="mi.series_index is not None">${mi.series_index}</series-index>
|
<series-index py:if="mi.series_index is not None">${mi.series_index}</series-index>
|
||||||
<rating py:if="mi.rating is not None">${mi.rating}</rating>
|
<rating py:if="mi.rating is not None">${mi.rating}</rating>
|
||||||
<dc:subject py:if="mi.tags is not None" py:for="tag in mi.tags">${tag}</dc:subject>
|
<py:for each="tag in mi.tags">
|
||||||
|
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
|
||||||
|
</py:for>
|
||||||
</metadata>
|
</metadata>
|
||||||
|
|
||||||
<guide>
|
<guide>
|
||||||
|
159
src/calibre/ebooks/mobi/langcodes.py
Normal file
159
src/calibre/ebooks/mobi/langcodes.py
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
# Maps the main-language code stored in a MOBI header (the low byte of the
# language field) to an upper-case language name.
#
# The table is kept as an ordered sequence of (code, name) pairs, in the same
# order as the original listing, and turned into a dict below.
#
# NOTE: code 26 is listed twice ("CROATIAN" and "SERBIAN"). A dict can hold
# only one value per key and the later pair wins, so main_language[26] is
# "SERBIAN" and "CROATIAN" is never returned. Telling the two apart needs the
# sub-language code as well; this table alone cannot do it.
_MAIN_LANGUAGE_PAIRS = (
    (0, "NEUTRAL"),
    (54, "AFRIKAANS"),
    (28, "ALBANIAN"),
    (1, "ARABIC"),
    (43, "ARMENIAN"),
    (77, "ASSAMESE"),
    (44, "AZERI"),
    (45, "BASQUE"),
    (35, "BELARUSIAN"),
    (69, "BENGALI"),
    (2, "BULGARIAN"),
    (3, "CATALAN"),
    (4, "CHINESE"),
    (26, "CROATIAN"),  # shadowed by (26, "SERBIAN") further down
    (5, "CZECH"),
    (6, "DANISH"),
    (19, "DUTCH"),
    (9, "ENGLISH"),
    (37, "ESTONIAN"),
    (56, "FAEROESE"),
    (41, "FARSI"),
    (11, "FINNISH"),
    (12, "FRENCH"),
    (55, "GEORGIAN"),
    (7, "GERMAN"),
    (8, "GREEK"),
    (71, "GUJARATI"),
    (13, "HEBREW"),
    (57, "HINDI"),
    (14, "HUNGARIAN"),
    (15, "ICELANDIC"),
    (33, "INDONESIAN"),
    (16, "ITALIAN"),
    (17, "JAPANESE"),
    (75, "KANNADA"),
    (63, "KAZAK"),
    (87, "KONKANI"),
    (18, "KOREAN"),
    (38, "LATVIAN"),
    (39, "LITHUANIAN"),
    (47, "MACEDONIAN"),
    (62, "MALAY"),
    (76, "MALAYALAM"),
    (58, "MALTESE"),
    (78, "MARATHI"),
    (97, "NEPALI"),
    (20, "NORWEGIAN"),
    (72, "ORIYA"),
    (21, "POLISH"),
    (22, "PORTUGUESE"),
    (70, "PUNJABI"),
    (23, "RHAETOROMANIC"),
    (24, "ROMANIAN"),
    (25, "RUSSIAN"),
    (59, "SAMI"),
    (79, "SANSKRIT"),
    (26, "SERBIAN"),  # overrides (26, "CROATIAN") above
    (27, "SLOVAK"),
    (36, "SLOVENIAN"),
    (46, "SORBIAN"),
    (10, "SPANISH"),
    (48, "SUTU"),
    (65, "SWAHILI"),
    (29, "SWEDISH"),
    (73, "TAMIL"),
    (68, "TATAR"),
    (74, "TELUGU"),
    (30, "THAI"),
    (49, "TSONGA"),
    (50, "TSWANA"),
    (31, "TURKISH"),
    (34, "UKRAINIAN"),
    (32, "URDU"),
    (67, "UZBEK"),
    (42, "VIETNAMESE"),
    (52, "XHOSA"),
    (53, "ZULU"),
)

# dict() applies the pairs in order, so a repeated code keeps its last name.
main_language = dict(_MAIN_LANGUAGE_PAIRS)
|
||||||
|
|
||||||
|
# Sub-language (regional / script variant) names.
#
# A sub-language code is only meaningful together with its main-language
# code: 1 means "ARABIC_SAUDI_ARABIA" for Arabic but "FRENCH" for French.
# The original flat literal repeated the same integer keys for every
# language, so all but the last binding of each key was silently discarded.
# The data is therefore stored grouped by main-language code (the codes are
# the ones used in ``main_language``), in the original listing order.
_SUB_LANGUAGE_GROUPS = (
    (1, {  # ARABIC
        1: "ARABIC_SAUDI_ARABIA",
        2: "ARABIC_IRAQ",
        3: "ARABIC_EGYPT",
        4: "ARABIC_LIBYA",
        5: "ARABIC_ALGERIA",
        6: "ARABIC_MOROCCO",
        7: "ARABIC_TUNISIA",
        8: "ARABIC_OMAN",
        9: "ARABIC_YEMEN",
        10: "ARABIC_SYRIA",
        11: "ARABIC_JORDAN",
        12: "ARABIC_LEBANON",
        13: "ARABIC_KUWAIT",
        14: "ARABIC_UAE",
        15: "ARABIC_BAHRAIN",
        16: "ARABIC_QATAR",
    }),
    (44, {  # AZERI
        1: "AZERI_LATIN",
        2: "AZERI_CYRILLIC",
    }),
    (4, {  # CHINESE
        1: "CHINESE_TRADITIONAL",
        2: "CHINESE_SIMPLIFIED",
        3: "CHINESE_HONGKONG",
        4: "CHINESE_SINGAPORE",
    }),
    (19, {  # DUTCH
        1: "DUTCH",
        2: "DUTCH_BELGIAN",
    }),
    (12, {  # FRENCH
        1: "FRENCH",
        2: "FRENCH_BELGIAN",
        3: "FRENCH_CANADIAN",
        4: "FRENCH_SWISS",
        5: "FRENCH_LUXEMBOURG",
        6: "FRENCH_MONACO",
    }),
    (7, {  # GERMAN
        1: "GERMAN",
        2: "GERMAN_SWISS",
        3: "GERMAN_AUSTRIAN",
        4: "GERMAN_LUXEMBOURG",
        5: "GERMAN_LIECHTENSTEIN",
    }),
    (16, {  # ITALIAN
        1: "ITALIAN",
        2: "ITALIAN_SWISS",
    }),
    (18, {  # KOREAN
        1: "KOREAN",
    }),
    (39, {  # LITHUANIAN
        1: "LITHUANIAN",
    }),
    (62, {  # MALAY
        1: "MALAY_MALAYSIA",
        2: "MALAY_BRUNEI_DARUSSALAM",
    }),
    (20, {  # NORWEGIAN
        1: "NORWEGIAN_BOKMAL",
        2: "NORWEGIAN_NYNORSK",
    }),
    (22, {  # PORTUGUESE
        2: "PORTUGUESE",
        1: "PORTUGUESE_BRAZILIAN",
    }),
    (26, {  # SERBIAN (code shared with CROATIAN in main_language)
        2: "SERBIAN_LATIN",
        3: "SERBIAN_CYRILLIC",
    }),
    (10, {  # SPANISH
        1: "SPANISH",
        2: "SPANISH_MEXICAN",
        4: "SPANISH_GUATEMALA",
        5: "SPANISH_COSTA_RICA",
        6: "SPANISH_PANAMA",
        7: "SPANISH_DOMINICAN_REPUBLIC",
        8: "SPANISH_VENEZUELA",
        9: "SPANISH_COLOMBIA",
        10: "SPANISH_PERU",
        11: "SPANISH_ARGENTINA",
        12: "SPANISH_ECUADOR",
        13: "SPANISH_CHILE",
        14: "SPANISH_URUGUAY",
        15: "SPANISH_PARAGUAY",
        16: "SPANISH_BOLIVIA",
        17: "SPANISH_EL_SALVADOR",
        18: "SPANISH_HONDURAS",
        19: "SPANISH_NICARAGUA",
        20: "SPANISH_PUERTO_RICO",
    }),
    (29, {  # SWEDISH
        1: "SWEDISH",
        2: "SWEDISH_FINLAND",
    }),
    (67, {  # UZBEK
        1: "UZBEK_LATIN",
        2: "UZBEK_CYRILLIC",
    }),
)

# {main_language_code: {sub_language_code: name}} -- the unambiguous table.
sub_language_by_main = dict(_SUB_LANGUAGE_GROUPS)

# Legacy flat table, kept so existing ``sub_language.get(code, default)``
# callers see exactly the values they always did. Because the groups are
# merged in listing order, a later language overwrites an earlier one for the
# same code (e.g. 1 -> "UZBEK_LATIN"), which is what the original literal
# evaluated to. Prefer get_sub_language() for new code.
sub_language = {0: "NEUTRAL"}
for _main_code, _names in _SUB_LANGUAGE_GROUPS:
    sub_language.update(_names)
del _main_code, _names


def get_sub_language(langid, sublangid, default="NEUTRAL"):
    """Return the sub-language name for a (main, sub) language code pair.

    :param langid: main-language code, as used for ``main_language`` keys.
    :param sublangid: sub-language code within that main language.
    :param default: value returned when the pair is not in the table.
    :return: the sub-language name, or ``default`` if the pair is unknown.
    """
    return sub_language_by_main.get(langid, {}).get(sublangid, default)
|
@ -17,6 +17,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|||||||
from calibre.ebooks.mobi import MobiError
|
from calibre.ebooks.mobi import MobiError
|
||||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||||
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.opf import OPFCreator
|
from calibre.ebooks.metadata.opf import OPFCreator
|
||||||
|
|
||||||
@ -51,18 +52,13 @@ class EXTHHeader(object):
|
|||||||
|
|
||||||
def process_metadata(self, id, content, codec):
|
def process_metadata(self, id, content, codec):
|
||||||
if id == 100:
|
if id == 100:
|
||||||
aus = content.split(',')
|
self.mi.authors = [content.decode(codec, 'ignore').strip()]
|
||||||
if len(aus) > 0:
|
|
||||||
self.mi.author_sort = aus[0].decode(codec, 'ignore').strip()
|
|
||||||
self.mi.authors = [aus[1].decode(codec, 'ignore').strip()]
|
|
||||||
else:
|
|
||||||
self.mi.authors = [aus[0].decode(codec, 'ignore').strip()]
|
|
||||||
elif id == 101:
|
elif id == 101:
|
||||||
self.mi.publisher = content.decode(codec, 'ignore').strip()
|
self.mi.publisher = content.decode(codec, 'ignore').strip()
|
||||||
elif id == 103:
|
elif id == 103:
|
||||||
self.mi.comments = content.decode(codec, 'ignore')
|
self.mi.comments = content.decode(codec, 'ignore')
|
||||||
elif id == 104:
|
elif id == 104:
|
||||||
self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
|
self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
|
||||||
elif id == 105:
|
elif id == 105:
|
||||||
if not self.mi.tags:
|
if not self.mi.tags:
|
||||||
self.mi.tags = []
|
self.mi.tags = []
|
||||||
@ -76,6 +72,7 @@ class BookHeader(object):
|
|||||||
self.compression_type = raw[:2]
|
self.compression_type = raw[:2]
|
||||||
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
|
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
|
||||||
self.encryption_type, = struct.unpack('>H', raw[12:14])
|
self.encryption_type, = struct.unpack('>H', raw[12:14])
|
||||||
|
|
||||||
self.doctype = raw[16:20]
|
self.doctype = raw[16:20]
|
||||||
self.length, self.type, self.codepage, self.unique_id, self.version = \
|
self.length, self.type, self.codepage, self.unique_id, self.version = \
|
||||||
struct.unpack('>LLLLL', raw[20:40])
|
struct.unpack('>LLLLL', raw[20:40])
|
||||||
@ -100,11 +97,18 @@ class BookHeader(object):
|
|||||||
if self.compression_type == 'DH':
|
if self.compression_type == 'DH':
|
||||||
self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
|
self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
|
||||||
|
|
||||||
|
langcode = struct.unpack('!L', raw[0x5C:0x60])[0]
|
||||||
|
langid = langcode & 0xFF
|
||||||
|
sublangid = (langcode >> 10) & 0xFF
|
||||||
|
self.language = main_language.get(langid, 'ENGLISH')
|
||||||
|
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
|
||||||
|
|
||||||
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
||||||
self.exth = None
|
self.exth = None
|
||||||
if self.exth_flag & 0x40:
|
if self.exth_flag & 0x40:
|
||||||
self.exth = EXTHHeader(raw[16+self.length:], self.codec)
|
self.exth = EXTHHeader(raw[16+self.length:], self.codec)
|
||||||
self.exth.mi.uid = self.unique_id
|
self.exth.mi.uid = self.unique_id
|
||||||
|
self.exth.mi.language = self.language
|
||||||
|
|
||||||
|
|
||||||
class MobiReader(object):
|
class MobiReader(object):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user