mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add support for language codes to mobi2oeb
This commit is contained in:
parent
520328c196
commit
d3691a7be2
@ -33,7 +33,7 @@ class MetaInformation(object):
|
||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||
'publisher', 'series', 'series_index', 'rating',
|
||||
'isbn', 'tags', 'cover_data', 'application_id',
|
||||
'manifest', 'spine', 'toc', 'cover'):
|
||||
'manifest', 'spine', 'toc', 'cover', 'language'):
|
||||
if hasattr(mi, attr):
|
||||
setattr(ans, attr, getattr(mi, attr))
|
||||
|
||||
@ -63,6 +63,7 @@ class MetaInformation(object):
|
||||
self.rating = None if not mi else mi.rating
|
||||
self.isbn = None if not mi else mi.isbn
|
||||
self.tags = [] if not mi else mi.tags
|
||||
self.language = None if not mi else mi.language # Typically a string describing the language
|
||||
#: mi.cover_data = (ext, data)
|
||||
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
|
||||
self.application_id = mi.application_id if (mi and hasattr(mi, 'application_id')) else None
|
||||
@ -84,7 +85,8 @@ class MetaInformation(object):
|
||||
|
||||
for attr in ('author_sort', 'title_sort', 'comments', 'category',
|
||||
'publisher', 'series', 'series_index', 'rating',
|
||||
'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover'):
|
||||
'isbn', 'application_id', 'manifest', 'spine', 'toc',
|
||||
'cover', 'language'):
|
||||
if hasattr(mi, attr):
|
||||
val = getattr(mi, attr)
|
||||
if val is not None:
|
||||
@ -115,6 +117,8 @@ class MetaInformation(object):
|
||||
ans += u'Tags : ' +unicode(self.tags) + '\n'
|
||||
if self.series:
|
||||
ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index
|
||||
if self.language:
|
||||
ans += u'Language : ' + unicode(self.language) + u'\n'
|
||||
return ans.strip()
|
||||
|
||||
def __nonzero__(self):
|
||||
|
@ -10,6 +10,7 @@
|
||||
<dc:creator opf:role="aut" py:for="i, author in enumerate(mi.authors)" py:with="attrs={'file-as':mi.author_sort if i==0 else None}" py:attrs="attrs">${author}</dc:creator>
|
||||
<dc:identifier scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
|
||||
|
||||
<dc:language>${mi.language if mi.language else 'Unknown'}</dc:language>
|
||||
<dc:type py:if="mi.category">${mi.category}</dc:type>
|
||||
<dc:description py:if="mi.comments">${mi.comments}</dc:description>
|
||||
<dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
|
||||
@ -17,7 +18,9 @@
|
||||
<series py:if="mi.series">${mi.series}</series>
|
||||
<series-index py:if="mi.series_index is not None">${mi.series_index}</series-index>
|
||||
<rating py:if="mi.rating is not None">${mi.rating}</rating>
|
||||
<dc:subject py:if="mi.tags is not None" py:for="tag in mi.tags">${tag}</dc:subject>
|
||||
<py:for each="tag in mi.tags">
|
||||
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
|
||||
</py:for>
|
||||
</metadata>
|
||||
|
||||
<guide>
|
||||
|
159
src/calibre/ebooks/mobi/langcodes.py
Normal file
159
src/calibre/ebooks/mobi/langcodes.py
Normal file
@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
# Maps the numeric main-language id stored in a MOBI header to a language
# name. Entries are listed alphabetically by name.
#
# The table previously listed id 26 twice: first as "CROATIAN" and later as
# "SERBIAN". In a dict literal the later entry silently wins, so a lookup of
# 26 has always produced "SERBIAN". The dead "CROATIAN" entry is dropped here
# so the literal says what actually happens at runtime.
# NOTE(review): these ids look like Windows primary language identifiers,
# where Croatian and Serbian share one id and are told apart only by the
# sub-language -- confirm before mapping 26 to anything else.
main_language = {
    0: "NEUTRAL",
    54: "AFRIKAANS",
    28: "ALBANIAN",
    1: "ARABIC",
    43: "ARMENIAN",
    77: "ASSAMESE",
    44: "AZERI",
    45: "BASQUE",
    35: "BELARUSIAN",
    69: "BENGALI",
    2: "BULGARIAN",
    3: "CATALAN",
    4: "CHINESE",
    # 26 would also be "CROATIAN"; see the note above and "SERBIAN" below.
    5: "CZECH",
    6: "DANISH",
    19: "DUTCH",
    9: "ENGLISH",
    37: "ESTONIAN",
    56: "FAEROESE",
    41: "FARSI",
    11: "FINNISH",
    12: "FRENCH",
    55: "GEORGIAN",
    7: "GERMAN",
    8: "GREEK",
    71: "GUJARATI",
    13: "HEBREW",
    57: "HINDI",
    14: "HUNGARIAN",
    15: "ICELANDIC",
    33: "INDONESIAN",
    16: "ITALIAN",
    17: "JAPANESE",
    75: "KANNADA",
    63: "KAZAK",
    87: "KONKANI",
    18: "KOREAN",
    38: "LATVIAN",
    39: "LITHUANIAN",
    47: "MACEDONIAN",
    62: "MALAY",
    76: "MALAYALAM",
    58: "MALTESE",
    78: "MARATHI",
    97: "NEPALI",
    20: "NORWEGIAN",
    72: "ORIYA",
    21: "POLISH",
    22: "PORTUGUESE",
    70: "PUNJABI",
    23: "RHAETOROMANIC",
    24: "ROMANIAN",
    25: "RUSSIAN",
    59: "SAMI",
    79: "SANSKRIT",
    26: "SERBIAN",
    27: "SLOVAK",
    36: "SLOVENIAN",
    46: "SORBIAN",
    10: "SPANISH",
    48: "SUTU",
    65: "SWAHILI",
    29: "SWEDISH",
    73: "TAMIL",
    68: "TATAR",
    74: "TELUGU",
    30: "THAI",
    49: "TSONGA",
    50: "TSWANA",
    31: "TURKISH",
    34: "UKRAINIAN",
    32: "URDU",
    67: "UZBEK",
    42: "VIETNAMESE",
    52: "XHOSA",
    53: "ZULU",
}
|
||||
|
||||
# Sub-language (regional variant) names, grouped by the main-language id they
# belong to. Sub-language ids restart at 1 for every main language, so they
# are only meaningful together with the main-language id.
#
# The groups are kept in an ordered tuple, in the same order the entries had
# in the original flat table, because the flat ``sub_language`` mapping below
# depends on later groups overriding earlier ones.
_SUB_LANGUAGE_GROUPS = (
    # (main-language id, ((sub-language id, name), ...))
    (1, (  # ARABIC
        (1, "ARABIC_SAUDI_ARABIA"),
        (2, "ARABIC_IRAQ"),
        (3, "ARABIC_EGYPT"),
        (4, "ARABIC_LIBYA"),
        (5, "ARABIC_ALGERIA"),
        (6, "ARABIC_MOROCCO"),
        (7, "ARABIC_TUNISIA"),
        (8, "ARABIC_OMAN"),
        (9, "ARABIC_YEMEN"),
        (10, "ARABIC_SYRIA"),
        (11, "ARABIC_JORDAN"),
        (12, "ARABIC_LEBANON"),
        (13, "ARABIC_KUWAIT"),
        (14, "ARABIC_UAE"),
        (15, "ARABIC_BAHRAIN"),
        (16, "ARABIC_QATAR"),
    )),
    (44, (  # AZERI
        (1, "AZERI_LATIN"),
        (2, "AZERI_CYRILLIC"),
    )),
    (4, (  # CHINESE
        (1, "CHINESE_TRADITIONAL"),
        (2, "CHINESE_SIMPLIFIED"),
        (3, "CHINESE_HONGKONG"),
        (4, "CHINESE_SINGAPORE"),
    )),
    (19, (  # DUTCH
        (1, "DUTCH"),
        (2, "DUTCH_BELGIAN"),
    )),
    (12, (  # FRENCH
        (1, "FRENCH"),
        (2, "FRENCH_BELGIAN"),
        (3, "FRENCH_CANADIAN"),
        (4, "FRENCH_SWISS"),
        (5, "FRENCH_LUXEMBOURG"),
        (6, "FRENCH_MONACO"),
    )),
    (7, (  # GERMAN
        (1, "GERMAN"),
        (2, "GERMAN_SWISS"),
        (3, "GERMAN_AUSTRIAN"),
        (4, "GERMAN_LUXEMBOURG"),
        (5, "GERMAN_LIECHTENSTEIN"),
    )),
    (16, (  # ITALIAN
        (1, "ITALIAN"),
        (2, "ITALIAN_SWISS"),
    )),
    (18, (  # KOREAN
        (1, "KOREAN"),
    )),
    (39, (  # LITHUANIAN
        (1, "LITHUANIAN"),
    )),
    (62, (  # MALAY
        (1, "MALAY_MALAYSIA"),
        (2, "MALAY_BRUNEI_DARUSSALAM"),
    )),
    (20, (  # NORWEGIAN
        (1, "NORWEGIAN_BOKMAL"),
        (2, "NORWEGIAN_NYNORSK"),
    )),
    (22, (  # PORTUGUESE
        (2, "PORTUGUESE"),
        (1, "PORTUGUESE_BRAZILIAN"),
    )),
    (26, (  # SERBIAN
        (2, "SERBIAN_LATIN"),
        (3, "SERBIAN_CYRILLIC"),
    )),
    (10, (  # SPANISH
        (1, "SPANISH"),
        (2, "SPANISH_MEXICAN"),
        (4, "SPANISH_GUATEMALA"),
        (5, "SPANISH_COSTA_RICA"),
        (6, "SPANISH_PANAMA"),
        (7, "SPANISH_DOMINICAN_REPUBLIC"),
        (8, "SPANISH_VENEZUELA"),
        (9, "SPANISH_COLOMBIA"),
        (10, "SPANISH_PERU"),
        (11, "SPANISH_ARGENTINA"),
        (12, "SPANISH_ECUADOR"),
        (13, "SPANISH_CHILE"),
        (14, "SPANISH_URUGUAY"),
        (15, "SPANISH_PARAGUAY"),
        (16, "SPANISH_BOLIVIA"),
        (17, "SPANISH_EL_SALVADOR"),
        (18, "SPANISH_HONDURAS"),
        (19, "SPANISH_NICARAGUA"),
        (20, "SPANISH_PUERTO_RICO"),
    )),
    (29, (  # SWEDISH
        (1, "SWEDISH"),
        (2, "SWEDISH_FINLAND"),
    )),
    (67, (  # UZBEK
        (1, "UZBEK_LATIN"),
        (2, "UZBEK_CYRILLIC"),
    )),
)

# Correct lookup: sub_language_by_main[main_id][sub_id] -> variant name.
# Main languages without listed variants have no entry here.
sub_language_by_main = dict(
    (main_id, dict(variants)) for main_id, variants in _SUB_LANGUAGE_GROUPS)

# Legacy flat lookup keyed by the sub-language id alone, kept so existing
# ``sub_language.get(sublangid, ...)`` callers see exactly the values they
# always did. The original was a single dict literal that reused ids 1-16 for
# every language family; later entries overwrote earlier ones, so e.g. id 1
# resolves to "UZBEK_LATIN" and most ids resolve to a Spanish variant whatever
# the book's main language is. Prefer ``sub_language_by_main`` in new code.
sub_language = {0: "NEUTRAL"}
for _main_id, _variants in _SUB_LANGUAGE_GROUPS:
    sub_language.update(_variants)
del _main_id, _variants
|
@ -17,6 +17,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.opf import OPFCreator
|
||||
|
||||
@ -51,12 +52,7 @@ class EXTHHeader(object):
|
||||
|
||||
def process_metadata(self, id, content, codec):
|
||||
if id == 100:
|
||||
aus = content.split(',')
|
||||
if len(aus) > 0:
|
||||
self.mi.author_sort = aus[0].decode(codec, 'ignore').strip()
|
||||
self.mi.authors = [aus[1].decode(codec, 'ignore').strip()]
|
||||
else:
|
||||
self.mi.authors = [aus[0].decode(codec, 'ignore').strip()]
|
||||
self.mi.authors = [content.decode(codec, 'ignore').strip()]
|
||||
elif id == 101:
|
||||
self.mi.publisher = content.decode(codec, 'ignore').strip()
|
||||
elif id == 103:
|
||||
@ -76,6 +72,7 @@ class BookHeader(object):
|
||||
self.compression_type = raw[:2]
|
||||
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
|
||||
self.encryption_type, = struct.unpack('>H', raw[12:14])
|
||||
|
||||
self.doctype = raw[16:20]
|
||||
self.length, self.type, self.codepage, self.unique_id, self.version = \
|
||||
struct.unpack('>LLLLL', raw[20:40])
|
||||
@ -100,11 +97,18 @@ class BookHeader(object):
|
||||
if self.compression_type == 'DH':
|
||||
self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
|
||||
|
||||
langcode = struct.unpack('!L', raw[0x5C:0x60])[0]
|
||||
langid = langcode & 0xFF
|
||||
sublangid = (langcode >> 10) & 0xFF
|
||||
self.language = main_language.get(langid, 'ENGLISH')
|
||||
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
|
||||
|
||||
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
||||
self.exth = None
|
||||
if self.exth_flag & 0x40:
|
||||
self.exth = EXTHHeader(raw[16+self.length:], self.codec)
|
||||
self.exth.mi.uid = self.unique_id
|
||||
self.exth.mi.language = self.language
|
||||
|
||||
|
||||
class MobiReader(object):
|
||||
|
Loading…
x
Reference in New Issue
Block a user