from struct import pack

# Maps an IANA/BCP 47 primary language subtag to a dict of
# ``{region subtag or None: (main language code, sub-language code)}``.
# The ``None`` key of each inner dict is the fallback used when no region
# subtag matches; the outer ``None`` key is the language-neutral fallback.
# The first tuple element corresponds to the keys of ``main_language``.
IANA_MOBI = {
    None: {None: (0, 0)},
    'af': {None: (54, 0)},
    'ar': {None: (1, 0),
           'AE': (1, 56),
           'BH': (1, 60),
           'DZ': (1, 20),
           'EG': (1, 12),
           'JO': (1, 44),
           'KW': (1, 52),
           'LB': (1, 48),
           'MA': (1, 24),
           'OM': (1, 32),
           'QA': (1, 64),
           'SA': (1, 4),
           'SY': (1, 40),
           'TN': (1, 28),
           'YE': (1, 36)},
    'as': {None: (77, 0)},
    'az': {None: (44, 0)},
    'be': {None: (35, 0)},
    'bg': {None: (2, 0)},
    'bn': {None: (69, 0)},
    'ca': {None: (3, 0)},
    'cs': {None: (5, 0)},
    'da': {None: (6, 0)},
    'de': {None: (7, 0),
           'AT': (7, 12),
           'CH': (7, 8),
           'LI': (7, 20),
           'LU': (7, 16)},
    'el': {None: (8, 0)},
    'en': {None: (9, 0),
           'AU': (9, 12),
           'BZ': (9, 40),
           'CA': (9, 16),
           'GB': (9, 8),
           'IE': (9, 24),
           'JM': (9, 32),
           'NZ': (9, 20),
           'PH': (9, 52),
           'TT': (9, 44),
           'US': (9, 4),
           'ZA': (9, 28),
           'ZW': (9, 48)},
    'es': {None: (10, 0),
           'AR': (10, 44),
           'BO': (10, 64),
           'CL': (10, 52),
           'CO': (10, 36),
           'CR': (10, 20),
           'DO': (10, 28),
           'EC': (10, 48),
           'ES': (10, 4),
           'GT': (10, 16),
           'HN': (10, 72),
           'MX': (10, 8),
           'NI': (10, 76),
           'PA': (10, 24),
           'PE': (10, 40),
           'PR': (10, 80),
           'PY': (10, 60),
           'SV': (10, 68),
           'UY': (10, 56),
           'VE': (10, 32)},
    'et': {None: (37, 0)},
    'eu': {None: (45, 0)},
    'fa': {None: (41, 0)},
    'fi': {None: (11, 0)},
    'fo': {None: (56, 0)},
    'fr': {None: (12, 0),
           'BE': (12, 8),
           'CA': (12, 12),
           'CH': (12, 16),
           'FR': (12, 4),
           'LU': (12, 20),
           'MC': (12, 24)},
    'gu': {None: (71, 0)},
    'he': {None: (13, 0)},
    'hi': {None: (57, 0)},
    'hr': {None: (26, 0)},
    'hu': {None: (14, 0)},
    'hy': {None: (43, 0)},
    'id': {None: (33, 0)},
    'is': {None: (15, 0)},
    'it': {None: (16, 0),
           'CH': (16, 8),
           'IT': (16, 4)},
    'ja': {None: (17, 0)},
    'ka': {None: (55, 0)},
    'kk': {None: (63, 0)},
    'kn': {None: (75, 0)},
    'ko': {None: (18, 0)},
    'kok': {None: (87, 0)},
    'lt': {None: (39, 0)},
    'lv': {None: (38, 0)},
    'mk': {None: (47, 0)},
    'ml': {None: (76, 0)},
    'mr': {None: (78, 0)},
    'ms': {None: (62, 0)},
    'mt': {None: (58, 0)},
    'ne': {None: (97, 0)},
    'nl': {None: (19, 0),
           'BE': (19, 8)},
    'no': {None: (20, 0)},
    'or': {None: (72, 0)},
    'pa': {None: (70, 0)},
    'pl': {None: (21, 0)},
    'pt': {None: (22, 0),
           'BR': (22, 4),
           'PT': (22, 8)},
    'rm': {None: (23, 0)},
    'ro': {None: (24, 0)},
    'ru': {None: (25, 0)},
    'sa': {None: (79, 0)},
    'se': {None: (59, 0)},
    'sk': {None: (27, 0)},
    'sl': {None: (36, 0)},
    'sq': {None: (28, 0)},
    'sr': {None: (26, 12),
           'RS': (26, 12)},
    'st': {None: (48, 0)},
    'sv': {None: (29, 0),
           'FI': (29, 8)},
    'sw': {None: (65, 0)},
    'ta': {None: (73, 0)},
    'te': {None: (74, 0)},
    'th': {None: (30, 0)},
    'tn': {None: (50, 0)},
    'tr': {None: (31, 0)},
    'ts': {None: (49, 0)},
    'tt': {None: (68, 0)},
    'uk': {None: (34, 0)},
    'ur': {None: (32, 0)},
    'uz': {None: (67, 0),
           'UZ': (67, 8)},
    'vi': {None: (42, 0)},
    'wen': {None: (46, 0)},
    'xh': {None: (52, 0)},
    'zh': {None: (4, 0),
           'CN': (4, 8),
           'HK': (4, 12),
           'SG': (4, 16),
           'TW': (4, 4)},
    'zu': {None: (53, 0)},
}


def iana2mobi(icode):
    """Translate an IANA language tag into a packed MOBI language field.

    :param icode: A hyphen-separated language tag such as ``'en'``,
        ``'en-US'`` or ``'zh-Hant-TW'``.  An empty value or ``None`` is
        treated as language-neutral.
    :return: Four bytes packed big-endian as ``'>HBB'``: a zero 16-bit
        word, then the sub-language code, then the main language code.

    Subtags are scanned left to right.  The first one that (lower-cased)
    names a language in ``IANA_MOBI`` selects the language; the remaining
    subtags are then searched for the first region known for that
    language.  Unknown languages map to the neutral code ``(0, 0)`` and
    unknown regions fall back to the language's default entry.
    """
    # This is a plain module-level function (the writer calls it with a
    # single argument), so it takes no ``self``.  One shared iterator lets
    # the region search continue where the language search stopped.
    subtags = iter(icode.split('-') if icode else ())

    langdict = IANA_MOBI[None]
    for subtag in subtags:
        lang = subtag.lower()
        if lang in IANA_MOBI:
            langdict = IANA_MOBI[lang]
            break

    mcode = langdict[None]
    for subtag in subtags:
        # Try the subtag as written first, then upper-cased, since region
        # keys in the table are upper-case.
        region = subtag if subtag in langdict else subtag.upper()
        if region in langdict:
            mcode = langdict[region]
            break

    return pack('>HBB', 0, mcode[1], mcode[0])
- self.id_offsets[item.id + '_calibre_top'] = buffer.tell() + self.id_offsets[item.id + '#calibre_top'] = buffer.tell() for elem in item.data.find(XHTML('body')): self.serialize_elem(elem, item) @@ -129,7 +131,7 @@ class Serializer(object): tag = prefixname(elem.tag, nsrmap) for attr in ('name', 'id'): if attr in elem.attrib: - id = '_'.join((item.id, elem.attrib[attr])) + id = '#'.join((item.id, elem.attrib[attr])) self.id_offsets[id] = buffer.tell() del elem.attrib[attr] buffer.write('<') @@ -141,7 +143,7 @@ class Serializer(object): attr = prefixname(attr, nsrmap) buffer.write(' ') if attr == 'href': - if self.serialize_href(val, item.id): + if self.serialize_href(val, item): continue elif attr == 'src' and val in hrefs: index = self.images[val] @@ -256,19 +258,19 @@ class MobiWriter(object): self._records.append(data) def _generate_record0(self): + metadata = self._oeb.metadata exth = self._build_exth() record0 = StringIO() record0.write(pack('>HHIHHHH', self._compress, 0, self._text_length, self._text_nrecords, 0x1000, 0, 0)) uid = random.randint(0, 0xffffffff) - title = str(self._oeb.metadata.title[0]) + title = str(metadata.title[0]) record0.write('MOBI') record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 5)) record0.write('\xff' * 40) record0.write(pack('>I', self._text_nrecords + 1)) record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title))) - # TODO: Translate to language code - record0.write(pack('>I', 9)) + record0.write(iana2mobi(str(metadata.language[0]))) record0.write('\0' * 8) record0.write(pack('>II', 5, self._text_nrecords + 1)) record0.write('\0' * 16)