Mobipocket generation:

- Produce correct Mobi language codes. - Properly interpret relative @href attributes.
2025-07-09 03:04:10 -04:00 · 2008-12-30 19:39:52 -05:00 · 2008-12-30 19:39:52 -05:00 · b899f3084c
commit b899f3084c
parent 0182685b76
2 changed files with 179 additions and 12 deletions
--- a/src/calibre/ebooks/mobi/langcodes.py
+++ b/src/calibre/ebooks/mobi/langcodes.py
@ -3,6 +3,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 from struct import pack
 main_language = {
         0 : "NEUTRAL",
         54 : "AFRIKAANS",
@ -155,5 +157,168 @@ sub_language = {
           2 : "SWEDISH_FINLAND",
           1 : "UZBEK_LATIN",
           2 : "UZBEK_CYRILLIC",
           }
 IANA_MOBI = \
    {None: {None: (0, 0)},
     'af': {None: (54, 0)},
     'ar': {None: (1, 0),
            'AE': (1, 56),
            'BH': (1, 60),
            'DZ': (1, 20),
            'EG': (1, 12),
            'JO': (1, 44),
            'KW': (1, 52),
            'LB': (1, 48),
            'MA': (1, 24),
            'OM': (1, 32),
            'QA': (1, 64),
            'SA': (1, 4),
            'SY': (1, 40),
            'TN': (1, 28),
            'YE': (1, 36)},
     'as': {None: (77, 0)},
     'az': {None: (44, 0)},
     'be': {None: (35, 0)},
     'bg': {None: (2, 0)},
     'bn': {None: (69, 0)},
     'ca': {None: (3, 0)},
     'cs': {None: (5, 0)},
     'da': {None: (6, 0)},
     'de': {None: (7, 0),
            'AT': (7, 12),
            'CH': (7, 8),
            'LI': (7, 20),
            'LU': (7, 16)},
     'el': {None: (8, 0)},
     'en': {None: (9, 0),
            'AU': (9, 12),
            'BZ': (9, 40),
            'CA': (9, 16),
            'GB': (9, 8),
            'IE': (9, 24),
            'JM': (9, 32),
            'NZ': (9, 20),
            'PH': (9, 52),
            'TT': (9, 44),
            'US': (9, 4),
            'ZA': (9, 28),
            'ZW': (9, 48)},
     'es': {None: (10, 0),
            'AR': (10, 44),
            'BO': (10, 64),
            'CL': (10, 52),
            'CO': (10, 36),
            'CR': (10, 20),
            'DO': (10, 28),
            'EC': (10, 48),
            'ES': (10, 4),
            'GT': (10, 16),
            'HN': (10, 72),
            'MX': (10, 8),
            'NI': (10, 76),
            'PA': (10, 24),
            'PE': (10, 40),
            'PR': (10, 80),
            'PY': (10, 60),
            'SV': (10, 68),
            'UY': (10, 56),
            'VE': (10, 32)},
     'et': {None: (37, 0)},
     'eu': {None: (45, 0)},
     'fa': {None: (41, 0)},
     'fi': {None: (11, 0)},
     'fo': {None: (56, 0)},
     'fr': {None: (12, 0),
            'BE': (12, 8),
            'CA': (12, 12),
            'CH': (12, 16),
            'FR': (12, 4),
            'LU': (12, 20),
            'MC': (12, 24)},
     'gu': {None: (71, 0)},
     'he': {None: (13, 0)},
     'hi': {None: (57, 0)},
     'hr': {None: (26, 0)},
     'hu': {None: (14, 0)},
     'hy': {None: (43, 0)},
     'id': {None: (33, 0)},
     'is': {None: (15, 0)},
     'it': {None: (16, 0),
            'CH': (16, 8),
            'IT': (16, 4)},
     'ja': {None: (17, 0)},
     'ka': {None: (55, 0)},
     'kk': {None: (63, 0)},
     'kn': {None: (75, 0)},
     'ko': {None: (18, 0)},
     'kok': {None: (87, 0)},
     'lt': {None: (39, 0)},
     'lv': {None: (38, 0)},
     'mk': {None: (47, 0)},
     'ml': {None: (76, 0)},
     'mr': {None: (78, 0)},
     'ms': {None: (62, 0)},
     'mt': {None: (58, 0)},
     'ne': {None: (97, 0)},
     'nl': {None: (19, 0),
            'BE': (19, 8)},
     'no': {None: (20, 0)},
     'or': {None: (72, 0)},
     'pa': {None: (70, 0)},
     'pl': {None: (21, 0)},
     'pt': {None: (22, 0),
            'BR': (22, 4),
            'PT': (22, 8)},
     'rm': {None: (23, 0)},
     'ro': {None: (24, 0)},
     'ru': {None: (25, 0)},
     'sa': {None: (79, 0)},
     'se': {None: (59, 0)},
     'sk': {None: (27, 0)},
     'sl': {None: (36, 0)},
     'sq': {None: (28, 0)},
     'sr': {None: (26, 12),
            'RS': (26, 12)},
     'st': {None: (48, 0)},
     'sv': {None: (29, 0),
            'FI': (29, 8)},
     'sw': {None: (65, 0)},
     'ta': {None: (73, 0)},
     'te': {None: (74, 0)},
     'th': {None: (30, 0)},
     'tn': {None: (50, 0)},
     'tr': {None: (31, 0)},
     'ts': {None: (49, 0)},
     'tt': {None: (68, 0)},
     'uk': {None: (34, 0)},
     'ur': {None: (32, 0)},
     'uz': {None: (67, 0),
            'UZ': (67, 8)},
     'vi': {None: (42, 0)},
     'wen': {None: (46, 0)},
     'xh': {None: (52, 0)},
     'zh': {None: (4, 0),
            'CN': (4, 8),
            'HK': (4, 12),
            'SG': (4, 16),
            'TW': (4, 4)},
     'zu': {None: (53, 0)}}
 def iana2mobi(self, icode):
    subtags = list(code.split('-'))
    langdict = IANA_MOBI[None]
    while len(subtags) > 0:
        lang = subtags.pop(0).lower()
        if lang in IANA_MOBI:
            langdict = IANA_MOBI[lang]
            break
    mcode = langdict[None]
    while len(subtags) > 0:
        subtag = subtags.pop(0)
        if subtag not in langdict:
            subtag = subtag.upper()
        if subtag in langdict:
            mcode = langdict[subtag]
            break
    return pack('>HBB', 0, mcode[1], mcode[0])
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -20,6 +20,7 @@ from urlparse import urldefrag
 from lxml import etree
 from PIL import Image
 from calibre.ebooks.mobi.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.ebooks.lit.oeb import XML_NS, XHTML, XHTML_NS, OEB_DOCS
 from calibre.ebooks.lit.oeb import xpath, barename, namespace, prefixname
 from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
@ -87,19 +88,20 @@ class Serializer(object):
            buffer.write('/>')
        buffer.write('</guide>')
-    def serialize_href(self, href, baseid=None):
+    def serialize_href(self, href, base=None):
        hrefs = self.oeb.manifest.hrefs
        path, frag = urldefrag(href)
-        # TODO: Absolute path translation
+        if path and base:
            path = base.abshref(path)
        if path and path not in hrefs:
            return False
        buffer = self.buffer
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
-        id =  item.id if item else baseid
+        id =  item.id if item else base.id
        frag = frag if frag else 'calibre_top'
-        href = '_'.join((id, frag))
+        href = '#'.join((id, frag))
        buffer.write('filepos=')
        self.href_offsets[href].append(buffer.tell())
        buffer.write('0000000000')
@ -117,7 +119,7 @@ class Serializer(object):
        buffer.write('<mbp:pagebreak/>')
        # TODO: Figure out how to make the 'crossable' stuff work for
        # non-"linear" spine items.
-        self.id_offsets[item.id + '_calibre_top'] = buffer.tell()
+        self.id_offsets[item.id + '#calibre_top'] = buffer.tell()
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)
@ -129,7 +131,7 @@ class Serializer(object):
        tag = prefixname(elem.tag, nsrmap)
        for attr in ('name', 'id'):
            if attr in elem.attrib:
-                id = '_'.join((item.id, elem.attrib[attr]))
+                id = '#'.join((item.id, elem.attrib[attr]))
                self.id_offsets[id] = buffer.tell()
                del elem.attrib[attr]
        buffer.write('<')
@ -141,7 +143,7 @@ class Serializer(object):
                attr = prefixname(attr, nsrmap)
                buffer.write(' ')
                if attr == 'href':
-                    if self.serialize_href(val, item.id):
+                    if self.serialize_href(val, item):
                        continue
                elif attr == 'src' and val in hrefs:
                    index = self.images[val]
@ -256,19 +258,19 @@ class MobiWriter(object):
            self._records.append(data)
    def _generate_record0(self):
        metadata = self._oeb.metadata
        exth = self._build_exth()
        record0 = StringIO()
        record0.write(pack('>HHIHHHH', self._compress, 0, self._text_length,
            self._text_nrecords, 0x1000, 0, 0))
        uid = random.randint(0, 0xffffffff)
-        title = str(self._oeb.metadata.title[0])
+        title = str(metadata.title[0])
        record0.write('MOBI')
        record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 5))
        record0.write('\xff' * 40)
        record0.write(pack('>I', self._text_nrecords + 1))
        record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title)))
-        # TODO: Translate <dc:language/> to language code
+        record0.write(iana2mobi(str(metadata.language[0])))
        record0.write(pack('>I', 9))
        record0.write('\0' * 8)
        record0.write(pack('>II', 5, self._text_nrecords + 1))
        record0.write('\0' * 16)