mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge changes from zeno.
This commit is contained in:
commit
12935cec01
@ -128,6 +128,7 @@ class Metadata(object):
|
|||||||
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
|
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
|
||||||
'format', 'identifier', 'language', 'publisher', 'relation',
|
'format', 'identifier', 'language', 'publisher', 'relation',
|
||||||
'rights', 'source', 'subject', 'title', 'type'])
|
'rights', 'source', 'subject', 'title', 'type'])
|
||||||
|
ATTRS = set(['role', 'file-as', 'scheme'])
|
||||||
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
||||||
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
||||||
'xsi': XSI_NS}
|
'xsi': XSI_NS}
|
||||||
@ -144,7 +145,12 @@ class Metadata(object):
|
|||||||
self.value = value
|
self.value = value
|
||||||
self.attrib = attrib = {}
|
self.attrib = attrib = {}
|
||||||
for fq_attr in fq_attrib:
|
for fq_attr in fq_attrib:
|
||||||
attr = barename(fq_attr)
|
if fq_attr in Metadata.ATTRS:
|
||||||
|
attr = fq_attr
|
||||||
|
fq_attr = OPF2(fq_attr)
|
||||||
|
fq_attrib[fq_attr] = fq_attrib.pop(attr)
|
||||||
|
else:
|
||||||
|
attr = barename(fq_attr)
|
||||||
attrib[attr] = fq_attrib[fq_attr]
|
attrib[attr] = fq_attrib[fq_attr]
|
||||||
|
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
@ -161,7 +167,7 @@ class Metadata(object):
|
|||||||
% (barename(self.term), self.value, self.attrib)
|
% (barename(self.term), self.value, self.attrib)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.value.encode('ascii', 'xmlcharrefreplace')
|
return unicode(self.value).encode('ascii', 'xmlcharrefreplace')
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return unicode(self.value)
|
return unicode(self.value)
|
||||||
@ -276,6 +282,14 @@ class Manifest(object):
|
|||||||
if result != 0:
|
if result != 0:
|
||||||
return result
|
return result
|
||||||
return cmp(self.id, other.id)
|
return cmp(self.id, other.id)
|
||||||
|
|
||||||
|
def abshref(self, href):
    """Resolve ``href`` relative to the location of this item.

    Manifest hrefs are ``/``-separated URL-style paths, so resolution is
    done with :mod:`posixpath` rather than the platform-dependent
    :mod:`os.path`; the result is therefore the same on every platform.

    :param href: a reference written relative to this item.
    :return: ``href`` unchanged when this item's own ``href`` contains no
        ``/``; otherwise ``href`` joined onto the directory part of
        ``self.href`` and normalised (``.`` and ``..`` segments
        collapsed), always using ``/`` as the separator.
    """
    # Function-scope import keeps this method self-contained.
    import posixpath
    if '/' not in self.href:
        # No directory component on this item, so there is nothing to
        # resolve against; hand the reference back untouched.
        return href
    base_dir = posixpath.dirname(self.href)
    resolved = posixpath.normpath(posixpath.join(base_dir, href))
    # Any literal backslashes are still mapped to '/' as before.
    return resolved.replace('\\', '/')
|
||||||
|
|
||||||
def __init__(self, oeb):
|
def __init__(self, oeb):
|
||||||
self.oeb = oeb
|
self.oeb = oeb
|
||||||
@ -581,6 +595,15 @@ class OEBBook(object):
|
|||||||
else:
|
else:
|
||||||
self.logger.log_warn(u'Unique-identifier %r not found.' % uid)
|
self.logger.log_warn(u'Unique-identifier %r not found.' % uid)
|
||||||
self.uid = metadata.identifier[0]
|
self.uid = metadata.identifier[0]
|
||||||
|
if not metadata.language:
|
||||||
|
self.logger.log_warn(u'Language not specified.')
|
||||||
|
metadata.add('language', 'en')
|
||||||
|
if not metadata.creator:
|
||||||
|
self.logger.log_warn(u'Creator not specified.')
|
||||||
|
metadata.add('creator', 'Unknown')
|
||||||
|
if not metadata.title:
|
||||||
|
self.logger.log_warn(u'Title not specified.')
|
||||||
|
metadata.add('title', 'Unknown')
|
||||||
|
|
||||||
def _manifest_from_opf(self, opf):
|
def _manifest_from_opf(self, opf):
|
||||||
self.manifest = manifest = Manifest(self)
|
self.manifest = manifest = Manifest(self)
|
||||||
|
@ -3,6 +3,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from struct import pack
|
||||||
|
|
||||||
main_language = {
|
main_language = {
|
||||||
0 : "NEUTRAL",
|
0 : "NEUTRAL",
|
||||||
54 : "AFRIKAANS",
|
54 : "AFRIKAANS",
|
||||||
@ -155,5 +157,168 @@ sub_language = {
|
|||||||
2 : "SWEDISH_FINLAND",
|
2 : "SWEDISH_FINLAND",
|
||||||
1 : "UZBEK_LATIN",
|
1 : "UZBEK_LATIN",
|
||||||
2 : "UZBEK_CYRILLIC",
|
2 : "UZBEK_CYRILLIC",
|
||||||
|
}
|
||||||
}
|
|
||||||
|
IANA_MOBI = \
|
||||||
|
{None: {None: (0, 0)},
|
||||||
|
'af': {None: (54, 0)},
|
||||||
|
'ar': {None: (1, 0),
|
||||||
|
'AE': (1, 56),
|
||||||
|
'BH': (1, 60),
|
||||||
|
'DZ': (1, 20),
|
||||||
|
'EG': (1, 12),
|
||||||
|
'JO': (1, 44),
|
||||||
|
'KW': (1, 52),
|
||||||
|
'LB': (1, 48),
|
||||||
|
'MA': (1, 24),
|
||||||
|
'OM': (1, 32),
|
||||||
|
'QA': (1, 64),
|
||||||
|
'SA': (1, 4),
|
||||||
|
'SY': (1, 40),
|
||||||
|
'TN': (1, 28),
|
||||||
|
'YE': (1, 36)},
|
||||||
|
'as': {None: (77, 0)},
|
||||||
|
'az': {None: (44, 0)},
|
||||||
|
'be': {None: (35, 0)},
|
||||||
|
'bg': {None: (2, 0)},
|
||||||
|
'bn': {None: (69, 0)},
|
||||||
|
'ca': {None: (3, 0)},
|
||||||
|
'cs': {None: (5, 0)},
|
||||||
|
'da': {None: (6, 0)},
|
||||||
|
'de': {None: (7, 0),
|
||||||
|
'AT': (7, 12),
|
||||||
|
'CH': (7, 8),
|
||||||
|
'LI': (7, 20),
|
||||||
|
'LU': (7, 16)},
|
||||||
|
'el': {None: (8, 0)},
|
||||||
|
'en': {None: (9, 0),
|
||||||
|
'AU': (9, 12),
|
||||||
|
'BZ': (9, 40),
|
||||||
|
'CA': (9, 16),
|
||||||
|
'GB': (9, 8),
|
||||||
|
'IE': (9, 24),
|
||||||
|
'JM': (9, 32),
|
||||||
|
'NZ': (9, 20),
|
||||||
|
'PH': (9, 52),
|
||||||
|
'TT': (9, 44),
|
||||||
|
'US': (9, 4),
|
||||||
|
'ZA': (9, 28),
|
||||||
|
'ZW': (9, 48)},
|
||||||
|
'es': {None: (10, 0),
|
||||||
|
'AR': (10, 44),
|
||||||
|
'BO': (10, 64),
|
||||||
|
'CL': (10, 52),
|
||||||
|
'CO': (10, 36),
|
||||||
|
'CR': (10, 20),
|
||||||
|
'DO': (10, 28),
|
||||||
|
'EC': (10, 48),
|
||||||
|
'ES': (10, 4),
|
||||||
|
'GT': (10, 16),
|
||||||
|
'HN': (10, 72),
|
||||||
|
'MX': (10, 8),
|
||||||
|
'NI': (10, 76),
|
||||||
|
'PA': (10, 24),
|
||||||
|
'PE': (10, 40),
|
||||||
|
'PR': (10, 80),
|
||||||
|
'PY': (10, 60),
|
||||||
|
'SV': (10, 68),
|
||||||
|
'UY': (10, 56),
|
||||||
|
'VE': (10, 32)},
|
||||||
|
'et': {None: (37, 0)},
|
||||||
|
'eu': {None: (45, 0)},
|
||||||
|
'fa': {None: (41, 0)},
|
||||||
|
'fi': {None: (11, 0)},
|
||||||
|
'fo': {None: (56, 0)},
|
||||||
|
'fr': {None: (12, 0),
|
||||||
|
'BE': (12, 8),
|
||||||
|
'CA': (12, 12),
|
||||||
|
'CH': (12, 16),
|
||||||
|
'FR': (12, 4),
|
||||||
|
'LU': (12, 20),
|
||||||
|
'MC': (12, 24)},
|
||||||
|
'gu': {None: (71, 0)},
|
||||||
|
'he': {None: (13, 0)},
|
||||||
|
'hi': {None: (57, 0)},
|
||||||
|
'hr': {None: (26, 0)},
|
||||||
|
'hu': {None: (14, 0)},
|
||||||
|
'hy': {None: (43, 0)},
|
||||||
|
'id': {None: (33, 0)},
|
||||||
|
'is': {None: (15, 0)},
|
||||||
|
'it': {None: (16, 0),
|
||||||
|
'CH': (16, 8),
|
||||||
|
'IT': (16, 4)},
|
||||||
|
'ja': {None: (17, 0)},
|
||||||
|
'ka': {None: (55, 0)},
|
||||||
|
'kk': {None: (63, 0)},
|
||||||
|
'kn': {None: (75, 0)},
|
||||||
|
'ko': {None: (18, 0)},
|
||||||
|
'kok': {None: (87, 0)},
|
||||||
|
'lt': {None: (39, 0)},
|
||||||
|
'lv': {None: (38, 0)},
|
||||||
|
'mk': {None: (47, 0)},
|
||||||
|
'ml': {None: (76, 0)},
|
||||||
|
'mr': {None: (78, 0)},
|
||||||
|
'ms': {None: (62, 0)},
|
||||||
|
'mt': {None: (58, 0)},
|
||||||
|
'ne': {None: (97, 0)},
|
||||||
|
'nl': {None: (19, 0),
|
||||||
|
'BE': (19, 8)},
|
||||||
|
'no': {None: (20, 0)},
|
||||||
|
'or': {None: (72, 0)},
|
||||||
|
'pa': {None: (70, 0)},
|
||||||
|
'pl': {None: (21, 0)},
|
||||||
|
'pt': {None: (22, 0),
|
||||||
|
'BR': (22, 4),
|
||||||
|
'PT': (22, 8)},
|
||||||
|
'rm': {None: (23, 0)},
|
||||||
|
'ro': {None: (24, 0)},
|
||||||
|
'ru': {None: (25, 0)},
|
||||||
|
'sa': {None: (79, 0)},
|
||||||
|
'se': {None: (59, 0)},
|
||||||
|
'sk': {None: (27, 0)},
|
||||||
|
'sl': {None: (36, 0)},
|
||||||
|
'sq': {None: (28, 0)},
|
||||||
|
'sr': {None: (26, 12),
|
||||||
|
'RS': (26, 12)},
|
||||||
|
'st': {None: (48, 0)},
|
||||||
|
'sv': {None: (29, 0),
|
||||||
|
'FI': (29, 8)},
|
||||||
|
'sw': {None: (65, 0)},
|
||||||
|
'ta': {None: (73, 0)},
|
||||||
|
'te': {None: (74, 0)},
|
||||||
|
'th': {None: (30, 0)},
|
||||||
|
'tn': {None: (50, 0)},
|
||||||
|
'tr': {None: (31, 0)},
|
||||||
|
'ts': {None: (49, 0)},
|
||||||
|
'tt': {None: (68, 0)},
|
||||||
|
'uk': {None: (34, 0)},
|
||||||
|
'ur': {None: (32, 0)},
|
||||||
|
'uz': {None: (67, 0),
|
||||||
|
'UZ': (67, 8)},
|
||||||
|
'vi': {None: (42, 0)},
|
||||||
|
'wen': {None: (46, 0)},
|
||||||
|
'xh': {None: (52, 0)},
|
||||||
|
'zh': {None: (4, 0),
|
||||||
|
'CN': (4, 8),
|
||||||
|
'HK': (4, 12),
|
||||||
|
'SG': (4, 16),
|
||||||
|
'TW': (4, 4)},
|
||||||
|
'zu': {None: (53, 0)}}
|
||||||
|
|
||||||
|
def iana2mobi(icode):
    """Translate an IANA language tag into a packed MOBI language code.

    The tag is split on ``-``.  The first subtag that (lower-cased) names
    a language in ``IANA_MOBI`` selects that language's table; the
    subtags after it are then tried, as given and upper-cased, as region
    keys of that table.  If no language matches, the neutral entry
    ``IANA_MOBI[None]`` is used; if no region matches, the language's
    default (``None``) entry is used.

    :param icode: language tag such as ``'en'`` or ``'en-US'``.  An empty
        or ``None`` value yields the neutral code.
    :return: 4-byte string packed as ``'>HBB'``: a zero 16-bit field,
        then the second and the first element of the selected
        ``IANA_MOBI`` tuple, in that order.
    """
    # This is a module-level function called with a single argument, so
    # it takes no ``self``; the tag is read from ``icode``.
    subtags = iter(icode.split('-') if icode else [])
    langdict = IANA_MOBI[None]
    # The iterator is shared by both loops: whatever follows the matched
    # language subtag is left over for the region lookup below.
    for lang in subtags:
        lang = lang.lower()
        if lang in IANA_MOBI:
            langdict = IANA_MOBI[lang]
            break
    mcode = langdict[None]
    for subtag in subtags:
        if subtag not in langdict:
            # Region keys in the table are upper-case.
            subtag = subtag.upper()
        if subtag in langdict:
            mcode = langdict[subtag]
            break
    return pack('>HBB', 0, mcode[1], mcode[0])
|
||||||
|
@ -20,6 +20,7 @@ from urlparse import urldefrag
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
from calibre.ebooks.mobi.palmdoc import compress_doc
|
||||||
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
from calibre.ebooks.lit.oeb import XML_NS, XHTML, XHTML_NS, OEB_DOCS
|
from calibre.ebooks.lit.oeb import XML_NS, XHTML, XHTML_NS, OEB_DOCS
|
||||||
from calibre.ebooks.lit.oeb import xpath, barename, namespace, prefixname
|
from calibre.ebooks.lit.oeb import xpath, barename, namespace, prefixname
|
||||||
from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
|
from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
|
||||||
@ -87,19 +88,20 @@ class Serializer(object):
|
|||||||
buffer.write('/>')
|
buffer.write('/>')
|
||||||
buffer.write('</guide>')
|
buffer.write('</guide>')
|
||||||
|
|
||||||
def serialize_href(self, href, baseid=None):
|
def serialize_href(self, href, base=None):
|
||||||
hrefs = self.oeb.manifest.hrefs
|
hrefs = self.oeb.manifest.hrefs
|
||||||
path, frag = urldefrag(href)
|
path, frag = urldefrag(href)
|
||||||
# TODO: Absolute path translation
|
if path and base:
|
||||||
|
path = base.abshref(path)
|
||||||
if path and path not in hrefs:
|
if path and path not in hrefs:
|
||||||
return False
|
return False
|
||||||
buffer = self.buffer
|
buffer = self.buffer
|
||||||
item = hrefs[path] if path else None
|
item = hrefs[path] if path else None
|
||||||
if item and item.spine_position is None:
|
if item and item.spine_position is None:
|
||||||
return False
|
return False
|
||||||
id = item.id if item else baseid
|
id = item.id if item else base.id
|
||||||
frag = frag if frag else 'calibre_top'
|
frag = frag if frag else 'calibre_top'
|
||||||
href = '_'.join((id, frag))
|
href = '#'.join((id, frag))
|
||||||
buffer.write('filepos=')
|
buffer.write('filepos=')
|
||||||
self.href_offsets[href].append(buffer.tell())
|
self.href_offsets[href].append(buffer.tell())
|
||||||
buffer.write('0000000000')
|
buffer.write('0000000000')
|
||||||
@ -117,7 +119,7 @@ class Serializer(object):
|
|||||||
buffer.write('<mbp:pagebreak/>')
|
buffer.write('<mbp:pagebreak/>')
|
||||||
# TODO: Figure out how to make the 'crossable' stuff work for
|
# TODO: Figure out how to make the 'crossable' stuff work for
|
||||||
# non-"linear" spine items.
|
# non-"linear" spine items.
|
||||||
self.id_offsets[item.id + '_calibre_top'] = buffer.tell()
|
self.id_offsets[item.id + '#calibre_top'] = buffer.tell()
|
||||||
for elem in item.data.find(XHTML('body')):
|
for elem in item.data.find(XHTML('body')):
|
||||||
self.serialize_elem(elem, item)
|
self.serialize_elem(elem, item)
|
||||||
|
|
||||||
@ -129,7 +131,7 @@ class Serializer(object):
|
|||||||
tag = prefixname(elem.tag, nsrmap)
|
tag = prefixname(elem.tag, nsrmap)
|
||||||
for attr in ('name', 'id'):
|
for attr in ('name', 'id'):
|
||||||
if attr in elem.attrib:
|
if attr in elem.attrib:
|
||||||
id = '_'.join((item.id, elem.attrib[attr]))
|
id = '#'.join((item.id, elem.attrib[attr]))
|
||||||
self.id_offsets[id] = buffer.tell()
|
self.id_offsets[id] = buffer.tell()
|
||||||
del elem.attrib[attr]
|
del elem.attrib[attr]
|
||||||
buffer.write('<')
|
buffer.write('<')
|
||||||
@ -141,7 +143,7 @@ class Serializer(object):
|
|||||||
attr = prefixname(attr, nsrmap)
|
attr = prefixname(attr, nsrmap)
|
||||||
buffer.write(' ')
|
buffer.write(' ')
|
||||||
if attr == 'href':
|
if attr == 'href':
|
||||||
if self.serialize_href(val, item.id):
|
if self.serialize_href(val, item):
|
||||||
continue
|
continue
|
||||||
elif attr == 'src' and val in hrefs:
|
elif attr == 'src' and val in hrefs:
|
||||||
index = self.images[val]
|
index = self.images[val]
|
||||||
@ -256,19 +258,19 @@ class MobiWriter(object):
|
|||||||
self._records.append(data)
|
self._records.append(data)
|
||||||
|
|
||||||
def _generate_record0(self):
|
def _generate_record0(self):
|
||||||
|
metadata = self._oeb.metadata
|
||||||
exth = self._build_exth()
|
exth = self._build_exth()
|
||||||
record0 = StringIO()
|
record0 = StringIO()
|
||||||
record0.write(pack('>HHIHHHH', self._compress, 0, self._text_length,
|
record0.write(pack('>HHIHHHH', self._compress, 0, self._text_length,
|
||||||
self._text_nrecords, 0x1000, 0, 0))
|
self._text_nrecords, 0x1000, 0, 0))
|
||||||
uid = random.randint(0, 0xffffffff)
|
uid = random.randint(0, 0xffffffff)
|
||||||
title = str(self._oeb.metadata.title[0])
|
title = str(metadata.title[0])
|
||||||
record0.write('MOBI')
|
record0.write('MOBI')
|
||||||
record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 5))
|
record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 5))
|
||||||
record0.write('\xff' * 40)
|
record0.write('\xff' * 40)
|
||||||
record0.write(pack('>I', self._text_nrecords + 1))
|
record0.write(pack('>I', self._text_nrecords + 1))
|
||||||
record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title)))
|
record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title)))
|
||||||
# TODO: Translate <dc:language/> to language code
|
record0.write(iana2mobi(str(metadata.language[0])))
|
||||||
record0.write(pack('>I', 9))
|
|
||||||
record0.write('\0' * 8)
|
record0.write('\0' * 8)
|
||||||
record0.write(pack('>II', 5, self._text_nrecords + 1))
|
record0.write(pack('>II', 5, self._text_nrecords + 1))
|
||||||
record0.write('\0' * 16)
|
record0.write('\0' * 16)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user