mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Significantly improve Mobipocket content conversion.
This commit is contained in:
parent
fc59f7b63d
commit
c4582ed2a8
@ -23,6 +23,8 @@ HEADER_TAGS = set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
|
||||
NESTABLE_TAGS = set(['ol', 'ul', 'li', 'table', 'tr', 'td'])
|
||||
CONTENT_TAGS = set(['img', 'hr', 'br'])
|
||||
|
||||
PAGE_BREAKS = set(['always', 'odd', 'even'])
|
||||
|
||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
class BlockState(object):
|
||||
@ -34,6 +36,7 @@ class BlockState(object):
|
||||
self.vpadding = 0.
|
||||
self.vmargin = 0.
|
||||
self.left = 0.
|
||||
self.pbreak = False
|
||||
self.istate = None
|
||||
|
||||
class FormatState(object):
|
||||
@ -92,6 +95,9 @@ class MobiMLizer(object):
|
||||
istate = istates[-1]
|
||||
if bstate.para is None:
|
||||
bstate.istate = None
|
||||
if bstate.pbreak:
|
||||
etree.SubElement(bstate.body, MBP('pagebreak'))
|
||||
bstate.pbreak = False
|
||||
if tag in NESTABLE_TAGS:
|
||||
parent = bstate.nested[-1] if bstate.nested else bstate.body
|
||||
para = wrapper = etree.SubElement(parent, tag)
|
||||
@ -117,8 +123,11 @@ class MobiMLizer(object):
|
||||
pstate = bstate.istate
|
||||
para = bstate.para
|
||||
if istate.ids:
|
||||
body = bstate.body
|
||||
index = max((0, len(body) - 2))
|
||||
for id in istate.ids:
|
||||
etree.SubElement(para, 'a', id=id)
|
||||
body.insert(index, etree.Element('a', attrib={'id': id}))
|
||||
istate.ids.clear()
|
||||
if tag in CONTENT_TAGS:
|
||||
bstate.inline = para
|
||||
pstate = bstate.istate = None
|
||||
@ -176,9 +185,12 @@ class MobiMLizer(object):
|
||||
left = margin + padding
|
||||
bstate.left += left
|
||||
bstate.vmargin = max((bstate.vmargin, style['margin-top']))
|
||||
if style['padding-top']:
|
||||
padding = style['padding-top']
|
||||
if isinstance(padding, (int, float)) and padding > 0:
|
||||
bstate.vpadding += bstate.vmargin
|
||||
bstate.vpadding = style['padding-top']
|
||||
bstate.vpadding = padding
|
||||
if style['page-break-before'] in PAGE_BREAKS:
|
||||
bstate.pbreak = True
|
||||
istate.fsize = self.mobimlize_font(style['font-size'])
|
||||
istate.italic = True if style['font-style'] == 'italic' else False
|
||||
weight = style['font-weight']
|
||||
@ -207,6 +219,7 @@ class MobiMLizer(object):
|
||||
istate.attrib.clear()
|
||||
if tag == 'img' and 'src' in elem.attrib:
|
||||
istate.attrib['src'] = elem.attrib['src']
|
||||
istate.attrib['align'] = 'baseline'
|
||||
if tag == 'hr' and 'width' in style.cssdict():
|
||||
istate.attrib['width'] = mobimlize_measure(style['width'])
|
||||
text = None
|
||||
@ -225,19 +238,25 @@ class MobiMLizer(object):
|
||||
if child.tail:
|
||||
if istate.preserve:
|
||||
tail = child.tail
|
||||
elif bstate.para is None and child.text.isspace():
|
||||
elif bstate.para is None and child.tail.isspace():
|
||||
tail = None
|
||||
else:
|
||||
tail = COLLAPSE.sub(' ', child.tail)
|
||||
if tail:
|
||||
self.mobimlize_content(tag, tail, bstate, istates)
|
||||
if style['page-break-after'] in PAGE_BREAKS:
|
||||
bstate.pbreak = True
|
||||
if isblock:
|
||||
para = bstate.para
|
||||
if para is not None and para.text == u'\xa0':
|
||||
para.getparent().replace(para, etree.Element('br'))
|
||||
bstate.para = None
|
||||
bstate.left -= left
|
||||
bstate.vmargin = max((bstate.vmargin, style['margin-bottom']))
|
||||
if style['padding-bottom']:
|
||||
padding = style['padding-bottom']
|
||||
if isinstance(padding, (int, float)) and padding > 0:
|
||||
bstate.vpadding += bstate.vmargin
|
||||
bstate.vpadding = style['padding-bottom']
|
||||
bstate.vpadding = padding
|
||||
if bstate.nested:
|
||||
bstate.nested.pop()
|
||||
istates.pop()
|
||||
|
@ -50,7 +50,8 @@ PALMDOC = 2
|
||||
HUFFDIC = 17480
|
||||
|
||||
def encode(data):
|
||||
return data.encode('ascii', 'xmlcharrefreplace')
|
||||
#return data.encode('ascii', 'xmlcharrefreplace')
|
||||
return data.encode('utf-8')
|
||||
|
||||
# Almost like the one for MS LIT, but not quite.
|
||||
def decint(value):
|
||||
@ -92,12 +93,16 @@ class Serializer(object):
|
||||
|
||||
def serialize_guide(self):
|
||||
buffer = self.buffer
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
buffer.write('<guide>')
|
||||
for ref in self.oeb.guide.values():
|
||||
path, frag = urldefrag(ref.href)
|
||||
if hrefs[path].media_type not in OEB_DOCS:
|
||||
continue
|
||||
buffer.write('<reference title="%s" type="%s" '
|
||||
% (ref.title, ref.type))
|
||||
self.serialize_href(ref.href)
|
||||
buffer.write('/>')
|
||||
buffer.write(' />')
|
||||
buffer.write('</guide>')
|
||||
|
||||
def serialize_href(self, href, base=None):
|
||||
@ -238,19 +243,21 @@ class MobiWriter(object):
|
||||
while len(data) > 0:
|
||||
if self._compress == PALMDOC:
|
||||
data = compress_doc(data)
|
||||
record = StringIO()
|
||||
record.write(data)
|
||||
# Without the NUL Mobipocket Desktop 6.2 will thrash. Why?
|
||||
record = [data, '\0']
|
||||
record.write('\0')
|
||||
nextra = 0
|
||||
pbreak = 0
|
||||
running = offset
|
||||
while breaks and (breaks[0] - offset) < RECORD_SIZE:
|
||||
pbreak = (breaks.pop(0) - running) >> 3
|
||||
encoded = decint(pbreak)
|
||||
record.append(encoded)
|
||||
record.write(encoded)
|
||||
running += pbreak << 3
|
||||
nextra += len(encoded)
|
||||
record.append(decint(nextra + 1))
|
||||
self._records.append(''.join(record))
|
||||
record.write(decint(nextra + 1))
|
||||
self._records.append(record.getvalue())
|
||||
nrecords += 1
|
||||
offset += RECORD_SIZE
|
||||
data = text.read(RECORD_SIZE)
|
||||
@ -385,7 +392,7 @@ def main(argv=sys.argv):
|
||||
inpath, outpath = argv[1:]
|
||||
context = Context('MSReader', 'Cybook3')
|
||||
oeb = OEBBook(inpath)
|
||||
writer = MobiWriter()
|
||||
writer = MobiWriter(compress=PALMDOC)
|
||||
#writer = DirWriter()
|
||||
fbase = context.dest.fbase
|
||||
fkey = context.dest.fnums.values()
|
||||
|
@ -42,7 +42,7 @@ PROFILES = {
|
||||
# No clue on usable screen size and DPI
|
||||
'Cybook3':
|
||||
Profile(width=584, height=754, dpi=168.451, fbase=12,
|
||||
fsizes=[9, 10, 11, 12, 14, 16, 18, 20]),
|
||||
fsizes=[9, 10, 11, 12, 14, 17, 20, 24]),
|
||||
|
||||
'Firefox':
|
||||
Profile(width=800, height=600, dpi=100.0, fbase=12,
|
||||
|
Loading…
x
Reference in New Issue
Block a user