mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
New mobi output: Wrap sections in <div> tags
This commit is contained in:
parent
c313bdf380
commit
e2ba917116
@ -9,6 +9,9 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import struct, datetime, sys, os, shutil
|
import struct, datetime, sys, os, shutil
|
||||||
from collections import OrderedDict, defaultdict
|
from collections import OrderedDict, defaultdict
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
from calibre.utils.date import utc_tz
|
from calibre.utils.date import utc_tz
|
||||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
||||||
@ -1208,6 +1211,19 @@ def inspect_mobi(path_or_stream, prefix='decompiled'):
|
|||||||
for rec in getattr(f, attr):
|
for rec in getattr(f, attr):
|
||||||
rec.dump(tdir)
|
rec.dump(tdir)
|
||||||
|
|
||||||
|
alltext = os.path.join(ddir, 'text.html')
|
||||||
|
with open(alltext, 'wb') as of:
|
||||||
|
alltext = b''
|
||||||
|
for rec in f.text_records:
|
||||||
|
of.write(rec.raw)
|
||||||
|
alltext += rec.raw
|
||||||
|
of.seek(0)
|
||||||
|
root = html.fromstring(alltext.decode('utf-8'))
|
||||||
|
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
|
||||||
|
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
|
||||||
|
include_meta_content_type=True))
|
||||||
|
|
||||||
|
|
||||||
print ('Debug data saved to:', ddir)
|
print ('Debug data saved to:', ddir)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -53,6 +53,35 @@ class Serializer(object):
|
|||||||
# become uncrossable breaks in the MOBI
|
# become uncrossable breaks in the MOBI
|
||||||
self.breaks = []
|
self.breaks = []
|
||||||
|
|
||||||
|
self.find_blocks()
|
||||||
|
|
||||||
|
def find_blocks(self):
    '''
    Mark every item in the spine if it is the start/end of a
    section/article, so that it can be wrapped in divs appropriately.

    Every spine item gets the four boolean attributes
    ``is_section_start``, ``is_section_end``, ``is_article_start`` and
    ``is_article_end`` (all initially False). For every TOC entry whose
    ``klass`` is ``'section'`` and which has child entries:

    * the spine item the section points to is flagged as a section start,
    * the spine item of each child entry is flagged as both an article
      start and an article end,
    * the spine item of the last child that can be resolved is flagged
      as the section end.

    TOC entries whose href (fragment stripped) matches no spine item are
    skipped instead of raising AttributeError. A section that cannot be
    resolved is ignored entirely, and a section none of whose children
    can be resolved is closed on its own spine item, so that a start
    flag is always paired with an end flag.
    '''
    # Reset the flags and index the spine by href in a single pass. Only
    # the first item for a given href is kept, which is the item a linear
    # search through the spine would find.
    first_by_href = {}
    for item in self.oeb.spine:
        item.is_section_start = item.is_section_end = False
        item.is_article_start = item.is_article_end = False
        first_by_href.setdefault(item.href, item)

    def spine_item(tocitem):
        # TOC hrefs may carry a #fragment, spine hrefs are compared
        # without it. Returns None when nothing in the spine matches.
        return first_by_href.get(urldefrag(tocitem.href)[0])

    for item in self.oeb.toc.iterdescendants():
        if item.klass != 'section':
            continue
        articles = list(item)
        if not articles:
            continue
        section_start = spine_item(item)
        if section_start is None:
            # Without a start marker an end marker would be unbalanced
            continue
        section_start.is_section_start = True

        # Falls back to the section's own item when no article resolves
        section_end = section_start
        for article in articles:
            si = spine_item(article)
            if si is None:
                continue
            si.is_article_start = si.is_article_end = True
            section_end = si
        section_end.is_section_end = True
|
||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
'''
|
'''
|
||||||
Return the document serialized as a single UTF-8 encoded bytestring.
|
Return the document serialized as a single UTF-8 encoded bytestring.
|
||||||
@ -155,6 +184,8 @@ class Serializer(object):
|
|||||||
if not item.linear:
|
if not item.linear:
|
||||||
self.breaks.append(buf.tell() - 1)
|
self.breaks.append(buf.tell() - 1)
|
||||||
self.id_offsets[urlnormalize(item.href)] = buf.tell()
|
self.id_offsets[urlnormalize(item.href)] = buf.tell()
|
||||||
|
if item.is_section_start:
|
||||||
|
buf.write(b'<div>')
|
||||||
# Kindle periodical articles are contained in a <div> tag
|
# Kindle periodical articles are contained in a <div> tag
|
||||||
buf.write(b'<div>')
|
buf.write(b'<div>')
|
||||||
for elem in item.data.find(XHTML('body')):
|
for elem in item.data.find(XHTML('body')):
|
||||||
@ -164,6 +195,8 @@ class Serializer(object):
|
|||||||
if self.write_page_breaks_after_item:
|
if self.write_page_breaks_after_item:
|
||||||
buf.write(b'<mbp:pagebreak/>')
|
buf.write(b'<mbp:pagebreak/>')
|
||||||
buf.write(b'</div>')
|
buf.write(b'</div>')
|
||||||
|
if item.is_section_end:
|
||||||
|
buf.write(b'</div>')
|
||||||
self.anchor_offset = None
|
self.anchor_offset = None
|
||||||
|
|
||||||
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user