Fixes for Mobipocket extraction:

- Fix #1817.  Ignore invalid filepos hyperlink targets.
  - From the output of the book that produced #1817, noticed a bug with the
    <i/> and <b/> -> <span/> conversion.  Move the conversion to post-parsing.
  - Correct image MIME type in output OPF.
This commit is contained in:
Marshall T. Vandegrift 2009-02-11 00:46:48 -05:00
parent dcd258b52b
commit 2326873b7c

View File

@ -247,9 +247,6 @@ class MobiReader(object):
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<')
for t, c in [('b', 'bold'), ('i', 'italic')]:
self.processed_html = re.sub(r'(?i)<%s>'%t, r'<span class="%s">'%c, self.processed_html)
self.processed_html = re.sub(r'(?i)</%s>'%t, r'</span>', self.processed_html)
def upshift_markup(self, root):
if self.verbose:
@ -295,27 +292,35 @@ class MobiReader(object):
styles.append('page-break-before: always')
styles.append('display: block')
styles.append('margin: 0')
if styles:
attrib['style'] = '; '.join(styles)
if tag.tag.lower() == 'font':
elif tag.tag == 'i':
tag.tag = 'span'
tag.attrib['class'] = 'italic'
elif tag.tag == 'b':
tag.tag = 'span'
tag.attrib['class'] = 'bold'
elif tag.tag == 'font':
sz = tag.get('size', '').lower()
try:
float(sz)
except ValueError:
if sz in size_map.keys():
attrib['size'] = size_map[sz]
if 'filepos-id' in attrib:
attrib['id'] = attrib.pop('filepos-id')
if 'filepos' in attrib:
filepos = int(attrib.pop('filepos'))
attrib['href'] = "#filepos%d" % filepos
if tag.tag == 'img':
elif tag.tag == 'img':
recindex = None
for attr in self.IMAGE_ATTRS:
recindex = attrib.pop(attr, None) or recindex
if recindex is not None:
attrib['src'] = 'images/%s.jpg' % recindex
if styles:
attrib['style'] = '; '.join(styles)
if 'filepos-id' in attrib:
attrib['id'] = attrib.pop('filepos-id')
if 'filepos' in attrib:
filepos = attrib.pop('filepos')
try:
attrib['href'] = "#filepos%d" % int(filepos)
except ValueError:
pass
def create_opf(self, htmlfile, guide=None):
mi = self.book_header.exth.mi
@ -325,7 +330,7 @@ class MobiReader(object):
manifest = [(htmlfile, 'text/x-oeb1-document')]
bp = os.path.dirname(htmlfile)
for i in getattr(self, 'image_names', []):
manifest.append((os.path.join(bp, 'images/', i), 'image/jpg'))
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
opf.create_manifest(manifest)
opf.create_spine([os.path.basename(htmlfile)])