mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Input: Merge multiple html sections instead of just using the largest one
This commit is contained in:
parent
233c3bcb1b
commit
8d1bcee8dd
@ -233,7 +233,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
name = 'HTML Input'
|
||||
author = 'Kovid Goyal'
|
||||
description = 'Convert HTML and OPF files to an OEB'
|
||||
file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm'])
|
||||
file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
|
||||
|
||||
options = set([
|
||||
OptionRecommendation(name='breadth_first',
|
||||
|
@ -315,18 +315,24 @@ class MobiReader(object):
|
||||
htmls = list(root.xpath('//html'))
|
||||
|
||||
if len(htmls) > 1:
|
||||
self.log.warn('Markup contains multiple <html> tags')
|
||||
# Keep only the largest head and body
|
||||
self.log.warn('Markup contains multiple <html> tags, merging.')
|
||||
# Merge all <head> and <body> sections
|
||||
for h in htmls:
|
||||
p = h.getparent()
|
||||
if hasattr(p, 'remove'):
|
||||
p.remove(h)
|
||||
bodies, heads = root.xpath('//body'), root.xpath('//head')
|
||||
def sz(x): return len(list(x.iter()))
|
||||
def scmp(x, y): return cmp(sz(x), sz(y))
|
||||
body = list(sorted(bodies, cmp=scmp))
|
||||
head = list(sorted(heads, cmp=scmp))
|
||||
for x in root: root.remove(x)
|
||||
if head:
|
||||
root.append(head[-1])
|
||||
if body:
|
||||
root.append(body[-1])
|
||||
head, body = map(root.makeelement, ('head', 'body'))
|
||||
for h in heads:
|
||||
for x in h:
|
||||
h.remove(x)
|
||||
head.append(x)
|
||||
for b in bodies:
|
||||
for x in b:
|
||||
b.remove(x)
|
||||
body.append(x)
|
||||
root.append(head), root.append(body)
|
||||
for x in root.xpath('//script'):
|
||||
x.getparent().remove(x)
|
||||
|
||||
|
@ -823,6 +823,7 @@ class Manifest(object):
|
||||
for key in list(body.attrib.keys()):
|
||||
if key == 'lang' or key.endswith('}lang'):
|
||||
body.attrib.pop(key)
|
||||
|
||||
return data
|
||||
|
||||
def _parse_css(self, data):
|
||||
|
Loading…
x
Reference in New Issue
Block a user