mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Input: Merge multiple html sections instead of just using the largest one
This commit is contained in:
parent
233c3bcb1b
commit
8d1bcee8dd
@ -233,7 +233,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
name = 'HTML Input'
|
name = 'HTML Input'
|
||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
description = 'Convert HTML and OPF files to an OEB'
|
description = 'Convert HTML and OPF files to an OEB'
|
||||||
file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm'])
|
file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
|
||||||
|
|
||||||
options = set([
|
options = set([
|
||||||
OptionRecommendation(name='breadth_first',
|
OptionRecommendation(name='breadth_first',
|
||||||
|
@ -315,18 +315,24 @@ class MobiReader(object):
|
|||||||
htmls = list(root.xpath('//html'))
|
htmls = list(root.xpath('//html'))
|
||||||
|
|
||||||
if len(htmls) > 1:
|
if len(htmls) > 1:
|
||||||
self.log.warn('Markup contains multiple <html> tags')
|
self.log.warn('Markup contains multiple <html> tags, merging.')
|
||||||
# Keep only the largest head and body
|
# Merge all <head> and <body> sections
|
||||||
|
for h in htmls:
|
||||||
|
p = h.getparent()
|
||||||
|
if hasattr(p, 'remove'):
|
||||||
|
p.remove(h)
|
||||||
bodies, heads = root.xpath('//body'), root.xpath('//head')
|
bodies, heads = root.xpath('//body'), root.xpath('//head')
|
||||||
def sz(x): return len(list(x.iter()))
|
|
||||||
def scmp(x, y): return cmp(sz(x), sz(y))
|
|
||||||
body = list(sorted(bodies, cmp=scmp))
|
|
||||||
head = list(sorted(heads, cmp=scmp))
|
|
||||||
for x in root: root.remove(x)
|
for x in root: root.remove(x)
|
||||||
if head:
|
head, body = map(root.makeelement, ('head', 'body'))
|
||||||
root.append(head[-1])
|
for h in heads:
|
||||||
if body:
|
for x in h:
|
||||||
root.append(body[-1])
|
h.remove(x)
|
||||||
|
head.append(x)
|
||||||
|
for b in bodies:
|
||||||
|
for x in b:
|
||||||
|
b.remove(x)
|
||||||
|
body.append(x)
|
||||||
|
root.append(head), root.append(body)
|
||||||
for x in root.xpath('//script'):
|
for x in root.xpath('//script'):
|
||||||
x.getparent().remove(x)
|
x.getparent().remove(x)
|
||||||
|
|
||||||
|
@ -823,6 +823,7 @@ class Manifest(object):
|
|||||||
for key in list(body.attrib.keys()):
|
for key in list(body.attrib.keys()):
|
||||||
if key == 'lang' or key.endswith('}lang'):
|
if key == 'lang' or key.endswith('}lang'):
|
||||||
body.attrib.pop(key)
|
body.attrib.pop(key)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _parse_css(self, data):
|
def _parse_css(self, data):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user