diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index f566714878..4385291f69 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -233,7 +233,7 @@ class HTMLInput(InputFormatPlugin): name = 'HTML Input' author = 'Kovid Goyal' description = 'Convert HTML and OPF files to an OEB' - file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm']) + file_types = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml']) options = set([ OptionRecommendation(name='breadth_first', diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index c1ea4d8f2f..686a6efdf9 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -315,18 +315,24 @@ class MobiReader(object): htmls = list(root.xpath('//html')) if len(htmls) > 1: - self.log.warn('Markup contains multiple tags') - # Keep only the largest head and body + self.log.warn('Markup contains multiple tags, merging.') + # Merge all and sections + for h in htmls: + p = h.getparent() + if hasattr(p, 'remove'): + p.remove(h) bodies, heads = root.xpath('//body'), root.xpath('//head') - def sz(x): return len(list(x.iter())) - def scmp(x, y): return cmp(sz(x), sz(y)) - body = list(sorted(bodies, cmp=scmp)) - head = list(sorted(heads, cmp=scmp)) for x in root: root.remove(x) - if head: - root.append(head[-1]) - if body: - root.append(body[-1]) + head, body = map(root.makeelement, ('head', 'body')) + for h in heads: + for x in h: + h.remove(x) + head.append(x) + for b in bodies: + for x in b: + b.remove(x) + body.append(x) + root.append(head), root.append(body) for x in root.xpath('//script'): x.getparent().remove(x) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 711dce0b8d..67a8f04cc8 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -823,6 +823,7 @@ class Manifest(object): for key in list(body.attrib.keys()): if key == 'lang' or key.endswith('}lang'): body.attrib.pop(key) + return data def _parse_css(self, data):