From 662b696fd0e094bcd36bc40662d3b687a6026733 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 28 Jan 2014 14:22:25 +0530 Subject: [PATCH] CHM Input: Handle CHM files with broken markup where all the content is placed inside the <head> tag. Fixes #1273512 [words in italics removed after conversion to mobi](https://bugs.launchpad.net/calibre/+bug/1273512) --- src/calibre/ebooks/chm/reader.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index 57d28c2e0e..3947040f6b 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -185,6 +185,13 @@ class CHMReader(CHMFile): return data # nuke javascript... [s.extract() for s in soup('script')] + # See if everything is inside a <head> tag + # https://bugs.launchpad.net/bugs/1273512 + body = soup.find('body') + if body is not None and body.parent.name == 'head': + html = soup.find('html') + html.insert(len(html), body) + # remove forward and back nav bars from the top/bottom of each page # cos they really fuck with the flow of things and generally waste space # since we can't use [a,b] syntax to select arbitrary items from a list