mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
CHM Input: Handle CHM files with broken markup where all the content is placed inside the <head> tag. Fixes #1273512 [words in italics removed after conversion to mobi](https://bugs.launchpad.net/calibre/+bug/1273512)
This commit is contained in:
parent
6c04e010a5
commit
662b696fd0
@@ -185,6 +185,13 @@ class CHMReader(CHMFile):
|
||||
return data
|
||||
# nuke javascript...
|
||||
[s.extract() for s in soup('script')]
|
||||
# See if everything is inside a <head> tag
|
||||
# https://bugs.launchpad.net/bugs/1273512
|
||||
body = soup.find('body')
|
||||
if body is not None and body.parent.name == 'head':
|
||||
html = soup.find('html')
|
||||
html.insert(len(html), body)
|
||||
|
||||
# remove forward and back nav bars from the top/bottom of each page
|
||||
# cos they really fuck with the flow of things and generally waste space
|
||||
# since we can't use [a,b] syntax to select arbitrary items from a list
|
||||
|
Loading…
x
Reference in New Issue
Block a user