mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
CHM Input: Handle CHM files with broken markup where all the content is placed inside the <head> tag. Fixes #1273512 [words in italics removed after conversion to mobi](https://bugs.launchpad.net/calibre/+bug/1273512)
This commit is contained in:
parent
6c04e010a5
commit
662b696fd0
@@ -185,6 +185,13 @@ class CHMReader(CHMFile):
|
|||||||
return data
|
return data
|
||||||
# nuke javascript...
|
# nuke javascript...
|
||||||
[s.extract() for s in soup('script')]
|
[s.extract() for s in soup('script')]
|
||||||
|
# See if everything is inside a <head> tag
|
||||||
|
# https://bugs.launchpad.net/bugs/1273512
|
||||||
|
body = soup.find('body')
|
||||||
|
if body is not None and body.parent.name == 'head':
|
||||||
|
html = soup.find('html')
|
||||||
|
html.insert(len(html), body)
|
||||||
|
|
||||||
# remove forward and back nav bars from the top/bottom of each page
|
# remove forward and back nav bars from the top/bottom of each page
|
||||||
# cos they really fuck with the flow of things and generally waste space
|
# cos they really fuck with the flow of things and generally waste space
|
||||||
# since we can't use [a,b] syntax to select arbitrary items from a list
|
# since we can't use [a,b] syntax to select arbitrary items from a list
|
||||||
|
Loading…
x
Reference in New Issue
Block a user