mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
remove br from top of page in chm conversion
This commit is contained in:
parent
c91f022385
commit
ac8ccceef8
@ -11,7 +11,7 @@ from mimetypes import guess_type as guess_mimetype
|
||||
from htmlentitydefs import name2codepoint
|
||||
from pprint import PrettyPrinter
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from BeautifulSoup import BeautifulSoup, NavigableString
|
||||
from lxml import html, etree
|
||||
from pychm.chm import CHMFile
|
||||
from pychm.chmlib import (
|
||||
@ -35,6 +35,17 @@ def match_string(s1, s2_already_lowered):
|
||||
return True
|
||||
return False
|
||||
|
||||
def check_all_prev_empty(tag):
    """Return True if no text node at or before ``tag`` holds visible text.

    Starting at ``tag``, follow the ``previousSibling`` links backwards.
    Only nodes whose class is exactly ``NavigableString`` are inspected
    (with ``check_empty``); every other kind of node is skipped over.
    ``None`` -- i.e. there is no node at all -- counts as empty.

    Written as a loop instead of recursion so that a very long run of
    siblings cannot exhaust the interpreter's recursion limit.
    """
    node = tag
    while node is not None:
        # The exact class comparison is kept on purpose: isinstance() would
        # also match NavigableString subclasses (presumably comments and
        # similar nodes -- confirm against BeautifulSoup), which would
        # change which nodes count as text.
        if node.__class__ == NavigableString and not check_empty(node):
            return False
        node = node.previousSibling
    return True
|
||||
|
||||
def check_empty(s, rex=re.compile(r'\S')):
    """Return True when ``s`` contains nothing that ``rex`` matches.

    With the default pattern (compiled once, when the function is defined)
    this means ``s`` is empty or consists solely of whitespace.
    """
    first_hit = rex.search(s)
    return first_hit is None
|
||||
|
||||
|
||||
def option_parser():
|
||||
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
|
||||
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
|
||||
@ -161,6 +172,12 @@ class CHMReader(CHMFile):
|
||||
# for some very odd reason each page's content appears to be in a table
|
||||
# too. and this table has sub-tables for random asides... grr.
|
||||
|
||||
# remove the first <br> when only whitespace text precedes it among its siblings, i.e. it sits at the top of the page once the nav bars are removed
|
||||
br = html('br')
|
||||
if br:
|
||||
if check_all_prev_empty(br[0].previousSibling):
|
||||
br[0].extract()
|
||||
|
||||
# some images seem to be broken in some chm's :/
|
||||
for img in html('img'):
|
||||
try:
|
||||
|
Loading…
x
Reference in New Issue
Block a user