mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix handling of img in <a href> tags. Remove show-broken-links option.
This commit is contained in:
parent
7056e35aff
commit
7329106b99
@ -247,7 +247,7 @@ class HTMLConverter(object):
|
|||||||
chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE),
|
chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE),
|
||||||
link_exclude=re.compile('$'),
|
link_exclude=re.compile('$'),
|
||||||
page_break=re.compile('h[12]', re.IGNORECASE),
|
page_break=re.compile('h[12]', re.IGNORECASE),
|
||||||
profile=PRS500_PROFILE, hide_broken_links=False,
|
profile=PRS500_PROFILE,
|
||||||
disable_autorotation=False):
|
disable_autorotation=False):
|
||||||
'''
|
'''
|
||||||
Convert HTML file at C{path} and add it to C{book}. After creating
|
Convert HTML file at C{path} and add it to C{book}. After creating
|
||||||
@ -283,7 +283,7 @@ class HTMLConverter(object):
|
|||||||
tags if no page-breaks are found and no chapter headings
|
tags if no page-breaks are found and no chapter headings
|
||||||
are detected.
|
are detected.
|
||||||
@param profile: Defines the geometry of the display device
|
@param profile: Defines the geometry of the display device
|
||||||
@param hide_broken_links: Don't display broken links
|
@param disable_autorotation: Don't autorotate very wide images
|
||||||
'''
|
'''
|
||||||
# Defaults for various formatting tags
|
# Defaults for various formatting tags
|
||||||
self.css = dict(
|
self.css = dict(
|
||||||
@ -330,7 +330,6 @@ class HTMLConverter(object):
|
|||||||
self.book = book #: The Book object representing a BBeB book
|
self.book = book #: The Book object representing a BBeB book
|
||||||
self.is_root = is_root #: Are we converting the root HTML file
|
self.is_root = is_root #: Are we converting the root HTML file
|
||||||
self.lstrip_toggle = False #: If true the next add_text call will do an lstrip
|
self.lstrip_toggle = False #: If true the next add_text call will do an lstrip
|
||||||
self.hide_broken_links = hide_broken_links
|
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
os.chdir(os.path.dirname(path))
|
os.chdir(os.path.dirname(path))
|
||||||
self.file_name = os.path.basename(path)
|
self.file_name = os.path.basename(path)
|
||||||
@ -486,6 +485,9 @@ class HTMLConverter(object):
|
|||||||
if isinstance(c, NavigableString):
|
if isinstance(c, NavigableString):
|
||||||
text += str(c)
|
text += str(c)
|
||||||
elif isinstance(c, Tag):
|
elif isinstance(c, Tag):
|
||||||
|
if c.name.lower() == 'img' and c.has_key('alt'):
|
||||||
|
text += c['alt']
|
||||||
|
return text
|
||||||
text += self.get_text(c)
|
text += self.get_text(c)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
@ -545,9 +547,6 @@ class HTMLConverter(object):
|
|||||||
text = img['alt']
|
text = img['alt']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
if self.hide_broken_links:
|
|
||||||
para.contents = []
|
|
||||||
para.append(_Span(text=text))
|
|
||||||
purl = urlparse(link.tag['href'])
|
purl = urlparse(link.tag['href'])
|
||||||
if purl[1]: # Not a link to a file on the local filesystem
|
if purl[1]: # Not a link to a file on the local filesystem
|
||||||
continue
|
continue
|
||||||
@ -584,7 +583,6 @@ class HTMLConverter(object):
|
|||||||
chapter_regex=self.chapter_regex,
|
chapter_regex=self.chapter_regex,
|
||||||
link_exclude=self.link_exclude,
|
link_exclude=self.link_exclude,
|
||||||
page_break=self.page_break,
|
page_break=self.page_break,
|
||||||
hide_broken_links=self.hide_broken_links,
|
|
||||||
disable_autorotation=self.disable_autorotation)
|
disable_autorotation=self.disable_autorotation)
|
||||||
HTMLConverter.processed_files[path] = self.files[path]
|
HTMLConverter.processed_files[path] = self.files[path]
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -911,7 +909,10 @@ class HTMLConverter(object):
|
|||||||
['png', 'jpg', 'bmp', 'jpeg']:
|
['png', 'jpg', 'bmp', 'jpeg']:
|
||||||
self.process_image(path, tag_css)
|
self.process_image(path, tag_css)
|
||||||
else:
|
else:
|
||||||
self.add_text(self.get_text(tag), tag_css)
|
text = self.get_text(tag)
|
||||||
|
if not text:
|
||||||
|
text = "Link"
|
||||||
|
self.add_text(text, tag_css)
|
||||||
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
|
||||||
elif tagname == 'img':
|
elif tagname == 'img':
|
||||||
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
|
||||||
@ -1163,7 +1164,6 @@ def process_file(path, options):
|
|||||||
chapter_detection=options.chapter_detection,
|
chapter_detection=options.chapter_detection,
|
||||||
chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE),
|
chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE),
|
||||||
link_exclude=re.compile(le), page_break=pb,
|
link_exclude=re.compile(le), page_break=pb,
|
||||||
hide_broken_links=not options.show_broken_links,
|
|
||||||
disable_autorotation=options.disable_autorotation)
|
disable_autorotation=options.disable_autorotation)
|
||||||
conv.process_links()
|
conv.process_links()
|
||||||
oname = options.output
|
oname = options.output
|
||||||
@ -1270,12 +1270,6 @@ def parse_options(argv=None, cli=True):
|
|||||||
prepro = parser.add_option_group('PREPROCESSING OPTIONS')
|
prepro = parser.add_option_group('PREPROCESSING OPTIONS')
|
||||||
prepro.add_option('--baen', action='store_true', default=False, dest='baen',
|
prepro.add_option('--baen', action='store_true', default=False, dest='baen',
|
||||||
help='''Preprocess Baen HTML files to improve generated LRF.''')
|
help='''Preprocess Baen HTML files to improve generated LRF.''')
|
||||||
debug = None
|
|
||||||
for g in parser.option_groups:
|
|
||||||
if g.title == 'DEBUG OPTIONS':
|
|
||||||
debug = g
|
|
||||||
debug.add_option('--show-broken-links', dest='show_broken_links', action='store_true',
|
|
||||||
default=False, help='''Show the href of broken links in generated LRF''')
|
|
||||||
options, args = parser.parse_args(args=argv)
|
options, args = parser.parse_args(args=argv)
|
||||||
if len(args) != 1:
|
if len(args) != 1:
|
||||||
if cli:
|
if cli:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user