mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Move replacement of nbsp in pdftohtml output from pipeline to input plugin. Makes viewing PDF in the viewer a bit better. See #1814626 (Text goes over to other page)
This commit is contained in:
parent
0c815cd06d
commit
aafc038b17
@@ -531,9 +531,6 @@ class HTMLPreProcessor(object):
|
|||||||
rules = []
|
rules = []
|
||||||
|
|
||||||
start_rules = []
|
start_rules = []
|
||||||
if is_pdftohtml:
|
|
||||||
# Remove non breaking spaces
|
|
||||||
start_rules.append((re.compile(unicode(r'\u00a0')), lambda match : ' '))
|
|
||||||
|
|
||||||
if not getattr(self.extra_opts, 'keep_ligatures', False):
|
if not getattr(self.extra_opts, 'keep_ligatures', False):
|
||||||
html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
|
html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
|
||||||
|
@@ -107,6 +107,7 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
|
|||||||
raw = re.sub(r'<a id="(\d+)"', r'<a id="p\1"', raw, flags=re.I)
|
raw = re.sub(r'<a id="(\d+)"', r'<a id="p\1"', raw, flags=re.I)
|
||||||
raw = re.sub(r'<a href="index.html#(\d+)"', r'<a href="#p\1"', raw, flags=re.I)
|
raw = re.sub(r'<a href="index.html#(\d+)"', r'<a href="#p\1"', raw, flags=re.I)
|
||||||
raw = replace_entities(raw)
|
raw = replace_entities(raw)
|
||||||
|
raw = raw.replace('\u00a0', ' ')
|
||||||
|
|
||||||
i.write(raw.encode('utf-8'))
|
i.write(raw.encode('utf-8'))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user