mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PDF input: Replace U+2029 with spaces. See #1917386 (search & replace doesn't work with "newlines" in PDF to MOBI conversion)
This commit is contained in:
parent
3b79e215e8
commit
20f4e43044
@ -101,7 +101,7 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
|
||||
raw = re.sub(r'<a id="(\d+)"', r'<a id="p\1"', raw, flags=re.I)
|
||||
raw = re.sub(r'<a href="index.html#(\d+)"', r'<a href="#p\1"', raw, flags=re.I)
|
||||
raw = xml_replace_entities(raw)
|
||||
raw = raw.replace('\u00a0', ' ')
|
||||
raw = re.sub('[\u00a0\u2029]', ' ', raw)
|
||||
|
||||
i.write(raw.encode('utf-8'))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user