mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix #1065 (EPUB Conversion Error)
This commit is contained in:
parent
0b8168258a
commit
ca806a09c3
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Split the flows in an epub file to conform to size limitations.
|
Split the flows in an epub file to conform to size limitations.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os, math, copy, logging, functools
|
import os, math, copy, logging, functools, collections
|
||||||
|
|
||||||
from lxml.etree import XPath as _XPath
|
from lxml.etree import XPath as _XPath
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
@ -234,7 +234,7 @@ class Splitter(LoggingInterface):
|
|||||||
all anchors in the original tree. Internal links are re-directed. The
|
all anchors in the original tree. Internal links are re-directed. The
|
||||||
original file is deleted and the split files are saved.
|
original file is deleted and the split files are saved.
|
||||||
'''
|
'''
|
||||||
self.anchor_map = {None:self.base%0}
|
self.anchor_map = collections.defaultdict(lambda :self.base%0)
|
||||||
self.files = []
|
self.files = []
|
||||||
|
|
||||||
for i, tree in enumerate(self.trees):
|
for i, tree in enumerate(self.trees):
|
||||||
|
@ -252,15 +252,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
|
|||||||
|
|
||||||
class PreProcessor(object):
|
class PreProcessor(object):
|
||||||
PREPROCESS = []
|
PREPROCESS = []
|
||||||
# Fix Baen markup
|
|
||||||
BAEN = [
|
|
||||||
(re.compile(r'page-break-before:\s*\w+([\s;\}])', re.IGNORECASE),
|
|
||||||
lambda match: match.group(1)),
|
|
||||||
(re.compile(r'<p>\s*(<a id.*?>\s*</a>)\s*</p>', re.IGNORECASE),
|
|
||||||
lambda match: match.group(1)),
|
|
||||||
(re.compile(r'<\s*a\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*</a>', re.IGNORECASE),
|
|
||||||
lambda match: ''),
|
|
||||||
]
|
|
||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
PDFTOHTML = [
|
PDFTOHTML = [
|
||||||
# Remove <hr> tags
|
# Remove <hr> tags
|
||||||
@ -275,6 +267,9 @@ class PreProcessor(object):
|
|||||||
# Remove hyphenation
|
# Remove hyphenation
|
||||||
(re.compile(r'-\n\r?'), lambda match: ''),
|
(re.compile(r'-\n\r?'), lambda match: ''),
|
||||||
|
|
||||||
|
# Remove gray background
|
||||||
|
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>')
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fix Book Designer markup
|
# Fix Book Designer markup
|
||||||
@ -305,7 +300,7 @@ class PreProcessor(object):
|
|||||||
|
|
||||||
def preprocess(self, html):
|
def preprocess(self, html):
|
||||||
if self.is_baen(html):
|
if self.is_baen(html):
|
||||||
rules = self.BAEN
|
rules = []
|
||||||
elif self.is_book_designer(html):
|
elif self.is_book_designer(html):
|
||||||
rules = self.BOOK_DESIGNER
|
rules = self.BOOK_DESIGNER
|
||||||
elif self.is_pdftohtml(html):
|
elif self.is_pdftohtml(html):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user