mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix #1065 (EPUB Conversion Error)
This commit is contained in:
parent
0b8168258a
commit
ca806a09c3
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
|
||||
Split the flows in an epub file to conform to size limitations.
|
||||
'''
|
||||
|
||||
import os, math, copy, logging, functools
|
||||
import os, math, copy, logging, functools, collections
|
||||
|
||||
from lxml.etree import XPath as _XPath
|
||||
from lxml import etree, html
|
||||
@ -234,7 +234,7 @@ class Splitter(LoggingInterface):
|
||||
all anchors in the original tree. Internal links are re-directed. The
|
||||
original file is deleted and the split files are saved.
|
||||
'''
|
||||
self.anchor_map = {None:self.base%0}
|
||||
self.anchor_map = collections.defaultdict(lambda :self.base%0)
|
||||
self.files = []
|
||||
|
||||
for i, tree in enumerate(self.trees):
|
||||
|
@ -252,15 +252,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
|
||||
|
||||
class PreProcessor(object):
|
||||
PREPROCESS = []
|
||||
# Fix Baen markup
|
||||
BAEN = [
|
||||
(re.compile(r'page-break-before:\s*\w+([\s;\}])', re.IGNORECASE),
|
||||
lambda match: match.group(1)),
|
||||
(re.compile(r'<p>\s*(<a id.*?>\s*</a>)\s*</p>', re.IGNORECASE),
|
||||
lambda match: match.group(1)),
|
||||
(re.compile(r'<\s*a\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*</a>', re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
|
||||
# Fix pdftohtml markup
|
||||
PDFTOHTML = [
|
||||
# Remove <hr> tags
|
||||
@ -275,6 +267,9 @@ class PreProcessor(object):
|
||||
# Remove hyphenation
|
||||
(re.compile(r'-\n\r?'), lambda match: ''),
|
||||
|
||||
# Remove gray background
|
||||
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>')
|
||||
|
||||
]
|
||||
|
||||
# Fix Book Designer markup
|
||||
@ -305,7 +300,7 @@ class PreProcessor(object):
|
||||
|
||||
def preprocess(self, html):
|
||||
if self.is_baen(html):
|
||||
rules = self.BAEN
|
||||
rules = []
|
||||
elif self.is_book_designer(html):
|
||||
rules = self.BOOK_DESIGNER
|
||||
elif self.is_pdftohtml(html):
|
||||
|
Loading…
x
Reference in New Issue
Block a user