Fix #1065 (EPUB Conversion Error)

This commit is contained in:
Kovid Goyal 2008-09-26 09:25:30 -07:00
parent 0b8168258a
commit ca806a09c3
2 changed files with 7 additions and 12 deletions

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
Split the flows in an epub file to conform to size limitations. Split the flows in an epub file to conform to size limitations.
''' '''
import os, math, copy, logging, functools import os, math, copy, logging, functools, collections
from lxml.etree import XPath as _XPath from lxml.etree import XPath as _XPath
from lxml import etree, html from lxml import etree, html
@ -234,7 +234,7 @@ class Splitter(LoggingInterface):
all anchors in the original tree. Internal links are re-directed. The all anchors in the original tree. Internal links are re-directed. The
original file is deleted and the split files are saved. original file is deleted and the split files are saved.
''' '''
self.anchor_map = {None:self.base%0} self.anchor_map = collections.defaultdict(lambda :self.base%0)
self.files = [] self.files = []
for i, tree in enumerate(self.trees): for i, tree in enumerate(self.trees):

View File

@ -252,15 +252,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
class PreProcessor(object): class PreProcessor(object):
PREPROCESS = [] PREPROCESS = []
# Fix Baen markup
BAEN = [
(re.compile(r'page-break-before:\s*\w+([\s;\}])', re.IGNORECASE),
lambda match: match.group(1)),
(re.compile(r'<p>\s*(<a id.*?>\s*</a>)\s*</p>', re.IGNORECASE),
lambda match: match.group(1)),
(re.compile(r'<\s*a\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*</a>', re.IGNORECASE),
lambda match: ''),
]
# Fix pdftohtml markup # Fix pdftohtml markup
PDFTOHTML = [ PDFTOHTML = [
# Remove <hr> tags # Remove <hr> tags
@ -275,6 +267,9 @@ class PreProcessor(object):
# Remove hyphenation # Remove hyphenation
(re.compile(r'-\n\r?'), lambda match: ''), (re.compile(r'-\n\r?'), lambda match: ''),
# Remove gray background
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>')
] ]
# Fix Book Designer markup # Fix Book Designer markup
@ -305,7 +300,7 @@ class PreProcessor(object):
def preprocess(self, html): def preprocess(self, html):
if self.is_baen(html): if self.is_baen(html):
rules = self.BAEN rules = []
elif self.is_book_designer(html): elif self.is_book_designer(html):
rules = self.BOOK_DESIGNER rules = self.BOOK_DESIGNER
elif self.is_pdftohtml(html): elif self.is_pdftohtml(html):