Fix #1085 (NY Times news feed in EPUB format)

This commit is contained in:
Kovid Goyal 2008-09-28 13:11:25 -07:00
parent a1d1e273c3
commit 1f1be79a46

View File

@ -17,7 +17,7 @@ from lxml.html import soupparser
from lxml.etree import XPath from lxml.etree import XPath
get_text = XPath("//text()") get_text = XPath("//text()")
from calibre import LoggingInterface, unicode_path from calibre import LoggingInterface, unicode_path, entity_to_unicode
from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS
from calibre.utils.config import Config, StringConfig from calibre.utils.config import Config, StringConfig
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
@ -250,7 +250,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
class PreProcessor(object): class PreProcessor(object):
PREPROCESS = [] PREPROCESS = [(re.compile(r'&(\S+?);'), entity_to_unicode)]
# Fix pdftohtml markup # Fix pdftohtml markup
PDFTOHTML = [ PDFTOHTML = [