mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1085 (NY times news feed in epub format)
This commit is contained in:
parent
a1d1e273c3
commit
1f1be79a46
@@ -17,7 +17,7 @@ from lxml.html import soupparser
|
|||||||
from lxml.etree import XPath
|
from lxml.etree import XPath
|
||||||
get_text = XPath("//text()")
|
get_text = XPath("//text()")
|
||||||
|
|
||||||
from calibre import LoggingInterface, unicode_path
|
from calibre import LoggingInterface, unicode_path, entity_to_unicode
|
||||||
from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS
|
from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS
|
||||||
from calibre.utils.config import Config, StringConfig
|
from calibre.utils.config import Config, StringConfig
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
@@ -250,7 +250,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
|
|||||||
|
|
||||||
|
|
||||||
class PreProcessor(object):
|
class PreProcessor(object):
|
||||||
PREPROCESS = []
|
PREPROCESS = [(re.compile(r'&(\S+?);'), entity_to_unicode)]
|
||||||
|
|
||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
PDFTOHTML = [
|
PDFTOHTML = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user