From ca37c601ec6bb272b4c6c391b0b8dd866abcbba5 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 7 Dec 2008 21:30:28 -0500 Subject: [PATCH 01/22] Refactor the LitReader: - Protect whitespace-only nodes with tags so they can be safely pretty-printed. - Separate the LitReader class into a low-level LitFile class and a high-level LitReader class. --- src/calibre/ebooks/lit/reader.py | 373 +++++++++++++++++-------------- 1 file changed, 206 insertions(+), 167 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 671e48ab76..1ab11548e5 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -7,9 +7,10 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' \ 'and Marshall T. Vandegrift ' -import sys, struct, cStringIO, os +import sys, struct, os import functools import re +from cStringIO import StringIO from lxml import etree from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP @@ -19,6 +20,8 @@ from calibre import plugins lzx, lxzerror = plugins['lzx'] msdes, msdeserror = plugins['msdes'] +__all__ = ["LitReader"] + XML_DECL = """ """ OPF_DECL = """ @@ -106,31 +109,54 @@ def consume_sized_utf8_string(bytes, zpad=False): pos += 1 return u''.join(result), bytes[pos:] + class UnBinary(object): AMPERSAND_RE = re.compile( r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)') OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)') - CLOSE_ANGLE_RE = re.compile(r'(?