From 106b7172080ef522009d440ea7be440f1942d2e1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 28 Oct 2013 16:22:23 +0530 Subject: [PATCH] Use the new parser for Tweak Books --- src/calibre/ebooks/oeb/polish/container.py | 28 ++++++++++++------- src/calibre/ebooks/oeb/polish/parsing.py | 1 + .../ebooks/oeb/polish/tests/container.py | 6 +++- src/calibre/gui2/tweak_book/boss.py | 6 +++- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index f6b5841ca5..d87f90a73c 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -30,6 +30,7 @@ from calibre.ebooks.oeb.base import ( serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF, rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote) from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError +from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.utils.filenames import nlinks_file, hardlink_file @@ -98,6 +99,7 @@ class Container(object): # {{{ self.log = log self.html_preprocessor = HTMLPreProcessor() self.css_preprocessor = CSSPreProcessor() + self.tweak_mode = False self.parsed_cache = {} self.mime_map = {} @@ -110,7 +112,7 @@ class Container(object): # {{{ if clone_data is not None: self.cloned = True - for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map'): + for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map', 'tweak_mode'): setattr(self, x, clone_data[x]) self.opf_dir = os.path.dirname(self.name_path_map[self.opf_name]) return @@ -150,6 +152,7 @@ class Container(object): # {{{ 'mime_map': self.mime_map.copy(), 'pretty_print': set(self.pretty_print), 'encoding_map': self.encoding_map.copy(), + 'tweak_mode': self.tweak_mode, 'name_path_map': { name:os.path.join(dest_dir, os.path.relpath(path, self.root)) for name, path in self.name_path_map.iteritems()} @@ -343,13 +346,16 @@ class Container(object): # {{{ return etree.fromstring(data, parser=RECOVER_PARSER) def parse_xhtml(self, data, fname): - try: - return parse_html( - data, log=self.log, decoder=self.decode, - preprocessor=self.html_preprocessor, filename=fname, - non_html_file_tags={'ncx'}) - except NotHTML: - return self.parse_xml(data) + if self.tweak_mode: + return parse_html_tweak(data, log=self.log, decoder=self.decode) + else: + try: + return parse_html( + data, log=self.log, decoder=self.decode, + preprocessor=self.html_preprocessor, filename=fname, + non_html_file_tags={'ncx'}) + except NotHTML: + return self.parse_xml(data) def parse(self, path, mime): with open(path, 'rb') as src: @@ -367,7 +373,8 @@ class Container(object): # {{{ log.setLevel(logging.WARN) log.raiseExceptions = False data = self.decode(data) - data = self.css_preprocessor(data) + if not self.tweak_mode: + data = self.css_preprocessor(data) parser = CSSParser(loglevel=logging.WARNING, # We dont care about @import rules fetcher=lambda x: (None, None), log=_css_logger) @@ -1000,11 +1007,12 @@ class AZW3Container(Container): return set(self.name_path_map) # }}} -def get_container(path, log=None, tdir=None): +def get_container(path, log=None, tdir=None, tweak_mode=False): if log is None: log = default_log ebook = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi'} else EpubContainer)(path, log, tdir=tdir) + ebook.tweak_mode = tweak_mode return ebook def test_roundtrip(): diff --git a/src/calibre/ebooks/oeb/polish/parsing.py b/src/calibre/ebooks/oeb/polish/parsing.py index 961ce8b5a9..4a697bb473 100644 --- a/src/calibre/ebooks/oeb/polish/parsing.py +++ b/src/calibre/ebooks/oeb/polish/parsing.py @@ -616,6 +616,7 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N for elem in ans.iter(LxmlElement): if elem.sourceline is not None: elem.set(linenumber_attribute, str(elem.sourceline)) + return ans except Exception: if log is not None: log.exception('Failed to parse as XML, parsing as tag soup') diff --git a/src/calibre/ebooks/oeb/polish/tests/container.py b/src/calibre/ebooks/oeb/polish/tests/container.py index 0ca95730d1..cc836b7b04 100644 --- a/src/calibre/ebooks/oeb/polish/tests/container.py +++ b/src/calibre/ebooks/oeb/polish/tests/container.py @@ -10,11 +10,15 @@ import os, subprocess from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book -from calibre.ebooks.oeb.polish.container import get_container, clone_container, OCF_NS +from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS from calibre.ebooks.oeb.polish.replace import rename_files from calibre.utils.filenames import nlinks_file from calibre.ptempfile import TemporaryFile +def get_container(*args, **kwargs): + kwargs['tweak_mode'] = True + return _gc(*args, **kwargs) + class ContainerTests(BaseTest): def test_clone(self): diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py index e12b6e0026..b2084a3820 100644 --- a/src/calibre/gui2/tweak_book/boss.py +++ b/src/calibre/gui2/tweak_book/boss.py @@ -17,7 +17,7 @@ from calibre import prints from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.polish.main import SUPPORTED -from calibre.ebooks.oeb.polish.container import get_container, clone_container, guess_type +from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type from calibre.ebooks.oeb.polish.replace import rename_files from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog from calibre.gui2.dialogs.confirm_delete import confirm @@ -25,6 +25,10 @@ from calibre.gui2.tweak_book import set_current_container, current_container, tp from calibre.gui2.tweak_book.undo import GlobalUndoHistory from calibre.gui2.tweak_book.save import SaveManager +def get_container(*args, **kwargs): + kwargs['tweak_mode'] = True + return _gc(*args, **kwargs) + class Boss(QObject): def __init__(self, parent):