Use the new parser for Tweak Books

This commit is contained in:
Kovid Goyal 2013-10-28 16:22:23 +05:30
parent a2aae7fa8e
commit 106b717208
4 changed files with 29 additions and 12 deletions

View File

@ -30,6 +30,7 @@ from calibre.ebooks.oeb.base import (
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import nlinks_file, hardlink_file
@ -98,6 +99,7 @@ class Container(object): # {{{
self.log = log
self.html_preprocessor = HTMLPreProcessor()
self.css_preprocessor = CSSPreProcessor()
self.tweak_mode = False
self.parsed_cache = {}
self.mime_map = {}
@ -110,7 +112,7 @@ class Container(object): # {{{
if clone_data is not None:
self.cloned = True
for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map'):
for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map', 'tweak_mode'):
setattr(self, x, clone_data[x])
self.opf_dir = os.path.dirname(self.name_path_map[self.opf_name])
return
@ -150,6 +152,7 @@ class Container(object): # {{{
'mime_map': self.mime_map.copy(),
'pretty_print': set(self.pretty_print),
'encoding_map': self.encoding_map.copy(),
'tweak_mode': self.tweak_mode,
'name_path_map': {
name:os.path.join(dest_dir, os.path.relpath(path, self.root))
for name, path in self.name_path_map.iteritems()}
@ -343,6 +346,9 @@ class Container(object): # {{{
return etree.fromstring(data, parser=RECOVER_PARSER)
def parse_xhtml(self, data, fname):
if self.tweak_mode:
return parse_html_tweak(data, log=self.log, decoder=self.decode)
else:
try:
return parse_html(
data, log=self.log, decoder=self.decode,
@ -367,6 +373,7 @@ class Container(object): # {{{
log.setLevel(logging.WARN)
log.raiseExceptions = False
data = self.decode(data)
if not self.tweak_mode:
data = self.css_preprocessor(data)
parser = CSSParser(loglevel=logging.WARNING,
# We don't care about @import rules
@ -1000,11 +1007,12 @@ class AZW3Container(Container):
return set(self.name_path_map)
# }}}
def get_container(path, log=None, tdir=None):
def get_container(path, log=None, tdir=None, tweak_mode=False):
    """Open the book at *path* and return a container object for it.

    The container class is chosen from the file extension of *path*
    (compared case-insensitively): ``azw3`` and ``mobi`` use
    ``AZW3Container``, everything else uses ``EpubContainer``.

    :param path: Path to the book file.
    :param log: Log object handed to the container; ``default_log`` is
        used when this is None.
    :param tdir: Passed through to the container constructor as ``tdir``.
    :param tweak_mode: Stored on the returned container as ``tweak_mode``
        after it has been constructed.
    :return: The newly created container.
    """
    logger = default_log if log is None else log
    extension = path.rpartition('.')[-1].lower()
    if extension in {'azw3', 'mobi'}:
        container_class = AZW3Container
    else:
        container_class = EpubContainer
    container = container_class(path, logger, tdir=tdir)
    # The flag is applied only after construction has finished.
    container.tweak_mode = tweak_mode
    return container
def test_roundtrip():

View File

@ -616,6 +616,7 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
for elem in ans.iter(LxmlElement):
if elem.sourceline is not None:
elem.set(linenumber_attribute, str(elem.sourceline))
return ans
except Exception:
if log is not None:
log.exception('Failed to parse as XML, parsing as tag soup')

View File

@ -10,11 +10,15 @@ import os, subprocess
from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book
from calibre.ebooks.oeb.polish.container import get_container, clone_container, OCF_NS
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
from calibre.ebooks.oeb.polish.replace import rename_files
from calibre.utils.filenames import nlinks_file
from calibre.ptempfile import TemporaryFile
def get_container(*args, **kwargs):
    """Call ``_gc`` with the given arguments and ``tweak_mode`` forced on.

    Any ``tweak_mode`` keyword supplied by the caller is overridden with
    True; all other positional and keyword arguments pass through as is.
    """
    forced = dict(kwargs, tweak_mode=True)
    return _gc(*args, **forced)
class ContainerTests(BaseTest):
def test_clone(self):

View File

@ -17,7 +17,7 @@ from calibre import prints
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.polish.main import SUPPORTED
from calibre.ebooks.oeb.polish.container import get_container, clone_container, guess_type
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
from calibre.ebooks.oeb.polish.replace import rename_files
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
from calibre.gui2.dialogs.confirm_delete import confirm
@ -25,6 +25,10 @@ from calibre.gui2.tweak_book import set_current_container, current_container, tp
from calibre.gui2.tweak_book.undo import GlobalUndoHistory
from calibre.gui2.tweak_book.save import SaveManager
def get_container(*args, **kwargs):
    """Call ``_gc`` with the given arguments and ``tweak_mode`` forced on.

    Any ``tweak_mode`` keyword supplied by the caller is overridden with
    True; all other positional and keyword arguments pass through as is.
    """
    forced = dict(kwargs, tweak_mode=True)
    return _gc(*args, **forced)
class Boss(QObject):
def __init__(self, parent):