Use the new parser for Tweak Books

This commit is contained in:
Kovid Goyal 2013-10-28 16:22:23 +05:30
parent a2aae7fa8e
commit 106b717208
4 changed files with 29 additions and 12 deletions

View File

@ -30,6 +30,7 @@ from calibre.ebooks.oeb.base import (
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF, serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote) rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import nlinks_file, hardlink_file from calibre.utils.filenames import nlinks_file, hardlink_file
@ -98,6 +99,7 @@ class Container(object): # {{{
self.log = log self.log = log
self.html_preprocessor = HTMLPreProcessor() self.html_preprocessor = HTMLPreProcessor()
self.css_preprocessor = CSSPreProcessor() self.css_preprocessor = CSSPreProcessor()
self.tweak_mode = False
self.parsed_cache = {} self.parsed_cache = {}
self.mime_map = {} self.mime_map = {}
@ -110,7 +112,7 @@ class Container(object): # {{{
if clone_data is not None: if clone_data is not None:
self.cloned = True self.cloned = True
for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map'): for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map', 'tweak_mode'):
setattr(self, x, clone_data[x]) setattr(self, x, clone_data[x])
self.opf_dir = os.path.dirname(self.name_path_map[self.opf_name]) self.opf_dir = os.path.dirname(self.name_path_map[self.opf_name])
return return
@ -150,6 +152,7 @@ class Container(object): # {{{
'mime_map': self.mime_map.copy(), 'mime_map': self.mime_map.copy(),
'pretty_print': set(self.pretty_print), 'pretty_print': set(self.pretty_print),
'encoding_map': self.encoding_map.copy(), 'encoding_map': self.encoding_map.copy(),
'tweak_mode': self.tweak_mode,
'name_path_map': { 'name_path_map': {
name:os.path.join(dest_dir, os.path.relpath(path, self.root)) name:os.path.join(dest_dir, os.path.relpath(path, self.root))
for name, path in self.name_path_map.iteritems()} for name, path in self.name_path_map.iteritems()}
@ -343,13 +346,16 @@ class Container(object): # {{{
return etree.fromstring(data, parser=RECOVER_PARSER) return etree.fromstring(data, parser=RECOVER_PARSER)
def parse_xhtml(self, data, fname):
    """Parse the markup in ``data`` and return the parsed tree.

    When ``self.tweak_mode`` is false, ``parse_html`` is used together with
    this container's HTML preprocessor; if it raises ``NotHTML`` the data is
    parsed as generic XML via ``self.parse_xml`` instead.

    When ``self.tweak_mode`` is true, the data goes to ``parse_html_tweak``
    (``calibre.ebooks.oeb.polish.parsing.parse``). The preprocessor and
    ``fname`` are not passed on that path.
    """
    if not self.tweak_mode:
        try:
            return parse_html(
                data, log=self.log, decoder=self.decode,
                preprocessor=self.html_preprocessor, filename=fname,
                non_html_file_tags={'ncx'})
        except NotHTML:
            # Not an HTML document after all, so treat it as plain XML.
            return self.parse_xml(data)
    return parse_html_tweak(data, log=self.log, decoder=self.decode)
def parse(self, path, mime): def parse(self, path, mime):
with open(path, 'rb') as src: with open(path, 'rb') as src:
@ -367,7 +373,8 @@ class Container(object): # {{{
log.setLevel(logging.WARN) log.setLevel(logging.WARN)
log.raiseExceptions = False log.raiseExceptions = False
data = self.decode(data) data = self.decode(data)
data = self.css_preprocessor(data) if not self.tweak_mode:
data = self.css_preprocessor(data)
parser = CSSParser(loglevel=logging.WARNING, parser = CSSParser(loglevel=logging.WARNING,
# We dont care about @import rules # We dont care about @import rules
fetcher=lambda x: (None, None), log=_css_logger) fetcher=lambda x: (None, None), log=_css_logger)
@ -1000,11 +1007,12 @@ class AZW3Container(Container):
return set(self.name_path_map) return set(self.name_path_map)
# }}} # }}}
def get_container(path, log=None, tdir=None, tweak_mode=False):
    """Open the book at ``path`` and return a container object for it.

    ``AZW3Container`` is used when the text after the last ``.`` in ``path``
    is ``azw3`` or ``mobi`` (case-insensitive); otherwise ``EpubContainer``
    is used. ``log`` falls back to ``default_log`` when it is None.
    ``tweak_mode`` is assigned to the container after it has been
    constructed.
    """
    if log is None:
        log = default_log
    extension = path.rpartition('.')[-1].lower()
    if extension in {'azw3', 'mobi'}:
        container_class = AZW3Container
    else:
        container_class = EpubContainer
    container = container_class(path, log, tdir=tdir)
    container.tweak_mode = tweak_mode
    return container
def test_roundtrip(): def test_roundtrip():

View File

@ -616,6 +616,7 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
for elem in ans.iter(LxmlElement): for elem in ans.iter(LxmlElement):
if elem.sourceline is not None: if elem.sourceline is not None:
elem.set(linenumber_attribute, str(elem.sourceline)) elem.set(linenumber_attribute, str(elem.sourceline))
return ans
except Exception: except Exception:
if log is not None: if log is not None:
log.exception('Failed to parse as XML, parsing as tag soup') log.exception('Failed to parse as XML, parsing as tag soup')

View File

@ -10,11 +10,15 @@ import os, subprocess
from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book
from calibre.ebooks.oeb.polish.container import get_container, clone_container, OCF_NS from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
from calibre.ebooks.oeb.polish.replace import rename_files from calibre.ebooks.oeb.polish.replace import rename_files
from calibre.utils.filenames import nlinks_file from calibre.utils.filenames import nlinks_file
from calibre.ptempfile import TemporaryFile from calibre.ptempfile import TemporaryFile
def get_container(*args, **kwargs):
    """Call ``_gc`` with the given arguments and ``tweak_mode`` forced on.

    Any ``tweak_mode`` value supplied by the caller is overridden with True.
    """
    forced = dict(kwargs, tweak_mode=True)
    return _gc(*args, **forced)
class ContainerTests(BaseTest): class ContainerTests(BaseTest):
def test_clone(self): def test_clone(self):

View File

@ -17,7 +17,7 @@ from calibre import prints
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.polish.main import SUPPORTED from calibre.ebooks.oeb.polish.main import SUPPORTED
from calibre.ebooks.oeb.polish.container import get_container, clone_container, guess_type from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
from calibre.ebooks.oeb.polish.replace import rename_files from calibre.ebooks.oeb.polish.replace import rename_files
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.confirm_delete import confirm
@ -25,6 +25,10 @@ from calibre.gui2.tweak_book import set_current_container, current_container, tp
from calibre.gui2.tweak_book.undo import GlobalUndoHistory from calibre.gui2.tweak_book.undo import GlobalUndoHistory
from calibre.gui2.tweak_book.save import SaveManager from calibre.gui2.tweak_book.save import SaveManager
def get_container(*args, **kwargs):
    """Forward to ``_gc``, always passing ``tweak_mode=True``.

    A ``tweak_mode`` keyword given by the caller is replaced, not honoured.
    """
    kwargs.update(tweak_mode=True)
    return _gc(*args, **kwargs)
class Boss(QObject): class Boss(QObject):
def __init__(self, parent): def __init__(self, parent):