mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Use the new parser for Tweak Books
This commit is contained in:
parent
a2aae7fa8e
commit
106b717208
@ -30,6 +30,7 @@ from calibre.ebooks.oeb.base import (
|
|||||||
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
|
serialize, OEB_DOCS, _css_logger, OEB_STYLES, OPF2_NS, DC11_NS, OPF,
|
||||||
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
|
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote)
|
||||||
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
|
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
|
||||||
|
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
|
||||||
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
|
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
||||||
from calibre.utils.filenames import nlinks_file, hardlink_file
|
from calibre.utils.filenames import nlinks_file, hardlink_file
|
||||||
@ -98,6 +99,7 @@ class Container(object): # {{{
|
|||||||
self.log = log
|
self.log = log
|
||||||
self.html_preprocessor = HTMLPreProcessor()
|
self.html_preprocessor = HTMLPreProcessor()
|
||||||
self.css_preprocessor = CSSPreProcessor()
|
self.css_preprocessor = CSSPreProcessor()
|
||||||
|
self.tweak_mode = False
|
||||||
|
|
||||||
self.parsed_cache = {}
|
self.parsed_cache = {}
|
||||||
self.mime_map = {}
|
self.mime_map = {}
|
||||||
@ -110,7 +112,7 @@ class Container(object): # {{{
|
|||||||
|
|
||||||
if clone_data is not None:
|
if clone_data is not None:
|
||||||
self.cloned = True
|
self.cloned = True
|
||||||
for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map'):
|
for x in ('name_path_map', 'opf_name', 'mime_map', 'pretty_print', 'encoding_map', 'tweak_mode'):
|
||||||
setattr(self, x, clone_data[x])
|
setattr(self, x, clone_data[x])
|
||||||
self.opf_dir = os.path.dirname(self.name_path_map[self.opf_name])
|
self.opf_dir = os.path.dirname(self.name_path_map[self.opf_name])
|
||||||
return
|
return
|
||||||
@ -150,6 +152,7 @@ class Container(object): # {{{
|
|||||||
'mime_map': self.mime_map.copy(),
|
'mime_map': self.mime_map.copy(),
|
||||||
'pretty_print': set(self.pretty_print),
|
'pretty_print': set(self.pretty_print),
|
||||||
'encoding_map': self.encoding_map.copy(),
|
'encoding_map': self.encoding_map.copy(),
|
||||||
|
'tweak_mode': self.tweak_mode,
|
||||||
'name_path_map': {
|
'name_path_map': {
|
||||||
name:os.path.join(dest_dir, os.path.relpath(path, self.root))
|
name:os.path.join(dest_dir, os.path.relpath(path, self.root))
|
||||||
for name, path in self.name_path_map.iteritems()}
|
for name, path in self.name_path_map.iteritems()}
|
||||||
@ -343,13 +346,16 @@ class Container(object): # {{{
|
|||||||
return etree.fromstring(data, parser=RECOVER_PARSER)
|
return etree.fromstring(data, parser=RECOVER_PARSER)
|
||||||
|
|
||||||
def parse_xhtml(self, data, fname):
|
def parse_xhtml(self, data, fname):
|
||||||
try:
|
if self.tweak_mode:
|
||||||
return parse_html(
|
return parse_html_tweak(data, log=self.log, decoder=self.decode)
|
||||||
data, log=self.log, decoder=self.decode,
|
else:
|
||||||
preprocessor=self.html_preprocessor, filename=fname,
|
try:
|
||||||
non_html_file_tags={'ncx'})
|
return parse_html(
|
||||||
except NotHTML:
|
data, log=self.log, decoder=self.decode,
|
||||||
return self.parse_xml(data)
|
preprocessor=self.html_preprocessor, filename=fname,
|
||||||
|
non_html_file_tags={'ncx'})
|
||||||
|
except NotHTML:
|
||||||
|
return self.parse_xml(data)
|
||||||
|
|
||||||
def parse(self, path, mime):
|
def parse(self, path, mime):
|
||||||
with open(path, 'rb') as src:
|
with open(path, 'rb') as src:
|
||||||
@ -367,7 +373,8 @@ class Container(object): # {{{
|
|||||||
log.setLevel(logging.WARN)
|
log.setLevel(logging.WARN)
|
||||||
log.raiseExceptions = False
|
log.raiseExceptions = False
|
||||||
data = self.decode(data)
|
data = self.decode(data)
|
||||||
data = self.css_preprocessor(data)
|
if not self.tweak_mode:
|
||||||
|
data = self.css_preprocessor(data)
|
||||||
parser = CSSParser(loglevel=logging.WARNING,
|
parser = CSSParser(loglevel=logging.WARNING,
|
||||||
# We dont care about @import rules
|
# We dont care about @import rules
|
||||||
fetcher=lambda x: (None, None), log=_css_logger)
|
fetcher=lambda x: (None, None), log=_css_logger)
|
||||||
@ -1000,11 +1007,12 @@ class AZW3Container(Container):
|
|||||||
return set(self.name_path_map)
|
return set(self.name_path_map)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def get_container(path, log=None, tdir=None):
|
def get_container(path, log=None, tdir=None, tweak_mode=False):
|
||||||
if log is None:
|
if log is None:
|
||||||
log = default_log
|
log = default_log
|
||||||
ebook = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi'}
|
ebook = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi'}
|
||||||
else EpubContainer)(path, log, tdir=tdir)
|
else EpubContainer)(path, log, tdir=tdir)
|
||||||
|
ebook.tweak_mode = tweak_mode
|
||||||
return ebook
|
return ebook
|
||||||
|
|
||||||
def test_roundtrip():
|
def test_roundtrip():
|
||||||
|
@ -616,6 +616,7 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
|
|||||||
for elem in ans.iter(LxmlElement):
|
for elem in ans.iter(LxmlElement):
|
||||||
if elem.sourceline is not None:
|
if elem.sourceline is not None:
|
||||||
elem.set(linenumber_attribute, str(elem.sourceline))
|
elem.set(linenumber_attribute, str(elem.sourceline))
|
||||||
|
return ans
|
||||||
except Exception:
|
except Exception:
|
||||||
if log is not None:
|
if log is not None:
|
||||||
log.exception('Failed to parse as XML, parsing as tag soup')
|
log.exception('Failed to parse as XML, parsing as tag soup')
|
||||||
|
@ -10,11 +10,15 @@ import os, subprocess
|
|||||||
|
|
||||||
from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book
|
from calibre.ebooks.oeb.polish.tests.base import BaseTest, get_simple_book
|
||||||
|
|
||||||
from calibre.ebooks.oeb.polish.container import get_container, clone_container, OCF_NS
|
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, OCF_NS
|
||||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||||
from calibre.utils.filenames import nlinks_file
|
from calibre.utils.filenames import nlinks_file
|
||||||
from calibre.ptempfile import TemporaryFile
|
from calibre.ptempfile import TemporaryFile
|
||||||
|
|
||||||
|
def get_container(*args, **kwargs):
|
||||||
|
kwargs['tweak_mode'] = True
|
||||||
|
return _gc(*args, **kwargs)
|
||||||
|
|
||||||
class ContainerTests(BaseTest):
|
class ContainerTests(BaseTest):
|
||||||
|
|
||||||
def test_clone(self):
|
def test_clone(self):
|
||||||
|
@ -17,7 +17,7 @@ from calibre import prints
|
|||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre.ebooks.oeb.base import urlnormalize
|
from calibre.ebooks.oeb.base import urlnormalize
|
||||||
from calibre.ebooks.oeb.polish.main import SUPPORTED
|
from calibre.ebooks.oeb.polish.main import SUPPORTED
|
||||||
from calibre.ebooks.oeb.polish.container import get_container, clone_container, guess_type
|
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
|
||||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||||
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
|
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
|
||||||
from calibre.gui2.dialogs.confirm_delete import confirm
|
from calibre.gui2.dialogs.confirm_delete import confirm
|
||||||
@ -25,6 +25,10 @@ from calibre.gui2.tweak_book import set_current_container, current_container, tp
|
|||||||
from calibre.gui2.tweak_book.undo import GlobalUndoHistory
|
from calibre.gui2.tweak_book.undo import GlobalUndoHistory
|
||||||
from calibre.gui2.tweak_book.save import SaveManager
|
from calibre.gui2.tweak_book.save import SaveManager
|
||||||
|
|
||||||
|
def get_container(*args, **kwargs):
|
||||||
|
kwargs['tweak_mode'] = True
|
||||||
|
return _gc(*args, **kwargs)
|
||||||
|
|
||||||
class Boss(QObject):
|
class Boss(QObject):
|
||||||
|
|
||||||
def __init__(self, parent):
|
def __init__(self, parent):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user