mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Forced default encoding:
- Support for forced default encoding of HTML content of OEBs
- oeb2mobi command-line option to set the encoding
This commit is contained in:
parent
bf50a850f4
commit
7abdacff58
@ -560,7 +560,7 @@ def oeb2mobi(opts, inpath):
|
||||
compression = PALMDOC if opts.compress else UNCOMPRESSED
|
||||
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
||||
context = Context(source, dest)
|
||||
oeb = OEBBook(inpath, logger=logger)
|
||||
oeb = OEBBook(inpath, logger=logger, encoding=opts.encoding)
|
||||
tocadder = HTMLTOCAdder(title=opts.toc_title)
|
||||
tocadder.transform(oeb, context)
|
||||
mangler = CaseMangler()
|
||||
|
@ -90,6 +90,9 @@ def prefixname(name, nsrmap):
|
||||
return barename(name)
|
||||
return ':'.join((prefix, barename(name)))
|
||||
|
||||
def XPath(expr):
|
||||
return etree.XPath(expr, namespaces=XPNSMAP)
|
||||
|
||||
def xpath(elem, expr):
|
||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||
|
||||
@ -292,15 +295,19 @@ class Metadata(object):
|
||||
class Manifest(object):
|
||||
class Item(object):
|
||||
NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)')
|
||||
META_XP = XPath('/h:html/h:head/h:meta[@http-equiv="Content-Type"]')
|
||||
|
||||
def __init__(self, id, href, media_type,
|
||||
def __init__(self, oeb, id, href, media_type,
|
||||
fallback=None, loader=str, data=None):
|
||||
self.oeb = oeb
|
||||
self.id = id
|
||||
self.href = self.path = urlnormalize(href)
|
||||
self.media_type = media_type
|
||||
self.fallback = fallback
|
||||
self.spine_position = None
|
||||
self.linear = True
|
||||
if loader is None and data is None:
|
||||
loader = oeb.container.read
|
||||
self._loader = loader
|
||||
self._data = data
|
||||
|
||||
@ -309,16 +316,20 @@ class Manifest(object):
|
||||
% (self.id, self.href, self.media_type)
|
||||
|
||||
def _force_xhtml(self, data):
|
||||
if self.oeb.encoding is not None:
|
||||
data = data.decode(self.oeb.encoding, 'replace')
|
||||
try:
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
except etree.XMLSyntaxError:
|
||||
data = html.fromstring(data, parser=XML_PARSER)
|
||||
data = html.fromstring(data)
|
||||
data = etree.tostring(data, encoding=unicode)
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
if namespace(data.tag) != XHTML_NS:
|
||||
data.attrib['xmlns'] = XHTML_NS
|
||||
data = etree.tostring(data)
|
||||
data = etree.tostring(data, encoding=unicode)
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
for meta in self.META_XP(data):
|
||||
meta.getparent().remove(meta)
|
||||
return data
|
||||
|
||||
def data():
|
||||
@ -395,9 +406,8 @@ class Manifest(object):
|
||||
self.hrefs = {}
|
||||
|
||||
def add(self, id, href, media_type, fallback=None, loader=None, data=None):
|
||||
loader = loader or self.oeb.container.read
|
||||
item = self.Item(
|
||||
id, href, media_type, fallback, loader, data)
|
||||
self.oeb, id, href, media_type, fallback, loader, data)
|
||||
self.ids[item.id] = item
|
||||
self.hrefs[item.href] = item
|
||||
return item
|
||||
@ -607,9 +617,7 @@ class Guide(object):
|
||||
__iter__ = iterkeys
|
||||
|
||||
def values(self):
|
||||
values = list(self.refs.values())
|
||||
values.sort()
|
||||
return values
|
||||
return sorted(self.refs.values())
|
||||
|
||||
def items(self):
|
||||
for type, ref in self.refs.items():
|
||||
@ -713,11 +721,13 @@ class TOC(object):
|
||||
|
||||
|
||||
class OEBBook(object):
|
||||
def __init__(self, opfpath=None, container=None, logger=FauxLogger()):
|
||||
def __init__(self, opfpath=None, container=None, encoding=None,
|
||||
logger=FauxLogger()):
|
||||
if opfpath and not container:
|
||||
container = DirContainer(os.path.dirname(opfpath))
|
||||
opfpath = os.path.basename(opfpath)
|
||||
self.container = container
|
||||
self.encoding = encoding
|
||||
self.logger = logger
|
||||
if opfpath or container:
|
||||
opf = self._read_opf(opfpath)
|
||||
|
Loading…
x
Reference in New Issue
Block a user