mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
MOBI Output: Fix bug that caused MOBI conversion to sometimes fail when linking to an external stylesheet
This commit is contained in:
parent
8dd974ff42
commit
241a2fc099
@ -30,10 +30,10 @@ from calibre.utils.zipfile import ZipFile
|
|||||||
from cssutils import CSSParser
|
from cssutils import CSSParser
|
||||||
|
|
||||||
class HTMLElement(HtmlElement):
|
class HTMLElement(HtmlElement):
|
||||||
|
|
||||||
@apply
|
@apply
|
||||||
def specified_font_size():
|
def specified_font_size():
|
||||||
|
|
||||||
def fget(self):
|
def fget(self):
|
||||||
ans = self.get('specified_font_size', '')
|
ans = self.get('specified_font_size', '')
|
||||||
if not ans:
|
if not ans:
|
||||||
@ -41,12 +41,12 @@ class HTMLElement(HtmlElement):
|
|||||||
if ans.startswith('f'):
|
if ans.startswith('f'):
|
||||||
return functools.partial(operator.mul, float(ans[1:]))
|
return functools.partial(operator.mul, float(ans[1:]))
|
||||||
return float(ans)
|
return float(ans)
|
||||||
|
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
self.set('specified_font_size', ('f'+repr(val(1))) if callable(val) else repr(val))
|
self.set('specified_font_size', ('f'+repr(val(1))) if callable(val) else repr(val))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
@apply
|
@apply
|
||||||
def computed_font_size():
|
def computed_font_size():
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -54,48 +54,48 @@ class HTMLElement(HtmlElement):
|
|||||||
if ans == '':
|
if ans == '':
|
||||||
return None
|
return None
|
||||||
return float(ans)
|
return float(ans)
|
||||||
|
|
||||||
def fset(self, val):
|
def fset(self, val):
|
||||||
self.set('computed_font_size', repr(val))
|
self.set('computed_font_size', repr(val))
|
||||||
|
|
||||||
return property(fget=fget, fset=fset)
|
return property(fget=fget, fset=fset)
|
||||||
|
|
||||||
def remove_font_size_information(self):
    '''
    Drop the ``computed_font_size`` and ``specified_font_size`` attributes
    from every element yielded by ``self.iter()``. Elements that lack the
    attributes are left untouched.
    '''
    attr_names = tuple(prefix + '_font_size' for prefix in ('computed', 'specified'))
    for node in self.iter():
        for attr_name in attr_names:
            # pop() with a default never raises when the attribute is absent
            node.attrib.pop(attr_name, None)
|
||||||
|
|
||||||
def getpath(self):
    '''
    Return the path of this element as reported by ``getpath`` on the
    tree obtained from ``self.getroottree()``.
    '''
    tree = self.getroottree()
    return tree.getpath(self)
|
||||||
|
|
||||||
class Lookup(HtmlElementClassLookup):
    '''
    Element class lookup that returns :class:`HTMLElement` for element nodes
    and defers to :class:`HtmlElementClassLookup` for every other node type.
    '''

    def lookup(self, node_type, document, namespace, name):
        if node_type != 'element':
            # Anything that is not an element uses the stock lookup
            return HtmlElementClassLookup.lookup(self, node_type, document, namespace, name)
        return HTMLElement
|
||||||
|
|
||||||
class HTMLParser(_HTMLParser):
    '''
    ``_HTMLParser`` subclass that installs :class:`Lookup` as its element
    class lookup right after the base parser is initialised.
    '''

    def __init__(self, **kwargs):
        super(HTMLParser, self).__init__(**kwargs)
        element_lookup = Lookup()
        self.set_element_class_lookup(element_lookup)
|
||||||
|
|
||||||
parser = HTMLParser()
|
parser = HTMLParser()
|
||||||
|
|
||||||
def fromstring(raw, **kw):
    '''
    Parse ``raw`` with ``_fromstring`` using the module level ``parser``.
    Any extra keyword arguments are forwarded unchanged.
    '''
    parsed = _fromstring(raw, parser=parser, **kw)
    return parsed
|
||||||
|
|
||||||
def tostring(root, pretty_print=False):
    '''
    Serialize ``root`` through ``_tostring`` with UTF-8 encoding, the ``xml``
    output method and ``include_meta_content_type`` switched on.

    :param pretty_print: Passed straight through to ``_tostring``.
    '''
    serialize_options = {
        'encoding': 'utf-8',
        'method': 'xml',
        'include_meta_content_type': True,
        'pretty_print': pretty_print,
    }
    return _tostring(root, **serialize_options)
|
||||||
|
|
||||||
class Link(object):
|
class Link(object):
|
||||||
'''
|
'''
|
||||||
Represents a link in a HTML file.
|
Represents a link in a HTML file.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def url_to_local_path(cls, url, base):
|
def url_to_local_path(cls, url, base):
|
||||||
path = urlunparse(('', '', url.path, url.params, url.query, ''))
|
path = urlunparse(('', '', url.path, url.params, url.query, ''))
|
||||||
@ -103,7 +103,7 @@ class Link(object):
|
|||||||
if os.path.isabs(path):
|
if os.path.isabs(path):
|
||||||
return path
|
return path
|
||||||
return os.path.abspath(os.path.join(base, path))
|
return os.path.abspath(os.path.join(base, path))
|
||||||
|
|
||||||
def __init__(self, url, base):
|
def __init__(self, url, base):
|
||||||
'''
|
'''
|
||||||
:param url: The url this link points to. Must be an unquoted unicode string.
|
:param url: The url this link points to. Must be an unquoted unicode string.
|
||||||
@ -127,13 +127,13 @@ class Link(object):
|
|||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return self.path == getattr(other, 'path', other)
|
return self.path == getattr(other, 'path', other)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return u'Link: %s --> %s'%(self.url, self.path)
|
return u'Link: %s --> %s'%(self.url, self.path)
|
||||||
|
|
||||||
|
|
||||||
class IgnoreFile(Exception):
|
class IgnoreFile(Exception):
|
||||||
|
|
||||||
def __init__(self, msg, errno):
|
def __init__(self, msg, errno):
|
||||||
Exception.__init__(self, msg)
|
Exception.__init__(self, msg)
|
||||||
self.doesnt_exist = errno == 2
|
self.doesnt_exist = errno == 2
|
||||||
@ -148,13 +148,13 @@ class HTMLFile(object):
|
|||||||
|
|
||||||
The encoding of the file is available as :member:`encoding`.
|
The encoding of the file is available as :member:`encoding`.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
|
HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
|
||||||
TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
|
TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
|
||||||
LINK_PAT = re.compile(
|
LINK_PAT = re.compile(
|
||||||
r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
|
r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
|
||||||
re.DOTALL|re.IGNORECASE)
|
re.DOTALL|re.IGNORECASE)
|
||||||
|
|
||||||
def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
|
def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
|
||||||
'''
|
'''
|
||||||
:param level: The level of this file. Should be 0 for the root file.
|
:param level: The level of this file. Should be 0 for the root file.
|
||||||
@ -167,7 +167,7 @@ class HTMLFile(object):
|
|||||||
self.level = level
|
self.level = level
|
||||||
self.referrer = referrer
|
self.referrer = referrer
|
||||||
self.links = []
|
self.links = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(self.path, 'rb') as f:
|
with open(self.path, 'rb') as f:
|
||||||
src = f.read()
|
src = f.read()
|
||||||
@ -176,7 +176,7 @@ class HTMLFile(object):
|
|||||||
if level == 0:
|
if level == 0:
|
||||||
raise IOError(msg)
|
raise IOError(msg)
|
||||||
raise IgnoreFile(msg, err.errno)
|
raise IgnoreFile(msg, err.errno)
|
||||||
|
|
||||||
self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))
|
self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))
|
||||||
if not self.is_binary:
|
if not self.is_binary:
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
@ -189,19 +189,19 @@ class HTMLFile(object):
|
|||||||
match = self.TITLE_PAT.search(src)
|
match = self.TITLE_PAT.search(src)
|
||||||
self.title = match.group(1) if match is not None else self.title
|
self.title = match.group(1) if match is not None else self.title
|
||||||
self.find_links(src)
|
self.find_links(src)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return self.path == getattr(other, 'path', other)
|
return self.path == getattr(other, 'path', other)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return u'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
|
return u'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self)
|
return str(self)
|
||||||
|
|
||||||
|
|
||||||
def find_links(self, src):
|
def find_links(self, src):
|
||||||
for match in self.LINK_PAT.finditer(src):
|
for match in self.LINK_PAT.finditer(src):
|
||||||
url = None
|
url = None
|
||||||
@ -212,7 +212,7 @@ class HTMLFile(object):
|
|||||||
link = self.resolve(url)
|
link = self.resolve(url)
|
||||||
if link not in self.links:
|
if link not in self.links:
|
||||||
self.links.append(link)
|
self.links.append(link)
|
||||||
|
|
||||||
def resolve(self, url):
    '''
    Build a :class:`Link` for ``url`` using ``self.base`` as the base.
    '''
    link = Link(url, self.base)
    return link
|
||||||
|
|
||||||
@ -234,13 +234,13 @@ def depth_first(root, flat, visited=set([])):
|
|||||||
if hf not in visited:
|
if hf not in visited:
|
||||||
yield hf
|
yield hf
|
||||||
visited.add(hf)
|
visited.add(hf)
|
||||||
|
|
||||||
|
|
||||||
def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None):
|
def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None):
|
||||||
'''
|
'''
|
||||||
Recursively traverse all links in the HTML file.
|
Recursively traverse all links in the HTML file.
|
||||||
|
|
||||||
:param max_levels: Maximum levels of recursion. Must be non-negative. 0
|
:param max_levels: Maximum levels of recursion. Must be non-negative. 0
|
||||||
implies that no links in the root HTML file are followed.
|
implies that no links in the root HTML file are followed.
|
||||||
:param encoding: Specify character encoding of HTML files. If `None` it is
|
:param encoding: Specify character encoding of HTML files. If `None` it is
|
||||||
auto-detected.
|
auto-detected.
|
||||||
@ -271,7 +271,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
|
|||||||
print repr(err)
|
print repr(err)
|
||||||
for link in rejects:
|
for link in rejects:
|
||||||
hf.links.remove(link)
|
hf.links.remove(link)
|
||||||
|
|
||||||
next_level = list(nl)
|
next_level = list(nl)
|
||||||
orec = sys.getrecursionlimit()
|
orec = sys.getrecursionlimit()
|
||||||
sys.setrecursionlimit(500000)
|
sys.setrecursionlimit(500000)
|
||||||
@ -279,14 +279,14 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
|
|||||||
return flat, list(depth_first(flat[0], flat))
|
return flat, list(depth_first(flat[0], flat))
|
||||||
finally:
|
finally:
|
||||||
sys.setrecursionlimit(orec)
|
sys.setrecursionlimit(orec)
|
||||||
|
|
||||||
|
|
||||||
def opf_traverse(opf_reader, verbose=0, encoding=None):
|
def opf_traverse(opf_reader, verbose=0, encoding=None):
|
||||||
'''
|
'''
|
||||||
Return a list of :class:`HTMLFile` objects in the order specified by the
|
Return a list of :class:`HTMLFile` objects in the order specified by the
|
||||||
`<spine>` element of the OPF.
|
`<spine>` element of the OPF.
|
||||||
|
|
||||||
:param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance.
|
:param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance.
|
||||||
:param encoding: Specify character encoding of HTML files. If `None` it is
|
:param encoding: Specify character encoding of HTML files. If `None` it is
|
||||||
auto-detected.
|
auto-detected.
|
||||||
'''
|
'''
|
||||||
@ -317,7 +317,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
|
|||||||
print 'WARNING: OPF spine item %s does not exist'%path
|
print 'WARNING: OPF spine item %s does not exist'%path
|
||||||
ans = [f for f in ans if not f.is_binary]
|
ans = [f for f in ans if not f.is_binary]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', 'apos', 'lt', 'gt', 'amp'])
|
convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', 'apos', 'lt', 'gt', 'amp'])
|
||||||
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
||||||
@ -326,20 +326,20 @@ def sanitize_head(match):
|
|||||||
x = match.group(1)
|
x = match.group(1)
|
||||||
x = _span_pat.sub('', x)
|
x = _span_pat.sub('', x)
|
||||||
return '<head>\n'+x+'\n</head>'
|
return '<head>\n'+x+'\n</head>'
|
||||||
|
|
||||||
class PreProcessor(object):
|
class PreProcessor(object):
|
||||||
PREPROCESS = [
|
PREPROCESS = [
|
||||||
# Some idiotic HTML generators (Frontpage I'm looking at you)
|
# Some idiotic HTML generators (Frontpage I'm looking at you)
|
||||||
# Put all sorts of crap into <head>. This messes up lxml
|
# Put all sorts of crap into <head>. This messes up lxml
|
||||||
(re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
|
(re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
|
||||||
sanitize_head),
|
sanitize_head),
|
||||||
# Convert all entities, since lxml doesn't handle them well
|
# Convert all entities, since lxml doesn't handle them well
|
||||||
(re.compile(r'&(\S+?);'), convert_entities),
|
(re.compile(r'&(\S+?);'), convert_entities),
|
||||||
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
||||||
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
|
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
|
||||||
lambda match: ''),
|
lambda match: ''),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
PDFTOHTML = [
|
PDFTOHTML = [
|
||||||
# Remove <hr> tags
|
# Remove <hr> tags
|
||||||
@ -348,20 +348,20 @@ class PreProcessor(object):
|
|||||||
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
(re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
|
||||||
# Remove <br> and replace <br><br> with <p>
|
# Remove <br> and replace <br><br> with <p>
|
||||||
(re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'),
|
(re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'),
|
||||||
(re.compile(r'(.*)<br.*?>', re.IGNORECASE),
|
(re.compile(r'(.*)<br.*?>', re.IGNORECASE),
|
||||||
lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40
|
lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40
|
||||||
else match.group(1)),
|
else match.group(1)),
|
||||||
# Remove hyphenation
|
# Remove hyphenation
|
||||||
(re.compile(r'-\n\r?'), lambda match: ''),
|
(re.compile(r'-\n\r?'), lambda match: ''),
|
||||||
|
|
||||||
# Remove gray background
|
# Remove gray background
|
||||||
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
|
(re.compile(r'<BODY[^<>]+>'), lambda match : '<BODY>'),
|
||||||
|
|
||||||
# Remove non breaking spaces
|
# Remove non breaking spaces
|
||||||
(re.compile(ur'\u00a0'), lambda match : ' '),
|
(re.compile(ur'\u00a0'), lambda match : ' '),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fix Book Designer markup
|
# Fix Book Designer markup
|
||||||
BOOK_DESIGNER = [
|
BOOK_DESIGNER = [
|
||||||
# HR
|
# HR
|
||||||
@ -377,17 +377,17 @@ class PreProcessor(object):
|
|||||||
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
|
lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
|
||||||
]
|
]
|
||||||
|
|
||||||
def is_baen(self, src):
    '''
    Return True if ``src`` contains a ``<meta name="Publisher" ...>`` tag
    whose content mentions Baen. Matching ignores case.
    '''
    publisher_pat = r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"'
    found = re.search(publisher_pat, src, re.IGNORECASE)
    return found is not None
|
||||||
|
|
||||||
def is_book_designer(self, raw):
    '''
    Return True if ``raw`` contains an ``<H2`` tag carrying ``id=BookTitle``.
    The search is case sensitive.
    '''
    marker = re.search('<H2[^><]*id=BookTitle', raw)
    if marker is None:
        return False
    return True
|
||||||
|
|
||||||
def is_pdftohtml(self, src):
    '''
    Return True if the pdftohtml marker comment occurs within the first
    1000 characters of ``src``.
    '''
    head = src[:1000]
    marker = '<!-- created by calibre\'s pdftohtml -->'
    return marker in head
|
||||||
|
|
||||||
def preprocess(self, html):
|
def preprocess(self, html):
|
||||||
opts = getattr(self, 'opts', False)
|
opts = getattr(self, 'opts', False)
|
||||||
if opts and hasattr(opts, 'profile') and getattr(opts.profile, 'remove_special_chars', False):
|
if opts and hasattr(opts, 'profile') and getattr(opts.profile, 'remove_special_chars', False):
|
||||||
@ -403,17 +403,17 @@ class PreProcessor(object):
|
|||||||
for rule in self.PREPROCESS + rules:
|
for rule in self.PREPROCESS + rules:
|
||||||
html = rule[0].sub(rule[1], html)
|
html = rule[0].sub(rule[1], html)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
class Parser(PreProcessor, LoggingInterface):
|
class Parser(PreProcessor, LoggingInterface):
|
||||||
# SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont'
|
# SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont'
|
||||||
# SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in
|
# SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in
|
||||||
# [
|
# [
|
||||||
# (r'<(?P<tag>%s)(?P<attrs>(\s+[^<>]*){0,1})(?<!/)>',
|
# (r'<(?P<tag>%s)(?P<attrs>(\s+[^<>]*){0,1})(?<!/)>',
|
||||||
# '<\g<tag>\g<attrs> />'),
|
# '<\g<tag>\g<attrs> />'),
|
||||||
# (),
|
# (),
|
||||||
# ]
|
# ]
|
||||||
# ]
|
# ]
|
||||||
|
|
||||||
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'):
|
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'):
|
||||||
LoggingInterface.__init__(self, logging.getLogger(name))
|
LoggingInterface.__init__(self, logging.getLogger(name))
|
||||||
self.setup_cli_handler(opts.verbose)
|
self.setup_cli_handler(opts.verbose)
|
||||||
@ -433,27 +433,27 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1]
|
name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1]
|
||||||
save_counter += 1
|
save_counter += 1
|
||||||
self.htmlfile_map[f.path] = name
|
self.htmlfile_map[f.path] = name
|
||||||
|
|
||||||
self.parse_html()
|
self.parse_html()
|
||||||
# Handle <image> tags inside embedded <svg>
|
# Handle <image> tags inside embedded <svg>
|
||||||
# At least one source of EPUB files (Penguin) uses xlink:href
|
# At least one source of EPUB files (Penguin) uses xlink:href
|
||||||
# without declaring the xlink namespace
|
# without declaring the xlink namespace
|
||||||
for image in self.root.xpath('//image'):
|
for image in self.root.xpath('//image'):
|
||||||
for attr in image.attrib.keys():
|
for attr in image.attrib.keys():
|
||||||
if attr.endswith(':href'):
|
if attr.endswith(':href'):
|
||||||
nhref = self.rewrite_links(image.get(attr))
|
nhref = self.rewrite_links(image.get(attr))
|
||||||
image.set(attr, nhref)
|
image.set(attr, nhref)
|
||||||
|
|
||||||
self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
|
self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
|
||||||
for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates
|
for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates
|
||||||
if self.root.get(bad, None) is not None:
|
if self.root.get(bad, None) is not None:
|
||||||
self.root.attrib.pop(bad)
|
self.root.attrib.pop(bad)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def save_path(self):
    '''
    Return the location under ``self.tdir`` of the name that
    ``self.htmlfile_map`` assigns to the current HTML file.
    '''
    mapped_name = self.htmlfile_map[self.htmlfile.path]
    return os.path.join(self.tdir, mapped_name)
|
||||||
|
|
||||||
def save(self, strip_comments=False):
|
def save(self, strip_comments=False):
|
||||||
'''
|
'''
|
||||||
Save processed HTML into the content directory.
|
Save processed HTML into the content directory.
|
||||||
@ -463,7 +463,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
self.root.set('xmlns:xlink', 'http://www.w3.org/1999/xlink')
|
self.root.set('xmlns:xlink', 'http://www.w3.org/1999/xlink')
|
||||||
for svg in self.root.xpath('//svg'):
|
for svg in self.root.xpath('//svg'):
|
||||||
svg.set('xmlns', 'http://www.w3.org/2000/svg')
|
svg.set('xmlns', 'http://www.w3.org/2000/svg')
|
||||||
|
|
||||||
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
|
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
|
||||||
ans = re.compile(r'<head>', re.IGNORECASE).sub(
|
ans = re.compile(r'<head>', re.IGNORECASE).sub(
|
||||||
'<head>\n\t<meta http-equiv="Content-Type" '
|
'<head>\n\t<meta http-equiv="Content-Type" '
|
||||||
@ -503,7 +503,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
self.root.remove(head)
|
self.root.remove(head)
|
||||||
self.root.insert(0, head)
|
self.root.insert(0, head)
|
||||||
|
|
||||||
self.head = head
|
self.head = head
|
||||||
try:
|
try:
|
||||||
self.body = self.root.body
|
self.body = self.root.body
|
||||||
except:
|
except:
|
||||||
@ -526,7 +526,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
if not self.head.xpath('./title'):
|
if not self.head.xpath('./title'):
|
||||||
title = etree.SubElement(self.head, 'title')
|
title = etree.SubElement(self.head, 'title')
|
||||||
title.text = _('Unknown')
|
title.text = _('Unknown')
|
||||||
|
|
||||||
def debug_tree(self, name):
|
def debug_tree(self, name):
|
||||||
'''
|
'''
|
||||||
Dump source tree for later debugging.
|
Dump source tree for later debugging.
|
||||||
@ -538,8 +538,8 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
(os.path.basename(self.htmlfile.path), name)), 'wb') as f:
|
(os.path.basename(self.htmlfile.path), name)), 'wb') as f:
|
||||||
f.write(tostring(self.root))
|
f.write(tostring(self.root))
|
||||||
self.log_debug(_('Written processed HTML to ')+f.name)
|
self.log_debug(_('Written processed HTML to ')+f.name)
|
||||||
|
|
||||||
|
|
||||||
def rewrite_links(self, olink):
|
def rewrite_links(self, olink):
|
||||||
'''
|
'''
|
||||||
Make all links in document relative so that they work in the EPUB container.
|
Make all links in document relative so that they work in the EPUB container.
|
||||||
@ -555,7 +555,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path):
|
if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path):
|
||||||
return olink
|
return olink
|
||||||
if link.path in self.htmlfiles:
|
if link.path in self.htmlfiles:
|
||||||
return self.htmlfile_map[link.path] + frag
|
return self.htmlfile_map[link.path] + frag
|
||||||
if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None:
|
if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None:
|
||||||
return olink # This happens when --max-levels is used
|
return olink # This happens when --max-levels is used
|
||||||
if link.path in self.resource_map.keys():
|
if link.path in self.resource_map.keys():
|
||||||
@ -567,26 +567,26 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
name = 'resources/' + name
|
name = 'resources/' + name
|
||||||
self.resource_map[link.path] = name
|
self.resource_map[link.path] = name
|
||||||
return name + frag
|
return name + frag
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Processor(Parser):
|
class Processor(Parser):
|
||||||
'''
|
'''
|
||||||
This class builds on :class:`Parser` to provide additional methods
|
This class builds on :class:`Parser` to provide additional methods
|
||||||
to perform various processing/modification tasks on HTML files.
|
to perform various processing/modification tasks on HTML files.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
LINKS_PATH = XPath('//a[@href]')
|
LINKS_PATH = XPath('//a[@href]')
|
||||||
PIXEL_PAT = re.compile(r'([-]?\d+|[-]?\d*\.\d+)px')
|
PIXEL_PAT = re.compile(r'([-]?\d+|[-]?\d*\.\d+)px')
|
||||||
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
|
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
    '''
    Initialise the underlying :class:`Parser`, then create a CSS parser
    that logs through the ``cssutils`` logger and reset the three
    stylesheet slots to None.
    '''
    Parser.__init__(self, *args, **kwargs)
    # Route cssutils messages through a CLI handler at the configured verbosity
    css_log = LoggingInterface(logging.getLogger('cssutils'))
    css_log.setup_cli_handler(self.opts.verbose)
    self.css_parser = CSSParser(log=css_log.logger, loglevel=logging.ERROR)
    self.stylesheet = None
    self.font_css = None
    self.override_css = None
|
||||||
|
|
||||||
def detect_chapters(self):
|
def detect_chapters(self):
|
||||||
self.detected_chapters = self.opts.chapter(self.root)
|
self.detected_chapters = self.opts.chapter(self.root)
|
||||||
chapter_mark = self.opts.chapter_mark
|
chapter_mark = self.opts.chapter_mark
|
||||||
@ -604,12 +604,12 @@ class Processor(Parser):
|
|||||||
else: # chapter_mark == 'both':
|
else: # chapter_mark == 'both':
|
||||||
mark = etree.Element('hr', style=page_break_before)
|
mark = etree.Element('hr', style=page_break_before)
|
||||||
elem.addprevious(mark)
|
elem.addprevious(mark)
|
||||||
|
|
||||||
def save(self, strip_comments=False):
|
def save(self, strip_comments=False):
|
||||||
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
|
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]+'_calibre'
|
||||||
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
|
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
|
||||||
if sheet is not None:
|
if sheet is not None:
|
||||||
style = etree.SubElement(self.head, 'link', attrib={'type':'text/css', 'rel':'stylesheet',
|
style = etree.SubElement(self.head, 'link', attrib={'type':'text/css', 'rel':'stylesheet',
|
||||||
'href':'resources/%s_%d.css'%(style_path, i),
|
'href':'resources/%s_%d.css'%(style_path, i),
|
||||||
'charset':'UTF-8'})
|
'charset':'UTF-8'})
|
||||||
style.tail = '\n'
|
style.tail = '\n'
|
||||||
@ -620,16 +620,16 @@ class Processor(Parser):
|
|||||||
raw = raw.encode('utf-8')
|
raw = raw.encode('utf-8')
|
||||||
open(path, 'wb').write(raw)
|
open(path, 'wb').write(raw)
|
||||||
return Parser.save(self, strip_comments=strip_comments)
|
return Parser.save(self, strip_comments=strip_comments)
|
||||||
|
|
||||||
def populate_toc(self, toc):
|
def populate_toc(self, toc):
|
||||||
'''
|
'''
|
||||||
Populate the Table of Contents from detected chapters and links.
|
Populate the Table of Contents from detected chapters and links.
|
||||||
'''
|
'''
|
||||||
class Adder(object):
|
class Adder(object):
|
||||||
|
|
||||||
def __init__(self, toc):
|
def __init__(self, toc):
|
||||||
self.next_play_order = max([x.play_order for x in toc.flat()])
|
self.next_play_order = max([x.play_order for x in toc.flat()])
|
||||||
|
|
||||||
def __call__(self, href, fragment, text, target, type='link'):
|
def __call__(self, href, fragment, text, target, type='link'):
|
||||||
for entry in toc.flat():
|
for entry in toc.flat():
|
||||||
if entry.href == href and entry.fragment == fragment:
|
if entry.href == href and entry.fragment == fragment:
|
||||||
@ -637,15 +637,15 @@ class Processor(Parser):
|
|||||||
if len(text) > 50:
|
if len(text) > 50:
|
||||||
text = text[:50] + u'\u2026'
|
text = text[:50] + u'\u2026'
|
||||||
self.next_play_order += 1
|
self.next_play_order += 1
|
||||||
return target.add_item(href, fragment, text, type=type,
|
return target.add_item(href, fragment, text, type=type,
|
||||||
play_order=self.next_play_order)
|
play_order=self.next_play_order)
|
||||||
add_item = Adder(toc)
|
add_item = Adder(toc)
|
||||||
name = self.htmlfile_map[self.htmlfile.path]
|
name = self.htmlfile_map[self.htmlfile.path]
|
||||||
href = 'content/'+name
|
href = 'content/'+name
|
||||||
|
|
||||||
# Add level* TOC items
|
# Add level* TOC items
|
||||||
counter = 0
|
counter = 0
|
||||||
|
|
||||||
def elem_to_link(elem, href, counter):
|
def elem_to_link(elem, href, counter):
|
||||||
text = (u''.join(elem.xpath('string()'))).strip()
|
text = (u''.join(elem.xpath('string()'))).strip()
|
||||||
if not text:
|
if not text:
|
||||||
@ -662,8 +662,8 @@ class Processor(Parser):
|
|||||||
elem.set('id', id)
|
elem.set('id', id)
|
||||||
frag = id
|
frag = id
|
||||||
return text, _href, frag
|
return text, _href, frag
|
||||||
|
|
||||||
|
|
||||||
if self.opts.level1_toc is not None:
|
if self.opts.level1_toc is not None:
|
||||||
level1 = self.opts.level1_toc(self.root)
|
level1 = self.opts.level1_toc(self.root)
|
||||||
level1_order = []
|
level1_order = []
|
||||||
@ -702,17 +702,17 @@ class Processor(Parser):
|
|||||||
counter += 1
|
counter += 1
|
||||||
if text:
|
if text:
|
||||||
add_item(_href, frag, text, level2, type='chapter')
|
add_item(_href, frag, text, level2, type='chapter')
|
||||||
|
|
||||||
|
|
||||||
if level1_order: # Fix play order
|
if level1_order: # Fix play order
|
||||||
next_play_order = level1_order[0].play_order
|
next_play_order = level1_order[0].play_order
|
||||||
for x in level1_order:
|
for x in level1_order:
|
||||||
for y in x.flat():
|
for y in x.flat():
|
||||||
y.play_order = next_play_order
|
y.play_order = next_play_order
|
||||||
next_play_order += 1
|
next_play_order += 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if len(toc) > 0:
|
if len(toc) > 0:
|
||||||
# Detected TOC entries using --level* options
|
# Detected TOC entries using --level* options
|
||||||
# so aborting all other toc processing
|
# so aborting all other toc processing
|
||||||
@ -726,7 +726,7 @@ class Processor(Parser):
|
|||||||
id = elem.get('id', 'calibre_chapter_%d'%counter)
|
id = elem.get('id', 'calibre_chapter_%d'%counter)
|
||||||
elem.set('id', id)
|
elem.set('id', id)
|
||||||
add_item(href, id, text, toc, type='chapter')
|
add_item(href, id, text, toc, type='chapter')
|
||||||
|
|
||||||
if len(list(toc.flat())) >= self.opts.toc_threshold:
|
if len(list(toc.flat())) >= self.opts.toc_threshold:
|
||||||
return
|
return
|
||||||
referrer = toc
|
referrer = toc
|
||||||
@ -745,7 +745,7 @@ class Processor(Parser):
|
|||||||
name = self.htmlfile_map[self.htmlfile.referrer.path]
|
name = self.htmlfile_map[self.htmlfile.referrer.path]
|
||||||
href = 'content/'+name
|
href = 'content/'+name
|
||||||
referrer = add_item(href, None, text, toc)
|
referrer = add_item(href, None, text, toc)
|
||||||
|
|
||||||
# Add links to TOC
|
# Add links to TOC
|
||||||
if int(self.opts.max_toc_links) > 0:
|
if int(self.opts.max_toc_links) > 0:
|
||||||
for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
|
for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
|
||||||
@ -762,7 +762,7 @@ class Processor(Parser):
|
|||||||
if len(parts) > 1:
|
if len(parts) > 1:
|
||||||
fragment = parts[1]
|
fragment = parts[1]
|
||||||
add_item(href, fragment, text, referrer)
|
add_item(href, fragment, text, referrer)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def preprocess_css(cls, css, dpi=96):
|
def preprocess_css(cls, css, dpi=96):
|
||||||
def rescale(match):
|
def rescale(match):
|
||||||
@ -772,17 +772,17 @@ class Processor(Parser):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return ''
|
return ''
|
||||||
return '%fpt'%(72 * val/dpi)
|
return '%fpt'%(72 * val/dpi)
|
||||||
|
|
||||||
css = cls.PIXEL_PAT.sub(rescale, css)
|
css = cls.PIXEL_PAT.sub(rescale, css)
|
||||||
css = cls.PAGE_PAT.sub('', css)
|
css = cls.PAGE_PAT.sub('', css)
|
||||||
return css
|
return css
|
||||||
|
|
||||||
def extract_css(self, parsed_sheets):
|
def extract_css(self, parsed_sheets):
|
||||||
'''
|
'''
|
||||||
Remove all CSS information from the document and store it as
|
Remove all CSS information from the document and store it as
|
||||||
:class:`StyleSheet` objects.
|
:class:`StyleSheet` objects.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def get_id(chapter, counter, prefix='calibre_css_'):
|
def get_id(chapter, counter, prefix='calibre_css_'):
|
||||||
new_id = '%s_%d'%(prefix, counter)
|
new_id = '%s_%d'%(prefix, counter)
|
||||||
if chapter.tag.lower() == 'a' and 'name' in chapter.keys():
|
if chapter.tag.lower() == 'a' and 'name' in chapter.keys():
|
||||||
@ -796,7 +796,7 @@ class Processor(Parser):
|
|||||||
id = new_id
|
id = new_id
|
||||||
chapter.set('id', id)
|
chapter.set('id', id)
|
||||||
return id
|
return id
|
||||||
|
|
||||||
self.external_stylesheets, self.stylesheet = [], self.css_parser.parseString('')
|
self.external_stylesheets, self.stylesheet = [], self.css_parser.parseString('')
|
||||||
self.specified_override_css = []
|
self.specified_override_css = []
|
||||||
for link in self.root.xpath('//link'):
|
for link in self.root.xpath('//link'):
|
||||||
@ -825,8 +825,7 @@ class Processor(Parser):
|
|||||||
self.log_exception('')
|
self.log_exception('')
|
||||||
if parsed_sheets.has_key(file):
|
if parsed_sheets.has_key(file):
|
||||||
self.external_stylesheets.append(parsed_sheets[file])
|
self.external_stylesheets.append(parsed_sheets[file])
|
||||||
|
|
||||||
|
|
||||||
for style in self.root.xpath('//style'):
|
for style in self.root.xpath('//style'):
|
||||||
if 'css' in style.get('type', 'text/css').lower():
|
if 'css' in style.get('type', 'text/css').lower():
|
||||||
override_css = style.get('title', '') == 'override_css'
|
override_css = style.get('title', '') == 'override_css'
|
||||||
@ -889,7 +888,7 @@ class Processor(Parser):
|
|||||||
cn += classname
|
cn += classname
|
||||||
font.set('class', cn)
|
font.set('class', cn)
|
||||||
font.tag = 'span'
|
font.tag = 'span'
|
||||||
|
|
||||||
id_css, id_css_counter = {}, 0
|
id_css, id_css_counter = {}, 0
|
||||||
for elem in self.root.xpath('//*[@style]'):
|
for elem in self.root.xpath('//*[@style]'):
|
||||||
setting = elem.get('style')
|
setting = elem.get('style')
|
||||||
@ -906,7 +905,7 @@ class Processor(Parser):
|
|||||||
cn = elem.get('class', classname)
|
cn = elem.get('class', classname)
|
||||||
elem.set('class', cn)
|
elem.set('class', cn)
|
||||||
elem.attrib.pop('style')
|
elem.attrib.pop('style')
|
||||||
|
|
||||||
css = '\n'.join(['.%s {%s;}'%(cn, setting) for \
|
css = '\n'.join(['.%s {%s;}'%(cn, setting) for \
|
||||||
setting, cn in cache.items()])
|
setting, cn in cache.items()])
|
||||||
css += '\n\n'
|
css += '\n\n'
|
||||||
@ -930,28 +929,28 @@ class Processor(Parser):
|
|||||||
self.override_css = self.css_parser.parseString(self.preprocess_css(css))
|
self.override_css = self.css_parser.parseString(self.preprocess_css(css))
|
||||||
for rule in reversed(self.specified_override_css):
|
for rule in reversed(self.specified_override_css):
|
||||||
self.override_css.insertRule(rule, index=0)
|
self.override_css.insertRule(rule, index=0)
|
||||||
|
|
||||||
|
|
||||||
def config(defaults=None, config_name='html',
|
def config(defaults=None, config_name='html',
|
||||||
desc=_('Options to control the traversal of HTML')):
|
desc=_('Options to control the traversal of HTML')):
|
||||||
if defaults is None:
|
if defaults is None:
|
||||||
c = Config(config_name, desc)
|
c = Config(config_name, desc)
|
||||||
else:
|
else:
|
||||||
c = StringConfig(defaults, desc)
|
c = StringConfig(defaults, desc)
|
||||||
|
|
||||||
c.add_opt('output', ['-o', '--output'], default=None,
|
c.add_opt('output', ['-o', '--output'], default=None,
|
||||||
help=_('The output directory. Default is the current directory.'))
|
help=_('The output directory. Default is the current directory.'))
|
||||||
c.add_opt('encoding', ['--encoding'], default=None,
|
c.add_opt('encoding', ['--encoding'], default=None,
|
||||||
help=_('Character encoding for HTML files. Default is to auto detect.'))
|
help=_('Character encoding for HTML files. Default is to auto detect.'))
|
||||||
c.add_opt('zip', ['--zip'], default=False,
|
c.add_opt('zip', ['--zip'], default=False,
|
||||||
help=_('Create the output in a zip file. If this option is specified, the --output should be the name of a file not a directory.'))
|
help=_('Create the output in a zip file. If this option is specified, the --output should be the name of a file not a directory.'))
|
||||||
|
|
||||||
traversal = c.add_group('traversal', _('Control the following of links in HTML files.'))
|
traversal = c.add_group('traversal', _('Control the following of links in HTML files.'))
|
||||||
traversal('breadth_first', ['--breadth-first'], default=False,
|
traversal('breadth_first', ['--breadth-first'], default=False,
|
||||||
help=_('Traverse links in HTML files breadth first. Normally, they are traversed depth first'))
|
help=_('Traverse links in HTML files breadth first. Normally, they are traversed depth first'))
|
||||||
traversal('max_levels', ['--max-levels'], default=sys.getrecursionlimit(), group='traversal',
|
traversal('max_levels', ['--max-levels'], default=sys.getrecursionlimit(), group='traversal',
|
||||||
help=_('Maximum levels of recursion when following links in HTML files. Must be non-negative. 0 implies that no links in the root HTML file are followed.'))
|
help=_('Maximum levels of recursion when following links in HTML files. Must be non-negative. 0 implies that no links in the root HTML file are followed.'))
|
||||||
|
|
||||||
metadata = c.add_group('metadata', _('Set metadata of the generated ebook'))
|
metadata = c.add_group('metadata', _('Set metadata of the generated ebook'))
|
||||||
metadata('title', ['-t', '--title'], default=None,
|
metadata('title', ['-t', '--title'], default=None,
|
||||||
help=_('Set the title. Default is to autodetect.'))
|
help=_('Set the title. Default is to autodetect.'))
|
||||||
@ -965,13 +964,13 @@ def config(defaults=None, config_name='html',
|
|||||||
help=_('A summary of this book.'))
|
help=_('A summary of this book.'))
|
||||||
metadata('from_opf', ['--metadata-from'], default=None,
|
metadata('from_opf', ['--metadata-from'], default=None,
|
||||||
help=_('Load metadata from the specified OPF file'))
|
help=_('Load metadata from the specified OPF file'))
|
||||||
|
|
||||||
debug = c.add_group('debug', _('Options useful for debugging'))
|
debug = c.add_group('debug', _('Options useful for debugging'))
|
||||||
debug('verbose', ['-v', '--verbose'], default=0, action='count',
|
debug('verbose', ['-v', '--verbose'], default=0, action='count',
|
||||||
help=_('Be more verbose while processing. Can be specified multiple times to increase verbosity.'))
|
help=_('Be more verbose while processing. Can be specified multiple times to increase verbosity.'))
|
||||||
debug('pretty_print', ['--pretty-print'], default=False,
|
debug('pretty_print', ['--pretty-print'], default=False,
|
||||||
help=_('Output HTML is "pretty printed" for easier parsing by humans'))
|
help=_('Output HTML is "pretty printed" for easier parsing by humans'))
|
||||||
|
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
@ -980,7 +979,7 @@ def option_parser():
|
|||||||
%prog [options] file.html|opf
|
%prog [options] file.html|opf
|
||||||
|
|
||||||
Follow all links in an HTML file and collect them into the specified directory.
|
Follow all links in an HTML file and collect them into the specified directory.
|
||||||
Also collects any resources like images, stylesheets, scripts, etc.
|
Also collects any resources like images, stylesheets, scripts, etc.
|
||||||
If an OPF file is specified instead, the list of files in its <spine> element
|
If an OPF file is specified instead, the list of files in its <spine> element
|
||||||
is used.
|
is used.
|
||||||
'''))
|
'''))
|
||||||
@ -1056,11 +1055,11 @@ def merge_metadata(htmlfile, opf, opts):
|
|||||||
elif attr == 'tags':
|
elif attr == 'tags':
|
||||||
val = [i.strip() for i in val.split(',') if i.strip()]
|
val = [i.strip() for i in val.split(',') if i.strip()]
|
||||||
setattr(mi, attr, val)
|
setattr(mi, attr, val)
|
||||||
|
|
||||||
cover = getattr(opts, 'cover', False)
|
cover = getattr(opts, 'cover', False)
|
||||||
if cover and os.path.exists(cover):
|
if cover and os.path.exists(cover):
|
||||||
mi.cover = os.path.abspath(cover)
|
mi.cover = os.path.abspath(cover)
|
||||||
|
|
||||||
if not mi.title:
|
if not mi.title:
|
||||||
if htmlfile:
|
if htmlfile:
|
||||||
mi.title = os.path.splitext(os.path.basename(htmlfile))[0]
|
mi.title = os.path.splitext(os.path.basename(htmlfile))[0]
|
||||||
@ -1092,13 +1091,13 @@ def rebase_toc(toc, htmlfile_map, basepath, root=True):
|
|||||||
def fix_entry(entry):
|
def fix_entry(entry):
|
||||||
if entry.abspath in htmlfile_map.keys():
|
if entry.abspath in htmlfile_map.keys():
|
||||||
entry.href = 'content/' + htmlfile_map[entry.abspath]
|
entry.href = 'content/' + htmlfile_map[entry.abspath]
|
||||||
|
|
||||||
for entry in toc:
|
for entry in toc:
|
||||||
rebase_toc(entry, htmlfile_map, basepath, root=False)
|
rebase_toc(entry, htmlfile_map, basepath, root=False)
|
||||||
fix_entry(entry)
|
fix_entry(entry)
|
||||||
if root:
|
if root:
|
||||||
toc.base_path = basepath
|
toc.base_path = basepath
|
||||||
|
|
||||||
def create_dir(htmlfile, opts):
|
def create_dir(htmlfile, opts):
|
||||||
'''
|
'''
|
||||||
Create a directory that contains the open ebook
|
Create a directory that contains the open ebook
|
||||||
@ -1110,16 +1109,16 @@ def create_dir(htmlfile, opts):
|
|||||||
else:
|
else:
|
||||||
opf, filelist = get_filelist(htmlfile, opts)
|
opf, filelist = get_filelist(htmlfile, opts)
|
||||||
mi = merge_metadata(htmlfile, opf, opts)
|
mi = merge_metadata(htmlfile, opf, opts)
|
||||||
|
|
||||||
resource_map, htmlfile_map = parse_content(filelist, opts)
|
resource_map, htmlfile_map = parse_content(filelist, opts)
|
||||||
resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
|
resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
|
||||||
|
|
||||||
if opf and opf.cover and os.access(opf.cover, os.R_OK):
|
if opf and opf.cover and os.access(opf.cover, os.R_OK):
|
||||||
cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1])
|
cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1])
|
||||||
shutil.copyfile(opf.cover, cpath)
|
shutil.copyfile(opf.cover, cpath)
|
||||||
resources.append(cpath)
|
resources.append(cpath)
|
||||||
mi.cover = cpath
|
mi.cover = cpath
|
||||||
|
|
||||||
spine = [htmlfile_map[f.path] for f in filelist]
|
spine = [htmlfile_map[f.path] for f in filelist]
|
||||||
mi = create_metadata(opts.output, mi, spine, resources)
|
mi = create_metadata(opts.output, mi, spine, resources)
|
||||||
buf = cStringIO.StringIO()
|
buf = cStringIO.StringIO()
|
||||||
@ -1132,7 +1131,7 @@ def create_dir(htmlfile, opts):
|
|||||||
with open(os.path.join(opts.output, 'toc.ncx'), 'wb') as f:
|
with open(os.path.join(opts.output, 'toc.ncx'), 'wb') as f:
|
||||||
f.write(toc)
|
f.write(toc)
|
||||||
print 'Open ebook created in', opts.output
|
print 'Open ebook created in', opts.output
|
||||||
|
|
||||||
def create_oebzip(htmlfile, opts):
|
def create_oebzip(htmlfile, opts):
|
||||||
'''
|
'''
|
||||||
Create a zip file that contains the Open ebook.
|
Create a zip file that contains the Open ebook.
|
||||||
@ -1154,13 +1153,13 @@ def main(args=sys.argv):
|
|||||||
parser.print_help()
|
parser.print_help()
|
||||||
print _('You must specify an input HTML file')
|
print _('You must specify an input HTML file')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
htmlfile = args[1]
|
htmlfile = args[1]
|
||||||
if opts.zip:
|
if opts.zip:
|
||||||
create_oebzip(htmlfile, opts)
|
create_oebzip(htmlfile, opts)
|
||||||
else:
|
else:
|
||||||
create_dir(htmlfile, opts)
|
create_dir(htmlfile, opts)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def gui_main(htmlfile, pt=None):
|
def gui_main(htmlfile, pt=None):
|
||||||
@ -1183,7 +1182,7 @@ output = %s
|
|||||||
if len(nontrivial) < 2:
|
if len(nontrivial) < 2:
|
||||||
return None
|
return None
|
||||||
return pt.name
|
return pt.name
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
@ -21,7 +21,7 @@ def config(defaults=None):
|
|||||||
c.remove_opt('profile')
|
c.remove_opt('profile')
|
||||||
mobic = mobi_config(defaults=defaults)
|
mobic = mobi_config(defaults=defaults)
|
||||||
c.update(mobic)
|
c.update(mobic)
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def option_parser(usage=USAGE):
|
def option_parser(usage=USAGE):
|
||||||
usage = usage % ('Mobipocket', formats())
|
usage = usage % ('Mobipocket', formats())
|
||||||
@ -33,13 +33,13 @@ def any2mobi(opts, path, notification=None):
|
|||||||
if not ext:
|
if not ext:
|
||||||
raise ValueError('Unknown file type: '+path)
|
raise ValueError('Unknown file type: '+path)
|
||||||
ext = ext.lower()[1:]
|
ext = ext.lower()[1:]
|
||||||
|
|
||||||
if opts.output is None:
|
if opts.output is None:
|
||||||
opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'
|
opts.output = os.path.splitext(os.path.basename(path))[0]+'.mobi'
|
||||||
|
|
||||||
opts.output = os.path.abspath(opts.output)
|
opts.output = os.path.abspath(opts.output)
|
||||||
orig_output = opts.output
|
orig_output = opts.output
|
||||||
|
|
||||||
with TemporaryDirectory('_any2mobi') as tdir:
|
with TemporaryDirectory('_any2mobi') as tdir:
|
||||||
oebdir = os.path.join(tdir, 'oeb')
|
oebdir = os.path.join(tdir, 'oeb')
|
||||||
os.mkdir(oebdir)
|
os.mkdir(oebdir)
|
||||||
@ -54,7 +54,7 @@ def any2mobi(opts, path, notification=None):
|
|||||||
opts.output = orig_output
|
opts.output = orig_output
|
||||||
logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
|
logging.getLogger('html2epub').info(_('Creating Mobipocket file from EPUB...'))
|
||||||
oeb2mobi(opts, opf)
|
oeb2mobi(opts, opf)
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user