diff --git a/src/calibre/ebooks/html/output.py b/src/calibre/ebooks/html/output.py index f1178d3259..a083e0f4f1 100644 --- a/src/calibre/ebooks/html/output.py +++ b/src/calibre/ebooks/html/output.py @@ -5,7 +5,9 @@ __docformat__ = 'restructuredtext en' import os, re, shutil -from os.path import dirname, abspath, relpath, exists +from calibre.utils import zipfile + +from os.path import dirname, abspath, relpath, exists, basename from lxml import etree from templite import Templite @@ -37,7 +39,9 @@ class HTMLOutput(OutputFormatPlugin): help=_('Template used for the generation of the html contents of the book instead of the default file')), OptionRecommendation(name='extract_to', - help=_('Extract the contents of the generated ZIP file to the directory of the generated ZIP file') + help=_('Extract the contents of the generated ZIP file to the ' + 'specified directory. WARNING: The contents of the directory ' + 'will be deleted.') ), ]) @@ -70,7 +74,7 @@ class HTMLOutput(OutputFormatPlugin): def generate_html_toc(self, oeb_book, ref_url, output_dir): root = self.generate_toc(oeb_book, ref_url, output_dir) return etree.tostring(root, pretty_print=True, encoding='utf-8', - xml_declaration=True) + xml_declaration=False) def convert(self, oeb_book, output_path, input_plugin, opts, log): @@ -100,7 +104,7 @@ class HTMLOutput(OutputFormatPlugin): tempdir = PersistentTemporaryDirectory() output_file = os.path.join(tempdir, - os.path.basename(re.sub(r'\.zip', '', output_path)+'.html')) + basename(re.sub(r'\.zip', '', output_path)+'.html')) output_dir = re.sub(r'\.html', '', output_file)+'_files' if not exists(output_dir): @@ -119,7 +123,8 @@ class HTMLOutput(OutputFormatPlugin): tocUrl = relpath(output_file, dirname(output_file)) t = templite.render(has_toc=bool(oeb_book.toc.count()), toc=html_toc, meta=meta, nextLink=nextLink, - tocUrl=tocUrl, cssLink=cssLink) + tocUrl=tocUrl, cssLink=cssLink, + firstContentPageLink=nextLink) f.write(t) with CurrentDir(output_dir): @@ -151,6 +156,7 @@ class HTMLOutput(OutputFormatPlugin): body = root.xpath('//h:body', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0] ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8') ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content) + ebook_content = re.sub(r'<(div|a|span)([^>]*)/>', r'<\1\2>', ebook_content) # generate link to next page if item.spine_position+1 < len(oeb_book.spine): @@ -168,6 +174,7 @@ class HTMLOutput(OutputFormatPlugin): cssLink = relpath(abspath(css_path), dir) tocUrl = relpath(output_file, dir) + firstContentPageLink = oeb_book.spine[0].href # render template templite = Templite(template_html_data) @@ -176,7 +183,8 @@ class HTMLOutput(OutputFormatPlugin): prevLink=prevLink, nextLink=nextLink, has_toc=bool(oeb_book.toc.count()), toc=toc, tocUrl=tocUrl, head_content=head_content, - meta=meta, cssLink=cssLink) + meta=meta, cssLink=cssLink, + firstContentPageLink=firstContentPageLink) # write html to file with open(path, 'wb') as f: @@ -184,7 +192,8 @@ class HTMLOutput(OutputFormatPlugin): item.unload_data_from_memory(memory=path) zfile = ZipFile(output_path, "w") - zfile.add_dir(output_dir) + zfile.add_dir(output_dir, basename(output_dir)) + zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED) if opts.extract_to: if os.path.exists(opts.extract_to): @@ -197,5 +206,3 @@ class HTMLOutput(OutputFormatPlugin): # cleanup temp dir shutil.rmtree(tempdir) - -