From 0705abf946b3cebcb4aad5f23c1924be84c27394 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 17 Jul 2019 09:56:03 +0530 Subject: [PATCH] Convert individual HTML files separately Merge fonts instead. --- src/calibre/ebooks/pdf/html_writer.py | 48 ++++++++------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py index 3ff02260c9..ed5abfb841 100644 --- a/src/calibre/ebooks/pdf/html_writer.py +++ b/src/calibre/ebooks/pdf/html_writer.py @@ -22,7 +22,6 @@ from calibre.constants import iswindows from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet from calibre.ebooks.oeb.base import XHTML from calibre.ebooks.oeb.polish.container import Container as ContainerBase -from calibre.ebooks.oeb.polish.split import merge_html from calibre.ebooks.oeb.polish.toc import get_toc from calibre.ebooks.pdf.image_writer import ( Image, PDFMetadata, draw_image_page, get_page_layout @@ -253,39 +252,20 @@ def add_cover(pdf_doc, cover_data, page_layout, opts): # Margin groups {{{ Margins = namedtuple('Margins', 'left top right bottom') +MarginFile = namedtuple('MarginFile', 'name margins') def dict_to_margins(val, d=None): return Margins(val.get('left', d), val.get('top', d), val.get('right', d), val.get('bottom', d)) -def create_margin_groups(container, name_anchor_map): - - def merge_group(group): - if len(group) > 1: - group_margins = group[0][1] - names = [name for (name, margins) in group] - first_anchor_map = merge_html(container, names, names[0], insert_page_breaks=True) - name_anchor_map.update(first_anchor_map) - group = [(names[0], group_margins)] - return group - - groups = [] - current_group = [] +def create_margin_files(container): for name, is_linear in container.spine_names: root = container.parsed(name) margins = root.get('data-calibre-pdf-output-page-margins') if margins: margins = dict_to_margins(json.loads(margins)) - if current_group: - prev_margins = current_group[-1][1] - if prev_margins != margins: - groups.append(merge_group(current_group)) - current_group = [] - current_group.append((name, margins)) - if current_group: - groups.append(merge_group(current_group)) - return groups + yield MarginFile(name, margins) # }}} @@ -303,14 +283,14 @@ def add_anchors_markup(root, uuid, anchors): a(uuid) -def add_all_links(container, margin_groups): +def add_all_links(container, margin_files): uuid = uuid4() name_anchor_map = {} for name, is_linear in container.spine_names: root = container.parsed(name) name_anchor_map[name] = frozenset(root.xpath('//*/@id')) - for group in margin_groups: - name = group[0][0] + for margin_file in margin_files: + name = margin_file.name anchors = name_anchor_map.get(name, set()) add_anchors_markup(container.parsed(name), uuid, anchors) container.dirty(name) @@ -395,8 +375,7 @@ def get_anchor_locations(pdf_doc, first_page_num, toc_uuid): ans = {} anchors = pdf_doc.extract_anchors() toc_pagenum = anchors.pop(toc_uuid)[0] - for r in range(pdf_doc.page_count(), toc_pagenum - 1, -1): - pdf_doc.delete_page(r - 1) + pdf_doc.delete_pages(toc_pagenum, pdf_doc.page_count() - toc_pagenum + 1) for anchor, loc in iteritems(anchors): loc = list(loc) loc[0] += first_page_num - 1 @@ -537,10 +516,10 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co container = Container(opf_path, log) report_progress(0.05, _('Parsed all content for markup transformation')) name_anchor_map = make_anchors_unique(container) - margin_groups = create_margin_groups(container, name_anchor_map) + margin_files = tuple(create_margin_files(container)) toc = get_toc(container, verify_destinations=False) has_toc = toc and len(toc) - links_page_uuid = add_all_links(container, margin_groups) + links_page_uuid = add_all_links(container, margin_files) container.commit() report_progress(0.1, _('Completed markup transformation')) @@ -549,13 +528,12 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co pdf_doc = None anchor_locations = {} jobs = [] - for group in margin_groups: - name, margins = group[0] - jobs.append(job_for_name(container, name, margins, page_layout)) + for margin_file in margin_files: + jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout)) results = manager.convert_html_files(jobs, settle_time=1) num_pages = 0 - for group in margin_groups: - name, margins = group[0] + for margin_file in margin_files: + name = margin_file.name data = results[name] if not isinstance(data, bytes): raise SystemExit(data)