From 848b3d166e2a9850b012c3051efbd1ef43032695 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 4 Jan 2018 20:02:29 +0530
Subject: [PATCH] PDF Input: Fix conversion of multi-level PDF Outline causing duplicate entries in the Table of Contents. Fixes #1738385 [Duplicated ToC entries in PDF to any format conversion](https://bugs.launchpad.net/calibre/+bug/1738385)

---
 src/calibre/ebooks/pdf/pdftohtml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py
index ec1ae9366e..31a59c3f1f 100644
--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@@ -140,13 +140,13 @@ def parse_outline(raw, output_dir):
     count = [0]
 
     def process_node(node, toc):
-        for child in node.iterdescendants('*'):
+        for child in node.iterchildren('*'):
             if child.tag == 'outline':
                 parent = toc.children[-1] if toc.children else toc
                 process_node(child, parent)
             else:
                 page = child.get('page', '1')
-                toc.add(child.text, 'index.html', 'p' + page)
+                toc.add(child.text or '', 'index.html', 'p' + page)
                 count[0] += 1
     process_node(outline, toc)
     if count[0] > 2: