From e917f2cf111901db8f1909d62b7c63f05993adac Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 17 Oct 2008 11:23:45 -0700 Subject: [PATCH] Fix #1174 (epub: failure to convert) --- src/calibre/ebooks/epub/split.py | 2 +- src/calibre/manual/faq.rst | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py index f694fe32b2..2d815354a3 100644 --- a/src/calibre/ebooks/epub/split.py +++ b/src/calibre/ebooks/epub/split.py @@ -90,7 +90,7 @@ class Splitter(LoggingInterface): pre.text = text for child in list(pre.iterchildren()): pre.remove(child) - pre.tail += tostring(child, pretty_print=False) + pre.text += u''.join(child.xpath('./text()')) if len(pre.text) > self.opts.profile.flow_size*0.5: frags = self.split_text(pre.text, root, int(0.2*self.opts.profile.flow_size)) new_pres = [] diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 9dd90eec82..1c99c2afd5 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -60,9 +60,24 @@ Why does the PDF conversion lose some images/tables? The PDF conversion tries to extract the text and images from the PDF file and convert them to and HTML based ebook. Some PDF files have images in a format that cannot be extracted (vector images). All tables are also represented as vector diagrams, thus they cannot be extracted. -There are no images in the LRF file after conversion from HTML, or, why is only one HTML file being converted and not the others it links to? +How do I convert a collection of HTML files in a specific order? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you use the GUI to convert an HTML file, you have to create a zip file with the HTML file and any images/extra files it references and then convert that ZIP file to LRF. 
+In order to convert a collection of HTML files in a specific order, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like:: + + + +

Table of Contents

+

+ First File
+ Second File
+ . + . + . +

+ + + +Then just add this HTML file to the GUI and use the convert button to create your ebook. How do I convert my file containing non-English characters? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~