From 754923ce07cbd268039b70bb9c8563f217b17730 Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 2 Apr 2009 20:44:48 -0400 Subject: [PATCH] pdf input to txt output giving correct output --- src/calibre/ebooks/pdf/input.py | 2 +- src/calibre/ebooks/pdf/pdftohtml.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 060b9f5367..6f55b71dd5 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -25,7 +25,7 @@ class PDFInput(InputFormatPlugin): html = pdftohtml(stream.name) with open('index.html', 'wb') as index: - index.write(html.encode('utf-8')) + index.write(html) #mi = metadata_from_formats([stream.name]) mi = MetaInformation(_('Unknown'), _('Unknown')) diff --git a/src/calibre/ebooks/pdf/pdftohtml.py b/src/calibre/ebooks/pdf/pdftohtml.py index 168923ad1a..27cdb3f691 100644 --- a/src/calibre/ebooks/pdf/pdftohtml.py +++ b/src/calibre/ebooks/pdf/pdftohtml.py @@ -68,8 +68,8 @@ def pdftohtml(pdf_path): raise DRMError() with open(index, 'rb') as i: - raw = i.read().decode('latin-1') + raw = i.read() if not '\n' + raw + return '\n' + raw