PDF input to TXT output now gives correct output

This commit is contained in:
John Schember 2009-04-02 20:44:48 -04:00
parent a9a74acbde
commit 754923ce07
2 changed files with 3 additions and 3 deletions

View File

@@ -25,7 +25,7 @@ class PDFInput(InputFormatPlugin):
html = pdftohtml(stream.name)
with open('index.html', 'wb') as index:
-index.write(html.encode('utf-8'))
+index.write(html)
#mi = metadata_from_formats([stream.name])
mi = MetaInformation(_('Unknown'), _('Unknown'))

View File

@@ -68,8 +68,8 @@ def pdftohtml(pdf_path):
raise DRMError()
with open(index, 'rb') as i:
-raw = i.read().decode('latin-1')
+raw = i.read()
if not '<br' in raw[:4000]:
raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
-return u'<!-- created by calibre\'s pdftohtml -->\n' + raw
+return '<!-- created by calibre\'s pdftohtml -->\n' + raw