mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
pdf input to txt output giving correct output
This commit is contained in:
parent
a9a74acbde
commit
754923ce07
@@ -25,7 +25,7 @@ class PDFInput(InputFormatPlugin):
|
||||
html = pdftohtml(stream.name)
|
||||
|
||||
with open('index.html', 'wb') as index:
|
||||
index.write(html.encode('utf-8'))
|
||||
index.write(html)
|
||||
|
||||
#mi = metadata_from_formats([stream.name])
|
||||
mi = MetaInformation(_('Unknown'), _('Unknown'))
|
||||
|
@@ -68,8 +68,8 @@ def pdftohtml(pdf_path):
|
||||
raise DRMError()
|
||||
|
||||
with open(index, 'rb') as i:
|
||||
raw = i.read().decode('latin-1')
|
||||
raw = i.read()
|
||||
if not '<br' in raw[:4000]:
|
||||
raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
|
||||
|
||||
return u'<!-- created by calibre\'s pdftohtml -->\n' + raw
|
||||
return '<!-- created by calibre\'s pdftohtml -->\n' + raw
|
||||
|
Loading…
x
Reference in New Issue
Block a user