mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix PDF input to TXT output conversion so that it gives correct output
This commit is contained in:
parent
a9a74acbde
commit
754923ce07
@ -25,7 +25,7 @@ class PDFInput(InputFormatPlugin):
|
|||||||
html = pdftohtml(stream.name)
|
html = pdftohtml(stream.name)
|
||||||
|
|
||||||
with open('index.html', 'wb') as index:
|
with open('index.html', 'wb') as index:
|
||||||
index.write(html.encode('utf-8'))
|
index.write(html)
|
||||||
|
|
||||||
#mi = metadata_from_formats([stream.name])
|
#mi = metadata_from_formats([stream.name])
|
||||||
mi = MetaInformation(_('Unknown'), _('Unknown'))
|
mi = MetaInformation(_('Unknown'), _('Unknown'))
|
||||||
|
@ -68,8 +68,8 @@ def pdftohtml(pdf_path):
|
|||||||
raise DRMError()
|
raise DRMError()
|
||||||
|
|
||||||
with open(index, 'rb') as i:
|
with open(index, 'rb') as i:
|
||||||
raw = i.read().decode('latin-1')
|
raw = i.read()
|
||||||
if not '<br' in raw[:4000]:
|
if not '<br' in raw[:4000]:
|
||||||
raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
|
raise ConversionError(os.path.basename(pdf_path) + _(' is an image based PDF. Only conversion of text based PDFs is supported.'), True)
|
||||||
|
|
||||||
return u'<!-- created by calibre\'s pdftohtml -->\n' + raw
|
return '<!-- created by calibre\'s pdftohtml -->\n' + raw
|
||||||
|
Loading…
x
Reference in New Issue
Block a user