Sync to pluginize

This commit is contained in:
John Schember 2009-05-25 07:00:35 -04:00
commit 3432735e48
12 changed files with 139 additions and 44 deletions

View File

@ -679,7 +679,7 @@ def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html, html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
opts.preprocess_html) opts.preprocess_html)
oeb = OEBBook(log, html_preprocessor, oeb = OEBBook(log, html_preprocessor,
pretty_print=opts.pretty_print, encoding=encoding) pretty_print=opts.pretty_print, input_encoding=encoding)
# Read OEB Book into OEBBook # Read OEB Book into OEBBook
log('Parsing all content...') log('Parsing all content...')
if reader is None: if reader is None:

View File

@ -16,7 +16,7 @@ from urlparse import urlparse, urlunparse
from urllib import unquote from urllib import unquote
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from calibre import unicode_path from calibre import unicode_path
@ -264,7 +264,7 @@ class HTMLInput(InputFormatPlugin):
def convert(self, stream, opts, file_ext, log, def convert(self, stream, opts, file_ext, log,
accelerators): accelerators):
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.html import get_metadata_
basedir = os.getcwd() basedir = os.getcwd()
self.opts = opts self.opts = opts
@ -275,18 +275,16 @@ class HTMLInput(InputFormatPlugin):
opfpath = stream.name opfpath = stream.name
else: else:
filelist = get_filelist(stream.name, basedir, opts, log) filelist = get_filelist(stream.name, basedir, opts, log)
mi = get_metadata(stream, 'html') mi = get_metadata_(stream.read(), opts.input_encoding)
mi = OPFCreator(os.getcwdu(), mi) mi = OPFCreator(os.getcwdu(), mi)
mi.guide = None mi.guide = None
entries = [(f.path, 'application/xhtml+xml') for f in filelist] entries = [(f.path, 'application/xhtml+xml') for f in filelist]
mi.create_manifest(entries) mi.create_manifest(entries)
mi.create_spine([f.path for f in filelist]) mi.create_spine([f.path for f in filelist])
mi.render(open('metadata.opf', 'wb')) mi.render(open('metadata.opf', 'wb'), encoding=opts.input_encoding)
opfpath = os.path.abspath('metadata.opf') opfpath = os.path.abspath('metadata.opf')
opf = OPF(opfpath, os.getcwdu())
if opts.dont_package: if opts.dont_package:
return opfpath return opfpath

View File

@ -12,8 +12,17 @@ import re
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
def get_metadata(stream): def get_metadata(stream):
src = xml_to_unicode(stream.read())[0] src = stream.read()
return get_metadata_(src)
def get_metadata_(src, encoding=None):
if not isinstance(src, unicode):
if not encoding:
src = xml_to_unicode(src)[0]
else:
src = src.decode(encoding, 'replace')
# Title # Title
title = None title = None

View File

@ -1,4 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<package version="2.0" <package version="2.0"
xmlns="http://www.idpf.org/2007/opf" xmlns="http://www.idpf.org/2007/opf"
xmlns:py="http://genshi.edgewall.org/" xmlns:py="http://genshi.edgewall.org/"

View File

@ -924,9 +924,11 @@ class OPFCreator(MetaInformation):
self.guide.set_basedir(self.base_path) self.guide.set_basedir(self.base_path)
def render(self, opf_stream=sys.stdout, ncx_stream=None, def render(self, opf_stream=sys.stdout, ncx_stream=None,
ncx_manifest_entry=None): ncx_manifest_entry=None, encoding=None):
from calibre.resources import opf_template from calibre.resources import opf_template
from calibre.utils.genshi.template import MarkupTemplate from calibre.utils.genshi.template import MarkupTemplate
if encoding is None:
encoding = 'utf-8'
template = MarkupTemplate(opf_template) template = MarkupTemplate(opf_template)
toc = getattr(self, 'toc', None) toc = getattr(self, 'toc', None)
if self.manifest: if self.manifest:
@ -948,7 +950,11 @@ class OPFCreator(MetaInformation):
cover = os.path.abspath(os.path.join(self.base_path, cover)) cover = os.path.abspath(os.path.join(self.base_path, cover))
self.guide.set_cover(cover) self.guide.set_cover(cover)
self.guide.set_basedir(self.base_path) self.guide.set_basedir(self.base_path)
opf = template.generate(__appname__=__appname__, mi=self, __version__=__version__).render('xml') opf = template.generate(
__appname__=__appname__, mi=self,
__version__=__version__).render('xml', encoding=encoding)
opf_stream.write('<?xml version="1.0" encoding="%s" ?>\n'
%encoding.upper())
opf_stream.write(opf) opf_stream.write(opf)
opf_stream.flush() opf_stream.flush()
if toc is not None and ncx_stream is not None: if toc is not None and ncx_stream is not None:

View File

@ -1516,7 +1516,8 @@ class OEBBook(object):
def __init__(self, logger, def __init__(self, logger,
html_preprocessor, html_preprocessor,
css_preprocessor=CSSPreProcessor(), css_preprocessor=CSSPreProcessor(),
encoding='utf-8', pretty_print=False): encoding='utf-8', pretty_print=False,
input_encoding='utf-8'):
"""Create empty book. Arguments: """Create empty book. Arguments:
:param:`encoding`: Default encoding for textual content read :param:`encoding`: Default encoding for textual content read
@ -1549,6 +1550,7 @@ class OEBBook(object):
""" """
_css_log_handler.log = logger _css_log_handler.log = logger
self.encoding = encoding self.encoding = encoding
self.input_encoding = input_encoding
self.html_preprocessor = html_preprocessor self.html_preprocessor = html_preprocessor
self.css_preprocessor = css_preprocessor self.css_preprocessor = css_preprocessor
self.pretty_print = pretty_print self.pretty_print = pretty_print
@ -1588,9 +1590,9 @@ class OEBBook(object):
return fix_data(data.decode('utf-16')) return fix_data(data.decode('utf-16'))
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
if self.encoding is not None: if self.input_encoding is not None:
try: try:
return fix_data(data.decode(self.encoding, 'replace')) return fix_data(data.decode(self.input_encoding, 'replace'))
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
try: try:

View File

@ -130,6 +130,9 @@ class EbookIterator(object):
plumber.opts.dont_package = True plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'): if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True plumber.opts.no_process = True
if hasattr(plumber.input_plugin, '_preprocess_html_for_viewer'):
plumber.input_plugin._preprocess_html_for_viewer = True
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'), self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log, plumber.opts, plumber.input_fmt, self.log,
{}, self.base) {}, self.base)

View File

@ -30,6 +30,7 @@ class OEBOutput(OutputFormatPlugin):
raw = etree.tostring(root, pretty_print=True, raw = etree.tostring(root, pretty_print=True,
encoding='utf-8') encoding='utf-8')
with open(href, 'wb') as f: with open(href, 'wb') as f:
f.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
f.write(raw) f.write(raw)
for item in oeb_book.manifest: for item in oeb_book.manifest:

View File

@ -17,10 +17,18 @@ class PDFInput(InputFormatPlugin):
description = 'Convert PDF files to HTML' description = 'Convert PDF files to HTML'
file_types = set(['pdf']) file_types = set(['pdf'])
_preprocess_html_for_viewer = False
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
html = pdftohtml(stream.name) html = pdftohtml(stream.name)
if self._preprocess_html_for_viewer:
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
prepro = HTMLPreProcessor(lambda x:x, False)
html = prepro(html.decode('utf-8')).encode('utf-8')
with open('index.html', 'wb') as index: with open('index.html', 'wb') as index:
index.write(html) index.write(html)

View File

@ -30,6 +30,13 @@ PARALLEL_FUNCS = {
'read_metadata' : 'read_metadata' :
('calibre.ebooks.metadata.worker', 'read_metadata_', 'notification'), ('calibre.ebooks.metadata.worker', 'read_metadata_', 'notification'),
'read_pdf_metadata' :
('calibre.utils.podofo.__init__', 'get_metadata_', None),
'write_pdf_metadata' :
('calibre.utils.podofo.__init__', 'set_metadata_', None),
'save_book' : 'save_book' :
('calibre.ebooks.metadata.worker', 'save_book', 'notification'), ('calibre.ebooks.metadata.worker', 'save_book', 'notification'),
} }

View File

@ -6,11 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, time
from calibre.constants import plugins, preferred_encoding from calibre.constants import plugins, preferred_encoding
from calibre.ebooks.metadata import MetaInformation, string_to_authors, \ from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
authors_to_string authors_to_string
from calibre.utils.ipc.job import ParallelJob
from calibre.utils.ipc.server import Server
from calibre.ptempfile import PersistentTemporaryFile
podofo, podofo_err = plugins['podofo'] podofo, podofo_err = plugins['podofo']
@ -19,22 +22,43 @@ class Unavailable(Exception): pass
def get_metadata(stream): def get_metadata(stream):
if not podofo: if not podofo:
raise Unavailable(podofo_err) raise Unavailable(podofo_err)
raw = stream.read() pt = PersistentTemporaryFile('_podofo.pdf')
stream.seek(0) pt.write(stream.read())
p = podofo.PDFDoc() pt.close()
p.load(raw) server = Server(pool_size=1)
title = p.title job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
if not title: lambda x,y:x, args=[pt.name])
server.add_job(job)
while not job.is_finished:
time.sleep(0.1)
job.update()
job.update()
server.close()
if job.result is None:
raise ValueError('Failed to read metadata: ' + job.details)
title, authors, creator = job.result
if title == '_':
title = getattr(stream, 'name', _('Unknown')) title = getattr(stream, 'name', _('Unknown'))
title = os.path.splitext(os.path.basename(title))[0] title = os.path.splitext(title)[0]
author = p.author
authors = string_to_authors(author) if author else [_('Unknown')]
mi = MetaInformation(title, authors) mi = MetaInformation(title, authors)
creator = p.creator
if creator: if creator:
mi.book_producer = creator mi.book_producer = creator
if os.path.exists(pt.name): os.remove(pt.name)
return mi return mi
def get_metadata_(path):
    """Read basic metadata from the PDF file at ``path`` using podofo.

    Returns a ``(title, authors, creator)`` tuple. When the document has
    no title the placeholder string ``'_'`` is returned in its place, and
    when it has no author the authors list is ``[_('Unknown')]``.
    """
    doc = podofo.PDFDoc()
    doc.open(path)

    # '_' stands in for a missing/empty title.
    title = doc.title or '_'

    raw_author = doc.author
    if raw_author:
        authors = string_to_authors(raw_author)
    else:
        authors = [_('Unknown')]

    return (title, authors, doc.creator)
def prep(val): def prep(val):
if not val: if not val:
return u'' return u''
@ -45,21 +69,43 @@ def prep(val):
def set_metadata(stream, mi): def set_metadata(stream, mi):
if not podofo: if not podofo:
raise Unavailable(podofo_err) raise Unavailable(podofo_err)
raw = stream.read() pt = PersistentTemporaryFile('_podofo.pdf')
pt.write(stream.read())
pt.close()
server = Server(pool_size=1)
job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
lambda x,y:x, args=[pt.name, mi.title, mi.authors, mi.book_producer])
server.add_job(job)
while not job.is_finished:
time.sleep(0.1)
job.update()
job.update()
server.close()
if job.result is not None:
stream.seek(0)
stream.truncate()
stream.write(job.result)
stream.flush()
stream.seek(0)
def set_metadata_(path, title, authors, bkp):
p = podofo.PDFDoc() p = podofo.PDFDoc()
p.load(raw) p.open(path)
title = prep(mi.title) title = prep(title)
touched = False touched = False
if title: if title:
p.title = title p.title = title
touched = True touched = True
author = prep(authors_to_string(mi.authors)) author = prep(authors_to_string(authors))
if author: if author:
p.author = author p.author = author
touched = True touched = True
bkp = prep(mi.book_producer) bkp = prep(bkp)
if bkp: if bkp:
p.creator = bkp p.creator = bkp
touched = True touched = True
@ -68,12 +114,7 @@ def set_metadata(stream, mi):
from calibre.ptempfile import TemporaryFile from calibre.ptempfile import TemporaryFile
with TemporaryFile('_pdf_set_metadata.pdf') as f: with TemporaryFile('_pdf_set_metadata.pdf') as f:
p.save(f) p.save(f)
raw = open(f, 'rb').read() return open(f, 'rb').read()
stream.seek(0)
stream.truncate()
stream.write(raw)
stream.flush()
stream.seek(0)
if __name__ == '__main__': if __name__ == '__main__':
f = '/tmp/t.pdf' f = '/tmp/t.pdf'

View File

@ -64,6 +64,24 @@ podofo_PDFDoc_load(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
return Py_None; return Py_None;
} }
/*
 * PDFDoc.open(path): load a PDF document from a file path.
 *
 * Expects a single string argument in args. Returns None on success.
 * Returns NULL if the arguments cannot be parsed, or if loading throws a
 * PdfError (which is handed to podofo_set_exception before returning).
 */
static PyObject *
podofo_PDFDoc_open(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    char *path;

    /* Bail out early when the argument tuple is not a single string. */
    if (!PyArg_ParseTuple(args, "s", &path))
        return NULL;

    try {
        self->doc->Load(path);
    } catch (const PdfError &err) {
        podofo_set_exception(err);
        return NULL;
    }

    Py_RETURN_NONE;
}
static PyObject * static PyObject *
podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
char *buffer; char *buffer;
@ -232,6 +250,9 @@ static PyMethodDef podofo_PDFDoc_methods[] = {
{"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS, {"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS,
"Load a PDF document from a byte buffer (string)" "Load a PDF document from a byte buffer (string)"
}, },
{"open", (PyCFunction)podofo_PDFDoc_open, METH_VARARGS,
"Load a PDF document from a file path (string)"
},
{"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS, {"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS,
"Save the PDF document to a path on disk" "Save the PDF document to a path on disk"
}, },