Speed up serialization of numbers to PDF by a factor of 10

This commit is contained in:
Kovid Goyal 2013-01-01 13:06:14 +05:30
parent f77765ff3c
commit eca40f6b51
5 changed files with 79 additions and 85 deletions

View File

@ -232,7 +232,15 @@ class PDFOutput(OutputFormatPlugin):
out_stream.seek(0)
out_stream.truncate()
self.log.debug('Rendering pages to PDF...')
import time
st = time.time()
if False:
import cProfile
cProfile.runctx('writer.dump(items, out_stream, PDFMetadata(self.metadata))',
globals(), locals(), '/tmp/profile')
else:
writer.dump(items, out_stream, PDFMetadata(self.metadata))
self.log('Rendered PDF in %g seconds:'%(time.time()-st))
if close:
out_stream.close()

View File

@ -9,8 +9,10 @@ __docformat__ = 'restructuredtext en'
import codecs, zlib
from io import BytesIO
from struct import pack
from decimal import Decimal
from calibre.constants import plugins, ispy3
pdf_float = plugins['speedup'][0].pdf_float
EOL = b'\n'
@ -52,32 +54,25 @@ PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
# Basic PDF datatypes {{{
def format_float(f):
if abs(f) < 1e-7:
return '0'
places = 6
a, b = type(u'')(Decimal(f).quantize(Decimal(10)**-places)).partition('.')[0::2]
b = b.rstrip('0')
if not b:
return '0' if a == '-0' else a
return '%s.%s'%(a, b)
ic = str if ispy3 else unicode
icb = (lambda x: str(x).encode('ascii')) if ispy3 else bytes
def fmtnum(o):
if isinstance(o, (int, long)):
return type(u'')(o)
return format_float(o)
if isinstance(o, float):
return pdf_float(o)
return ic(o)
def serialize(o, stream):
if hasattr(o, 'pdf_serialize'):
o.pdf_serialize(stream)
elif isinstance(o, bool):
stream.write(b'true' if o else b'false')
if isinstance(o, float):
stream.write_raw(pdf_float(o).encode('ascii'))
elif isinstance(o, (int, long)):
stream.write(type(u'')(o).encode('ascii'))
elif isinstance(o, float):
stream.write(format_float(o).encode('ascii'))
stream.write_raw(icb(o))
elif hasattr(o, 'pdf_serialize'):
o.pdf_serialize(stream)
elif o is None:
stream.write(b'null')
stream.write_raw(b'null')
elif isinstance(o, bool):
stream.write_raw(b'true' if o else b'false')
else:
raise ValueError('Unknown object: %r'%o)
@ -103,13 +98,6 @@ class String(unicode):
raw = codecs.BOM_UTF16_BE + s.encode('utf-16-be')
stream.write(b'('+raw+b')')
class GlyphIndex(int):
def pdf_serialize(self, stream):
byts = bytearray(pack(b'>H', self))
stream.write('<%s>'%''.join(map(
lambda x: bytes(hex(x)[2:]).rjust(2, b'0'), byts)))
class Dictionary(dict):
def pdf_serialize(self, stream):
@ -180,6 +168,9 @@ class Stream(BytesIO):
super(Stream, self).write(raw if isinstance(raw, bytes) else
raw.encode('ascii'))
def write_raw(self, raw):
BytesIO.write(self, raw)
class Reference(object):
def __init__(self, num, obj):

View File

@ -9,14 +9,13 @@ __docformat__ = 'restructuredtext en'
import hashlib
from future_builtins import map
from itertools import izip
from PyQt4.Qt import QBuffer, QByteArray, QImage, Qt, QColor, qRgba
from calibre.constants import (__appname__, __version__)
from calibre.ebooks.pdf.render.common import (
Reference, EOL, serialize, Stream, Dictionary, String, Name, Array,
GlyphIndex, fmtnum)
fmtnum)
from calibre.ebooks.pdf.render.fonts import FontManager
from calibre.ebooks.pdf.render.links import Links
@ -166,54 +165,6 @@ class Path(object):
def close(self):
self.ops.append(('h',))
class Text(object):
def __init__(self):
self.transform = self.default_transform = [1, 0, 0, 1, 0, 0]
self.font_name = 'Times-Roman'
self.font_path = None
self.horizontal_scale = self.default_horizontal_scale = 100
self.word_spacing = self.default_word_spacing = 0
self.char_space = self.default_char_space = 0
self.glyph_adjust = self.default_glyph_adjust = None
self.size = 12
self.text = ''
def set_transform(self, *args):
if len(args) == 1:
m = args[0]
vals = [m.m11(), m.m12(), m.m21(), m.m22(), m.dx(), m.dy()]
else:
vals = args
self.transform = vals
def pdf_serialize(self, stream, font_name):
if not self.text: return
stream.write_line('BT ')
serialize(Name(font_name), stream)
stream.write(' %s Tf '%fmtnum(self.size))
stream.write(' '.join(map(fmtnum, self.transform)) + ' Tm ')
if self.horizontal_scale != self.default_horizontal_scale:
stream.write('%s Tz '%fmtnum(self.horizontal_scale))
if self.word_spacing != self.default_word_spacing:
stream.write('%s Tw '%fmtnum(self.word_spacing))
if self.char_space != self.default_char_space:
stream.write('%s Tc '%fmtnum(self.char_space))
stream.write_line()
if self.glyph_adjust is self.default_glyph_adjust:
serialize(String(self.text), stream)
stream.write(' Tj ')
else:
chars = Array()
frac, widths = self.glyph_adjust
for c, width in izip(self.text, widths):
chars.append(String(c))
chars.append(int(width * frac))
serialize(chars, stream)
stream.write(' TJ ')
stream.write_line('ET')
class Catalog(Dictionary):
def __init__(self, pagetree):
@ -244,7 +195,9 @@ class HashingStream(object):
self.last_char = b''
def write(self, raw):
raw = raw if isinstance(raw, bytes) else raw.encode('ascii')
self.write_raw(raw if isinstance(raw, bytes) else raw.encode('ascii'))
def write_raw(self, raw):
self.f.write(raw)
self.hashobj.update(raw)
if raw:
@ -420,9 +373,8 @@ class PDFStream(object):
self.current_page.write(' %s Tf '%fmtnum(size))
self.current_page.write('%s Tm '%' '.join(map(fmtnum, transform)))
for x, y, glyph_id in glyphs:
self.current_page.write('%s %s Td '%(fmtnum(x), fmtnum(y)))
serialize(GlyphIndex(glyph_id), self.current_page)
self.current_page.write(' Tj ')
self.current_page.write_raw(('%s %s Td <%04X> Tj '%(
fmtnum(x), fmtnum(y), glyph_id)).encode('ascii'))
self.current_page.write_line(b' ET')
def get_image(self, cache_key):

View File

@ -105,12 +105,15 @@ def pen(p, xmax, ymax):
p.setPen(pen)
p.drawRect(0, xmax/3, xmax/3, xmax/2)
def text(p, xmax, ymax):
p.drawText(QPoint(0, ymax/3), 'Text')
def main():
app = QApplication([])
app
tdir = gettempdir()
pdf = os.path.join(tdir, 'painter.pdf')
func = pen
func = full
dpi = 100
with open(pdf, 'wb') as f:
dev = PdfDevice(f, xdpi=dpi, ydpi=dpi, compress=False)

View File

@ -3,6 +3,9 @@
#include <stdlib.h>
/*
 * Minimum / maximum of two expressions.
 *
 * Every use of an argument is parenthesized so that arguments containing
 * operators of lower precedence than the comparison (e.g. `a | b`, `c ? d : e`)
 * expand correctly. Each argument may still be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define min(x, y) (((x) < (y)) ? (x) : (y))
#define max(x, y) (((x) > (y)) ? (x) : (y))
static PyObject *
speedup_parse_date(PyObject *self, PyObject *args) {
const char *raw, *orig, *tz;
@ -61,11 +64,48 @@ speedup_parse_date(PyObject *self, PyObject *args) {
(tzh*60 + tzm)*sign*60);
}
/*
 * pdf_float(f) -> unicode
 *
 * Format the double `f` as a plain decimal number (no exponent) with at most
 * six fractional digits and no trailing zeros.
 *
 *  - Values with |f| <= 1e-7 (and NaN, for which the comparison below is
 *    false) are returned as "0".
 *  - For |f| > 1 the number of fractional digits is reduced by the number of
 *    integer digits beyond the first; values with |f| >= 1e6 get none.
 *  - A result that rounds to negative zero is returned as "0" rather than
 *    "-0", matching the Python implementation this function replaced.
 *  - NOTE(review): infinities are passed straight to PyOS_double_to_string
 *    and come back as "inf"/"-inf"; callers are expected not to pass them.
 *
 * Returns a new unicode object, or NULL with an exception set if argument
 * parsing or the float -> string conversion fails.
 */
static PyObject*
speedup_pdf_float(PyObject *self, PyObject *args) {
    double f = 0.0, a = 0.0;
    char *buf = NULL, *dot = NULL;
    const char *text = "0";
    int precision = 6;
    size_t l = 0;
    PyObject *ret = NULL;

    if (!PyArg_ParseTuple(args, "d", &f)) return NULL;

    a = fabs(f);

    if (a > 1.0e-7) {
        /* Pick the precision without casting a huge/infinite log10() result
         * to int: anything with seven or more integer digits gets none. */
        if (a >= 1.0e6) precision = 0;
        else if (a > 1) precision = 6 - (int)log10(a);

        buf = PyOS_double_to_string(f, 'f', precision, 0, NULL);
        if (buf == NULL) {
            /* Never hand a NULL pointer to PyUnicode_FromString(). */
            if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "Float->str failed.");
            return NULL;
        }

        /* Only strip zeros when there is a fractional part; with precision 0
         * the trailing zeros belong to the integer itself. */
        if (precision > 0 && buf[0] != 0) {
            l = strlen(buf) - 1;
            while (l > 0 && buf[l] == '0') l--;
            if (buf[l] == ',' || buf[l] == '.') buf[l] = 0;  /* drop a bare separator */
            else buf[l+1] = 0;
            if ( (dot = strchr(buf, ',')) ) *dot = '.';
        }

        /* e.g. -0.0000002 formats as "-0.000000" and strips down to "-0". */
        text = (strcmp(buf, "-0") == 0) ? "0" : buf;
    }

    ret = PyUnicode_FromString(text);
    if (buf != NULL) PyMem_Free(buf);
    return ret;
}
/*
 * Functions exported by this extension. Each entry is
 * {python-visible name, C implementation, calling convention, docstring};
 * METH_VARARGS means the implementation receives its positional arguments
 * as a tuple. The all-NULL entry terminates the table.
 */
static PyMethodDef speedup_methods[] = {
{"parse_date", speedup_parse_date, METH_VARARGS,
"parse_date()\n\nParse ISO dates faster."
},
/* Implemented by speedup_pdf_float(): takes a single float argument. */
{"pdf_float", speedup_pdf_float, METH_VARARGS,
"pdf_float()\n\nConvert float to a string representation suitable for PDF"
},
/* Sentinel marking the end of the table. */
{NULL, NULL, 0, NULL}
};