Sync to pluginize

This commit is contained in:
John Schember 2009-04-25 07:43:30 -04:00
commit 0861a58127
9 changed files with 283 additions and 60 deletions

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, re, os, shutil, cStringIO, tempfile, subprocess, time
import sys, re, os, subprocess
sys.path.append('src')
iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform
@ -54,7 +54,7 @@ if __name__ == '__main__':
build_osx, upload_installers, upload_user_manual, \
upload_to_pypi, stage3, stage2, stage1, upload, \
upload_rss
entry_points['console_scripts'].append(
'calibre_postinstall = calibre.linux:post_install')
ext_modules = [
@ -65,12 +65,15 @@ if __name__ == '__main__':
'src/calibre/utils/lzx/lzc.c',
'src/calibre/utils/lzx/lzxc.c'],
include_dirs=['src/calibre/utils/lzx']),
Extension('calibre.plugins.msdes',
sources=['src/calibre/utils/msdes/msdesmodule.c',
'src/calibre/utils/msdes/des.c'],
include_dirs=['src/calibre/utils/msdes']),
Extension('calibre.plugins.cPalmdoc',
sources=['src/calibre/ebooks/mobi/palmdoc.c']),
PyQtExtension('calibre.plugins.pictureflow',
['src/calibre/gui2/pictureflow/pictureflow.cpp',
'src/calibre/gui2/pictureflow/pictureflow.h'],
@ -81,7 +84,7 @@ if __name__ == '__main__':
ext_modules.append(Extension('calibre.plugins.winutil',
sources=['src/calibre/utils/windows/winutil.c'],
libraries=['shell32', 'setupapi'],
include_dirs=os.environ.get('INCLUDE',
include_dirs=os.environ.get('INCLUDE',
'C:/WinDDK/6001.18001/inc/api/;'
'C:/WinDDK/6001.18001/inc/crt/').split(';'),
extra_compile_args=['/X']
@ -91,7 +94,7 @@ if __name__ == '__main__':
sources=['src/calibre/devices/usbobserver/usbobserver.c'],
extra_link_args=['-framework', 'IOKit'])
)
if not iswindows:
plugins = ['plugins/%s.so'%(x.name.rpartition('.')[-1]) for x in ext_modules]
else:
@ -99,7 +102,7 @@ if __name__ == '__main__':
['plugins/%s.pyd.manifest'%(x.name.rpartition('.')[-1]) \
for x in ext_modules if 'pictureflow' not in x.name]
setup(
name = APPNAME,
packages = find_packages('src'),
@ -152,9 +155,9 @@ if __name__ == '__main__':
'Topic :: System :: Hardware :: Hardware Drivers'
],
cmdclass = {
'build_ext' : build_ext,
'build_ext' : build_ext,
'build' : build,
'build_py' : build_py,
'build_py' : build_py,
'pot' : pot,
'manual' : manual,
'resources' : resources,

View File

@ -2,7 +2,7 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, re, logging, time, subprocess, atexit, mimetypes, \
import sys, os, re, logging, time, subprocess, mimetypes, \
__builtin__, warnings
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint
@ -71,7 +71,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
**WARNING:** This function also replaces path separators, so only pass file names
and not full paths to it.
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
are encoded in the filesystem encoding of the platform, or UTF-8.
are encoded in the filesystem encoding of the platform, or UTF-8.
'''
if isinstance(name, unicode):
name = name.encode(filesystem_encoding, 'ignore')
@ -159,7 +159,7 @@ def extract(path, dir):
def get_proxies():
proxies = {}
for q in ('http', 'ftp'):
proxy = os.environ.get(q+'_proxy', None)
if not proxy: continue
@ -194,8 +194,8 @@ def get_proxies():
def browser(honor_time=True, max_time=2, mobile_browser=False):
'''
Create a mechanize browser for web scraping. The browser handles cookies,
refresh requests and ignores robots.txt. Also uses proxy if available.
refresh requests and ignores robots.txt. Also uses proxy if available.
:param honor_time: If True honors pause time in refresh requests
:param max_time: Maximum time in seconds to wait during a refresh request
'''
@ -232,16 +232,16 @@ def fit_image(width, height, pwidth, pheight):
return scaled, int(width), int(height)
class CurrentDir(object):
    '''
    Context manager that switches the process working directory to `path`
    for the duration of the ``with`` block and switches back on exit.

    Entering the block yields the directory that was current beforehand.
    '''

    def __init__(self, path):
        # cwd stays None until the context is actually entered
        self.path, self.cwd = path, None

    def __enter__(self, *args):
        previous = self.cwd = os.getcwd()
        os.chdir(self.path)
        return previous

    def __exit__(self, *args):
        # Go back to wherever we were when the block was entered
        os.chdir(self.cwd)

View File

@ -53,7 +53,7 @@ if plugins is None:
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes'] + \
for plugin in ['pictureflow', 'lzx', 'msdes', 'cPalmdoc'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:

View File

@ -0,0 +1,204 @@
/*
:mod:`cPalmdoc` -- Palmdoc compression/decompression
=====================================================
.. module:: cPalmdoc
:platform: All
:synopsis: Compression and decompression of PalmDoc data, implemented in C for speed
.. moduleauthor:: Kovid Goyal <kovid@kovidgoyal.net> Copyright 2009
*/
/* Make the "#" length formats of PyArg_ParseTuple/Py_BuildValue use
 * Py_ssize_t; this has to be defined before Python.h is included. */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdio.h>

/* NOTE(review): DELTA and MIN are not referenced in the visible part of this
 * file, and the expansion of DELTA is not parenthesized -- confirm before use. */
#define DELTA sizeof(Byte)*4096
/* Number of chars initially allocated for the output of cpalmdoc_decompress */
#define BUFFER 6000
#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) )

/* Unsigned type used to hold byte values (0-255) after they have been mapped
 * from possibly signed chars. Note that it is wider than a single octet. */
typedef unsigned short int Byte;

/* A sequence of Byte values together with its length */
typedef struct {
    Byte *data;      /* the values */
    Py_ssize_t len;  /* number of entries in data */
} buffer;

/* Local boolean vocabulary; any macros already using these names are
 * removed first so that the definitions below always apply. */
#ifdef bool
#undef bool
#endif
#define bool int
#ifdef false
#undef false
#endif
#define false 0
#ifdef true
#undef true
#endif
#define true 1

/* Map a value in 0-255 onto -128..127 (values above 127 have 256 subtracted)
 * so that it can be stored in a char */
#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x))
/*
 * decompress(bytestring) -> decompressed bytestring
 *
 * Expand a PalmDoc compressed byte string. The stream is a sequence of
 * tokens selected by their first byte:
 *   0x01-0x08        copy that many following bytes verbatim
 *   0x00, 0x09-0x7F  the byte stands for itself
 *   0xC0-0xFF        a space followed by (byte ^ 0x80)
 *   0x80-0xBF        together with the next byte forms a 16 bit value; bits
 *                    3-13 hold the distance back into the output and the low
 *                    three bits hold the copy length minus 3
 *
 * Returns a new string object. Returns NULL with MemoryError set if the
 * output buffer cannot be allocated, or with ValueError set if the stream is
 * truncated or contains a back reference that points before the start of the
 * output (or has a distance of zero).
 */
static PyObject *
cpalmdoc_decompress(PyObject *self, PyObject *args) {
    const char *_input = NULL; Py_ssize_t input_len = 0;
    const unsigned char *input;
    char *output, *grown;
    PyObject *ans;
    Py_ssize_t i = 0, o = 0, di, n, capacity = BUFFER;
    unsigned int c;

    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
        return NULL;
    /* Read the input as unsigned bytes so that values above 127 compare
     * correctly regardless of the signedness of plain char */
    input = (const unsigned char *)_input;

    output = (char *)PyMem_Malloc(sizeof(char)*capacity);
    if (output == NULL) return PyErr_NoMemory();

    while (i < input_len) {
        /* A single token writes at most 10 bytes (the longest back
         * reference), so making room once per token is sufficient */
        if (capacity - o < 10) {
            if (capacity > PY_SSIZE_T_MAX / 2) {
                PyMem_Free(output);
                return PyErr_NoMemory();
            }
            grown = (char *)PyMem_Realloc(output, sizeof(char)*capacity*2);
            if (grown == NULL) {
                PyMem_Free(output);
                return PyErr_NoMemory();
            }
            output = grown;
            capacity *= 2;
        }

        c = input[i++];
        if (c >= 1 && c <= 8) { /* copy 'c' bytes */
            if (input_len - i < (Py_ssize_t)c) goto corrupt;
            while (c--) output[o++] = (char)input[i++];
        }
        else if (c <= 0x7F) /* 0, 09-7F = self */
            output[o++] = (char)c;
        else if (c >= 0xC0) { /* space + ASCII char */
            output[o++] = ' ';
            output[o++] = (char)(c ^ 0x80);
        }
        else { /* 80-BF repeat sequences */
            if (i >= input_len) goto corrupt;
            c = (c << 8) + input[i++];
            di = (c & 0x3FFF) >> 3;
            /* The source of the copy must lie inside what has been written */
            if (di < 1 || di > o) goto corrupt;
            /* Copy byte by byte: source and destination may overlap */
            for ( n = (c & 7) + 3; n--; ++o )
                output[o] = output[o - di];
        }
    }
    ans = Py_BuildValue("s#", output, o);
    PyMem_Free(output);
    return ans;

corrupt:
    PyMem_Free(output);
    PyErr_SetString(PyExc_ValueError, "Corrupt PalmDoc compressed data");
    return NULL;
}
/*
 * Return true if the first len entries of a and b are pairwise equal,
 * false as soon as a difference is found. A len of zero (or less) yields true.
 */
static bool
cpalmdoc_memcmp( Byte *a, Byte *b, Py_ssize_t len) {
    Py_ssize_t remaining = len;
    while (remaining-- > 0) {
        if (*a++ != *b++) return false;
    }
    return true;
}
/*
 * Search backwards for an earlier copy of the chunk_length entries that
 * start at data[pos]. Candidates start at pos - chunk_length and move towards
 * index 0, so the match closest to pos wins.
 *
 * Returns the index of the match, or pos itself when there is none.
 */
static Py_ssize_t
cpalmdoc_rfind(Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) {
    Py_ssize_t candidate = pos - chunk_length;
    while (candidate >= 0) {
        if (cpalmdoc_memcmp(data + candidate, data + pos, chunk_length))
            return candidate;
        candidate--;
    }
    return pos;
}
/*
 * PalmDoc compress the values in b, writing the result to output.
 *
 * b      -- input values, each in the range 0-255
 * output -- destination buffer. No size is passed in, so the caller has to
 *           provide enough room; a token never writes more than twice the
 *           number of input values it consumes, so 2 * b->len chars suffice.
 *
 * Returns the number of chars written to output (0 only for empty input).
 *
 * The scratch space for a binary run lives on the stack, so this function
 * allocates nothing and cannot fail.
 */
static Py_ssize_t
cpalmdoc_do_compress(buffer *b, char *output) {
    Py_ssize_t i = 0, j, chunk_len, dist, run_len;
    unsigned compound;
    Byte c, n;
    Byte run[8];    /* a binary run holds at most 8 values */
    bool found;
    char *head = output;

    while (i < b->len) {
        c = b->data[i];
        /* do repeats: look for an earlier copy of the next 10..3 values,
         * longest first */
        if ( i > 10 && (b->len - i) > 10) {
            found = false;
            for (chunk_len = 10; chunk_len > 2; chunk_len--) {
                j = cpalmdoc_rfind(b->data, i, chunk_len);
                dist = i - j;
                /* j == i means no match; the distance field is 11 bits wide */
                if (j < i && dist <= 2047) {
                    found = true;
                    compound = (dist << 3) + chunk_len-3;
                    *(output++) = CHAR(0x80 + (compound >> 8 ));
                    *(output++) = CHAR(compound & 0xFF);
                    i += chunk_len;
                    break;
                }
            }
            if (found) continue;
        }

        /* write single character */
        i++;
        if (c == 32 && i < b->len) {
            /* a space followed by 0x40-0x7F is folded into one byte */
            n = b->data[i];
            if ( n >= 0x40 && n <= 0x7F) {
                *(output++) = CHAR(n^0x80); i++; continue;
            }
        }
        if (c == 0 || (c > 8 && c < 0x80))
            *(output++) = CHAR(c);
        else { /* Write binary data: a length byte followed by up to 8 values */
            j = i;
            run[0] = c; run_len = 1;
            while (j < b->len && run_len < 8) {
                c = b->data[j];
                if (c == 0 || (c > 8 && c < 0x80)) break;
                run[run_len++] = c; j++;
            }
            i += run_len - 1;
            *(output++) = (char)run_len;
            for (j=0; j < run_len; j++) *(output++) = CHAR(run[j]);
        }
    }
    return output - head;
}
/*
 * compress(bytestring) -> compressed bytestring
 *
 * PalmDoc compress a byte string and return the result as a new string
 * object. An empty input gives an empty result. Returns NULL with
 * MemoryError set if a working buffer cannot be allocated.
 */
static PyObject *
cpalmdoc_compress(PyObject *self, PyObject *args) {
    const char *_input = NULL; Py_ssize_t input_len = 0;
    Py_ssize_t j = 0;
    buffer b;
    char *output;
    PyObject *ans;

    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
        return NULL;

    /* Both buffers below are a small multiple of input_len in size; refuse
     * lengths for which that multiplication could overflow */
    if (input_len > PY_SSIZE_T_MAX / (Py_ssize_t)(2*sizeof(Byte)))
        return PyErr_NoMemory();

    b.data = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
    if (b.data == NULL) return PyErr_NoMemory();
    /* Map chars to bytes (0-255), independent of the signedness of char */
    for (j = 0; j < input_len; j++)
        b.data[j] = (unsigned char)_input[j];
    b.len = input_len;

    /* Compression can expand the data: a lone binary byte is written as a
     * length byte plus the byte itself. Twice the input size is always enough. */
    output = (char *)PyMem_Malloc(sizeof(char) * 2 * b.len);
    if (output == NULL) {
        PyMem_Free(b.data);
        return PyErr_NoMemory();
    }

    j = cpalmdoc_do_compress(&b, output);
    /* Non-empty input always produces output, so a zero length here can only
     * mean that cpalmdoc_do_compress failed */
    if (j == 0 && b.len > 0) {
        PyMem_Free(output);
        PyMem_Free(b.data);
        return PyErr_NoMemory();
    }
    ans = Py_BuildValue("s#", output, j);
    PyMem_Free(output);
    PyMem_Free(b.data);
    return ans;
}
static PyMethodDef cPalmdocMethods[] = {
{"decompress", cpalmdoc_decompress, METH_VARARGS,
"decompress(bytestring) -> decompressed bytestring\n\n"
"Decompress a palmdoc compressed byte string. "
},
{"compress", cpalmdoc_compress, METH_VARARGS,
"compress(bytestring) -> compressed bytestring\n\n"
"Palmdoc compress a byte string. "
},
{NULL, NULL, 0, NULL}
};
/*
 * Module initialization: register the cPalmdoc module with its method table
 * and docstring. Nothing further is added to the module, so the returned
 * module object is not kept.
 */
PyMODINIT_FUNC
initcPalmdoc(void) {
    if (Py_InitModule3("cPalmdoc", cPalmdocMethods,
                "Compress and decompress palmdoc strings."
                ) == NULL)
        return;
}

View File

@ -2,41 +2,46 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
from cStringIO import StringIO
from struct import pack
COUNT_BITS = 3
from calibre.constants import plugins
cPalmdoc = plugins['cPalmdoc'][0]
if not cPalmdoc:
raise RuntimeError(('Failed to load required cPalmdoc module: '
'%s')%plugins['cPalmdoc'][1])
def decompress_doc(data):
    '''
    Decompress a PalmDoc compressed byte string.

    The work is done by the `decompress` function of the cPalmdoc plugin, the
    counterpart of the `compress` function used by :func:`compress_doc`.

    :param data: PalmDoc compressed byte string
    :return: The decompressed byte string
    '''
    return cPalmdoc.decompress(data)
def compress_doc(data):
    '''
    PalmDoc compress a byte string using the cPalmdoc plugin.

    :param data: Byte string to compress
    :return: Whatever `cPalmdoc.compress` returns for `data`
    '''
    compressed = cPalmdoc.compress(data)
    return compressed
def test():
TESTS = [
'abc\x03\x04\x05\x06ms', # Test binary writing
'a b c \xfed ', # Test encoding of spaces
'0123456789axyz2bxyz2cdfgfo9iuyerh',
'0123456789asd0123456789asd|yyzzxxffhhjjkk',
('ciewacnaq eiu743 r787q 0w% ; sa fd\xef\ffdxosac wocjp acoiecowei '
'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ')
]
for test in TESTS:
print 'Test:', repr(test)
print '\tTesting compression...'
good = py_compress_doc(test)
x = compress_doc(test)
print '\t\tgood:', repr(good)
print '\t\tx :', repr(x)
assert x == good
print '\tTesting decompression...'
print '\t\t', repr(decompress_doc(x))
assert decompress_doc(x) == test
print
def py_compress_doc(data):
out = StringIO()
i = 0
ldata = len(data)
@ -85,4 +90,4 @@ def compress_doc(data):
out.write(''.join(binseq))
i += len(binseq) - 1
return out.getvalue()

View File

@ -226,7 +226,7 @@ class MobiReader(object):
page-break-after: always; margin: 0; display: block
}
''')
self.tag_css_rules = []
self.tag_css_rules = {}
if hasattr(filename_or_stream, 'read'):
stream = filename_or_stream
@ -328,10 +328,10 @@ class MobiReader(object):
with open('styles.css', 'wb') as s:
s.write(self.base_css_rules+'\n\n')
for rule in self.tag_css_rules:
for cls, rule in self.tag_css_rules.items():
if isinstance(rule, unicode):
rule = rule.encode('utf-8')
s.write(rule+'\n\n')
s.write('.%s { %s }\n\n'%(cls, rule))
if self.book_header.exth is not None or self.embedded_mi is not None:
@ -389,6 +389,7 @@ class MobiReader(object):
'xx-large' : '6',
}
mobi_version = self.book_header.mobi_version
style_map = {}
for i, tag in enumerate(root.iter(etree.Element)):
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content'):
@ -455,9 +456,18 @@ class MobiReader(object):
except ValueError:
pass
if styles:
attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i)
self.tag_css_rules.append('#%s {%s}'%(attrib['id'],
'; '.join(styles)))
cls = None
rule = '; '.join(styles)
for sel, srule in self.tag_css_rules.items():
if srule == rule:
cls = sel
break
if cls is None:
ncls = 'calibre_%d'%i
self.tag_css_rules[ncls] = rule
cls = attrib.get('class', '')
cls = cls + (' ' if cls else '') + ncls
attrib['class'] = cls
def create_opf(self, htmlfile, guide=None, root=None):
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)

View File

@ -726,6 +726,7 @@ class Manifest(object):
% (self.id, self.href, self.media_type)
def _parse_xhtml(self, data):
self.oeb.log.debug('Parsing', self.href, '...')
# Convert to Unicode and normalize line endings
data = self.oeb.decode(data)
data = self.oeb.html_preprocessor(data)
@ -804,6 +805,7 @@ class Manifest(object):
return data
def _parse_css(self, data):
self.oeb.log.debug('Parsing', self.href, '...')
data = self.oeb.decode(data)
data = self.oeb.css_preprocessor(data)
data = XHTML_CSS_NAMESPACE + data

View File

@ -41,10 +41,12 @@ class Clean(object):
for x in list(self.oeb.guide):
href = urldefrag(self.oeb.guide[x].href)[0]
if x.lower() != ('cover', 'titlepage'):
if x.lower() not in ('cover', 'titlepage'):
try:
if href not in protected_hrefs:
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
item = self.oeb.manifest.hrefs[href]
if item not in self.oeb.spine:
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
except KeyError:
pass
self.oeb.guide.remove(x)

View File

@ -44,14 +44,14 @@ class Split(object):
self.split_on_page_breaks = split_on_page_breaks
self.page_breaks_xpath = page_breaks_xpath
self.max_flow_size = max_flow_size
self.page_break_selectors = None
if self.page_breaks_xpath is not None:
self.page_breaks_xpath = XPath(self.page_breaks_xpath)
self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]
def __call__(self, oeb, context):
self.oeb = oeb
self.log = oeb.log
self.map = {}
self.page_break_selectors = None
for item in list(self.oeb.manifest.items):
if item.spine_position is not None and etree.iselement(item.data):
self.split_item(item)
@ -60,10 +60,7 @@ class Split(object):
def split_item(self, item):
if self.split_on_page_breaks:
if self.page_breaks_xpath is None:
page_breaks, page_break_ids = self.find_page_breaks(item)
else:
page_breaks, page_break_ids = self.page_breaks_xpath(item.data)
page_breaks, page_break_ids = self.find_page_breaks(item)
splitter = FlowSplitter(item, page_breaks, page_break_ids,
self.max_flow_size, self.oeb)