Sync to pluginize

This commit is contained in:
John Schember 2009-04-25 07:43:30 -04:00
commit 0861a58127
9 changed files with 283 additions and 60 deletions

View File

@ -2,7 +2,7 @@ from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, re, os, shutil, cStringIO, tempfile, subprocess, time import sys, re, os, subprocess
sys.path.append('src') sys.path.append('src')
iswindows = re.search('win(32|64)', sys.platform) iswindows = re.search('win(32|64)', sys.platform)
isosx = 'darwin' in sys.platform isosx = 'darwin' in sys.platform
@ -71,6 +71,9 @@ if __name__ == '__main__':
'src/calibre/utils/msdes/des.c'], 'src/calibre/utils/msdes/des.c'],
include_dirs=['src/calibre/utils/msdes']), include_dirs=['src/calibre/utils/msdes']),
Extension('calibre.plugins.cPalmdoc',
sources=['src/calibre/ebooks/mobi/palmdoc.c']),
PyQtExtension('calibre.plugins.pictureflow', PyQtExtension('calibre.plugins.pictureflow',
['src/calibre/gui2/pictureflow/pictureflow.cpp', ['src/calibre/gui2/pictureflow/pictureflow.cpp',
'src/calibre/gui2/pictureflow/pictureflow.h'], 'src/calibre/gui2/pictureflow/pictureflow.h'],

View File

@ -2,7 +2,7 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, os, re, logging, time, subprocess, atexit, mimetypes, \ import sys, os, re, logging, time, subprocess, mimetypes, \
__builtin__, warnings __builtin__, warnings
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None) __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint from htmlentitydefs import name2codepoint

View File

@ -53,7 +53,7 @@ if plugins is None:
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins') plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
sys.path.insert(0, plugin_path) sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes'] + \ for plugin in ['pictureflow', 'lzx', 'msdes', 'cPalmdoc'] + \
(['winutil'] if iswindows else []) + \ (['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []): (['usbobserver'] if isosx else []):
try: try:

View File

@ -0,0 +1,204 @@
/*
:mod:`cPalmdoc` -- Palmdoc compression/decompression
=====================================================
.. module:: cPalmdoc
:platform: All
:synopsis: Compression decompression of Palmdoc implemented in C for speed
.. moduleauthor:: Kovid Goyal <kovid@kovidgoyal.net> Copyright 2009
*/
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdio.h>
/* NOTE(review): DELTA and MIN are not referenced anywhere in this file. */
#define DELTA sizeof(Byte)*4096
/* Fixed size of the decompression output buffer.  There is no bounds
 * check on writes into it; presumably a single Palmdoc record never
 * decompresses past this — TODO confirm against callers. */
#define BUFFER 6000
#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) )
/* NOTE(review): "Byte" is an unsigned *short* (2 bytes), not an octet.
 * Values stored in it are always 0-255 (see the widening loops below),
 * so this doubles memory use but is otherwise harmless. */
typedef unsigned short int Byte;
/* A counted byte buffer: data plus its length. */
typedef struct {
Byte *data;
Py_ssize_t len;
} buffer;
/* Local int-based bool/true/false, overriding any prior definitions. */
#ifdef bool
#undef bool
#endif
#define bool int
#ifdef false
#undef false
#endif
#define false 0
#ifdef true
#undef true
#endif
#define true 1
/* Reinterpret an unsigned byte value 0-255 as a signed char value,
 * so it can be stored through a (signed) char pointer portably. */
#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x))
/*
 * decompress(bytestring) -> decompressed bytestring
 *
 * Decompress one Palmdoc-compressed record.  Control bytes:
 *   0x01-0x08        copy that many literal bytes verbatim
 *   0x00, 0x09-0x7F  the byte itself is a literal
 *   0x80-0xBF        two-byte back-reference: 14-bit (distance<<3 | len-3)
 *   0xC0-0xFF        a space followed by (byte ^ 0x80)
 *
 * Fix from review: the original leaked `input` when the `output`
 * allocation failed, and declared locals after statements (non-C89).
 */
static PyObject *
cpalmdoc_decompress(PyObject *self, PyObject *args) {
    const char *_input = NULL; Py_ssize_t input_len = 0;
    Py_ssize_t i = 0, o = 0, j = 0, di, n;
    Byte *input;
    char *output;
    Byte c;
    PyObject *ans;

    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
        return NULL;
    input = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
    if (input == NULL) return PyErr_NoMemory();
    /* Widen possibly-signed chars to unsigned byte values 0-255 */
    for (j = 0; j < input_len; j++)
        input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
    /* NOTE(review): writes into output are not bounds-checked against
     * BUFFER; assumes one record never expands past 6000 bytes. */
    output = (char *)PyMem_Malloc(sizeof(char)*BUFFER);
    if (output == NULL) { PyMem_Free(input); return PyErr_NoMemory(); }
    while (i < input_len) {
        c = input[i++];
        if (c >= 1 && c <= 8)       /* copy 'c' literal bytes */
            while (c--) output[o++] = input[i++];
        else if (c <= 0x7F)         /* 0, 09-7F = self */
            output[o++] = c;
        else if (c >= 0xC0) {       /* space + ASCII char */
            output[o++] = ' ';
            output[o++] = c ^ 0x80;
        }
        else {                      /* 80-BF: repeat sequence */
            c = (c << 8) + input[i++];
            di = (c & 0x3FFF) >> 3;
            for ( n = (c & 7) + 3; n--; ++o )
                output[o] = output[o - di];
        }
    }
    ans = Py_BuildValue("s#", output, o);
    PyMem_Free(output);
    PyMem_Free(input);
    return ans;
}
/* Return true iff the first len bytes at a and b are identical,
 * false on the first mismatch. */
static bool
cpalmdoc_memcmp( Byte *a, Byte *b, Py_ssize_t len) {
    Byte *end = a + len;
    while (a < end) {
        if (*a++ != *b++) return false;
    }
    return true;
}
/* Scan backwards for the nearest earlier occurrence of the
 * chunk_length bytes that start at data[pos].  Candidates begin at
 * pos - chunk_length (so a match never overlaps the chunk itself) and
 * walk down to offset 0.  Returns the offset of the first match found,
 * or pos itself when there is none (callers test `j < i`). */
static Py_ssize_t
cpalmdoc_rfind(Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) {
    Py_ssize_t candidate = pos - chunk_length;
    while (candidate >= 0) {
        if (cpalmdoc_memcmp(data + candidate, data + pos, chunk_length))
            return candidate;
        candidate--;
    }
    return pos;
}
/*
 * Palmdoc-compress b->len bytes from b->data into output.
 *
 * Returns the number of bytes written, or 0 when the 8-byte scratch
 * allocation fails (callers must treat 0 as out-of-memory).  output
 * must be sized for the worst case: 2 * b->len bytes, since a lone
 * binary byte is emitted as a length byte plus the byte itself.
 *
 * Fix from review: the original never freed temp.data, leaking 8
 * bytes (PyMem) on every call.
 */
static Py_ssize_t
cpalmdoc_do_compress(buffer *b, char *output) {
    Py_ssize_t i = 0, j, chunk_len, dist;
    unsigned compound;
    Byte c, n;
    bool found;
    char *head = output;
    buffer temp;
    /* Scratch for accumulating a binary run of at most 8 bytes */
    temp.data = (Byte *)PyMem_Malloc(sizeof(Byte)*8); temp.len = 0;
    if (temp.data == NULL) return 0;
    while (i < b->len) {
        c = b->data[i];
        /* Back-reference: longest match, 10 down to 3 bytes, at a
         * distance of at most 2047 (the 11-bit distance field). */
        if ( i > 10 && (b->len - i) > 10) {
            found = false;
            for (chunk_len = 10; chunk_len > 2; chunk_len--) {
                j = cpalmdoc_rfind(b->data, i, chunk_len);
                dist = i - j;
                if (j < i && dist <= 2047) {
                    found = true;
                    compound = (dist << 3) + chunk_len-3;
                    *(output++) = CHAR(0x80 + (compound >> 8 ));
                    *(output++) = CHAR(compound & 0xFF);
                    i += chunk_len;
                    break;
                }
            }
            if (found) continue;
        }
        /* Single character */
        i++;
        if (c == 32 && i < b->len) {
            /* space + 0x40-0x7F collapses into one 0xC0-0xFF byte */
            n = b->data[i];
            if ( n >= 0x40 && n <= 0x7F) {
                *(output++) = CHAR(n^0x80); i++; continue;
            }
        }
        if (c == 0 || (c > 8 && c < 0x80))
            *(output++) = CHAR(c);
        else { /* Binary run: length byte (1-8) then the raw bytes */
            j = i;
            temp.data[0] = c; temp.len = 1;
            while (j < b->len && temp.len < 8) {
                c = b->data[j];
                if (c == 0 || (c > 8 && c < 0x80)) break;
                temp.data[temp.len++] = c; j++;
            }
            i += temp.len - 1;
            *(output++) = temp.len;
            for (j=0; j < temp.len; j++) *(output++) = temp.data[j];
        }
    }
    PyMem_Free(temp.data); /* was leaked in the original */
    return output - head;
}
/*
 * compress(bytestring) -> compressed bytestring
 *
 * Fixes from review:
 *   - The output buffer was sized input_len bytes, but Palmdoc output
 *     can expand up to 2x on incompressible input (each binary byte
 *     may cost a length byte plus the byte itself), overflowing the
 *     heap.  Allocate 2*input_len (+ slack) instead.
 *   - b.data / output were leaked on the error paths.
 *   - An empty input made cpalmdoc_do_compress return 0, which was
 *     misreported as MemoryError; empty input now yields ''.
 */
static PyObject *
cpalmdoc_compress(PyObject *self, PyObject *args) {
    const char *_input = NULL; Py_ssize_t input_len = 0;
    Py_ssize_t j = 0;
    buffer b;
    char *output;
    PyObject *ans;
    if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
        return NULL;
    b.data = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
    if (b.data == NULL) return PyErr_NoMemory();
    /* Widen possibly-signed chars to unsigned byte values 0-255 */
    for (j = 0; j < input_len; j++)
        b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
    b.len = input_len;
    /* Worst case output is 2 * input_len; +8 keeps malloc(0) away for
     * empty input and adds harmless slack. */
    output = (char *)PyMem_Malloc(sizeof(char) * (2 * input_len + 8));
    if (output == NULL) { PyMem_Free(b.data); return PyErr_NoMemory(); }
    j = cpalmdoc_do_compress(&b, output);
    if (j == 0 && input_len > 0) { /* scratch allocation inside failed */
        PyMem_Free(output);
        PyMem_Free(b.data);
        return PyErr_NoMemory();
    }
    ans = Py_BuildValue("s#", output, j);
    PyMem_Free(output);
    PyMem_Free(b.data);
    return ans;
}
/* Python-visible methods exported by the cPalmdoc module. */
static PyMethodDef cPalmdocMethods[] = {
{"decompress", cpalmdoc_decompress, METH_VARARGS,
"decompress(bytestring) -> decompressed bytestring\n\n"
"Decompress a palmdoc compressed byte string. "
},
{"compress", cpalmdoc_compress, METH_VARARGS,
"compress(bytestring) -> compressed bytestring\n\n"
"Palmdoc compress a byte string. "
},
{NULL, NULL, 0, NULL} /* sentinel terminating the table */
};
/* Python 2 module initialisation entry point, invoked by the
 * interpreter on "import cPalmdoc".  Py_InitModule3 registers the
 * method table and module docstring; on failure it returns NULL and
 * the import machinery raises the pending exception. */
PyMODINIT_FUNC
initcPalmdoc(void) {
PyObject *m;
m = Py_InitModule3("cPalmdoc", cPalmdocMethods,
"Compress and decompress palmdoc strings."
);
if (m == NULL) return;
}

View File

@ -2,41 +2,46 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'and Marshall T. Vandegrift <llasram@gmail.com>'
from cStringIO import StringIO from cStringIO import StringIO
from struct import pack from struct import pack
COUNT_BITS = 3 from calibre.constants import plugins
cPalmdoc = plugins['cPalmdoc'][0]
if not cPalmdoc:
raise RuntimeError(('Failed to load required cPalmdoc module: '
'%s')%plugins['cPalmdoc'][1])
def decompress_doc(data): def decompress_doc(data):
buffer = [ord(i) for i in data] return cPalmdoc.decompress(data)
res = []
i = 0
while i < len(buffer):
c = buffer[i]
i += 1
if c >= 1 and c <= 8:
res.extend(buffer[i:i+c])
i += c
elif c <= 0x7f:
res.append(c)
elif c >= 0xc0:
res.extend( (ord(' '), c^0x80) )
else:
c = (c << 8) + buffer[i]
i += 1
di = (c & 0x3fff) >> COUNT_BITS
j = len(res)
num = (c & ((1 << COUNT_BITS) - 1)) + 3
for k in range( num ):
res.append(res[j - di+k])
return ''.join([chr(i) for i in res])
def compress_doc(data): def compress_doc(data):
return cPalmdoc.compress(data)
def test():
TESTS = [
'abc\x03\x04\x05\x06ms', # Test binary writing
'a b c \xfed ', # Test encoding of spaces
'0123456789axyz2bxyz2cdfgfo9iuyerh',
'0123456789asd0123456789asd|yyzzxxffhhjjkk',
('ciewacnaq eiu743 r787q 0w% ; sa fd\xef\ffdxosac wocjp acoiecowei '
'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ')
]
for test in TESTS:
print 'Test:', repr(test)
print '\tTesting compression...'
good = py_compress_doc(test)
x = compress_doc(test)
print '\t\tgood:', repr(good)
print '\t\tx :', repr(x)
assert x == good
print '\tTesting decompression...'
print '\t\t', repr(decompress_doc(x))
assert decompress_doc(x) == test
print
def py_compress_doc(data):
out = StringIO() out = StringIO()
i = 0 i = 0
ldata = len(data) ldata = len(data)

View File

@ -226,7 +226,7 @@ class MobiReader(object):
page-break-after: always; margin: 0; display: block page-break-after: always; margin: 0; display: block
} }
''') ''')
self.tag_css_rules = [] self.tag_css_rules = {}
if hasattr(filename_or_stream, 'read'): if hasattr(filename_or_stream, 'read'):
stream = filename_or_stream stream = filename_or_stream
@ -328,10 +328,10 @@ class MobiReader(object):
with open('styles.css', 'wb') as s: with open('styles.css', 'wb') as s:
s.write(self.base_css_rules+'\n\n') s.write(self.base_css_rules+'\n\n')
for rule in self.tag_css_rules: for cls, rule in self.tag_css_rules.items():
if isinstance(rule, unicode): if isinstance(rule, unicode):
rule = rule.encode('utf-8') rule = rule.encode('utf-8')
s.write(rule+'\n\n') s.write('.%s { %s }\n\n'%(cls, rule))
if self.book_header.exth is not None or self.embedded_mi is not None: if self.book_header.exth is not None or self.embedded_mi is not None:
@ -389,6 +389,7 @@ class MobiReader(object):
'xx-large' : '6', 'xx-large' : '6',
} }
mobi_version = self.book_header.mobi_version mobi_version = self.book_header.mobi_version
style_map = {}
for i, tag in enumerate(root.iter(etree.Element)): for i, tag in enumerate(root.iter(etree.Element)):
if tag.tag in ('country-region', 'place', 'placetype', 'placename', if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content'): 'state', 'city', 'street', 'address', 'content'):
@ -455,9 +456,18 @@ class MobiReader(object):
except ValueError: except ValueError:
pass pass
if styles: if styles:
attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i) cls = None
self.tag_css_rules.append('#%s {%s}'%(attrib['id'], rule = '; '.join(styles)
'; '.join(styles))) for sel, srule in self.tag_css_rules.items():
if srule == rule:
cls = sel
break
if cls is None:
ncls = 'calibre_%d'%i
self.tag_css_rules[ncls] = rule
cls = attrib.get('class', '')
cls = cls + (' ' if cls else '') + ncls
attrib['class'] = cls
def create_opf(self, htmlfile, guide=None, root=None): def create_opf(self, htmlfile, guide=None, root=None):
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi) mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)

View File

@ -726,6 +726,7 @@ class Manifest(object):
% (self.id, self.href, self.media_type) % (self.id, self.href, self.media_type)
def _parse_xhtml(self, data): def _parse_xhtml(self, data):
self.oeb.log.debug('Parsing', self.href, '...')
# Convert to Unicode and normalize line endings # Convert to Unicode and normalize line endings
data = self.oeb.decode(data) data = self.oeb.decode(data)
data = self.oeb.html_preprocessor(data) data = self.oeb.html_preprocessor(data)
@ -804,6 +805,7 @@ class Manifest(object):
return data return data
def _parse_css(self, data): def _parse_css(self, data):
self.oeb.log.debug('Parsing', self.href, '...')
data = self.oeb.decode(data) data = self.oeb.decode(data)
data = self.oeb.css_preprocessor(data) data = self.oeb.css_preprocessor(data)
data = XHTML_CSS_NAMESPACE + data data = XHTML_CSS_NAMESPACE + data

View File

@ -41,9 +41,11 @@ class Clean(object):
for x in list(self.oeb.guide): for x in list(self.oeb.guide):
href = urldefrag(self.oeb.guide[x].href)[0] href = urldefrag(self.oeb.guide[x].href)[0]
if x.lower() != ('cover', 'titlepage'): if x.lower() not in ('cover', 'titlepage'):
try: try:
if href not in protected_hrefs: if href not in protected_hrefs:
item = self.oeb.manifest.hrefs[href]
if item not in self.oeb.spine:
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href]) self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
except KeyError: except KeyError:
pass pass

View File

@ -44,14 +44,14 @@ class Split(object):
self.split_on_page_breaks = split_on_page_breaks self.split_on_page_breaks = split_on_page_breaks
self.page_breaks_xpath = page_breaks_xpath self.page_breaks_xpath = page_breaks_xpath
self.max_flow_size = max_flow_size self.max_flow_size = max_flow_size
self.page_break_selectors = None
if self.page_breaks_xpath is not None: if self.page_breaks_xpath is not None:
self.page_breaks_xpath = XPath(self.page_breaks_xpath) self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]
def __call__(self, oeb, context): def __call__(self, oeb, context):
self.oeb = oeb self.oeb = oeb
self.log = oeb.log self.log = oeb.log
self.map = {} self.map = {}
self.page_break_selectors = None
for item in list(self.oeb.manifest.items): for item in list(self.oeb.manifest.items):
if item.spine_position is not None and etree.iselement(item.data): if item.spine_position is not None and etree.iselement(item.data):
self.split_item(item) self.split_item(item)
@ -60,10 +60,7 @@ class Split(object):
def split_item(self, item): def split_item(self, item):
if self.split_on_page_breaks: if self.split_on_page_breaks:
if self.page_breaks_xpath is None:
page_breaks, page_break_ids = self.find_page_breaks(item) page_breaks, page_break_ids = self.find_page_breaks(item)
else:
page_breaks, page_break_ids = self.page_breaks_xpath(item.data)
splitter = FlowSplitter(item, page_breaks, page_break_ids, splitter = FlowSplitter(item, page_breaks, page_break_ids,
self.max_flow_size, self.oeb) self.max_flow_size, self.oeb)