From 917d142bded2a0c40cfdb89862662c751286098b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 15:59:30 +0530 Subject: [PATCH 01/57] MTP: Add file transfer tests and fix libmtp not returning correct storage_id values in the object dict --- src/calibre/devices/mtp/filesystem_cache.py | 29 +++-- src/calibre/devices/mtp/test.py | 105 ++++++++++++++++-- src/calibre/devices/mtp/unix/driver.py | 37 +++++- src/calibre/devices/mtp/unix/libmtp.c | 51 ++++++--- .../mtp/windows/content_enumeration.cpp | 2 +- src/calibre/devices/mtp/windows/driver.py | 2 + 6 files changed, 192 insertions(+), 34 deletions(-) diff --git a/src/calibre/devices/mtp/filesystem_cache.py b/src/calibre/devices/mtp/filesystem_cache.py index fd5e5cd8e9..a52a20dfa3 100644 --- a/src/calibre/devices/mtp/filesystem_cache.py +++ b/src/calibre/devices/mtp/filesystem_cache.py @@ -17,23 +17,30 @@ from calibre.utils.icu import sort_key, lower class FileOrFolder(object): - def __init__(self, entry, fs_cache, all_storage_ids=()): + def __init__(self, entry, fs_cache): self.object_id = entry['id'] self.is_folder = entry['is_folder'] - self.name = force_unicode(entry.get('name', '___'), 'utf-8') + n = entry.get('name', None) + if not n: n = '___' + self.name = force_unicode(n, 'utf-8') self.storage_id = entry.get('storage_id', None) self.persistent_id = entry.get('persistent_id', self.object_id) self.size = entry.get('size', 0) # self.parent_id is None for storage objects self.parent_id = entry.get('parent_id', None) - if self.parent_id == 0: - sid = self.storage_id - if all_storage_ids and sid not in all_storage_ids: - sid = all_storage_ids[0] - self.parent_id = sid + self.all_storage_ids = fs_cache.all_storage_ids + if self.parent_id is None and self.storage_id is None: # A storage object self.storage_id = self.object_id + + if self.storage_id not in self.all_storage_ids: + raise ValueError('Storage id %s not valid for %s'%(self.storage_id, + entry)) + + if self.parent_id == 0: + self.parent_id = 
self.storage_id + self.is_hidden = entry.get('is_hidden', False) self.is_system = entry.get('is_system', False) self.can_delete = entry.get('can_delete', True) @@ -42,6 +49,7 @@ class FileOrFolder(object): self.folders = [] fs_cache.id_map[self.object_id] = self self.fs_cache = weakref.ref(fs_cache) + self.deleted = False @property def id_map(self): @@ -80,6 +88,7 @@ class FileOrFolder(object): except ValueError: pass self.id_map.pop(entry.object_id, None) + entry.deleted = True def dump(self, prefix='', out=sys.stdout): c = '+' if self.is_folder else '-' @@ -110,16 +119,18 @@ class FilesystemCache(object): def __init__(self, all_storage, entries): self.entries = [] self.id_map = {} + self.all_storage_ids = tuple(x['id'] for x in all_storage) for storage in all_storage: - e = FileOrFolder(storage, self, []) + e = FileOrFolder(storage, self) self.entries.append(e) self.entries.sort(key=attrgetter('object_id')) all_storage_ids = [x.object_id for x in self.entries] + self.all_storage_ids = tuple(all_storage_ids) for entry in entries: - FileOrFolder(entry, self, all_storage_ids) + FileOrFolder(entry, self) for item in self.id_map.itervalues(): try: diff --git a/src/calibre/devices/mtp/test.py b/src/calibre/devices/mtp/test.py index ca8dd55176..07c944929b 100644 --- a/src/calibre/devices/mtp/test.py +++ b/src/calibre/devices/mtp/test.py @@ -7,35 +7,49 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import unittest, gc +import unittest, gc, io from calibre.constants import iswindows, islinux from calibre.utils.icu import lower from calibre.devices.mtp.driver import MTP_DEVICE from calibre.devices.scanner import DeviceScanner +class ProgressCallback(object): + + def __init__(self): + self.count = 0 + self.end_called = False + + def __call__(self, pos, total): + if pos == total: + self.end_called = True + self.count += 1 + class TestDeviceInteraction(unittest.TestCase): @classmethod def setUpClass(cls): + cls.dev = 
cls.storage = None cls.dev = MTP_DEVICE(None) cls.dev.startup() cls.scanner = DeviceScanner() cls.scanner.scan() cd = cls.dev.detect_managed_devices(cls.scanner.devices) if cd is None: - raise ValueError('No MTP device found') + cls.dev.shutdown() + cls.dev = None + return cls.dev.open(cd, 'test_library') if cls.dev.free_space()[0] < 10*(1024**2): - raise ValueError('The connected device %s does not have enough free' - ' space in its main memory to do the tests'%cd) + return cls.dev.filesystem_cache cls.storage = cls.dev.filesystem_cache.entries[0] @classmethod def tearDownClass(cls): - cls.dev.shutdown() - cls.dev = None + if cls.dev is not None: + cls.dev.shutdown() + cls.dev = None def setUp(self): self.cleanup = [] @@ -44,8 +58,15 @@ class TestDeviceInteraction(unittest.TestCase): for obj in reversed(self.cleanup): self.dev.delete_file_or_folder(obj) + def check_setup(self): + if self.dev is None: + self.skipTest('No MTP device detected') + if self.storage is None: + self.skipTest('The connected device does not have enough free space') + def test_folder_operations(self): ''' Test the creation of folders, duplicate folders and sub folders ''' + self.check_setup() # Create a folder name = 'zzz-test-folder' @@ -78,6 +99,72 @@ class TestDeviceInteraction(unittest.TestCase): with self.assertRaises(ValueError): self.dev.create_folder(root_file[0], 'sub-folder') + def test_file_transfer(self): + ''' Test transferring files to and from the device ''' + self.check_setup() + # Create a folder + name = 'zzz-test-folder' + folder = self.dev.create_folder(self.storage, name) + self.cleanup.append(folder) + self.assertTrue(folder.is_folder) + self.assertEqual(folder.parent_id, self.storage.object_id) + + # Check simple file put/get + size = 1024**2 + raw = io.BytesIO(b'a'*size) + raw.seek(0) + name = 'test-file.txt' + pc = ProgressCallback() + f = self.dev.put_file(folder, name, raw, size, callback=pc) + self.cleanup.append(f) + self.assertEqual(f.name, name) + 
self.assertEqual(f.size, size) + self.assertEqual(f.parent_id, folder.object_id) + self.assertEqual(f.storage_id, folder.storage_id) + self.assertTrue(pc.end_called, + msg='Progress callback not called with equal values (put_file)') + self.assertTrue(pc.count > 1, + msg='Progress callback only called once (put_file)') + + raw2 = io.BytesIO() + pc = ProgressCallback() + self.dev.get_file(f, raw2, callback=pc) + self.assertEqual(raw.getvalue(), raw2.getvalue()) + self.assertTrue(pc.end_called, + msg='Progress callback not called with equal values (get_file)') + self.assertTrue(pc.count > 1, + msg='Progress callback only called once (get_file)') + + # Check file replacement + raw = io.BytesIO(b'abcd') + raw.seek(0) + size = 4 + f = self.dev.put_file(folder, name, raw, size) + self.cleanup.append(f) + self.assertEqual(f.name, name) + self.assertEqual(f.size, size) + self.assertEqual(f.parent_id, folder.object_id) + self.assertEqual(f.storage_id, folder.storage_id) + + # Check that we get an error with replace=False + raw.seek(0) + with self.assertRaises(ValueError): + self.dev.put_file(folder, name, raw, size, replace=False) + + # Check that we can put a file into the root + raw.seek(0) + name = 'zzz-test-file.txt' + f = self.dev.put_file(self.storage, name, raw, size) + self.cleanup.append(f) + self.assertEqual(f.name, name) + self.assertEqual(f.size, size) + self.assertEqual(f.parent_id, self.storage.object_id) + self.assertEqual(f.storage_id, self.storage.storage_id) + + raw2 = io.BytesIO() + self.dev.get_file(f, raw2) + self.assertEqual(raw.getvalue(), raw2.getvalue()) + def measure_memory_usage(self, repetitions, func, *args, **kwargs): from calibre.utils.mem import memory gc.disable() @@ -91,10 +178,10 @@ class TestDeviceInteraction(unittest.TestCase): gc.enable() return end_mem - start_mem + @unittest.skipUnless(iswindows or islinux, 'Can only test for leaks on windows and linux') def test_memory_leaks(self): ''' Test for memory leaks in the C modules ''' - if 
not (iswindows or islinux): - self.skipTest('Can only test for leaks on windows and linux') + self.check_setup() # Test device scanning used_by_one = self.measure_memory_usage(1, @@ -122,7 +209,7 @@ class TestDeviceInteraction(unittest.TestCase): def tests(): tl = unittest.TestLoader() - return tl.loadTestsFromName('test.TestDeviceInteraction.test_memory_leaks') + # return tl.loadTestsFromName('test.TestDeviceInteraction.test_file_transfer') return tl.loadTestsFromTestCase(TestDeviceInteraction) def run(): diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index 1ff87eb974..ff00f79ad4 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -238,11 +238,44 @@ class MTP_DEVICE(MTPDeviceBase): raise DeviceError( 'Failed to create folder named %s in %s with error: %s'% (name, parent.full_path, self.format_errorstack(errs))) - ans['storage_id'] = sid return parent.add_child(ans) + @synchronous + def put_file(self, parent, name, stream, size, callback=None, replace=True): + e = parent.folder_named(name) + if e is not None: + raise ValueError('Cannot upload file, %s already has a folder named: %s'%( + parent.full_path, e.name)) + e = parent.file_named(name) + if e is not None: + if not replace: + raise ValueError('Cannot upload file %s, it already exists'%( + e.full_path,)) + self.delete_file_or_folder(e) + ename = name.encode('utf-8') if isinstance(name, unicode) else name + sid, pid = parent.storage_id, parent.object_id + if pid == sid: + pid = 0 + + ans, errs = self.dev.put_file(sid, pid, ename, stream, size, callback) + if ans is None: + raise DeviceError('Failed to upload file named: %s to %s: %s' + %(name, parent.full_path, self.format_errorstack(errs))) + return parent.add_child(ans) + + @synchronous + def get_file(self, f, stream, callback=None): + if f.is_folder: + raise ValueError('%s if a folder'%(f.full_path,)) + ok, errs = self.dev.get_file(f.object_id, stream, callback) + 
if not ok: + raise DeviceError('Failed to get file: %s with errors: %s'%( + f.full_path, self.format_errorstack(errs))) + @synchronous def delete_file_or_folder(self, obj): + if obj.deleted: + return if not obj.can_delete: raise ValueError('Cannot delete %s as deletion not allowed'% (obj.full_path,)) @@ -255,7 +288,7 @@ class MTP_DEVICE(MTPDeviceBase): parent = obj.parent ok, errs = self.dev.delete_object(obj.object_id) if not ok: - raise DeviceError('Failed to delete %s with error: '% + raise DeviceError('Failed to delete %s with error: %s'% (obj.full_path, self.format_errorstack(errs))) parent.remove_child(obj) diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index 982d0faf97..7f80e5bcb3 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -67,7 +67,7 @@ static void dump_errorstack(LIBMTP_mtpdevice_t *dev, PyObject *list) { PyObject *err; for(stack = LIBMTP_Get_Errorstack(dev); stack != NULL; stack=stack->next) { - err = Py_BuildValue("Is", stack->errornumber, stack->error_text); + err = Py_BuildValue("is", stack->errornumber, stack->error_text); if (err == NULL) break; PyList_Append(list, err); Py_DECREF(err); @@ -119,17 +119,43 @@ static uint16_t data_from_python(void *params, void *priv, uint32_t wantlen, uns } static PyObject* build_file_metadata(LIBMTP_file_t *nf, uint32_t storage_id) { - char *filename = nf->filename; - if (filename == NULL) filename = ""; + PyObject *ans = NULL, *l = NULL; - return Py_BuildValue("{s:k,s:k,s:k,s:s,s:K,s:O}", - "id", nf->item_id, - "parent_id", nf->parent_id, - "storage_id", storage_id, - "name", filename, - "size", nf->filesize, - "is_folder", (nf->filetype == LIBMTP_FILETYPE_FOLDER) ? Py_True : Py_False - ); + ans = Py_BuildValue("{s:s}", "name", nf->filename); + if (ans == NULL) return PyErr_NoMemory(); + + // We explicitly populate the dictionary instead of using Py_BuildValue to + // handle the numeric variables properly. 
Without this, for some reason the + // dict sometimes has incorrect values + l = PyLong_FromUnsignedLong(nf->item_id); + if (l == NULL) goto error; + if (PyDict_SetItemString(ans, "id", l) != 0) goto error; + Py_DECREF(l); l = NULL; + + l = PyLong_FromUnsignedLong(nf->parent_id); + if (l == NULL) goto error; + if (PyDict_SetItemString(ans, "parent_id", l) != 0) goto error; + Py_DECREF(l); l = NULL; + + l = PyLong_FromUnsignedLong(storage_id); + if (l == NULL) goto error; + if (PyDict_SetItemString(ans, "storage_id", l) != 0) goto error; + Py_DECREF(l); l = NULL; + + l = PyLong_FromUnsignedLongLong(nf->filesize); + if (l == NULL) goto error; + if (PyDict_SetItemString(ans, "size", l) != 0) goto error; + Py_DECREF(l); l = NULL; + + if (PyDict_SetItemString(ans, "is_folder", + (nf->filetype == LIBMTP_FILETYPE_FOLDER) ? Py_True : Py_False) != 0) + goto error; + + return ans; + +error: + Py_XDECREF(ans); Py_XDECREF(l); + return PyErr_NoMemory(); } static PyObject* file_metadata(LIBMTP_mtpdevice_t *device, PyObject *errs, uint32_t item_id, uint32_t storage_id) { @@ -507,9 +533,8 @@ libmtp_Device_delete_object(libmtp_Device *self, PyObject *args, PyObject *kwarg static PyObject * libmtp_Device_create_folder(libmtp_Device *self, PyObject *args, PyObject *kwargs) { PyObject *errs, *fo = NULL; - uint32_t parent_id, storage_id; + uint32_t storage_id, parent_id, folder_id; char *name; - uint32_t folder_id; ENSURE_DEV(NULL); ENSURE_STORAGE(NULL); diff --git a/src/calibre/devices/mtp/windows/content_enumeration.cpp b/src/calibre/devices/mtp/windows/content_enumeration.cpp index 65831768a7..e1f439926c 100644 --- a/src/calibre/devices/mtp/windows/content_enumeration.cpp +++ b/src/calibre/devices/mtp/windows/content_enumeration.cpp @@ -73,7 +73,7 @@ static void set_size_property(PyObject *dict, REFPROPERTYKEY key, const char *py hr = properties->GetUnsignedLargeIntegerValue(key, &val); if (SUCCEEDED(hr)) { - pval = PyInt_FromSsize_t((Py_ssize_t)val); + pval = 
PyLong_FromUnsignedLongLong(val); if (pval != NULL) { PyDict_SetItemString(dict, pykey, pval); Py_DECREF(pval); diff --git a/src/calibre/devices/mtp/windows/driver.py b/src/calibre/devices/mtp/windows/driver.py index 91abe228ae..03e3c65ad1 100644 --- a/src/calibre/devices/mtp/windows/driver.py +++ b/src/calibre/devices/mtp/windows/driver.py @@ -275,6 +275,8 @@ class MTP_DEVICE(MTPDeviceBase): @same_thread def delete_file_or_folder(self, obj): + if obj.deleted: + return if not obj.can_delete: raise ValueError('Cannot delete %s as deletion not allowed'% (obj.full_path,)) From 8fed10364f77a7d3ae34ac1f1bf068563580981f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 16:01:07 +0530 Subject: [PATCH 02/57] ... --- manual/typesetting_math.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manual/typesetting_math.rst b/manual/typesetting_math.rst index dd3d574c3a..e631eaea10 100644 --- a/manual/typesetting_math.rst +++ b/manual/typesetting_math.rst @@ -48,7 +48,7 @@ This snippet looks like the following screen shot in the |app| viewer. .. figure:: images/lorentz.png :align: center - :guilabel:`The Lorentz Equations` + :guilabel:`The Lorenz Equations` The complete HTML file, with more equations and inline mathematics is reproduced below. 
You can convert this HTML file to EPUB in |app| to end up From b8f45adf2820b4d79ebb54ee49a5c7b150aeb62f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 16:33:24 +0530 Subject: [PATCH 03/57] MTP: Add mem leak test for file operations --- src/calibre/devices/mtp/test.py | 43 ++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/calibre/devices/mtp/test.py b/src/calibre/devices/mtp/test.py index 07c944929b..ce548b3350 100644 --- a/src/calibre/devices/mtp/test.py +++ b/src/calibre/devices/mtp/test.py @@ -180,7 +180,7 @@ class TestDeviceInteraction(unittest.TestCase): @unittest.skipUnless(iswindows or islinux, 'Can only test for leaks on windows and linux') def test_memory_leaks(self): - ''' Test for memory leaks in the C modules ''' + ''' Test for memory leaks in the C module ''' self.check_setup() # Test device scanning @@ -188,7 +188,7 @@ class TestDeviceInteraction(unittest.TestCase): self.dev.detect_managed_devices, self.scanner.devices, force_refresh=True) - used_by_many = self.measure_memory_usage(1000, + used_by_many = self.measure_memory_usage(100, self.dev.detect_managed_devices, self.scanner.devices, force_refresh=True) @@ -196,6 +196,42 @@ class TestDeviceInteraction(unittest.TestCase): msg='Memory consumption during device scan: for one: %g for many:%g'% (used_by_one, used_by_many)) + # Test file transfer + size = 1024*100 + raw = io.BytesIO(b'a'*size) + raw.seek(0) + name = 'zzz-test-file.txt' + + def send_file(storage, name, raw, size): + raw.seek(0) + pc = ProgressCallback() + f = self.dev.put_file(storage, name, raw, size, callback=pc) + self.cleanup.append(f) + del pc + + used_once = self.measure_memory_usage(1, send_file, self.storage, name, + raw, size) + used_many = self.measure_memory_usage(10, send_file, self.storage, name, + raw, size) + + self.assertTrue(used_many <= used_once, + msg='Memory consumption during put_file: for one: %g for many:%g'% + (used_once, used_many)) + + def 
get_file(f): + raw = io.BytesIO() + pc = ProgressCallback() + self.dev.get_file(f, raw, callback=pc) + del raw + del pc + + f = self.storage.file_named(name) + used_once = self.measure_memory_usage(1, get_file, f) + used_many = self.measure_memory_usage(10, get_file, f) + self.assertTrue(used_many <= used_once, + msg='Memory consumption during get_file: for one: %g for many:%g'% + (used_once, used_many)) + # Test get_filesystem used_by_one = self.measure_memory_usage(1, self.dev.dev.get_filesystem, self.storage.object_id) @@ -207,9 +243,10 @@ class TestDeviceInteraction(unittest.TestCase): msg='Memory consumption during get_filesystem: for one: %g for many:%g'% (used_by_one, used_by_many)) + def tests(): tl = unittest.TestLoader() - # return tl.loadTestsFromName('test.TestDeviceInteraction.test_file_transfer') + # return tl.loadTestsFromName('test.TestDeviceInteraction.test_memory_leaks') return tl.loadTestsFromTestCase(TestDeviceInteraction) def run(): From 9977d6d5814b8a52499dcdbe2e991fde853b3c73 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 16:48:41 +0530 Subject: [PATCH 04/57] ... 
--- src/calibre/devices/mtp/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/devices/mtp/test.py b/src/calibre/devices/mtp/test.py index ce548b3350..668add96fa 100644 --- a/src/calibre/devices/mtp/test.py +++ b/src/calibre/devices/mtp/test.py @@ -192,7 +192,7 @@ class TestDeviceInteraction(unittest.TestCase): self.dev.detect_managed_devices, self.scanner.devices, force_refresh=True) - self.assertTrue(used_by_many <= used_by_one, + self.assertTrue(used_by_many <= used_by_one*2, msg='Memory consumption during device scan: for one: %g for many:%g'% (used_by_one, used_by_many)) @@ -214,7 +214,7 @@ class TestDeviceInteraction(unittest.TestCase): used_many = self.measure_memory_usage(10, send_file, self.storage, name, raw, size) - self.assertTrue(used_many <= used_once, + self.assertTrue(used_many <= used_once*2, msg='Memory consumption during put_file: for one: %g for many:%g'% (used_once, used_many)) @@ -228,7 +228,7 @@ class TestDeviceInteraction(unittest.TestCase): f = self.storage.file_named(name) used_once = self.measure_memory_usage(1, get_file, f) used_many = self.measure_memory_usage(10, get_file, f) - self.assertTrue(used_many <= used_once, + self.assertTrue(used_many <= used_once*2, msg='Memory consumption during get_file: for one: %g for many:%g'% (used_once, used_many)) @@ -239,7 +239,7 @@ class TestDeviceInteraction(unittest.TestCase): used_by_many = self.measure_memory_usage(5, self.dev.dev.get_filesystem, self.storage.object_id) - self.assertTrue(used_by_many <= used_by_one, + self.assertTrue(used_by_many <= used_by_one*2, msg='Memory consumption during get_filesystem: for one: %g for many:%g'% (used_by_one, used_by_many)) From 2d6b0badd08785f71c009eee72b9a2668f89eab7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 17:55:32 +0530 Subject: [PATCH 05/57] ... 
--- src/calibre/devices/mtp/filesystem_cache.py | 15 +++++++++++ src/calibre/devices/mtp/unix/driver.py | 13 ++++++++-- src/calibre/devices/mtp/windows/driver.py | 28 +++++++++++++++++---- src/calibre/devices/mtp/windows/remote.py | 8 +++++- 4 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/calibre/devices/mtp/filesystem_cache.py b/src/calibre/devices/mtp/filesystem_cache.py index a52a20dfa3..4c7573f0a0 100644 --- a/src/calibre/devices/mtp/filesystem_cache.py +++ b/src/calibre/devices/mtp/filesystem_cache.py @@ -51,6 +51,21 @@ class FileOrFolder(object): self.fs_cache = weakref.ref(fs_cache) self.deleted = False + def __repr__(self): + name = 'Folder' if self.is_folder else 'File' + try: + path = unicode(self.full_path) + except: + path = '' + datum = 'size=%s'%(self.size) + if self.is_folder: + datum = 'children=%s'%(len(self.files) + len(self.folders)) + return '%s(id=%s, storage_id=%s, %s, path=%s)'%(name, self.object_id, + self.storage_id, datum, path) + + __str__ = __repr__ + __unicode__ = __repr__ + @property def id_map(self): return self.fs_cache().id_map diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index ff00f79ad4..a306fbcc0d 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -14,6 +14,7 @@ from collections import namedtuple from calibre import prints from calibre.constants import plugins +from calibre.ptempfile import SpooledTemporaryFile from calibre.devices.errors import OpenFailed, DeviceError from calibre.devices.mtp.base import MTPDeviceBase, synchronous from calibre.devices.mtp.filesystem_cache import FilesystemCache @@ -264,13 +265,17 @@ class MTP_DEVICE(MTPDeviceBase): return parent.add_child(ans) @synchronous - def get_file(self, f, stream, callback=None): + def get_file(self, f, stream=None, callback=None): if f.is_folder: raise ValueError('%s if a folder'%(f.full_path,)) + if stream is None: + stream = 
SpooledTemporaryFile(5*1024*1024, '_wpd_receive_file.dat') + stream.name = f.name ok, errs = self.dev.get_file(f.object_id, stream, callback) if not ok: raise DeviceError('Failed to get file: %s with errors: %s'%( f.full_path, self.format_errorstack(errs))) + return stream @synchronous def delete_file_or_folder(self, obj): @@ -315,13 +320,17 @@ if __name__ == '__main__': # fname = b'moose.txt' # src = BytesIO(raw) # print (d.put_file(dev._main_id, 0, fname, src, len(raw), PR())) - dev.filesystem_cache.dump() # with open('/tmp/flint.epub', 'wb') as f: # print(d.get_file(786, f, PR())) # print() # with open('/tmp/bleak.epub', 'wb') as f: # print(d.get_file(601, f, PR())) # print() + + dev.filesystem_cache.dump() + + # print (dev.filesystem_cache.entries[0].files[0]) + # print (dev.filesystem_cache.entries[0].folders[0]) dev.set_debug_level(dev.LIBMTP_DEBUG_ALL) del d dev.shutdown() diff --git a/src/calibre/devices/mtp/windows/driver.py b/src/calibre/devices/mtp/windows/driver.py index 03e3c65ad1..d030e2a53c 100644 --- a/src/calibre/devices/mtp/windows/driver.py +++ b/src/calibre/devices/mtp/windows/driver.py @@ -245,18 +245,18 @@ class MTP_DEVICE(MTPDeviceBase): return tuple(ans) @same_thread - def get_file(self, object_id, stream=None, callback=None): - f = self.filesystem_cache.id_map[object_id] + def get_file(self, f, stream=None, callback=None): if f.is_folder: - raise ValueError('%s is a folder on the device'%(f.full_path,)) + raise ValueError('%s if a folder'%(f.full_path,)) if stream is None: stream = SpooledTemporaryFile(5*1024*1024, '_wpd_receive_file.dat') + stream.name = f.name try: try: - self.dev.get_file(object_id, stream, callback) + self.dev.get_file(f.object_id, stream, callback) except self.wpd.WPDFileBusy: time.sleep(2) - self.dev.get_file(object_id, stream, callback) + self.dev.get_file(f.object_id, stream, callback) except Exception as e: raise DeviceError('Failed to fetch the file %s with error: %s'% f.full_path, as_unicode(e)) @@ -290,3 +290,21 
@@ class MTP_DEVICE(MTPDeviceBase): self.dev.delete_object(obj.object_id) parent.remove_child(obj) + @same_thread + def put_file(self, parent, name, stream, size, callback=None, replace=True): + e = parent.folder_named(name) + if e is not None: + raise ValueError('Cannot upload file, %s already has a folder named: %s'%( + parent.full_path, e.name)) + e = parent.file_named(name) + if e is not None: + if not replace: + raise ValueError('Cannot upload file %s, it already exists'%( + e.full_path,)) + self.delete_file_or_folder(e) + sid, pid = parent.storage_id, parent.object_id + ans = self.dev.put_file(sid, pid, name, stream, size, callback) + ans['storage_id'] = parent.storage_id + return parent.add_child(ans) + + diff --git a/src/calibre/devices/mtp/windows/remote.py b/src/calibre/devices/mtp/windows/remote.py index 22e186c32d..a02b955522 100644 --- a/src/calibre/devices/mtp/windows/remote.py +++ b/src/calibre/devices/mtp/windows/remote.py @@ -70,12 +70,18 @@ def main(): print ('Connected to:', dev.get_gui_name()) print ('Total space', dev.total_space()) print ('Free space', dev.free_space()) - dev.filesystem_cache.dump() # pprint.pprint(dev.dev.create_folder(dev.filesystem_cache.entries[0].object_id, # 'zzz')) # print ('Fetching file: oFF (198214 bytes)') # stream = dev.get_file('oFF') # print ("Fetched size: ", stream.tell()) + size = 4 + stream = io.BytesIO(b'a'*size) + name = 'zzz-test-file.txt' + stream.seek(0) + f = dev.put_file(dev.filesystem_cache.entries[0], name, stream, size) + print ('Put file:', f) + # dev.filesystem_cache.dump() finally: dev.shutdown() From 5c4f6df4484ec2f778e64bed359d4499b543d49a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 18:24:57 +0530 Subject: [PATCH 06/57] WPD: put_file works --- src/calibre/devices/mtp/windows/driver.py | 4 ++-- src/calibre/devices/mtp/windows/remote.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/mtp/windows/driver.py 
b/src/calibre/devices/mtp/windows/driver.py index d030e2a53c..63eef1df66 100644 --- a/src/calibre/devices/mtp/windows/driver.py +++ b/src/calibre/devices/mtp/windows/driver.py @@ -303,8 +303,8 @@ class MTP_DEVICE(MTPDeviceBase): e.full_path,)) self.delete_file_or_folder(e) sid, pid = parent.storage_id, parent.object_id - ans = self.dev.put_file(sid, pid, name, stream, size, callback) - ans['storage_id'] = parent.storage_id + ans = self.dev.put_file(pid, name, stream, size, callback) + ans['storage_id'] = sid return parent.add_child(ans) diff --git a/src/calibre/devices/mtp/windows/remote.py b/src/calibre/devices/mtp/windows/remote.py index a02b955522..5be782e611 100644 --- a/src/calibre/devices/mtp/windows/remote.py +++ b/src/calibre/devices/mtp/windows/remote.py @@ -63,6 +63,8 @@ def main(): try: devices = win_scanner() pnp_id = dev.detect_managed_devices(devices) + if not pnp_id: + raise ValueError('Failed to detect device') # pprint.pprint(dev.detected_devices) print ('Trying to connect to:', pnp_id) dev.open(pnp_id, '') From 3efd69528ce3eb8565d726678c946cbd50eeeafe Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 18:33:08 +0530 Subject: [PATCH 07/57] WPD passes all tests --- src/calibre/devices/mtp/test.py | 27 +++++++++++++---------- src/calibre/devices/mtp/windows/remote.py | 4 ++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/calibre/devices/mtp/test.py b/src/calibre/devices/mtp/test.py index 668add96fa..bcfd2c34d1 100644 --- a/src/calibre/devices/mtp/test.py +++ b/src/calibre/devices/mtp/test.py @@ -178,6 +178,13 @@ class TestDeviceInteraction(unittest.TestCase): gc.enable() return end_mem - start_mem + def check_memory(self, once, many, msg, factor=2): + msg += ' for once: %g for many: %g'%(once, many) + if once > 0: + self.assertTrue(many <= once*factor, msg=msg) + else: + self.assertTrue(many <= 0.01, msg=msg) + @unittest.skipUnless(iswindows or islinux, 'Can only test for leaks on windows and linux') def 
test_memory_leaks(self): ''' Test for memory leaks in the C module ''' @@ -192,9 +199,8 @@ class TestDeviceInteraction(unittest.TestCase): self.dev.detect_managed_devices, self.scanner.devices, force_refresh=True) - self.assertTrue(used_by_many <= used_by_one*2, - msg='Memory consumption during device scan: for one: %g for many:%g'% - (used_by_one, used_by_many)) + self.check_memory(used_by_one, used_by_many, + 'Memory consumption during device scan') # Test file transfer size = 1024*100 @@ -214,9 +220,8 @@ class TestDeviceInteraction(unittest.TestCase): used_many = self.measure_memory_usage(10, send_file, self.storage, name, raw, size) - self.assertTrue(used_many <= used_once*2, - msg='Memory consumption during put_file: for one: %g for many:%g'% - (used_once, used_many)) + self.check_memory(used_once, used_many, + 'Memory consumption during put_file:') def get_file(f): raw = io.BytesIO() @@ -228,9 +233,8 @@ class TestDeviceInteraction(unittest.TestCase): f = self.storage.file_named(name) used_once = self.measure_memory_usage(1, get_file, f) used_many = self.measure_memory_usage(10, get_file, f) - self.assertTrue(used_many <= used_once*2, - msg='Memory consumption during get_file: for one: %g for many:%g'% - (used_once, used_many)) + self.check_memory(used_once, used_many, + 'Memory consumption during get_file:') # Test get_filesystem used_by_one = self.measure_memory_usage(1, @@ -239,9 +243,8 @@ class TestDeviceInteraction(unittest.TestCase): used_by_many = self.measure_memory_usage(5, self.dev.dev.get_filesystem, self.storage.object_id) - self.assertTrue(used_by_many <= used_by_one*2, - msg='Memory consumption during get_filesystem: for one: %g for many:%g'% - (used_by_one, used_by_many)) + self.check_memory(used_by_one, used_by_many, + 'Memory consumption during get_filesystem') def tests(): diff --git a/src/calibre/devices/mtp/windows/remote.py b/src/calibre/devices/mtp/windows/remote.py index 5be782e611..cbc23978d2 100644 --- 
a/src/calibre/devices/mtp/windows/remote.py +++ b/src/calibre/devices/mtp/windows/remote.py @@ -54,6 +54,10 @@ def main(): plugins._plugins['wpd'] = (wpd, '') sys.path.pop(0) + from calibre.devices.mtp.test import run + run() + return + from calibre.devices.scanner import win_scanner from calibre.devices.mtp.windows.driver import MTP_DEVICE dev = MTP_DEVICE(None) From e05c862dd0c47239e701fbdd4fefa96a1deb3298 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 19:31:30 +0530 Subject: [PATCH 08/57] CIPER Chile by Darko Miletic. Fixes #1041189 (New recipe for CIPER Chile) --- recipes/ciperchile.recipe | 58 +++++++++++++++++++++++++++++++++++ recipes/icons/ciperchile.png | Bin 0 -> 1551 bytes 2 files changed, 58 insertions(+) create mode 100644 recipes/ciperchile.recipe create mode 100644 recipes/icons/ciperchile.png diff --git a/recipes/ciperchile.recipe b/recipes/ciperchile.recipe new file mode 100644 index 0000000000..e1576c9958 --- /dev/null +++ b/recipes/ciperchile.recipe @@ -0,0 +1,58 @@ +__license__ = 'GPL v3' +__copyright__ = '2012, Darko Miletic ' +''' +ciperchile.cl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class CiperChile(BasicNewsRecipe): + title = 'CIPER Chile' + __author__ = 'Darko Miletic' + description = 'El Centro de Investigacion e Informacion Periodistica (CIPER) es una institucion independiente que desarrolla reportajes de investigacion de acuerdo a principios de maxima calidad e integridad profesional. Para lograr dicho objetivo, los profesionales de CIPER incorporan a las tecnicas propias del reporteo el uso sistematico de las leyes chilenas que norman el libre acceso a la informacion, de manera que los documentos que se obtengan por esta via esten puestos a disposicion del publico sin restricciones.' 
+ publisher = 'CIPER' + category = 'news, politics, Chile' + oldest_article = 15 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'es_CL' + auto_cleanup = False + remove_empty_feeds = True + publication_type = 'blog' + masthead_url = 'http://ciperchile.cl/wp-content/themes/cipertheme/css/ui/ciper-logo.png' + extra_css = """ + body{font-family: Arial,sans-serif} + .excerpt{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: 1.25em} + .author{font-family: Georgia,"Times New Roman",Times,serif; font-style: italic; font-size: small} + .date{font-family: Georgia,"Times New Roman",Times,serif; font-size: small; color: grey} + .epigrafe{font-size: small; color: grey} + img{margin-bottom: 0.4em; display:block} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [ + dict(name=['meta','link']), + dict(attrs={'class':['articleSharingTools','articleNav']}) + ] + remove_attributes=['lang'] + remove_tags_before=dict(name='p', attrs={'class':'epigrafe'}) + remove_tags_after=dict(name='div', attrs={'class':'articleBody'}) + keep_only_tags = [dict(name='div', attrs={'class':'articleElements'})] + + feeds = [ + (u'Opinion del lector', u'http://ciperchile.cl/category/opinion-del-lector/feed/') + ,(u'Reportajes de investigacion', u'http://ciperchile.cl/category/reportajes-de-investigacion/feed/') + ,(u'Actualidad y Entrevistas', u'http://ciperchile.cl/category/actualidad-y-entrevistas/feed/') + ,(u'Opinion', u'http://ciperchile.cl/category/opinion/feed/') + ,(u'Accesso a la informacion', u'http://ciperchile.cl/category/acceso-a-la-informacion/feed/') + ,(u'Libros', u'http://ciperchile.cl/category/libros/feed/') + ,(u'Blog', u'http://ciperchile.cl/category/blog/feed/') + ] diff --git a/recipes/icons/ciperchile.png b/recipes/icons/ciperchile.png new file mode 100644 index 
0000000000000000000000000000000000000000..93e4062a5e145aa3a5a969ffa0523fd3cb05251a GIT binary patch literal 1551 zcmV+q2JrcbP)Q;F1CjK&vQ930e+NBr9leB8B1)5W{&{Ao$)_ zBC})kj8CtwZSZ%R*KKkP3OTWZ2O}$RGR6=vL}GqmhI9YKLI}JNHSTDbLa3&=eD|;p zG-2)R)9dGfy9k#CcRc7&lvF-*KeKpDS02qemm+|7-5J?D5J#Q-W-Qyny^wH#F zk#!W!L=)*=&gnpGLEY2qjvoue9ZOl!KB+p!q|U~_Xy11F&*;UoU(VdP4p(q0Ar4`8 zGPoei7)m=QVPL_h!0U0D5^Mj-~xD9aEeb%+yk5z+@6^0MweQ-{I|`!uJ1dPX?v(Wx^Pi* zOun{`5))d4A)))E@l}FS1JTBDrb;DOhsiCgSg>;jp2Nk%n{+H7ORQbp)!o?wL~9JW z2$>-g)grr&%OBknomC3L!r+mK@2O-VJ?l}M67Hg zQC|auXeJpZ@-3e?3=O8wzSlci?s)LYH+H}O&h}^nak9P^p;U0jlOy09R2BU4`I@*8 zt32a!8Izg{M=FmGn?nb$o!k>UpWHf_+OkV8==Nme6KP*%xgu5*kp4cIv)P&qAayn! zPL_HLdiNuZ8;*2d?s>XDvvF&sZNaT%VQ4vR$u6XYrP4^j~CE&xaHwR zeT(De`8`hSJIQ>oP!e!ZAtR_u@O{#wSd_IH)co}Cx-pSq9+rQCnjDwW|B6x~5!Sk_ zgZd1N;i}FycfQzhM?z=NoZ2##s(D$ey7=S(-#Ms+aK%6wv0SgfJT%38DP5?kfxZtD zjiws)vd z6Mf|Bvkt(ALS5QizO*=pBREK987f1?g%f*%!y4o^c%b<86L{sD(TSz2f~Vrc*X002ovPDHLkV1hjN B?BoCd literal 0 HcmV?d00001 From 016ec7ade3ebe37514c33a154f7a7d3a5f9b041d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Aug 2012 22:32:14 +0530 Subject: [PATCH 09/57] Add a method to get the list of available transliterators in ICU --- src/calibre/utils/icu.c | 52 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index dfaf2dd53e..d90c6c0b90 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -7,7 +7,25 @@ #include #include #include +#include +static PyObject* uchar_to_unicode(const UChar *src, int32_t len) { + wchar_t *buf = NULL; + PyObject *ans = NULL; + UErrorCode status = U_ZERO_ERROR; + + if (len < 0) { len = u_strlen(src); } + buf = (wchar_t *)calloc(4*len, sizeof(wchar_t)); + if (buf == NULL) return PyErr_NoMemory(); + u_strToWCS(buf, 4*len, NULL, src, len, &status); + if (U_SUCCESS(status)) { + ans = PyUnicode_FromWideChar(buf, wcslen(buf)); + if (ans == NULL) PyErr_NoMemory(); + } else 
PyErr_SetString(PyExc_TypeError, "Failed to convert UChar* to wchar_t*"); + + free(buf); + return ans; +} // Collator object definition {{{ typedef struct { @@ -610,7 +628,6 @@ icu_title(PyObject *self, PyObject *args) { return ret; } // }}} - // set_default_encoding {{{ static PyObject * icu_set_default_encoding(PyObject *self, PyObject *args) { @@ -625,6 +642,35 @@ icu_set_default_encoding(PyObject *self, PyObject *args) { } // }}} +// set_default_encoding {{{ +static PyObject * +icu_get_available_transliterators(PyObject *self, PyObject *args) { + PyObject *ans, *l; + UErrorCode status = U_ZERO_ERROR; + const UChar *id = NULL; + UEnumeration *i; + + ans = PyList_New(0); + if (ans == NULL) return PyErr_NoMemory(); + + i = utrans_openIDs(&status); + if (i == NULL || U_FAILURE(status)) {Py_DECREF(ans); PyErr_SetString(PyExc_RuntimeError, "Failed to create enumerator"); return NULL; } + + do { + id = uenum_unext(i, NULL, &status); + if (U_SUCCESS(status) && id != NULL) { + l = uchar_to_unicode(id, -1); + if (l == NULL) break; + PyList_Append(ans, l); + Py_DECREF(l); + } + } while(id != NULL); + uenum_close(i); + + return ans; +} + +// }}} static PyMethodDef icu_methods[] = { {"upper", icu_upper, METH_VARARGS, "upper(locale, unicode object) -> upper cased unicode object using locale rules." @@ -642,6 +688,10 @@ static PyMethodDef icu_methods[] = { "set_default_encoding(encoding) -> Set the default encoding for the python unicode implementation." }, + {"get_available_transliterators", icu_get_available_transliterators, METH_VARARGS, + "get_available_transliterators() -> Return list of available transliterators. This list is rather limited on OS X." + }, + {NULL} /* Sentinel */ }; From 872a1434c796b64e284f37102c81654f593ca0af Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 00:01:55 +0530 Subject: [PATCH 10/57] ... 
--- recipes/time_magazine.recipe | 50 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/recipes/time_magazine.recipe b/recipes/time_magazine.recipe index 4b815bd7ce..cbe40f79f2 100644 --- a/recipes/time_magazine.recipe +++ b/recipes/time_magazine.recipe @@ -2,6 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' + ''' time.com ''' @@ -11,28 +12,23 @@ from calibre.web.feeds.news import BasicNewsRecipe from lxml import html class Time(BasicNewsRecipe): - #recipe_disabled = ('This recipe has been disabled as TIME no longer' - # ' publish complete articles on the web.') title = u'Time' - __author__ = 'Kovid Goyal' + __author__ = 'Kovid Goyal, Rick Shang' description = ('Weekly US magazine.') encoding = 'utf-8' no_stylesheets = True language = 'en' remove_javascript = True - #needs_subscription = 'optional' + needs_subscription = 'optional' keep_only_tags = [ { - 'class':['artHd', 'articleContent', - 'entry-title','entry-meta', 'entry-content', 'thumbnail'] + 'class':['tout1', 'entry-content', 'external-gallery-img', 'image-meta'] }, ] remove_tags = [ - {'class':['content-tools', 'quigo', 'see', - 'first-tier-social-tools', 'navigation', 'enlarge lightbox']}, - {'id':['share-tools']}, - {'rel':'lightbox'}, + {'class':['thumbnail', 'button']}, + ] recursions = 10 @@ -43,17 +39,22 @@ class Time(BasicNewsRecipe): def get_browser(self): br = BasicNewsRecipe.get_browser(self) - if False and self.username and self.password: - # This site uses javascript in its login process - res = br.open('http://www.time.com/time/magazine') - br.select_form(nr=1) - br['username'] = self.username + # This site uses javascript in its login process + if False and self.username is not None and self.password is not None: + br.open('http://www.time.com/time/magazine') + br.select_form(predicate=lambda f: 'action' in f.attrs and f.attrs['action'] == 'https://auth.time.com/login.php') + br['username'] = self.username br['password'] = 
self.password - res = br.submit() - raw = res.read() + br['magcode'] = ['TD'] + br.find_control('turl').readonly = False + br['turl'] = 'http://www.time.com/time/magazine' + br.find_control('rurl').readonly = False + br['rurl'] = 'http://www.time.com/time/magazine' + br['remember'] = False + raw = br.submit().read() if '>Log Out<' not in raw: raise ValueError('Failed to login to time.com, check' - ' your username and password') + ' your username and password') return br def parse_index(self): @@ -70,6 +71,9 @@ class Time(BasicNewsRecipe): except: self.log.exception('Failed to fetch cover') + dates = ''.join(root.xpath('//time[@class="updated"]/text()')) + if dates: + self.timefmt = ' [%s]'%dates feeds = [] parent = root.xpath('//div[@class="content-main-aside"]')[0] @@ -96,7 +100,9 @@ class Time(BasicNewsRecipe): title = html.tostring(a[0], encoding=unicode, method='text').strip() if not title: continue - url = a[0].get('href') + url = a[0].get('href') + # url = re.sub('/magazine/article/0,9171','/subscriber/printout/0,8816', + # url) if url.startswith('/'): url = 'http://www.time.com'+url desc = '' @@ -111,9 +117,3 @@ class Time(BasicNewsRecipe): 'date' : '', 'description' : desc } - - def postprocess_html(self,soup,first): - for tag in soup.findAll(attrs ={'class':['artPag','pagination']}): - tag.extract() - return soup - From f754786d121d6d3c060ed84c85c068114d39934f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 08:08:12 +0530 Subject: [PATCH 11/57] When generating ICU sort keys handle strings with NULL bytes in them --- src/calibre/utils/icu.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 93f4d7b1da..f1f94dc175 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -71,10 +71,16 @@ def icu_sort_key(collator, obj): if not obj: return _none2 try: + try: + return _secondary_collator.sort_key(obj) + except AttributeError: + return 
secondary_collator().sort_key(obj) + except TypeError: + if isinstance(obj, unicode): + obj = obj.replace(u'\0', u'') + else: + obj = obj.replace(b'\0', b'') return _secondary_collator.sort_key(obj) - except AttributeError: - return secondary_collator().sort_key(obj) - def py_find(pattern, source): pos = source.find(pattern) From efd41d87eda7ebe6149ffe8a8d019b145e39bbc3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 08:49:15 +0530 Subject: [PATCH 12/57] Fix #1041389 (Drop down lists behaviour not consistent) --- src/calibre/gui2/complete2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/gui2/complete2.py b/src/calibre/gui2/complete2.py index 59291cf04b..debc9a6599 100644 --- a/src/calibre/gui2/complete2.py +++ b/src/calibre/gui2/complete2.py @@ -30,6 +30,7 @@ class CompleteModel(QAbstractListModel): # {{{ items = [x for x in items if x] items = tuple(sorted(items, key=sort_key)) self.all_items = self.current_items = items + self.current_prefix = '' self.reset() def set_completion_prefix(self, prefix): From 34efc911aea836bdf69e5e648a22b95de7243f9d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 09:01:25 +0530 Subject: [PATCH 13/57] libmtp: Filter ro storage at the python level for better debugging --- src/calibre/devices/mtp/unix/driver.py | 1 + src/calibre/devices/mtp/unix/libmtp.c | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index a306fbcc0d..832a54652d 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -146,6 +146,7 @@ class MTP_DEVICE(MTPDeviceBase): raise OpenFailed('') storage = sorted(self.dev.storage_info, key=operator.itemgetter('id')) + storage = [x for x in storage if x.get('rw', False)] if not storage: self.blacklisted_devices.add(connected_device) raise OpenFailed('No storage found for device %s'%(connected_device,)) diff --git 
a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index 7f80e5bcb3..79c1bbeac0 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -347,28 +347,26 @@ static PyObject * libmtp_Device_storage_info(libmtp_Device *self, void *closure) { PyObject *ans, *loc; LIBMTP_devicestorage_t *storage; + int ro = 0; ENSURE_DEV(NULL); ENSURE_STORAGE(NULL); ans = PyList_New(0); if (ans == NULL) { PyErr_NoMemory(); return NULL; } for (storage = self->device->storage; storage != NULL; storage = storage->next) { - // Ignore read only storage - if (storage->StorageType == ST_FixedROM || storage->StorageType == ST_RemovableROM) continue; - // Storage IDs with the lower 16 bits 0x0000 are not supposed to be - // writeable. - if ((storage->id & 0x0000FFFFU) == 0x00000000U) continue; - // Also check the access capability to avoid e.g. deletable only storages - if (storage->AccessCapability == AC_ReadOnly || storage->AccessCapability == AC_ReadOnly_with_Object_Deletion) continue; + ro = 0; + // Check if read only storage + if (storage->StorageType == ST_FixedROM || storage->StorageType == ST_RemovableROM || (storage->id & 0x0000FFFFU) == 0x00000000U || storage->AccessCapability == AC_ReadOnly || storage->AccessCapability == AC_ReadOnly_with_Object_Deletion) ro = 1; - loc = Py_BuildValue("{s:k,s:O,s:K,s:K,s:K,s:s,s:s}", + loc = Py_BuildValue("{s:k,s:O,s:K,s:K,s:K,s:s,s:s,s:O}", "id", storage->id, "removable", ((storage->StorageType == ST_RemovableRAM) ? Py_True : Py_False), "capacity", storage->MaxCapacity, "freespace_bytes", storage->FreeSpaceInBytes, "freespace_objects", storage->FreeSpaceInObjects, "name", storage->StorageDescription, - "volume_id", storage->VolumeIdentifier + "volume_id", storage->VolumeIdentifier, + "rw", (ro) ? 
Py_False : Py_True ); if (loc == NULL) return NULL; From 9d697e814637577ace962e903ef6267384ffab32 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 09:34:12 +0530 Subject: [PATCH 14/57] Remove legacy code --- .../devices/{prs500/cli/main.py => cli.py} | 8 +- src/calibre/devices/libusb.py | 368 ------- src/calibre/devices/prs500/__init__.py | 6 - src/calibre/devices/prs500/books.py | 385 ------- src/calibre/devices/prs500/cli/__init__.py | 9 - src/calibre/devices/prs500/driver.py | 989 ------------------ src/calibre/devices/prs500/prstypes.py | 861 --------------- src/calibre/ebooks/lrf/meta.py | 30 +- src/calibre/linux.py | 12 +- src/calibre/trac/setup.py | 21 - 10 files changed, 39 insertions(+), 2650 deletions(-) rename src/calibre/devices/{prs500/cli/main.py => cli.py} (98%) delete mode 100644 src/calibre/devices/libusb.py delete mode 100644 src/calibre/devices/prs500/__init__.py delete mode 100644 src/calibre/devices/prs500/books.py delete mode 100644 src/calibre/devices/prs500/cli/__init__.py delete mode 100644 src/calibre/devices/prs500/driver.py delete mode 100755 src/calibre/devices/prs500/prstypes.py delete mode 100644 src/calibre/trac/setup.py diff --git a/src/calibre/devices/prs500/cli/main.py b/src/calibre/devices/cli.py similarity index 98% rename from src/calibre/devices/prs500/cli/main.py rename to src/calibre/devices/cli.py index 16a9ab7b0a..95181bf639 100755 --- a/src/calibre/devices/prs500/cli/main.py +++ b/src/calibre/devices/cli.py @@ -1,7 +1,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' """ -Provides a command-line and optional graphical interface to the SONY Reader PRS-500. +Provides a command-line interface to ebook devices. For usage information run the script. """ @@ -275,7 +275,7 @@ def main(): elif command == "cp": usage="usage: %prog cp [options] source destination\nCopy files to/from the device\n\n"+\ "One of source or destination must be a path on the device. 
\n\nDevice paths have the form\n"+\ - "prs500:mountpoint/my/path\n"+\ + "dev:mountpoint/my/path\n"+\ "where mountpoint is one of / or card:/\n\n"+\ "source must point to a file for which you have read permissions\n"+\ "destination must point to a file or directory for which you have write permissions" @@ -286,7 +286,7 @@ def main(): if len(args) != 2: parser.print_help() return 1 - if args[0].startswith("prs500:"): + if args[0].startswith("dev:"): outfile = args[1] path = args[0][7:] if path.endswith("/"): path = path[:-1] @@ -300,7 +300,7 @@ def main(): return 1 dev.get_file(path, outfile) outfile.close() - elif args[1].startswith("prs500:"): + elif args[1].startswith("dev:"): try: infile = open(args[0], "rb") except IOError as e: diff --git a/src/calibre/devices/libusb.py b/src/calibre/devices/libusb.py deleted file mode 100644 index 016a6b18aa..0000000000 --- a/src/calibre/devices/libusb.py +++ /dev/null @@ -1,368 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -""" -This module provides a thin ctypes based wrapper around libusb. 
-""" - -from ctypes import cdll, POINTER, byref, pointer, Structure as _Structure, \ - c_ubyte, c_ushort, c_int, c_char, c_void_p, c_byte, c_uint -from errno import EBUSY, ENOMEM - -from calibre import iswindows, isosx, isbsd, load_library - -_libusb_name = 'libusb' -PATH_MAX = 511 if iswindows else 1024 if (isosx or isbsd) else 4096 -if iswindows: - class Structure(_Structure): - _pack_ = 1 - _libusb_name = 'libusb0' -else: - Structure = _Structure - -try: - try: - _libusb = load_library(_libusb_name, cdll) - except OSError: - _libusb = cdll.LoadLibrary('libusb-0.1.so.4') - has_library = True -except: - _libusb = None - has_library = False - -class DeviceDescriptor(Structure): - _fields_ = [\ - ('Length', c_ubyte), \ - ('DescriptorType', c_ubyte), \ - ('bcdUSB', c_ushort), \ - ('DeviceClass', c_ubyte), \ - ('DeviceSubClass', c_ubyte), \ - ('DeviceProtocol', c_ubyte), \ - ('MaxPacketSize0', c_ubyte), \ - ('idVendor', c_ushort), \ - ('idProduct', c_ushort), \ - ('bcdDevice', c_ushort), \ - ('Manufacturer', c_ubyte), \ - ('Product', c_ubyte), \ - ('SerialNumber', c_ubyte), \ - ('NumConfigurations', c_ubyte) \ - ] - -class EndpointDescriptor(Structure): - _fields_ = [\ - ('Length', c_ubyte), \ - ('DescriptorType', c_ubyte), \ - ('EndpointAddress', c_ubyte), \ - ('Attributes', c_ubyte), \ - ('MaxPacketSize', c_ushort), \ - ('Interval', c_ubyte), \ - ('Refresh', c_ubyte), \ - ('SynchAddress', c_ubyte), \ - ('extra', POINTER(c_char)), \ - ('extralen', c_int)\ - ] - -class InterfaceDescriptor(Structure): - _fields_ = [\ - ('Length', c_ubyte), \ - ('DescriptorType', c_ubyte), \ - ('InterfaceNumber', c_ubyte), \ - ('AlternateSetting', c_ubyte), \ - ('NumEndpoints', c_ubyte), \ - ('InterfaceClass', c_ubyte), \ - ('InterfaceSubClass', c_ubyte), \ - ('InterfaceProtocol', c_ubyte), \ - ('Interface', c_ubyte), \ - ('endpoint', POINTER(EndpointDescriptor)), \ - ('extra', POINTER(c_char)), \ - ('extralen', c_int)\ - ] - -class Interface(Structure): - _fields_ = [\ - ('altsetting', 
POINTER(InterfaceDescriptor)), \ - ('num_altsetting', c_int)\ - ] - -class ConfigDescriptor(Structure): - _fields_ = [\ - ('Length', c_ubyte), \ - ('DescriptorType', c_ubyte), \ - ('TotalLength', c_ushort), \ - ('NumInterfaces', c_ubyte), \ - ('Value', c_ubyte), \ - ('Configuration', c_ubyte), \ - ('Attributes', c_ubyte), \ - ('MaxPower', c_ubyte), \ - ('interface', POINTER(Interface)), \ - ('extra', POINTER(c_ubyte)), \ - ('extralen', c_int) \ - ] - - def __str__(self): - ans = "" - for field in self._fields_: - ans += field[0] + ": " + str(eval('self.'+field[0])) + '\n' - return ans.strip() - - - -class Error(Exception): - pass - -class Device(Structure): - - def open(self): - """ Open device for use. Return a DeviceHandle. """ - handle = _libusb.usb_open(byref(self)) - if not handle: - raise Error("Cannot open device") - return handle.contents - - @dynamic_property - def configurations(self): - doc = """ List of device configurations. See L{ConfigDescriptor} """ - def fget(self): - ans = [] - for config in range(self.device_descriptor.NumConfigurations): - ans.append(self.config_descriptor[config]) - return tuple(ans) - return property(doc=doc, fget=fget) - -class Bus(Structure): - @dynamic_property - def device_list(self): - doc = \ - """ - Flat list of devices on this bus. - Note: children are not explored - TODO: Check if exploring children is neccessary (e.g. 
with an external hub) - """ - def fget(self): - if _libusb is None: - return [] - if _libusb.usb_find_devices() < 0: - raise Error('Unable to search for USB devices') - ndev = self.devices - ans = [] - while ndev: - dev = ndev.contents - ans.append(dev) - ndev = dev.next - return ans - return property(doc=doc, fget=fget) - -class DeviceHandle(Structure): - _fields_ = [\ - ('fd', c_int), \ - ('bus', POINTER(Bus)), \ - ('device', POINTER(Device)), \ - ('config', c_int), \ - ('interface', c_int), \ - ('altsetting', c_int), \ - ('impl_info', c_void_p) - ] - - def close(self): - """ Close this DeviceHandle """ - _libusb.usb_close(byref(self)) - - def set_configuration(self, config): - """ - Set device configuration. This has to be called on windows before - trying to claim an interface. - @param config: A L{ConfigDescriptor} or a integer (the ConfigurationValue) - """ - try: - num = config.Value - except AttributeError: - num = config - ret = _libusb.usb_set_configuration(byref(self), num) - if ret < 0: - raise Error('Failed to set device configuration to: ' + str(num) + \ - '. Error code: ' + str(ret)) - - def claim_interface(self, num): - """ - Claim interface C{num} on device. - Must be called before doing anything witht the device. - """ - ret = _libusb.usb_claim_interface(byref(self), num) - - if -ret == ENOMEM: - raise Error("Insufficient memory to claim interface") - elif -ret == EBUSY: - raise Error('Device busy') - elif ret < 0: - raise Error('Unknown error occurred while trying to claim USB'\ - ' interface: ' + str(ret)) - - def control_msg(self, rtype, request, bytes, value=0, index=0, timeout=100): - """ - Perform a control request to the default control pipe on the device. - @param rtype: specifies the direction of data flow, the type - of request, and the recipient. - @param request: specifies the request. - @param bytes: if the transfer is a write transfer, buffer is a sequence - with the transfer data, otherwise, buffer is the number of - bytes to read. 
- @param value: specific information to pass to the device. - @param index: specific information to pass to the device. - """ - size = 0 - try: - size = len(bytes) - except TypeError: - size = bytes - ArrayType = c_byte * size - _libusb.usb_control_msg.argtypes = [POINTER(DeviceHandle), c_int, \ - c_int, c_int, c_int, \ - POINTER(ArrayType), \ - c_int, c_int] - arr = ArrayType() - rsize = _libusb.usb_control_msg(byref(self), rtype, request, \ - value, index, byref(arr), \ - size, timeout) - if rsize < size: - raise Error('Could not read ' + str(size) + ' bytes on the '\ - 'control bus. Read: ' + str(rsize) + ' bytes.') - return arr - else: - ArrayType = c_byte * size - _libusb.usb_control_msg.argtypes = [POINTER(DeviceHandle), c_int, \ - c_int, c_int, c_int, \ - POINTER(ArrayType), \ - c_int, c_int] - arr = ArrayType(*bytes) - return _libusb.usb_control_msg(byref(self), rtype, request, \ - value, index, byref(arr), \ - size, timeout) - - def bulk_read(self, endpoint, size, timeout=100): - """ - Read C{size} bytes via a bulk transfer from the device. - """ - ArrayType = c_byte * size - arr = ArrayType() - _libusb.usb_bulk_read.argtypes = [POINTER(DeviceHandle), c_int, \ - POINTER(ArrayType), c_int, c_int - ] - rsize = _libusb.usb_bulk_read(byref(self), endpoint, byref(arr), \ - size, timeout) - if rsize < 0: - raise Error('Could not read ' + str(size) + ' bytes on the '\ - 'bulk bus. Error code: ' + str(rsize)) - if rsize == 0: - raise Error('Device sent zero bytes') - if rsize < size: - arr = arr[:rsize] - return arr - - def bulk_write(self, endpoint, bytes, timeout=100): - """ - Send C{bytes} to device via a bulk transfer. 
- """ - size = len(bytes) - ArrayType = c_byte * size - arr = ArrayType(*bytes) - _libusb.usb_bulk_write.argtypes = [POINTER(DeviceHandle), c_int, \ - POINTER(ArrayType), c_int, c_int - ] - _libusb.usb_bulk_write(byref(self), endpoint, byref(arr), size, timeout) - - def release_interface(self, num): - ret = _libusb.usb_release_interface(pointer(self), num) - if ret < 0: - raise Error('Unknown error occurred while trying to release USB'\ - ' interface: ' + str(ret)) - - def reset(self): - ret = _libusb.usb_reset(pointer(self)) - if ret < 0: - raise Error('Unknown error occurred while trying to reset '\ - 'USB device ' + str(ret)) - - -Bus._fields_ = [ \ - ('next', POINTER(Bus)), \ - ('previous', POINTER(Bus)), \ - ('dirname', c_char * (PATH_MAX+1)), \ - ('devices', POINTER(Device)), \ - ('location', c_uint), \ - ('root_dev', POINTER(Device))\ - ] - -Device._fields_ = [ \ - ('next', POINTER(Device)), \ - ('previous', POINTER(Device)), \ - ('filename', c_char * (PATH_MAX+1)), \ - ('bus', POINTER(Bus)), \ - ('device_descriptor', DeviceDescriptor), \ - ('config_descriptor', POINTER(ConfigDescriptor)), \ - ('dev', c_void_p), \ - ('devnum', c_ubyte), \ - ('num_children', c_ubyte), \ - ('children', POINTER(POINTER(Device))) - ] - -if _libusb is not None: - try: - _libusb.usb_get_busses.restype = POINTER(Bus) - _libusb.usb_open.restype = POINTER(DeviceHandle) - _libusb.usb_open.argtypes = [POINTER(Device)] - _libusb.usb_close.argtypes = [POINTER(DeviceHandle)] - _libusb.usb_claim_interface.argtypes = [POINTER(DeviceHandle), c_int] - _libusb.usb_claim_interface.restype = c_int - _libusb.usb_release_interface.argtypes = [POINTER(DeviceHandle), c_int] - _libusb.usb_release_interface.restype = c_int - _libusb.usb_reset.argtypes = [POINTER(DeviceHandle)] - _libusb.usb_reset.restype = c_int - _libusb.usb_control_msg.restype = c_int - _libusb.usb_bulk_read.restype = c_int - _libusb.usb_bulk_write.restype = c_int - _libusb.usb_set_configuration.argtypes = [POINTER(DeviceHandle), 
c_int] - _libusb.usb_set_configuration.restype = c_int - _libusb.usb_init() - except: - _libusb = None - - - -def busses(): - """ Get list of USB busses present on system """ - if _libusb is None: - raise Error('Could not find libusb.') - if _libusb.usb_find_busses() < 0: - raise Error('Unable to search for USB busses') - if _libusb.usb_find_devices() < 0: - raise Error('Unable to search for USB devices') - ans = [] - nbus = _libusb.usb_get_busses() - while nbus: - bus = nbus.contents - ans.append(bus) - nbus = bus.next - return ans - - -def get_device_by_id(idVendor, idProduct): - """ Return a L{Device} by vendor and prduct ids """ - buslist = busses() - for bus in buslist: - devices = bus.device_list - for dev in devices: - if dev.device_descriptor.idVendor == idVendor and \ - dev.device_descriptor.idProduct == idProduct: - return dev - -def has_library(): - return _libusb is not None - -def get_devices(): - buslist = busses() - ans = [] - for bus in buslist: - devices = bus.device_list - for dev in devices: - device = (dev.device_descriptor.idVendor, dev.device_descriptor.idProduct, dev.device_descriptor.bcdDevice) - ans.append(device) - return ans diff --git a/src/calibre/devices/prs500/__init__.py b/src/calibre/devices/prs500/__init__.py deleted file mode 100644 index d9f4403944..0000000000 --- a/src/calibre/devices/prs500/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -''' -Device driver for the Sony Reader PRS 500 -''' \ No newline at end of file diff --git a/src/calibre/devices/prs500/books.py b/src/calibre/devices/prs500/books.py deleted file mode 100644 index 91fcb3255f..0000000000 --- a/src/calibre/devices/prs500/books.py +++ /dev/null @@ -1,385 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -""" -This module contains the logic for dealing with XML book lists found -in the reader cache. 
-""" -import xml.dom.minidom as dom -from base64 import b64decode as decode -from base64 import b64encode as encode -import re - -from calibre.devices.interface import BookList as _BookList -from calibre.devices import strftime, strptime - -MIME_MAP = { \ - "lrf":"application/x-sony-bbeb", \ - 'lrx':'application/x-sony-bbeb', \ - "rtf":"application/rtf", \ - "pdf":"application/pdf", \ - "txt":"text/plain" \ - } - -def sortable_title(title): - return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', title).rstrip() - -class book_metadata_field(object): - """ Represents metadata stored as an attribute """ - def __init__(self, attr, formatter=None, setter=None): - self.attr = attr - self.formatter = formatter - self.setter = setter - - def __get__(self, obj, typ=None): - """ Return a string. String may be empty if self.attr is absent """ - return self.formatter(obj.elem.getAttribute(self.attr)) if \ - self.formatter else obj.elem.getAttribute(self.attr).strip() - - def __set__(self, obj, val): - """ Set the attribute """ - val = self.setter(val) if self.setter else val - if not isinstance(val, unicode): - val = unicode(val, 'utf8', 'replace') - obj.elem.setAttribute(self.attr, val) - -class Book(object): - """ Provides a view onto the XML element that represents a book """ - - title = book_metadata_field("title") - authors = book_metadata_field("author", \ - formatter=lambda x: x if x and x.strip() else "Unknown") - mime = book_metadata_field("mime") - rpath = book_metadata_field("path") - id = book_metadata_field("id", formatter=int) - sourceid = book_metadata_field("sourceid", formatter=int) - size = book_metadata_field("size", formatter=int) - # When setting this attribute you must use an epoch - datetime = book_metadata_field("date", formatter=strptime, setter=strftime) - @dynamic_property - def title_sorter(self): - doc = '''String to sort the title. 
If absent, title is returned''' - def fget(self): - src = self.elem.getAttribute('titleSorter').strip() - if not src: - src = self.title - return src - def fset(self, val): - self.elem.setAttribute('titleSorter', sortable_title(unicode(val))) - return property(doc=doc, fget=fget, fset=fset) - - @dynamic_property - def thumbnail(self): - doc = \ - """ - The thumbnail. Should be a height 68 image. - Setting is not supported. - """ - def fget(self): - th = self.elem.getElementsByTagName(self.prefix + "thumbnail") - if len(th): - for n in th[0].childNodes: - if n.nodeType == n.ELEMENT_NODE: - th = n - break - rc = "" - for node in th.childNodes: - if node.nodeType == node.TEXT_NODE: - rc += node.data - return decode(rc) - return property(fget=fget, doc=doc) - - @dynamic_property - def path(self): - doc = """ Absolute path to book on device. Setting not supported. """ - def fget(self): - return self.root + self.rpath - return property(fget=fget, doc=doc) - - @dynamic_property - def db_id(self): - doc = '''The database id in the application database that this file corresponds to''' - def fget(self): - match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0]) - if match: - return int(match.group(1)) - return property(fget=fget, doc=doc) - - def __init__(self, node, tags=[], prefix="", root="/Data/media/"): - self.elem = node - self.prefix = prefix - self.root = root - self.tags = tags - - def __str__(self): - """ Return a utf-8 encoded string with title author and path information """ - return self.title.encode('utf-8') + " by " + \ - self.authors.encode('utf-8') + " at " + self.path.encode('utf-8') - - -def fix_ids(media, cache, *args): - ''' - Adjust ids in cache to correspond with media. 
- ''' - media.purge_empty_playlists() - media.reorder_playlists() - if cache.root: - sourceid = media.max_id() - cid = sourceid + 1 - for child in cache.root.childNodes: - if child.nodeType == child.ELEMENT_NODE and child.hasAttribute("sourceid"): - child.setAttribute("sourceid", str(sourceid)) - child.setAttribute("id", str(cid)) - cid += 1 - media.set_next_id(str(cid)) - - -class BookList(_BookList): - """ - A list of L{Book}s. Created from an XML file. Can write list - to an XML file. - """ - __getslice__ = None - __setslice__ = None - - def __init__(self, root="/Data/media/", sfile=None): - _BookList.__init__(self) - self.tag_order = {} - self.root = self.document = self.proot = None - if sfile: - sfile.seek(0) - src = sfile.read() - try: - src = src.decode('utf8') - except UnicodeDecodeError: - try: - src = src.decode('latin1') - except UnicodeDecodeError: - src = src.decode('cp1252') - src = src.replace(' max: - max = nid - return max - - def book_by_path(self, path): - for child in self.root.childNodes: - if child.nodeType == child.ELEMENT_NODE and child.hasAttribute("path"): - if path == child.getAttribute('path'): - return child - return None - - def add_book(self, mi, name, size, ctime): - """ Add a node into DOM tree representing a book """ - book = self.book_by_path(name) - if book is not None: - self.remove_book(name) - node = self.document.createElement(self.prefix + "text") - mime = MIME_MAP[name[name.rfind(".")+1:].lower()] - cid = self.max_id()+1 - sourceid = str(self[0].sourceid) if len(self) else "1" - attrs = { - "title" : mi.title, - 'titleSorter' : sortable_title(mi.title), - "author" : mi.format_authors() if mi.format_authors() else _('Unknown'), - "page":"0", "part":"0", "scale":"0", \ - "sourceid":sourceid, "id":str(cid), "date":"", \ - "mime":mime, "path":name, "size":str(size) - } - for attr in attrs.keys(): - node.setAttributeNode(self.document.createAttribute(attr)) - node.setAttribute(attr, attrs[attr]) - try: - w, h, data = 
mi.thumbnail - except: - w, h, data = None, None, None - - if data: - th = self.document.createElement(self.prefix + "thumbnail") - th.setAttribute("width", str(w)) - th.setAttribute("height", str(h)) - jpeg = self.document.createElement(self.prefix + "jpeg") - jpeg.appendChild(self.document.createTextNode(encode(data))) - th.appendChild(jpeg) - node.appendChild(th) - self.root.appendChild(node) - book = Book(node, root=self.proot, prefix=self.prefix) - book.datetime = ctime - self.append(book) - self.set_next_id(cid+1) - tags = [] - if mi.tags: - tags.extend(mi.tags) - if mi.series: - tags.append(mi.series) - if self.prefix and tags: # Playlists only supportted in main memory - if hasattr(mi, 'tag_order'): - self.tag_order.update(mi.tag_order) - self.set_tags(book, tags) - - def playlist_by_title(self, title): - for pl in self.playlists(): - if pl.getAttribute('title').lower() == title.lower(): - return pl - - def add_playlist(self, title): - cid = self.max_id()+1 - pl = self.document.createElement(self.prefix+'playlist') - pl.setAttribute('sourceid', '0') - pl.setAttribute('id', str(cid)) - pl.setAttribute('title', title) - for child in self.root.childNodes: - try: - if child.getAttribute('id') == '1': - self.root.insertBefore(pl, child) - self.set_next_id(cid+1) - break - except AttributeError: - continue - return pl - - - def remove_from_playlists(self, id): - for pli in self.playlist_items(): - if pli.getAttribute('id') == str(id): - pli.parentNode.removeChild(pli) - pli.unlink() - - def set_tags(self, book, tags): - book.tags = tags - self.set_playlists(book.id, tags) - - def set_playlists(self, id, collections): - self.remove_from_playlists(id) - for collection in set(collections): - coll = self.playlist_by_title(collection) - if not coll: - coll = self.add_playlist(collection) - item = self.document.createElement(self.prefix+'item') - item.setAttribute('id', str(id)) - coll.appendChild(item) - - def get_playlists(self, id): - ans = [] - for pl in 
self.playlists(): - for item in pl.getElementsByTagName(self.prefix+'item'): - if item.getAttribute('id') == str(id): - ans.append(pl) - continue - return ans - - def book_by_id(self, id): - for book in self: - if str(book.id) == str(id): - return book - - def reorder_playlists(self): - for title in self.tag_order.keys(): - pl = self.playlist_by_title(title) - if not pl: - continue - db_ids = [i.getAttribute('id') for i in pl.childNodes if hasattr(i, 'getAttribute')] - pl_book_ids = [self.book_by_id(i.getAttribute('id')).db_id for i in pl.childNodes if hasattr(i, 'getAttribute')] - map = {} - for i, j in zip(pl_book_ids, db_ids): - map[i] = j - pl_book_ids = [i for i in pl_book_ids if i is not None] - ordered_ids = [i for i in self.tag_order[title] if i in pl_book_ids] - - if len(ordered_ids) < len(pl.childNodes): - continue - children = [i for i in pl.childNodes if hasattr(i, 'getAttribute')] - for child in children: - pl.removeChild(child) - child.unlink() - for id in ordered_ids: - item = self.document.createElement(self.prefix+'item') - item.setAttribute('id', str(map[id])) - pl.appendChild(item) - - def write(self, stream): - """ Write XML representation of DOM tree to C{stream} """ - stream.write(self.document.toxml('utf-8')) diff --git a/src/calibre/devices/prs500/cli/__init__.py b/src/calibre/devices/prs500/cli/__init__.py deleted file mode 100644 index 6b376c2ad3..0000000000 --- a/src/calibre/devices/prs500/cli/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -""" -Provides a command-line interface to the SONY Reader PRS-500. - -For usage information run the script. 
-""" -__docformat__ = "epytext" -__author__ = "Kovid Goyal " diff --git a/src/calibre/devices/prs500/driver.py b/src/calibre/devices/prs500/driver.py deleted file mode 100644 index 28545b2d86..0000000000 --- a/src/calibre/devices/prs500/driver.py +++ /dev/null @@ -1,989 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -### End point description for PRS-500 procductId=667 -### Endpoint Descriptor: -### bLength 7 -### bDescriptorType 5 -### bEndpointAddress 0x81 EP 1 IN -### bmAttributes 2 -### Transfer Type Bulk -### Synch Type None -### Usage Type Data -### wMaxPacketSize 0x0040 1x 64 bytes -### bInterval 0 -### Endpoint Descriptor: -### bLength 7 -### bDescriptorType 5 -### bEndpointAddress 0x02 EP 2 OUT -### bmAttributes 2 -### Transfer Type Bulk -### Synch Type None -### Usage Type Data -### wMaxPacketSize 0x0040 1x 64 bytes -### bInterval 0 -### -### -### Endpoint 0x81 is device->host and endpoint 0x02 is host->device. -### You can establish Stream pipes to/from these endpoints for Bulk transfers. -### Has two configurations 1 is the USB charging config 2 is the self-powered -### config. I think config management is automatic. Endpoints are the same -""" -Contains the logic for communication with the device (a SONY PRS-500). - -The public interface of class L{PRS500} defines the -methods for performing various tasks. 
-""" -import sys, os -from tempfile import TemporaryFile -from array import array -from functools import wraps -from StringIO import StringIO -from threading import RLock - -from calibre.devices.interface import DevicePlugin -from calibre.devices.libusb import Error as USBError -from calibre.devices.libusb import get_device_by_id -from calibre.devices.prs500.prstypes import * -from calibre.devices.errors import * -from calibre.devices.prs500.books import BookList, fix_ids -from calibre import __author__, __appname__ -from calibre.devices.usbms.deviceconfig import DeviceConfig - -# Protocol versions this driver has been tested with -KNOWN_USB_PROTOCOL_VERSIONS = [0x3030303030303130L] - -lock = RLock() - -class File(object): - """ - Wrapper that allows easy access to all information about files/directories - """ - def __init__(self, _file): - self.is_dir = _file[1].is_dir #: True if self is a directory - self.is_readonly = _file[1].is_readonly #: True if self is readonly - self.size = _file[1].file_size #: Size in bytes of self - self.ctime = _file[1].ctime #: Creation time of self as a epoch - self.wtime = _file[1].wtime #: Creation time of self as an epoch - path = _file[0] - if path.endswith("/"): - path = path[:-1] - self.path = path #: Path to self - self.name = path[path.rfind("/")+1:].rstrip() #: Name of self - - def __repr__(self): - """ Return path to self """ - return "File:" + self.path - - def __str__(self): - return self.name - - -class PRS500(DeviceConfig, DevicePlugin): - - """ - Implements the backend for communication with the SONY Reader. - Each method decorated by C{safe} performs a task. 
- """ - name = 'PRS-500 Device Interface' - description = _('Communicate with the Sony PRS-500 eBook reader.') - author = _('Kovid Goyal') - supported_platforms = ['windows', 'osx', 'linux'] - log_packets = False - - VENDOR_ID = 0x054c #: SONY Vendor Id - PRODUCT_ID = 0x029b #: Product Id for the PRS-500 - BCD = [0x100] - PRODUCT_NAME = 'PRS-500' - gui_name = PRODUCT_NAME - VENDOR_NAME = 'SONY' - INTERFACE_ID = 0 #: The interface we use to talk to the device - BULK_IN_EP = 0x81 #: Endpoint for Bulk reads - BULK_OUT_EP = 0x02 #: Endpoint for Bulk writes - # Location of media.xml file on device - MEDIA_XML = "/Data/database/cache/media.xml" - # Location of cache.xml on storage card in device - CACHE_XML = "/Sony Reader/database/cache.xml" - # Ordered list of supported formats - FORMATS = ["lrf", "lrx", "rtf", "pdf", "txt"] - # Height for thumbnails of books/images on the device - THUMBNAIL_HEIGHT = 68 - # Directory on card to which books are copied - CARD_PATH_PREFIX = __appname__ - _packet_number = 0 #: Keep track of the packet number for packet tracing - - SUPPORTS_SUB_DIRS = False - MUST_READ_METADATA = True - - def log_packet(self, packet, header, stream=sys.stderr): - """ - Log C{packet} to stream C{stream}. - Header should be a small word describing the type of packet. - """ - self._packet_number += 1 - print >> stream, str(self._packet_number), header, "Type:", \ - packet.__class__.__name__ - print >> stream, packet - print >> stream, "--" - - @classmethod - def validate_response(cls, res, _type=0x00, number=0x00): - """ - Raise a ProtocolError if the type and number of C{res} - is not the same as C{type} and C{number}. 
- """ - if _type != res.type or number != res.rnumber: - raise ProtocolError("Inavlid response.\ntype: expected=" + \ - hex(_type)+" actual=" + hex(res.type) + \ - "\nrnumber: expected=" + hex(number) + \ - " actual="+hex(res.rnumber)) - - @classmethod - def signature(cls): - """ Return a two element tuple (vendor id, product id) """ - return (cls.VENDOR_ID, cls.PRODUCT_ID ) - - def safe(func): - """ - Decorator that wraps a call to C{func} to ensure that - exceptions are handled correctly. It also calls L{open} to claim - the interface and initialize the Reader if needed. - - As a convenience, C{safe} automatically sends the a - L{EndSession} after calling func, unless func has - a keyword argument named C{end_session} set to C{False}. - - An L{ArgumentError} will cause the L{EndSession} command to - be sent to the device, unless end_session is set to C{False}. - An L{usb.USBError} will cause the library to release control of the - USB interface via a call to L{close}. - """ - @wraps(func) - def run_session(*args, **kwargs): - with lock: - dev = args[0] - res = None - try: - if not hasattr(dev, 'in_session'): - dev.reset() - if not dev.handle: - dev.open() - if not getattr(dev, 'in_session', False): - dev.send_validated_command(BeginEndSession(end=False)) - dev.in_session = True - res = func(*args, **kwargs) - except ArgumentError: - if not kwargs.has_key("end_session") or kwargs["end_session"]: - dev.send_validated_command(BeginEndSession(end=True)) - dev.in_session = False - raise - except USBError as err: - if "No such device" in str(err): - raise DeviceError() - elif "Connection timed out" in str(err): - dev.close() - raise TimeoutError(func.__name__) - elif "Protocol error" in str(err): - dev.close() - raise ProtocolError("There was an unknown error in the"+\ - " protocol. 
Contact " + __author__) - dev.close() - raise - if not kwargs.has_key("end_session") or kwargs["end_session"]: - dev.send_validated_command(BeginEndSession(end=True)) - dev.in_session = False - return res - - return run_session - - def reset(self, key='-1', log_packets=False, report_progress=None, - detected_device=None) : - """ - @param key: The key to unlock the device - @param log_packets: If true the packet stream to/from the device is logged - @param report_progress: Function that is called with a % progress - (number between 0 and 100) for various tasks - If it is called with -1 that means that the - task does not have any progress information - """ - with lock: - self.device = get_device_by_id(self.VENDOR_ID, self.PRODUCT_ID) - # Handle that is used to communicate with device. Setup in L{open} - self.handle = None - self.in_session = False - self.log_packets = log_packets - self.report_progress = report_progress - if len(key) > 8: - key = key[:8] - elif len(key) < 8: - key += ''.join(['\0' for i in xrange(8 - len(key))]) - self.key = key - - def reconnect(self): - """ Only recreates the device node and deleted the connection handle """ - self.device = get_device_by_id(self.VENDOR_ID, self.PRODUCT_ID) - self.handle = None - - @classmethod - def is_connected(cls, helper=None): - """ - This method checks to see whether the device is physically connected. - It does not return any information about the validity of the - software connection. You may need to call L{reconnect} if you keep - getting L{DeviceError}. - """ - try: - return get_device_by_id(cls.VENDOR_ID, cls.PRODUCT_ID) != None - except USBError: - return False - - def set_progress_reporter(self, report_progress): - self.report_progress = report_progress - - def open(self, connected_device, library_uuid) : - """ - Claim an interface on the device for communication. - Requires write privileges to the device file. - Also initialize the device. 
- See the source code for the sequence of initialization commands. - """ - with lock: - if not hasattr(self, 'key'): - self.reset() - self.device = get_device_by_id(self.VENDOR_ID, self.PRODUCT_ID) - if not self.device: - raise DeviceError() - configs = self.device.configurations - try: - self.handle = self.device.open() - config = configs[0] - try: - self.handle.set_configuration(configs[0]) - except USBError: - self.handle.set_configuration(configs[1]) - config = configs[1] - _id = config.interface.contents.altsetting.contents - ed1 = _id.endpoint[0] - ed2 = _id.endpoint[1] - if ed1.EndpointAddress == self.BULK_IN_EP: - red, wed = ed1, ed2 - else: - red, wed = ed2, ed1 - self.bulk_read_max_packet_size = red.MaxPacketSize - self.bulk_write_max_packet_size = wed.MaxPacketSize - self.handle.claim_interface(self.INTERFACE_ID) - except USBError as err: - raise DeviceBusy(str(err)) - # Large timeout as device may still be initializing - res = self.send_validated_command(GetUSBProtocolVersion(), timeout=20000) - if res.code != 0: - raise ProtocolError("Unable to get USB Protocol version.") - version = self._bulk_read(24, data_type=USBProtocolVersion)[0].version - if version not in KNOWN_USB_PROTOCOL_VERSIONS: - print >> sys.stderr, "WARNING: Usb protocol version " + \ - hex(version) + " is unknown" - res = self.send_validated_command(SetBulkSize(\ - chunk_size = 512*self.bulk_read_max_packet_size, \ - unknown = 2)) - if res.code != 0: - raise ProtocolError("Unable to set bulk size.") - res = self.send_validated_command(UnlockDevice(key=self.key))#0x312d)) - if res.code != 0: - raise DeviceLocked() - res = self.send_validated_command(SetTime()) - if res.code != 0: - raise ProtocolError("Could not set time on device") - - def eject(self): - pass - - def close(self): - """ Release device interface """ - with lock: - try: - self.handle.reset() - self.handle.release_interface(self.INTERFACE_ID) - except Exception as err: - print >> sys.stderr, err - self.handle, self.device 
= None, None - self.in_session = False - - def _send_command(self, command, response_type=Response, timeout=1000): - """ - Send L{command} to device and return its L{response}. - - @param command: an object of type Command or one of its derived classes - @param response_type: an object of type 'type'. The return packet - from the device is returned as an object of type response_type. - @param timeout: The time to wait for a response from the - device, in milliseconds. If there is no response, a L{usb.USBError} is raised. - """ - with lock: - if self.log_packets: - self.log_packet(command, "Command") - bytes_sent = self.handle.control_msg(0x40, 0x80, command) - if bytes_sent != len(command): - raise ControlError(desc="Could not send control request to device\n"\ - + str(command)) - response = response_type(self.handle.control_msg(0xc0, 0x81, \ - Response.SIZE, timeout=timeout)) - if self.log_packets: - self.log_packet(response, "Response") - return response - - def send_validated_command(self, command, cnumber=None, \ - response_type=Response, timeout=1000): - """ - Wrapper around L{_send_command} that checks if the - C{Response.rnumber == cnumber or - command.number if cnumber==None}. Also check that - C{Response.type == Command.type}. - """ - if cnumber == None: - cnumber = command.number - res = self._send_command(command, response_type=response_type, \ - timeout=timeout) - self.validate_response(res, _type=command.type, number=cnumber) - return res - - def _bulk_write(self, data, packet_size=0x1000): - """ - Send data to device via a bulk transfer. - @type data: Any listable type supporting __getslice__ - @param packet_size: Size of packets to be sent to device. - C{data} is broken up into packets to be sent to device. 
- """ - with lock: - def bulk_write_packet(packet): - self.handle.bulk_write(self.BULK_OUT_EP, packet) - if self.log_packets: - self.log_packet(Answer(packet), "Answer h->d") - - bytes_left = len(data) - if bytes_left + 16 <= packet_size: - packet_size = bytes_left +16 - first_packet = Answer(bytes_left+16) - first_packet[16:] = data - first_packet.length = len(data) - else: - first_packet = Answer(packet_size) - first_packet[16:] = data[0:packet_size-16] - first_packet.length = packet_size-16 - first_packet.number = 0x10005 - bulk_write_packet(first_packet) - pos = first_packet.length - bytes_left -= first_packet.length - while bytes_left > 0: - endpos = pos + packet_size if pos + packet_size <= len(data) \ - else len(data) - bulk_write_packet(data[pos:endpos]) - bytes_left -= endpos - pos - pos = endpos - res = Response(self.handle.control_msg(0xc0, 0x81, Response.SIZE, \ - timeout=5000)) - if self.log_packets: - self.log_packet(res, "Response") - if res.rnumber != 0x10005 or res.code != 0: - raise ProtocolError("Sending via Bulk Transfer failed with response:\n"\ - +str(res)) - if res.data_size != len(data): - raise ProtocolError("Unable to transfer all data to device. "+\ - "Response packet:\n"\ - +str(res)) - - - def _bulk_read(self, bytes, command_number=0x00, packet_size=0x1000, \ - data_type=Answer): - """ - Read in C{bytes} bytes via a bulk transfer in - packets of size S{<=} C{packet_size} - @param data_type: an object of type type. - The data packet is returned as an object of type C{data_type}. - @return: A list of packets read from the device. 
- Each packet is of type data_type - """ - with lock: - msize = self.bulk_read_max_packet_size - def bulk_read_packet(data_type=Answer, size=0x1000): - rsize = size - if size % msize: - rsize = size - size % msize + msize - data = data_type(self.handle.bulk_read(self.BULK_IN_EP, rsize)) - if self.log_packets: - self.log_packet(data, "Answer d->h") - if len(data) != size: - raise ProtocolError("Unable to read " + str(size) + " bytes from "\ - "device. Read: " + str(len(data)) + " bytes") - return data - - bytes_left = bytes - packets = [] - while bytes_left > 0: - if packet_size > bytes_left: - packet_size = bytes_left - packet = bulk_read_packet(data_type=data_type, size=packet_size) - bytes_left -= len(packet) - packets.append(packet) - self.send_validated_command(\ - AcknowledgeBulkRead(packets[0].number), \ - cnumber=command_number) - return packets - - @safe - def get_device_information(self, end_session=True): - """ - Ask device for device information. See L{DeviceInfoQuery}. - @return: (device name, device version, software version on device, mime type) - """ - size = self.send_validated_command(DeviceInfoQuery()).data[2] + 16 - ans = self._bulk_read(size, command_number=\ - DeviceInfoQuery.NUMBER, data_type=DeviceInfo)[0] - return (ans.device_name, ans.device_version, \ - ans.software_version, ans.mime_type) - - @safe - def path_properties(self, path, end_session=True): - """ - Send command asking device for properties of C{path}. - Return L{FileProperties}. 
- """ - res = self.send_validated_command(PathQuery(path), \ - response_type=ListResponse) - data = self._bulk_read(0x28, data_type=FileProperties, \ - command_number=PathQuery.NUMBER)[0] - if path.endswith('/') and path != '/': - path = path[:-1] - if res.path_not_found : - raise PathError(path + " does not exist on device") - if res.is_invalid: - raise PathError(path + " is not a valid path") - if res.is_unmounted: - raise PathError(path + " is not mounted") - if res.permission_denied: - raise PathError('Permission denied for: ' + path + '\nYou can only '+\ - 'operate on paths starting with /Data, a:/ or b:/') - if res.code not in (0, PathResponseCodes.IS_FILE): - raise PathError(path + " has an unknown error. Code: " + \ - hex(res.code)) - return data - - @safe - def get_file(self, path, outfile, end_session=True): - """ - Read the file at path on the device and write it to outfile. - - The data is fetched in chunks of size S{<=} 32K. Each chunk is - made of packets of size S{<=} 4K. See L{FileOpen}, - L{FileRead} and L{FileClose} for details on the command packets used. - - @param outfile: file object like C{sys.stdout} or the result of an C{open} call - """ - if path.endswith("/"): - path = path[:-1] # We only copy files - cp = self.card_prefix(False) - path = path.replace('card:/', cp if cp else '') - _file = self.path_properties(path, end_session=False) - if _file.is_dir: - raise PathError("Cannot read as " + path + " is a directory") - bytes = _file.file_size - res = self.send_validated_command(FileOpen(path)) - if res.code != 0: - raise PathError("Unable to open " + path + \ - " for reading. 
Response code: " + hex(res.code)) - _id = self._bulk_read(20, data_type=IdAnswer, \ - command_number=FileOpen.NUMBER)[0].id - # The first 16 bytes from the device are meta information on the packet stream - bytes_left, chunk_size = bytes, 512 * self.bulk_read_max_packet_size -16 - packet_size, pos = 64 * self.bulk_read_max_packet_size, 0 - while bytes_left > 0: - if chunk_size > bytes_left: - chunk_size = bytes_left - res = self.send_validated_command(FileIO(_id, pos, chunk_size)) - if res.code != 0: - self.send_validated_command(FileClose(id)) - raise ProtocolError("Error while reading from " + path + \ - ". Response code: " + hex(res.code)) - packets = self._bulk_read(chunk_size+16, \ - command_number=FileIO.RNUMBER, packet_size=packet_size) - try: - outfile.write("".join(map(chr, packets[0][16:]))) - for i in range(1, len(packets)): - outfile.write("".join(map(chr, packets[i]))) - except IOError as err: - self.send_validated_command(FileClose(_id)) - raise ArgumentError("File get operation failed. " + \ - "Could not write to local location: " + str(err)) - bytes_left -= chunk_size - pos += chunk_size - if self.report_progress: - self.report_progress(int(100*((1.*pos)/bytes))) - self.send_validated_command(FileClose(_id)) - # Not going to check response code to see if close was successful - # as there's not much we can do if it wasnt - - @safe - def list(self, path, recurse=False, end_session=True): - """ - Return a listing of path. See the code for details. See L{DirOpen}, - L{DirRead} and L{DirClose} for details on the command packets used. - - @type path: string - @param path: The path to list - @type recurse: boolean - @param recurse: If true do a recursive listing - @return: A list of tuples. The first element of each tuple is a path. - The second element is a list of L{Files}. - The path is the path we are listing, the C{Files} are the - files/directories in that path. 
If it is a recursive list, then the first - element will be (C{path}, children), the next will be - (child, its children) and so on. If it is not recursive the length of the - outermost list will be 1. - """ - def _list(path): - """ Do a non recursive listsing of path """ - if not path.endswith("/"): - path += "/" # Initially assume path is a directory - cp = self.card_prefix(False) - path = path.replace('card:/', cp if cp else '') - files = [] - candidate = self.path_properties(path, end_session=False) - if not candidate.is_dir: - path = path[:-1] - data = self.path_properties(path, end_session=False) - files = [ File((path, data)) ] - else: - # Get query ID used to ask for next element in list - res = self.send_validated_command(DirOpen(path)) - if res.code != 0: - raise PathError("Unable to open directory " + path + \ - " for reading. Response code: " + hex(res.code)) - _id = self._bulk_read(0x14, data_type=IdAnswer, \ - command_number=DirOpen.NUMBER)[0].id - # Create command asking for next element in list - next = DirRead(_id) - items = [] - while True: - res = self.send_validated_command(next, response_type=ListResponse) - size = res.data_size + 16 - data = self._bulk_read(size, data_type=ListAnswer, \ - command_number=DirRead.NUMBER)[0] - # path_not_found seems to happen if the usb server - # doesn't have the permissions to access the directory - if res.is_eol or res.path_not_found: - break - elif res.code != 0: - raise ProtocolError("Unknown error occured while "+\ - "reading contents of directory " + path + \ - ". 
Response code: " + hex(res.code)) - items.append(data.name) - self.send_validated_command(DirClose(_id)) - # Ignore res.code as we cant do anything if close fails - for item in items: - ipath = path + item - data = self.path_properties(ipath, end_session=False) - files.append( File( (ipath, data) ) ) - files.sort() - return files - - files = _list(path) - dirs = [(path, files)] - - for _file in files: - if recurse and _file.is_dir and not _file.path.startswith(("/dev","/proc")): - dirs[len(dirs):] = self.list(_file.path, recurse=True, end_session=False) - return dirs - - @safe - def total_space(self, end_session=True): - """ - Get total space available on the mountpoints: - 1. Main memory - 2. Memory Stick - 3. SD Card - - @return: A 3 element list with total space in bytes of (1, 2, 3) - """ - data = [] - for path in ("/Data/", "a:/", "b:/"): - # Timeout needs to be increased as it takes time to read card - res = self.send_validated_command(TotalSpaceQuery(path), \ - timeout=5000) - buffer_size = 16 + res.data[2] - pkt = self._bulk_read(buffer_size, data_type=TotalSpaceAnswer, \ - command_number=TotalSpaceQuery.NUMBER)[0] - data.append( pkt.total ) - return data - - @safe - def card_prefix(self, end_session=True): - try: - path = 'a:/' - self.path_properties(path, end_session=False) - return path - except PathError: - try: - path = 'b:/' - self.path_properties(path, end_session=False) - return path - except PathError: - return None - - @safe - def free_space(self, end_session=True): - """ - Get free space available on the mountpoints: - 1. Main memory - 2. Memory Stick - 3. 
SD Card - - @return: A 3 element list with free space in bytes of (1, 2, 3) - """ - data = [] - for path in ("/", "a:/", "b:/"): - # Timeout needs to be increased as it takes time to read card - self.send_validated_command(FreeSpaceQuery(path), \ - timeout=5000) - pkt = self._bulk_read(FreeSpaceAnswer.SIZE, \ - data_type=FreeSpaceAnswer, \ - command_number=FreeSpaceQuery.NUMBER)[0] - data.append( pkt.free ) - data = [x for x in data if x != 0] - data.append(0) - return data - - def _exists(self, path): - """ Return (True, FileProperties) if path exists or (False, None) otherwise """ - dest = None - try: - dest = self.path_properties(path, end_session=False) - except PathError as err: - if "does not exist" in str(err) or "not mounted" in str(err): - return (False, None) - else: raise - return (True, dest) - - @safe - def touch(self, path, end_session=True): - """ - Create a file at path - @todo: Update file modification time if it exists. - Opening the file in write mode and then closing it doesn't work. - """ - cp = self.card_prefix(False) - path = path.replace('card:/', cp if cp else '') - if path.endswith("/") and len(path) > 1: - path = path[:-1] - exists, _file = self._exists(path) - if exists and _file.is_dir: - raise PathError("Cannot touch directories") - if not exists: - res = self.send_validated_command(FileCreate(path)) - if res.code != 0: - raise PathError("Could not create file " + path + \ - ". Response code: " + str(hex(res.code))) - - @safe - def put_file(self, infile, path, replace_file=False, end_session=True): - """ - Put infile onto the devoce at path - @param infile: An open file object. infile must have a name attribute. - If you are using a StringIO object set its name attribute manually. - @param path: The path on the device at which to put infile. - It should point to an existing directory. 
- @param replace_file: If True and path points to a file that already exists, it is replaced - """ - pos = infile.tell() - infile.seek(0, 2) - bytes = infile.tell() - pos - start_pos = pos - infile.seek(pos) - cp = self.card_prefix(False) - path = path.replace('card:/', cp if cp else '') - exists, dest = self._exists(path) - if exists: - if dest.is_dir: - if not path.endswith("/"): - path += "/" - path += os.path.basename(infile.name) - return self.put_file(infile, path, replace_file=replace_file, end_session=False) - else: - if not replace_file: - raise PathError("Cannot write to " + \ - path + " as it already exists", path=path) - _file = self.path_properties(path, end_session=False) - if _file.file_size > bytes: - self.del_file(path, end_session=False) - self.touch(path, end_session=False) - else: self.touch(path, end_session=False) - chunk_size = 512 * self.bulk_write_max_packet_size - data_left = True - res = self.send_validated_command(FileOpen(path, mode=FileOpen.WRITE)) - if res.code != 0: - raise ProtocolError("Unable to open " + path + \ - " for writing. Response code: " + hex(res.code)) - _id = self._bulk_read(20, data_type=IdAnswer, \ - command_number=FileOpen.NUMBER)[0].id - - while data_left: - data = array('B') - try: - # Cannot use data.fromfile(infile, chunk_size) as it - # doesn't work in windows w/ python 2.5.1 - ind = infile.read(chunk_size) - data.fromstring(ind) - if len(ind) < chunk_size: - raise EOFError - except EOFError: - data_left = False - res = self.send_validated_command(FileIO(_id, pos, len(data), \ - mode=FileIO.WNUMBER)) - if res.code != 0: - raise ProtocolError("Unable to write to " + \ - path + ". 
Response code: " + hex(res.code)) - self._bulk_write(data) - pos += len(data) - if self.report_progress: - self.report_progress( int(100*(pos-start_pos)/(1.*bytes)) ) - self.send_validated_command(FileClose(_id)) - # Ignore res.code as cant do anything if close fails - _file = self.path_properties(path, end_session=False) - if _file.file_size != pos: - raise ProtocolError("Copying to device failed. The file " +\ - "on the device is larger by " + \ - str(_file.file_size - pos) + " bytes") - - @safe - def del_file(self, path, end_session=True): - """ Delete C{path} from device iff path is a file """ - data = self.path_properties(path, end_session=False) - if data.is_dir: - raise PathError("Cannot delete directories") - res = self.send_validated_command(FileDelete(path), \ - response_type=ListResponse) - if res.code != 0: - raise ProtocolError("Unable to delete " + path + \ - " with response:\n" + str(res)) - - @safe - def mkdir(self, path, end_session=True): - """ Make directory """ - if path.startswith('card:/'): - cp = self.card_prefix(False) - path = path.replace('card:/', cp if cp else '') - if not path.endswith("/"): - path += "/" - error_prefix = "Cannot create directory " + path - res = self.send_validated_command(DirCreate(path)).data[0] - if res == 0xffffffcc: - raise PathError(error_prefix + " as it already exists") - elif res == PathResponseCodes.NOT_FOUND: - raise PathError(error_prefix + " as " + \ - path[0:path[:-1].rfind("/")] + " does not exist ") - elif res == PathResponseCodes.INVALID: - raise PathError(error_prefix + " as " + path + " is invalid") - elif res != 0: - raise PathError(error_prefix + ". 
Response code: " + hex(res)) - - @safe - def rm(self, path, end_session=True): - """ Delete path from device if it is a file or an empty directory """ - cp = self.card_prefix(False) - path = path.replace('card:/', cp if cp else '') - dir = self.path_properties(path, end_session=False) - if not dir.is_dir: - self.del_file(path, end_session=False) - else: - if not path.endswith("/"): - path += "/" - res = self.send_validated_command(DirDelete(path)) - if res.code == PathResponseCodes.HAS_CHILDREN: - raise PathError("Cannot delete directory " + path + \ - " as it is not empty") - if res.code != 0: - raise ProtocolError("Failed to delete directory " + path + \ - ". Response code: " + hex(res.code)) - - @safe - def card(self, end_session=True): - """ Return path prefix to installed card or None """ - card = None - try: - if self._exists("a:/")[0]: - card = "a:" - except: - pass - try: - if self._exists("b:/")[0]: - card = "b:" - except: - pass - return card - - @safe - def books(self, oncard=False, end_session=True): - """ - Return a list of ebooks on the device. - @param oncard: If True return a list of ebooks on the storage card, - otherwise return list of ebooks in main memory of device - - @return: L{BookList} - """ - root = "/Data/media/" - tfile = TemporaryFile() - if oncard: - try: - self.get_file("a:"+self.CACHE_XML, tfile, end_session=False) - root = "a:/" - except PathError: - try: - self.get_file("b:"+self.CACHE_XML, tfile, end_session=False) - root = "b:/" - except PathError: pass - if tfile.tell() == 0: - tfile = None - else: - self.get_file(self.MEDIA_XML, tfile, end_session=False) - bl = BookList(root=root, sfile=tfile) - paths = bl.purge_corrupted_files() - for path in paths: - try: - self.del_file(path, end_session=False) - except PathError: # Incase this is a refetch without a sync in between - continue - return bl - - @safe - def remove_books(self, paths, booklists, end_session=True): - """ - Remove the books specified by paths from the device. 
The metadata - cache on the device should also be updated. - """ - for path in paths: - self.del_file(path, end_session=False) - fix_ids(booklists[0], booklists[1]) - self.sync_booklists(booklists, end_session=False) - - @safe - def sync_booklists(self, booklists, end_session=True): - ''' - Upload bookslists to device. - @param booklists: A tuple containing the result of calls to - (L{books}(oncard=False), L{books}(oncard=True)). - ''' - fix_ids(*booklists) - self.upload_book_list(booklists[0], end_session=False) - if booklists[1].root: - self.upload_book_list(booklists[1], end_session=False) - - @safe - def upload_books(self, files, names, on_card=False, end_session=True, - metadata=None): - card = self.card(end_session=False) - prefix = card + '/' + self.CARD_PATH_PREFIX +'/' if on_card else '/Data/media/books/' - if on_card and not self._exists(prefix)[0]: - self.mkdir(prefix[:-1], False) - paths, ctimes = [], [] - names = iter(names) - infiles = [file if hasattr(file, 'read') else open(file, 'rb') for file in files] - for f in infiles: f.seek(0, 2) - sizes = [f.tell() for f in infiles] - size = sum(sizes) - space = self.free_space(end_session=False) - mspace = space[0] - cspace = space[2] if len(space) > 2 and space[2] >= space[1] else space[1] - if on_card and size > cspace - 1024*1024: - raise FreeSpaceError("There is insufficient free space "+\ - "on the storage card") - if not on_card and size > mspace - 2*1024*1024: - raise FreeSpaceError("There is insufficient free space " +\ - "in main memory") - - for infile in infiles: - infile.seek(0) - name = names.next() - paths.append(prefix+name) - self.put_file(infile, paths[-1], replace_file=True, end_session=False) - ctimes.append(self.path_properties(paths[-1], end_session=False).ctime) - return zip(paths, sizes, ctimes) - - @classmethod - def add_books_to_metadata(cls, locations, metadata, booklists): - metadata = iter(metadata) - for location in locations: - info = metadata.next() - path = location[0] - 
on_card = 1 if path[1] == ':' else 0 - name = path.rpartition('/')[2] - name = (cls.CARD_PATH_PREFIX+'/' if on_card else 'books/') + name - booklists[on_card].add_book(info, name, *location[1:]) - fix_ids(*booklists) - - @safe - def delete_books(self, paths, end_session=True): - for path in paths: - self.del_file(path, end_session=False) - - @classmethod - def remove_books_from_metadata(cls, paths, booklists): - for path in paths: - on_card = 1 if path[1] == ':' else 0 - booklists[on_card].remove_book(path) - fix_ids(*booklists) - - @safe - def add_book(self, infile, name, info, booklists, oncard=False, \ - sync_booklists=False, end_session=True): - """ - Add a book to the device. If oncard is True then the book is copied - to the card rather than main memory. - - @param infile: The source file, should be opened in "rb" mode - @param name: The name of the book file when uploaded to the - device. The extension of name must be one of - the supported formats for this device. - @param info: A dictionary that must have the keys "title", "authors", "cover". - C{info["cover"]} should be a three element tuple (width, height, data) - where data is the image data in JPEG format as a string - @param booklists: A tuple containing the result of calls to - (L{books}(oncard=False), L{books}(oncard=True)). 
- """ - infile.seek(0, 2) - size = infile.tell() - infile.seek(0) - card = self.card(end_session=False) - space = self.free_space(end_session=False) - mspace = space[0] - cspace = space[1] if space[1] >= space[2] else space[2] - if oncard and size > cspace - 1024*1024: - raise FreeSpaceError("There is insufficient free space "+\ - "on the storage card") - if not oncard and size > mspace - 1024*1024: - raise FreeSpaceError("There is insufficient free space " +\ - "in main memory") - prefix = "/Data/media/" - if oncard: - prefix = card + "/" - else: name = "books/"+name - path = prefix + name - self.put_file(infile, path, end_session=False) - ctime = self.path_properties(path, end_session=False).ctime - bkl = booklists[1] if oncard else booklists[0] - bkl.add_book(info, name, size, ctime) - fix_ids(booklists[0], booklists[1]) - if sync_booklists: - self.sync_booklists(booklists, end_session=False) - - @safe - def upload_book_list(self, booklist, end_session=True): - path = self.MEDIA_XML - if not booklist.prefix: - card = self.card(end_session=True) - if not card: - raise ArgumentError("Cannot upload list to card as "+\ - "card is not present") - path = card + self.CACHE_XML - f = StringIO() - booklist.write(f) - f.seek(0) - self.put_file(f, path, replace_file=True, end_session=False) - f.close() diff --git a/src/calibre/devices/prs500/prstypes.py b/src/calibre/devices/prs500/prstypes.py deleted file mode 100755 index 3efbfcab31..0000000000 --- a/src/calibre/devices/prs500/prstypes.py +++ /dev/null @@ -1,861 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -""" -Defines the structure of packets that are sent to/received from the device. - -Packet structure is defined using classes and inheritance. Each class is a -view that imposes structure on the underlying data buffer. -The data buffer is encoded in little-endian format, but you don't -have to worry about that if you are using the classes. 
-The classes have instance variables with getter/setter functions defined -to take care of the encoding/decoding. -The classes are intended to mimic C structs. - -There are three kinds of packets. L{Commands}, -L{Responses}, and L{Answers}. -C{Commands} are sent to the device on the control bus, -C{Responses} are received from the device, -also on the control bus. C{Answers} and their sub-classes represent -data packets sent to/received from the device via bulk transfers. - -Commands are organized as follows: G{classtree Command} - -You will typically only use sub-classes of Command. - -Responses are organized as follows: G{classtree Response} - -Responses inherit Command as they share header structure. - -Answers are organized as follows: G{classtree Answer} -""" - -import struct -import time -from datetime import datetime - -from calibre.devices.errors import PacketError - -WORD = " 31 and self[i+b] < 127 else "." - except IndexError: break - ans = ans + " " - if (i+2)%16 == 0: - if i+2 < len(self): - ans += " " + ascii + "\n" + (TransferBuffer.phex(i+2)+": ").rjust(10, "0") - ascii = "" - last_line = ans[ans.rfind("\n")+1:] - padding = 50 - len(last_line) - ans += "".ljust(padding) + " " + ascii - return ans.strip() - - def unpack(self, fmt=DWORD, start=0): - """ - Return decoded data from buffer. - - @param fmt: See U{struct} - @param start: Position in buffer from which to decode - """ - end = start + struct.calcsize(fmt) - return struct.unpack(fmt, "".join([ chr(i) for i in list.__getslice__(self, start, end) ])) - - def pack(self, val, fmt=DWORD, start=0): - """ - Encode C{val} and write it to buffer. For fmt==WORD val is - adjusted to be in the range 0 <= val < 256**2. - - @param fmt: See U{struct} - @param start: Position in buffer at which to write encoded data - """ - # struct.py is fussy about packing values into a WORD. The value must be - # between 0 and 65535 or a DeprecationWarning is raised. 
In the future - # this may become an error, so it's best to take care of wrapping here. - if fmt == WORD: - val = val % 256**2 - self[start:start+struct.calcsize(fmt)] = \ - [ ord(i) for i in struct.pack(fmt, val) ] - - def _normalize(self): - """ Replace negative bytes in C{self} by 256 + byte """ - for i in range(len(self)): - if self[i] < 0: - self[i] = 256 + self[i] - - @classmethod - def phex(cls, num): - """ - Return the hex representation of num without the 0x prefix. - - If the hex representation is only 1 digit it is padded to the left with a zero. Used in L{TransferBuffer.__str__}. - """ - index, sign = 2, "" - if num < 0: - index, sign = 3, "-" - h = hex(num)[index:] - if len(h) < 2: - h = "0"+h - return sign + h - - -class field(object): - """ A U{Descriptor}, that implements access - to protocol packets in a human readable way. - """ - def __init__(self, start=16, fmt=DWORD): - """ - @param start: The byte at which this field is stored in the buffer - @param fmt: The packing format for this field. - See U{struct}. - """ - self._fmt, self._start = fmt, start - - def __get__(self, obj, typ=None): - return obj.unpack(start=self._start, fmt=self._fmt)[0] - - def __set__(self, obj, val): - obj.pack(val, start=self._start, fmt=self._fmt) - - def __repr__(self): - typ = "" - if self._fmt == DWORD: - typ = "unsigned int" - if self._fmt == DDWORD: - typ = "unsigned long long" - return "An " + typ + " stored in " + \ - str(struct.calcsize(self._fmt)) + \ - " bytes starting at byte " + str(self._start) - -class stringfield(object): - """ A field storing a variable length string. """ - def __init__(self, length_field, start=16): - """ - @param length_field: A U{Descriptor} - that returns the length of the string. 
- @param start: The byte at which this field is stored in the buffer - """ - self._length_field = length_field - self._start = start - - def __get__(self, obj, typ=None): - length = str(self._length_field.__get__(obj)) - return obj.unpack(start=self._start, fmt="<"+length+"s")[0] - - def __set__(self, obj, val): - if isinstance(val, unicode): - val = val.encode('utf8') - else: - val = str(val) - obj.pack(val, start=self._start, fmt="<"+str(len(val))+"s") - - def __repr__(self): - return "A string starting at byte " + str(self._start) - -class Command(TransferBuffer): - - """ Defines the structure of command packets sent to the device. """ - # Command number. C{unsigned int} stored in 4 bytes at byte 0. - # - # Command numbers are: - # 0 GetUsbProtocolVersion - # 1 ReqEndSession - # 10 FskFileOpen - # 11 FskFileClose - # 12 FskGetSize - # 13 FskSetSize - # 14 FskFileSetPosition - # 15 FskGetPosition - # 16 FskFileRead - # 17 FskFileWrite - # 18 FskFileGetFileInfo - # 19 FskFileSetFileInfo - # 1A FskFileCreate - # 1B FskFileDelete - # 1C FskFileRename - # 30 FskFileCreateDirectory - # 31 FskFileDeleteDirectory - # 32 FskFileRenameDirectory - # 33 FskDirectoryIteratorNew - # 34 FskDirectoryIteratorDispose - # 35 FskDirectoryIteratorGetNext - # 52 FskVolumeGetInfo - # 53 FskVolumeGetInfoFromPath - # 80 FskFileTerminate - # 100 ConnectDevice - # 101 GetProperty - # 102 GetMediaInfo - # 103 GetFreeSpace - # 104 SetTime - # 105 DeviceBeginEnd - # 106 UnlockDevice - # 107 SetBulkSize - # 110 GetHttpRequest - # 111 SetHttpRespponse - # 112 Needregistration - # 114 GetMarlinState - # 200 ReqDiwStart - # 201 SetDiwPersonalkey - # 202 GetDiwPersonalkey - # 203 SetDiwDhkey - # 204 GetDiwDhkey - # 205 SetDiwChallengeserver - # 206 GetDiwChallengeserver - # 207 GetDiwChallengeclient - # 208 SetDiwChallengeclient - # 209 GetDiwVersion - # 20A SetDiwWriteid - # 20B GetDiwWriteid - # 20C SetDiwSerial - # 20D GetDiwModel - # 20C SetDiwSerial - # 20E GetDiwDeviceid - # 20F 
GetDiwSerial - # 210 ReqDiwCheckservicedata - # 211 ReqDiwCheckiddata - # 212 ReqDiwCheckserialdata - # 213 ReqDiwFactoryinitialize - # 214 GetDiwMacaddress - # 215 ReqDiwTest - # 216 ReqDiwDeletekey - # 300 UpdateChangemode - # 301 UpdateDeletePartition - # 302 UpdateCreatePartition - # 303 UpdateCreatePartitionWithImage - # 304 UpdateGetPartitionSize - number = field(start=0, fmt=DWORD) - # Known types are 0x00 and 0x01. Acknowledge commands are always type 0x00 - type = field(start=4, fmt=DDWORD) - # Length of the data part of this packet - length = field(start=12, fmt=DWORD) - - @dynamic_property - def data(self): - doc = \ - """ - The data part of this command. Returned/set as/by a TransferBuffer. - Stored at byte 16. - - Setting it by default changes self.length to the length of the new - buffer. You may have to reset it to the significant part of the buffer. - You would normally use the C{command} property of - L{ShortCommand} or L{LongCommand} instead. - """ - def fget(self): - return self[16:] - - def fset(self, buff): - self[16:] = buff - self.length = len(buff) - - return property(doc=doc, fget=fget, fset=fset) - - def __init__(self, packet): - """ - @param packet: len(packet) > 15 or packet > 15 - """ - if ("__len__" in dir(packet) and len(packet) < 16) or\ - ("__len__" not in dir(packet) and packet < 16): - raise PacketError(str(self.__class__)[7:-2] + \ - " packets must have length atleast 16") - TransferBuffer.__init__(self, packet) - - -class SetTime(Command): - """ - Set time on device. All fields refer to time in the GMT time zone. - """ - NUMBER = 0x104 - # -time.timezone with negative numbers encoded - # as int(0xffffffff +1 -time.timezone/60.) - timezone = field(start=0x10, fmt=DWORD) - year = field(start=0x14, fmt=DWORD) #: year e.g. 
2006 - month = field(start=0x18, fmt=DWORD) #: month 1-12 - day = field(start=0x1c, fmt=DWORD) #: day 1-31 - hour = field(start=0x20, fmt=DWORD) #: hour 0-23 - minute = field(start=0x24, fmt=DWORD) #: minute 0-59 - second = field(start=0x28, fmt=DWORD) #: second 0-59 - - def __init__(self, t=None): - """ @param t: time as an epoch """ - self.number = SetTime.NUMBER - self.type = 0x01 - self.length = 0x1c - td = datetime.now() - datetime.utcnow() - tz = int((td.days*24*3600 + td.seconds)/60.) - self.timezone = tz if tz > 0 else 0xffffffff +1 + tz - if not t: t = time.time() - t = time.gmtime(t) - self.year = t[0] - self.month = t[1] - self.day = t[2] - self.hour = t[3] - self.minute = t[4] - # Hack you should actually update the entire time tree if - # second is > 59 - self.second = t[5] if t[5] < 60 else 59 - - -class ShortCommand(Command): - - """ A L{Command} whose data section is 4 bytes long """ - - SIZE = 20 #: Packet size in bytes - # Usually carries additional information - command = field(start=16, fmt=DWORD) - - def __init__(self, number=0x00, type=0x00, command=0x00): - """ - @param number: L{Command.number} - @param type: L{Command.type} - @param command: L{ShortCommand.command} - """ - Command.__init__(self, ShortCommand.SIZE) - self.number = number - self.type = type - self.length = 4 - self.command = command - -class DirRead(ShortCommand): - """ The command that asks the device to send the next item in the list """ - NUMBER = 0x35 #: Command number - def __init__(self, _id): - """ @param id: The identifier returned as a result of a L{DirOpen} command """ - ShortCommand.__init__(self, number=DirRead.NUMBER, type=0x01, \ - command=_id) - -class DirClose(ShortCommand): - """ Close a previously opened directory """ - NUMBER = 0x34 #: Command number - def __init__(self, _id): - """ @param id: The identifier returned as a result of a L{DirOpen} command """ - ShortCommand.__init__(self, number=DirClose.NUMBER, type=0x01, - command=_id) - -class 
BeginEndSession(ShortCommand): - """ - Ask device to either start or end a session. - """ - NUMBER = 0x01 #: Command number - def __init__(self, end=True): - command = 0x00 if end else 0x01 - ShortCommand.__init__(self, \ - number=BeginEndSession.NUMBER, type=0x01, command=command) - -class GetUSBProtocolVersion(ShortCommand): - """ Get USB Protocol version used by device """ - NUMBER = 0x0 #: Command number - def __init__(self): - ShortCommand.__init__(self, \ - number=GetUSBProtocolVersion.NUMBER, \ - type=0x01, command=0x00) - -class SetBulkSize(Command): - """ Set size for bulk transfers in this session """ - NUMBER = 0x107 #: Command number - chunk_size = field(fmt=WORD, start=0x10) - unknown = field(fmt=WORD, start=0x12) - def __init__(self, chunk_size=0x8000, unknown=0x2): - Command.__init__(self, [0 for i in range(24)]) - self.number = SetBulkSize.NUMBER - self.type = 0x01 - self.chunk_size = chunk_size - self.unknown = unknown - -class UnlockDevice(Command): - """ Unlock the device """ - NUMBER = 0x106 #: Command number - key = stringfield(8, start=16) #: The key defaults to -1 - - def __init__(self, key='-1\0\0\0\0\0\0'): - Command.__init__(self, 24) - self.number = UnlockDevice.NUMBER - self.type = 0x01 - self.length = 8 - self.key = key - -class LongCommand(Command): - - """ A L{Command} whose data section is 16 bytes long """ - - SIZE = 32 #: Size in bytes of C{LongCommand} packets - - def __init__(self, number=0x00, type=0x00, command=0x00): - """ - @param number: L{Command.number} - @param type: L{Command.type} - @param command: L{LongCommand.command} - """ - Command.__init__(self, LongCommand.SIZE) - self.number = number - self.type = type - self.length = 16 - self.command = command - - @dynamic_property - def command(self): - doc = \ - """ - Usually carries extra information needed for the command - It is a list of C{unsigned integers} of length between 1 and 4. 4 - C{unsigned int} stored in 16 bytes at byte 16. 
- """ - def fget(self): - return self.unpack(start=16, fmt="<"+str(self.length/4)+"I") - - def fset(self, val): - if "__len__" not in dir(val): val = (val,) - start = 16 - for command in val: - self.pack(command, start=start, fmt=DWORD) - start += struct.calcsize(DWORD) - - return property(doc=doc, fget=fget, fset=fset) - -class PathCommand(Command): - """ Abstract class that defines structure common to all path related commands. """ - - path_length = field(start=16, fmt=DWORD) #: Length of the path to follow - path = stringfield(path_length, start=20) #: The path this query is about - def __init__(self, path, number, path_len_at_byte=16): - Command.__init__(self, path_len_at_byte+4+len(path)) - if isinstance(path, unicode): - path = path.encode('utf8') - self.path_length = len(path) - self.path = path - self.type = 0x01 - self.length = len(self) - 16 - self.number = number - -class TotalSpaceQuery(PathCommand): - """ Query the total space available on the volume represented by path """ - NUMBER = 0x53 #: Command number - def __init__(self, path): - """ @param path: valid values are 'a:', 'b:', '/Data/' """ - PathCommand.__init__(self, path, TotalSpaceQuery.NUMBER) - -class FreeSpaceQuery(ShortCommand): - """ Query the free space available """ - NUMBER = 0x103 #: Command number - def __init__(self, where): - """ @param where: valid values are: 'a:', 'b:', '/' """ - c = 0 - if where.startswith('a:'): c = 1 - elif where.startswith('b:'): c = 2 - ShortCommand.__init__(self, \ - number=FreeSpaceQuery.NUMBER, type=0x01, command=c) - -class DirCreate(PathCommand): - """ Create a directory """ - NUMBER = 0x30 - def __init__(self, path): - PathCommand.__init__(self, path, DirCreate.NUMBER) - -class DirOpen(PathCommand): - """ Open a directory for reading its contents """ - NUMBER = 0x33 #: Command number - def __init__(self, path): - PathCommand.__init__(self, path, DirOpen.NUMBER) - - -class AcknowledgeBulkRead(LongCommand): - """ Must be sent to device after a bulk read 
""" - def __init__(self, bulk_read_id): - """ - bulk_read_id is an integer, the id of the bulk read - we are acknowledging. See L{Answer.id} - """ - LongCommand.__init__(self, number=0x1000, \ - type=0x00, command=bulk_read_id) - -class DeviceInfoQuery(Command): - """ The command used to ask for device information """ - NUMBER = 0x101 #: Command number - def __init__(self): - Command.__init__(self, 16) - self.number = DeviceInfoQuery.NUMBER - self.type = 0x01 - -class FileClose(ShortCommand): - """ File close command """ - NUMBER = 0x11 #: Command number - def __init__(self, _id): - ShortCommand.__init__(self, number=FileClose.NUMBER, \ - type=0x01, command=_id) - -class FileCreate(PathCommand): - """ Create a file """ - NUMBER = 0x1a #: Command number - def __init__(self, path): - PathCommand.__init__(self, path, FileCreate.NUMBER) - -class FileDelete(PathCommand): - """ Delete a file """ - NUMBER = 0x1B - def __init__(self, path): - PathCommand.__init__(self, path, FileDelete.NUMBER) - -class DirDelete(PathCommand): - """ Delete a directory """ - NUMBER = 0x31 - def __init__(self, path): - PathCommand.__init__(self, path, DirDelete.NUMBER) - -class FileOpen(PathCommand): - """ File open command """ - NUMBER = 0x10 #: Command number - READ = 0x00 #: Open file in read mode - WRITE = 0x01 #: Open file in write mode - path_length = field(start=20, fmt=DWORD) - path = stringfield(path_length, start=24) - - def __init__(self, path, mode=0x00): - PathCommand.__init__(self, path, FileOpen.NUMBER, path_len_at_byte=20) - self.mode = mode - - @dynamic_property - def mode(self): - doc = \ - """ - The file open mode. Is either L{FileOpen.READ} - or L{FileOpen.WRITE}. C{unsigned int} stored at byte 16. 
- """ - def fget(self): - return self.unpack(start=16, fmt=DWORD)[0] - - def fset(self, val): - self.pack(val, start=16, fmt=DWORD) - - return property(doc=doc, fget=fget, fset=fset) - - -class FileIO(Command): - """ Command to read/write from an open file """ - RNUMBER = 0x16 #: Command number to read from a file - WNUMBER = 0x17 #: Command number to write to a file - id = field(start=16, fmt=DWORD) #: The file ID returned by a FileOpen command - offset = field(start=20, fmt=DDWORD) #: offset in the file at which to read - size = field(start=28, fmt=DWORD) #: The number of bytes to reead from file. - def __init__(self, _id, offset, size, mode=0x16): - """ - @param _id: File identifier returned by a L{FileOpen} command - @type id: C{unsigned int} - @param offset: Position in file at which to read - @type offset: C{unsigned long long} - @param size: number of bytes to read - @type size: C{unsigned int} - @param mode: Either L{FileIO.RNUMBER} or L{File.WNUMBER} - """ - Command.__init__(self, 32) - self.number = mode - self.type = 0x01 - self.length = 16 - self.id = _id - self.offset = offset - self.size = size - - -class PathQuery(PathCommand): - """ Defines structure of command that requests information about a path """ - NUMBER = 0x18 #: Command number - def __init__(self, path): - PathCommand.__init__(self, path, PathQuery.NUMBER) - -class SetFileInfo(PathCommand): - """ Set File information """ - NUMBER = 0x19 #: Command number - def __init__(self, path): - PathCommand.__init__(self, path, SetFileInfo.NUMBER) - -class Response(Command): - """ - Defines the structure of response packets received from the device. - - C{Response} inherits from C{Command} as the - first 16 bytes have the same structure. - """ - - SIZE = 32 #: Size of response packets in the SONY protocol - # Response number, the command number of a command - # packet sent sometime before this packet was received - rnumber = field(start=16, fmt=DWORD) - # Used to indicate error conditions. 
A value of 0 means - # there was no error - code = field(start=20, fmt=DWORD) - # Used to indicate the size of the next bulk read - data_size = field(start=28, fmt=DWORD) - - def __init__(self, packet): - """ C{len(packet) == Response.SIZE} """ - if len(packet) != Response.SIZE: - raise PacketError(str(self.__class__)[7:-2] + \ - " packets must have exactly " + \ - str(Response.SIZE) + " bytes not " + str(len(packet))) - Command.__init__(self, packet) - if self.number != 0x00001000: - raise PacketError("Response packets must have their number set to " \ - + hex(0x00001000)) - - @dynamic_property - def data(self): - doc = \ - """ - The last 3 DWORDs (12 bytes) of data in this - response packet. Returned as a list of unsigned integers. - """ - def fget(self): - return self.unpack(start=20, fmt="=} C{16} """ - if "__len__" in dir(packet): - if len(packet) < 16 : - raise PacketError(str(self.__class__)[7:-2] + \ - " packets must have a length of atleast 16 bytes. "\ - "Got initializer of " + str(len(packet)) + " bytes.") - elif packet < 16: - raise PacketError(str(self.__class__)[7:-2] + \ - " packets must have a length of atleast 16 bytes") - TransferBuffer.__init__(self, packet) - - -class FileProperties(Answer): - - """ - Defines the structure of packets that contain size, date and - permissions information about files/directories. 
- """ - - file_size = field(start=16, fmt=DDWORD) #: Size in bytes of the file - file_type = field(start=24, fmt=DWORD) #: 1 == file, 2 == dir - ctime = field(start=28, fmt=DWORD) #: Creation time as an epoch - wtime = field(start=32, fmt=DWORD) #: Modification time as an epoch - # 0 = default permissions, 4 = read only - permissions = field(start=36, fmt=DWORD) - - @dynamic_property - def is_dir(self): - doc = """True if path points to a directory, False if it points to a file.""" - - def fget(self): - return (self.file_type == 2) - - def fset(self, val): - if val: - val = 2 - else: - val = 1 - self.file_type = val - - return property(doc=doc, fget=fget, fset=fset) - - - @dynamic_property - def is_readonly(self): - doc = """ Whether this file is readonly.""" - - def fget(self): - return self.unpack(start=36, fmt=DWORD)[0] != 0 - - def fset(self, val): - if val: - val = 4 - else: - val = 0 - self.pack(val, start=36, fmt=DWORD) - - return property(doc=doc, fget=fget, fset=fset) - - -class USBProtocolVersion(Answer): - """ Get USB Protocol version """ - version = field(start=16, fmt=DDWORD) - -class IdAnswer(Answer): - - """ Defines the structure of packets that contain identifiers for queries. """ - - @dynamic_property - def id(self): - doc = \ - """ - The identifier. C{unsigned int} stored in 4 bytes - at byte 16. Should be sent in commands asking - for the next item in the list. 
- """ - - def fget(self): - return self.unpack(start=16, fmt=DWORD)[0] - - def fset(self, val): - self.pack(val, start=16, fmt=DWORD) - - return property(doc=doc, fget=fget, fset=fset) - -class DeviceInfo(Answer): - """ Defines the structure of the packet containing information about the device """ - device_name = field(start=16, fmt="<32s") - device_version = field(start=48, fmt="<32s") - software_version = field(start=80, fmt="<24s") - mime_type = field(start=104, fmt="<32s") - - -class TotalSpaceAnswer(Answer): - total = field(start=24, fmt=DDWORD) #: Total space available - # Supposedly free space available, but it does not work for main memory - free_space = field(start=32, fmt=DDWORD) - -class FreeSpaceAnswer(Answer): - SIZE = 24 - free = field(start=16, fmt=DDWORD) - - -class ListAnswer(Answer): - """ Defines the structure of packets that contain items in a list. """ - name_length = field(start=20, fmt=DWORD) - name = stringfield(name_length, start=24) - - @dynamic_property - def is_dir(self): - doc = \ - """ - True if list item points to a directory, False if it points to a file. - C{unsigned int} stored in 4 bytes at byte 16. - """ - - def fget(self): - return (self.unpack(start=16, fmt=DWORD)[0] == 2) - - def fset(self, val): - if val: val = 2 - else: val = 1 - self.pack(val, start=16, fmt=DWORD) - - return property(doc=doc, fget=fget, fset=fset) - diff --git a/src/calibre/ebooks/lrf/meta.py b/src/calibre/ebooks/lrf/meta.py index 52809b0775..822257d457 100644 --- a/src/calibre/ebooks/lrf/meta.py +++ b/src/calibre/ebooks/lrf/meta.py @@ -18,7 +18,6 @@ from cStringIO import StringIO import xml.dom.minidom as dom from functools import wraps -from calibre.devices.prs500.prstypes import field from calibre.ebooks.metadata import MetaInformation, string_to_authors BYTE = "}, that implements access + to protocol packets in a human readable way. 
+ """ + def __init__(self, start=16, fmt=DWORD): + """ + @param start: The byte at which this field is stored in the buffer + @param fmt: The packing format for this field. + See U{struct}. + """ + self._fmt, self._start = fmt, start + + def __get__(self, obj, typ=None): + return obj.unpack(start=self._start, fmt=self._fmt)[0] + + def __set__(self, obj, val): + obj.pack(val, start=self._start, fmt=self._fmt) + + def __repr__(self): + typ = "" + if self._fmt == DWORD: + typ = "unsigned int" + if self._fmt == QWORD: + typ = "unsigned long long" + return "An " + typ + " stored in " + \ + str(struct.calcsize(self._fmt)) + \ + " bytes starting at byte " + str(self._start) + + class versioned_field(field): def __init__(self, vfield, version, start=0, fmt=WORD): field.__init__(self, start=start, fmt=fmt) diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 2613c168fd..1a66324d9f 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -16,7 +16,7 @@ from calibre import CurrentDir entry_points = { 'console_scripts': [ \ - 'ebook-device = calibre.devices.prs500.cli.main:main', + 'ebook-device = calibre.devices.cli:main', 'ebook-meta = calibre.ebooks.metadata.cli:main', 'ebook-convert = calibre.ebooks.conversion.cli:main', 'markdown-calibre = calibre.ebooks.markdown.markdown:main', @@ -299,7 +299,7 @@ class PostInstall: return 0 ;; cp ) - if [[ ${cur} == prs500:* ]]; then + if [[ ${cur} == dev:* ]]; then COMPREPLY=( $(_ebook_device_ls "${cur:7}") ) return 0 else @@ -307,20 +307,20 @@ class PostInstall: return 0 fi ;; - prs500 ) + dev ) COMPREPLY=( $(compgen -W "cp ls rm mkdir touch cat info books df" "${cur}") ) return 0 ;; * ) - if [[ ${cur} == prs500:* ]]; then + if [[ ${cur} == dev:* ]]; then COMPREPLY=( $(_ebook_device_ls "${cur:7}") ) return 0 else - if [[ ${prev} == prs500:* ]]; then + if [[ ${prev} == dev:* ]]; then _filedir return 0 else - COMPREPLY=( $(compgen -W "prs500:" "${cur}") ) + COMPREPLY=( $(compgen -W "dev:" "${cur}") ) return 0 fi return 
0 diff --git a/src/calibre/trac/setup.py b/src/calibre/trac/setup.py deleted file mode 100644 index ae15f732d0..0000000000 --- a/src/calibre/trac/setup.py +++ /dev/null @@ -1,21 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - -from setuptools import find_packages, setup - -# name can be any name. This name will be used to create .egg file. -# name that is used in packages is the one that is used in the trac.ini file. -# use package name as entry_points -setup( - name='TracLibprs500Plugins', version='0.1', - packages=find_packages(exclude=['*.tests*']), - entry_points = """ - [trac.plugins] - download = plugins.download - changelog = plugins.Changelog - """, - package_data={'plugins': ['templates/*.html', - 'htdocs/css/*.css', - 'htdocs/images/*']}, -) - From 8c050a4355bbcb2b734247ce92072aaac8add2a9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 14:42:36 +0530 Subject: [PATCH 15/57] Update Time --- recipes/time_magazine.recipe | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/recipes/time_magazine.recipe b/recipes/time_magazine.recipe index cbe40f79f2..dfe897500e 100644 --- a/recipes/time_magazine.recipe +++ b/recipes/time_magazine.recipe @@ -19,7 +19,7 @@ class Time(BasicNewsRecipe): no_stylesheets = True language = 'en' remove_javascript = True - needs_subscription = 'optional' + needs_subscription = True keep_only_tags = [ { @@ -40,7 +40,7 @@ class Time(BasicNewsRecipe): def get_browser(self): br = BasicNewsRecipe.get_browser(self) # This site uses javascript in its login process - if False and self.username is not None and self.password is not None: + if self.username is not None and self.password is not None: br.open('http://www.time.com/time/magazine') br.select_form(predicate=lambda f: 'action' in f.attrs and f.attrs['action'] == 'https://auth.time.com/login.php') br['username'] = self.username @@ -52,7 +52,10 @@ class Time(BasicNewsRecipe): br['rurl'] = 
'http://www.time.com/time/magazine' br['remember'] = False raw = br.submit().read() - if '>Log Out<' not in raw: + if False and '>Log Out<' not in raw: + # This check is disabled as it does not work (there is probably + # some cookie missing) however, the login is "sufficient" for + # the actual article downloads to work. raise ValueError('Failed to login to time.com, check' ' your username and password') return br @@ -101,8 +104,7 @@ class Time(BasicNewsRecipe): method='text').strip() if not title: continue url = a[0].get('href') - # url = re.sub('/magazine/article/0,9171','/subscriber/printout/0,8816', - # url) + url = re.sub('/magazine/article/0,9171','/subscriber/printout/0,8816', url) if url.startswith('/'): url = 'http://www.time.com'+url desc = '' From 11e0d744284f278876e0614232c0c208a8719893 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 14:53:20 +0530 Subject: [PATCH 16/57] Business Week Magazine and Chronicle of Higher Education by Rick Shang --- recipes/bwmagazine2.recipe | 69 +++++++++++++++++++++++++++ recipes/chronicle_higher_ed.recipe | 75 ++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 recipes/bwmagazine2.recipe create mode 100644 recipes/chronicle_higher_ed.recipe diff --git a/recipes/bwmagazine2.recipe b/recipes/bwmagazine2.recipe new file mode 100644 index 0000000000..5f4774eb24 --- /dev/null +++ b/recipes/bwmagazine2.recipe @@ -0,0 +1,69 @@ +from calibre.web.feeds.recipes import BasicNewsRecipe +from collections import OrderedDict + +class BusinessWeekMagazine(BasicNewsRecipe): + + title = 'Business Week Magazine' + __author__ = 'Rick Shang' + + description = 'A renowned business publication. Business news, trends and profiles of successful businesspeople.' 
+ language = 'en' + category = 'news' + encoding = 'UTF-8' + keep_only_tags = [ + dict(name='div', attrs={'id':'article_body_container'}), + ] + remove_tags = [dict(name='ui'),dict(name='li')] + no_javascript = True + no_stylesheets = True + + cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg' + + def parse_index(self): + + #Go to the issue + soup = self.index_to_soup('http://www.businessweek.com/magazine/news/articles/business_news.htm') + + #Find date + mag=soup.find('h2',text='Magazine') + self.log(mag) + dates=self.tag_to_string(mag.findNext('h3')) + self.timefmt = u' [%s]'%dates + + #Go to the main body + div0 = soup.find ('div', attrs={'class':'column left'}) + section_title = '' + feeds = OrderedDict() + for div in div0.findAll('a'): + articles = [] + section_title = self.tag_to_string(div.findPrevious('h3')).strip() + title=self.tag_to_string(div).strip() + url=div['href'] + soup0 = self.index_to_soup(url) + urlprint=soup0.find('li', attrs={'class':'print'}).a['href'] + articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''}) + + + if articles: + if section_title not in feeds: + feeds[section_title] = [] + feeds[section_title] += articles + div1 = soup.find ('div', attrs={'class':'column center'}) + section_title = '' + for div in div1.findAll('a'): + articles = [] + desc=self.tag_to_string(div.findNext('p')).strip() + section_title = self.tag_to_string(div.findPrevious('h3')).strip() + title=self.tag_to_string(div).strip() + url=div['href'] + soup0 = self.index_to_soup(url) + urlprint=soup0.find('li', attrs={'class':'print'}).a['href'] + articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''}) + + if articles: + if section_title not in feeds: + feeds[section_title] = [] + feeds[section_title] += articles + ans = [(key, val) for key, val in feeds.iteritems()] + return ans + diff --git a/recipes/chronicle_higher_ed.recipe b/recipes/chronicle_higher_ed.recipe new file mode 100644 index 
0000000000..86f60181bc --- /dev/null +++ b/recipes/chronicle_higher_ed.recipe @@ -0,0 +1,75 @@ +from calibre.web.feeds.recipes import BasicNewsRecipe +from collections import OrderedDict + +class Chronicle(BasicNewsRecipe): + + title = 'The Chronicle of Higher Education' + __author__ = 'Rick Shang' + + description = 'Weekly news and job-information source for college and university faculty members, administrators, and students.' + language = 'en' + category = 'news' + encoding = 'UTF-8' + keep_only_tags = [ + dict(name='div', attrs={'class':'article'}), + ] + remove_tags = [dict(name='div',attrs={'class':'related module1'})] + no_javascript = True + no_stylesheets = True + + + needs_subscription = True + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('http://chronicle.com/myaccount/login') + br.select_form(nr=1) + br['username'] = self.username + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + + #Go to the issue + soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/') + issue = soup0.find('ul',attrs={'class':'feature-promo-list'}).li + issueurl = "http://chronicle.com"+issue.a['href'] + + #Find date + dates = self.tag_to_string(issue.a).split(': ')[-1] + self.timefmt = u' [%s]'%dates + + #Go to the main body + soup = self.index_to_soup(issueurl) + div0 = soup.find ('div', attrs={'id':'article-body'}) + + feeds = OrderedDict() + for div in div0.findAll('div',attrs={'class':'module1'}): + section_title = self.tag_to_string(div.find('h3')) + for post in div.findAll('li',attrs={'class':'sub-promo'}): + articles = [] + a=post.find('a', href=True) + title=self.tag_to_string(a) + url="http://chronicle.com"+a['href'].strip() + desc=self.tag_to_string(post.find('p')) + articles.append({'title':title, 'url':url, 'description':desc, 'date':''}) + + if articles: + if section_title not in feeds: + feeds[section_title] = [] + 
feeds[section_title] += articles + ans = [(key, val) for key, val in feeds.iteritems()] + return ans + + def preprocess_html(self,soup): + #process all the images + for div in soup.findAll('div', attrs={'class':'tableauPlaceholder'}): + + noscripts=div.find('noscript').a + div.replaceWith(noscripts) + for div0 in soup.findAll('div',text='Powered by Tableau'): + div0.extract() + return soup + From 47fa81f0bd6e5150ea366cfd6556327ca21fdcb9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 17:29:14 +0530 Subject: [PATCH 17/57] ... --- recipes/bwmagazine2.recipe | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/recipes/bwmagazine2.recipe b/recipes/bwmagazine2.recipe index 5f4774eb24..300d71806a 100644 --- a/recipes/bwmagazine2.recipe +++ b/recipes/bwmagazine2.recipe @@ -26,7 +26,6 @@ class BusinessWeekMagazine(BasicNewsRecipe): #Find date mag=soup.find('h2',text='Magazine') - self.log(mag) dates=self.tag_to_string(mag.findNext('h3')) self.timefmt = u' [%s]'%dates @@ -34,9 +33,10 @@ class BusinessWeekMagazine(BasicNewsRecipe): div0 = soup.find ('div', attrs={'class':'column left'}) section_title = '' feeds = OrderedDict() + articles = [] for div in div0.findAll('a'): - articles = [] section_title = self.tag_to_string(div.findPrevious('h3')).strip() + self.log('Processing section:', section_title) title=self.tag_to_string(div).strip() url=div['href'] soup0 = self.index_to_soup(url) @@ -50,10 +50,11 @@ class BusinessWeekMagazine(BasicNewsRecipe): feeds[section_title] += articles div1 = soup.find ('div', attrs={'class':'column center'}) section_title = '' + articles = [] for div in div1.findAll('a'): - articles = [] desc=self.tag_to_string(div.findNext('p')).strip() section_title = self.tag_to_string(div.findPrevious('h3')).strip() + self.log('Processing section:', section_title) title=self.tag_to_string(div).strip() url=div['href'] soup0 = self.index_to_soup(url) From cf416ed13c1016a250b092a3b9fc218c7f55720a Mon Sep 17 00:00:00 2001 
From: Kovid Goyal Date: Sat, 25 Aug 2012 23:28:32 +0530 Subject: [PATCH 18/57] Create a libusb based device scanner and use it on OS X. This is needed to enable MTP support. --- setup/extensions.py | 9 +- setup/installer/osx/app/main.py | 9 +- src/calibre/constants.py | 3 +- src/calibre/devices/libusb/libusb.c | 144 ++++++++++++++++++++++++++++ src/calibre/devices/scanner.py | 83 ++++++++++++---- 5 files changed, 222 insertions(+), 26 deletions(-) create mode 100644 src/calibre/devices/libusb/libusb.c diff --git a/setup/extensions.py b/setup/extensions.py index 60efc7d168..9b852d10c5 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -188,10 +188,15 @@ if iswindows: if isosx: extensions.append(Extension('usbobserver', ['calibre/devices/usbobserver/usbobserver.c'], - ldflags=['-framework', 'IOKit']) + ldflags=['-framework', 'CoreServices', '-framework', 'IOKit']) ) -if islinux: +if islinux or isosx: + extensions.append(Extension('libusb', + ['calibre/devices/libusb/libusb.c'], + libraries=['usb-1.0'] + )) + extensions.append(Extension('libmtp', [ 'calibre/devices/mtp/unix/devices.c', diff --git a/setup/installer/osx/app/main.py b/setup/installer/osx/app/main.py index 5268041359..14df94f4ba 100644 --- a/setup/installer/osx/app/main.py +++ b/setup/installer/osx/app/main.py @@ -438,12 +438,15 @@ class Py2App(object): @flush def add_misc_libraries(self): - for x in ('usb', 'unrar', 'readline.6.1', 'wmflite-0.2.7', 'chm.0', - 'sqlite3.0'): + for x in ('usb-1.0.0', 'mtp.9', 'unrar', 'readline.6.1', + 'wmflite-0.2.7', 'chm.0', 'sqlite3.0'): info('\nAdding', x) x = 'lib%s.dylib'%x shutil.copy2(join(SW, 'lib', x), self.frameworks_dir) - self.set_id(join(self.frameworks_dir, x), self.FID+'/'+x) + dest = join(self.frameworks_dir, x) + self.set_id(dest, self.FID+'/'+x) + if 'mtp' in x: + self.fix_dependencies_in_lib(dest) @flush def add_site_packages(self): diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 902307a649..c1e0faba36 100644 --- 
a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -94,7 +94,8 @@ class Plugins(collections.Mapping): plugins.extend(['winutil', 'wpd']) if isosx: plugins.append('usbobserver') - if islinux: + if islinux or isosx: + plugins.append('libusb') plugins.append('libmtp') self.plugins = frozenset(plugins) diff --git a/src/calibre/devices/libusb/libusb.c b/src/calibre/devices/libusb/libusb.c new file mode 100644 index 0000000000..7d5234e54e --- /dev/null +++ b/src/calibre/devices/libusb/libusb.c @@ -0,0 +1,144 @@ +/* + * libusb.c + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#define UNICODE + +#include +#include + +static PyObject *Error = NULL; +static PyObject *cache = NULL; + +static PyObject* format_err(int err) { + PyErr_SetString(Error, libusb_error_name(err)); + return NULL; +} + +static PyObject* read_string_property(libusb_device_handle *dev, uint8_t idx) { + unsigned char buf[301]; + int err; + PyObject *ans = NULL; + + Py_BEGIN_ALLOW_THREADS; + err = libusb_get_string_descriptor_ascii(dev, idx, buf, 300); + Py_END_ALLOW_THREADS; + + if (err > 0) { + ans = PyUnicode_FromStringAndSize((char *)buf, err); + } + + return ans; +} + +static PyObject* read_string_data(libusb_device *dev, uint8_t manufacturer, uint8_t product, uint8_t serial) { + libusb_device_handle *handle; + int err; + PyObject *ans = NULL, *p; + + ans = PyDict_New(); + if (ans == NULL) return PyErr_NoMemory(); + + err = libusb_open(dev, &handle); + + if (err == 0) { + p = read_string_property(handle, manufacturer); + if (p != NULL) { PyDict_SetItemString(ans, "manufacturer", p); Py_DECREF(p); } + + p = read_string_property(handle, product); + if (p != NULL) { PyDict_SetItemString(ans, "product", p); Py_DECREF(p); }; + + p = read_string_property(handle, serial); + if (p != NULL) { PyDict_SetItemString(ans, "serial", p); Py_DECREF(p); }; + + libusb_close(handle); + } + + return ans; +} + +static PyObject* get_devices(PyObject *self, PyObject 
*args) { + PyObject *ans = NULL, *d = NULL, *t = NULL, *rec = NULL; + int err, i = 0; + libusb_device **devs = NULL, *dev = NULL; + ssize_t count; + + ans = PyList_New(0); + if (ans == NULL) return PyErr_NoMemory(); + + Py_BEGIN_ALLOW_THREADS; + count = libusb_get_device_list(NULL, &devs); + Py_END_ALLOW_THREADS; + if (count < 0) { Py_DECREF(ans); return format_err((int)count); } + + while ( (dev = devs[i++]) != NULL ) { + struct libusb_device_descriptor desc; + err = libusb_get_device_descriptor(dev, &desc); + if (err != 0) { format_err(err); break; } + if (desc.bDeviceClass == LIBUSB_CLASS_HUB) continue; + + d = Py_BuildValue("(HHHHH)", libusb_get_bus_number(dev), + libusb_get_device_address(dev), desc.idVendor, desc.idProduct, + desc.bcdDevice); + if (d == NULL) break; + + t = PyDict_GetItem(cache, d); + if (t == NULL) { + t = read_string_data(dev, desc.iManufacturer, desc.iProduct, desc.iSerialNumber); + if (t == NULL) { Py_DECREF(d); break; } + PyDict_SetItem(cache, d, t); + Py_DECREF(t); + } + + rec = Py_BuildValue("(NO)", d, t); + if (rec == NULL) { Py_DECREF(d); break; } + + PyList_Append(ans, rec); + Py_DECREF(rec); + + } + + if (dev != NULL) { + // An error occurred + Py_DECREF(ans); ans = NULL; + } + + if (devs != NULL) libusb_free_device_list(devs, 1); + + return ans; +} + +static PyMethodDef libusb_methods[] = { + {"get_devices", get_devices, METH_VARARGS, + "get_devices()\n\nGet the list of USB devices on the system." + }, + + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC +initlibusb(void) { + PyObject *m; + + // We deliberately use the default context. This is the context used by + // libmtp and we want to ensure that the busnum/devnum numbers are the same + // here and for libmtp. 
+ if(libusb_init(NULL) != 0) return; + + Error = PyErr_NewException("libusb.Error", NULL, NULL); + if (Error == NULL) return; + + cache = PyDict_New(); + if (cache == NULL) return; + + m = Py_InitModule3("libusb", libusb_methods, "Interface to libusb."); + if (m == NULL) return; + + PyModule_AddObject(m, "Error", Error); + PyModule_AddObject(m, "cache", cache); + +} + diff --git a/src/calibre/devices/scanner.py b/src/calibre/devices/scanner.py index 49273dd8bc..6865546a54 100644 --- a/src/calibre/devices/scanner.py +++ b/src/calibre/devices/scanner.py @@ -13,18 +13,13 @@ from calibre import prints, as_unicode from calibre.constants import (iswindows, isosx, plugins, islinux, isfreebsd, isnetbsd) -osx_scanner = win_scanner = linux_scanner = None +osx_scanner = win_scanner = linux_scanner = freebsd_scanner = netbsd_scanner = None if iswindows: try: win_scanner = plugins['winutil'][0].get_usb_devices except: raise RuntimeError('Failed to load the winutil plugin: %s'%plugins['winutil'][1]) -elif isosx: - try: - osx_scanner = plugins['usbobserver'][0].get_usb_devices - except: - raise RuntimeError('Failed to load the usbobserver plugin: %s'%plugins['usbobserver'][1]) class Drive(str): @@ -118,6 +113,54 @@ class USBDevice(_USBDevice): _USBDevice.__init__(self, *args, **kwargs) self.busnum = self.devnum = -1 + def __repr__(self): + return (u'USBDevice(busnum=%s, devnum=%s, ' + 'vendor_id=0x%04x, product_id=0x%04x, bcd=0x%04x, ' + 'manufacturer=%s, product=%s, serial=%s)')%( + self.busnum, self.devnum, self.vendor_id, self.product_id, + self.bcd, self.manufacturer, self.product, self.serial) + + __str__ = __repr__ + __unicode__ = __repr__ + +class LibUSBScanner(object): + + def __call__(self): + if not hasattr(self, 'libusb'): + self.libusb, self.libusb_err = plugins['libusb'] + if self.libusb is None: + raise ValueError( + 'DeviceScanner needs libusb to work. 
Error: %s'% + self.libusb_err) + + ans = set() + seen = set() + for fingerprint, ids in self.libusb.get_devices(): + seen.add(fingerprint) + man = ids.get('manufacturer', None) + prod = ids.get('product', None) + serial = ids.get('serial', None) + dev = fingerprint[2:] + (man, prod, serial) + dev = USBDevice(*dev) + dev.busnum, dev.devnum = fingerprint[:2] + ans.add(dev) + extra = set(self.libusb.cache.iterkeys()) - seen + for x in extra: + self.libusb.cache.pop(x, None) + return ans + + def check_for_mem_leak(self): + import gc + from calibre.utils.mem import memory + memory() + for num in (1, 10, 100): + start = memory() + for i in xrange(num): + self() + for i in xrange(3): gc.collect() + print 'Mem consumption increased by:', memory() - start, 'MB', + print 'after', num, 'repeats' + class LinuxScanner(object): SYSFS_PATH = os.environ.get('SYSFS_PATH', '/sys') @@ -165,17 +208,17 @@ class LinuxScanner(object): except: continue try: - dev.append(read(man)) + dev.append(read(man).decode('utf-8')) except: - dev.append(b'') + dev.append(u'') try: - dev.append(read(prod_string)) + dev.append(read(prod_string).decode('utf-8')) except: - dev.append(b'') + dev.append(u'') try: - dev.append(read(serial)) + dev.append(read(serial).decode('utf-8')) except: - dev.append(b'') + dev.append(u'') dev = USBDevice(*dev) try: @@ -244,18 +287,16 @@ class FreeBSDScanner(object): -linux_scanner = None - if islinux: linux_scanner = LinuxScanner() -freebsd_scanner = None +libusb_scanner = LibUSBScanner() +if isosx: + osx_scanner = libusb_scanner if isfreebsd: freebsd_scanner = FreeBSDScanner() -netbsd_scanner = None - ''' NetBSD support currently not written yet ''' if isnetbsd: netbsd_scanner = None @@ -263,9 +304,11 @@ if isnetbsd: class DeviceScanner(object): def __init__(self, *args): - if isosx and osx_scanner is None: - raise RuntimeError('The Python extension usbobserver must be available on OS X.') - self.scanner = win_scanner if iswindows else osx_scanner if isosx else 
freebsd_scanner if isfreebsd else netbsd_scanner if isnetbsd else linux_scanner + self.scanner = (win_scanner if iswindows else osx_scanner if isosx else + freebsd_scanner if isfreebsd else netbsd_scanner if isnetbsd + else linux_scanner if islinux else libusb_scanner) + if self.scanner is None: + self.scanner = libusb_scanner self.devices = [] def scan(self): From 9ba5fa1c41aba288d4c28a1216f2d6cef59523da Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 25 Aug 2012 23:47:50 +0530 Subject: [PATCH 19/57] libmtp: Only use list of known vids/pids for detection --- src/calibre/devices/mtp/unix/driver.py | 6 +- src/calibre/devices/mtp/unix/libmtp.c | 23 +++ .../devices/mtp/unix/upstream/music-players.h | 133 ++++++++++++++---- 3 files changed, 129 insertions(+), 33 deletions(-) diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index 832a54652d..24b008802e 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -33,6 +33,7 @@ class MTP_DEVICE(MTPDeviceBase): def __init__(self, *args, **kwargs): MTPDeviceBase.__init__(self, *args, **kwargs) self.libmtp = None + self.known_devices = None self.detect_cache = {} self.dev = None @@ -56,6 +57,8 @@ class MTP_DEVICE(MTPDeviceBase): @synchronous def detect_managed_devices(self, devices_on_system, force_refresh=False): if self.libmtp is None: return None + if self.known_devices is None: + self.known_devices = frozenset(self.libmtp.known_devices()) # First remove blacklisted devices. 
devs = set() for d in devices_on_system: @@ -81,8 +84,7 @@ class MTP_DEVICE(MTPDeviceBase): for d in devs: ans = cache.get(d, None) if ans is None: - ans = self.libmtp.is_mtp_device(d.busnum, d.devnum, - d.vendor_id, d.product_id) + ans = (d.vendor_id, d.product_id) in self.known_devices cache[d] = ans if ans: return d diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index 79c1bbeac0..b0cfd11bb6 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -703,6 +703,25 @@ libmtp_is_mtp_device(PyObject *self, PyObject *args) { } +static PyObject* +known_devices(PyObject *self, PyObject *args) { + PyObject *ans, *d; + size_t i; + + ans = PyList_New(0); + if (ans == NULL) return PyErr_NoMemory(); + + for (i = 0; ; i++) { + if (calibre_mtp_device_table[i].vendor == NULL && calibre_mtp_device_table[i].product == NULL && calibre_mtp_device_table[i].vendor_id == 0xffff) break; + d = Py_BuildValue("(HH)", calibre_mtp_device_table[i].vendor_id, calibre_mtp_device_table[i].product_id); + if (d == NULL) { Py_DECREF(ans); ans = NULL; break; } + if (PyList_Append(ans, d) != 0) { Py_DECREF(d); Py_DECREF(ans); ans = NULL; PyErr_NoMemory(); break; } + Py_DECREF(d); + } + + return ans; +} + static PyMethodDef libmtp_methods[] = { {"set_debug_level", libmtp_set_debug_level, METH_VARARGS, "set_debug_level(level)\n\nSet the debug level bit mask, see LIBMTP_DEBUG_* constants." @@ -712,6 +731,10 @@ static PyMethodDef libmtp_methods[] = { "is_mtp_device(busnum, devnum, vendor_id, prod_id)\n\nReturn True if the device is recognized as an MTP device by its vendor/product ids. If it is not recognized a probe is done and True returned if the probe succeeds. Note that probing can cause some devices to malfunction, and it is not very reliable, which is why we prefer to use the device database." 
}, + {"known_devices", known_devices, METH_VARARGS, + "known_devices() -> Return the list of known (vendor_id, product_id) combinations." + }, + {NULL, NULL, 0, NULL} }; diff --git a/src/calibre/devices/mtp/unix/upstream/music-players.h b/src/calibre/devices/mtp/unix/upstream/music-players.h index f8f756df59..69f43f8145 100644 --- a/src/calibre/devices/mtp/unix/upstream/music-players.h +++ b/src/calibre/devices/mtp/unix/upstream/music-players.h @@ -292,6 +292,13 @@ DEVICE_FLAG_PLAYLIST_SPL_V1 | DEVICE_FLAG_UNIQUE_FILENAMES | DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, + // From qnub + // Guessing on .spl flag + { "Samsung", 0x04e8, "YP-R2", 0x512d, + DEVICE_FLAG_UNLOAD_DRIVER | + DEVICE_FLAG_PLAYLIST_SPL_V1 | + DEVICE_FLAG_UNIQUE_FILENAMES | + DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, // From Manuel Carro // Copied from Q2 { "Samsung", 0x04e8, "YP-Q3", 0x5130, @@ -359,6 +366,7 @@ // Guessing on flags. { "Samsung", 0x04e8, "Galaxy Y", 0x685e, DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST_ALL | + DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST | DEVICE_FLAG_UNLOAD_DRIVER | DEVICE_FLAG_LONG_TIMEOUT | DEVICE_FLAG_PROPLIST_OVERRIDES_OI }, @@ -380,6 +388,8 @@ */ { "Samsung", 0x04e8, "GT P7310/P7510/N7000/I9070/I9100/I9300 Galaxy Tab 7.7/10.1/S2/S3/Nexus/Note/Y", 0x6860, + DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST_ALL | + DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST | DEVICE_FLAG_UNLOAD_DRIVER | DEVICE_FLAG_LONG_TIMEOUT | DEVICE_FLAG_PROPLIST_OVERRIDES_OI }, @@ -499,17 +509,23 @@ * Acer */ // Reported by anonymous sourceforge user - { "Acer", 0x0502, "Iconia TAB A500 v1", 0x3325, DEVICE_FLAGS_ANDROID_BUGS }, + { "Acer", 0x0502, "Iconia TAB A500 (ID1)", 0x3325, DEVICE_FLAGS_ANDROID_BUGS }, // Reported by: Franck VDL - { "Acer", 0x0502, "Iconia TAB A500 v2", 0x3341, DEVICE_FLAGS_ANDROID_BUGS }, + { "Acer", 0x0502, "Iconia TAB A500 (ID2)", 0x3341, DEVICE_FLAGS_ANDROID_BUGS }, // Reported by: Matthias Arndt { "Acer", 0x0502, "Iconia TAB A501", 0x3344, DEVICE_FLAGS_ANDROID_BUGS }, // Reported by: anonymous 
sourceforge user - { "Acer", 0x0502, "Iconia TAB A100", 0x3348, DEVICE_FLAGS_ANDROID_BUGS }, + { "Acer", 0x0502, "Iconia TAB A100 (ID1)", 0x3348, DEVICE_FLAGS_ANDROID_BUGS }, // Reported by: Arvin Schnell - { "Acer", 0x0502, "Iconia TAB A100 ID2", 0x3349, DEVICE_FLAGS_ANDROID_BUGS }, + { "Acer", 0x0502, "Iconia TAB A100 (ID2)", 0x3349, DEVICE_FLAGS_ANDROID_BUGS }, + // Reported by Philippe Marzouk + { "Acer", 0x0502, "Iconia TAB A700", 0x3378, DEVICE_FLAGS_ANDROID_BUGS }, // Reported by anonymous sourceforge user - { "Acer", 0x0502, "Iconia TAB A200", 0x337c, DEVICE_FLAGS_ANDROID_BUGS }, + { "Acer", 0x0502, "Iconia TAB A200 (ID1)", 0x337c, DEVICE_FLAGS_ANDROID_BUGS }, + // Reported by anonymous sourceforge user + { "Acer", 0x0502, "Iconia TAB A200 (ID2)", 0x337d, DEVICE_FLAGS_ANDROID_BUGS }, + // Reported by nE0sIghT + { "Acer", 0x0502, "Iconia TAB A510", 0x338a, DEVICE_FLAGS_ANDROID_BUGS }, /* * SanDisk @@ -1396,21 +1412,50 @@ // Reported by Serge Chirik { "SonyEricsson", 0x0fce, "j108i (Cedar)", 0x014e, DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, + // Reported by Jonas Nyrén + { "SonyEricsson", 0x0fce, "W302", 0x10c8, + DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, + // Reported by Anonymous Sourceforge user + { "SonyEricsson", 0x0fce, "j10i (Elm)", 0xd144, + DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, + // Reported by Thomas Schweitzer + { "SonyEricsson", 0x0fce, "K550i", 0xe000, + DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, + /* * SonyEricsson/SONY Android devices usually have three personalities due to * using composite descriptors and the fact that Windows cannot distinguish * the device unless each composite descriptor is unique. 
* - * 0x0nnn = MTP - * 0x4nnn = MTP + mass storage (for CD-ROM) - * 0x5nnn = MTP + ADB (Android debug bridge) + * Legend: + * MTP = Media Transfer Protocol + * UMS = USB Mass Storage Protocol + * ADB = Android Debug Bridge Protocol + * CDC = Communications Device Class, Internet Sharing * + * 0x0nnn = MTP + * 0x4nnn = MTP + UMS (for CD-ROM) + * 0x5nnn = MTP + ADB + * 0x6nnn = UMS + ADB + * 0x7nnn = MTP + CDC + * 0x8nnn = MTP + CDC + ADB + * 0xannn = MTP + UMS + ? + * 0xennn = UMS only + * + * The SonyEricsson and SONY devices have (at least) two deployed MTP + * stacks: Aricent and Android. These have different bug flags, and + * sometimes the same device has firmware upgrades moving it from + * the Aricent to Android MTP stack without changing the device + * VID+PID (first observed on the SK17i Xperia Mini Pro), so the + * detection has to be more elaborate. The code in libmtp.c will do + * this and assign the proper bug flags (hopefully). + * That is why DEVICE_FLAG_NONE is used for these devices. */ // Reported by Jonas Salling <> // Erroneous MTP implementation seems to be from Aricent, returns // broken transaction ID. { "SonyEricsson", 0x0fce, "LT15i (Xperia arc S)", 0x014f, - DEVICE_FLAGS_ARICENT_BUGS }, + DEVICE_FLAG_NONE }, // Reported by Eamonn Webster // Runtime detect the Aricent or Android stack { "SonyEricsson", 0x0fce, "MT11i Xperia Neo", 0x0156, @@ -1429,7 +1474,7 @@ * Android with Android stack in another one, so let the run-time * detector look up the device bug flags, set to NONE initially.
*/ - { "SonyEricsson", 0x0fce, "SK17i Xperia mini pro", 0x0166, + { "SonyEricsson", 0x0fce, "SK17i Xperia Mini Pro", 0x0166, DEVICE_FLAG_NONE }, // Reported by hdhoang // Runtime detect the Aricent or Android stack @@ -1437,36 +1482,56 @@ DEVICE_FLAG_NONE }, // Reported by Paul Taylor { "SONY", 0x0fce, "Xperia S", 0x0169, - DEVICE_FLAG_NO_ZERO_READS | DEVICE_FLAGS_ANDROID_BUGS }, + DEVICE_FLAG_NO_ZERO_READS }, + // Reported by Bruno Basilio + { "SONY", 0x0fce, "WT19i Live Walkman", 0x016d, + DEVICE_FLAG_NONE }, + // Reported by Christoffer Holmstedt + { "SONY", 0x0fce, "ST21i Xperia Tipo", 0x0170, + DEVICE_FLAG_NONE }, // Reported by equaeghe { "SONY", 0x0fce, "ST15i Xperia U", 0x0171, - DEVICE_FLAGS_ANDROID_BUGS }, + DEVICE_FLAG_NONE }, // Reported by Ondra Lengal { "SONY", 0x0fce, "Xperia P", 0x0172, - DEVICE_FLAGS_ANDROID_BUGS }, - // Reported by Jonas Nyrén - { "SonyEricsson", 0x0fce, "W302", 0x10c8, - DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, + DEVICE_FLAG_NONE }, + // Guessing on this one + { "SONY", 0x0fce, "LT26w Xperia Acro S", 0x0176, + DEVICE_FLAG_NONE }, + /* * MTP+MSC personalities of MTP devices (see above) */ + // Guessing on this one + { "SONY", 0x0fce, "Xperia S (MTP+ADB mode)", 0x4169, + DEVICE_FLAG_NO_ZERO_READS }, + // Guessing on this one + { "SONY", 0x0fce, "ST21i Xperia Tipo (MTP+MSC mode)", 0x4170, + DEVICE_FLAG_NONE }, // Reported by equaeghe { "SONY", 0x0fce, "ST25i Xperia U (MTP+MSC mode)", 0x4171, - DEVICE_FLAGS_ANDROID_BUGS }, + DEVICE_FLAG_NONE }, // Guessing on this one { "SONY", 0x0fce, "Xperia P (MTP+MSC mode)", 0x4172, - DEVICE_FLAGS_ANDROID_BUGS }, + DEVICE_FLAG_NONE }, + // Guessing on this one + { "SONY", 0x0fce, "LT26w Xperia Acro S (MTP+MSC mode)", 0x4176, + DEVICE_FLAG_NONE }, + /* * MTP+ADB personalities of MTP devices (see above) */ // Reported by anonymous sourceforge user // Suspect Aricent stack, guessing on these bug flags { "SonyEricsson", 0x0fce, "LT15i Xperia Arc (MTP+ADB mode)", 0x514f, - DEVICE_FLAGS_ARICENT_BUGS }, 
+ DEVICE_FLAG_NONE }, // Reported by Michael K. // Runtime detect the Aricent or Android stack { "SonyEricsson", 0x0fce, "MT11i Xperia Neo (MTP+ADB mode)", 0x5156, DEVICE_FLAG_NONE }, + // Reported by Jean-François B. + { "SONY", 0x0fce, "Xperia S (MTP+ADB mode)", 0x5169, + DEVICE_FLAG_NO_ZERO_READS }, // Runtime detect the Aricent or Android stack { "SonyEricsson", 0x0fce, "MK16i Xperia (MTP+ADB mode)", 0x515a, DEVICE_FLAG_NONE }, @@ -1478,26 +1543,28 @@ // Reported by StehpanKa // Android with homebrew MTP stack, possibly Aricent // Runtime detect the Aricent or Android stack - { "SonyEricsson", 0x0fce, "SK17i Xperia mini pro (MTP+ADB mode)", 0x5166, + { "SonyEricsson", 0x0fce, "SK17i Xperia Mini Pro (MTP+ADB mode)", 0x5166, DEVICE_FLAG_NONE }, // Android with homebrew MTP stack, possibly Aricent // Runtime detect the Aricent or Android stack { "SonyEricsson", 0x0fce, "ST15i Xperia Mini (MTP+ADB mode)", 0x5167, DEVICE_FLAG_NONE }, + { "SonyEricsson", 0x0fce, "SK17i Xperia Mini Pro (MTP+? mode)", 0x516d, + DEVICE_FLAG_NONE }, + // Guessing on this one + { "SONY", 0x0fce, "ST21i Xperia Tipo (MTP+ADB mode)", 0x5170, + DEVICE_FLAG_NONE }, // Reported by equaeghe { "SONY", 0x0fce, "ST25i Xperia U (MTP+ADB mode)", 0x5171, - DEVICE_FLAGS_ANDROID_BUGS }, + DEVICE_FLAG_NONE }, // Reported by Ondra Lengál { "SONY", 0x0fce, "Xperia P (MTP+ADB mode)", 0x5172, - DEVICE_FLAGS_ANDROID_BUGS }, + DEVICE_FLAG_NONE }, + // Reported by Ah Hong + { "SONY", 0x0fce, "LT26w Xperia Acro S (MTP+ADB mode)", 0x5176, + DEVICE_FLAG_NONE }, { "SONY", 0x0fce, "MT27i Xperia Sola (MTP+MSC+? 
mode)", 0xa173, - DEVICE_FLAGS_ANDROID_BUGS }, - // Reported by Anonymous Sourceforge user - { "SonyEricsson", 0x0fce, "j10i (Elm)", 0xd144, - DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, - // Reported by Thomas Schweitzer - { "SonyEricsson", 0x0fce, "K550i", 0xe000, - DEVICE_FLAG_BROKEN_MTPGETOBJPROPLIST }, + DEVICE_FLAG_NONE }, /* * Motorola @@ -1521,7 +1588,8 @@ DEVICE_FLAGS_ANDROID_BUGS }, { "Motorola", 0x22b8, "Xoom 2 Media Edition", 0x4311, DEVICE_FLAGS_ANDROID_BUGS }, - { "Motorola", 0x22b8, "XT912", 0x4362, + // Reported by B,H,Kissinger + { "Motorola", 0x22b8, "XT912/XT928", 0x4362, DEVICE_FLAGS_ANDROID_BUGS }, // Reported by Marcus Meissner to libptp2 { "Motorola", 0x22b8, "IdeaPad K1", 0x4811, @@ -1748,7 +1816,7 @@ /* * HTC (High Tech Computer Corp) */ - { "HTC", 0x0bb4, "Zopo ZP100", 0x0c02, + { "HTC", 0x0bb4, "Zopo ZP100 (ID1)", 0x0c02, DEVICE_FLAGS_ANDROID_BUGS }, // Reported by Steven Eastland { "HTC", 0x0bb4, "EVO 4G LTE", 0x0c93, @@ -1762,6 +1830,9 @@ DEVICE_FLAGS_ANDROID_BUGS }, { "Hewlett-Packard", 0x0bb4, "HP Touchpad (debug mode)", 0x6860, DEVICE_FLAGS_ANDROID_BUGS }, + // Reported by anonymous SourceForge user + { "HTC", 0x0bb4, "Zopo ZP100 (ID2)", 0x2008, + DEVICE_FLAGS_ANDROID_BUGS }, /* * NEC From a79920d5f1af18aa7b35507a3871bc97a41f79ab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 08:42:45 +0530 Subject: [PATCH 20/57] ... 
--- src/calibre/gui2/preferences/emailp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/gui2/preferences/emailp.py b/src/calibre/gui2/preferences/emailp.py index 1256691c22..892b01f095 100644 --- a/src/calibre/gui2/preferences/emailp.py +++ b/src/calibre/gui2/preferences/emailp.py @@ -54,6 +54,8 @@ class EmailAccounts(QAbstractTableModel): # {{{ if row < 0 or row >= self.rowCount(): return NONE account = self.account_order[row] + if account not in self.accounts: + return NONE if role == Qt.UserRole: return (account, self.accounts[account]) if role == Qt.ToolTipRole: From 3f9a16e687d28968fadf18e02ac8d0b1716aea08 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 08:55:52 +0530 Subject: [PATCH 21/57] ... --- recipes/chronicle_higher_ed.recipe | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/recipes/chronicle_higher_ed.recipe b/recipes/chronicle_higher_ed.recipe index 86f60181bc..7ed834a4e5 100644 --- a/recipes/chronicle_higher_ed.recipe +++ b/recipes/chronicle_higher_ed.recipe @@ -41,6 +41,10 @@ class Chronicle(BasicNewsRecipe): dates = self.tag_to_string(issue.a).split(': ')[-1] self.timefmt = u' [%s]'%dates + #Find cover + cover=soup0.find('div',attrs={'class':'promo'}).findNext('div') + self.cover_url="http://chronicle.com"+cover.find('img')['src'] + #Go to the main body soup = self.index_to_soup(issueurl) div0 = soup.find ('div', attrs={'id':'article-body'}) From ddce60ac90e8f5259653b7f1591f4458a5cdb418 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 09:12:31 +0530 Subject: [PATCH 22/57] ... 
--- src/calibre/utils/filenames.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/utils/filenames.py b/src/calibre/utils/filenames.py index f031362d39..a4c9e35c3e 100644 --- a/src/calibre/utils/filenames.py +++ b/src/calibre/utils/filenames.py @@ -187,7 +187,11 @@ def case_preserving_open_file(path, mode='wb', mkdir_mode=0777): os.fsync(ans.fileno()) cl = fname.lower() - candidates = [c for c in os.listdir(cpath) if c.lower() == cl] + try: + candidates = [c for c in os.listdir(cpath) if c.lower() == cl] + except EnvironmentError: + # The containing directory, somehow disappeared? + candidates = [] if len(candidates) == 1: fpath = os.path.join(cpath, candidates[0]) else: From 1136f261865a0279b6e9dce798ad0d25d51d5f92 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 10:13:34 +0530 Subject: [PATCH 23/57] Refactor the PoDoFo module --- session.vim | 1 + setup/extensions.py | 9 +- src/calibre/utils/podofo/doc.cpp | 396 ++++++++++++++++++++++++ src/calibre/utils/podofo/global.h | 39 +++ src/calibre/utils/podofo/podofo.cpp | 448 +--------------------------- src/calibre/utils/podofo/utils.cpp | 46 +++ 6 files changed, 501 insertions(+), 438 deletions(-) create mode 100644 src/calibre/utils/podofo/doc.cpp create mode 100644 src/calibre/utils/podofo/global.h create mode 100644 src/calibre/utils/podofo/utils.cpp diff --git a/session.vim b/session.vim index ae2c55bf06..1a94d6bf07 100644 --- a/session.vim +++ b/session.vim @@ -3,6 +3,7 @@ let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,ic " Include directories for C++ modules let g:syntastic_cpp_include_dirs = [ + \'/usr/include/python2.7', \'/usr/include/podofo', \'/usr/include/qt4/QtCore', \'/usr/include/qt4/QtGui', diff --git a/setup/extensions.py b/setup/extensions.py index 9b852d10c5..a7b01bcd19 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -137,7 +137,14 @@ extensions = [ ['calibre/ebooks/compression/palmdoc.c']), 
Extension('podofo', - ['calibre/utils/podofo/podofo.cpp'], + [ + 'calibre/utils/podofo/utils.cpp', + 'calibre/utils/podofo/doc.cpp', + 'calibre/utils/podofo/podofo.cpp', + ], + headers=[ + 'calibre/utils/podofo/global.h', + ], libraries=['podofo'], lib_dirs=[podofo_lib], inc_dirs=[podofo_inc, os.path.dirname(podofo_inc)], diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp new file mode 100644 index 0000000000..d591d6fc65 --- /dev/null +++ b/src/calibre/utils/podofo/doc.cpp @@ -0,0 +1,396 @@ +/* + * doc.cpp + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#include "global.h" + +using namespace pdf; + +static void +PDFDoc_dealloc(PDFDoc* self) +{ + if (self->doc != NULL) delete self->doc; + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PDFDoc *self; + + self = (PDFDoc *)type->tp_alloc(type, 0); + if (self != NULL) { + self->doc = new PdfMemDocument(); + if (self->doc == NULL) { Py_DECREF(self); return NULL; } + } + + return (PyObject *)self; +} + +static PyObject * +PDFDoc_load(PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *buffer; Py_ssize_t size; + + if (PyArg_ParseTuple(args, "s#", &buffer, &size)) { + try { + self->doc->Load(buffer, size); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } +} else return NULL; + + + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_open(PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *fname; + + if (PyArg_ParseTuple(args, "s", &fname)) { + try { + self->doc->Load(fname); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } +} else return NULL; + + + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_save(PDFDoc *self, PyObject *args, PyObject *kwargs) { + char *buffer; + + if (PyArg_ParseTuple(args, "s", &buffer)) { + try { + self->doc->Write(buffer); + } catch(const PdfError & err) { + 
podofo_set_exception(err); + return NULL; + } + } else return NULL; + + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +PDFDoc_pages_getter(PDFDoc *self, void *closure) { + int pages = self->doc->GetPageCount(); + PyObject *ans = PyInt_FromLong(static_cast(pages)); + if (ans != NULL) Py_INCREF(ans); + return ans; +} + +static PyObject * +PDFDoc_version_getter(PDFDoc *self, void *closure) { + int version; + try { + version = self->doc->GetPdfVersion(); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + switch(version) { + case ePdfVersion_1_0: + return Py_BuildValue("s", "1.0"); + case ePdfVersion_1_1: + return Py_BuildValue("s", "1.1"); + case ePdfVersion_1_2: + return Py_BuildValue("s", "1.2"); + case ePdfVersion_1_3: + return Py_BuildValue("s", "1.3"); + case ePdfVersion_1_4: + return Py_BuildValue("s", "1.4"); + case ePdfVersion_1_5: + return Py_BuildValue("s", "1.5"); + case ePdfVersion_1_6: + return Py_BuildValue("s", "1.6"); + case ePdfVersion_1_7: + return Py_BuildValue("s", "1.7"); + default: + return Py_BuildValue(""); + } + return Py_BuildValue(""); +} + + + +static PyObject * +PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { + try { + while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { + int count; + try { + count = self->doc->GetPageCount(); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + return Py_BuildValue("i", count); +} + +static PyObject * +PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { + int num = 0; + if (PyArg_ParseTuple(args, "i", &num)) { + try { + self->doc->DeletePages(num, 1); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + } else return NULL; + + 
Py_RETURN_NONE; +} + + +static PyObject * +PDFDoc_getter(PDFDoc *self, int field) +{ + PyObject *ans; + PdfString s; + PdfInfo *info = self->doc->GetInfo(); + if (info == NULL) { + PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); + return NULL; + } + switch (field) { + case 0: + s = info->GetTitle(); break; + case 1: + s = info->GetAuthor(); break; + case 2: + s = info->GetSubject(); break; + case 3: + s = info->GetKeywords(); break; + case 4: + s = info->GetCreator(); break; + case 5: + s = info->GetProducer(); break; + default: + PyErr_SetString(PyExc_Exception, "Bad field"); + return NULL; + } + + ans = podofo_convert_pdfstring(s); + if (ans == NULL) {PyErr_NoMemory(); return NULL;} + PyObject *uans = PyUnicode_FromEncodedObject(ans, "utf-8", "replace"); + Py_DECREF(ans); + if (uans == NULL) {return NULL;} + Py_INCREF(uans); + return uans; +} + +static int +PDFDoc_setter(PDFDoc *self, PyObject *val, int field) { + if (val == NULL || !PyUnicode_Check(val)) { + PyErr_SetString(PyExc_ValueError, "Must use unicode objects to set metadata"); + return -1; + } + PdfInfo *info = new PdfInfo(*self->doc->GetInfo()); + if (info == NULL) { + PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); + return -1; + } + PdfString *s = NULL; + + if (self->doc->GetEncrypted()) s = podofo_convert_pystring_single_byte(val); + else s = podofo_convert_pystring(val); + if (s == NULL) return -1; + + + switch (field) { + case 0: + info->SetTitle(*s); break; + case 1: + info->SetAuthor(*s); break; + case 2: + info->SetSubject(*s); break; + case 3: + info->SetKeywords(*s); break; + case 4: + info->SetCreator(*s); break; + case 5: + info->SetProducer(*s); break; + default: + PyErr_SetString(PyExc_Exception, "Bad field"); + return -1; + } + + return 0; +} + +static PyObject * +PDFDoc_title_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 0); +} +static PyObject * +PDFDoc_author_getter(PDFDoc *self, void *closure) { + return 
PDFDoc_getter(self, 1); +} +static PyObject * +PDFDoc_subject_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 2); +} +static PyObject * +PDFDoc_keywords_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 3); +} +static PyObject * +PDFDoc_creator_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 4); +} +static PyObject * +PDFDoc_producer_getter(PDFDoc *self, void *closure) { + return PDFDoc_getter(self, 5); +} +static int +PDFDoc_title_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 0); +} +static int +PDFDoc_author_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 1); +} +static int +PDFDoc_subject_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 2); +} +static int +PDFDoc_keywords_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 3); +} +static int +PDFDoc_creator_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 4); +} +static int +PDFDoc_producer_setter(PDFDoc *self, PyObject *val, void *closure) { + return PDFDoc_setter(self, val, 5); +} + + +static PyMethodDef PDFDoc_methods[] = { + {"load", (PyCFunction)PDFDoc_load, METH_VARARGS, + "Load a PDF document from a byte buffer (string)" + }, + {"open", (PyCFunction)PDFDoc_open, METH_VARARGS, + "Load a PDF document from a file path (string)" + }, + {"save", (PyCFunction)PDFDoc_save, METH_VARARGS, + "Save the PDF document to a path on disk" + }, + {"extract_first_page", (PyCFunction)PDFDoc_extract_first_page, METH_VARARGS, + "extract_first_page() -> Remove all but the first page." + }, + {"page_count", (PyCFunction)PDFDoc_page_count, METH_VARARGS, + "page_count() -> Number of pages in the PDF." + }, + {"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS, + "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)." 
+ }, + + + {NULL} /* Sentinel */ +}; + +static PyGetSetDef PDFDoc_getsetters[] = { + {(char *)"title", + (getter)PDFDoc_title_getter, (setter)PDFDoc_title_setter, + (char *)"Document title", + NULL}, + {(char *)"author", + (getter)PDFDoc_author_getter, (setter)PDFDoc_author_setter, + (char *)"Document author", + NULL}, + {(char *)"subject", + (getter)PDFDoc_subject_getter, (setter)PDFDoc_subject_setter, + (char *)"Document subject", + NULL}, + {(char *)"keywords", + (getter)PDFDoc_keywords_getter, (setter)PDFDoc_keywords_setter, + (char *)"Document keywords", + NULL}, + {(char *)"creator", + (getter)PDFDoc_creator_getter, (setter)PDFDoc_creator_setter, + (char *)"Document creator", + NULL}, + {(char *)"producer", + (getter)PDFDoc_producer_getter, (setter)PDFDoc_producer_setter, + (char *)"Document producer", + NULL}, + {(char *)"pages", + (getter)PDFDoc_pages_getter, NULL, + (char *)"Number of pages in document (read only)", + NULL}, + {(char *)"version", + (getter)PDFDoc_version_getter, NULL, + (char *)"The PDF version (read only)", + NULL}, + + {NULL} /* Sentinel */ +}; + +PyTypeObject pdf::PDFDocType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "podofo.PDFDoc", /*tp_name*/ + sizeof(PDFDoc), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)PDFDoc_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "PDF Documents", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + PDFDoc_methods, /* tp_methods */ + 0, /* tp_members */ + PDFDoc_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* 
tp_init */ + 0, /* tp_alloc */ + PDFDoc_new, /* tp_new */ + +}; + + diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h new file mode 100644 index 0000000000..c7a5696ad6 --- /dev/null +++ b/src/calibre/utils/podofo/global.h @@ -0,0 +1,39 @@ +/* + * global.h + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ +#pragma once + +#define UNICODE +#define PY_SSIZE_T_CLEAN +#include + +#define USING_SHARED_PODOFO +#include +using namespace PoDoFo; + +namespace pdf { + +// Module exception types +extern PyObject *Error; + +typedef struct { + PyObject_HEAD + /* Type-specific fields go here. */ + PdfMemDocument *doc; + +} PDFDoc; + +extern PyTypeObject PDFDocType; +extern PyObject *Error; + +// Utilities +extern void podofo_set_exception(const PdfError &err); +extern PyObject * podofo_convert_pdfstring(const PdfString &s); +extern PdfString * podofo_convert_pystring(PyObject *py); +extern PdfString * podofo_convert_pystring_single_byte(PyObject *py); + +} + diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp index 3764fce829..b79ca8cfe3 100644 --- a/src/calibre/utils/podofo/podofo.cpp +++ b/src/calibre/utils/podofo/podofo.cpp @@ -6,458 +6,32 @@ #include using namespace PoDoFo; -typedef struct { - PyObject_HEAD - /* Type-specific fields go here. 
*/ - PdfMemDocument *doc; +#include "global.h" -} podofo_PDFDoc; - -extern "C" { -static void -podofo_PDFDoc_dealloc(podofo_PDFDoc* self) -{ - if (self->doc != NULL) delete self->doc; - self->ob_type->tp_free((PyObject*)self); -} - -static PyObject * -podofo_PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - podofo_PDFDoc *self; - - self = (podofo_PDFDoc *)type->tp_alloc(type, 0); - if (self != NULL) { - self->doc = new PdfMemDocument(); - if (self->doc == NULL) { Py_DECREF(self); return NULL; } - } - - return (PyObject *)self; -} - -static void podofo_set_exception(const PdfError &err) { - const char *msg = PdfError::ErrorMessage(err.GetError()); - if (msg == NULL) msg = err.what(); - PyErr_SetString(PyExc_ValueError, msg); -} - -static PyObject * -podofo_PDFDoc_load(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - char *buffer; Py_ssize_t size; - - if (PyArg_ParseTuple(args, "s#", &buffer, &size)) { - try { - self->doc->Load(buffer, size); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } -} else return NULL; - - - Py_RETURN_NONE; -} - -static PyObject * -podofo_PDFDoc_open(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - char *fname; - - if (PyArg_ParseTuple(args, "s", &fname)) { - try { - self->doc->Load(fname); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } -} else return NULL; - - - Py_RETURN_NONE; -} - -static PyObject * -podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - char *buffer; - - if (PyArg_ParseTuple(args, "s", &buffer)) { - try { - self->doc->Write(buffer); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - } else return NULL; - - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject * -podofo_PDFDoc_pages_getter(podofo_PDFDoc *self, void *closure) { - int pages = self->doc->GetPageCount(); - PyObject *ans = PyInt_FromLong(static_cast(pages)); - if (ans != NULL) Py_INCREF(ans); 
- return ans; -} - -static PyObject * -podofo_PDFDoc_version_getter(podofo_PDFDoc *self, void *closure) { - int version; - try { - version = self->doc->GetPdfVersion(); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - switch(version) { - case ePdfVersion_1_0: - return Py_BuildValue("s", "1.0"); - case ePdfVersion_1_1: - return Py_BuildValue("s", "1.1"); - case ePdfVersion_1_2: - return Py_BuildValue("s", "1.2"); - case ePdfVersion_1_3: - return Py_BuildValue("s", "1.3"); - case ePdfVersion_1_4: - return Py_BuildValue("s", "1.4"); - case ePdfVersion_1_5: - return Py_BuildValue("s", "1.5"); - case ePdfVersion_1_6: - return Py_BuildValue("s", "1.6"); - case ePdfVersion_1_7: - return Py_BuildValue("s", "1.7"); - default: - return Py_BuildValue(""); - } - return Py_BuildValue(""); -} - - - -static PyObject * -podofo_PDFDoc_extract_first_page(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - try { - while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - Py_RETURN_NONE; -} - -static PyObject * -podofo_PDFDoc_page_count(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - int count; - try { - count = self->doc->GetPageCount(); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - return Py_BuildValue("i", count); -} - -static PyObject * -podofo_PDFDoc_delete_page(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { - int num = 0; - if (PyArg_ParseTuple(args, "i", &num)) { - try { - self->doc->DeletePages(num, 1); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - } else return NULL; - - Py_RETURN_NONE; -} - -static PyObject * -podofo_convert_pdfstring(const PdfString &s) { - std::string raw = s.GetStringUtf8(); - return PyString_FromStringAndSize(raw.c_str(), raw.length()); -} - -static PdfString * -podofo_convert_pystring(PyObject *py) { - 
Py_UNICODE* u = PyUnicode_AS_UNICODE(py); - PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(py), "replace"); - if (u8 == NULL) { PyErr_NoMemory(); return NULL; } - pdf_utf8 *s8 = reinterpret_cast(PyString_AS_STRING(u8)); - PdfString *ans = new PdfString(s8); - Py_DECREF(u8); - if (ans == NULL) PyErr_NoMemory(); - return ans; -} - -static PdfString * -podofo_convert_pystring_single_byte(PyObject *py) { - Py_UNICODE* u = PyUnicode_AS_UNICODE(py); - PyObject *s = PyUnicode_Encode(u, PyUnicode_GET_SIZE(py), "cp1252", "replace"); - if (s == NULL) { PyErr_NoMemory(); return NULL; } - PdfString *ans = new PdfString(PyString_AS_STRING(s)); - Py_DECREF(s); - if (ans == NULL) PyErr_NoMemory(); - return ans; -} - - - -static PyObject * -podofo_PDFDoc_getter(podofo_PDFDoc *self, int field) -{ - PyObject *ans; - PdfString s; - PdfInfo *info = self->doc->GetInfo(); - if (info == NULL) { - PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); - return NULL; - } - switch (field) { - case 0: - s = info->GetTitle(); break; - case 1: - s = info->GetAuthor(); break; - case 2: - s = info->GetSubject(); break; - case 3: - s = info->GetKeywords(); break; - case 4: - s = info->GetCreator(); break; - case 5: - s = info->GetProducer(); break; - default: - PyErr_SetString(PyExc_Exception, "Bad field"); - return NULL; - } - - ans = podofo_convert_pdfstring(s); - if (ans == NULL) {PyErr_NoMemory(); return NULL;} - PyObject *uans = PyUnicode_FromEncodedObject(ans, "utf-8", "replace"); - Py_DECREF(ans); - if (uans == NULL) {return NULL;} - Py_INCREF(uans); - return uans; -} - -static int -podofo_PDFDoc_setter(podofo_PDFDoc *self, PyObject *val, int field) { - if (val == NULL || !PyUnicode_Check(val)) { - PyErr_SetString(PyExc_ValueError, "Must use unicode objects to set metadata"); - return -1; - } - PdfInfo *info = new PdfInfo(*self->doc->GetInfo()); - if (info == NULL) { - PyErr_SetString(PyExc_Exception, "You must first load a PDF Document"); - return -1; - } - 
PdfString *s = NULL; - - if (self->doc->GetEncrypted()) s = podofo_convert_pystring_single_byte(val); - else s = podofo_convert_pystring(val); - if (s == NULL) return -1; - - - switch (field) { - case 0: - info->SetTitle(*s); break; - case 1: - info->SetAuthor(*s); break; - case 2: - info->SetSubject(*s); break; - case 3: - info->SetKeywords(*s); break; - case 4: - info->SetCreator(*s); break; - case 5: - info->SetProducer(*s); break; - default: - PyErr_SetString(PyExc_Exception, "Bad field"); - return -1; - } - - return 0; -} - -static PyObject * -podofo_PDFDoc_title_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 0); -} -static PyObject * -podofo_PDFDoc_author_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 1); -} -static PyObject * -podofo_PDFDoc_subject_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 2); -} -static PyObject * -podofo_PDFDoc_keywords_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 3); -} -static PyObject * -podofo_PDFDoc_creator_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 4); -} -static PyObject * -podofo_PDFDoc_producer_getter(podofo_PDFDoc *self, void *closure) { - return podofo_PDFDoc_getter(self, 5); -} -static int -podofo_PDFDoc_title_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 0); -} -static int -podofo_PDFDoc_author_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 1); -} -static int -podofo_PDFDoc_subject_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 2); -} -static int -podofo_PDFDoc_keywords_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 3); -} -static int -podofo_PDFDoc_creator_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return 
podofo_PDFDoc_setter(self, val, 4); -} -static int -podofo_PDFDoc_producer_setter(podofo_PDFDoc *self, PyObject *val, void *closure) { - return podofo_PDFDoc_setter(self, val, 5); -} - - - - - -} /* extern "C" */ - -static PyMethodDef podofo_PDFDoc_methods[] = { - {"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS, - "Load a PDF document from a byte buffer (string)" - }, - {"open", (PyCFunction)podofo_PDFDoc_open, METH_VARARGS, - "Load a PDF document from a file path (string)" - }, - {"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS, - "Save the PDF document to a path on disk" - }, - {"extract_first_page", (PyCFunction)podofo_PDFDoc_extract_first_page, METH_VARARGS, - "extract_first_page() -> Remove all but the first page." - }, - {"page_count", (PyCFunction)podofo_PDFDoc_page_count, METH_VARARGS, - "page_count() -> Number of pages in the PDF." - }, - {"delete_page", (PyCFunction)podofo_PDFDoc_delete_page, METH_VARARGS, - "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)." 
- }, - - - {NULL} /* Sentinel */ -}; - -static PyGetSetDef podofo_PDFDoc_getsetters[] = { - {(char *)"title", - (getter)podofo_PDFDoc_title_getter, (setter)podofo_PDFDoc_title_setter, - (char *)"Document title", - NULL}, - {(char *)"author", - (getter)podofo_PDFDoc_author_getter, (setter)podofo_PDFDoc_author_setter, - (char *)"Document author", - NULL}, - {(char *)"subject", - (getter)podofo_PDFDoc_subject_getter, (setter)podofo_PDFDoc_subject_setter, - (char *)"Document subject", - NULL}, - {(char *)"keywords", - (getter)podofo_PDFDoc_keywords_getter, (setter)podofo_PDFDoc_keywords_setter, - (char *)"Document keywords", - NULL}, - {(char *)"creator", - (getter)podofo_PDFDoc_creator_getter, (setter)podofo_PDFDoc_creator_setter, - (char *)"Document creator", - NULL}, - {(char *)"producer", - (getter)podofo_PDFDoc_producer_getter, (setter)podofo_PDFDoc_producer_setter, - (char *)"Document producer", - NULL}, - {(char *)"pages", - (getter)podofo_PDFDoc_pages_getter, NULL, - (char *)"Number of pages in document (read only)", - NULL}, - {(char *)"version", - (getter)podofo_PDFDoc_version_getter, NULL, - (char *)"The PDF version (read only)", - NULL}, - - {NULL} /* Sentinel */ -}; - -static PyTypeObject podofo_PDFDocType = { - PyObject_HEAD_INIT(NULL) - 0, /*ob_size*/ - "podofo.PDFDoc", /*tp_name*/ - sizeof(podofo_PDFDoc), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)podofo_PDFDoc_dealloc, /*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT, /*tp_flags*/ - "PDF Documents", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - podofo_PDFDoc_methods, /* tp_methods */ - 0, /* tp_members */ - 
podofo_PDFDoc_getsetters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - podofo_PDFDoc_new, /* tp_new */ - -}; +PyObject *pdf::Error = NULL; static PyMethodDef podofo_methods[] = { {NULL} /* Sentinel */ }; -extern "C" { - PyMODINIT_FUNC initpodofo(void) { PyObject* m; - if (PyType_Ready(&podofo_PDFDocType) < 0) + if (PyType_Ready(&pdf::PDFDocType) < 0) return; + pdf::Error = PyErr_NewException((char*)"podofo.Error", NULL, NULL); + if (pdf::Error == NULL) return; + m = Py_InitModule3("podofo", podofo_methods, "Wrapper for the PoDoFo PDF library"); - Py_INCREF(&podofo_PDFDocType); - PyModule_AddObject(m, "PDFDoc", (PyObject *)&podofo_PDFDocType); -} + Py_INCREF(&pdf::PDFDocType); + PyModule_AddObject(m, "PDFDoc", (PyObject *)&pdf::PDFDocType); + + PyModule_AddObject(m, "Error", pdf::Error); } + diff --git a/src/calibre/utils/podofo/utils.cpp b/src/calibre/utils/podofo/utils.cpp new file mode 100644 index 0000000000..473eeac195 --- /dev/null +++ b/src/calibre/utils/podofo/utils.cpp @@ -0,0 +1,46 @@ +/* + * utils.cpp + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. 
+ */ + +#include "global.h" + +using namespace pdf; + +void pdf::podofo_set_exception(const PdfError &err) { + const char *msg = PdfError::ErrorMessage(err.GetError()); + if (msg == NULL) msg = err.what(); + PyErr_SetString(Error, msg); +} + +PyObject * +pdf::podofo_convert_pdfstring(const PdfString &s) { + std::string raw = s.GetStringUtf8(); + return PyString_FromStringAndSize(raw.c_str(), raw.length()); +} + +PdfString * +pdf::podofo_convert_pystring(PyObject *py) { + Py_UNICODE* u = PyUnicode_AS_UNICODE(py); + PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(py), "replace"); + if (u8 == NULL) { PyErr_NoMemory(); return NULL; } + pdf_utf8 *s8 = reinterpret_cast(PyString_AS_STRING(u8)); + PdfString *ans = new PdfString(s8); + Py_DECREF(u8); + if (ans == NULL) PyErr_NoMemory(); + return ans; +} + +PdfString * +pdf::podofo_convert_pystring_single_byte(PyObject *py) { + Py_UNICODE* u = PyUnicode_AS_UNICODE(py); + PyObject *s = PyUnicode_Encode(u, PyUnicode_GET_SIZE(py), "cp1252", "replace"); + if (s == NULL) { PyErr_NoMemory(); return NULL; } + PdfString *ans = new PdfString(PyString_AS_STRING(s)); + Py_DECREF(s); + if (ans == NULL) PyErr_NoMemory(); + return ans; +} + From 238bc24cf21f08830d5ccbec2cb464a9929ca8af Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 12:17:04 +0530 Subject: [PATCH 24/57] More podofo refactoring --- setup/extensions.py | 1 - src/calibre/utils/ipc/worker.py | 6 - src/calibre/utils/podofo/__init__.py | 179 ++++++--------------------- src/calibre/utils/podofo/doc.cpp | 169 ++++++++++++++----------- 4 files changed, 139 insertions(+), 216 deletions(-) diff --git a/setup/extensions.py b/setup/extensions.py index a7b01bcd19..d6052125e5 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -148,7 +148,6 @@ extensions = [ libraries=['podofo'], lib_dirs=[podofo_lib], inc_dirs=[podofo_inc, os.path.dirname(podofo_inc)], - optional=True, error=podofo_error), Extension('pictureflow', diff --git 
a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py index 08374400ac..7d13a76cf0 100644 --- a/src/calibre/utils/ipc/worker.py +++ b/src/calibre/utils/ipc/worker.py @@ -43,12 +43,6 @@ PARALLEL_FUNCS = { 'read_metadata' : ('calibre.ebooks.metadata.worker', 'read_metadata_', 'notification'), - 'read_pdf_metadata' : - ('calibre.utils.podofo.__init__', 'get_metadata_', None), - - 'write_pdf_metadata' : - ('calibre.utils.podofo.__init__', 'set_metadata_', None), - 'save_book' : ('calibre.ebooks.metadata.worker', 'save_book', 'notification'), diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index 948962f438..232b6536af 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -6,109 +6,12 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, time, shutil +import os, shutil from calibre.constants import plugins, preferred_encoding -from calibre.ebooks.metadata import MetaInformation, string_to_authors, \ - authors_to_string -from calibre.utils.ipc.job import ParallelJob -from calibre.utils.ipc.server import Server -from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile -from calibre import prints - -podofo, podofo_err = plugins['podofo'] - -class Unavailable(Exception): pass - -def get_metadata(stream, cpath=None): - if not podofo: - raise Unavailable(podofo_err) - pt = PersistentTemporaryFile('_podofo.pdf') - pt.write(stream.read()) - pt.close() - server = Server(pool_size=1) - job = ParallelJob('read_pdf_metadata', 'Read pdf metadata', - lambda x,y:x, args=[pt.name, cpath]) - server.add_job(job) - while not job.is_finished: - time.sleep(0.1) - job.update() - - job.update() - server.close() - if job.result is None: - raise ValueError('Failed to read metadata: ' + job.details) - title, authors, creator, tags, ok = job.result - if not ok: - print 'Failed to extract cover:' - print job.details - if title == 
'_': - title = getattr(stream, 'name', _('Unknown')) - title = os.path.splitext(title)[0] - - mi = MetaInformation(title, authors) - if creator: - mi.book_producer = creator - if tags: - mi.tags = tags - if os.path.exists(pt.name): os.remove(pt.name) - if ok: - mi.cover = cpath - return mi - -def get_metadata_quick(raw): - p = podofo.PDFDoc() - p.load(raw) - title = p.title - if not title: - title = '_' - author = p.author - authors = string_to_authors(author) if author else [_('Unknown')] - creator = p.creator - try: - tags = [x.strip() for x in p.keywords.split(u',')] - tags = [x for x in tags if x] - except: - tags = [] - - mi = MetaInformation(title, authors) - if creator: - mi.book_producer = creator - if tags: - mi.tags = tags - return mi - -def get_metadata_(path, cpath=None): - p = podofo.PDFDoc() - p.open(path) - title = p.title - if not title: - title = '_' - author = p.author - authors = string_to_authors(author) if author else [_('Unknown')] - creator = p.creator - try: - tags = [x.strip() for x in p.keywords.split(u',')] - tags = [x for x in tags if x] - except: - tags = [] - ok = True - try: - if cpath is not None: - pages = p.pages - if pages < 1: - raise ValueError('PDF has no pages') - if True or pages == 1: - shutil.copyfile(path, cpath) - else: - p.extract_first_page() - p.save(cpath) - except: - import traceback - traceback.print_exc() - ok = False - - return (title, authors, creator, tags, ok) +from calibre.ebooks.metadata import authors_to_string +from calibre.ptempfile import TemporaryDirectory +from calibre.utils.ipc.simple_worker import fork_job, WorkerError def prep(val): if not val: @@ -118,27 +21,16 @@ def prep(val): return val.strip() def set_metadata(stream, mi): - if not podofo: - raise Unavailable(podofo_err) - with TemporaryFile('_podofo_read.pdf') as inputf, \ - TemporaryFile('_podofo_write.pdf') as outputf: - server = Server(pool_size=1) - with open(inputf, 'wb') as f: + with TemporaryDirectory(u'_podofo_set_metadata') as tdir: + 
with open(os.path.join(tdir, u'input.pdf'), 'wb') as f: shutil.copyfileobj(stream, f) - job = ParallelJob('write_pdf_metadata', 'Write pdf metadata', - lambda x,y:x, args=[inputf, outputf, mi.title, mi.authors, - mi.book_producer, mi.tags]) - server.add_job(job) - while not job.is_finished: - time.sleep(0.1) - job.update() - - job.update() - server.close() - if job.failed: - prints(job.details) - elif job.result: - with open(outputf, 'rb') as f: + try: + touched = fork_job('calibre.utils.podofo', 'set_metadata_', (tdir, + mi.title, mi.authors, mi.book_producer, mi.tags)) + except WorkerError as e: + raise Exception('Failed to set PDF metadata: %s'%e.orig_tb) + if touched: + with open(os.path.join(tdir, u'output.pdf'), 'rb') as f: f.seek(0, 2) if f.tell() > 100: f.seek(0) @@ -148,10 +40,14 @@ def set_metadata(stream, mi): stream.flush() stream.seek(0) +def set_metadata_(tdir, title, authors, bkp, tags): + podofo, podofo_err = plugins['podofo'] + if podofo is None: + raise RuntimeError('Failed to load podofo: %s'%podofo_err) -def set_metadata_(path, opath, title, authors, bkp, tags): + os.chdir(tdir) p = podofo.PDFDoc() - p.open(path) + p.open(u'input.pdf') title = prep(title) touched = False if title and title != p.title: @@ -177,27 +73,32 @@ def set_metadata_(path, opath, title, authors, bkp, tags): pass if touched: - p.save(opath) - return True - return False + p.save(u'output.pdf') + + return touched def delete_all_but(path, pages): ''' Delete all the pages in the pdf except for the specified ones. Negative - numbers are counted from the end of the PDF.''' - with TemporaryFile('_podofo_in.pdf') as of: - shutil.copyfile(path, of) + numbers are counted from the end of the PDF. 
''' + podofo, podofo_err = plugins['podofo'] + if podofo is None: + raise RuntimeError('Failed to load podofo: %s'%podofo_err) - p = podofo.PDFDoc() - p.open(of) - total = p.page_count() - pages = { total + x if x < 0 else x for x in pages } - for page in xrange(total-1, -1, -1): - if page not in pages: - p.delete_page(page) - os.remove(path) - p.save(path) + p = podofo.PDFDoc() + with open(path, 'rb') as f: + raw = f.read() + p.load(raw) + total = p.page_count() + pages = { total + x if x < 0 else x for x in pages } + for page in xrange(total-1, -1, -1): + if page not in pages: + p.delete_page(page) + + raw = p.write() + with open(path, 'wb') as f: + f.write(raw) if __name__ == '__main__': - f = '/tmp/t.pdf' + f = u'/tmp/t.pdf' delete_all_but(f, [0, 1, -2, -1]) diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index d591d6fc65..bf3135c7ee 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -57,8 +57,8 @@ PDFDoc_open(PDFDoc *self, PyObject *args, PyObject *kwargs) { } catch(const PdfError & err) { podofo_set_exception(err); return NULL; - } -} else return NULL; + } + } else return NULL; Py_RETURN_NONE; @@ -77,11 +77,72 @@ PDFDoc_save(PDFDoc *self, PyObject *args, PyObject *kwargs) { } } else return NULL; - - Py_INCREF(Py_None); - return Py_None; + Py_RETURN_NONE; } +static PyObject * +PDFDoc_write(PDFDoc *self, PyObject *args, PyObject *kwargs) { + PyObject *ans; + PdfRefCountedBuffer buffer(1*1024*1024); + PdfOutputDevice out(&buffer); + + try { + self->doc->Write(&out); + } catch(const PdfError &err) { + podofo_set_exception(err); + return NULL; + } + + ans = PyBytes_FromStringAndSize(buffer.GetBuffer(), out.Tell()); + if (ans == NULL) PyErr_NoMemory(); + return ans; +} + +static PyObject * +PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { + try { + while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); + } catch(const PdfError & err) { + 
podofo_set_exception(err); + return NULL; + } + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { + int count; + try { + count = self->doc->GetPageCount(); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + return Py_BuildValue("i", count); +} + +static PyObject * +PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { + int num = 0; + if (PyArg_ParseTuple(args, "i", &num)) { + try { + self->doc->DeletePages(num, 1); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + } else return NULL; + + Py_RETURN_NONE; +} + +static PyObject * +PDFDoc_append(PDFDoc *self, PyObject *args, PyObject *kwargs) { + Py_RETURN_NONE; +} + +// Properties {{{ + static PyObject * PDFDoc_pages_getter(PDFDoc *self, void *closure) { int pages = self->doc->GetPageCount(); @@ -123,46 +184,6 @@ PDFDoc_version_getter(PDFDoc *self, void *closure) { } - -static PyObject * -PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { - try { - while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - Py_RETURN_NONE; -} - -static PyObject * -PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { - int count; - try { - count = self->doc->GetPageCount(); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - return Py_BuildValue("i", count); -} - -static PyObject * -PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { - int num = 0; - if (PyArg_ParseTuple(args, "i", &num)) { - try { - self->doc->DeletePages(num, 1); - } catch(const PdfError & err) { - podofo_set_exception(err); - return NULL; - } - } else return NULL; - - Py_RETURN_NONE; -} - - static PyObject * PDFDoc_getter(PDFDoc *self, int field) { @@ -288,31 +309,6 @@ PDFDoc_producer_setter(PDFDoc *self, PyObject *val, void *closure) { 
return PDFDoc_setter(self, val, 5); } - -static PyMethodDef PDFDoc_methods[] = { - {"load", (PyCFunction)PDFDoc_load, METH_VARARGS, - "Load a PDF document from a byte buffer (string)" - }, - {"open", (PyCFunction)PDFDoc_open, METH_VARARGS, - "Load a PDF document from a file path (string)" - }, - {"save", (PyCFunction)PDFDoc_save, METH_VARARGS, - "Save the PDF document to a path on disk" - }, - {"extract_first_page", (PyCFunction)PDFDoc_extract_first_page, METH_VARARGS, - "extract_first_page() -> Remove all but the first page." - }, - {"page_count", (PyCFunction)PDFDoc_page_count, METH_VARARGS, - "page_count() -> Number of pages in the PDF." - }, - {"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS, - "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)." - }, - - - {NULL} /* Sentinel */ -}; - static PyGetSetDef PDFDoc_getsetters[] = { {(char *)"title", (getter)PDFDoc_title_getter, (setter)PDFDoc_title_setter, @@ -350,6 +346,39 @@ static PyGetSetDef PDFDoc_getsetters[] = { {NULL} /* Sentinel */ }; + +// }}} + +static PyMethodDef PDFDoc_methods[] = { + {"load", (PyCFunction)PDFDoc_load, METH_VARARGS, + "Load a PDF document from a byte buffer (string)" + }, + {"open", (PyCFunction)PDFDoc_open, METH_VARARGS, + "Load a PDF document from a file path (string)" + }, + {"save", (PyCFunction)PDFDoc_save, METH_VARARGS, + "Save the PDF document to a path on disk" + }, + {"write", (PyCFunction)PDFDoc_write, METH_VARARGS, + "Return the PDF document as a bytestring." + }, + {"extract_first_page", (PyCFunction)PDFDoc_extract_first_page, METH_VARARGS, + "extract_first_page() -> Remove all but the first page." + }, + {"page_count", (PyCFunction)PDFDoc_page_count, METH_VARARGS, + "page_count() -> Number of pages in the PDF." + }, + {"delete_page", (PyCFunction)PDFDoc_delete_page, METH_VARARGS, + "delete_page(page_num) -> Delete the specified page from the pdf (0 is the first page)." 
+ }, + {"append", (PyCFunction)PDFDoc_append, METH_VARARGS, + "append(doc) -> Append doc (which must be a PDFDoc) to this document." + }, + + + {NULL} /* Sentinel */ +}; + PyTypeObject pdf::PDFDocType = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ From 37a38b606c86cc8c711febe5fb0e3c6c7c883cab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 12:49:08 +0530 Subject: [PATCH 25/57] ... --- src/calibre/ebooks/pdf/manipulate/info.py | 4 ++-- src/calibre/utils/podofo/doc.cpp | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/pdf/manipulate/info.py b/src/calibre/ebooks/pdf/manipulate/info.py index 0cb4f69172..9b6e0312d0 100644 --- a/src/calibre/ebooks/pdf/manipulate/info.py +++ b/src/calibre/ebooks/pdf/manipulate/info.py @@ -13,10 +13,9 @@ import os, sys from calibre.utils.config import OptionParser from calibre.utils.logging import Log -from calibre.constants import preferred_encoding +from calibre.constants import preferred_encoding, plugins from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted from calibre import prints -from calibre.utils.podofo import podofo, podofo_err USAGE = '\n%prog %%name ' + _('''\ file.pdf ... 
@@ -33,6 +32,7 @@ def option_parser(name): return OptionParser(usage=usage) def print_info(pdf_path): + podofo, podofo_err = plugins['podofo'] if not podofo: raise RuntimeError('Failed to load PoDoFo with error:'+podofo_err) p = podofo.PDFDoc() diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index bf3135c7ee..72180bb55f 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -9,6 +9,7 @@ using namespace pdf; +// Constructor/desctructor {{{ static void PDFDoc_dealloc(PDFDoc* self) { @@ -29,7 +30,9 @@ PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)self; } +// }}} +// Loading/Opening of PDF files {{{ static PyObject * PDFDoc_load(PDFDoc *self, PyObject *args, PyObject *kwargs) { char *buffer; Py_ssize_t size; @@ -63,7 +66,9 @@ PDFDoc_open(PDFDoc *self, PyObject *args, PyObject *kwargs) { Py_RETURN_NONE; } +// }}} +// Saving/writing of PDF files {{{ static PyObject * PDFDoc_save(PDFDoc *self, PyObject *args, PyObject *kwargs) { char *buffer; @@ -97,7 +102,9 @@ PDFDoc_write(PDFDoc *self, PyObject *args, PyObject *kwargs) { if (ans == NULL) PyErr_NoMemory(); return ans; } +// }}} +// extract_first_page() {{{ static PyObject * PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { try { @@ -108,7 +115,9 @@ PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { } Py_RETURN_NONE; } +// }}} +// page_count() {{{ static PyObject * PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { int count; @@ -119,8 +128,9 @@ PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { return NULL; } return Py_BuildValue("i", count); -} +} // }}} +// delete_page {{{ static PyObject * PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { int num = 0; @@ -134,12 +144,13 @@ PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { } else return NULL; Py_RETURN_NONE; -} +} // }}} +// append() {{{ static 
PyObject * PDFDoc_append(PDFDoc *self, PyObject *args, PyObject *kwargs) { Py_RETURN_NONE; -} +} // }}} // Properties {{{ @@ -379,6 +390,7 @@ static PyMethodDef PDFDoc_methods[] = { {NULL} /* Sentinel */ }; +// Type definition {{{ PyTypeObject pdf::PDFDocType = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ @@ -421,5 +433,5 @@ PyTypeObject pdf::PDFDocType = { PDFDoc_new, /* tp_new */ }; - +// }}} From 10e0c2bead7db5b0c85511e5791a2ce1e0c6745c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 13:41:10 +0530 Subject: [PATCH 26/57] ... --- src/calibre/ebooks/pdf/manipulate/info.py | 7 +++---- src/calibre/utils/podofo/__init__.py | 16 ++++++++-------- src/calibre/utils/podofo/doc.cpp | 10 ++++++---- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/calibre/ebooks/pdf/manipulate/info.py b/src/calibre/ebooks/pdf/manipulate/info.py index 9b6e0312d0..ee71dac71d 100644 --- a/src/calibre/ebooks/pdf/manipulate/info.py +++ b/src/calibre/ebooks/pdf/manipulate/info.py @@ -13,8 +13,9 @@ import os, sys from calibre.utils.config import OptionParser from calibre.utils.logging import Log -from calibre.constants import preferred_encoding, plugins +from calibre.constants import preferred_encoding from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted +from calibre.utils.podofo import get_podofo from calibre import prints USAGE = '\n%prog %%name ' + _('''\ @@ -32,9 +33,7 @@ def option_parser(name): return OptionParser(usage=usage) def print_info(pdf_path): - podofo, podofo_err = plugins['podofo'] - if not podofo: - raise RuntimeError('Failed to load PoDoFo with error:'+podofo_err) + podofo = get_podofo() p = podofo.PDFDoc() p.open(pdf_path) diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index 232b6536af..eb1d22d3e2 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -13,6 +13,12 @@ from calibre.ebooks.metadata import authors_to_string from calibre.ptempfile import 
TemporaryDirectory from calibre.utils.ipc.simple_worker import fork_job, WorkerError +def get_podofo(): + podofo, podofo_err = plugins['podofo'] + if podofo is None: + raise RuntimeError('Failed to load podofo: %s'%podofo_err) + return podofo + def prep(val): if not val: return u'' @@ -41,10 +47,7 @@ def set_metadata(stream, mi): stream.seek(0) def set_metadata_(tdir, title, authors, bkp, tags): - podofo, podofo_err = plugins['podofo'] - if podofo is None: - raise RuntimeError('Failed to load podofo: %s'%podofo_err) - + podofo = get_podofo() os.chdir(tdir) p = podofo.PDFDoc() p.open(u'input.pdf') @@ -80,10 +83,7 @@ def set_metadata_(tdir, title, authors, bkp, tags): def delete_all_but(path, pages): ''' Delete all the pages in the pdf except for the specified ones. Negative numbers are counted from the end of the PDF. ''' - podofo, podofo_err = plugins['podofo'] - if podofo is None: - raise RuntimeError('Failed to load podofo: %s'%podofo_err) - + podofo = get_podofo() p = podofo.PDFDoc() with open(path, 'rb') as f: raw = f.read() diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 72180bb55f..6f462b252a 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -88,18 +88,20 @@ PDFDoc_save(PDFDoc *self, PyObject *args, PyObject *kwargs) { static PyObject * PDFDoc_write(PDFDoc *self, PyObject *args, PyObject *kwargs) { PyObject *ans; - PdfRefCountedBuffer buffer(1*1024*1024); - PdfOutputDevice out(&buffer); try { + PdfRefCountedBuffer buffer(1*1024*1024); + PdfOutputDevice out(&buffer); self->doc->Write(&out); + ans = PyBytes_FromStringAndSize(buffer.GetBuffer(), out.Tell()); + if (ans == NULL) PyErr_NoMemory(); } catch(const PdfError &err) { podofo_set_exception(err); return NULL; + } catch (...) 
{ + return PyErr_NoMemory(); } - ans = PyBytes_FromStringAndSize(buffer.GetBuffer(), out.Tell()); - if (ans == NULL) PyErr_NoMemory(); return ans; } // }}} From 4d6e1aa1f67fd7c9a4eb4785a15edbd2e633032f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 14:18:19 +0530 Subject: [PATCH 27/57] PoDoFo, do not print out informational messages --- src/calibre/utils/podofo/podofo.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp index b79ca8cfe3..eefe182cec 100644 --- a/src/calibre/utils/podofo/podofo.cpp +++ b/src/calibre/utils/podofo/podofo.cpp @@ -14,6 +14,29 @@ static PyMethodDef podofo_methods[] = { {NULL} /* Sentinel */ }; +class PyLogMessage : public PdfError::LogMessageCallback { + + public: + ~PyLogMessage() {} + + void LogMessage(ELogSeverity severity, const char* prefix, const char* msg, va_list & args ) { + if (severity > eLogSeverity_Warning) return; + if (prefix) + fprintf(stderr, "%s", prefix); + + vfprintf(stderr, msg, args); + } + + void LogMessage(ELogSeverity severity, const wchar_t* prefix, const wchar_t* msg, va_list & args ) { + if (severity > eLogSeverity_Warning) return; + if (prefix) + fwprintf(stderr, prefix); + + vfwprintf(stderr, msg, args); + } +}; + +PyLogMessage log_message; PyMODINIT_FUNC initpodofo(void) @@ -26,6 +49,9 @@ initpodofo(void) pdf::Error = PyErr_NewException((char*)"podofo.Error", NULL, NULL); if (pdf::Error == NULL) return; + PdfError::SetLogMessageCallback((PdfError::LogMessageCallback*)&log_message); + + PdfError::EnableDebug(false); m = Py_InitModule3("podofo", podofo_methods, "Wrapper for the PoDoFo PDF library"); From 3cc8ef1875e70af32c49a34914b9b0d5174450c2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 15:39:06 +0530 Subject: [PATCH 28/57] PDF Output: Allow generating PDF files with more than 512 pages on windows. 
Fixes #1041614 (IOError: [Errno 24] Too many open files: u'C) --- src/calibre/ebooks/pdf/writer.py | 97 ++++++++++++++++++++------------ src/calibre/utils/podofo/doc.cpp | 16 ++++++ 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 41cdc5e498..5ae03704c6 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -10,22 +10,18 @@ Write content to PDF. import os import shutil +from future_builtins import map -from calibre import isosx -from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.ebooks.pdf.pageoptions import unit, paper_size, \ - orientation -from calibre.ebooks.metadata import authors_to_string -from calibre.ptempfile import PersistentTemporaryFile -from calibre import __appname__, __version__, fit_image -from calibre.ebooks.oeb.display.webview import load_html - -from PyQt4 import QtCore -from PyQt4.Qt import (QEventLoop, QObject, - QPrinter, QMetaObject, QSizeF, Qt, QPainter, QPixmap) +from PyQt4.Qt import (QEventLoop, QObject, QPrinter, QSizeF, Qt, QPainter, + QPixmap, QTimer) from PyQt4.QtWebKit import QWebView -from pyPdf import PdfFileWriter, PdfFileReader +from calibre.ptempfile import PersistentTemporaryDirectory +from calibre.ebooks.pdf.pageoptions import (unit, paper_size, orientation) +from calibre.ebooks.metadata import authors_to_string +from calibre.ptempfile import PersistentTemporaryFile +from calibre import __appname__, __version__, fit_image, isosx, force_unicode +from calibre.ebooks.oeb.display.webview import load_html def get_custom_size(opts): custom_size = None @@ -108,24 +104,33 @@ def draw_image_page(printer, painter, p, preserve_aspect_ratio=True): class PDFMetadata(object): def __init__(self, oeb_metadata=None): - self.title = _('Unknown') - self.author = _('Unknown') + self.title = _(u'Unknown') + self.author = _(u'Unknown') + self.tags = u'' if oeb_metadata != None: if len(oeb_metadata.title) >= 
1: self.title = oeb_metadata.title[0].value if len(oeb_metadata.creator) >= 1: self.author = authors_to_string([x.value for x in oeb_metadata.creator]) + if oeb_metadata.subject: + self.tags = u', '.join(map(unicode, oeb_metadata.subject)) + + self.title = force_unicode(self.title) + self.author = force_unicode(self.author) class PDFWriter(QObject): # {{{ def __init__(self, opts, log, cover_data=None): from calibre.gui2 import is_ok_to_use_qt + from calibre.utils.podofo import get_podofo if not is_ok_to_use_qt(): raise Exception('Not OK to use Qt') QObject.__init__(self) - self.logger = log + self.logger = self.log = log + self.podofo = get_podofo() + self.doc = self.podofo.PDFDoc() self.loop = QEventLoop() self.view = QWebView() @@ -150,14 +155,14 @@ class PDFWriter(QObject): # {{{ self.render_queue = items self.combine_queue = [] self.out_stream = out_stream + self.insert_cover() self.render_succeeded = False - QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection) + QTimer.singleShot(0, self._render_book) self.loop.exec_() if not self.render_succeeded: raise Exception('Rendering HTML to PDF failed') - @QtCore.pyqtSignature('_render_book()') def _render_book(self): try: if len(self.render_queue) == 0: @@ -182,8 +187,9 @@ class PDFWriter(QObject): # {{{ self.do_paged_render(item_path) else: # The document is so corrupt that we can't render the page. 
+ self.logger.error('Document cannot be rendered.') self.loop.exit(0) - raise Exception('Document cannot be rendered.') + return self._render_book() def do_paged_render(self, outpath): @@ -219,6 +225,14 @@ class PDFWriter(QObject): # {{{ painter.end() printer.abort() + self.append_doc(outpath) + + def append_doc(self, outpath): + doc = self.podofo.PDFDoc() + with open(outpath, 'rb') as f: + raw = f.read() + doc.load(raw) + self.doc.append(doc) def _delete_tmpdir(self): if os.path.exists(self.tmp_path): @@ -239,25 +253,21 @@ class PDFWriter(QObject): # {{{ draw_image_page(printer, painter, p, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio) painter.end() + self.append_doc(item_path) printer.abort() - def _write(self): self.logger.debug('Combining individual PDF parts...') - self.insert_cover() - try: - outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author) - for item in self.combine_queue: - # The input PDF stream must remain open until the final PDF - # is written to disk. PyPDF references pages added to the - # final PDF from the input PDF on disk. It does not store - # the pages in memory so we can't close the input PDF. 
- inputPDF = PdfFileReader(open(item, 'rb')) - for page in inputPDF.pages: - outPDF.addPage(page) - outPDF.write(self.out_stream) + self.doc.creator = u'%s %s [http://calibre-ebook.com]'%( + __appname__, __version__) + self.doc.title = self.metadata.title + self.doc.author = self.metadata.author + if self.metadata.tags: + self.doc.keywords = self.metadata.tags + raw = self.doc.write() + self.out_stream.write(raw) self.render_succeeded = True finally: self._delete_tmpdir() @@ -272,21 +282,34 @@ class ImagePDFWriter(object): self.log = log def dump(self, items, out_stream, pdf_metadata): + from calibre.utils.podofo import get_podofo f = PersistentTemporaryFile('_comic2pdf.pdf') f.close() + self.metadata = pdf_metadata try: self.render_images(f.name, pdf_metadata, items) with open(f.name, 'rb') as x: - shutil.copyfileobj(x, out_stream) + raw = x.read() + doc = get_podofo().PDFDoc() + doc.load(raw) + doc.creator = u'%s %s [http://calibre-ebook.com]'%( + __appname__, __version__) + doc.title = self.metadata.title + doc.author = self.metadata.author + if self.metadata.tags: + doc.keywords = self.metadata.tags + raw = doc.write() + out_stream.write(raw) finally: - os.remove(f.name) + try: + os.remove(f.name) + except: + pass def render_images(self, outpath, mi, items): printer = get_pdf_printer(self.opts, for_comic=True, output_file_name=outpath) printer.setDocName(mi.title) - printer.setCreator(u'%s [%s]'%(__appname__, __version__)) - # Seems to be no way to set author painter = QPainter(printer) painter.setRenderHints(QPainter.Antialiasing|QPainter.SmoothPixmapTransform) @@ -304,3 +327,5 @@ class ImagePDFWriter(object): painter.end() + + diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 6f462b252a..f61ab68ec9 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -151,6 +151,22 @@ PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { // append() {{{ static PyObject * 
PDFDoc_append(PDFDoc *self, PyObject *args, PyObject *kwargs) { + PyObject *doc; + int typ; + + if (!PyArg_ParseTuple(args, "O", &doc)) return NULL; + + typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType); + if (typ == -1) return NULL; + if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; } + + try { + self->doc->Append(*((PDFDoc*)doc)->doc, true); + } catch (const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + Py_RETURN_NONE; } // }}} From 9a25f5c023268d6866ec90aa0f9c05809424f797 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 17:48:52 +0530 Subject: [PATCH 29/57] ... --- src/calibre/devices/mtp/unix/driver.py | 17 +++++++++++------ src/calibre/devices/mtp/unix/libmtp.c | 12 +++++++----- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index 24b008802e..3173e6eb98 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -12,7 +12,7 @@ from threading import RLock from io import BytesIO from collections import namedtuple -from calibre import prints +from calibre import prints, as_unicode from calibre.constants import plugins from calibre.ptempfile import SpooledTemporaryFile from calibre.devices.errors import OpenFailed, DeviceError @@ -28,7 +28,7 @@ def fingerprint(d): class MTP_DEVICE(MTPDeviceBase): - supported_platforms = ['linux'] + supported_platforms = ['linux', 'osx'] def __init__(self, *args, **kwargs): MTPDeviceBase.__init__(self, *args, **kwargs) @@ -138,11 +138,12 @@ class MTP_DEVICE(MTPDeviceBase): time.sleep(2) try: self.dev = self.create_device(connected_device) - except self.libmtp.MTPError: + except self.libmtp.MTPError as e: # Black list this device so that it is ignored for the # remainder of this session. 
self.blacklisted_devices.add(connected_device) - raise OpenFailed('%s is not a MTP device'%(connected_device,)) + raise OpenFailed('Failed to open %s: Error: %s'%( + connected_device, as_unicode(e))) except TypeError: self.blacklisted_devices.add(connected_device) raise OpenFailed('') @@ -309,9 +310,13 @@ if __name__ == '__main__': from pprint import pprint dev = MTP_DEVICE(None) dev.startup() - from calibre.devices.scanner import linux_scanner - devs = linux_scanner() + from calibre.devices.scanner import DeviceScanner + scanner = DeviceScanner() + scanner.scan() + devs = scanner.devices cd = dev.detect_managed_devices(devs) + if cd is None: + raise Exception('No MTP device found') dev.open(cd, 'xxx') d = dev.dev print ("Opened device:", dev.get_gui_name()) diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index b0cfd11bb6..61d26e6341 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -212,22 +212,24 @@ libmtp_Device_dealloc(libmtp_Device* self) static int libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) { - int busnum, devnum, vendor_id, product_id; + uint32_t busnum; + uint8_t devnum; + uint16_t vendor_id, product_id; PyObject *usb_serialnum; char *vendor, *product, *friendly_name, *manufacturer_name, *model_name, *serial_number, *device_version; LIBMTP_raw_device_t rawdev; LIBMTP_mtpdevice_t *dev; size_t i; - if (!PyArg_ParseTuple(args, "iiiissO", &busnum, &devnum, &vendor_id, &product_id, &vendor, &product, &usb_serialnum)) return -1; + if (!PyArg_ParseTuple(args, "IBHHssO", &busnum, &devnum, &vendor_id, &product_id, &vendor, &product, &usb_serialnum)) return -1; if (devnum < 0 || devnum > 255 || busnum < 0) { PyErr_SetString(PyExc_TypeError, "Invalid busnum/devnum"); return -1; } - self->ids = Py_BuildValue("iiiiO", busnum, devnum, vendor_id, product_id, usb_serialnum); + self->ids = Py_BuildValue("IBHHO", busnum, devnum, vendor_id, product_id, 
usb_serialnum); if (self->ids == NULL) return -1; - rawdev.bus_location = (uint32_t)busnum; - rawdev.devnum = (uint8_t)devnum; + rawdev.bus_location = busnum; + rawdev.devnum = devnum; rawdev.device_entry.vendor = vendor; rawdev.device_entry.product = product; rawdev.device_entry.vendor_id = vendor_id; From ec8d874fd70502b6004ba929ca60bf11975b281b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 18:26:35 +0530 Subject: [PATCH 30/57] MTP: Update to work with libmtp 1.1.4 --- src/calibre/devices/mtp/unix/libmtp.c | 48 +++++++++++++-------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index 61d26e6341..42d3c762db 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -217,42 +217,40 @@ libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) uint16_t vendor_id, product_id; PyObject *usb_serialnum; char *vendor, *product, *friendly_name, *manufacturer_name, *model_name, *serial_number, *device_version; - LIBMTP_raw_device_t rawdev; - LIBMTP_mtpdevice_t *dev; - size_t i; + LIBMTP_raw_device_t *rawdevs = NULL, rdev; + int numdevs, c; + LIBMTP_mtpdevice_t *dev = NULL; + LIBMTP_error_number_t err; if (!PyArg_ParseTuple(args, "IBHHssO", &busnum, &devnum, &vendor_id, &product_id, &vendor, &product, &usb_serialnum)) return -1; if (devnum < 0 || devnum > 255 || busnum < 0) { PyErr_SetString(PyExc_TypeError, "Invalid busnum/devnum"); return -1; } - self->ids = Py_BuildValue("IBHHO", busnum, devnum, vendor_id, product_id, usb_serialnum); - if (self->ids == NULL) return -1; - - rawdev.bus_location = busnum; - rawdev.devnum = devnum; - rawdev.device_entry.vendor = vendor; - rawdev.device_entry.product = product; - rawdev.device_entry.vendor_id = vendor_id; - rawdev.device_entry.product_id = product_id; - rawdev.device_entry.device_flags = 0x00000000U; - + // We have to build and search the rawdevice list 
instead of creating a + // rawdevice directly as otherwise, dynamic bug flag assignment in libmtp + // does not work Py_BEGIN_ALLOW_THREADS; - for (i = 0; ; i++) { - if (calibre_mtp_device_table[i].vendor == NULL && calibre_mtp_device_table[i].product == NULL && calibre_mtp_device_table[i].vendor_id == 0xffff) break; - if (calibre_mtp_device_table[i].vendor_id == vendor_id && calibre_mtp_device_table[i].product_id == product_id) { - rawdev.device_entry.device_flags = calibre_mtp_device_table[i].device_flags; + err = LIBMTP_Detect_Raw_Devices(&rawdevs, &numdevs); + Py_END_ALLOW_THREADS; + if (err != 0) { PyErr_SetString(MTPError, "Failed to detect raw MTP devices"); return -1; } + + for (c = 0; c < numdevs; c++) { + rdev = rawdevs[c]; + if (rdev.bus_location == busnum && rdev.devnum == devnum) { + Py_BEGIN_ALLOW_THREADS; + dev = LIBMTP_Open_Raw_Device_Uncached(&rdev); + Py_END_ALLOW_THREADS; + if (dev == NULL) { free(rawdevs); PyErr_SetString(MTPError, "Unable to open raw device."); return -1; } + break; } } - dev = LIBMTP_Open_Raw_Device_Uncached(&rawdev); - Py_END_ALLOW_THREADS; - - if (dev == NULL) { - PyErr_SetString(MTPError, "Unable to open raw device."); - return -1; - } + if (rawdevs != NULL) free(rawdevs); + if (dev == NULL) { PyErr_Format(MTPError, "No device with busnum=%lu and devnum=%u found", (long unsigned int)busnum, devnum); return -1; } self->device = dev; + self->ids = Py_BuildValue("IBHHO", busnum, devnum, vendor_id, product_id, usb_serialnum); + if (self->ids == NULL) return -1; Py_BEGIN_ALLOW_THREADS; friendly_name = LIBMTP_Get_Friendlyname(self->device); From 23c1910bdb01f377fbad5fb144a835dc166de232 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 18:40:09 +0530 Subject: [PATCH 31/57] ... 
--- src/calibre/devices/mtp/unix/libmtp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index 42d3c762db..8f83778c34 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -232,7 +232,10 @@ libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) Py_BEGIN_ALLOW_THREADS; err = LIBMTP_Detect_Raw_Devices(&rawdevs, &numdevs); Py_END_ALLOW_THREADS; - if (err != 0) { PyErr_SetString(MTPError, "Failed to detect raw MTP devices"); return -1; } + if (err == LIBMTP_ERROR_NO_DEVICE_ATTACHED) { PyErr_SetString(MTPError, "No raw devices found"); return -1; } + if (err == LIBMTP_ERROR_CONNECTING) { PyErr_SetString(MTPError, "There has been an error connecting"); return -1; } + if (err == LIBMTP_ERROR_MEMORY_ALLOCATION) { PyErr_NoMemory(); return -1; } + if (err != LIBMTP_ERROR_NONE) { PyErr_SetString(MTPError, "Failed to detect raw MTP devices"); return -1; } for (c = 0; c < numdevs; c++) { rdev = rawdevs[c]; From 6b17b8a42be28f478118f0dd87761aba7e0bd9ad Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 18:46:11 +0530 Subject: [PATCH 32/57] ... 
--- src/calibre/devices/mtp/unix/driver.py | 17 +++-------------- src/calibre/devices/mtp/unix/libmtp.c | 2 -- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index 3173e6eb98..c9db90a381 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import time, operator +import operator from threading import RLock from io import BytesIO from collections import namedtuple @@ -133,20 +133,9 @@ class MTP_DEVICE(MTPDeviceBase): self.dev = self._filesystem_cache = None try: self.dev = self.create_device(connected_device) - except self.libmtp.MTPError: - # Give the device some time to settle - time.sleep(2) - try: - self.dev = self.create_device(connected_device) - except self.libmtp.MTPError as e: - # Black list this device so that it is ignored for the - # remainder of this session. 
- self.blacklisted_devices.add(connected_device) - raise OpenFailed('Failed to open %s: Error: %s'%( + except Exception as e: + raise OpenFailed('Failed to open %s: Error: %s'%( connected_device, as_unicode(e))) - except TypeError: - self.blacklisted_devices.add(connected_device) - raise OpenFailed('') storage = sorted(self.dev.storage_info, key=operator.itemgetter('id')) storage = [x for x in storage if x.get('rw', False)] diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index 8f83778c34..ffbcbe688d 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -224,8 +224,6 @@ libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTuple(args, "IBHHssO", &busnum, &devnum, &vendor_id, &product_id, &vendor, &product, &usb_serialnum)) return -1; - if (devnum < 0 || devnum > 255 || busnum < 0) { PyErr_SetString(PyExc_TypeError, "Invalid busnum/devnum"); return -1; } - // We have to build and search the rawdevice list instead of creating a // rawdevice directly as otherwise, dynamic bug flag assignment in libmtp // does not work From a43ca92e0222800e605496d002d7a8d6252ea99d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 22:22:49 +0530 Subject: [PATCH 33/57] MTP unix: Add a debug connected devices method. 
--- src/calibre/devices/mtp/unix/driver.py | 67 ++++++++++++++------------ 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index c9db90a381..85460ff818 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -7,10 +7,10 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import operator +import operator, traceback, pprint, sys from threading import RLock -from io import BytesIO from collections import namedtuple +from functools import partial from calibre import prints, as_unicode from calibre.constants import plugins @@ -57,8 +57,6 @@ class MTP_DEVICE(MTPDeviceBase): @synchronous def detect_managed_devices(self, devices_on_system, force_refresh=False): if self.libmtp is None: return None - if self.known_devices is None: - self.known_devices = frozenset(self.libmtp.known_devices()) # First remove blacklisted devices. 
devs = set() for d in devices_on_system: @@ -91,6 +89,36 @@ class MTP_DEVICE(MTPDeviceBase): return None + @synchronous + def debug_managed_device_detection(self, devices_on_system, output): + p = partial(prints, file=output) + if self.libmtp is None: + err = plugins['libmtp'][1] + if not err: + err = 'startup() not called on this device driver' + p(err) + return False + devs = [d for d in devices_on_system if (d.vendor_id, d.product_id) + in self.known_devices] + if not devs: + p('No known MTP devices connected to system') + return False + p('Known MTP devices connected:') + for d in devs: p(d) + d = devs[0] + p('\nTrying to open:', d) + try: + self.open(d, 'debug') + except: + p('Opening device failed:') + p(traceback.format_exc()) + return False + p('Opened', self.current_friendly_name, 'successfully') + p('Storage info:') + p(pprint.pformat(self.dev.storage_info)) + self.eject() + return True + @synchronous def create_device(self, connected_device): d = connected_device @@ -115,6 +143,8 @@ class MTP_DEVICE(MTPDeviceBase): if self.libmtp is None: print ('Failed to load libmtp, MTP device detection disabled') print (p[1]) + else: + self.known_devices = frozenset(self.libmtp.known_devices()) for x in vars(self.libmtp): if x.startswith('LIBMTP'): @@ -291,44 +321,17 @@ class MTP_DEVICE(MTPDeviceBase): parent.remove_child(obj) if __name__ == '__main__': - BytesIO class PR: def report_progress(self, sent, total): print (sent, total, end=', ') - from pprint import pprint dev = MTP_DEVICE(None) dev.startup() from calibre.devices.scanner import DeviceScanner scanner = DeviceScanner() scanner.scan() devs = scanner.devices - cd = dev.detect_managed_devices(devs) - if cd is None: - raise Exception('No MTP device found') - dev.open(cd, 'xxx') - d = dev.dev - print ("Opened device:", dev.get_gui_name()) - print ("Storage info:") - pprint(d.storage_info) - print("Free space:", dev.free_space()) - # print (d.create_folder(dev._main_id, 0, 'testf')) - # raw = b'test' - # fname = 
b'moose.txt' - # src = BytesIO(raw) - # print (d.put_file(dev._main_id, 0, fname, src, len(raw), PR())) - # with open('/tmp/flint.epub', 'wb') as f: - # print(d.get_file(786, f, PR())) - # print() - # with open('/tmp/bleak.epub', 'wb') as f: - # print(d.get_file(601, f, PR())) - # print() - - dev.filesystem_cache.dump() - - # print (dev.filesystem_cache.entries[0].files[0]) - # print (dev.filesystem_cache.entries[0].folders[0]) + dev.debug_managed_device_detection(devs, sys.stdout) dev.set_debug_level(dev.LIBMTP_DEBUG_ALL) - del d dev.shutdown() From 26618a2a9d8215f664e53ca92e35c9c443215467 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Aug 2012 22:39:35 +0530 Subject: [PATCH 34/57] Add a function to get information about all network interfaces --- setup/installer/linux/freeze2.py | 2 +- setup/installer/windows/notes.rst | 13 +++++++++++++ src/calibre/utils/mdns.py | 13 +++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index 3b801c190e..13c02cca12 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -15,7 +15,7 @@ from setup import Command, modules, basenames, functions, __version__, \ SITE_PACKAGES = ['PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize', 'sip.so', 'BeautifulSoup.py', 'cssutils', 'encutils', 'lxml', 'sipconfig.py', 'xdg', 'dbus', '_dbus_bindings.so', 'dbus_bindings.py', - '_dbus_glib_bindings.so'] + '_dbus_glib_bindings.so', 'netifaces.so'] QTDIR = '/usr/lib/qt4' QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus') diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index e29b205de6..d0f6eb67ba 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -348,6 +348,19 @@ Remove the CORE_xlib, UTIL_Imdisplay and CORE_Magick++ projects. F7 for build project, you will get one error due to the removal of xlib, ignore it. 
+netifaces +------------ + +Download the source tarball from http://alastairs-place.net/projects/netifaces/ + +Rename netifaces.c to netifaces.cpp and make the same change in setup.py + +Run + +python setup.py build +cp build/lib.win32-2.7/netifaces.pyd /cygdrive/c/Python27/Lib/site-packages/ + + calibre --------- diff --git a/src/calibre/utils/mdns.py b/src/calibre/utils/mdns.py index 9232aab994..6140435e46 100644 --- a/src/calibre/utils/mdns.py +++ b/src/calibre/utils/mdns.py @@ -4,9 +4,22 @@ __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' import socket, time, atexit +from collections import defaultdict _server = None +def get_all_ips(): + ''' Return a mapping of interface names to the configuration of the + interface, which includes the ip address, netmask and broadcast addresses + ''' + import netifaces + all_ips = defaultdict(list) + if hasattr(netifaces, 'AF_INET'): + for x in netifaces.interfaces(): + for c in netifaces.ifaddresses(x).get(netifaces.AF_INET, []): + all_ips[x].append(c) + return dict(all_ips) + def _get_external_ip(): 'Get IP address of interface used to connect to the outside world' try: From a14bc99af62342bd2d5e38d3a9e2c655e06e0ce6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 00:37:32 +0530 Subject: [PATCH 35/57] ... 
--- src/calibre/gui2/viewer/documentview.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 6096216b3e..2dbc16d84c 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -123,11 +123,11 @@ class Document(QWebPage): # {{{ opts = config().parse() bg = opts.background_color or 'white' brules = ['background-color: %s !important'%bg] - if opts.text_color: - brules += ['color: %s !important'%opts.text_color] prefix = ''' body { %s } '''%('; '.join(brules)) + if opts.text_color: + prefix += '\n\nbody, p, div { color: %s !important }'%opts.text_color raw = prefix + opts.user_css raw = '::selection {background:#ffff00; color:#000;}\n'+raw data = 'data:text/css;charset=utf-8;base64,' From 9aa30a8b712e1d933dacd597028a1c39921903cc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 09:53:52 +0530 Subject: [PATCH 36/57] ... --- src/calibre/devices/mtp/filesystem_cache.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/calibre/devices/mtp/filesystem_cache.py b/src/calibre/devices/mtp/filesystem_cache.py index 4c7573f0a0..9fc65a61f5 100644 --- a/src/calibre/devices/mtp/filesystem_cache.py +++ b/src/calibre/devices/mtp/filesystem_cache.py @@ -20,20 +20,16 @@ class FileOrFolder(object): def __init__(self, entry, fs_cache): self.object_id = entry['id'] self.is_folder = entry['is_folder'] + self.storage_id = entry['storage_id'] + # self.parent_id is None for storage objects + self.parent_id = entry.get('parent_id', None) n = entry.get('name', None) if not n: n = '___' self.name = force_unicode(n, 'utf-8') - self.storage_id = entry.get('storage_id', None) self.persistent_id = entry.get('persistent_id', self.object_id) self.size = entry.get('size', 0) - # self.parent_id is None for storage objects - self.parent_id = entry.get('parent_id', None) self.all_storage_ids = 
fs_cache.all_storage_ids - if self.parent_id is None and self.storage_id is None: - # A storage object - self.storage_id = self.object_id - if self.storage_id not in self.all_storage_ids: raise ValueError('Storage id %s not valid for %s'%(self.storage_id, entry)) @@ -137,6 +133,7 @@ class FilesystemCache(object): self.all_storage_ids = tuple(x['id'] for x in all_storage) for storage in all_storage: + storage['storage_id'] = storage['id'] e = FileOrFolder(storage, self) self.entries.append(e) From f9dc2cd6d7efe90f0b5a2d9b7fc05652682f8003 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 11:59:01 +0530 Subject: [PATCH 37/57] ... --- src/calibre/devices/mtp/filesystem_cache.py | 6 +-- src/calibre/devices/mtp/unix/driver.py | 19 ++++++-- src/calibre/devices/mtp/unix/libmtp.c | 47 +++++-------------- .../mtp/windows/device_enumeration.cpp | 2 +- 4 files changed, 30 insertions(+), 44 deletions(-) diff --git a/src/calibre/devices/mtp/filesystem_cache.py b/src/calibre/devices/mtp/filesystem_cache.py index 9fc65a61f5..3370967054 100644 --- a/src/calibre/devices/mtp/filesystem_cache.py +++ b/src/calibre/devices/mtp/filesystem_cache.py @@ -31,8 +31,8 @@ class FileOrFolder(object): self.all_storage_ids = fs_cache.all_storage_ids if self.storage_id not in self.all_storage_ids: - raise ValueError('Storage id %s not valid for %s'%(self.storage_id, - entry)) + raise ValueError('Storage id %s not valid for %s, valid values: %s'%(self.storage_id, + entry, self.all_storage_ids)) if self.parent_id == 0: self.parent_id = self.storage_id @@ -138,7 +138,7 @@ class FilesystemCache(object): self.entries.append(e) self.entries.sort(key=attrgetter('object_id')) - all_storage_ids = [x.object_id for x in self.entries] + all_storage_ids = [x.storage_id for x in self.entries] self.all_storage_ids = tuple(all_storage_ids) for entry in entries: diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index 85460ff818..e179647629 100644 --- 
a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -320,11 +320,22 @@ class MTP_DEVICE(MTPDeviceBase): (obj.full_path, self.format_errorstack(errs))) parent.remove_child(obj) -if __name__ == '__main__': - class PR: - def report_progress(self, sent, total): - print (sent, total, end=', ') +def develop(): + from calibre.devices.scanner import DeviceScanner + scanner = DeviceScanner() + scanner.scan() + dev = MTP_DEVICE(None) + dev.startup() + try: + cd = dev.detect_managed_devices(scanner.devices) + if cd is None: raise RuntimeError('No MTP device found') + dev.open(cd, 'develop') + pprint.pprint(dev.dev.storage_info) + dev.filesystem_cache + finally: + dev.shutdown() +if __name__ == '__main__': dev = MTP_DEVICE(None) dev.startup() from calibre.devices.scanner import DeviceScanner diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index ffbcbe688d..48fd55a2eb 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -119,43 +119,18 @@ static uint16_t data_from_python(void *params, void *priv, uint32_t wantlen, uns } static PyObject* build_file_metadata(LIBMTP_file_t *nf, uint32_t storage_id) { - PyObject *ans = NULL, *l = NULL; + PyObject *ans = NULL; - ans = Py_BuildValue("{s:s}", "name", nf->filename); - if (ans == NULL) return PyErr_NoMemory(); - - // We explicitly populate the dictionary instead of using Py_BuildValue to - // handle the numeric variables properly. 
Without this, for some reason the - // dict sometimes has incorrect values - l = PyLong_FromUnsignedLong(nf->item_id); - if (l == NULL) goto error; - if (PyDict_SetItemString(ans, "id", l) != 0) goto error; - Py_DECREF(l); l = NULL; - - l = PyLong_FromUnsignedLong(nf->parent_id); - if (l == NULL) goto error; - if (PyDict_SetItemString(ans, "parent_id", l) != 0) goto error; - Py_DECREF(l); l = NULL; - - l = PyLong_FromUnsignedLong(storage_id); - if (l == NULL) goto error; - if (PyDict_SetItemString(ans, "storage_id", l) != 0) goto error; - Py_DECREF(l); l = NULL; - - l = PyLong_FromUnsignedLongLong(nf->filesize); - if (l == NULL) goto error; - if (PyDict_SetItemString(ans, "size", l) != 0) goto error; - Py_DECREF(l); l = NULL; - - if (PyDict_SetItemString(ans, "is_folder", - (nf->filetype == LIBMTP_FILETYPE_FOLDER) ? Py_True : Py_False) != 0) - goto error; + ans = Py_BuildValue("{s:s, s:k, s:k, s:k, s:K, s:O}", + "name", nf->filename, + "id", nf->item_id, + "parent_id", nf->parent_id, + "storage_id", storage_id, + "size", nf->filesize, + "is_folder", (nf->filetype == LIBMTP_FILETYPE_FOLDER) ? 
Py_True : Py_False + ); return ans; - -error: - Py_XDECREF(ans); Py_XDECREF(l); - return PyErr_NoMemory(); } static PyObject* file_metadata(LIBMTP_mtpdevice_t *device, PyObject *errs, uint32_t item_id, uint32_t storage_id) { @@ -396,8 +371,8 @@ static int recursive_get_files(LIBMTP_mtpdevice_t *dev, uint32_t storage_id, uin entry = build_file_metadata(f, storage_id); if (entry == NULL) { ok = 0; } else { - PyList_Append(ans, entry); - Py_DECREF(entry); + if (PyList_Append(ans, entry) != 0) { ok = 0; } + Py_DECREF(entry); } if (ok && f->filetype == LIBMTP_FILETYPE_FOLDER) { diff --git a/src/calibre/devices/mtp/windows/device_enumeration.cpp b/src/calibre/devices/mtp/windows/device_enumeration.cpp index 90bc437be1..2c9b48d506 100644 --- a/src/calibre/devices/mtp/windows/device_enumeration.cpp +++ b/src/calibre/devices/mtp/windows/device_enumeration.cpp @@ -149,7 +149,7 @@ PyObject* get_storage_info(IPortableDevice *device) { // {{{ if (SUCCEEDED(values->GetUnsignedIntegerValue(WPD_STORAGE_ACCESS_CAPABILITY, &access)) && access == WPD_STORAGE_ACCESS_CAPABILITY_READWRITE) desc = Py_True; soid = PyUnicode_FromWideChar(object_ids[i], wcslen(object_ids[i])); if (soid == NULL) { PyErr_NoMemory(); goto end; } - so = Py_BuildValue("{s:K,s:K,s:K,s:K,s:O,s:N}", + so = Py_BuildValue("{s:K, s:K, s:K, s:K, s:O, s:N}", "capacity", capacity, "capacity_objects", capacity_objects, "free_space", free_space, "free_objects", free_objects, "rw", desc, "id", soid); if (so == NULL) { PyErr_NoMemory(); goto end; } if (SUCCEEDED(values->GetStringValue(WPD_STORAGE_DESCRIPTION, &storage_desc))) { From 3ee0576ed4cce821c61604fdc0e2d0f720150e62 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 14:23:15 +0530 Subject: [PATCH 38/57] ... 
--- src/calibre/devices/mtp/test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/mtp/test.py b/src/calibre/devices/mtp/test.py index bcfd2c34d1..0563708ea4 100644 --- a/src/calibre/devices/mtp/test.py +++ b/src/calibre/devices/mtp/test.py @@ -217,7 +217,7 @@ class TestDeviceInteraction(unittest.TestCase): used_once = self.measure_memory_usage(1, send_file, self.storage, name, raw, size) - used_many = self.measure_memory_usage(10, send_file, self.storage, name, + used_many = self.measure_memory_usage(20, send_file, self.storage, name, raw, size) self.check_memory(used_once, used_many, @@ -227,12 +227,13 @@ class TestDeviceInteraction(unittest.TestCase): raw = io.BytesIO() pc = ProgressCallback() self.dev.get_file(f, raw, callback=pc) + raw.truncate(0) del raw del pc f = self.storage.file_named(name) used_once = self.measure_memory_usage(1, get_file, f) - used_many = self.measure_memory_usage(10, get_file, f) + used_many = self.measure_memory_usage(20, get_file, f) self.check_memory(used_once, used_many, 'Memory consumption during get_file:') From f0d7ad263592288b467d05e766651a0e714b98cf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 14:48:30 +0530 Subject: [PATCH 39/57] Fix erroneuos method signature for new types defined in C extensions. 
Also fix handling of uint* types in the libusb and libmtp modules (I hope) --- src/calibre/devices/libusb/libusb.c | 6 +- src/calibre/devices/mtp/unix/libmtp.c | 167 ++++++++++----------- src/calibre/devices/mtp/windows/device.cpp | 14 +- src/calibre/utils/podofo/doc.cpp | 16 +- 4 files changed, 98 insertions(+), 105 deletions(-) diff --git a/src/calibre/devices/libusb/libusb.c b/src/calibre/devices/libusb/libusb.c index 7d5234e54e..843459cd98 100644 --- a/src/calibre/devices/libusb/libusb.c +++ b/src/calibre/devices/libusb/libusb.c @@ -80,9 +80,9 @@ static PyObject* get_devices(PyObject *self, PyObject *args) { if (err != 0) { format_err(err); break; } if (desc.bDeviceClass == LIBUSB_CLASS_HUB) continue; - d = Py_BuildValue("(HHHHH)", libusb_get_bus_number(dev), - libusb_get_device_address(dev), desc.idVendor, desc.idProduct, - desc.bcdDevice); + d = Py_BuildValue("(BBHHH)", (unsigned char)libusb_get_bus_number(dev), + (unsigned char)libusb_get_device_address(dev), (unsigned short)desc.idVendor, (unsigned short)desc.idProduct, + (unsigned short)desc.bcdDevice); if (d == NULL) break; t = PyDict_GetItem(cache, d); diff --git a/src/calibre/devices/mtp/unix/libmtp.c b/src/calibre/devices/mtp/unix/libmtp.c index 48fd55a2eb..86c9349d20 100644 --- a/src/calibre/devices/mtp/unix/libmtp.c +++ b/src/calibre/devices/mtp/unix/libmtp.c @@ -55,7 +55,7 @@ static int report_progress(uint64_t const sent, uint64_t const total, void const cb = (ProgressCallback *)data; if (cb->obj != NULL) { PyEval_RestoreThread(cb->state); - res = PyObject_CallFunction(cb->obj, "KK", sent, total); + res = PyObject_CallFunction(cb->obj, "KK", (unsigned long long)sent, (unsigned long long)total); Py_XDECREF(res); cb->state = PyEval_SaveThread(); } @@ -84,7 +84,7 @@ static uint16_t data_to_python(void *params, void *priv, uint32_t sendlen, unsig cb = (ProgressCallback *)priv; *putlen = sendlen; PyEval_RestoreThread(cb->state); - res = PyObject_CallMethod(cb->extra, "write", "s#", data, sendlen); + res 
= PyObject_CallMethod(cb->extra, "write", "s#", data, (Py_ssize_t)sendlen); if (res == NULL) { ret = LIBMTP_HANDLER_RETURN_ERROR; *putlen = 0; @@ -106,7 +106,7 @@ static uint16_t data_from_python(void *params, void *priv, uint32_t wantlen, uns cb = (ProgressCallback *)priv; PyEval_RestoreThread(cb->state); - res = PyObject_CallMethod(cb->extra, "read", "k", wantlen); + res = PyObject_CallMethod(cb->extra, "read", "k", (unsigned long)wantlen); if (res != NULL && PyBytes_AsStringAndSize(res, &buf, &len) != -1 && len <= wantlen) { memcpy(data, buf, len); *gotlen = len; @@ -122,10 +122,10 @@ static PyObject* build_file_metadata(LIBMTP_file_t *nf, uint32_t storage_id) { PyObject *ans = NULL; ans = Py_BuildValue("{s:s, s:k, s:k, s:k, s:K, s:O}", - "name", nf->filename, - "id", nf->item_id, - "parent_id", nf->parent_id, - "storage_id", storage_id, + "name", (unsigned long)nf->filename, + "id", (unsigned long)nf->item_id, + "parent_id", (unsigned long)nf->parent_id, + "storage_id", (unsigned long)storage_id, "size", nf->filesize, "is_folder", (nf->filetype == LIBMTP_FILETYPE_FOLDER) ? 
Py_True : Py_False ); @@ -161,11 +161,11 @@ typedef struct { PyObject *serial_number; PyObject *device_version; -} libmtp_Device; +} Device; // Device.__init__() {{{ static void -libmtp_Device_dealloc(libmtp_Device* self) +Device_dealloc(Device* self) { if (self->device != NULL) { Py_BEGIN_ALLOW_THREADS; @@ -185,11 +185,11 @@ libmtp_Device_dealloc(libmtp_Device* self) } static int -libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) +Device_init(Device *self, PyObject *args, PyObject *kwds) { - uint32_t busnum; - uint8_t devnum; - uint16_t vendor_id, product_id; + unsigned long busnum; + unsigned char devnum; + unsigned short vendor_id, product_id; PyObject *usb_serialnum; char *vendor, *product, *friendly_name, *manufacturer_name, *model_name, *serial_number, *device_version; LIBMTP_raw_device_t *rawdevs = NULL, rdev; @@ -197,7 +197,7 @@ libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) LIBMTP_mtpdevice_t *dev = NULL; LIBMTP_error_number_t err; - if (!PyArg_ParseTuple(args, "IBHHssO", &busnum, &devnum, &vendor_id, &product_id, &vendor, &product, &usb_serialnum)) return -1; + if (!PyArg_ParseTuple(args, "kBHHssO", &busnum, &devnum, &vendor_id, &product_id, &vendor, &product, &usb_serialnum)) return -1; // We have to build and search the rawdevice list instead of creating a // rawdevice directly as otherwise, dynamic bug flag assignment in libmtp @@ -212,7 +212,7 @@ libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) for (c = 0; c < numdevs; c++) { rdev = rawdevs[c]; - if (rdev.bus_location == busnum && rdev.devnum == devnum) { + if (rdev.bus_location == (uint32_t)busnum && rdev.devnum == (uint8_t)devnum) { Py_BEGIN_ALLOW_THREADS; dev = LIBMTP_Open_Raw_Device_Uncached(&rdev); Py_END_ALLOW_THREADS; @@ -222,10 +222,10 @@ libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) } if (rawdevs != NULL) free(rawdevs); - if (dev == NULL) { PyErr_Format(MTPError, "No device with busnum=%lu and 
devnum=%u found", (long unsigned int)busnum, devnum); return -1; } + if (dev == NULL) { PyErr_Format(MTPError, "No device with busnum=%lu and devnum=%u found", busnum, devnum); return -1; } self->device = dev; - self->ids = Py_BuildValue("IBHHO", busnum, devnum, vendor_id, product_id, usb_serialnum); + self->ids = Py_BuildValue("kBHHO", busnum, devnum, vendor_id, product_id, usb_serialnum); if (self->ids == NULL) return -1; Py_BEGIN_ALLOW_THREADS; @@ -272,46 +272,46 @@ libmtp_Device_init(libmtp_Device *self, PyObject *args, PyObject *kwds) // Device.friendly_name {{{ static PyObject * -libmtp_Device_friendly_name(libmtp_Device *self, void *closure) { +Device_friendly_name(Device *self, void *closure) { Py_INCREF(self->friendly_name); return self->friendly_name; } // }}} // Device.manufacturer_name {{{ static PyObject * -libmtp_Device_manufacturer_name(libmtp_Device *self, void *closure) { +Device_manufacturer_name(Device *self, void *closure) { Py_INCREF(self->manufacturer_name); return self->manufacturer_name; } // }}} // Device.model_name {{{ static PyObject * -libmtp_Device_model_name(libmtp_Device *self, void *closure) { +Device_model_name(Device *self, void *closure) { Py_INCREF(self->model_name); return self->model_name; } // }}} // Device.serial_number {{{ static PyObject * -libmtp_Device_serial_number(libmtp_Device *self, void *closure) { +Device_serial_number(Device *self, void *closure) { Py_INCREF(self->serial_number); return self->serial_number; } // }}} // Device.device_version {{{ static PyObject * -libmtp_Device_device_version(libmtp_Device *self, void *closure) { +Device_device_version(Device *self, void *closure) { Py_INCREF(self->device_version); return self->device_version; } // }}} // Device.ids {{{ static PyObject * -libmtp_Device_ids(libmtp_Device *self, void *closure) { +Device_ids(Device *self, void *closure) { Py_INCREF(self->ids); return self->ids; } // }}} // Device.update_storage_info() {{{ static PyObject* 
-libmtp_Device_update_storage_info(libmtp_Device *self, PyObject *args, PyObject *kwargs) { +Device_update_storage_info(Device *self, PyObject *args) { ENSURE_DEV(NULL); if (LIBMTP_Get_Storage(self->device, LIBMTP_STORAGE_SORTBY_NOTSORTED) < 0) { - PyErr_SetString(MTPError, "Failed to get storage infor for device."); + PyErr_SetString(MTPError, "Failed to get storage info for device."); return NULL; } Py_RETURN_NONE; @@ -320,7 +320,7 @@ libmtp_Device_update_storage_info(libmtp_Device *self, PyObject *args, PyObject // Device.storage_info {{{ static PyObject * -libmtp_Device_storage_info(libmtp_Device *self, void *closure) { +Device_storage_info(Device *self, void *closure) { PyObject *ans, *loc; LIBMTP_devicestorage_t *storage; int ro = 0; @@ -335,11 +335,11 @@ libmtp_Device_storage_info(libmtp_Device *self, void *closure) { if (storage->StorageType == ST_FixedROM || storage->StorageType == ST_RemovableROM || (storage->id & 0x0000FFFFU) == 0x00000000U || storage->AccessCapability == AC_ReadOnly || storage->AccessCapability == AC_ReadOnly_with_Object_Deletion) ro = 1; loc = Py_BuildValue("{s:k,s:O,s:K,s:K,s:K,s:s,s:s,s:O}", - "id", storage->id, + "id", (unsigned long)storage->id, "removable", ((storage->StorageType == ST_RemovableRAM) ? Py_True : Py_False), - "capacity", storage->MaxCapacity, - "freespace_bytes", storage->FreeSpaceInBytes, - "freespace_objects", storage->FreeSpaceInObjects, + "capacity", (unsigned long long)storage->MaxCapacity, + "freespace_bytes", (unsigned long long)storage->FreeSpaceInBytes, + "freespace_objects", (unsigned long long)storage->FreeSpaceInObjects, "name", storage->StorageDescription, "volume_id", storage->VolumeIdentifier, "rw", (ro) ? 
Py_False : Py_True @@ -392,9 +392,9 @@ static int recursive_get_files(LIBMTP_mtpdevice_t *dev, uint32_t storage_id, uin } static PyObject * -libmtp_Device_get_filesystem(libmtp_Device *self, PyObject *args, PyObject *kwargs) { +Device_get_filesystem(Device *self, PyObject *args) { PyObject *ans, *errs; - uint32_t storage_id; + unsigned long storage_id; int ok = 0; ENSURE_DEV(NULL); ENSURE_STORAGE(NULL); @@ -405,7 +405,7 @@ libmtp_Device_get_filesystem(libmtp_Device *self, PyObject *args, PyObject *kwar if (errs == NULL || ans == NULL) { PyErr_NoMemory(); return NULL; } LIBMTP_Clear_Errorstack(self->device); - ok = recursive_get_files(self->device, storage_id, 0, ans, errs); + ok = recursive_get_files(self->device, (uint32_t)storage_id, 0, ans, errs); dump_errorstack(self->device, errs); if (!ok) { Py_DECREF(ans); @@ -419,10 +419,10 @@ libmtp_Device_get_filesystem(libmtp_Device *self, PyObject *args, PyObject *kwar // Device.get_file {{{ static PyObject * -libmtp_Device_get_file(libmtp_Device *self, PyObject *args, PyObject *kwargs) { +Device_get_file(Device *self, PyObject *args) { PyObject *stream, *callback = NULL, *errs; ProgressCallback cb; - uint32_t fileid; + unsigned long fileid; int ret; ENSURE_DEV(NULL); ENSURE_STORAGE(NULL); @@ -436,7 +436,7 @@ libmtp_Device_get_file(libmtp_Device *self, PyObject *args, PyObject *kwargs) { cb.obj = callback; cb.extra = stream; Py_XINCREF(callback); Py_INCREF(stream); cb.state = PyEval_SaveThread(); - ret = LIBMTP_Get_File_To_Handler(self->device, fileid, data_to_python, &cb, report_progress, &cb); + ret = LIBMTP_Get_File_To_Handler(self->device, (uint32_t)fileid, data_to_python, &cb, report_progress, &cb); PyEval_RestoreThread(cb.state); Py_XDECREF(callback); Py_DECREF(stream); @@ -450,11 +450,11 @@ libmtp_Device_get_file(libmtp_Device *self, PyObject *args, PyObject *kwargs) { // Device.put_file {{{ static PyObject * -libmtp_Device_put_file(libmtp_Device *self, PyObject *args, PyObject *kwargs) { +Device_put_file(Device 
*self, PyObject *args) { PyObject *stream, *callback = NULL, *errs, *fo = NULL; ProgressCallback cb; - uint32_t parent_id, storage_id; - uint64_t filesize; + unsigned long parent_id, storage_id; + unsigned long long filesize; int ret; char *name; LIBMTP_file_t f; @@ -467,7 +467,7 @@ libmtp_Device_put_file(libmtp_Device *self, PyObject *args, PyObject *kwargs) { if (callback == NULL || !PyCallable_Check(callback)) callback = NULL; cb.obj = callback; cb.extra = stream; - f.parent_id = parent_id; f.storage_id = storage_id; f.item_id = 0; f.filename = name; f.filetype = LIBMTP_FILETYPE_UNKNOWN; f.filesize = filesize; + f.parent_id = (uint32_t)parent_id; f.storage_id = (uint32_t)storage_id; f.item_id = 0; f.filename = name; f.filetype = LIBMTP_FILETYPE_UNKNOWN; f.filesize = (uint64_t)filesize; Py_XINCREF(callback); Py_INCREF(stream); cb.state = PyEval_SaveThread(); ret = LIBMTP_Send_File_From_Handler(self->device, data_from_python, &cb, &f, report_progress, &cb); @@ -484,9 +484,9 @@ libmtp_Device_put_file(libmtp_Device *self, PyObject *args, PyObject *kwargs) { // Device.delete_object {{{ static PyObject * -libmtp_Device_delete_object(libmtp_Device *self, PyObject *args, PyObject *kwargs) { +Device_delete_object(Device *self, PyObject *args) { PyObject *errs; - uint32_t id; + unsigned long id; int res; ENSURE_DEV(NULL); ENSURE_STORAGE(NULL); @@ -496,7 +496,7 @@ libmtp_Device_delete_object(libmtp_Device *self, PyObject *args, PyObject *kwarg if (errs == NULL) { PyErr_NoMemory(); return NULL; } Py_BEGIN_ALLOW_THREADS; - res = LIBMTP_Delete_Object(self->device, id); + res = LIBMTP_Delete_Object(self->device, (uint32_t)id); Py_END_ALLOW_THREADS; if (res != 0) dump_errorstack(self->device, errs); @@ -505,9 +505,10 @@ libmtp_Device_delete_object(libmtp_Device *self, PyObject *args, PyObject *kwarg // Device.create_folder {{{ static PyObject * -libmtp_Device_create_folder(libmtp_Device *self, PyObject *args, PyObject *kwargs) { +Device_create_folder(Device *self, PyObject 
*args) { PyObject *errs, *fo = NULL; - uint32_t storage_id, parent_id, folder_id; + unsigned long storage_id, parent_id; + uint32_t folder_id; char *name; ENSURE_DEV(NULL); ENSURE_STORAGE(NULL); @@ -517,7 +518,7 @@ libmtp_Device_create_folder(libmtp_Device *self, PyObject *args, PyObject *kwarg if (errs == NULL) { PyErr_NoMemory(); return NULL; } Py_BEGIN_ALLOW_THREADS; - folder_id = LIBMTP_Create_Folder(self->device, name, parent_id, storage_id); + folder_id = LIBMTP_Create_Folder(self->device, name, (uint32_t)parent_id, (uint32_t)storage_id); Py_END_ALLOW_THREADS; if (folder_id == 0) dump_errorstack(self->device, errs); @@ -527,28 +528,28 @@ libmtp_Device_create_folder(libmtp_Device *self, PyObject *args, PyObject *kwarg return Py_BuildValue("NN", fo, errs); } // }}} -static PyMethodDef libmtp_Device_methods[] = { - {"update_storage_info", (PyCFunction)libmtp_Device_update_storage_info, METH_VARARGS, +static PyMethodDef Device_methods[] = { + {"update_storage_info", (PyCFunction)Device_update_storage_info, METH_VARARGS, "update_storage_info() -> Reread the storage info from the device (total, space, free space, storage locations, etc.)" }, - {"get_filesystem", (PyCFunction)libmtp_Device_get_filesystem, METH_VARARGS, + {"get_filesystem", (PyCFunction)Device_get_filesystem, METH_VARARGS, "get_filesystem(storage_id) -> Get the list of files and folders on the device in storage_id. Returns files, errors." }, - {"get_file", (PyCFunction)libmtp_Device_get_file, METH_VARARGS, + {"get_file", (PyCFunction)Device_get_file, METH_VARARGS, "get_file(fileid, stream, callback=None) -> Get the file specified by fileid from the device. stream must be a file-like object. The file will be written to it. callback works the same as in get_filelist(). Returns ok, errs, where errs is a list of errors (if any)." 
}, - {"put_file", (PyCFunction)libmtp_Device_put_file, METH_VARARGS, + {"put_file", (PyCFunction)Device_put_file, METH_VARARGS, "put_file(storage_id, parent_id, filename, stream, size, callback=None) -> Put a file on the device. The file is read from stream. It is put inside the folder identified by parent_id on the storage identified by storage_id. Use parent_id=0 to put it in the root. stream must be a file-like object. size is the size in bytes of the data in stream. callback works the same as in get_filelist(). Returns fileinfo, errs, where errs is a list of errors (if any), and fileinfo is a file information dictionary, as returned by get_filelist(). fileinfo will be None if case or errors." }, - {"create_folder", (PyCFunction)libmtp_Device_create_folder, METH_VARARGS, + {"create_folder", (PyCFunction)Device_create_folder, METH_VARARGS, "create_folder(storage_id, parent_id, name) -> Create a folder named name under parent parent_id (use 0 for root) in the storage identified by storage_id. Returns folderinfo, errors, where folderinfo is the same dict as returned by get_folderlist(), it will be None if there are errors." }, - {"delete_object", (PyCFunction)libmtp_Device_delete_object, METH_VARARGS, + {"delete_object", (PyCFunction)Device_delete_object, METH_VARARGS, "delete_object(id) -> Delete the object identified by id from the device. Can be used to delete files, folders, etc. Returns ok, errs." 
}, @@ -556,52 +557,52 @@ static PyMethodDef libmtp_Device_methods[] = { {NULL} /* Sentinel */ }; -static PyGetSetDef libmtp_Device_getsetters[] = { +static PyGetSetDef Device_getsetters[] = { {(char *)"friendly_name", - (getter)libmtp_Device_friendly_name, NULL, + (getter)Device_friendly_name, NULL, (char *)"The friendly name of this device, can be None.", NULL}, {(char *)"manufacturer_name", - (getter)libmtp_Device_manufacturer_name, NULL, + (getter)Device_manufacturer_name, NULL, (char *)"The manufacturer name of this device, can be None.", NULL}, {(char *)"model_name", - (getter)libmtp_Device_model_name, NULL, + (getter)Device_model_name, NULL, (char *)"The model name of this device, can be None.", NULL}, {(char *)"serial_number", - (getter)libmtp_Device_serial_number, NULL, + (getter)Device_serial_number, NULL, (char *)"The serial number of this device, can be None.", NULL}, {(char *)"device_version", - (getter)libmtp_Device_device_version, NULL, + (getter)Device_device_version, NULL, (char *)"The device version of this device, can be None.", NULL}, {(char *)"ids", - (getter)libmtp_Device_ids, NULL, + (getter)Device_ids, NULL, (char *)"The ids of the device (busnum, devnum, vendor_id, product_id, usb_serialnum)", NULL}, {(char *)"storage_info", - (getter)libmtp_Device_storage_info, NULL, + (getter)Device_storage_info, NULL, (char *)"Information about the storage locations on the device. 
Returns a list of dictionaries where each dictionary corresponds to the LIBMTP_devicestorage_struct.", NULL}, {NULL} /* Sentinel */ }; -static PyTypeObject libmtp_DeviceType = { // {{{ +static PyTypeObject DeviceType = { // {{{ PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ "libmtp.Device", /*tp_name*/ - sizeof(libmtp_Device), /*tp_basicsize*/ + sizeof(Device), /*tp_basicsize*/ 0, /*tp_itemsize*/ - (destructor)libmtp_Device_dealloc, /*tp_dealloc*/ + (destructor)Device_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ @@ -624,15 +625,15 @@ static PyTypeObject libmtp_DeviceType = { // {{{ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ - libmtp_Device_methods, /* tp_methods */ + Device_methods, /* tp_methods */ 0, /* tp_members */ - libmtp_Device_getsetters, /* tp_getset */ + Device_getsetters, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ - (initproc)libmtp_Device_init, /* tp_init */ + (initproc)Device_init, /* tp_init */ 0, /* tp_alloc */ 0, /* tp_new */ }; // }}} @@ -640,7 +641,7 @@ static PyTypeObject libmtp_DeviceType = { // {{{ // }}} End Device object definition static PyObject * -libmtp_set_debug_level(PyObject *self, PyObject *args) { +set_debug_level(PyObject *self, PyObject *args) { int level; if (!PyArg_ParseTuple(args, "i", &level)) return NULL; LIBMTP_Set_Debug(level); @@ -649,18 +650,10 @@ libmtp_set_debug_level(PyObject *self, PyObject *args) { static PyObject * -libmtp_is_mtp_device(PyObject *self, PyObject *args) { - int busnum, devnum, vendor_id, prod_id, ans = 0; - size_t i; +is_mtp_device(PyObject *self, PyObject *args) { + int busnum, devnum, ans = 0; - if (!PyArg_ParseTuple(args, "iiii", &busnum, &devnum, &vendor_id, &prod_id)) return NULL; - - for (i = 0; ; i++) { - if (calibre_mtp_device_table[i].vendor == NULL && calibre_mtp_device_table[i].product == NULL && calibre_mtp_device_table[i].vendor_id == 0xffff) break; - if 
(calibre_mtp_device_table[i].vendor_id == vendor_id && calibre_mtp_device_table[i].product_id == prod_id) { - Py_RETURN_TRUE; - } - } + if (!PyArg_ParseTuple(args, "ii", &busnum, &devnum)) return NULL; /* * LIBMTP_Check_Specific_Device does not seem to work at least on my linux @@ -689,7 +682,7 @@ known_devices(PyObject *self, PyObject *args) { for (i = 0; ; i++) { if (calibre_mtp_device_table[i].vendor == NULL && calibre_mtp_device_table[i].product == NULL && calibre_mtp_device_table[i].vendor_id == 0xffff) break; - d = Py_BuildValue("(HH)", calibre_mtp_device_table[i].vendor_id, calibre_mtp_device_table[i].product_id); + d = Py_BuildValue("(HH)", (unsigned short)calibre_mtp_device_table[i].vendor_id, (unsigned short)calibre_mtp_device_table[i].product_id); if (d == NULL) { Py_DECREF(ans); ans = NULL; break; } if (PyList_Append(ans, d) != 0) { Py_DECREF(d); Py_DECREF(ans); ans = NULL; PyErr_NoMemory(); break; } Py_DECREF(d); @@ -699,12 +692,12 @@ known_devices(PyObject *self, PyObject *args) { } static PyMethodDef libmtp_methods[] = { - {"set_debug_level", libmtp_set_debug_level, METH_VARARGS, + {"set_debug_level", set_debug_level, METH_VARARGS, "set_debug_level(level)\n\nSet the debug level bit mask, see LIBMTP_DEBUG_* constants." }, - {"is_mtp_device", libmtp_is_mtp_device, METH_VARARGS, - "is_mtp_device(busnum, devnum, vendor_id, prod_id)\n\nReturn True if the device is recognized as an MTP device by its vendor/product ids. If it is not recognized a probe is done and True returned if the probe succeeds. Note that probing can cause some devices to malfunction, and it is not very reliable, which is why we prefer to use the device database." + {"is_mtp_device", is_mtp_device, METH_VARARGS, + "is_mtp_device(busnum, devnum)\n\nA probe is done and True returned if the probe succeeds. Note that probing can cause some devices to malfunction, and it is not very reliable, which is why we prefer to use the device database." 
}, {"known_devices", known_devices, METH_VARARGS, @@ -719,8 +712,8 @@ PyMODINIT_FUNC initlibmtp(void) { PyObject *m; - libmtp_DeviceType.tp_new = PyType_GenericNew; - if (PyType_Ready(&libmtp_DeviceType) < 0) + DeviceType.tp_new = PyType_GenericNew; + if (PyType_Ready(&DeviceType) < 0) return; m = Py_InitModule3("libmtp", libmtp_methods, "Interface to libmtp."); @@ -733,8 +726,8 @@ initlibmtp(void) { LIBMTP_Init(); LIBMTP_Set_Debug(LIBMTP_DEBUG_NONE); - Py_INCREF(&libmtp_DeviceType); - PyModule_AddObject(m, "Device", (PyObject *)&libmtp_DeviceType); + Py_INCREF(&DeviceType); + PyModule_AddObject(m, "Device", (PyObject *)&DeviceType); PyModule_AddStringMacro(m, LIBMTP_VERSION_STRING); PyModule_AddIntMacro(m, LIBMTP_DEBUG_NONE); diff --git a/src/calibre/devices/mtp/windows/device.cpp b/src/calibre/devices/mtp/windows/device.cpp index 63eeef7402..3d8d442b6c 100644 --- a/src/calibre/devices/mtp/windows/device.cpp +++ b/src/calibre/devices/mtp/windows/device.cpp @@ -67,7 +67,7 @@ init(Device *self, PyObject *args, PyObject *kwds) // update_device_data() {{{ static PyObject* -update_data(Device *self, PyObject *args, PyObject *kwargs) { +update_data(Device *self, PyObject *args) { PyObject *di = NULL; di = get_device_information(self->device, NULL); if (di == NULL) return NULL; @@ -77,7 +77,7 @@ update_data(Device *self, PyObject *args, PyObject *kwargs) { // get_filesystem() {{{ static PyObject* -py_get_filesystem(Device *self, PyObject *args, PyObject *kwargs) { +py_get_filesystem(Device *self, PyObject *args) { PyObject *storage_id, *ret; wchar_t *storage; @@ -92,7 +92,7 @@ py_get_filesystem(Device *self, PyObject *args, PyObject *kwargs) { // get_file() {{{ static PyObject* -py_get_file(Device *self, PyObject *args, PyObject *kwargs) { +py_get_file(Device *self, PyObject *args) { PyObject *object_id, *stream, *callback = NULL, *ret; wchar_t *object; @@ -109,7 +109,7 @@ py_get_file(Device *self, PyObject *args, PyObject *kwargs) { // create_folder() {{{ static 
PyObject* -py_create_folder(Device *self, PyObject *args, PyObject *kwargs) { +py_create_folder(Device *self, PyObject *args) { PyObject *pparent_id, *pname, *ret; wchar_t *parent_id, *name; @@ -125,7 +125,7 @@ py_create_folder(Device *self, PyObject *args, PyObject *kwargs) { // delete_object() {{{ static PyObject* -py_delete_object(Device *self, PyObject *args, PyObject *kwargs) { +py_delete_object(Device *self, PyObject *args) { PyObject *pobject_id, *ret; wchar_t *object_id; @@ -140,10 +140,10 @@ py_delete_object(Device *self, PyObject *args, PyObject *kwargs) { // get_file() {{{ static PyObject* -py_put_file(Device *self, PyObject *args, PyObject *kwargs) { +py_put_file(Device *self, PyObject *args) { PyObject *pparent_id, *pname, *stream, *callback = NULL, *ret; wchar_t *parent_id, *name; - unsigned PY_LONG_LONG size; + unsigned long long size; if (!PyArg_ParseTuple(args, "OOOK|O", &pparent_id, &pname, &stream, &size, &callback)) return NULL; parent_id = unicode_to_wchar(pparent_id); diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index f61ab68ec9..8e59efdeaf 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -34,7 +34,7 @@ PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds) // Loading/Opening of PDF files {{{ static PyObject * -PDFDoc_load(PDFDoc *self, PyObject *args, PyObject *kwargs) { +PDFDoc_load(PDFDoc *self, PyObject *args) { char *buffer; Py_ssize_t size; if (PyArg_ParseTuple(args, "s#", &buffer, &size)) { @@ -51,7 +51,7 @@ PDFDoc_load(PDFDoc *self, PyObject *args, PyObject *kwargs) { } static PyObject * -PDFDoc_open(PDFDoc *self, PyObject *args, PyObject *kwargs) { +PDFDoc_open(PDFDoc *self, PyObject *args) { char *fname; if (PyArg_ParseTuple(args, "s", &fname)) { @@ -70,7 +70,7 @@ PDFDoc_open(PDFDoc *self, PyObject *args, PyObject *kwargs) { // Saving/writing of PDF files {{{ static PyObject * -PDFDoc_save(PDFDoc *self, PyObject *args, PyObject *kwargs) { 
+PDFDoc_save(PDFDoc *self, PyObject *args) { char *buffer; if (PyArg_ParseTuple(args, "s", &buffer)) { @@ -86,7 +86,7 @@ PDFDoc_save(PDFDoc *self, PyObject *args, PyObject *kwargs) { } static PyObject * -PDFDoc_write(PDFDoc *self, PyObject *args, PyObject *kwargs) { +PDFDoc_write(PDFDoc *self, PyObject *args) { PyObject *ans; try { @@ -108,7 +108,7 @@ PDFDoc_write(PDFDoc *self, PyObject *args, PyObject *kwargs) { // extract_first_page() {{{ static PyObject * -PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { +PDFDoc_extract_first_page(PDFDoc *self, PyObject *args) { try { while (self->doc->GetPageCount() > 1) self->doc->GetPagesTree()->DeletePage(1); } catch(const PdfError & err) { @@ -121,7 +121,7 @@ PDFDoc_extract_first_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { // page_count() {{{ static PyObject * -PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { +PDFDoc_page_count(PDFDoc *self, PyObject *args) { int count; try { count = self->doc->GetPageCount(); @@ -134,7 +134,7 @@ PDFDoc_page_count(PDFDoc *self, PyObject *args, PyObject *kwargs) { // delete_page {{{ static PyObject * -PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { +PDFDoc_delete_page(PDFDoc *self, PyObject *args) { int num = 0; if (PyArg_ParseTuple(args, "i", &num)) { try { @@ -150,7 +150,7 @@ PDFDoc_delete_page(PDFDoc *self, PyObject *args, PyObject *kwargs) { // append() {{{ static PyObject * -PDFDoc_append(PDFDoc *self, PyObject *args, PyObject *kwargs) { +PDFDoc_append(PDFDoc *self, PyObject *args) { PyObject *doc; int typ; From 6e25db5853e95d245a96f2a5277e5b6557e5a90f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 15:00:48 +0530 Subject: [PATCH 40/57] ... 
--- src/calibre/utils/magick/magick.c | 64 +++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/calibre/utils/magick/magick.c b/src/calibre/utils/magick/magick.c index 4b336a2531..8713d2cebb 100644 --- a/src/calibre/utils/magick/magick.c +++ b/src/calibre/utils/magick/magick.c @@ -89,7 +89,7 @@ magick_PixelWand_color_setter(magick_PixelWand *self, PyObject *val, void *closu // PixelWand.destroy {{{ static PyObject * -magick_PixelWand_destroy(magick_PixelWand *self, PyObject *args, PyObject *kwargs) { +magick_PixelWand_destroy(magick_PixelWand *self, PyObject *args) { NULL_CHECK(NULL) self->wand = DestroyPixelWand(self->wand); Py_RETURN_NONE; @@ -197,7 +197,7 @@ magick_DrawingWand_new(PyTypeObject *type, PyObject *args, PyObject *kwds) // DrawingWand.destroy {{{ static PyObject * -magick_DrawingWand_destroy(magick_DrawingWand *self, PyObject *args, PyObject *kwargs) { +magick_DrawingWand_destroy(magick_DrawingWand *self, PyObject *args) { NULL_CHECK(NULL) self->wand = DestroyDrawingWand(self->wand); Py_RETURN_NONE; @@ -493,9 +493,9 @@ typedef struct { } magick_Image; // Method declarations {{{ -static PyObject* magick_Image_compose(magick_Image *self, PyObject *args, PyObject *kwargs); -static PyObject* magick_Image_copy(magick_Image *self, PyObject *args, PyObject *kwargs); -static PyObject* magick_Image_texture(magick_Image *self, PyObject *args, PyObject *kwargs); +static PyObject* magick_Image_compose(magick_Image *self, PyObject *args); +static PyObject* magick_Image_copy(magick_Image *self, PyObject *args); +static PyObject* magick_Image_texture(magick_Image *self, PyObject *args); // }}} static void @@ -527,7 +527,7 @@ magick_Image_new(PyTypeObject *type, PyObject *args, PyObject *kwds) // Image.load {{{ static PyObject * -magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_load(magick_Image *self, PyObject *args) { const char *data; Py_ssize_t dlen; MagickBooleanType res; @@ 
-547,7 +547,7 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.identify {{{ static PyObject * -magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_identify(magick_Image *self, PyObject *args) { const char *data; Py_ssize_t dlen; MagickBooleanType res; @@ -567,7 +567,7 @@ magick_Image_identify(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.open {{{ static PyObject * -magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_read(magick_Image *self, PyObject *args) { const char *data; MagickBooleanType res; @@ -586,7 +586,7 @@ magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.create_canvas {{{ static PyObject * -magick_Image_create_canvas(magick_Image *self, PyObject *args, PyObject *kwargs) +magick_Image_create_canvas(magick_Image *self, PyObject *args) { Py_ssize_t width, height; char *bgcolor; @@ -611,7 +611,7 @@ magick_Image_create_canvas(magick_Image *self, PyObject *args, PyObject *kwargs) // Image.font_metrics {{{ static PyObject * -magick_Image_font_metrics(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_font_metrics(magick_Image *self, PyObject *args) { char *text; PyObject *dw_, *ans, *m; Py_ssize_t i; @@ -641,7 +641,7 @@ magick_Image_font_metrics(magick_Image *self, PyObject *args, PyObject *kwargs) // Image.annotate {{{ static PyObject * -magick_Image_annotate(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_annotate(magick_Image *self, PyObject *args) { char *text; PyObject *dw_; DrawingWand *dw; @@ -662,7 +662,7 @@ magick_Image_annotate(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.export {{{ static PyObject * -magick_Image_export(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_export(magick_Image *self, PyObject *args) { char *fmt; unsigned char *data; PyObject *ans; @@ -790,7 +790,7 @@ 
magick_Image_format_setter(magick_Image *self, PyObject *val, void *closure) { // Image.distort {{{ static PyObject * -magick_Image_distort(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_distort(magick_Image *self, PyObject *args) { int method; Py_ssize_t i, number; PyObject *bestfit, *argv, *t; @@ -826,7 +826,7 @@ magick_Image_distort(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.trim {{{ static PyObject * -magick_Image_trim(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_trim(magick_Image *self, PyObject *args) { double fuzz; NULL_CHECK(NULL) @@ -842,7 +842,7 @@ magick_Image_trim(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.thumbnail {{{ static PyObject * -magick_Image_thumbnail(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_thumbnail(magick_Image *self, PyObject *args) { Py_ssize_t width, height; NULL_CHECK(NULL) @@ -858,7 +858,7 @@ magick_Image_thumbnail(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.crop {{{ static PyObject * -magick_Image_crop(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_crop(magick_Image *self, PyObject *args) { Py_ssize_t width, height, x, y; NULL_CHECK(NULL) @@ -874,7 +874,7 @@ magick_Image_crop(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.set_border_color {{{ static PyObject * -magick_Image_set_border_color(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_set_border_color(magick_Image *self, PyObject *args) { PyObject *obj; magick_PixelWand *pw; @@ -893,7 +893,7 @@ magick_Image_set_border_color(magick_Image *self, PyObject *args, PyObject *kwar // Image.rotate {{{ static PyObject * -magick_Image_rotate(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_rotate(magick_Image *self, PyObject *args) { PyObject *obj; magick_PixelWand *pw; double degrees; @@ -913,7 +913,7 @@ magick_Image_rotate(magick_Image *self, PyObject *args, PyObject 
*kwargs) { // Image.rotate {{{ static PyObject * -magick_Image_flip(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_flip(magick_Image *self, PyObject *args) { PyObject *obj = NULL; MagickBooleanType ret = 0; @@ -930,7 +930,7 @@ magick_Image_flip(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.set_page {{{ static PyObject * -magick_Image_set_page(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_set_page(magick_Image *self, PyObject *args) { Py_ssize_t width, height, x, y; NULL_CHECK(NULL) @@ -946,7 +946,7 @@ magick_Image_set_page(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.set_compression_quality {{{ static PyObject * -magick_Image_set_compression_quality(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_set_compression_quality(magick_Image *self, PyObject *args) { Py_ssize_t quality; NULL_CHECK(NULL) @@ -962,7 +962,7 @@ magick_Image_set_compression_quality(magick_Image *self, PyObject *args, PyObjec // Image.has_transparent_pixels {{{ static PyObject * -magick_Image_has_transparent_pixels(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_has_transparent_pixels(magick_Image *self, PyObject *args) { PixelIterator *pi = NULL; PixelWand **pixels = NULL; int found = 0; @@ -993,7 +993,7 @@ magick_Image_has_transparent_pixels(magick_Image *self, PyObject *args, PyObject // Image.normalize {{{ static PyObject * -magick_Image_normalize(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_normalize(magick_Image *self, PyObject *args) { NULL_CHECK(NULL) if (!MagickNormalizeImage(self->wand)) return magick_set_exception(self->wand); @@ -1005,7 +1005,7 @@ magick_Image_normalize(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.add_border {{{ static PyObject * -magick_Image_add_border(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_add_border(magick_Image *self, PyObject *args) { Py_ssize_t dx, dy; PyObject 
*obj; magick_PixelWand *pw; @@ -1025,7 +1025,7 @@ magick_Image_add_border(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.sharpen {{{ static PyObject * -magick_Image_sharpen(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_sharpen(magick_Image *self, PyObject *args) { double radius, sigma; NULL_CHECK(NULL) @@ -1041,7 +1041,7 @@ magick_Image_sharpen(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.quantize {{{ static PyObject * -magick_Image_quantize(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_quantize(magick_Image *self, PyObject *args) { Py_ssize_t number_colors, treedepth; int colorspace; PyObject *dither, *measure_error; @@ -1060,7 +1060,7 @@ magick_Image_quantize(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.despeckle {{{ static PyObject * -magick_Image_despeckle(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_despeckle(magick_Image *self, PyObject *args) { NULL_CHECK(NULL) if (!MagickDespeckleImage(self->wand)) return magick_set_exception(self->wand); @@ -1107,7 +1107,7 @@ magick_Image_type_setter(magick_Image *self, PyObject *val, void *closure) { // Image.destroy {{{ static PyObject * -magick_Image_destroy(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_destroy(magick_Image *self, PyObject *args) { NULL_CHECK(NULL) self->wand = DestroyMagickWand(self->wand); Py_RETURN_NONE; @@ -1117,7 +1117,7 @@ magick_Image_destroy(magick_Image *self, PyObject *args, PyObject *kwargs) { // Image.set_opacity {{{ static PyObject * -magick_Image_set_opacity(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_set_opacity(magick_Image *self, PyObject *args) { double opacity; NULL_CHECK(NULL) @@ -1310,7 +1310,7 @@ static PyTypeObject magick_ImageType = { // {{{ // Image.compose {{{ static PyObject * -magick_Image_compose(magick_Image *self, PyObject *args, PyObject *kwargs) +magick_Image_compose(magick_Image *self, 
PyObject *args) { PyObject *img, *op_; ssize_t left, top; @@ -1341,7 +1341,7 @@ magick_Image_compose(magick_Image *self, PyObject *args, PyObject *kwargs) // Image.clone {{{ static PyObject * -magick_Image_copy(magick_Image *self, PyObject *args, PyObject *kwargs) +magick_Image_copy(magick_Image *self, PyObject *args) { PyObject *img; magick_Image *src; @@ -1361,7 +1361,7 @@ magick_Image_copy(magick_Image *self, PyObject *args, PyObject *kwargs) // Image.texture {{{ static PyObject * -magick_Image_texture(magick_Image *self, PyObject *args, PyObject *kwargs) { +magick_Image_texture(magick_Image *self, PyObject *args) { PyObject *img; magick_Image *texture; From 0a22c291b745cad59f915f65eb583671d9857ba1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 17:17:33 +0530 Subject: [PATCH 41/57] ... --- src/calibre/utils/podofo/doc.cpp | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 8e59efdeaf..26951fcdce 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -170,6 +170,30 @@ PDFDoc_append(PDFDoc *self, PyObject *args) { Py_RETURN_NONE; } // }}} +// set_box() {{{ +static PyObject * +PDFDoc_set_box(PDFDoc *self, PyObject *args) { + int num = 0; + double left, bottom, width, height; + char *box; + if (!PyArg_ParseTuple(args, "isdddd", &num, &box, &left, &bottom, &width, &height)) return NULL; + + try { + PdfRect r(left, bottom, width, height); + PdfObject o; + r.ToVariant(o); + self->doc->GetPage(num)->GetObject()->GetDictionary().AddKey(PdfName(box), o); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } catch (...) 
{ + PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to set the box"); + return NULL; + } + + Py_RETURN_NONE; +} // }}} + // Properties {{{ static PyObject * @@ -403,6 +427,9 @@ static PyMethodDef PDFDoc_methods[] = { {"append", (PyCFunction)PDFDoc_append, METH_VARARGS, "append(doc) -> Append doc (which must be a PDFDoc) to this document." }, + {"set_box", (PyCFunction)PDFDoc_set_box, METH_VARARGS, + "set_box(page_num, box, left, bottom, width, height) -> Set the PDF bounding box for the page numbered nu, box must be one of: MediaBox, CropBox, TrimBox, BleedBox, ArtBox. The numbers are interpreted as pts." + }, {NULL} /* Sentinel */ From b69fb230c5966f03a87058550574b102e18410ac Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 17:22:51 +0530 Subject: [PATCH 42/57] Remove the unmaintained pdfmanipulate command line utility --- src/calibre/__init__.py | 17 - src/calibre/ebooks/pdf/manipulate/__init__.py | 0 src/calibre/ebooks/pdf/manipulate/cli.py | 72 - src/calibre/ebooks/pdf/manipulate/crop.py | 150 -- src/calibre/ebooks/pdf/manipulate/decrypt.py | 113 -- src/calibre/ebooks/pdf/manipulate/encrypt.py | 107 -- src/calibre/ebooks/pdf/manipulate/info.py | 85 - src/calibre/ebooks/pdf/manipulate/merge.py | 115 -- src/calibre/ebooks/pdf/manipulate/reverse.py | 106 -- src/calibre/ebooks/pdf/manipulate/rotate.py | 105 -- src/calibre/ebooks/pdf/manipulate/split.py | 204 --- src/calibre/ebooks/pdf/verify.py | 44 - src/calibre/linux.py | 1 - src/pyPdf/__init__.py | 2 - src/pyPdf/filters.py | 252 --- src/pyPdf/generic.py | 780 --------- src/pyPdf/pdf.py | 1530 ----------------- src/pyPdf/utils.py | 110 -- src/pyPdf/xmp.py | 355 ---- 19 files changed, 4148 deletions(-) delete mode 100644 src/calibre/ebooks/pdf/manipulate/__init__.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/cli.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/crop.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/decrypt.py delete mode 100644 
src/calibre/ebooks/pdf/manipulate/encrypt.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/info.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/merge.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/reverse.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/rotate.py delete mode 100644 src/calibre/ebooks/pdf/manipulate/split.py delete mode 100644 src/calibre/ebooks/pdf/verify.py delete mode 100644 src/pyPdf/__init__.py delete mode 100644 src/pyPdf/filters.py delete mode 100644 src/pyPdf/generic.py delete mode 100644 src/pyPdf/pdf.py delete mode 100644 src/pyPdf/utils.py delete mode 100644 src/pyPdf/xmp.py diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 58390a314a..bfe23cee67 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -444,23 +444,6 @@ class CurrentDir(object): pass -class StreamReadWrapper(object): - ''' - Used primarily with pyPdf to ensure the stream is properly closed. - ''' - - def __init__(self, stream): - for x in ('read', 'seek', 'tell'): - setattr(self, x, getattr(stream, x)) - - def __exit__(self, *args): - for x in ('read', 'seek', 'tell'): - setattr(self, x, None) - - def __enter__(self): - return self - - def detect_ncpus(): """Detects the number of effective CPUs in the system""" import multiprocessing diff --git a/src/calibre/ebooks/pdf/manipulate/__init__.py b/src/calibre/ebooks/pdf/manipulate/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/calibre/ebooks/pdf/manipulate/cli.py b/src/calibre/ebooks/pdf/manipulate/cli.py deleted file mode 100644 index c6e52f85d3..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/cli.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import with_statement -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Command line interface to run pdf manipulation commands. 
-''' - -import string, sys - -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.ebooks.pdf.manipulate import crop, decrypt, encrypt, \ - info, merge, reverse, rotate, split - -COMMANDS = { - 'crop' : crop, - 'decrypt' : decrypt, - 'encrypt' : encrypt, - 'info' : info, - 'merge' : merge, - 'reverse' : reverse, - 'rotate' : rotate, - 'split' : split, - } - -USAGE = '%prog ' + _('''command ... - -command can be one of the following: -[%%commands] - -Use %prog command --help to get more information about a specific command - -Manipulate a PDF. -''').replace('%%commands', string.join(sorted(COMMANDS.keys()), ', ')) - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(): - return OptionParser(usage=USAGE) - -def main(args=sys.argv): - log = Log() - parser = option_parser() - - if len(args) < 2: - print 'Error: No command sepecified.\n' - print_help(parser, log) - return 1 - - command = args[1].lower().strip() - - if command in COMMANDS.keys(): - del args[1] - return COMMANDS[command].main(args, command) - else: - parser.parse_args(args) - print 'Unknown command %s.\n' % command - print_help(parser, log) - return 1 - - # We should never get here. 
- return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/crop.py b/src/calibre/ebooks/pdf/manipulate/crop.py deleted file mode 100644 index 8957320280..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/crop.py +++ /dev/null @@ -1,150 +0,0 @@ -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009, James Beal , ' \ - '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Crop a pdf file -''' - -import sys -import re -from decimal import Decimal -from optparse import OptionGroup, Option - -from calibre.ebooks.metadata.meta import metadata_from_formats -from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted - -from pyPdf import PdfFileWriter, PdfFileReader - -DEFAULT_CROP = 10 - -USAGE = '\n%prog %%name ' + _('''\ -[options] file.pdf - -Crop a PDF file. -''') - -OPTIONS = set([ - OptionRecommendation(name='output', recommended_value='cropped.pdf', - level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', - help=_('Path to output file. 
By default a file is created in the current directory.')), - OptionRecommendation(name='bottom_left_x', recommended_value=DEFAULT_CROP, - level=OptionRecommendation.LOW, long_switch='left-x', short_switch='x', - help=_('Number of pixels to crop from the left most x (default is %s)') % DEFAULT_CROP), - OptionRecommendation(name='bottom_left_y', recommended_value=DEFAULT_CROP, - level=OptionRecommendation.LOW, long_switch='left-y', short_switch='y', - help=_('Number of pixels to crop from the left most y (default is %s)') % DEFAULT_CROP), - OptionRecommendation(name='top_right_x', recommended_value=DEFAULT_CROP, - level=OptionRecommendation.LOW, long_switch='right-x', short_switch='v', - help=_('Number of pixels to crop from the right most x (default is %s)') % DEFAULT_CROP), - OptionRecommendation(name='top_right_y', recommended_value=DEFAULT_CROP, - level=OptionRecommendation.LOW, long_switch='right-y', short_switch='w', - help=_('Number of pixels to crop from the right most y (default is %s)') % DEFAULT_CROP), - OptionRecommendation(name='bounding', recommended_value=None, - level=OptionRecommendation.LOW, long_switch='bounding', short_switch='b', - help=_('A file generated by ghostscript which allows each page to be individually cropped `gs -dSAFER -dNOPAUSE -dBATCH -sDEVICE=bbox file.pdf 2> bounding`')), -]) - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def option_recommendation_to_cli_option(add_option, rec): - opt = rec.option - switches = ['-'+opt.short_switch] if opt.short_switch else [] - switches.append('--'+opt.long_switch) - attrs = dict(dest=opt.name, help=opt.help, - choices=opt.choices, default=rec.recommended_value) - add_option(Option(*switches, **attrs)) - -def add_options(parser): - group = OptionGroup(parser, _('Crop Options:'), _('Options to control the transformation of 
pdf')) - parser.add_option_group(group) - add_option = group.add_option - - for rec in OPTIONS: - option_recommendation_to_cli_option(add_option, rec) - -def crop_pdf(pdf_path, opts, metadata=None): - if metadata == None: - title = _('Unknown') - author = _('Unknown') - else: - title = metadata.title - author = authors_to_string(metadata.authors) - - input_pdf = PdfFileReader(open(pdf_path, 'rb')) - - bounding_lines = [] - if opts.bounding != None: - try: - bounding = open(opts.bounding , 'r') - bounding_regex = re.compile('%%BoundingBox: (?P\d+) (?P\d+) (?P\d+) (?P\d+)') - except: - raise Exception('Error reading %s' % opts.bounding) - - lines = bounding.readlines() - for line in lines: - if line.startswith('%%BoundingBox:'): - bounding_lines.append(line) - if len(bounding_lines) != input_pdf.numPages: - raise Exception('Error bounding file %s page count does not correspond to specified pdf' % opts.bounding) - - output_pdf = PdfFileWriter(title=title,author=author) - blines = iter(bounding_lines) - for page in input_pdf.pages: - if bounding_lines != []: - mo = bounding_regex.search(blines.next()) - if mo == None: - raise Exception('Error in bounding file %s' % opts.bounding) - page.mediaBox.upperRight = (float(mo.group('top_x')), Decimal(mo.group('top_y'))) - page.mediaBox.lowerLeft = (float(mo.group('bottom_x')), Decimal(mo.group('bottom_y'))) - else: - page.mediaBox.upperRight = (page.bleedBox.getUpperRight_x() - Decimal(opts.top_right_x), page.bleedBox.getUpperRight_y() - Decimal(opts.top_right_y)) - page.mediaBox.lowerLeft = (page.bleedBox.getLowerLeft_x() + Decimal(opts.bottom_left_x), page.bleedBox.getLowerLeft_y() + Decimal(opts.bottom_left_y)) - output_pdf.addPage(page) - - with open(opts.output, 'wb') as output_file: - output_pdf.write(output_file) - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - add_options(parser) - - opts, args = parser.parse_args(args) - args = args[1:] - - if len(args) < 1: - print 'Error: A PDF 
file is required.\n' - print_help(parser, log) - return 1 - - if not is_valid_pdf(args[0]): - print 'Error: Could not read file `%s`.' % args[0] - return 1 - - if is_encrypted(args[0]): - print 'Error: file `%s` is encrypted.' % args[0] - return 1 - - mi = metadata_from_formats([args[0]]) - - crop_pdf(args[0], opts, mi) - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/decrypt.py b/src/calibre/ebooks/pdf/manipulate/decrypt.py deleted file mode 100644 index fd8510efc7..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/decrypt.py +++ /dev/null @@ -1,113 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Decrypt content of PDF. -''' - -import os, sys -from optparse import OptionGroup, Option - -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted - -from pyPdf import PdfFileWriter, PdfFileReader - -USAGE = '\n%prog %%name ' + _('''\ -[options] file.pdf password - -Decrypt a PDF. -''') - -OPTIONS = set([ - OptionRecommendation(name='output', recommended_value='decrypted.pdf', - level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', - help=_('Path to output file. By default a file is created in the current directory.')), -]) - -class DecryptionError(Exception): - def __init__(self, pdf_path): - self.value = 'Unable to decrypt file `%s`.' 
% pdf_path - - def __str__(self): - return repr(self.value) - - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def option_recommendation_to_cli_option(add_option, rec): - opt = rec.option - switches = ['-'+opt.short_switch] if opt.short_switch else [] - switches.append('--'+opt.long_switch) - attrs = dict(dest=opt.name, help=opt.help, - choices=opt.choices, default=rec.recommended_value) - add_option(Option(*switches, **attrs)) - -def add_options(parser): - group = OptionGroup(parser, _('Decrypt Options:'), _('Options to control the transformation of pdf')) - parser.add_option_group(group) - add_option = group.add_option - - for rec in OPTIONS: - option_recommendation_to_cli_option(add_option, rec) - -def decrypt(pdf_path, out_path, password): - pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb')) - - if pdf.decrypt(str(password)) == 0: - raise DecryptionError(pdf_path) - - title = pdf.documentInfo.title if pdf.documentInfo.title else _('Unknown') - author = pdf.documentInfo.author if pdf.documentInfo.author else _('Unknown') - out_pdf = PdfFileWriter(title=title, author=author) - - for page in pdf.pages: - out_pdf.addPage(page) - - with open(out_path, 'wb') as out_file: - out_pdf.write(out_file) - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - add_options(parser) - - opts, args = parser.parse_args(args) - args = args[1:] - - if len(args) < 2: - print 'Error: A PDF file and decryption password is required.\n' - print_help(parser, log) - return 1 - - if not is_valid_pdf(args[0]): - print 'Error: Could not read file `%s`.' % args[0] - return 1 - - if not is_encrypted(args[0]): - print 'Error: file `%s` is not encrypted.' 
% args[0] - return 1 - - try: - decrypt(args[0], opts.output, args[1]) - except DecryptionError as e: - print e.value - return 1 - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/encrypt.py b/src/calibre/ebooks/pdf/manipulate/encrypt.py deleted file mode 100644 index ff3b47b11a..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/encrypt.py +++ /dev/null @@ -1,107 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Encrypt a PDF. -''' - -import os, sys -from optparse import OptionGroup, Option - -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted -from calibre.ebooks.metadata import authors_to_string -from calibre.ebooks.metadata.meta import metadata_from_formats - -from pyPdf import PdfFileWriter, PdfFileReader - -USAGE = '\n%prog %%name ' + _('''\ -[options] file.pdf password - -Encrypt a PDF. -''') - -OPTIONS = set([ - OptionRecommendation(name='output', recommended_value='encrypted.pdf', - level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', - help=_('Path to output file. 
By default a file is created in the current directory.')), -]) - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def option_recommendation_to_cli_option(add_option, rec): - opt = rec.option - switches = ['-'+opt.short_switch] if opt.short_switch else [] - switches.append('--'+opt.long_switch) - attrs = dict(dest=opt.name, help=opt.help, - choices=opt.choices, default=rec.recommended_value) - add_option(Option(*switches, **attrs)) - -def add_options(parser): - group = OptionGroup(parser, _('Encrypt Options:'), _('Options to control the transformation of pdf')) - parser.add_option_group(group) - add_option = group.add_option - - for rec in OPTIONS: - option_recommendation_to_cli_option(add_option, rec) - -def encrypt(pdf_path, out_path, password, metadata=None): - if metadata == None: - title = _('Unknown') - author = _('Unknown') - else: - title = metadata.title - author = authors_to_string(metadata.authors) - - out_pdf = PdfFileWriter(title=title, author=author) - - pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb')) - for page in pdf.pages: - out_pdf.addPage(page) - - with open(out_path, 'wb') as out_file: - out_pdf.encrypt(str(password)) - out_pdf.write(out_file) - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - add_options(parser) - - opts, args = parser.parse_args(args) - args = args[1:] - - if len(args) < 2: - print 'Error: A PDF file and decryption password is required.\n' - print_help(parser, log) - return 1 - - if not is_valid_pdf(args[0]): - print 'Error: Could not read file `%s`.' % args[0] - return 1 - - if is_encrypted(args[0]): - print 'Error: file `%s` is already encrypted.' 
% args[0] - return 1 - - mi = metadata_from_formats([args[0]]) - - encrypt(args[0], opts.output, args[1], mi) - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/info.py b/src/calibre/ebooks/pdf/manipulate/info.py deleted file mode 100644 index ee71dac71d..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/info.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import with_statement -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Merge PDF files into a single PDF document. -''' - -import os, sys - -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted -from calibre.utils.podofo import get_podofo -from calibre import prints - -USAGE = '\n%prog %%name ' + _('''\ -file.pdf ... - -Get info about a PDF. 
-''') - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def print_info(pdf_path): - podofo = get_podofo() - p = podofo.PDFDoc() - p.open(pdf_path) - - fmt = lambda x, y: '%-20s: %s'%(x, y) - - print - - prints(fmt(_('Title'), p.title)) - prints(fmt(_('Author'), p.author)) - prints(fmt(_('Subject'), p.subject)) - prints(fmt(_('Creator'), p.creator)) - prints(fmt(_('Producer'), p.producer)) - prints(fmt(_('Pages'), p.pages)) - prints(fmt(_('File Size'), os.stat(pdf_path).st_size)) - prints(fmt(_('PDF Version'), p.version if p.version else _('Unknown'))) - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - - opts, args = parser.parse_args(args) - args = args[1:] - - if len(args) < 1: - print 'Error: No PDF sepecified.\n' - print_help(parser, log) - return 1 - - bad_pdfs = is_valid_pdfs(args) - if bad_pdfs != []: - for pdf in bad_pdfs: - print 'Error: Could not read file `%s`.' % pdf - return 1 - - enc = False - for pdf in args: - if is_encrypted(pdf): - enc = True - print 'Error: file `%s` is encrypted. Please decrypt first.' % pdf - if enc: - return 1 - - for pdf in args: - print_info(pdf) - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/merge.py b/src/calibre/ebooks/pdf/manipulate/merge.py deleted file mode 100644 index e300136d16..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/merge.py +++ /dev/null @@ -1,115 +0,0 @@ -from __future__ import with_statement -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Merge PDF files into a single PDF document. 
-''' - -import os, sys -from optparse import OptionGroup, Option - -from calibre.ebooks.metadata.meta import metadata_from_formats -from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted - -from pyPdf import PdfFileWriter, PdfFileReader - -USAGE = '\n%prog %%name ' + _('''\ -[options] file1.pdf file2.pdf ... - -Metadata will be used from the first PDF specified. - -Merges individual PDFs. -''') - -OPTIONS = set([ - OptionRecommendation(name='output', recommended_value='merged.pdf', - level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', - help=_('Path to output file. By default a file is created in the current directory.')), -]) - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def option_recommendation_to_cli_option(add_option, rec): - opt = rec.option - switches = ['-'+opt.short_switch] if opt.short_switch else [] - switches.append('--'+opt.long_switch) - attrs = dict(dest=opt.name, help=opt.help, - choices=opt.choices, default=rec.recommended_value) - add_option(Option(*switches, **attrs)) - -def add_options(parser): - group = OptionGroup(parser, _('Merge Options:'), _('Options to control the transformation of pdf')) - parser.add_option_group(group) - add_option = group.add_option - - for rec in OPTIONS: - option_recommendation_to_cli_option(add_option, rec) - -def merge_files(in_paths, out_path, metadata=None): - if metadata == None: - title = _('Unknown') - author = _('Unknown') - else: - title = metadata.title - author = authors_to_string(metadata.authors) - - out_pdf = PdfFileWriter(title=title, author=author) - - 
for pdf_path in in_paths: - pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb')) - for page in pdf.pages: - out_pdf.addPage(page) - - with open(out_path, 'wb') as out_file: - out_pdf.write(out_file) - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - add_options(parser) - - opts, args = parser.parse_args(args) - args = args[1:] - - if len(args) < 2: - print 'Error: Two or more PDF files are required.\n' - print_help(parser, log) - return 1 - - bad_pdfs = is_valid_pdfs(args) - if bad_pdfs != []: - for pdf in bad_pdfs: - print 'Error: Could not read file `%s`.' % pdf - return 1 - - enc = False - for pdf in args: - if is_encrypted(pdf): - enc = True - print 'Error: file `%s` is encrypted.' % pdf - if enc: - return 1 - - mi = metadata_from_formats([args[0]]) - - merge_files(args, opts.output, mi) - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/reverse.py b/src/calibre/ebooks/pdf/manipulate/reverse.py deleted file mode 100644 index b4bbe27a40..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/reverse.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Reverse content of PDF. -''' - -import os, sys -from optparse import OptionGroup, Option - -from calibre.ebooks.metadata.meta import metadata_from_formats -from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted - -from pyPdf import PdfFileWriter, PdfFileReader - -USAGE = '\n%prog %%name ' + _('''\ -[options] file.pdf - -Reverse a PDF. 
-''') - -OPTIONS = set([ - OptionRecommendation(name='output', recommended_value='reversed.pdf', - level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', - help=_('Path to output file. By default a file is created in the current directory.')), -]) - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def option_recommendation_to_cli_option(add_option, rec): - opt = rec.option - switches = ['-'+opt.short_switch] if opt.short_switch else [] - switches.append('--'+opt.long_switch) - attrs = dict(dest=opt.name, help=opt.help, - choices=opt.choices, default=rec.recommended_value) - add_option(Option(*switches, **attrs)) - -def add_options(parser): - group = OptionGroup(parser, _('Reverse Options:'), _('Options to control the transformation of pdf')) - parser.add_option_group(group) - add_option = group.add_option - - for rec in OPTIONS: - option_recommendation_to_cli_option(add_option, rec) - -def reverse(pdf_path, out_path, metadata=None): - if metadata == None: - title = _('Unknown') - author = _('Unknown') - else: - title = metadata.title - author = authors_to_string(metadata.authors) - - out_pdf = PdfFileWriter(title=title, author=author) - - pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb')) - for page in reversed(pdf.pages): - out_pdf.addPage(page) - - with open(out_path, 'wb') as out_file: - out_pdf.write(out_file) - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - add_options(parser) - - opts, args = parser.parse_args(args) - args = args[1:] - - if len(args) < 1: - print 'Error: A PDF file is required.\n' - print_help(parser, log) - return 1 - - if not is_valid_pdf(args[0]): - print 'Error: Could not read file `%s`.' % args[0] - return 1 - - if is_encrypted(args[0]): - print 'Error: file `%s` is encrypted.' 
% args[0] - return 1 - - mi = metadata_from_formats([args[0]]) - - reverse(args[0], opts.output, mi) - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/rotate.py b/src/calibre/ebooks/pdf/manipulate/rotate.py deleted file mode 100644 index ac46a8e0c8..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/rotate.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Rotate pages of a PDF. -''' - -import os, sys -from optparse import OptionGroup, Option - -from calibre.ebooks.metadata.meta import metadata_from_formats -from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted - -from pyPdf import PdfFileWriter, PdfFileReader - -USAGE = '\n%prog %%name ' + _('''\ -file.pdf degrees - -Rotate pages of a PDF clockwise. -''') - -OPTIONS = set([ - OptionRecommendation(name='output', recommended_value='rotated.pdf', - level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', - help=_('Path to output file. 
By default a file is created in the current directory.')), -]) - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def option_recommendation_to_cli_option(add_option, rec): - opt = rec.option - switches = ['-'+opt.short_switch] if opt.short_switch else [] - switches.append('--'+opt.long_switch) - attrs = dict(dest=opt.name, help=opt.help, - choices=opt.choices, default=rec.recommended_value) - add_option(Option(*switches, **attrs)) - -def add_options(parser): - group = OptionGroup(parser, _('Rotate Options:'), _('Options to control the transformation of pdf')) - parser.add_option_group(group) - add_option = group.add_option - - for rec in OPTIONS: - option_recommendation_to_cli_option(add_option, rec) - -def rotate(pdf_path, out_path, degrees, metadata=None): - if metadata == None: - title = _('Unknown') - author = _('Unknown') - else: - title = metadata.title - author = authors_to_string(metadata.authors) - - out_pdf = PdfFileWriter(title=title, author=author) - - pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb')) - for page in pdf.pages: - out_pdf.addPage(page.rotateClockwise(int(degrees))) - - with open(out_path, 'wb') as out_file: - out_pdf.write(out_file) - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - add_options(parser) - - opts, args = parser.parse_args(args) - args = args[1:] - - if len(args) < 2: - print 'Error: A PDF file and how many degrees to rotate is required.\n' - print_help(parser, log) - return 1 - - if not is_valid_pdf(args[0]): - print 'Error: Could not read file `%s`.' % args[0] - return 1 - - if is_encrypted(args[0]): - print 'Error: file `%s` is encrypted.' 
% args[0] - return 1 - - mi = metadata_from_formats([args[0]]) - - rotate(args[0], opts.output, args[1], mi) - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/manipulate/split.py b/src/calibre/ebooks/pdf/manipulate/split.py deleted file mode 100644 index 3ef2549a62..0000000000 --- a/src/calibre/ebooks/pdf/manipulate/split.py +++ /dev/null @@ -1,204 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Split PDF file into multiple PDF documents. -''' - -import os, sys, re -from optparse import OptionGroup, Option - -from calibre.ebooks.metadata.meta import metadata_from_formats -from calibre.ebooks.metadata import authors_to_string -from calibre.utils.config import OptionParser -from calibre.utils.logging import Log -from calibre.constants import preferred_encoding -from calibre.customize.conversion import OptionRecommendation -from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted - -from pyPdf import PdfFileWriter, PdfFileReader - -USAGE = _(''' -%prog %%name [options] file.pdf page_to_split_on ... -%prog %%name [options] file.pdf page_range_to_split_on ... - -Ex. - -%prog %%name file.pdf 6 -%prog %%name file.pdf 6-12 -%prog %%name file.pdf 6-12 8 10 9-20 - -Split a PDF. -''') - -OPTIONS = set([ - OptionRecommendation(name='output', recommended_value='split.pdf', - level=OptionRecommendation.HIGH, long_switch='output', short_switch='o', - help=_('Path to output file. 
By default a file is created in the current directory.')), -]) - -def print_help(parser, log): - help = parser.format_help().encode(preferred_encoding, 'replace') - log(help) - -def option_parser(name): - usage = USAGE.replace('%%name', name) - return OptionParser(usage=usage) - -def option_recommendation_to_cli_option(add_option, rec): - opt = rec.option - switches = ['-'+opt.short_switch] if opt.short_switch else [] - switches.append('--'+opt.long_switch) - attrs = dict(dest=opt.name, help=opt.help, - choices=opt.choices, default=rec.recommended_value) - add_option(Option(*switches, **attrs)) - -def add_options(parser): - group = OptionGroup(parser, _('Split Options:'), _('Options to control the transformation of pdf')) - parser.add_option_group(group) - add_option = group.add_option - - for rec in OPTIONS: - option_recommendation_to_cli_option(add_option, rec) - -def split_pdf(in_path, pages, page_ranges, out_name, metadata=None): - pdf = PdfFileReader(open(os.path.abspath(in_path), 'rb')) - total_pages = pdf.numPages - 1 - - for index in pages+page_ranges: - if index in pages: - write_pdf(pdf, out_name, '%s' % (index + 1), index, total_pages, metadata) - else: - - write_pdf(pdf, out_name, '%s-%s' % (index[0] + 1, index[1] + 1), index[0], index[1], metadata) - -def write_pdf(pdf, name, suffix, start, end, metadata=None): - if metadata == None: - title = _('Unknown') - author = _('Unknown') - else: - title = metadata.title - author = authors_to_string(metadata.authors) - - out_pdf = PdfFileWriter(title=title, author=author) - for page_num in range(start, end + 1): - out_pdf.addPage(pdf.getPage(page_num)) - with open('%s%s.pdf' % (name, suffix), 'wb') as out_file: - out_pdf.write(out_file) - -def split_args(args): - pdf = '' - pages = [] - page_ranges = [] - bad = [] - - for arg in args: - arg = arg.strip() - # Find the pdf input - if re.search('(?iu)^.*?\.pdf[ ]*$', arg) != None: - if pdf == '': - pdf = arg - else: - bad.append(arg) - # Find single indexes - elif 
re.search('^[ ]*\d+[ ]*$', arg) != None: - pages.append(arg) - # Find index ranges - elif re.search('^[ ]*\d+[ ]*-[ ]*\d+[ ]*$', arg) != None: - mo = re.search('^[ ]*(?P\d+)[ ]*-[ ]*(?P\d+)[ ]*$', arg) - start = mo.group('start') - end = mo.group('end') - - # check to see if the range is really a single index - if start == end: - pages.append(start) - else: - page_ranges.append([start, end]) - else: - bad.append(arg) - - bad = sorted(list(set(bad))) - - return pdf, pages, page_ranges, bad - -# Remove duplicates from pages and page_ranges. -# Set pages higher than the total number of pages in the pdf to the last page. -# Return pages and page_ranges as lists of ints. -def clean_page_list(pdf_path, pages, page_ranges): - pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb')) - - total_pages = pdf.numPages - sorted_pages = [] - sorted_ranges = [] - - for index in pages: - index = int(index) - if index > total_pages: - sorted_pages.append(total_pages - 1) - else: - sorted_pages.append(index - 1) - - for start, end in page_ranges: - start = int(start) - end = int(end) - - if start > total_pages and end > total_pages: - sorted_pages.append(total_pages - 1) - continue - - if start > total_pages: - start = total_pages - if end > total_pages: - end = total_pages - page_range = sorted([start - 1, end - 1]) - if page_range not in sorted_ranges: - sorted_ranges.append(page_range) - - # Remove duplicates and sort - pages = sorted(list(set(sorted_pages))) - page_ranges = sorted(sorted_ranges) - - return pages, page_ranges - -def main(args=sys.argv, name=''): - log = Log() - parser = option_parser(name) - add_options(parser) - - opts, args = parser.parse_args(args) - - pdf, pages, page_ranges, unknown = split_args(args[1:]) - - if pdf == '' and (pages == [] or page_ranges == []): - print 'Error: PDF and where to split is required.\n' - print_help(parser, log) - return 1 - - if unknown != []: - for arg in unknown: - print 'Error: Unknown argument `%s`' % arg - 
print_help(parser, log) - return 1 - - if not is_valid_pdf(pdf): - print 'Error: Could not read file `%s`.' % pdf - return 1 - - if is_encrypted(pdf): - print 'Error: file `%s` is encrypted.' % args[0] - return 1 - - pages, page_ranges = clean_page_list(pdf, pages, page_ranges) - - mi = metadata_from_formats([pdf]) - - split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi) - - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/pdf/verify.py b/src/calibre/ebooks/pdf/verify.py deleted file mode 100644 index dea2e15ce1..0000000000 --- a/src/calibre/ebooks/pdf/verify.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import with_statement -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - -''' -Verify PDF files. -''' - -import os - -from pyPdf import PdfFileReader - -def is_valid_pdf(pdf_path): - ''' - Returns True if the pdf file is valid. - ''' - - try: - with open(os.path.abspath(pdf_path), 'rb') as pdf_file: - PdfFileReader(pdf_file) - except: - return False - return True - -def is_valid_pdfs(pdf_paths): - ''' - Returns a list of invalid pdf files. 
- ''' - - invalid = [] - for pdf_path in pdf_paths: - if not is_valid_pdf(pdf_path): - invalid.append(pdf_path) - return invalid - -def is_encrypted(pdf_path): - with open(os.path.abspath(pdf_path), 'rb') as pdf_file: - pdf = PdfFileReader(pdf_file) - if pdf.isEncrypted: - return True - return False diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 1a66324d9f..a50cedb001 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -29,7 +29,6 @@ entry_points = { 'calibre-parallel = calibre.utils.ipc.worker:main', 'calibre-customize = calibre.customize.ui:main', 'calibre-complete = calibre.utils.complete:main', - 'pdfmanipulate = calibre.ebooks.pdf.manipulate.cli:main', 'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main', 'epub-fix = calibre.ebooks.epub.fix.main:main', 'calibre-smtp = calibre.utils.smtp:main', diff --git a/src/pyPdf/__init__.py b/src/pyPdf/__init__.py deleted file mode 100644 index af02553da6..0000000000 --- a/src/pyPdf/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from pdf import PdfFileReader, PdfFileWriter -__all__ = ["pdf"] diff --git a/src/pyPdf/filters.py b/src/pyPdf/filters.py deleted file mode 100644 index 7fe10fb481..0000000000 --- a/src/pyPdf/filters.py +++ /dev/null @@ -1,252 +0,0 @@ -# vim: sw=4:expandtab:foldmethod=marker -# -# Copyright (c) 2006, Mathieu Fenniak -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# * The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - - -""" -Implementation of stream filters for PDF. -""" -__author__ = "Mathieu Fenniak" -__author_email__ = "biziqe@mathieu.fenniak.net" - -from utils import PdfReadError -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -try: - import zlib - def decompress(data): - return zlib.decompress(data) - def compress(data): - return zlib.compress(data) -except ImportError: - # Unable to import zlib. Attempt to use the System.IO.Compression - # library from the .NET framework. 
(IronPython only) - import System - from System import IO, Collections, Array - def _string_to_bytearr(buf): - retval = Array.CreateInstance(System.Byte, len(buf)) - for i in range(len(buf)): - retval[i] = ord(buf[i]) - return retval - def _bytearr_to_string(bytes): - retval = "" - for i in range(bytes.Length): - retval += chr(bytes[i]) - return retval - def _read_bytes(stream): - ms = IO.MemoryStream() - buf = Array.CreateInstance(System.Byte, 2048) - while True: - bytes = stream.Read(buf, 0, buf.Length) - if bytes == 0: - break - else: - ms.Write(buf, 0, bytes) - retval = ms.ToArray() - ms.Close() - return retval - def decompress(data): - bytes = _string_to_bytearr(data) - ms = IO.MemoryStream() - ms.Write(bytes, 0, bytes.Length) - ms.Position = 0 # fseek 0 - gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress) - bytes = _read_bytes(gz) - retval = _bytearr_to_string(bytes) - gz.Close() - return retval - def compress(data): - bytes = _string_to_bytearr(data) - ms = IO.MemoryStream() - gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True) - gz.Write(bytes, 0, bytes.Length) - gz.Close() - ms.Position = 0 # fseek 0 - bytes = ms.ToArray() - retval = _bytearr_to_string(bytes) - ms.Close() - return retval - - -class FlateDecode(object): - def decode(data, decodeParms): - data = decompress(data) - predictor = 1 - if decodeParms: - predictor = decodeParms.get("/Predictor", 1) - # predictor 1 == no predictor - if predictor != 1: - columns = decodeParms["/Columns"] - # PNG prediction: - if predictor >= 10 and predictor <= 15: - output = StringIO() - # PNG prediction can vary from row to row - rowlength = columns + 1 - assert len(data) % rowlength == 0 - prev_rowdata = (0,) * rowlength - for row in xrange(len(data) / rowlength): - rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]] - filterByte = rowdata[0] - if filterByte == 0: - pass - elif filterByte == 1: - for i in range(2, rowlength): - 
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256 - elif filterByte == 2: - for i in range(1, rowlength): - rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256 - else: - # unsupported PNG filter - raise PdfReadError("Unsupported PNG filter %r" % filterByte) - prev_rowdata = rowdata - output.write(''.join([chr(x) for x in rowdata[1:]])) - data = output.getvalue() - else: - # unsupported predictor - raise PdfReadError("Unsupported flatedecode predictor %r" % predictor) - return data - decode = staticmethod(decode) - - def encode(data): - return compress(data) - encode = staticmethod(encode) - -class ASCIIHexDecode(object): - def decode(data, decodeParms=None): - retval = "" - char = "" - x = 0 - while True: - c = data[x] - if c == ">": - break - elif c.isspace(): - x += 1 - continue - char += c - if len(char) == 2: - retval += chr(int(char, base=16)) - char = "" - x += 1 - assert char == "" - return retval - decode = staticmethod(decode) - -class ASCII85Decode(object): - def decode(data, decodeParms=None): - retval = "" - group = [] - x = 0 - hitEod = False - # remove all whitespace from data - data = [y for y in data if not (y in ' \n\r\t')] - while not hitEod: - c = data[x] - if len(retval) == 0 and c == "<" and data[x+1] == "~": - x += 2 - continue - #elif c.isspace(): - # x += 1 - # continue - elif c == 'z': - assert len(group) == 0 - retval += '\x00\x00\x00\x00' - continue - elif c == "~" and data[x+1] == ">": - if len(group) != 0: - # cannot have a final group of just 1 char - assert len(group) > 1 - cnt = len(group) - 1 - group += [ 85, 85, 85 ] - hitEod = cnt - else: - break - else: - c = ord(c) - 33 - assert c >= 0 and c < 85 - group += [ c ] - if len(group) >= 5: - b = group[0] * (85**4) + \ - group[1] * (85**3) + \ - group[2] * (85**2) + \ - group[3] * 85 + \ - group[4] - assert b < (2**32 - 1) - c4 = chr((b >> 0) % 256) - c3 = chr((b >> 8) % 256) - c2 = chr((b >> 16) % 256) - c1 = chr(b >> 24) - retval += (c1 + c2 + c3 + c4) - if hitEod: - retval = 
retval[:-4+hitEod] - group = [] - x += 1 - return retval - decode = staticmethod(decode) - -def decodeStreamData(stream): - from generic import NameObject - filters = stream.get("/Filter", ()) - if len(filters) and not isinstance(filters[0], NameObject): - # we have a single filter instance - filters = (filters,) - data = stream._data - for filterType in filters: - if filterType == "/FlateDecode": - data = FlateDecode.decode(data, stream.get("/DecodeParms")) - elif filterType == "/ASCIIHexDecode": - data = ASCIIHexDecode.decode(data) - elif filterType == "/ASCII85Decode": - data = ASCII85Decode.decode(data) - elif filterType == "/Crypt": - decodeParams = stream.get("/DecodeParams", {}) - if "/Name" not in decodeParams and "/Type" not in decodeParams: - pass - else: - raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet") - else: - # unsupported filter - raise NotImplementedError("unsupported filter %s" % filterType) - return data - -if __name__ == "__main__": - assert "abc" == ASCIIHexDecode.decode('61\n626\n3>') - - ascii85Test = """ - <~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKFCj@.4Gp$d7F!,L7@<6@)/0JDEF@3BB/F*&OCAfu2/AKY - i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF-FD5W8ARlolDIa - l(DIduD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~> - """ - ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure." - assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText - diff --git a/src/pyPdf/generic.py b/src/pyPdf/generic.py deleted file mode 100644 index 5447ef5fbc..0000000000 --- a/src/pyPdf/generic.py +++ /dev/null @@ -1,780 +0,0 @@ -# vim: sw=4:expandtab:foldmethod=marker -# -# Copyright (c) 2006, Mathieu Fenniak -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. 
- - -""" -Implementation of generic PDF objects (dictionary, number, string, and so on) -""" -__author__ = "Mathieu Fenniak" -__author_email__ = "biziqe@mathieu.fenniak.net" - -import re -from utils import readNonWhitespace, RC4_encrypt -import filters -import utils -import decimal -import codecs - -def readObject(stream, pdf): - tok = stream.read(1) - stream.seek(-1, 1) # reset to start - if tok == 't' or tok == 'f': - # boolean object - return BooleanObject.readFromStream(stream) - elif tok == '(': - # string object - return readStringFromStream(stream) - elif tok == '/': - # name object - return NameObject.readFromStream(stream) - elif tok == '[': - # array object - return ArrayObject.readFromStream(stream, pdf) - elif tok == 'n': - # null object - return NullObject.readFromStream(stream) - elif tok == '<': - # hexadecimal string OR dictionary - peek = stream.read(2) - stream.seek(-2, 1) # reset to start - if peek == '<<': - return DictionaryObject.readFromStream(stream, pdf) - else: - return readHexStringFromStream(stream) - elif tok == '%': - # comment - while tok not in ('\r', '\n'): - tok = stream.read(1) - tok = readNonWhitespace(stream) - stream.seek(-1, 1) - return readObject(stream, pdf) - else: - # number object OR indirect reference - if tok == '+' or tok == '-': - # number - return NumberObject.readFromStream(stream) - peek = stream.read(20) - stream.seek(-len(peek), 1) # reset to start - if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) != None: - return IndirectObject.readFromStream(stream, pdf) - else: - return NumberObject.readFromStream(stream) - -class PdfObject(object): - def getObject(self): - """Resolves indirect references.""" - return self - - -class NullObject(PdfObject): - def writeToStream(self, stream, encryption_key): - stream.write("null") - - def readFromStream(stream): - nulltxt = stream.read(4) - if nulltxt != "null": - raise utils.PdfReadError, "error reading null object" - return NullObject() - readFromStream = 
staticmethod(readFromStream) - - -class BooleanObject(PdfObject): - def __init__(self, value): - self.value = value - - def writeToStream(self, stream, encryption_key): - if self.value: - stream.write("true") - else: - stream.write("false") - - def readFromStream(stream): - word = stream.read(4) - if word == "true": - return BooleanObject(True) - elif word == "fals": - stream.read(1) - return BooleanObject(False) - assert False - readFromStream = staticmethod(readFromStream) - - -class ArrayObject(list, PdfObject): - def writeToStream(self, stream, encryption_key): - stream.write("[") - for data in self: - stream.write(" ") - data.writeToStream(stream, encryption_key) - stream.write(" ]") - - def readFromStream(stream, pdf): - arr = ArrayObject() - tmp = stream.read(1) - if tmp != "[": - raise utils.PdfReadError, "error reading array" - while True: - # skip leading whitespace - tok = stream.read(1) - while tok.isspace(): - tok = stream.read(1) - stream.seek(-1, 1) - # check for array ending - peekahead = stream.read(1) - if peekahead == "]": - break - stream.seek(-1, 1) - # read and append obj - arr.append(readObject(stream, pdf)) - return arr - readFromStream = staticmethod(readFromStream) - - -class IndirectObject(PdfObject): - def __init__(self, idnum, generation, pdf): - self.idnum = idnum - self.generation = generation - self.pdf = pdf - - def getObject(self): - return self.pdf.getObject(self).getObject() - - def __repr__(self): - return "IndirectObject(%r, %r)" % (self.idnum, self.generation) - - def __eq__(self, other): - return ( - other != None and - isinstance(other, IndirectObject) and - self.idnum == other.idnum and - self.generation == other.generation and - self.pdf is other.pdf - ) - - def __ne__(self, other): - return not self.__eq__(other) - - def writeToStream(self, stream, encryption_key): - stream.write("%s %s R" % (self.idnum, self.generation)) - - def readFromStream(stream, pdf): - idnum = "" - while True: - tok = stream.read(1) - if 
tok.isspace(): - break - idnum += tok - generation = "" - while True: - tok = stream.read(1) - if tok.isspace(): - break - generation += tok - r = stream.read(1) - if r != "R": - raise utils.PdfReadError("error reading indirect object reference") - return IndirectObject(int(idnum), int(generation), pdf) - readFromStream = staticmethod(readFromStream) - - -class FloatObject(decimal.Decimal, PdfObject): - def __new__(cls, value="0", context=None): - return decimal.Decimal.__new__(cls, str(value), context) - def __repr__(self): - return str(self) - def writeToStream(self, stream, encryption_key): - stream.write(str(self)) - - -class NumberObject(int, PdfObject): - def __init__(self, value): - int.__init__(self, value) - - def writeToStream(self, stream, encryption_key): - stream.write(repr(self)) - - def readFromStream(stream): - name = "" - while True: - tok = stream.read(1) - if tok != '+' and tok != '-' and tok != '.' and not tok.isdigit(): - stream.seek(-1, 1) - break - name += tok - if name.find(".") != -1: - return FloatObject(name) - else: - return NumberObject(name) - readFromStream = staticmethod(readFromStream) - - -## -# Given a string (either a "str" or "unicode"), create a ByteStringObject or a -# TextStringObject to represent the string. -def createStringObject(string): - if isinstance(string, unicode): - return TextStringObject(string) - elif isinstance(string, str): - if string.startswith(codecs.BOM_UTF16_BE): - retval = TextStringObject(string.decode("utf-16")) - retval.autodetect_utf16 = True - return retval - else: - # This is probably a big performance hit here, but we need to - # convert string objects into the text/unicode-aware version if - # possible... and the only way to check if that's possible is - # to try. Some strings are strings, some are just byte arrays. 
- try: - retval = TextStringObject(decode_pdfdocencoding(string)) - retval.autodetect_pdfdocencoding = True - return retval - except UnicodeDecodeError: - return ByteStringObject(string) - else: - raise TypeError("createStringObject should have str or unicode arg") - - -def readHexStringFromStream(stream): - stream.read(1) - txt = "" - x = "" - while True: - tok = readNonWhitespace(stream) - if tok == ">": - break - x += tok - if len(x) == 2: - txt += chr(int(x, base=16)) - x = "" - if len(x) == 1: - x += "0" - if len(x) == 2: - txt += chr(int(x, base=16)) - return createStringObject(txt) - - -def readStringFromStream(stream): - tok = stream.read(1) - parens = 1 - txt = "" - while True: - tok = stream.read(1) - if tok == "(": - parens += 1 - elif tok == ")": - parens -= 1 - if parens == 0: - break - elif tok == "\\": - tok = stream.read(1) - if tok == "n": - tok = "\n" - elif tok == "r": - tok = "\r" - elif tok == "t": - tok = "\t" - elif tok == "b": - tok = "\b" - elif tok == "f": - tok = "\f" - elif tok == "(": - tok = "(" - elif tok == ")": - tok = ")" - elif tok == "\\": - tok = "\\" - elif tok.isdigit(): - tok += stream.read(2) - tok = chr(int(tok, base=8)) - elif tok in "\n\r": - # This case is hit when a backslash followed by a line - # break occurs. If it's a multi-char EOL, consume the - # second character: - tok = stream.read(1) - if not tok in "\n\r": - stream.seek(-1, 1) - # Then don't add anything to the actual string, since this - # line break was escaped: - tok = '' - else: - raise utils.PdfReadError("Unexpected escaped string") - txt += tok - return createStringObject(txt) - - -## -# Represents a string object where the text encoding could not be determined. -# This occurs quite often, as the PDF spec doesn't provide an alternate way to -# represent strings -- for example, the encryption data stored in files (like -# /O) is clearly not text, but is still stored in a "String" object. 
-class ByteStringObject(str, PdfObject): - - ## - # For compatibility with TextStringObject.original_bytes. This method - # returns self. - original_bytes = property(lambda self: self) - - def writeToStream(self, stream, encryption_key): - bytearr = self - if encryption_key: - bytearr = RC4_encrypt(encryption_key, bytearr) - stream.write("<") - stream.write(bytearr.encode("hex")) - stream.write(">") - - -## -# Represents a string object that has been decoded into a real unicode string. -# If read from a PDF document, this string appeared to match the -# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to -# occur. -class TextStringObject(unicode, PdfObject): - autodetect_pdfdocencoding = False - autodetect_utf16 = False - - ## - # It is occasionally possible that a text string object gets created where - # a byte string object was expected due to the autodetection mechanism -- - # if that occurs, this "original_bytes" property can be used to - # back-calculate what the original encoded bytes were. - original_bytes = property(lambda self: self.get_original_bytes()) - - def get_original_bytes(self): - # We're a text string object, but the library is trying to get our raw - # bytes. This can happen if we auto-detected this string as text, but - # we were wrong. It's pretty common. Return the original bytes that - # would have been used to create this object, based upon the autodetect - # method. - if self.autodetect_utf16: - return codecs.BOM_UTF16_BE + self.encode("utf-16be") - elif self.autodetect_pdfdocencoding: - return encode_pdfdocencoding(self) - else: - raise Exception("no information about original bytes") - - def writeToStream(self, stream, encryption_key): - # Try to write the string out as a PDFDocEncoding encoded string. It's - # nicer to look at in the PDF file. Sadly, we take a performance hit - # here for trying... 
- try: - bytearr = encode_pdfdocencoding(self) - except UnicodeEncodeError: - bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be") - if encryption_key: - bytearr = RC4_encrypt(encryption_key, bytearr) - obj = ByteStringObject(bytearr) - obj.writeToStream(stream, None) - else: - stream.write("(") - for c in bytearr: - if not c.isalnum() and c != ' ': - stream.write("\\%03o" % ord(c)) - else: - stream.write(c) - stream.write(")") - - -class NameObject(str, PdfObject): - delimiterCharacters = "(", ")", "<", ">", "[", "]", "{", "}", "/", "%" - - def __init__(self, data): - str.__init__(self, data) - - def writeToStream(self, stream, encryption_key): - stream.write(self) - - def readFromStream(stream): - name = stream.read(1) - if name != "/": - raise utils.PdfReadError, "name read error" - while True: - tok = stream.read(1) - if tok.isspace() or tok in NameObject.delimiterCharacters: - stream.seek(-1, 1) - break - name += tok - return NameObject(name) - readFromStream = staticmethod(readFromStream) - - -class DictionaryObject(dict, PdfObject): - - def __init__(self, *args, **kwargs): - if len(args) == 0: - self.update(kwargs) - elif len(args) == 1: - arr = args[0] - # If we're passed a list/tuple, make a dict out of it - if not hasattr(arr, "iteritems"): - newarr = {} - for k, v in arr: - newarr[k] = v - arr = newarr - self.update(arr) - else: - raise TypeError("dict expected at most 1 argument, got 3") - - def update(self, arr): - # note, a ValueError halfway through copying values - # will leave half the values in this dict. 
- for k, v in arr.iteritems(): - self.__setitem__(k, v) - - def raw_get(self, key): - return dict.__getitem__(self, key) - - def __setitem__(self, key, value): - if not isinstance(key, PdfObject): - raise ValueError("key must be PdfObject") - if not isinstance(value, PdfObject): - raise ValueError("value must be PdfObject") - return dict.__setitem__(self, key, value) - - def setdefault(self, key, value=None): - if not isinstance(key, PdfObject): - raise ValueError("key must be PdfObject") - if not isinstance(value, PdfObject): - raise ValueError("value must be PdfObject") - return dict.setdefault(self, key, value) - - def __getitem__(self, key): - return dict.__getitem__(self, key).getObject() - - ## - # Retrieves XMP (Extensible Metadata Platform) data relevant to the - # this object, if available. - #

- # Stability: Added in v1.12, will exist for all future v1.x releases. - # @return Returns a {@link #xmp.XmpInformation XmlInformation} instance - # that can be used to access XMP metadata from the document. Can also - # return None if no metadata was found on the document root. - def getXmpMetadata(self): - metadata = self.get("/Metadata", None) - if metadata == None: - return None - metadata = metadata.getObject() - import xmp - if not isinstance(metadata, xmp.XmpInformation): - metadata = xmp.XmpInformation(metadata) - self[NameObject("/Metadata")] = metadata - return metadata - - ## - # Read-only property that accesses the {@link - # #DictionaryObject.getXmpData getXmpData} function. - #

- # Stability: Added in v1.12, will exist for all future v1.x releases. - xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None) - - def writeToStream(self, stream, encryption_key): - stream.write("<<\n") - for key, value in self.items(): - key.writeToStream(stream, encryption_key) - stream.write(" ") - value.writeToStream(stream, encryption_key) - stream.write("\n") - stream.write(">>") - - def readFromStream(stream, pdf): - tmp = stream.read(2) - if tmp != "<<": - raise utils.PdfReadError, "dictionary read error" - data = {} - while True: - tok = readNonWhitespace(stream) - if tok == ">": - stream.read(1) - break - stream.seek(-1, 1) - key = readObject(stream, pdf) - tok = readNonWhitespace(stream) - stream.seek(-1, 1) - value = readObject(stream, pdf) - if data.has_key(key): - # multiple definitions of key not permitted - raise utils.PdfReadError, "multiple definitions in dictionary" - data[key] = value - pos = stream.tell() - s = readNonWhitespace(stream) - if s == 's' and stream.read(5) == 'tream': - eol = stream.read(1) - # odd PDF file output has spaces after 'stream' keyword but before EOL. - # patch provided by Danial Sandler - while eol == ' ': - eol = stream.read(1) - assert eol in ("\n", "\r") - if eol == "\r": - # read \n after - stream.read(1) - # this is a stream object, not a dictionary - assert data.has_key("/Length") - length = data["/Length"] - if isinstance(length, IndirectObject): - t = stream.tell() - length = pdf.getObject(length) - stream.seek(t, 0) - data["__streamdata__"] = stream.read(length) - e = readNonWhitespace(stream) - ndstream = stream.read(8) - if (e + ndstream) != "endstream": - # (sigh) - the odd PDF file has a length that is too long, so - # we need to read backwards to find the "endstream" ending. - # ReportLab (unknown version) generates files with this bug, - # and Python users into PDF files tend to be our audience. - # we need to do this to correct the streamdata and chop off - # an extra character. 
- pos = stream.tell() - stream.seek(-10, 1) - end = stream.read(9) - if end == "endstream": - # we found it by looking back one character further. - data["__streamdata__"] = data["__streamdata__"][:-1] - else: - stream.seek(pos, 0) - raise utils.PdfReadError, "Unable to find 'endstream' marker after stream." - else: - stream.seek(pos, 0) - if data.has_key("__streamdata__"): - return StreamObject.initializeFromDictionary(data) - else: - retval = DictionaryObject() - retval.update(data) - return retval - readFromStream = staticmethod(readFromStream) - - -class StreamObject(DictionaryObject): - def __init__(self): - self._data = None - self.decodedSelf = None - - def writeToStream(self, stream, encryption_key): - self[NameObject("/Length")] = NumberObject(len(self._data)) - DictionaryObject.writeToStream(self, stream, encryption_key) - del self["/Length"] - stream.write("\nstream\n") - data = self._data - if encryption_key: - data = RC4_encrypt(encryption_key, data) - stream.write(data) - stream.write("\nendstream") - - def initializeFromDictionary(data): - if data.has_key("/Filter"): - retval = EncodedStreamObject() - else: - retval = DecodedStreamObject() - retval._data = data["__streamdata__"] - del data["__streamdata__"] - del data["/Length"] - retval.update(data) - return retval - initializeFromDictionary = staticmethod(initializeFromDictionary) - - def flateEncode(self): - if self.has_key("/Filter"): - f = self["/Filter"] - if isinstance(f, ArrayObject): - f.insert(0, NameObject("/FlateDecode")) - else: - newf = ArrayObject() - newf.append(NameObject("/FlateDecode")) - newf.append(f) - f = newf - else: - f = NameObject("/FlateDecode") - retval = EncodedStreamObject() - retval[NameObject("/Filter")] = f - retval._data = filters.FlateDecode.encode(self._data) - return retval - - -class DecodedStreamObject(StreamObject): - def getData(self): - return self._data - - def setData(self, data): - self._data = data - - -class EncodedStreamObject(StreamObject): - def 
__init__(self): - self.decodedSelf = None - - def getData(self): - if self.decodedSelf: - # cached version of decoded object - return self.decodedSelf.getData() - else: - # create decoded object - decoded = DecodedStreamObject() - decoded._data = filters.decodeStreamData(self) - for key, value in self.items(): - if not key in ("/Length", "/Filter", "/DecodeParms"): - decoded[key] = value - self.decodedSelf = decoded - return decoded._data - - def setData(self, data): - raise utils.PdfReadError, "Creating EncodedStreamObject is not currently supported" - - -class RectangleObject(ArrayObject): - def __init__(self, arr): - # must have four points - assert len(arr) == 4 - # automatically convert arr[x] into NumberObject(arr[x]) if necessary - ArrayObject.__init__(self, [self.ensureIsNumber(x) for x in arr]) - - def ensureIsNumber(self, value): - if not isinstance(value, (NumberObject, FloatObject)): - value = FloatObject(value) - return value - - def __repr__(self): - return "RectangleObject(%s)" % repr(list(self)) - - def getLowerLeft_x(self): - return self[0] - - def getLowerLeft_y(self): - return self[1] - - def getUpperRight_x(self): - return self[2] - - def getUpperRight_y(self): - return self[3] - - def getUpperLeft_x(self): - return self.getLowerLeft_x() - - def getUpperLeft_y(self): - return self.getUpperRight_y() - - def getLowerRight_x(self): - return self.getUpperRight_x() - - def getLowerRight_y(self): - return self.getLowerLeft_y() - - def getLowerLeft(self): - return self.getLowerLeft_x(), self.getLowerLeft_y() - - def getLowerRight(self): - return self.getLowerRight_x(), self.getLowerRight_y() - - def getUpperLeft(self): - return self.getUpperLeft_x(), self.getUpperLeft_y() - - def getUpperRight(self): - return self.getUpperRight_x(), self.getUpperRight_y() - - def setLowerLeft(self, value): - self[0], self[1] = [self.ensureIsNumber(x) for x in value] - - def setLowerRight(self, value): - self[2], self[1] = [self.ensureIsNumber(x) for x in value] - - def 
setUpperLeft(self, value): - self[0], self[3] = [self.ensureIsNumber(x) for x in value] - - def setUpperRight(self, value): - self[2], self[3] = [self.ensureIsNumber(x) for x in value] - - lowerLeft = property(getLowerLeft, setLowerLeft, None, None) - lowerRight = property(getLowerRight, setLowerRight, None, None) - upperLeft = property(getUpperLeft, setUpperLeft, None, None) - upperRight = property(getUpperRight, setUpperRight, None, None) - - -def encode_pdfdocencoding(unicode_string): - retval = '' - for c in unicode_string: - try: - retval += chr(_pdfDocEncoding_rev[c]) - except KeyError: - raise UnicodeEncodeError("pdfdocencoding", c, -1, -1, - "does not exist in translation table") - return retval - -def decode_pdfdocencoding(byte_array): - retval = u'' - for b in byte_array: - c = _pdfDocEncoding[ord(b)] - if c == u'\u0000': - raise UnicodeDecodeError("pdfdocencoding", b, -1, -1, - "does not exist in translation table") - retval += c - return retval - -_pdfDocEncoding = ( - u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', - u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', - u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', u'\u0000', - u'\u02d8', u'\u02c7', u'\u02c6', u'\u02d9', u'\u02dd', u'\u02db', u'\u02da', u'\u02dc', - u'\u0020', u'\u0021', u'\u0022', u'\u0023', u'\u0024', u'\u0025', u'\u0026', u'\u0027', - u'\u0028', u'\u0029', u'\u002a', u'\u002b', u'\u002c', u'\u002d', u'\u002e', u'\u002f', - u'\u0030', u'\u0031', u'\u0032', u'\u0033', u'\u0034', u'\u0035', u'\u0036', u'\u0037', - u'\u0038', u'\u0039', u'\u003a', u'\u003b', u'\u003c', u'\u003d', u'\u003e', u'\u003f', - u'\u0040', u'\u0041', u'\u0042', u'\u0043', u'\u0044', u'\u0045', u'\u0046', u'\u0047', - u'\u0048', u'\u0049', u'\u004a', u'\u004b', u'\u004c', u'\u004d', u'\u004e', u'\u004f', - u'\u0050', u'\u0051', u'\u0052', u'\u0053', u'\u0054', u'\u0055', u'\u0056', u'\u0057', - u'\u0058', 
u'\u0059', u'\u005a', u'\u005b', u'\u005c', u'\u005d', u'\u005e', u'\u005f', - u'\u0060', u'\u0061', u'\u0062', u'\u0063', u'\u0064', u'\u0065', u'\u0066', u'\u0067', - u'\u0068', u'\u0069', u'\u006a', u'\u006b', u'\u006c', u'\u006d', u'\u006e', u'\u006f', - u'\u0070', u'\u0071', u'\u0072', u'\u0073', u'\u0074', u'\u0075', u'\u0076', u'\u0077', - u'\u0078', u'\u0079', u'\u007a', u'\u007b', u'\u007c', u'\u007d', u'\u007e', u'\u0000', - u'\u2022', u'\u2020', u'\u2021', u'\u2026', u'\u2014', u'\u2013', u'\u0192', u'\u2044', - u'\u2039', u'\u203a', u'\u2212', u'\u2030', u'\u201e', u'\u201c', u'\u201d', u'\u2018', - u'\u2019', u'\u201a', u'\u2122', u'\ufb01', u'\ufb02', u'\u0141', u'\u0152', u'\u0160', - u'\u0178', u'\u017d', u'\u0131', u'\u0142', u'\u0153', u'\u0161', u'\u017e', u'\u0000', - u'\u20ac', u'\u00a1', u'\u00a2', u'\u00a3', u'\u00a4', u'\u00a5', u'\u00a6', u'\u00a7', - u'\u00a8', u'\u00a9', u'\u00aa', u'\u00ab', u'\u00ac', u'\u0000', u'\u00ae', u'\u00af', - u'\u00b0', u'\u00b1', u'\u00b2', u'\u00b3', u'\u00b4', u'\u00b5', u'\u00b6', u'\u00b7', - u'\u00b8', u'\u00b9', u'\u00ba', u'\u00bb', u'\u00bc', u'\u00bd', u'\u00be', u'\u00bf', - u'\u00c0', u'\u00c1', u'\u00c2', u'\u00c3', u'\u00c4', u'\u00c5', u'\u00c6', u'\u00c7', - u'\u00c8', u'\u00c9', u'\u00ca', u'\u00cb', u'\u00cc', u'\u00cd', u'\u00ce', u'\u00cf', - u'\u00d0', u'\u00d1', u'\u00d2', u'\u00d3', u'\u00d4', u'\u00d5', u'\u00d6', u'\u00d7', - u'\u00d8', u'\u00d9', u'\u00da', u'\u00db', u'\u00dc', u'\u00dd', u'\u00de', u'\u00df', - u'\u00e0', u'\u00e1', u'\u00e2', u'\u00e3', u'\u00e4', u'\u00e5', u'\u00e6', u'\u00e7', - u'\u00e8', u'\u00e9', u'\u00ea', u'\u00eb', u'\u00ec', u'\u00ed', u'\u00ee', u'\u00ef', - u'\u00f0', u'\u00f1', u'\u00f2', u'\u00f3', u'\u00f4', u'\u00f5', u'\u00f6', u'\u00f7', - u'\u00f8', u'\u00f9', u'\u00fa', u'\u00fb', u'\u00fc', u'\u00fd', u'\u00fe', u'\u00ff' -) - -assert len(_pdfDocEncoding) == 256 - -_pdfDocEncoding_rev = {} -for i in xrange(256): - char = _pdfDocEncoding[i] - 
if char == u"\u0000": - continue - assert char not in _pdfDocEncoding_rev - _pdfDocEncoding_rev[char] = i - diff --git a/src/pyPdf/pdf.py b/src/pyPdf/pdf.py deleted file mode 100644 index 1a2b8709b2..0000000000 --- a/src/pyPdf/pdf.py +++ /dev/null @@ -1,1530 +0,0 @@ -# vim: sw=4:expandtab:foldmethod=marker -# -# Copyright (c) 2006, Mathieu Fenniak -# Copyright (c) 2007, Ashish Kulkarni -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - - -""" -A pure-Python PDF library with very minimal capabilities. 
It was designed to -be able to split and merge PDF files by page, and that's about all it can do. -It may be a solid base for future PDF file work in Python. -""" -__author__ = "Mathieu Fenniak" -__author_email__ = "biziqe@mathieu.fenniak.net" - -import struct -from cStringIO import StringIO - -from generic import DictionaryObject, NameObject, NumberObject, \ -createStringObject, ArrayObject, ByteStringObject, StreamObject, \ -IndirectObject, utils, readObject, TextStringObject, BooleanObject, \ -RectangleObject, DecodedStreamObject -from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList - - -## -# This class supports writing PDF files out, given pages produced by another -# class (typically {@link #PdfFileReader PdfFileReader}). -class PdfFileWriter(object): - def __init__(self,title=u"Unknown",author=u"Unknown"): - self.killed = False - self._header = "%PDF-1.3" - self._objects = [] # array of indirect objects - - # The root of our page tree node. - pages = DictionaryObject() - pages.update({ - NameObject("/Type"): NameObject("/Pages"), - NameObject("/Count"): NumberObject(0), - NameObject("/Kids"): ArrayObject(), - }) - self._pages = self._addObject(pages) - - # info object - info = DictionaryObject() - info.update({ - NameObject("/Producer"): createStringObject(u"Python PDF Library - http://pybrary.net/pyPdf/"), - NameObject("/Author"): createStringObject(author), - NameObject("/Title"): createStringObject(title), - }) - self._info = self._addObject(info) - - # root object - root = DictionaryObject() - root.update({ - NameObject("/Type"): NameObject("/Catalog"), - NameObject("/Pages"): self._pages, - }) - self._root = self._addObject(root) - - def _addObject(self, obj): - self._objects.append(obj) - return IndirectObject(len(self._objects), 0, self) - - def getObject(self, ido): - if ido.pdf != self: - raise ValueError("pdf must be self") - return self._objects[ido.idnum - 1] - - ## - # Adds a page to this PDF file. 
The page is usually acquired from a - # {@link #PdfFileReader PdfFileReader} instance. - #

- # Stability: Added in v1.0, will exist for all v1.x releases. - # - # @param page The page to add to the document. This argument should be - # an instance of {@link #PageObject PageObject}. - def addPage(self, page): - assert page["/Type"] == "/Page" - page[NameObject("/Parent")] = self._pages - page = self._addObject(page) - pages = self.getObject(self._pages) - pages["/Kids"].append(page) - pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1) - - ## - # Encrypt this PDF file with the PDF Standard encryption handler. - # @param user_pwd The "user password", which allows for opening and reading - # the PDF file with the restrictions provided. - # @param owner_pwd The "owner password", which allows for opening the PDF - # files without any restrictions. By default, the owner password is the - # same as the user password. - # @param use_128bit Boolean argument as to whether to use 128bit - # encryption. When false, 40bit encryption will be used. By default, this - # flag is on. 
- def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True): - import md5, time, random - if owner_pwd == None: - owner_pwd = user_pwd - if use_128bit: - V = 2 - rev = 3 - keylen = 128 / 8 - else: - V = 1 - rev = 2 - keylen = 40 / 8 - # permit everything: - P = -1 - O = ByteStringObject(_alg33(owner_pwd, user_pwd, rev, keylen)) - ID_1 = md5.new(repr(time.time())).digest() - ID_2 = md5.new(repr(random.random())).digest() - self._ID = ArrayObject((ByteStringObject(ID_1), ByteStringObject(ID_2))) - if rev == 2: - U, key = _alg34(user_pwd, O, P, ID_1) - else: - assert rev == 3 - U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, False) - encrypt = DictionaryObject() - encrypt[NameObject("/Filter")] = NameObject("/Standard") - encrypt[NameObject("/V")] = NumberObject(V) - if V == 2: - encrypt[NameObject("/Length")] = NumberObject(keylen * 8) - encrypt[NameObject("/R")] = NumberObject(rev) - encrypt[NameObject("/O")] = ByteStringObject(O) - encrypt[NameObject("/U")] = ByteStringObject(U) - encrypt[NameObject("/P")] = NumberObject(P) - self._encrypt = self._addObject(encrypt) - self._encrypt_key = key - - ## - # Writes the collection of pages added to this object out as a PDF file. - #

- # Stability: Added in v1.0, will exist for all v1.x releases. - # @param stream An object to write the file to. The object must support - # the write method, and the tell method, similar to a file object. - def write(self, stream): - import md5 - - externalReferenceMap = {} - self.stack = [] - self._sweepIndirectReferences(externalReferenceMap, self._root) - del self.stack - - # Begin writing: - object_positions = [] - stream.write(self._header + "\n") - for i in range(len(self._objects)): - idnum = (i + 1) - obj = self._objects[i] - object_positions.append(stream.tell()) - stream.write(str(idnum) + " 0 obj\n") - key = None - if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum: - pack1 = struct.pack(" -# Stability: Added in v1.0, will exist for all v1.x releases. -# -# @param stream An object that supports the standard read and seek methods -# similar to a file object. -class PdfFileReader(object): - def __init__(self, stream): - self.flattenedPages = None - self.resolvedObjects = {} - self.read(stream) - self.stream = stream - self._override_encryption = False - - ## - # Retrieves the PDF file's document information dictionary, if it exists. - # Note that some PDF files use metadata streams instead of docinfo - # dictionaries, and these metadata streams will not be accessed by this - # function. - #

- # Stability: Added in v1.6, will exist for all future v1.x releases. - # @return Returns a {@link #DocumentInformation DocumentInformation} - # instance, or None if none exists. - def getDocumentInfo(self): - if not self.trailer.has_key("/Info"): - return None - obj = self.trailer['/Info'] - retval = DocumentInformation() - retval.update(obj) - return retval - - ## - # Read-only property that accesses the {@link - # #PdfFileReader.getDocumentInfo getDocumentInfo} function. - #

- # Stability: Added in v1.7, will exist for all future v1.x releases. - documentInfo = property(lambda self: self.getDocumentInfo(), None, None) - - ## - # Retrieves XMP (Extensible Metadata Platform) data from the PDF document - # root. - #

- # Stability: Added in v1.12, will exist for all future v1.x releases. - # @return Returns a {@link #generic.XmpInformation XmlInformation} - # instance that can be used to access XMP metadata from the document. - # Can also return None if no metadata was found on the document root. - def getXmpMetadata(self): - try: - self._override_encryption = True - return self.trailer["/Root"].getXmpMetadata() - finally: - self._override_encryption = False - - ## - # Read-only property that accesses the {@link #PdfFileReader.getXmpData - # getXmpData} function. - #

- # Stability: Added in v1.12, will exist for all future v1.x releases. - xmpMetadata = property(lambda self: self.getXmpMetadata(), None, None) - - ## - # Calculates the number of pages in this PDF file. - #

- # Stability: Added in v1.0, will exist for all v1.x releases. - # @return Returns an integer. - def getNumPages(self): - if self.flattenedPages == None: - self._flatten() - return len(self.flattenedPages) - - ## - # Read-only property that accesses the {@link #PdfFileReader.getNumPages - # getNumPages} function. - #

- # Stability: Added in v1.7, will exist for all future v1.x releases. - numPages = property(lambda self: self.getNumPages(), None, None) - - ## - # Retrieves a page by number from this PDF file. - #

- # Stability: Added in v1.0, will exist for all v1.x releases. - # @return Returns a {@link #PageObject PageObject} instance. - def getPage(self, pageNumber): - ## ensure that we're not trying to access an encrypted PDF - #assert not self.trailer.has_key("/Encrypt") - if self.flattenedPages == None: - self._flatten() - return self.flattenedPages[pageNumber] - - ## - # Read-only property that accesses the - # {@link #PdfFileReader.getNamedDestinations - # getNamedDestinations} function. - #

- # Stability: Added in v1.10, will exist for all future v1.x releases. - namedDestinations = property(lambda self: - self.getNamedDestinations(), None, None) - - ## - # Retrieves the named destinations present in the document. - #

- # Stability: Added in v1.10, will exist for all future v1.x releases. - # @return Returns a dict which maps names to {@link #Destination - # destinations}. - def getNamedDestinations(self, tree=None, retval=None): - if retval == None: - retval = {} - catalog = self.trailer["/Root"] - - # get the name tree - if catalog.has_key("/Dests"): - tree = catalog["/Dests"] - elif catalog.has_key("/Names"): - names = catalog['/Names'] - if names.has_key("/Dests"): - tree = names['/Dests'] - - if tree == None: - return retval - - if tree.has_key("/Kids"): - # recurse down the tree - for kid in tree["/Kids"]: - self.getNamedDestinations(kid.getObject(), retval) - - if tree.has_key("/Names"): - names = tree["/Names"] - for i in range(0, len(names), 2): - key = names[i].getObject() - val = names[i+1].getObject() - if isinstance(val, DictionaryObject) and val.has_key('/D'): - val = val['/D'] - dest = self._buildDestination(key, val) - if dest != None: - retval[key] = dest - - return retval - - ## - # Read-only property that accesses the {@link #PdfFileReader.getOutlines - # getOutlines} function. - #

- # Stability: Added in v1.10, will exist for all future v1.x releases. - outlines = property(lambda self: self.getOutlines(), None, None) - - ## - # Retrieves the document outline present in the document. - #

- # Stability: Added in v1.10, will exist for all future v1.x releases. - # @return Returns a nested list of {@link #Destination destinations}. - def getOutlines(self, node=None, outlines=None): - if outlines == None: - outlines = [] - catalog = self.trailer["/Root"] - - # get the outline dictionary and named destinations - if catalog.has_key("/Outlines"): - lines = catalog["/Outlines"] - if lines.has_key("/First"): - node = lines["/First"] - self._namedDests = self.getNamedDestinations() - - if node == None: - return outlines - - # see if there are any more outlines - while 1: - outline = self._buildOutline(node) - if outline: - outlines.append(outline) - - # check for sub-outlines - if node.has_key("/First"): - subOutlines = [] - self.getOutlines(node["/First"], subOutlines) - if subOutlines: - outlines.append(subOutlines) - - if not node.has_key("/Next"): - break - node = node["/Next"] - - return outlines - - def _buildDestination(self, title, array): - page, typ = array[0:2] - array = array[2:] - return Destination(title, page, typ, *array) - - def _buildOutline(self, node): - dest, title, outline = None, None, None - - if node.has_key("/A") and node.has_key("/Title"): - # Action, section 8.5 (only type GoTo supported) - title = node["/Title"] - action = node["/A"] - if action["/S"] == "/GoTo": - dest = action["/D"] - elif node.has_key("/Dest") and node.has_key("/Title"): - # Destination, section 8.2.1 - title = node["/Title"] - dest = node["/Dest"] - - # if destination found, then create outline - if dest: - if isinstance(dest, ArrayObject): - outline = self._buildDestination(title, dest) - elif isinstance(dest, unicode) and self._namedDests.has_key(dest): - outline = self._namedDests[dest] - outline[NameObject("/Title")] = title - else: - raise utils.PdfReadError("Unexpected destination %r" % dest) - return outline - - ## - # Read-only property that emulates a list based upon the {@link - # #PdfFileReader.getNumPages getNumPages} and {@link 
#PdfFileReader.getPage - # getPage} functions. - #

- # Stability: Added in v1.7, and will exist for all future v1.x releases. - pages = property(lambda self: ConvertFunctionsToVirtualList(self.getNumPages, self.getPage), - None, None) - - def _flatten(self, pages=None, inherit=None): - inheritablePageAttributes = ( - NameObject("/Resources"), NameObject("/MediaBox"), - NameObject("/CropBox"), NameObject("/Rotate") - ) - if inherit == None: - inherit = dict() - if pages == None: - self.flattenedPages = [] - catalog = self.trailer["/Root"].getObject() - pages = catalog["/Pages"].getObject() - t = pages["/Type"] - if t == "/Pages": - for attr in inheritablePageAttributes: - if pages.has_key(attr): - inherit[attr] = pages[attr] - for page in pages["/Kids"]: - self._flatten(page.getObject(), inherit) - elif t == "/Page": - for attr,value in inherit.items(): - # if the page has it's own value, it does not inherit the - # parent's value: - if not pages.has_key(attr): - pages[attr] = value - pageObj = PageObject(self) - pageObj.update(pages) - self.flattenedPages.append(pageObj) - - def getObject(self, indirectReference): - retval = self.resolvedObjects.get(indirectReference.generation, {}).get(indirectReference.idnum, None) - if retval != None: - return retval - if indirectReference.generation == 0 and \ - self.xref_objStm.has_key(indirectReference.idnum): - # indirect reference to object in object stream - # read the entire object stream into memory - stmnum,idx = self.xref_objStm[indirectReference.idnum] - objStm = IndirectObject(stmnum, 0, self).getObject() - assert objStm['/Type'] == '/ObjStm' - assert idx < objStm['/N'] - streamData = StringIO(objStm.getData()) - for i in range(objStm['/N']): - objnum = NumberObject.readFromStream(streamData) - readNonWhitespace(streamData) - streamData.seek(-1, 1) - offset = NumberObject.readFromStream(streamData) - readNonWhitespace(streamData) - streamData.seek(-1, 1) - t = streamData.tell() - streamData.seek(objStm['/First']+offset, 0) - obj = readObject(streamData, self) - 
self.resolvedObjects[0][objnum] = obj - streamData.seek(t, 0) - return self.resolvedObjects[0][indirectReference.idnum] - start = self.xref[indirectReference.generation][indirectReference.idnum] - self.stream.seek(start, 0) - idnum, generation = self.readObjectHeader(self.stream) - assert idnum == indirectReference.idnum - assert generation == indirectReference.generation - retval = readObject(self.stream, self) - - # override encryption is used for the /Encrypt dictionary - if not self._override_encryption and self.isEncrypted: - # if we don't have the encryption key: - if not hasattr(self, '_decryption_key'): - from calibre.ebooks import DRMError - raise DRMError('File contents are encrypted') - # otherwise, decrypt here... - import struct, md5 - pack1 = struct.pack("= len(array): - break - - def readNextEndLine(self, stream): - line = "" - while True: - x = stream.read(1) - stream.seek(-2, 1) - if x == '\n' or x == '\r': - while x == '\n' or x == '\r': - x = stream.read(1) - stream.seek(-2, 1) - stream.seek(1, 1) - break - else: - line = x + line - return line - - ## - # When using an encrypted / secured PDF file with the PDF Standard - # encryption handler, this function will allow the file to be decrypted. - # It checks the given password against the document's user password and - # owner password, and then stores the resulting decryption key if either - # password is correct. - #

- # It does not matter which password was matched. Both passwords provide - # the correct decryption key that will allow the document to be used with - # this library. - #

- # Stability: Added in v1.8, will exist for all future v1.x releases. - # - # @return 0 if the password failed, 1 if the password matched the user - # password, and 2 if the password matched the owner password. - # - # @exception NotImplementedError Document uses an unsupported encryption - # method. - def decrypt(self, password): - self._override_encryption = True - try: - return self._decrypt(password) - finally: - self._override_encryption = False - - def _decrypt(self, password): - encrypt = self.trailer['/Encrypt'].getObject() - if encrypt['/Filter'] != '/Standard': - raise NotImplementedError, "only Standard PDF encryption handler is available" - if not (encrypt['/V'] in (1, 2)): - raise NotImplementedError, "only algorithm code 1 and 2 are supported" - user_password, key = self._authenticateUserPassword(password) - if user_password: - self._decryption_key = key - return 1 - else: - rev = encrypt['/R'].getObject() - if rev == 2: - keylen = 5 - else: - keylen = encrypt['/Length'].getObject() / 8 - key = _alg33_1(password, rev, keylen) - real_O = encrypt["/O"].getObject() - if rev == 2: - userpass = utils.RC4_encrypt(key, real_O) - else: - val = real_O - for i in range(19, -1, -1): - new_key = '' - for l in range(len(key)): - new_key += chr(ord(key[l]) ^ i) - val = utils.RC4_encrypt(new_key, val) - userpass = val - owner_password, key = self._authenticateUserPassword(userpass) - if owner_password: - self._decryption_key = key - return 2 - return 0 - - def _authenticateUserPassword(self, password): - encrypt = self.trailer['/Encrypt'].getObject() - rev = encrypt['/R'].getObject() - owner_entry = encrypt['/O'].getObject().original_bytes - p_entry = encrypt['/P'].getObject() - id_entry = self.trailer['/ID'].getObject() - id1_entry = id_entry[0].getObject() - if rev == 2: - U, key = _alg34(password, owner_entry, p_entry, id1_entry) - elif rev >= 3: - U, key = _alg35(password, rev, - encrypt["/Length"].getObject() / 8, owner_entry, - p_entry, id1_entry, - 
encrypt.get("/EncryptMetadata", BooleanObject(False)).getObject()) - real_U = encrypt['/U'].getObject().original_bytes - return U == real_U, key - - def getIsEncrypted(self): - return self.trailer.has_key("/Encrypt") - - ## - # Read-only boolean property showing whether this PDF file is encrypted. - # Note that this property, if true, will remain true even after the {@link - # #PdfFileReader.decrypt decrypt} function is called. - isEncrypted = property(lambda self: self.getIsEncrypted(), None, None) - - -def getRectangle(self, name, defaults): - retval = self.get(name) - if isinstance(retval, RectangleObject): - return retval - if retval == None: - for d in defaults: - retval = self.get(d) - if retval != None: - break - if isinstance(retval, IndirectObject): - retval = self.pdf.getObject(retval) - retval = RectangleObject(retval) - setRectangle(self, name, retval) - return retval - -def setRectangle(self, name, value): - if not isinstance(name, NameObject): - name = NameObject(name) - self[name] = value - -def deleteRectangle(self, name): - del self[name] - -def createRectangleAccessor(name, fallback): - return \ - property( - lambda self: getRectangle(self, name, fallback), - lambda self, value: setRectangle(self, name, value), - lambda self: deleteRectangle(self, name) - ) - -## -# This class represents a single page within a PDF file. Typically this object -# will be created by accessing the {@link #PdfFileReader.getPage getPage} -# function of the {@link #PdfFileReader PdfFileReader} class. -class PageObject(DictionaryObject): - def __init__(self, pdf): - DictionaryObject.__init__(self) - self.pdf = pdf - - ## - # Rotates a page clockwise by increments of 90 degrees. - #

- # Stability: Added in v1.1, will exist for all future v1.x releases. - # @param angle Angle to rotate the page. Must be an increment of 90 deg. - def rotateClockwise(self, angle): - assert angle % 90 == 0 - self._rotate(angle) - return self - - ## - # Rotates a page counter-clockwise by increments of 90 degrees. - #

- # Stability: Added in v1.1, will exist for all future v1.x releases. - # @param angle Angle to rotate the page. Must be an increment of 90 deg. - def rotateCounterClockwise(self, angle): - assert angle % 90 == 0 - self._rotate(-angle) - return self - - def _rotate(self, angle): - currentAngle = self.get("/Rotate", 0) - self[NameObject("/Rotate")] = NumberObject(currentAngle + angle) - - def _mergeResources(res1, res2, resource): - newRes = DictionaryObject() - newRes.update(res1.get(resource, DictionaryObject()).getObject()) - page2Res = res2.get(resource, DictionaryObject()).getObject() - renameRes = {} - for key in page2Res.keys(): - if newRes.has_key(key) and newRes[key] != page2Res[key]: - newname = NameObject(key + "renamed") - renameRes[key] = newname - newRes[newname] = page2Res[key] - elif not newRes.has_key(key): - newRes[key] = page2Res[key] - return newRes, renameRes - _mergeResources = staticmethod(_mergeResources) - - def _contentStreamRename(stream, rename, pdf): - if not rename: - return stream - stream = ContentStream(stream, pdf) - for operands,operator in stream.operations: - for i in range(len(operands)): - op = operands[i] - if isinstance(op, NameObject): - operands[i] = rename.get(op, op) - return stream - _contentStreamRename = staticmethod(_contentStreamRename) - - def _pushPopGS(contents, pdf): - # adds a graphics state "push" and "pop" to the beginning and end - # of a content stream. This isolates it from changes such as - # transformation matricies. - stream = ContentStream(contents, pdf) - stream.operations.insert(0, [[], "q"]) - stream.operations.append([[], "Q"]) - return stream - _pushPopGS = staticmethod(_pushPopGS) - - ## - # Merges the content streams of two pages into one. Resource references - # (i.e. fonts) are maintained from both pages. The mediabox/cropbox/etc - # of this page are not altered. 
The parameter page's content stream will - # be added to the end of this page's content stream, meaning that it will - # be drawn after, or "on top" of this page. - #

- # Stability: Added in v1.4, will exist for all future 1.x releases. - # @param page2 An instance of {@link #PageObject PageObject} to be merged - # into this one. - def mergePage(self, page2): - - # First we work on merging the resource dictionaries. This allows us - # to find out what symbols in the content streams we might need to - # rename. - - newResources = DictionaryObject() - rename = {} - originalResources = self["/Resources"].getObject() - page2Resources = page2["/Resources"].getObject() - - for res in "/ExtGState", "/Font", "/XObject", "/ColorSpace", "/Pattern", "/Shading": - new, newrename = PageObject._mergeResources(originalResources, page2Resources, res) - if new: - newResources[NameObject(res)] = new - rename.update(newrename) - - # Combine /ProcSet sets. - newResources[NameObject("/ProcSet")] = ArrayObject( - frozenset(originalResources.get("/ProcSet", ArrayObject()).getObject()).union( - frozenset(page2Resources.get("/ProcSet", ArrayObject()).getObject()) - ) - ) - - newContentArray = ArrayObject() - - originalContent = self["/Contents"].getObject() - newContentArray.append(PageObject._pushPopGS(originalContent, self.pdf)) - - page2Content = page2['/Contents'].getObject() - page2Content = PageObject._contentStreamRename(page2Content, rename, self.pdf) - page2Content = PageObject._pushPopGS(page2Content, self.pdf) - newContentArray.append(page2Content) - - self[NameObject('/Contents')] = ContentStream(newContentArray, self.pdf) - self[NameObject('/Resources')] = newResources - - ## - # Compresses the size of this page by joining all content streams and - # applying a FlateDecode filter. - #

- # Stability: Added in v1.6, will exist for all future v1.x releases. - # However, it is possible that this function will perform no action if - # content stream compression becomes "automatic" for some reason. - def compressContentStreams(self): - content = self["/Contents"].getObject() - if not isinstance(content, ContentStream): - content = ContentStream(content, self.pdf) - self[NameObject("/Contents")] = content.flateEncode() - - ## - # Locate all text drawing commands, in the order they are provided in the - # content stream, and extract the text. This works well for some PDF - # files, but poorly for others, depending on the generator used. This will - # be refined in the future. Do not rely on the order of text coming out of - # this function, as it will change if this function is made more - # sophisticated. - #

- # Stability: Added in v1.7, will exist for all future v1.x releases. May - # be overhauled to provide more ordered text in the future. - # @return a unicode string object - def extractText(self): - text = u"" - content = self["/Contents"].getObject() - if not isinstance(content, ContentStream): - content = ContentStream(content, self.pdf) - # Note: we check all strings are TextStringObjects. ByteStringObjects - # are strings where the byte->string encoding was unknown, so adding - # them to the text here would be gibberish. - for operands,operator in content.operations: - if operator == "Tj": - _text = operands[0] - if isinstance(_text, TextStringObject): - text += _text - elif operator == "T*": - text += "\n" - elif operator == "'": - text += "\n" - _text = operands[0] - if isinstance(_text, TextStringObject): - text += operands[0] - elif operator == '"': - _text = operands[2] - if isinstance(_text, TextStringObject): - text += "\n" - text += _text - elif operator == "TJ": - for i in operands[0]: - if isinstance(i, TextStringObject): - text += i - return text - - ## - # A rectangle (RectangleObject), expressed in default user space units, - # defining the boundaries of the physical medium on which the page is - # intended to be displayed or printed. - #

- # Stability: Added in v1.4, will exist for all future v1.x releases. - mediaBox = createRectangleAccessor("/MediaBox", ()) - - ## - # A rectangle (RectangleObject), expressed in default user space units, - # defining the visible region of default user space. When the page is - # displayed or printed, its contents are to be clipped (cropped) to this - # rectangle and then imposed on the output medium in some - # implementation-defined manner. Default value: same as MediaBox. - #

- # Stability: Added in v1.4, will exist for all future v1.x releases. - cropBox = createRectangleAccessor("/CropBox", ("/MediaBox",)) - - ## - # A rectangle (RectangleObject), expressed in default user space units, - # defining the region to which the contents of the page should be clipped - # when output in a production enviroment. - #

- # Stability: Added in v1.4, will exist for all future v1.x releases. - bleedBox = createRectangleAccessor("/BleedBox", ("/CropBox", "/MediaBox")) - - ## - # A rectangle (RectangleObject), expressed in default user space units, - # defining the intended dimensions of the finished page after trimming. - #

- # Stability: Added in v1.4, will exist for all future v1.x releases. - trimBox = createRectangleAccessor("/TrimBox", ("/CropBox", "/MediaBox")) - - ## - # A rectangle (RectangleObject), expressed in default user space units, - # defining the extent of the page's meaningful content as intended by the - # page's creator. - #

- # Stability: Added in v1.4, will exist for all future v1.x releases. - artBox = createRectangleAccessor("/ArtBox", ("/CropBox", "/MediaBox")) - - -class ContentStream(DecodedStreamObject): - def __init__(self, stream, pdf): - self.pdf = pdf - self.operations = [] - # stream may be a StreamObject or an ArrayObject containing - # multiple StreamObjects to be cat'd together. - stream = stream.getObject() - if isinstance(stream, ArrayObject): - data = "" - for s in stream: - data += s.getObject().getData() - stream = StringIO(data) - else: - stream = StringIO(stream.getData()) - self.__parseContentStream(stream) - - def __parseContentStream(self, stream): - # file("f:\\tmp.txt", "w").write(stream.read()) - stream.seek(0, 0) - operands = [] - while True: - peek = readNonWhitespace(stream) - if peek == '': - break - stream.seek(-1, 1) - if peek.isalpha() or peek == "'" or peek == '"': - operator = "" - while True: - tok = stream.read(1) - if tok.isspace() or tok in NameObject.delimiterCharacters: - stream.seek(-1, 1) - break - elif tok == '': - break - operator += tok - if operator == "BI": - # begin inline image - a completely different parsing - # mechanism is required, of course... thanks buddy... - assert operands == [] - ii = self._readInlineImage(stream) - self.operations.append((ii, "INLINE IMAGE")) - else: - self.operations.append((operands, operator)) - operands = [] - elif peek == '%': - # If we encounter a comment in the content stream, we have to - # handle it here. Typically, readObject will handle - # encountering a comment -- but readObject assumes that - # following the comment must be the object we're trying to - # read. In this case, it could be an operator instead. - while peek not in ('\r', '\n'): - peek = stream.read(1) - else: - operands.append(readObject(stream, None)) - - def _readInlineImage(self, stream): - # begin reading just after the "BI" - begin image - # first read the dictionary of settings. 
- settings = DictionaryObject() - while True: - tok = readNonWhitespace(stream) - stream.seek(-1, 1) - if tok == "I": - # "ID" - begin of image data - break - key = readObject(stream, self.pdf) - tok = readNonWhitespace(stream) - stream.seek(-1, 1) - value = readObject(stream, self.pdf) - settings[key] = value - # left at beginning of ID - tmp = stream.read(3) - assert tmp[:2] == "ID" - data = "" - while True: - tok = stream.read(1) - if tok == "E": - next = stream.read(1) - if next == "I": - break - else: - stream.seek(-1, 1) - data += tok - else: - data += tok - x = readNonWhitespace(stream) - stream.seek(-1, 1) - return {"settings": settings, "data": data} - - def _getData(self): - newdata = StringIO() - for operands,operator in self.operations: - if operator == "INLINE IMAGE": - newdata.write("BI") - dicttext = StringIO() - operands["settings"].writeToStream(dicttext, None) - newdata.write(dicttext.getvalue()[2:-2]) - newdata.write("ID ") - newdata.write(operands["data"]) - newdata.write("EI") - else: - for op in operands: - op.writeToStream(newdata, None) - newdata.write(" ") - newdata.write(operator) - newdata.write("\n") - return newdata.getvalue() - - def _setData(self, value): - self.__parseContentStream(StringIO(value)) - - _data = property(_getData, _setData) - - -## -# A class representing the basic document metadata provided in a PDF File. -#

-# As of pyPdf v1.10, all text properties of the document metadata have two -# properties, eg. author and author_raw. The non-raw property will always -# return a TextStringObject, making it ideal for a case where the metadata is -# being displayed. The raw property can sometimes return a ByteStringObject, -# if pyPdf was unable to decode the string's text encoding; this requires -# additional safety in the caller and therefore is not as commonly accessed. -class DocumentInformation(DictionaryObject): - def __init__(self): - DictionaryObject.__init__(self) - - def getText(self, key): - retval = self.get(key, None) - if isinstance(retval, TextStringObject): - return retval - return None - - ## - # Read-only property accessing the document's title. Added in v1.6, will - # exist for all future v1.x releases. Modified in v1.10 to always return a - # unicode string (TextStringObject). - # @return A unicode string, or None if the title is not provided. - title = property(lambda self: self.getText("/Title")) - title_raw = property(lambda self: self.get("/Title")) - - ## - # Read-only property accessing the document's author. Added in v1.6, will - # exist for all future v1.x releases. Modified in v1.10 to always return a - # unicode string (TextStringObject). - # @return A unicode string, or None if the author is not provided. - author = property(lambda self: self.getText("/Author")) - author_raw = property(lambda self: self.get("/Author")) - - ## - # Read-only property accessing the subject of the document. Added in v1.6, - # will exist for all future v1.x releases. Modified in v1.10 to always - # return a unicode string (TextStringObject). - # @return A unicode string, or None if the subject is not provided. - subject = property(lambda self: self.getText("/Subject")) - subject_raw = property(lambda self: self.get("/Subject")) - - ## - # Read-only property accessing the document's creator. 
If the document was - # converted to PDF from another format, the name of the application (for - # example, OpenOffice) that created the original document from which it was - # converted. Added in v1.6, will exist for all future v1.x releases. - # Modified in v1.10 to always return a unicode string (TextStringObject). - # @return A unicode string, or None if the creator is not provided. - creator = property(lambda self: self.getText("/Creator")) - creator_raw = property(lambda self: self.get("/Creator")) - - ## - # Read-only property accessing the document's producer. If the document - # was converted to PDF from another format, the name of the application - # (for example, OSX Quartz) that converted it to PDF. Added in v1.6, will - # exist for all future v1.x releases. Modified in v1.10 to always return a - # unicode string (TextStringObject). - # @return A unicode string, or None if the producer is not provided. - producer = property(lambda self: self.getText("/Producer")) - producer_raw = property(lambda self: self.get("/Producer")) - - -## -# A class representing a destination within a PDF file. -# See section 8.2.1 of the PDF 1.6 reference. -# Stability: Added in v1.10, will exist for all v1.x releases. -class Destination(DictionaryObject): - def __init__(self, title, page, typ, *args): - DictionaryObject.__init__(self) - self[NameObject("/Title")] = title - self[NameObject("/Page")] = page - self[NameObject("/Type")] = typ - - # from table 8.2 of the PDF 1.6 reference. 
- if typ == "/XYZ": - (self[NameObject("/Left")], self[NameObject("/Top")], - self[NameObject("/Zoom")]) = args - elif typ == "/FitR": - (self[NameObject("/Left")], self[NameObject("/Bottom")], - self[NameObject("/Right")], self[NameObject("/Top")]) = args - elif typ in ["/FitH", "FitBH"]: - self[NameObject("/Top")], = args - elif typ in ["/FitV", "FitBV"]: - self[NameObject("/Left")], = args - elif typ in ["/Fit", "FitB"]: - pass - else: - raise utils.PdfReadError("Unknown Destination Type: %r" % typ) - - ## - # Read-only property accessing the destination title. - # @return A string. - title = property(lambda self: self.get("/Title")) - - ## - # Read-only property accessing the destination page. - # @return An integer. - page = property(lambda self: self.get("/Page")) - - ## - # Read-only property accessing the destination type. - # @return A string. - typ = property(lambda self: self.get("/Type")) - - ## - # Read-only property accessing the zoom factor. - # @return A number, or None if not available. - zoom = property(lambda self: self.get("/Zoom", None)) - - ## - # Read-only property accessing the left horizontal coordinate. - # @return A number, or None if not available. - left = property(lambda self: self.get("/Left", None)) - - ## - # Read-only property accessing the right horizontal coordinate. - # @return A number, or None if not available. - right = property(lambda self: self.get("/Right", None)) - - ## - # Read-only property accessing the top vertical coordinate. - # @return A number, or None if not available. - top = property(lambda self: self.get("/Top", None)) - - ## - # Read-only property accessing the bottom vertical coordinate. - # @return A number, or None if not available. 
- bottom = property(lambda self: self.get("/Bottom", None)) - -def convertToInt(d, size): - if size > 8: - raise utils.PdfReadError("invalid size in convertToInt") - d = "\x00\x00\x00\x00\x00\x00\x00\x00" + d - d = d[-8:] - return struct.unpack(">q", d)[0] - -# ref: pdf1.8 spec section 3.5.2 algorithm 3.2 -_encryption_padding = '\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56' + \ - '\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c' + \ - '\xa9\xfe\x64\x53\x69\x7a' - -# Implementation of algorithm 3.2 of the PDF standard security handler, -# section 3.5.2 of the PDF 1.6 reference. -def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True): - # 1. Pad or truncate the password string to exactly 32 bytes. If the - # password string is more than 32 bytes long, use only its first 32 bytes; - # if it is less than 32 bytes long, pad it by appending the required number - # of additional bytes from the beginning of the padding string - # (_encryption_padding). - password = (password + _encryption_padding)[:32] - # 2. Initialize the MD5 hash function and pass the result of step 1 as - # input to this function. - import md5, struct - m = md5.new(password) - # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash - # function. - m.update(owner_entry) - # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass - # these bytes to the MD5 hash function, low-order byte first. - p_entry = struct.pack('= 3 and not metadata_encrypt: - m.update("\xff\xff\xff\xff") - # 7. Finish the hash. - md5_hash = m.digest() - # 8. (Revision 3 or greater) Do the following 50 times: Take the output - # from the previous MD5 hash and pass the first n bytes of the output as - # input into a new MD5 hash, where n is the number of bytes of the - # encryption key as defined by the value of the encryption dictionary's - # /Length entry. - if rev >= 3: - for i in range(50): - md5_hash = md5.new(md5_hash[:keylen]).digest() - # 9. 
Set the encryption key to the first n bytes of the output from the - # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or - # greater, depends on the value of the encryption dictionary's /Length - # entry. - return md5_hash[:keylen] - -# Implementation of algorithm 3.3 of the PDF standard security handler, -# section 3.5.2 of the PDF 1.6 reference. -def _alg33(owner_pwd, user_pwd, rev, keylen): - # steps 1 - 4 - key = _alg33_1(owner_pwd, rev, keylen) - # 5. Pad or truncate the user password string as described in step 1 of - # algorithm 3.2. - user_pwd = (user_pwd + _encryption_padding)[:32] - # 6. Encrypt the result of step 5, using an RC4 encryption function with - # the encryption key obtained in step 4. - val = utils.RC4_encrypt(key, user_pwd) - # 7. (Revision 3 or greater) Do the following 19 times: Take the output - # from the previous invocation of the RC4 function and pass it as input to - # a new invocation of the function; use an encryption key generated by - # taking each byte of the encryption key obtained in step 4 and performing - # an XOR operation between that byte and the single-byte value of the - # iteration counter (from 1 to 19). - if rev >= 3: - for i in range(1, 20): - new_key = '' - for l in range(len(key)): - new_key += chr(ord(key[l]) ^ i) - val = utils.RC4_encrypt(new_key, val) - # 8. Store the output from the final invocation of the RC4 as the value of - # the /O entry in the encryption dictionary. - return val - -# Steps 1-4 of algorithm 3.3 -def _alg33_1(password, rev, keylen): - # 1. Pad or truncate the owner password string as described in step 1 of - # algorithm 3.2. If there is no owner password, use the user password - # instead. - password = (password + _encryption_padding)[:32] - # 2. Initialize the MD5 hash function and pass the result of step 1 as - # input to this function. - import md5 - m = md5.new(password) - # 3. 
(Revision 3 or greater) Do the following 50 times: Take the output - # from the previous MD5 hash and pass it as input into a new MD5 hash. - md5_hash = m.digest() - if rev >= 3: - for i in range(50): - md5_hash = md5.new(md5_hash).digest() - # 4. Create an RC4 encryption key using the first n bytes of the output - # from the final MD5 hash, where n is always 5 for revision 2 but, for - # revision 3 or greater, depends on the value of the encryption - # dictionary's /Length entry. - key = md5_hash[:keylen] - return key - -# Implementation of algorithm 3.4 of the PDF standard security handler, -# section 3.5.2 of the PDF 1.6 reference. -def _alg34(password, owner_entry, p_entry, id1_entry): - # 1. Create an encryption key based on the user password string, as - # described in algorithm 3.2. - key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry) - # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2, - # using an RC4 encryption function with the encryption key from the - # preceding step. - U = utils.RC4_encrypt(key, _encryption_padding) - # 3. Store the result of step 2 as the value of the /U entry in the - # encryption dictionary. - return U, key - -# Implementation of algorithm 3.4 of the PDF standard security handler, -# section 3.5.2 of the PDF 1.6 reference. -def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt): - # 1. Create an encryption key based on the user password string, as - # described in Algorithm 3.2. - key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry) - # 2. Initialize the MD5 hash function and pass the 32-byte padding string - # shown in step 1 of Algorithm 3.2 as input to this function. - import md5 - m = md5.new() - m.update(_encryption_padding) - # 3. Pass the first element of the file's file identifier array (the value - # of the ID entry in the document's trailer dictionary; see Table 3.13 on - # page 73) to the hash function and finish the hash. 
(See implementation - # note 25 in Appendix H.) - m.update(id1_entry) - md5_hash = m.digest() - # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption - # function with the encryption key from step 1. - val = utils.RC4_encrypt(key, md5_hash) - # 5. Do the following 19 times: Take the output from the previous - # invocation of the RC4 function and pass it as input to a new invocation - # of the function; use an encryption key generated by taking each byte of - # the original encryption key (obtained in step 2) and performing an XOR - # operation between that byte and the single-byte value of the iteration - # counter (from 1 to 19). - for i in range(1, 20): - new_key = '' - for l in range(len(key)): - new_key += chr(ord(key[l]) ^ i) - val = utils.RC4_encrypt(new_key, val) - # 6. Append 16 bytes of arbitrary padding to the output from the final - # invocation of the RC4 function and store the 32-byte result as the value - # of the U entry in the encryption dictionary. - # (implementator note: I don't know what "arbitrary padding" is supposed to - # mean, so I have used null bytes. 
This seems to match a few other - # people's implementations) - return val + ('\x00' * 16), key - -#if __name__ == "__main__": -# output = PdfFileWriter() -# -# input1 = PdfFileReader(file("test\\5000-s1-05e.pdf", "rb")) -# page1 = input1.getPage(0) -# -# input2 = PdfFileReader(file("test\\PDFReference16.pdf", "rb")) -# page2 = input2.getPage(0) -# page3 = input2.getPage(1) -# page1.mergePage(page2) -# page1.mergePage(page3) -# -# input3 = PdfFileReader(file("test\\cc-cc.pdf", "rb")) -# page1.mergePage(input3.getPage(0)) -# -# page1.compressContentStreams() -# -# output.addPage(page1) -# output.write(file("test\\merge-test.pdf", "wb")) - - diff --git a/src/pyPdf/utils.py b/src/pyPdf/utils.py deleted file mode 100644 index dd0a3d002a..0000000000 --- a/src/pyPdf/utils.py +++ /dev/null @@ -1,110 +0,0 @@ -# vim: sw=4:expandtab:foldmethod=marker -# -# Copyright (c) 2006, Mathieu Fenniak -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - - -""" -Utility functions for PDF library. -""" -__author__ = "Mathieu Fenniak" -__author_email__ = "biziqe@mathieu.fenniak.net" - -#ENABLE_PSYCO = False -#if ENABLE_PSYCO: -# try: -# import psyco -# except ImportError: -# ENABLE_PSYCO = False -# -#if not ENABLE_PSYCO: -# class psyco: -# def proxy(func): -# return func -# proxy = staticmethod(proxy) - -def readUntilWhitespace(stream, maxchars=None): - txt = "" - while True: - tok = stream.read(1) - if tok.isspace() or not tok: - break - txt += tok - if len(txt) == maxchars: - break - return txt - -def readNonWhitespace(stream): - tok = ' ' - while tok == '\n' or tok == '\r' or tok == ' ' or tok == '\t': - tok = stream.read(1) - return tok - -class ConvertFunctionsToVirtualList(object): - def __init__(self, lengthFunction, getFunction): - self.lengthFunction = lengthFunction - self.getFunction = getFunction - - def __len__(self): - return self.lengthFunction() - - def __getitem__(self, index): - if not isinstance(index, int): - raise TypeError, "sequence indices must be integers" - len_self = len(self) - if index < 0: - # support negative indexes - index = len_self + index - if index < 0 or index >= len_self: - raise IndexError, "sequence index out of range" - return self.getFunction(index) - -def RC4_encrypt(key, plaintext): - S = [i for i in range(256)] - j = 0 - for i in range(256): - j = (j + S[i] + ord(key[i % len(key)])) % 256 - S[i], S[j] = S[j], S[i] - i, j = 0, 0 - retval = "" - 
for x in range(len(plaintext)): - i = (i + 1) % 256 - j = (j + S[i]) % 256 - S[i], S[j] = S[j], S[i] - t = S[(S[i] + S[j]) % 256] - retval += chr(ord(plaintext[x]) ^ t) - return retval - -class PdfReadError(Exception): - pass - -if __name__ == "__main__": - # test RC4 - out = RC4_encrypt("Key", "Plaintext") - print repr(out) - pt = RC4_encrypt("Key", out) - print repr(pt) diff --git a/src/pyPdf/xmp.py b/src/pyPdf/xmp.py deleted file mode 100644 index b070df9093..0000000000 --- a/src/pyPdf/xmp.py +++ /dev/null @@ -1,355 +0,0 @@ -import re -import datetime -import decimal -from generic import PdfObject -from xml.dom import getDOMImplementation -from xml.dom.minidom import parseString - -RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -DC_NAMESPACE = "http://purl.org/dc/elements/1.1/" -XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/" -PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/" -XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/" - -# What is the PDFX namespace, you might ask? I might ask that too. It's -# a completely undocumented namespace used to place "custom metadata" -# properties, which are arbitrary metadata properties with no semantic or -# documented meaning. Elements in the namespace are key/value-style storage, -# where the element name is the key and the content is the value. The keys -# are transformed into valid XML identifiers by substituting an invalid -# identifier character with \u2182 followed by the unicode hex ID of the -# original character. A key like "my car" is therefore "my\u21820020car". -# -# \u2182, in case you're wondering, is the unicode character -# \u{ROMAN NUMERAL TEN THOUSAND}, a straightforward and obvious choice for -# escaping characters. -# -# Intentional users of the pdfx namespace should be shot on sight. A -# custom data schema and sensical XML elements could be used instead, as is -# suggested by Adobe's own documentation on XMP (under "Extensibility of -# Schemas"). 
-# -# Information presented here on the /pdfx/ schema is a result of limited -# reverse engineering, and does not constitute a full specification. -PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/" - -iso8601 = re.compile(""" - (?P[0-9]{4}) - (- - (?P[0-9]{2}) - (- - (?P[0-9]+) - (T - (?P[0-9]{2}): - (?P[0-9]{2}) - (:(?P[0-9]{2}(.[0-9]+)?))? - (?PZ|[-+][0-9]{2}:[0-9]{2}) - )? - )? - )? - """, re.VERBOSE) - -## -# An object that represents Adobe XMP metadata. -class XmpInformation(PdfObject): - - def __init__(self, stream): - self.stream = stream - docRoot = parseString(self.stream.getData()) - self.rdfRoot = docRoot.getElementsByTagNameNS(RDF_NAMESPACE, "RDF")[0] - self.cache = {} - - def writeToStream(self, stream, encryption_key): - self.stream.writeToStream(stream, encryption_key) - - def getElement(self, aboutUri, namespace, name): - for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"): - if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri: - attr = desc.getAttributeNodeNS(namespace, name) - if attr != None: - yield attr - for element in desc.getElementsByTagNameNS(namespace, name): - yield element - - def getNodesInNamespace(self, aboutUri, namespace): - for desc in self.rdfRoot.getElementsByTagNameNS(RDF_NAMESPACE, "Description"): - if desc.getAttributeNS(RDF_NAMESPACE, "about") == aboutUri: - for i in range(desc.attributes.length): - attr = desc.attributes.item(i) - if attr.namespaceURI == namespace: - yield attr - for child in desc.childNodes: - if child.namespaceURI == namespace: - yield child - - def _getText(self, element): - text = "" - for child in element.childNodes: - if child.nodeType == child.TEXT_NODE: - text += child.data - return text - - def _converter_string(value): - return value - - def _converter_date(value): - m = iso8601.match(value) - year = int(m.group("year")) - month = int(m.group("month") or "1") - day = int(m.group("day") or "1") - hour = int(m.group("hour") or "0") - minute = int(m.group("minute") or 
"0") - second = decimal.Decimal(m.group("second") or "0") - seconds = second.to_integral(decimal.ROUND_FLOOR) - milliseconds = (second - seconds) * 1000000 - tzd = m.group("tzd") or "Z" - dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds) - if tzd != "Z": - tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")] - tzd_hours *= -1 - if tzd_hours < 0: - tzd_minutes *= -1 - dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes) - return dt - _test_converter_date = staticmethod(_converter_date) - - def _getter_bag(namespace, name, converter): - def get(self): - cached = self.cache.get(namespace, {}).get(name) - if cached: - return cached - retval = [] - for element in self.getElement("", namespace, name): - bags = element.getElementsByTagNameNS(RDF_NAMESPACE, "Bag") - if len(bags): - for bag in bags: - for item in bag.getElementsByTagNameNS(RDF_NAMESPACE, "li"): - value = self._getText(item) - value = converter(value) - retval.append(value) - ns_cache = self.cache.setdefault(namespace, {}) - ns_cache[name] = retval - return retval - return get - - def _getter_seq(namespace, name, converter): - def get(self): - cached = self.cache.get(namespace, {}).get(name) - if cached: - return cached - retval = [] - for element in self.getElement("", namespace, name): - seqs = element.getElementsByTagNameNS(RDF_NAMESPACE, "Seq") - if len(seqs): - for seq in seqs: - for item in seq.getElementsByTagNameNS(RDF_NAMESPACE, "li"): - value = self._getText(item) - value = converter(value) - retval.append(value) - else: - value = converter(self._getText(element)) - retval.append(value) - ns_cache = self.cache.setdefault(namespace, {}) - ns_cache[name] = retval - return retval - return get - - def _getter_langalt(namespace, name, converter): - def get(self): - cached = self.cache.get(namespace, {}).get(name) - if cached: - return cached - retval = {} - for element in self.getElement("", namespace, name): - alts = 
element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt") - if len(alts): - for alt in alts: - for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"): - value = self._getText(item) - value = converter(value) - retval[item.getAttribute("xml:lang")] = value - else: - retval["x-default"] = converter(self._getText(element)) - ns_cache = self.cache.setdefault(namespace, {}) - ns_cache[name] = retval - return retval - return get - - def _getter_single(namespace, name, converter): - def get(self): - cached = self.cache.get(namespace, {}).get(name) - if cached: - return cached - value = None - for element in self.getElement("", namespace, name): - if element.nodeType == element.ATTRIBUTE_NODE: - value = element.nodeValue - else: - value = self._getText(element) - break - if value != None: - value = converter(value) - ns_cache = self.cache.setdefault(namespace, {}) - ns_cache[name] = value - return value - return get - - ## - # Contributors to the resource (other than the authors). An unsorted - # array of names. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_contributor = property(_getter_bag(DC_NAMESPACE, "contributor", _converter_string)) - - ## - # Text describing the extent or scope of the resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_coverage = property(_getter_single(DC_NAMESPACE, "coverage", _converter_string)) - - ## - # A sorted array of names of the authors of the resource, listed in order - # of precedence. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_creator = property(_getter_seq(DC_NAMESPACE, "creator", _converter_string)) - - ## - # A sorted array of dates (datetime.datetime instances) of signifigance to - # the resource. The dates and times are in UTC. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_date = property(_getter_seq(DC_NAMESPACE, "date", _converter_date)) - - ## - # A language-keyed dictionary of textual descriptions of the content of the - # resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_description = property(_getter_langalt(DC_NAMESPACE, "description", _converter_string)) - - ## - # The mime-type of the resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_format = property(_getter_single(DC_NAMESPACE, "format", _converter_string)) - - ## - # Unique identifier of the resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_identifier = property(_getter_single(DC_NAMESPACE, "identifier", _converter_string)) - - ## - # An unordered array specifying the languages used in the resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_language = property(_getter_bag(DC_NAMESPACE, "language", _converter_string)) - - ## - # An unordered array of publisher names. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_publisher = property(_getter_bag(DC_NAMESPACE, "publisher", _converter_string)) - - ## - # An unordered array of text descriptions of relationships to other - # documents. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_relation = property(_getter_bag(DC_NAMESPACE, "relation", _converter_string)) - - ## - # A language-keyed dictionary of textual descriptions of the rights the - # user has to this resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_rights = property(_getter_langalt(DC_NAMESPACE, "rights", _converter_string)) - - ## - # Unique identifier of the work from which this resource was derived. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_source = property(_getter_single(DC_NAMESPACE, "source", _converter_string)) - - ## - # An unordered array of descriptive phrases or keywrods that specify the - # topic of the content of the resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_subject = property(_getter_bag(DC_NAMESPACE, "subject", _converter_string)) - - ## - # A language-keyed dictionary of the title of the resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_title = property(_getter_langalt(DC_NAMESPACE, "title", _converter_string)) - - ## - # An unordered array of textual descriptions of the document type. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - dc_type = property(_getter_bag(DC_NAMESPACE, "type", _converter_string)) - - ## - # An unformatted text string representing document keywords. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - pdf_keywords = property(_getter_single(PDF_NAMESPACE, "Keywords", _converter_string)) - - ## - # The PDF file version, for example 1.0, 1.3. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - pdf_pdfversion = property(_getter_single(PDF_NAMESPACE, "PDFVersion", _converter_string)) - - ## - # The name of the tool that created the PDF document. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - pdf_producer = property(_getter_single(PDF_NAMESPACE, "Producer", _converter_string)) - - ## - # The date and time the resource was originally created. The date and - # time are returned as a UTC datetime.datetime object. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - xmp_createDate = property(_getter_single(XMP_NAMESPACE, "CreateDate", _converter_date)) - - ## - # The date and time the resource was last modified. The date and time - # are returned as a UTC datetime.datetime object. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - xmp_modifyDate = property(_getter_single(XMP_NAMESPACE, "ModifyDate", _converter_date)) - - ## - # The date and time that any metadata for this resource was last - # changed. The date and time are returned as a UTC datetime.datetime - # object. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - xmp_metadataDate = property(_getter_single(XMP_NAMESPACE, "MetadataDate", _converter_date)) - - ## - # The name of the first known tool used to create the resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - xmp_creatorTool = property(_getter_single(XMP_NAMESPACE, "CreatorTool", _converter_string)) - - ## - # The common identifier for all versions and renditions of this resource. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - xmpmm_documentId = property(_getter_single(XMPMM_NAMESPACE, "DocumentID", _converter_string)) - - ## - # An identifier for a specific incarnation of a document, updated each - # time a file is saved. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - xmpmm_instanceId = property(_getter_single(XMPMM_NAMESPACE, "InstanceID", _converter_string)) - - def custom_properties(self): - if not hasattr(self, "_custom_properties"): - self._custom_properties = {} - for node in self.getNodesInNamespace("", PDFX_NAMESPACE): - key = node.localName - while True: - # see documentation about PDFX_NAMESPACE earlier in file - idx = key.find(u"\u2182") - if idx == -1: - break - key = key[:idx] + chr(int(key[idx+1:idx+5], base=16)) + key[idx+5:] - if node.nodeType == node.ATTRIBUTE_NODE: - value = node.nodeValue - else: - value = self._getText(node) - self._custom_properties[key] = value - return self._custom_properties - - ## - # Retrieves custom metadata properties defined in the undocumented pdfx - # metadata schema. - #

Stability: Added in v1.12, will exist for all future v1.x releases. - # @return Returns a dictionary of key/value items for custom metadata - # properties. - custom_properties = property(custom_properties) - - From 880d360e0de66bf617978920a3f0c66f23490cf1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 17:46:27 +0530 Subject: [PATCH 43/57] Fix regression that broke DnD of files having a # character in their names to the book details panel --- src/calibre/gui2/dnd.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/dnd.py b/src/calibre/gui2/dnd.py index c474fed537..5aa2cabf8d 100644 --- a/src/calibre/gui2/dnd.py +++ b/src/calibre/gui2/dnd.py @@ -130,6 +130,10 @@ def data_as_string(f, md): pass return raw +def path_from_qurl(qurl): + raw = bytes(bytearray(qurl.encodedPath())) + return urllib.unquote(raw).decode('utf-8') + def dnd_has_extension(md, extensions): if DEBUG: prints('Debugging DND event') @@ -143,7 +147,7 @@ def dnd_has_extension(md, extensions): if md.hasUrls(): urls = [unicode(u.toString()) for u in md.urls()] - paths = [urlparse(u).path for u in urls] + paths = [path_from_qurl(u) for u in md.urls()] exts = frozenset([posixpath.splitext(u)[1][1:].lower() for u in paths if u]) if DEBUG: From 955990c17d8fad45d01a944653f7379946845601 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 27 Aug 2012 21:29:06 +0530 Subject: [PATCH 44/57] ... 
--- recipes/atlantic.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe index daf73aebdc..928f1343b3 100644 --- a/recipes/atlantic.recipe +++ b/recipes/atlantic.recipe @@ -39,7 +39,7 @@ class TheAtlantic(BasicNewsRecipe): cover = soup.find('img', src=True, attrs={'class':'cover'}) if cover is not None: - self.cover_url = cover['src'] + self.cover_url = cover['src'].replace(' ', '%20') feeds = [] seen_titles = set([]) From 6c5d581218dd2927fac33c2703f9fdb3705084f9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 28 Aug 2012 01:28:49 +0530 Subject: [PATCH 45/57] Arcadia by Masahiro Hasegawa --- recipes/arcadia.recipe | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 recipes/arcadia.recipe diff --git a/recipes/arcadia.recipe b/recipes/arcadia.recipe new file mode 100644 index 0000000000..980baa6ab8 --- /dev/null +++ b/recipes/arcadia.recipe @@ -0,0 +1,35 @@ +# -*- coding: utf8 -*- + +from calibre.web.feeds.recipes import BasicNewsRecipe +import re + +class Arcadia_BBS(BasicNewsRecipe): + title = u'Arcadia' + __author__ = 'Masahiro Hasegawa' + language = 'ja' + encoding = 'utf8' + filter_regexps = [r'ad\.jp\.ap\.valuecommerce.com',] + timefmt = '[%Y/%m/%d]' + remove_tags_before = dict(name='a', attrs={'name':'kiji'}) + + sid_list = [] #some sotory id + + def parse_index(self): + result = [] + for sid in self.sid_list: + s_result = [] + soup = self.index_to_soup( + 'http://www.mai-net.net/bbs/sst/sst.php?act=dump&all=%d' + % sid) + sec = soup.findAll('a', attrs={'href':re.compile(r'.*?kiji')}) + for s in sec[:-2]: + s_result.append(dict(title=s.string, + url="http://www.mai-net.net" + s['href'], + date=s.parent.parent.parent.findAll('td')[3].string[:-6], + description='', content='')) + result.append((s_result[0]['title'], s_result)) + return result + + + + From 7d0fcd948437474152903373c269b534b95e3f71 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: 
Tue, 28 Aug 2012 16:42:08 +0530 Subject: [PATCH 46/57] Add code to create a PDF outline using PoDoFo --- setup/extensions.py | 1 + src/calibre/utils/podofo/__init__.py | 22 ++++- src/calibre/utils/podofo/doc.cpp | 39 +++++++- src/calibre/utils/podofo/global.h | 7 ++ src/calibre/utils/podofo/outline.cpp | 142 +++++++++++++++++++++++++++ src/calibre/utils/podofo/podofo.cpp | 3 + 6 files changed, 210 insertions(+), 4 deletions(-) create mode 100644 src/calibre/utils/podofo/outline.cpp diff --git a/setup/extensions.py b/setup/extensions.py index d6052125e5..2f2e2aa9ba 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -140,6 +140,7 @@ extensions = [ [ 'calibre/utils/podofo/utils.cpp', 'calibre/utils/podofo/doc.cpp', + 'calibre/utils/podofo/outline.cpp', 'calibre/utils/podofo/podofo.cpp', ], headers=[ diff --git a/src/calibre/utils/podofo/__init__.py b/src/calibre/utils/podofo/__init__.py index eb1d22d3e2..3134dcd1ba 100644 --- a/src/calibre/utils/podofo/__init__.py +++ b/src/calibre/utils/podofo/__init__.py @@ -98,7 +98,23 @@ def delete_all_but(path, pages): with open(path, 'wb') as f: f.write(raw) -if __name__ == '__main__': - f = u'/tmp/t.pdf' - delete_all_but(f, [0, 1, -2, -1]) +def test_outline(src): + podofo = get_podofo() + p = podofo.PDFDoc() + with open(src, 'rb') as f: + raw = f.read() + p.load(raw) + total = p.page_count() + root = p.create_outline(u'Table of Contents') + for i in xrange(0, total): + root.create(u'Page %d'%i, i, True) + raw = p.write() + out = '/tmp/outlined.pdf' + with open(out, 'wb') as f: + f.write(raw) + print 'Outlined PDF:', out + +if __name__ == '__main__': + import sys + test_outline(sys.argv[-1]) diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index 26951fcdce..fcc7c7253f 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -194,6 +194,41 @@ PDFDoc_set_box(PDFDoc *self, PyObject *args) { Py_RETURN_NONE; } // }}} +// create_outline() {{{ +static PyObject * 
+PDFDoc_create_outline(PDFDoc *self, PyObject *args) { + PyObject *p; + PDFOutlineItem *ans; + PdfString *title; + + if (!PyArg_ParseTuple(args, "U", &p)) return NULL; + title = podofo_convert_pystring(p); + if (title == NULL) return NULL; + + ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType); + if (ans == NULL) goto error; + + try { + PdfOutlines *outlines = self->doc->GetOutlines(); + if (outlines == NULL) {PyErr_NoMemory(); goto error;} + ans->item = outlines->CreateRoot(*title); + if (ans->item == NULL) {PyErr_NoMemory(); goto error;} + ans->doc = self->doc; + } catch(const PdfError & err) { + podofo_set_exception(err); goto error; + } catch (...) { + PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to create the outline"); + goto error; + } + + delete title; + return (PyObject*)ans; +error: + Py_XDECREF(ans); delete title; + return NULL; + +} // }}} + // Properties {{{ static PyObject * @@ -430,7 +465,9 @@ static PyMethodDef PDFDoc_methods[] = { {"set_box", (PyCFunction)PDFDoc_set_box, METH_VARARGS, "set_box(page_num, box, left, bottom, width, height) -> Set the PDF bounding box for the page numbered nu, box must be one of: MediaBox, CropBox, TrimBox, BleedBox, ArtBox. The numbers are interpreted as pts." }, - + {"create_outline", (PyCFunction)PDFDoc_create_outline, METH_VARARGS, + "create_outline(title) -> Create an outline, return the root outline item." 
+ }, {NULL} /* Sentinel */ }; diff --git a/src/calibre/utils/podofo/global.h b/src/calibre/utils/podofo/global.h index c7a5696ad6..fa9a141b21 100644 --- a/src/calibre/utils/podofo/global.h +++ b/src/calibre/utils/podofo/global.h @@ -26,7 +26,14 @@ typedef struct { } PDFDoc; +typedef struct { + PyObject_HEAD + PdfMemDocument *doc; + PdfOutlineItem *item; +} PDFOutlineItem; + extern PyTypeObject PDFDocType; +extern PyTypeObject PDFOutlineItemType; extern PyObject *Error; // Utilities diff --git a/src/calibre/utils/podofo/outline.cpp b/src/calibre/utils/podofo/outline.cpp new file mode 100644 index 0000000000..40f5852204 --- /dev/null +++ b/src/calibre/utils/podofo/outline.cpp @@ -0,0 +1,142 @@ +/* + * outline.cpp + * Copyright (C) 2012 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#include "global.h" + +using namespace pdf; + +// Constructor/destructor {{{ +static void +dealloc(PDFOutlineItem* self) +{ + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +new_item(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PDFOutlineItem *self; + + self = (PDFOutlineItem *)type->tp_alloc(type, 0); + if (self != NULL) { + self->item = NULL; + } + + return (PyObject *)self; +} +// }}} + +// erase() {{{ +static PyObject * +erase(PDFOutlineItem *self, PyObject *args) { + try { + self->item->Erase(); + } catch(const PdfError & err) { + podofo_set_exception(err); + return NULL; + } + Py_RETURN_NONE; +} // }}} + +static PyObject * +create(PDFOutlineItem *self, PyObject *args) { + PyObject *ptitle, *as_child = NULL; + PDFOutlineItem *ans; + int num; + PdfString *title; + PdfPage *page; + + if (!PyArg_ParseTuple(args, "Ui|O", &ptitle, &num, &as_child)) return NULL; + title = podofo_convert_pystring(ptitle); + if (title == NULL) return NULL; + + ans = PyObject_New(PDFOutlineItem, &PDFOutlineItemType); + if (ans == NULL) goto error; + ans->doc = self->doc; + + try { + page = self->doc->GetPage(num); + if (page == NULL) { 
PyErr_Format(PyExc_ValueError, "Invalid page number: %d", num); goto error; } + PdfDestination dest(page); + if (as_child != NULL && PyObject_IsTrue(as_child)) { + ans->item = self->item->CreateChild(*title, dest); + } else + ans->item = self->item->CreateNext(*title, dest); + } catch (const PdfError &err) { + podofo_set_exception(err); goto error; + } catch (...) { + PyErr_SetString(PyExc_Exception, "An unknown error occurred while trying to create the outline item"); + goto error; + } + + delete title; + return (PyObject*) ans; +error: + Py_XDECREF(ans); delete title; + return NULL; +} + +static PyMethodDef methods[] = { + + {"create", (PyCFunction)create, METH_VARARGS, + "create(title, pagenum, as_child=False) -> Create a new outline item with title 'title', pointing to page number pagenum. If as_child is True the new item will be a child of this item otherwise it will be a sibling. Returns the newly created item." + }, + + {"erase", (PyCFunction)erase, METH_VARARGS, + "erase() -> Delete this item and all its children, removing it from the outline tree completely." 
+ }, + + {NULL} /* Sentinel */ +}; + + +// Type definition {{{ +PyTypeObject pdf::PDFOutlineItemType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "podofo.PDFOutlineItem", /*tp_name*/ + sizeof(PDFOutlineItem), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "PDF Outline items", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + new_item, /* tp_new */ + +}; +// }}} + + diff --git a/src/calibre/utils/podofo/podofo.cpp b/src/calibre/utils/podofo/podofo.cpp index eefe182cec..250c18b4a8 100644 --- a/src/calibre/utils/podofo/podofo.cpp +++ b/src/calibre/utils/podofo/podofo.cpp @@ -46,6 +46,9 @@ initpodofo(void) if (PyType_Ready(&pdf::PDFDocType) < 0) return; + if (PyType_Ready(&pdf::PDFOutlineItemType) < 0) + return; + pdf::Error = PyErr_NewException((char*)"podofo.Error", NULL, NULL); if (pdf::Error == NULL) return; From c6d9524883a2be90c05d98c99dd252298beab596 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 28 Aug 2012 20:01:13 +0530 Subject: [PATCH 47/57] Cumhuriyet Yzarlar by Sethi Eksi --- recipes/cumhuriyet.recipe | 59 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 recipes/cumhuriyet.recipe diff --git a/recipes/cumhuriyet.recipe b/recipes/cumhuriyet.recipe new file mode 100644 index 0000000000..adbe9fad90 --- /dev/null +++ b/recipes/cumhuriyet.recipe @@ 
-0,0 +1,59 @@ +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +from __future__ import unicode_literals +# based on a recipe by Darko Miletic +# +# Cumhuriyet Gazetesi'nin köşe yazıları okuyuculara cumhuriyet.com.tr +# adresi üzerinden ücretsiz olarak sunulmaktadır. +# Calibre yazılımıyla kullanılabilen bu reçete Cumhuriyet Gazetesi'nin +# günlük köşe yazılarını hızlıca derleyip e-okuyucunuzda kolayca okunabilir +# hale getirir. Yazıların yayınlanma saati sabah olduğu için reçeteyi +# 7:00-24:00 arasında çizelgelemeniz gerekmektedir. + +__license__ = 'GPL v3' +__copyright__ = '2012, Sethi Eksi ' +''' +cumhuriyet.com.tr +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Cumhuriyet_tr(BasicNewsRecipe): + title = 'Cumhuriyet - Yazarlar' + __author__ = 'Cumhuriyet Gazetesi Yazarları' + description = 'Günlük Cumhuriyet Gazetesi Köşe Yazıları' + publisher = 'Cumhuriyet' + category = 'news, politics, Turkey' + oldest_article = 1 + max_articles_per_feed = 150 + no_stylesheets = True + encoding = 'cp1254' + use_embedded_content = False + masthead_url = 'http://www.cumhuriyet.com.tr/home/cumhuriyet/sablon2000/img/cumlogobeyaz1.gif' + language = 'tr' + extra_css = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} + .article_description,body{font-family: Arial,Verdana,Helvetica,sans1,sans-serif} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [dict(name=['embed','iframe','object','link','base'])] + remove_tags_before = dict(attrs={'class':'c565'}) + remove_tags_after = dict(attrs={'class':'c565'}) + + feeds = [ + (u'Yazarlar' , u'http://www.cumhuriyet.com.tr/?kn=5&xl=rss') + ] + + def print_version(self, url): + articleid = url.rpartition('hn=')[2] + return 'http://www.cumhuriyet.com.tr/?hn=' + articleid + + def preprocess_html(self, soup): + return self.adeify_images(soup) + From 
51df4f1d91da69f7097465e9c0b7ba66a15eabe7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 01:03:41 +0530 Subject: [PATCH 48/57] Fix #1042900 (Updated recipe for Coding Horror blog) --- recipes/coding_horror.recipe | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/recipes/coding_horror.recipe b/recipes/coding_horror.recipe index 417c4c774a..d63d5a2520 100644 --- a/recipes/coding_horror.recipe +++ b/recipes/coding_horror.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2012, Darko Miletic ' ''' www.codinghorror.com/blog/ ''' @@ -14,28 +12,25 @@ class CodingHorror(BasicNewsRecipe): description = 'programming and human factors - Jeff Atwood' category = 'blog, programming' publisher = 'Jeff Atwood' - language = 'en' - - author = 'Jeff Atwood' + language = 'en' oldest_article = 30 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = True - encoding = 'cp1252' + encoding = 'utf8' + auto_cleanup = True - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - , '--author' , author - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nauthors="' + author + '"' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher': publisher + , 'language' : language + , 'authors' : publisher + } remove_tags = [ dict(name=['object','link']) ,dict(name='div',attrs={'class':'feedflare'}) ] - feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror' )] - + feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror' )] \ No newline at end of file From 7e0ac417d7d60b1ac5f8f90fad1be4b8ab148d3e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 09:40:38 +0530 Subject: [PATCH 49/57] ... 
--- src/calibre/devices/kobo/books.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/kobo/books.py b/src/calibre/devices/kobo/books.py index 8d58dde892..1d9ccb9a0e 100644 --- a/src/calibre/devices/kobo/books.py +++ b/src/calibre/devices/kobo/books.py @@ -28,7 +28,13 @@ class Book(Book_): self.size = size # will be set later if None if ContentType == '6' and date is not None: - self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f") + try: + self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f") + except: + try: + self.datetime = time.gmtime(os.path.getctime(self.path)) + except: + self.datetime = time.gmtime() else: try: self.datetime = time.gmtime(os.path.getctime(self.path)) From 8c04dc14bdb9ca0255077bd3cc4536f1d62968cb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 09:45:23 +0530 Subject: [PATCH 50/57] ... --- src/calibre/devices/kobo/books.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/kobo/books.py b/src/calibre/devices/kobo/books.py index 1d9ccb9a0e..48033b3522 100644 --- a/src/calibre/devices/kobo/books.py +++ b/src/calibre/devices/kobo/books.py @@ -6,6 +6,7 @@ __copyright__ = '2010, Timothy Legge ' import os import time +from calibre.utils.date import parse_date from calibre.devices.usbms.books import Book as Book_ class Book(Book_): @@ -32,9 +33,13 @@ class Book(Book_): self.datetime = time.strptime(date, "%Y-%m-%dT%H:%M:%S.%f") except: try: - self.datetime = time.gmtime(os.path.getctime(self.path)) + self.datetime = parse_date(date, + assume_utc=True).timetuple() except: - self.datetime = time.gmtime() + try: + self.datetime = time.gmtime(os.path.getctime(self.path)) + except: + self.datetime = time.gmtime() else: try: self.datetime = time.gmtime(os.path.getctime(self.path)) From 1dea1180043fc9a91c50fb409eb2a7c4f4e081c5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 13:39:44 +0530 Subject: [PATCH 51/57] PDF 
Output: Generate a PDF Outline based on the Table fo Contents of the input document --- .../ebooks/conversion/plugins/pdf_output.py | 10 +-- src/calibre/ebooks/pdf/outline_writer.py | 66 +++++++++++++++++ src/calibre/ebooks/pdf/writer.py | 71 ++++++++++++++++--- src/calibre/utils/podofo/doc.cpp | 7 +- 4 files changed, 138 insertions(+), 16 deletions(-) create mode 100644 src/calibre/ebooks/pdf/outline_writer.py diff --git a/src/calibre/ebooks/conversion/plugins/pdf_output.py b/src/calibre/ebooks/conversion/plugins/pdf_output.py index 9074d445cc..2c35fc91b9 100644 --- a/src/calibre/ebooks/conversion/plugins/pdf_output.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_output.py @@ -107,7 +107,7 @@ class PDFOutput(OutputFormatPlugin): def convert_images(self, images): from calibre.ebooks.pdf.writer import ImagePDFWriter - self.write(ImagePDFWriter, images) + self.write(ImagePDFWriter, images, None) def get_cover_data(self): oeb = self.oeb @@ -132,11 +132,13 @@ class PDFOutput(OutputFormatPlugin): opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) - self.write(PDFWriter, [s.path for s in opf.spine]) + self.write(PDFWriter, [s.path for s in opf.spine], getattr(opf, + 'toc', None)) - def write(self, Writer, items): + def write(self, Writer, items, toc): from calibre.ebooks.pdf.writer import PDFMetadata - writer = Writer(self.opts, self.log, cover_data=self.cover_data) + writer = Writer(self.opts, self.log, cover_data=self.cover_data, + toc=toc) close = False if not hasattr(self.output_path, 'write'): diff --git a/src/calibre/ebooks/pdf/outline_writer.py b/src/calibre/ebooks/pdf/outline_writer.py new file mode 100644 index 0000000000..3474709431 --- /dev/null +++ b/src/calibre/ebooks/pdf/outline_writer.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' 
+__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os +from collections import defaultdict + +class Outline(object): + + def __init__(self, toc, items): + self.toc = toc + self.items = items + self.anchor_map = {} + self.pos_map = defaultdict(dict) + self.toc_map = {} + for item in items: + self.anchor_map[item] = anchors = set() + item_path = os.path.abspath(item).replace('/', os.sep) + if self.toc is not None: + for x in self.toc.flat(): + if x.abspath != item_path: continue + x.outline_item_ = item + if x.fragment: + anchors.add(x.fragment) + + def set_pos(self, item, anchor, pagenum, ypos): + self.pos_map[item][anchor] = (pagenum, ypos) + + def get_pos(self, toc): + page, ypos = 0, 0 + item = getattr(toc, 'outline_item_', None) + if item is not None: + if toc.fragment: + amap = self.pos_map.get(item, None) + if amap is not None: + page, ypos = amap.get(toc.fragment, (0, 0)) + else: + page, ypos = self.pos_map.get(item, {}).get(None, (0, 0)) + return page, ypos + + def add_children(self, toc, parent): + for child in toc: + page, ypos = self.get_pos(child) + text = child.text or _('Page %d')%page + cn = parent.create(text, page, True) + self.add_children(child, cn) + + def __call__(self, doc): + self.pos_map = dict(self.pos_map) + first = None + for child in self.toc: + page, ypos = self.get_pos(child) + text = child.text or _('Page %d')%page + if first is None: + first = node = doc.create_outline(text, page) + else: + node = first.create(text, page, False) + self.add_children(child, node) + + + diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 5ae03704c6..ccbc942eb7 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -8,16 +8,16 @@ __docformat__ = 'restructuredtext en' Write content to PDF. 
''' -import os -import shutil +import os, shutil, json from future_builtins import map from PyQt4.Qt import (QEventLoop, QObject, QPrinter, QSizeF, Qt, QPainter, - QPixmap, QTimer) -from PyQt4.QtWebKit import QWebView + QPixmap, QTimer, pyqtProperty, QString) +from PyQt4.QtWebKit import QWebView, QWebPage from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks.pdf.pageoptions import (unit, paper_size, orientation) +from calibre.ebooks.pdf.outline_writer import Outline from calibre.ebooks.metadata import authors_to_string from calibre.ptempfile import PersistentTemporaryFile from calibre import __appname__, __version__, fit_image, isosx, force_unicode @@ -36,7 +36,7 @@ def get_custom_size(opts): custom_size = None return custom_size -def get_pdf_printer(opts, for_comic=False, output_file_name=None): +def get_pdf_printer(opts, for_comic=False, output_file_name=None): # {{{ from calibre.gui2 import is_ok_to_use_qt if not is_ok_to_use_qt(): raise Exception('Not OK to use Qt') @@ -82,6 +82,7 @@ def get_pdf_printer(opts, for_comic=False, output_file_name=None): printer.setOutputFormat(QPrinter.NativeFormat) return printer +# }}} def draw_image_page(printer, painter, p, preserve_aspect_ratio=True): page_rect = printer.pageRect() @@ -102,7 +103,7 @@ def draw_image_page(printer, painter, p, preserve_aspect_ratio=True): painter.drawPixmap(page_rect, p, p.rect()) -class PDFMetadata(object): +class PDFMetadata(object): # {{{ def __init__(self, oeb_metadata=None): self.title = _(u'Unknown') self.author = _(u'Unknown') @@ -118,10 +119,24 @@ class PDFMetadata(object): self.title = force_unicode(self.title) self.author = force_unicode(self.author) +# }}} + +class Page(QWebPage): + + def __init__(self, log): + self.log = log + QWebPage.__init__(self) + + + def javaScriptConsoleMessage(self, msg, lineno, msgid): + self.log.debug(u'JS:', unicode(msg)) + + def javaScriptAlert(self, frame, msg): + self.log(unicode(msg)) class PDFWriter(QObject): # {{{ - def 
__init__(self, opts, log, cover_data=None): + def __init__(self, opts, log, cover_data=None, toc=None): from calibre.gui2 import is_ok_to_use_qt from calibre.utils.podofo import get_podofo if not is_ok_to_use_qt(): @@ -134,6 +149,8 @@ class PDFWriter(QObject): # {{{ self.loop = QEventLoop() self.view = QWebView() + self.page = Page(self.log) + self.view.setPage(self.page) self.view.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform) self.view.loadFinished.connect(self._render_html, type=Qt.QueuedConnection) @@ -147,10 +164,12 @@ class PDFWriter(QObject): # {{{ self.opts = opts self.cover_data = cover_data self.paged_js = None + self.toc = toc def dump(self, items, out_stream, pdf_metadata): self.metadata = pdf_metadata self._delete_tmpdir() + self.outline = Outline(self.toc, items) self.render_queue = items self.combine_queue = [] @@ -178,6 +197,7 @@ class PDFWriter(QObject): # {{{ self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1))) self.logger.debug('Processing %s...' 
% item) + self.current_item = item load_html(item, self.view) def _render_html(self, ok): @@ -192,11 +212,22 @@ class PDFWriter(QObject): # {{{ return self._render_book() + def _pass_json_value_getter(self): + val = json.dumps(self.bridge_value) + return QString(val) + + def _pass_json_value_setter(self, value): + self.bridge_value = json.loads(unicode(value)) + + _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter, + fset=_pass_json_value_setter) + def do_paged_render(self, outpath): from PyQt4.Qt import QSize, QPainter if self.paged_js is None: from calibre.utils.resources import compiled_coffeescript self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils') + self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing') self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged') printer = get_pdf_printer(self.opts, output_file_name=outpath) painter = QPainter(printer) @@ -204,12 +235,20 @@ class PDFWriter(QObject): # {{{ zoomy = printer.logicalDpiY()/self.view.logicalDpiY() painter.scale(zoomx, zoomy) + self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self) pr = printer.pageRect() evaljs = self.view.page().mainFrame().evaluateJavaScript evaljs(self.paged_js) self.view.page().setViewportSize(QSize(pr.width()/zoomx, pr.height()/zoomy)) evaljs(''' + py_bridge.__defineGetter__('value', function() { + return JSON.parse(this._pass_json_value); + }); + py_bridge.__defineSetter__('value', function(val) { + this._pass_json_value = JSON.stringify(val); + }); + document.body.style.backgroundColor = "white"; paged_display.set_geometry(1, 0, 0, 0); paged_display.layout(); @@ -223,6 +262,17 @@ class PDFWriter(QObject): # {{{ evaljs('window.scrollTo(%d, 0)'%nsl[0]) printer.newPage() + self.bridge_value = tuple(self.outline.anchor_map[self.current_item]) + evaljs('py_bridge.value = book_indexing.anchor_positions(py_bridge.value)') + amap = self.bridge_value + if not isinstance(amap, dict): + amap = {} # Some 
javascript error occurred + pages = self.doc.page_count() + self.outline.set_pos(self.current_item, None, pages, 0) + for anchor, x in amap.iteritems(): + pagenum, ypos = x + self.outline.set_pos(self.current_item, anchor, pages + pagenum, ypos) + painter.end() printer.abort() self.append_doc(outpath) @@ -266,6 +316,7 @@ class PDFWriter(QObject): # {{{ self.doc.author = self.metadata.author if self.metadata.tags: self.doc.keywords = self.metadata.tags + self.outline(self.doc) raw = self.doc.write() self.out_stream.write(raw) self.render_succeeded = True @@ -275,9 +326,9 @@ class PDFWriter(QObject): # {{{ # }}} -class ImagePDFWriter(object): +class ImagePDFWriter(object): # {{{ - def __init__(self, opts, log, cover_data=None): + def __init__(self, opts, log, cover_data=None, toc=None): self.opts = opts self.log = log @@ -326,6 +377,6 @@ class ImagePDFWriter(object): self.log.warn('Failed to load image', i) painter.end() - +# }}} diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp index fcc7c7253f..7166b2320e 100644 --- a/src/calibre/utils/podofo/doc.cpp +++ b/src/calibre/utils/podofo/doc.cpp @@ -200,8 +200,9 @@ PDFDoc_create_outline(PDFDoc *self, PyObject *args) { PyObject *p; PDFOutlineItem *ans; PdfString *title; + int pagenum; - if (!PyArg_ParseTuple(args, "U", &p)) return NULL; + if (!PyArg_ParseTuple(args, "Ui", &p, &pagenum)) return NULL; title = podofo_convert_pystring(p); if (title == NULL) return NULL; @@ -214,6 +215,8 @@ PDFDoc_create_outline(PDFDoc *self, PyObject *args) { ans->item = outlines->CreateRoot(*title); if (ans->item == NULL) {PyErr_NoMemory(); goto error;} ans->doc = self->doc; + PdfDestination dest(self->doc->GetPage(pagenum)); + ans->item->SetDestination(dest); } catch(const PdfError & err) { podofo_set_exception(err); goto error; } catch (...) 
{ @@ -466,7 +469,7 @@ static PyMethodDef PDFDoc_methods[] = { "set_box(page_num, box, left, bottom, width, height) -> Set the PDF bounding box for the page numbered nu, box must be one of: MediaBox, CropBox, TrimBox, BleedBox, ArtBox. The numbers are interpreted as pts." }, {"create_outline", (PyCFunction)PDFDoc_create_outline, METH_VARARGS, - "create_outline(title) -> Create an outline, return the root outline item." + "create_outline(title, pagenum) -> Create an outline, return the first outline item." }, {NULL} /* Sentinel */ From 22fad32e7662d05c3d92b319678ea4450c67ae3d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 14:16:43 +0530 Subject: [PATCH 52/57] Metadata dialog: Comments editor: Allow specifying the name of a link when using the insert link button. Fixes #1042683 ({ENHANCEMENT] Add "Label" to Insert URL) --- src/calibre/gui2/comments_editor.py | 43 ++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/calibre/gui2/comments_editor.py b/src/calibre/gui2/comments_editor.py index 10bcbf6218..39ada6b6cb 100644 --- a/src/calibre/gui2/comments_editor.py +++ b/src/calibre/gui2/comments_editor.py @@ -5,19 +5,19 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, os +import re, os, json from lxml import html import sip from PyQt4.Qt import (QApplication, QFontInfo, QSize, QWidget, QPlainTextEdit, - QToolBar, QVBoxLayout, QAction, QIcon, Qt, QTabWidget, QUrl, - QSyntaxHighlighter, QColor, QChar, QColorDialog, QMenu, QInputDialog, - QHBoxLayout, QKeySequence) + QToolBar, QVBoxLayout, QAction, QIcon, Qt, QTabWidget, QUrl, QFormLayout, + QSyntaxHighlighter, QColor, QChar, QColorDialog, QMenu, QDialog, + QHBoxLayout, QKeySequence, QLineEdit, QDialogButtonBox) from PyQt4.QtWebKit import QWebView, QWebPage from calibre.ebooks.chardet import xml_to_unicode -from calibre import xml_replace_entities +from calibre import xml_replace_entities, 
prepare_string_for_xml from calibre.gui2 import open_url from calibre.utils.soupparser import fromstring from calibre.utils.config import tweaks @@ -191,14 +191,36 @@ class EditorWidget(QWebView): # {{{ self.exec_command('hiliteColor', unicode(col.name())) def insert_link(self, *args): - link, ok = QInputDialog.getText(self, _('Create link'), - _('Enter URL')) - if not ok: + link, name = self.ask_link() + if not link: return url = self.parse_link(unicode(link)) if url.isValid(): url = unicode(url.toString()) - self.exec_command('createLink', url) + if name: + self.exec_command('insertHTML', + '%s'%(prepare_string_for_xml(url, True), + prepare_string_for_xml(name))) + else: + self.exec_command('createLink', url) + + def ask_link(self): + d = QDialog(self) + d.setWindowTitle(_('Create link')) + l = QFormLayout() + d.setLayout(l) + d.url = QLineEdit(d) + d.name = QLineEdit(d) + d.bb = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel) + l.addRow(_('Enter &URL:'), d.url) + l.addRow(_('Enter name (optional):'), d.name) + l.addRow(d.bb) + d.bb.accepted.connect(d.accept) + d.bb.rejected.connect(d.reject) + link, name = None, None + if d.exec_() == d.Accepted: + link, name = unicode(d.url.text()).strip(), unicode(d.name.text()).strip() + return link, name def parse_link(self, link): link = link.strip() @@ -227,7 +249,8 @@ class EditorWidget(QWebView): # {{{ def exec_command(self, cmd, arg=None): frame = self.page().mainFrame() if arg is not None: - js = 'document.execCommand("%s", false, "%s");' % (cmd, arg) + js = 'document.execCommand("%s", false, %s);' % (cmd, + json.dumps(unicode(arg))) else: js = 'document.execCommand("%s", false, null);' % cmd frame.evaluateJavaScript(js) From a90367296cab88ed65feebbe3abee132a9ba95a2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 14:52:33 +0530 Subject: [PATCH 53/57] ... 
--- src/calibre/ebooks/pdf/outline_writer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/calibre/ebooks/pdf/outline_writer.py b/src/calibre/ebooks/pdf/outline_writer.py index 3474709431..4b2db84f9e 100644 --- a/src/calibre/ebooks/pdf/outline_writer.py +++ b/src/calibre/ebooks/pdf/outline_writer.py @@ -52,14 +52,10 @@ class Outline(object): def __call__(self, doc): self.pos_map = dict(self.pos_map) - first = None for child in self.toc: page, ypos = self.get_pos(child) text = child.text or _('Page %d')%page - if first is None: - first = node = doc.create_outline(text, page) - else: - node = first.create(text, page, False) + node = doc.create_outline(text, page) self.add_children(child, node) From 05de8b4b218a0102717a8987e5ce70b0b44e68c9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 16:36:33 +0530 Subject: [PATCH 54/57] PDF Output: Allow choosing the default font family and size when generating PDF files (under PDF Options) in the conversion dialog --- src/calibre/__init__.py | 9 ++ .../ebooks/conversion/plugins/pdf_output.py | 19 +++ src/calibre/ebooks/pdf/writer.py | 23 ++- src/calibre/gui2/convert/__init__.py | 11 +- src/calibre/gui2/convert/pdf_output.py | 6 +- src/calibre/gui2/convert/pdf_output.ui | 132 +++++++++++++++--- src/calibre/gui2/viewer/documentview.py | 17 +-- 7 files changed, 173 insertions(+), 44 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index bfe23cee67..31ce61328a 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -703,6 +703,15 @@ if isosx: import traceback traceback.print_exc() +def load_builtin_fonts(): + import glob + from PyQt4.Qt import QFontDatabase + base = P('fonts/liberation/*.ttf') + for f in glob.glob(base): + QFontDatabase.addApplicationFont(f) + return 'Liberation Serif', 'Liberation Sans', 'Liberation Mono' + + def ipython(user_ns=None): from calibre.utils.ipython import ipython ipython(user_ns=user_ns) diff --git 
a/src/calibre/ebooks/conversion/plugins/pdf_output.py b/src/calibre/ebooks/conversion/plugins/pdf_output.py index 2c35fc91b9..35504b31fb 100644 --- a/src/calibre/ebooks/conversion/plugins/pdf_output.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_output.py @@ -88,6 +88,25 @@ class PDFOutput(OutputFormatPlugin): help=_('Preserve the aspect ratio of the cover, instead' ' of stretching it to fill the full first page of the' ' generated pdf.')), + OptionRecommendation(name='pdf_serif_family', + recommended_value='Times New Roman', help=_( + 'The font family used to render serif fonts')), + OptionRecommendation(name='pdf_sans_family', + recommended_value='Helvetica', help=_( + 'The font family used to render sans-serif fonts')), + OptionRecommendation(name='pdf_mono_family', + recommended_value='Courier New', help=_( + 'The font family used to render monospaced fonts')), + OptionRecommendation(name='pdf_standard_font', choices=['serif', + 'sans', 'mono'], + recommended_value='serif', help=_( + 'The font family used to render monospaced fonts')), + OptionRecommendation(name='pdf_default_font_size', + recommended_value=20, help=_( + 'The default font size')), + OptionRecommendation(name='pdf_mono_font_size', + recommended_value=16, help=_( + 'The default font size for monospaced text')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index ccbc942eb7..1ec98ce181 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -13,14 +13,14 @@ from future_builtins import map from PyQt4.Qt import (QEventLoop, QObject, QPrinter, QSizeF, Qt, QPainter, QPixmap, QTimer, pyqtProperty, QString) -from PyQt4.QtWebKit import QWebView, QWebPage +from PyQt4.QtWebKit import QWebView, QWebPage, QWebSettings from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks.pdf.pageoptions import (unit, paper_size, orientation) from 
calibre.ebooks.pdf.outline_writer import Outline from calibre.ebooks.metadata import authors_to_string from calibre.ptempfile import PersistentTemporaryFile -from calibre import __appname__, __version__, fit_image, isosx, force_unicode +from calibre import (__appname__, __version__, fit_image, isosx, force_unicode) from calibre.ebooks.oeb.display.webview import load_html def get_custom_size(opts): @@ -123,10 +123,25 @@ class PDFMetadata(object): # {{{ class Page(QWebPage): - def __init__(self, log): + def __init__(self, opts, log): self.log = log QWebPage.__init__(self) + settings = self.settings() + settings.setFontSize(QWebSettings.DefaultFontSize, + opts.pdf_default_font_size) + settings.setFontSize(QWebSettings.DefaultFixedFontSize, + opts.pdf_mono_font_size) + settings.setFontSize(QWebSettings.MinimumLogicalFontSize, 8) + settings.setFontSize(QWebSettings.MinimumFontSize, 8) + std = {'serif':opts.pdf_serif_family, 'sans':opts.pdf_sans_family, + 'mono':opts.pdf_mono_family}.get(opts.pdf_standard_font, + opts.pdf_serif_family) + settings.setFontFamily(QWebSettings.StandardFont, std) + settings.setFontFamily(QWebSettings.SerifFont, opts.pdf_serif_family) + settings.setFontFamily(QWebSettings.SansSerifFont, + opts.pdf_sans_family) + settings.setFontFamily(QWebSettings.FixedFont, opts.pdf_mono_family) def javaScriptConsoleMessage(self, msg, lineno, msgid): self.log.debug(u'JS:', unicode(msg)) @@ -149,7 +164,7 @@ class PDFWriter(QObject): # {{{ self.loop = QEventLoop() self.view = QWebView() - self.page = Page(self.log) + self.page = Page(opts, self.log) self.view.setPage(self.page) self.view.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform) self.view.loadFinished.connect(self._render_html, diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py index 84667aa740..e01238a2e5 100644 --- a/src/calibre/gui2/convert/__init__.py +++ b/src/calibre/gui2/convert/__init__.py @@ -9,8 +9,9 @@ 
__docformat__ = 'restructuredtext en' import textwrap, codecs, importlib from functools import partial -from PyQt4.Qt import QWidget, QSpinBox, QDoubleSpinBox, QLineEdit, QTextEdit, \ - QCheckBox, QComboBox, Qt, QIcon, pyqtSignal, QLabel +from PyQt4.Qt import (QWidget, QSpinBox, QDoubleSpinBox, QLineEdit, QTextEdit, + QCheckBox, QComboBox, Qt, QIcon, pyqtSignal, QLabel, QFontComboBox, QFont, + QFontInfo) from calibre.customize.conversion import OptionRecommendation from calibre.ebooks.conversion.config import load_defaults, \ @@ -35,8 +36,6 @@ def bulk_defaults_for_input_format(fmt): return load_defaults(w.COMMIT_NAME) return {} - - class Widget(QWidget): TITLE = _('Unknown') @@ -143,6 +142,8 @@ class Widget(QWidget): if not ans: ans = None return ans + elif isinstance(g, QFontComboBox): + ans = unicode(QFontInfo(g.currentFont().family())) elif isinstance(g, EncodingComboBox): ans = unicode(g.currentText()).strip() try: @@ -205,6 +206,8 @@ class Widget(QWidget): if not val: val = '' getattr(g, 'setPlainText', g.setText)(val) getattr(g, 'setCursorPosition', lambda x: x)(0) + elif isinstance(g, QFontComboBox): + g.setCurrentFont(QFont(val or '')) elif isinstance(g, EncodingComboBox): if val: g.setEditText(val) diff --git a/src/calibre/gui2/convert/pdf_output.py b/src/calibre/gui2/convert/pdf_output.py index 940d6b68f8..a2bfcc667f 100644 --- a/src/calibre/gui2/convert/pdf_output.py +++ b/src/calibre/gui2/convert/pdf_output.py @@ -19,13 +19,17 @@ class PluginWidget(Widget, Ui_Form): def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, ['paper_size', 'custom_size', - 'orientation', 'preserve_cover_aspect_ratio']) + 'orientation', 'preserve_cover_aspect_ratio', 'pdf_serif_family', + 'pdf_sans_family', 'pdf_mono_family', 'pdf_standard_font', + 'pdf_default_font_size', 'pdf_mono_font_size']) self.db, self.book_id = db, book_id for x in get_option('paper_size').option.choices: self.opt_paper_size.addItem(x) for x in 
get_option('orientation').option.choices: self.opt_orientation.addItem(x) + for x in get_option('pdf_standard_font').option.choices: + self.opt_pdf_standard_font.addItem(x) self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/pdf_output.ui b/src/calibre/gui2/convert/pdf_output.ui index 738e75aff5..4a6c99659d 100644 --- a/src/calibre/gui2/convert/pdf_output.ui +++ b/src/calibre/gui2/convert/pdf_output.ui @@ -6,14 +6,14 @@ 0 0 - 400 - 300 + 590 + 395 Form - + @@ -40,26 +40,6 @@ - - - - Qt::Vertical - - - - 20 - 213 - - - - - - - - Preserve &aspect ratio of cover - - - @@ -73,6 +53,112 @@ + + + + Preserve &aspect ratio of cover + + + + + + + Qt::Vertical + + + + 20 + 213 + + + + + + + + Se&rif family: + + + opt_pdf_serif_family + + + + + + + + + + &Sans family: + + + opt_pdf_sans_family + + + + + + + + + + &Monospace family: + + + opt_pdf_mono_family + + + + + + + + + + S&tandard font: + + + opt_pdf_standard_font + + + + + + + + + + Default font si&ze: + + + opt_pdf_default_font_size + + + + + + + px + + + + + + + Monospace &font size: + + + opt_pdf_mono_font_size + + + + + + + px + + + diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py index 2dbc16d84c..55791afd4a 100644 --- a/src/calibre/gui2/viewer/documentview.py +++ b/src/calibre/gui2/viewer/documentview.py @@ -4,19 +4,19 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' # Imports {{{ -import os, math, glob, json +import os, math, json from base64 import b64encode from functools import partial from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, pyqtProperty, - QPainter, QPalette, QBrush, QFontDatabase, QDialog, QColor, QPoint, - QImage, QRegion, QIcon, pyqtSignature, QAction, QMenu, QString, - pyqtSignal, QSwipeGesture, QApplication, pyqtSlot) + QPainter, QPalette, QBrush, QDialog, QColor, QPoint, QImage, QRegion, + QIcon, pyqtSignature, QAction, QMenu, QString, 
pyqtSignal, + QSwipeGesture, QApplication, pyqtSlot) from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings from calibre.gui2.viewer.flip import SlideFlip from calibre.gui2.shortcuts import Shortcuts -from calibre import prints +from calibre import prints, load_builtin_fonts from calibre.customize.ui import all_viewer_plugins from calibre.gui2.viewer.keys import SHORTCUTS from calibre.gui2.viewer.javascript import JavaScriptLoader @@ -27,13 +27,6 @@ from calibre.ebooks.oeb.display.webview import load_html from calibre.constants import isxp, iswindows # }}} -def load_builtin_fonts(): - base = P('fonts/liberation/*.ttf') - for f in glob.glob(base): - QFontDatabase.addApplicationFont(f) - return 'Liberation Serif', 'Liberation Sans', 'Liberation Mono' - - class Document(QWebPage): # {{{ page_turn = pyqtSignal(object) From e20450d95618a1f8697edb780bde3dcecb62afab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 17:10:31 +0530 Subject: [PATCH 55/57] PDF Output: Do not create duplicate embedded fonts in the PDF for every individual HTML file in the input document --- src/calibre/ebooks/pdf/writer.py | 54 +++++++++++++++++++------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 1ec98ce181..d6ffa42107 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -12,7 +12,7 @@ import os, shutil, json from future_builtins import map from PyQt4.Qt import (QEventLoop, QObject, QPrinter, QSizeF, Qt, QPainter, - QPixmap, QTimer, pyqtProperty, QString) + QPixmap, QTimer, pyqtProperty, QString, QSize) from PyQt4.QtWebKit import QWebView, QWebPage, QWebSettings from calibre.ptempfile import PersistentTemporaryDirectory @@ -192,8 +192,17 @@ class PDFWriter(QObject): # {{{ self.insert_cover() self.render_succeeded = False + self.combine_queue.append(os.path.join(self.tmp_path, + 'qprinter_out.pdf')) + self.first_page = True + 
self.setup_printer(self.combine_queue[-1]) QTimer.singleShot(0, self._render_book) self.loop.exec_() + if self.painter is not None: + self.painter.end() + if self.printer is not None: + self.printer.abort() + if not self.render_succeeded: raise Exception('Rendering HTML to PDF failed') @@ -209,7 +218,6 @@ class PDFWriter(QObject): # {{{ def _render_next(self): item = unicode(self.render_queue.pop(0)) - self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1))) self.logger.debug('Processing %s...' % item) self.current_item = item @@ -217,9 +225,7 @@ class PDFWriter(QObject): # {{{ def _render_html(self, ok): if ok: - item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue)) - self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue))) - self.do_paged_render(item_path) + self.do_paged_render() else: # The document is so corrupt that we can't render the page. self.logger.error('Document cannot be rendered.') @@ -237,25 +243,28 @@ class PDFWriter(QObject): # {{{ _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter, fset=_pass_json_value_setter) - def do_paged_render(self, outpath): - from PyQt4.Qt import QSize, QPainter - if self.paged_js is None: - from calibre.utils.resources import compiled_coffeescript - self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils') - self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing') - self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged') + def setup_printer(self, outpath): + self.printer = self.painter = None printer = get_pdf_printer(self.opts, output_file_name=outpath) painter = QPainter(printer) zoomx = printer.logicalDpiX()/self.view.logicalDpiX() zoomy = printer.logicalDpiY()/self.view.logicalDpiY() painter.scale(zoomx, zoomy) + pr = printer.pageRect() + self.printer, self.painter = printer, painter + self.viewport_size = QSize(pr.width()/zoomx, 
pr.height()/zoomy) + self.page.setViewportSize(self.viewport_size) + + def do_paged_render(self): + if self.paged_js is None: + from calibre.utils.resources import compiled_coffeescript + self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils') + self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing') + self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged') self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self) - pr = printer.pageRect() evaljs = self.view.page().mainFrame().evaluateJavaScript evaljs(self.paged_js) - self.view.page().setViewportSize(QSize(pr.width()/zoomx, - pr.height()/zoomy)) evaljs(''' py_bridge.__defineGetter__('value', function() { return JSON.parse(this._pass_json_value); @@ -271,11 +280,13 @@ class PDFWriter(QObject): # {{{ ''') mf = self.view.page().mainFrame() while True: - mf.render(painter) + if not self.first_page: + self.printer.newPage() + self.first_page = False + mf.render(self.painter) nsl = evaljs('paged_display.next_screen_location()').toInt() if not nsl[1] or nsl[0] <= 0: break evaljs('window.scrollTo(%d, 0)'%nsl[0]) - printer.newPage() self.bridge_value = tuple(self.outline.anchor_map[self.current_item]) evaljs('py_bridge.value = book_indexing.anchor_positions(py_bridge.value)') @@ -288,10 +299,6 @@ class PDFWriter(QObject): # {{{ pagenum, ypos = x self.outline.set_pos(self.current_item, anchor, pages + pagenum, ypos) - painter.end() - printer.abort() - self.append_doc(outpath) - def append_doc(self, outpath): doc = self.podofo.PDFDoc() with open(outpath, 'rb') as f: @@ -322,7 +329,10 @@ class PDFWriter(QObject): # {{{ printer.abort() def _write(self): - self.logger.debug('Combining individual PDF parts...') + self.painter.end() + self.printer.abort() + self.painter = self.printer = None + self.append_doc(self.combine_queue[-1]) try: self.doc.creator = u'%s %s [http://calibre-ebook.com]'%( From 9d8cda5f8544b7b89b2ca01637e6718d9803b743 Mon Sep 17 00:00:00 2001 From: Kovid 
Goyal Date: Wed, 29 Aug 2012 17:23:42 +0530 Subject: [PATCH 56/57] ... --- src/calibre/gui2/widgets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index 385884d008..a990baaa1e 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -364,7 +364,7 @@ class FontFamilyModel(QAbstractListModel): # {{{ self.families = list(qt_families.intersection(set(self.families))) self.families.sort() self.families[:0] = [_('None')] - self.font = QFont('Verdana' if iswindows else 'sansserif') + self.font = QFont('Arial' if iswindows else 'sansserif') def rowCount(self, *args): return len(self.families) From 85dc37587197c90f95301e06245f304c79cfab2b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 29 Aug 2012 17:39:50 +0530 Subject: [PATCH 57/57] ... --- manual/creating_plugins.rst | 4 ++++ manual/develop.rst | 2 ++ 2 files changed, 6 insertions(+) diff --git a/manual/creating_plugins.rst b/manual/creating_plugins.rst index 6189f69243..9418f4a955 100644 --- a/manual/creating_plugins.rst +++ b/manual/creating_plugins.rst @@ -182,6 +182,10 @@ The plugin API As you may have noticed above, a plugin in |app| is a class. There are different classes for the different types of plugins in |app|. Details on each class, including the base class of all plugins can be found in :ref:`plugins`. +Your plugin is almost certainly going to use code from |app|. To learn +how to find various bits of functionality in the +|app| code base, read the section on the |app| :ref:`code_layout`. + Debugging plugins ------------------- diff --git a/manual/develop.rst b/manual/develop.rst index d59c315951..a6f1a1308e 100644 --- a/manual/develop.rst +++ b/manual/develop.rst @@ -30,6 +30,8 @@ a device driver plugin. You can browse the for new conversion formats involves writing input/output format plugins. Another example of the modular design is the :ref:`recipe system ` for fetching news. 
For more examples of plugins designed to add features to |app|, see the `plugin index `_. +.. _code_layout: + Code layout ^^^^^^^^^^^^^^