diff --git a/manual/conversion.rst b/manual/conversion.rst index 5eaca5a469..feae2a4273 100644 --- a/manual/conversion.rst +++ b/manual/conversion.rst @@ -710,3 +710,35 @@ EPUB from the ZIP file are:: Note that because this file explores the potential of EPUB, most of the advanced formatting is not going to work on readers less capable than |app|'s built-in EPUB viewer. + +Convert ODT documents +~~~~~~~~~~~~~~~~~~~~~ + +|app| can directly convert ODT (OpenDocument Text) files. You should use styles to format your document and minimize the use of direct formatting. +When inserting images into your document you need to anchor them to the paragraph, images anchored to a page will all end up in the front of the conversion. + +To enable automatic detection of chapters, you need to mark them with the built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6' ('Heading 1' equates to the HTML tag <h1>, 'Heading 2' to <h2> etc). When you convert in |app| you can enter which style you used into the 'Detect chapters at' box. Example: + + * If you mark Chapters with style 'Heading 2', you have to set the 'Detect chapters at' box to ``//h:h2`` + * For a nested TOC with Sections marked with 'Heading 2' and the Chapters marked with 'Heading 3' you need to enter ``//h:h2|//h:h3``. On the Convert - TOC page set the 'Level 1 TOC' box to ``//h:h2`` and the 'Level 2 TOC' box to ``//h:h3``. + +Well-known document properties (Title, Keywords, Description, Creator) are recognized and |app| will use the first image (not too small, and with a good aspect ratio) as the cover image. + +There is also an advanced property conversion mode, which is activated by setting the custom property ``opf.metadata`` ('Yes or No' type) to Yes in your ODT document (File->Properties->Custom Properties). +If this property is detected by |app|, the following custom properties are recognized (``opf.authors`` overrides document creator):: + + opf.titlesort + opf.authors + opf.authorsort + opf.publisher + opf.pubdate + opf.isbn + opf.language + opf.series + opf.seriesindex + +In addition to this, you can specify the picture to use as the cover by naming it ``opf.cover`` (right click, Picture->Options->Name) in the ODT. If no picture with this name is found, the 'smart' method is used. +As the cover detection might result in double covers in certain output formats, the process will remove the paragraph (only if the only content is the cover!) from the document. But this works only with the named picture! + +To disable cover detection you can set the custom property ``opf.nocover`` ('Yes or No' type) to Yes in advanced mode. + diff --git a/manual/develop.rst b/manual/develop.rst index 3a9488ccf5..d59c315951 100644 --- a/manual/develop.rst +++ b/manual/develop.rst @@ -152,14 +152,17 @@ calibre is the directory that contains the src and resources sub-directories. 
En The next step is to create a bash script that will set the environment variable ``CALIBRE_DEVELOP_FROM`` to the absolute path of the src directory when running calibre in debug mode. Create a plain text file:: + #!/bin/sh export CALIBRE_DEVELOP_FROM="/Users/kovid/work/calibre/src" calibre-debug -g Save this file as ``/usr/bin/calibre-develop``, then set its permissions so that it can be executed:: + chmod +x /usr/bin/calibre-develop -Once you have done this, type:: +Once you have done this, run:: + calibre-develop You should see some diagnostic information in the Terminal window as calibre diff --git a/manual/news.rst b/manual/news.rst index 873025d467..9783a262aa 100755 --- a/manual/news.rst +++ b/manual/news.rst @@ -30,7 +30,7 @@ Lets pick a couple of feeds that look interesting: #. Business Travel: http://feeds.portfolio.com/portfolio/businesstravel #. Tech Observer: http://feeds.portfolio.com/portfolio/thetechobserver -I got the URLs by clicking the little orange RSS icon next to each feed name. To make |app| download the feeds and convert them into an ebook, you should click the :guilabel:`Fetch news` button and then the :guilabel:`Add a custom news source` menu item. A dialog similar to that shown below should open up. +I got the URLs by clicking the little orange RSS icon next to each feed name. To make |app| download the feeds and convert them into an ebook, you should right click the :guilabel:`Fetch news` button and then the :guilabel:`Add a custom news source` menu item. A dialog similar to that shown below should open up. .. 
image:: images/custom_news.png :align: center diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe new file mode 100644 index 0000000000..7c12832c70 --- /dev/null +++ b/recipes/phillosophy_now.recipe @@ -0,0 +1,75 @@ +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +from collections import OrderedDict + +class PhilosophyNow(BasicNewsRecipe): + + title = 'Philosophy Now' + __author__ = 'Rick Shang' + + description = '''Philosophy Now is a lively magazine for everyone + interested in ideas. It isn't afraid to tackle all the major questions of + life, the universe and everything. Published every two months, it tries to + corrupt innocent citizens by convincing them that philosophy can be + exciting, worthwhile and comprehensible, and also to provide some enjoyable + reading matter for those already ensnared by the muse, such as philosophy + students and academics.''' + language = 'en' + category = 'news' + encoding = 'UTF-8' + + keep_only_tags = [dict(attrs={'id':'fullMainColumn'})] + remove_tags = [dict(attrs={'class':'articleTools'})] + no_javascript = True + no_stylesheets = True + needs_subscription = True + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.open('https://philosophynow.org/auth/login') + br.select_form(nr = 1) + br['username'] = self.username + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + #Go to the issue + soup0 = self.index_to_soup('http://philosophynow.org/') + issue = soup0.find('div',attrs={'id':'navColumn'}) + + #Find date & cover + cover = issue.find('div', attrs={'id':'cover'}) + date = self.tag_to_string(cover.find('h3')).strip() + self.timefmt = u' [%s]'%date + img=cover.find('img',src=True)['src'] + self.cover_url = 'http://philosophynow.org' + re.sub('medium','large',img) + issuenum = re.sub('/media/images/covers/medium/issue','',img) + issuenum = re.sub('.jpg','',issuenum) + + #Go to the main body + current_issue_url = 
'http://philosophynow.org/issues/' + issuenum + soup = self.index_to_soup(current_issue_url) + div = soup.find ('div', attrs={'class':'articlesColumn'}) + + feeds = OrderedDict() + + for post in div.findAll('h3'): + articles = [] + a=post.find('a',href=True) + if a is not None: + url="http://philosophynow.org" + a['href'] + title=self.tag_to_string(a).strip() + s=post.findPrevious('h4') + section_title = self.tag_to_string(s).strip() + d=post.findNext('p') + desc = self.tag_to_string(d).strip() + articles.append({'title':title, 'url':url, 'description':desc, 'date':''}) + + if articles: + if section_title not in feeds: + feeds[section_title] = [] + feeds[section_title] += articles + ans = [(key, val) for key, val in feeds.iteritems()] + return ans + diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 3bba5ecca5..c3ac9038c9 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -506,16 +506,6 @@ compile_gpm_templates = True # default_tweak_format = 'remember' default_tweak_format = None -#: Enable multi-character first-letters in the tag browser -# Some languages have letters that can be represented by multiple characters. -# For example, Czech has a 'character' "ch" that sorts between "h" and "i". -# If this tweak is True, then the tag browser will take these characters into -# consideration when partitioning by first letter. -# Examples: -# enable_multicharacters_in_tag_browser = True -# enable_multicharacters_in_tag_browser = False -enable_multicharacters_in_tag_browser = True - #: Do not preselect a completion when editing authors/tags/series/etc. # This means that you can make changes and press Enter and your changes will # not be overwritten by a matching completion. 
However, if you wish to use the diff --git a/resources/images/devices/galaxy_s3.png b/resources/images/devices/galaxy_s3.png new file mode 100644 index 0000000000..1aef78e20d Binary files /dev/null and b/resources/images/devices/galaxy_s3.png differ diff --git a/setup/extensions.py b/setup/extensions.py index e4054e87fa..4dd76be3a6 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -140,7 +140,7 @@ extensions = [ ['calibre/utils/podofo/podofo.cpp'], libraries=['podofo'], lib_dirs=[podofo_lib], - inc_dirs=[podofo_inc], + inc_dirs=[podofo_inc, os.path.dirname(podofo_inc)], optional=True, error=podofo_error), diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index 6ecb21768f..8a8fa06ee1 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -32,7 +32,7 @@ binary_includes = [ '/usr/lib/libunrar.so', '/usr/lib/libsqlite3.so.0', '/usr/lib/libmng.so.1', - '/usr/lib/libpodofo.so.0.8.4', + '/usr/lib/libpodofo.so.0.9.1', '/lib/libz.so.1', '/usr/lib/libtiff.so.5', '/lib/libbz2.so.1', diff --git a/setup/installer/osx/app/main.py b/setup/installer/osx/app/main.py index 8d3853ea28..504f7fc49a 100644 --- a/setup/installer/osx/app/main.py +++ b/setup/installer/osx/app/main.py @@ -243,9 +243,6 @@ class Py2App(object): @flush def get_local_dependencies(self, path_to_lib): for x in self.get_dependencies(path_to_lib): - if x.startswith('libpodofo'): - yield x, x - continue for y in (SW+'/lib/', '/usr/local/lib/', SW+'/qt/lib/', '/opt/local/lib/', SW+'/python/Python.framework/', SW+'/freetype/lib/'): @@ -330,10 +327,6 @@ class Py2App(object): for f in glob.glob('src/calibre/plugins/*.so'): shutil.copy2(f, dest) self.fix_dependencies_in_lib(join(dest, basename(f))) - if 'podofo' in f: - self.change_dep('libpodofo.0.8.4.dylib', - self.FID+'/'+'libpodofo.0.8.4.dylib', join(dest, basename(f))) - @flush def create_plist(self): @@ -380,7 +373,7 @@ class Py2App(object): @flush def add_podofo(self): info('\nAdding 
PoDoFo') - pdf = join(SW, 'lib', 'libpodofo.0.8.4.dylib') + pdf = join(SW, 'lib', 'libpodofo.0.9.1.dylib') self.install_dylib(pdf) @flush diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index 8cf55cef78..e29b205de6 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -322,24 +322,7 @@ cp build/podofo-*/build/src/Release/podofo.exp lib/ cp build/podofo-*/build/podofo_config.h include/podofo/ cp -r build/podofo-*/src/* include/podofo/ -You have to use >=0.8.2 - -The following patch (against -r1269) was required to get it to compile: - - -Index: src/PdfFiltersPrivate.cpp -=================================================================== ---- src/PdfFiltersPrivate.cpp (revision 1261) -+++ src/PdfFiltersPrivate.cpp (working copy) -@@ -1019,7 +1019,7 @@ - /* - * Prepare for input from a memory buffer. - */ --GLOBAL(void) -+void - jpeg_memory_src (j_decompress_ptr cinfo, const JOCTET * buffer, size_t bufsize) - { - my_src_ptr src; +You have to use >=0.9.1 ImageMagick diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 7d4db1e512..a0f3b49498 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -201,7 +201,8 @@ def prints(*args, **kwargs): try: file.write(arg) except: - file.write(repr(arg)) + import repr as reprlib + file.write(reprlib.repr(arg)) if i != len(args)-1: file.write(bytes(sep)) file.write(bytes(end)) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 52cd7781e6..6f443a0013 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -673,7 +673,7 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.kobo.driver import KOBO from calibre.devices.bambook.driver import BAMBOOK from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX - +from calibre.devices.smart_device_app.driver import SMART_DEVICE_APP # Order here matters. 
The first matched device is the one used. @@ -746,6 +746,7 @@ plugins += [ ITUNES, BOEYE_BEX, BOEYE_BDX, + SMART_DEVICE_APP, USER_DEFINED, ] # }}} diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index ee8656f0ca..50bceb4def 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -91,6 +91,37 @@ class DummyReporter(object): def __call__(self, percent, msg=''): pass +def gui_configuration_widget(name, parent, get_option_by_name, + get_option_help, db, book_id, for_output=True): + import importlib + + def widget_factory(cls): + return cls(parent, get_option_by_name, + get_option_help, db, book_id) + + if for_output: + try: + output_widget = importlib.import_module( + 'calibre.gui2.convert.'+name) + pw = output_widget.PluginWidget + pw.ICON = I('back.png') + pw.HELP = _('Options specific to the output format.') + return widget_factory(pw) + except ImportError: + pass + else: + try: + input_widget = importlib.import_module( + 'calibre.gui2.convert.'+name) + pw = input_widget.PluginWidget + pw.ICON = I('forward.png') + pw.HELP = _('Options specific to the input format.') + return widget_factory(pw) + except ImportError: + pass + return None + + class InputFormatPlugin(Plugin): ''' InputFormatPlugins are responsible for converting a document into @@ -225,6 +256,17 @@ class InputFormatPlugin(Plugin): ''' pass + def gui_configuration_widget(self, parent, get_option_by_name, + get_option_help, db, book_id): + ''' + Called to create the widget used for configuring this plugin in the + calibre GUI. The widget must be an instance of the PluginWidget class. + See the builting input plugins for examples. 
+ ''' + name = self.name.lower().replace(' ', '_') + return gui_configuration_widget(name, parent, get_option_by_name, + get_option_help, db, book_id, for_output=False) + class OutputFormatPlugin(Plugin): ''' @@ -308,4 +350,16 @@ class OutputFormatPlugin(Plugin): ''' pass + def gui_configuration_widget(self, parent, get_option_by_name, + get_option_help, db, book_id): + ''' + Called to create the widget used for configuring this plugin in the + calibre GUI. The widget must be an instance of the PluginWidget class. + See the builtin output plugins for examples. + ''' + name = self.name.lower().replace(' ', '_') + return gui_configuration_widget(name, parent, get_option_by_name, + get_option_help, db, book_id, for_output=True) + + diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index a12ad5ebce..1971faef60 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -13,7 +13,6 @@ import datetime, os, re, sys, json, hashlib from calibre.devices.kindle.bookmark import Bookmark from calibre.devices.usbms.driver import USBMS from calibre import strftime -from calibre.utils.logging import default_log ''' Notes on collections: @@ -389,6 +388,7 @@ class KINDLE2(KINDLE): self.upload_apnx(path, filename, metadata, filepath) def upload_kindle_thumbnail(self, metadata, filepath): + from calibre.utils.logging import default_log coverdata = getattr(metadata, 'thumbnail', None) if not coverdata or not coverdata[2]: return diff --git a/src/calibre/devices/smart_device_app/__init__.py b/src/calibre/devices/smart_device_app/__init__.py new file mode 100644 index 0000000000..0080175bfa --- /dev/null +++ b/src/calibre/devices/smart_device_app/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + + diff --git a/src/calibre/devices/smart_device_app/driver.py 
b/src/calibre/devices/smart_device_app/driver.py new file mode 100644 index 0000000000..b7857b14d4 --- /dev/null +++ b/src/calibre/devices/smart_device_app/driver.py @@ -0,0 +1,871 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) +''' +Created on 29 Jun 2012 + +@author: charles +''' +import socket, select, json, inspect, os, traceback, time, sys, random +import hashlib, threading +from base64 import b64encode, b64decode +from functools import wraps + +from calibre import prints +from calibre.constants import numeric_version, DEBUG +from calibre.devices.interface import DevicePlugin +from calibre.devices.usbms.books import Book, BookList +from calibre.devices.usbms.deviceconfig import DeviceConfig +from calibre.devices.usbms.driver import USBMS +from calibre.ebooks import BOOK_EXTENSIONS +from calibre.ebooks.metadata import title_sort +from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS +from calibre.ebooks.metadata.book.base import Metadata +from calibre.ebooks.metadata.book.json_codec import JsonCodec +from calibre.library import current_library_name +from calibre.utils.ipc import eintr_retry_call +from calibre.utils.config import from_json, tweaks +from calibre.utils.date import isoformat, now +from calibre.utils.filenames import ascii_filename as sanitize, shorten_components_to +from calibre.utils.mdns import (publish as publish_zeroconf, unpublish as + unpublish_zeroconf) + +def synchronous(tlockname): + """A decorator to place an instance based lock around a method """ + + def _synched(func): + @wraps(func) + def _synchronizer(self, *args, **kwargs): + with self.__getattribute__(tlockname): + return func(self, *args, **kwargs) + return _synchronizer + return _synched + + +class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): + name = 'SmartDevice App Interface' + gui_name = _('SmartDevice') + icon = I('devices/galaxy_s3.png') + 
description = _('Communicate with Smart Device apps') + supported_platforms = ['windows', 'osx', 'linux'] + author = 'Charles Haley' + version = (0, 0, 1) + + # Invalid USB vendor information so the scanner will never match + VENDOR_ID = [0xffff] + PRODUCT_ID = [0xffff] + BCD = [0xffff] + + FORMATS = list(BOOK_EXTENSIONS) + ALL_FORMATS = list(BOOK_EXTENSIONS) + HIDE_FORMATS_CONFIG_BOX = True + USER_CAN_ADD_NEW_FORMATS = False + DEVICE_PLUGBOARD_NAME = 'SMART_DEVICE_APP' + CAN_SET_METADATA = [] + CAN_DO_DEVICE_DB_PLUGBOARD = False + SUPPORTS_SUB_DIRS = False + MUST_READ_METADATA = True + NEWS_IN_FOLDER = False + SUPPORTS_USE_AUTHOR_SORT = False + WANTS_UPDATED_THUMBNAILS = True + MAX_PATH_LEN = 100 + THUMBNAIL_HEIGHT = 160 + PREFIX = '' + + # Some network protocol constants + BASE_PACKET_LEN = 4096 + PROTOCOL_VERSION = 1 + MAX_CLIENT_COMM_TIMEOUT = 60.0 # Wait at most N seconds for an answer + + opcodes = { + 'NOOP' : 12, + 'OK' : 0, + 'BOOK_DATA' : 10, + 'BOOK_DONE' : 11, + 'DELETE_BOOK' : 13, + 'DISPLAY_MESSAGE' : 17, + 'FREE_SPACE' : 5, + 'GET_BOOK_FILE_SEGMENT' : 14, + 'GET_BOOK_METADATA' : 15, + 'GET_BOOK_COUNT' : 6, + 'GET_DEVICE_INFORMATION' : 3, + 'GET_INITIALIZATION_INFO': 9, + 'SEND_BOOKLISTS' : 7, + 'SEND_BOOK' : 8, + 'SEND_BOOK_METADATA' : 16, + 'SET_CALIBRE_DEVICE_INFO': 1, + 'SET_CALIBRE_DEVICE_NAME': 2, + 'TOTAL_SPACE' : 4, + } + reverse_opcodes = dict([(v, k) for k,v in opcodes.iteritems()]) + + + EXTRA_CUSTOMIZATION_MESSAGE = [ + _('Enable connections at startup') + ':::<p>' + + _('Check this box to allow connections when calibre starts') + '</p>', + '', + _('Security password') + ':::<p>' + + _('Enter a password that the device app must use to connect to calibre') + '</p>', + '', + _('Print extra debug information') + ':::<p>' + + _('Check this box if requested when reporting problems') + '</p>', + ] + EXTRA_CUSTOMIZATION_DEFAULT = [ + False, + '', + '', + '', + False, + ] + OPT_AUTOSTART = 0 + OPT_PASSWORD = 2 + OPT_EXTRA_DEBUG = 4 + + def __init__(self, path): + self.sync_lock = threading.RLock() + self.noop_counter = 0 + self.debug_start_time = time.time() + self.debug_time = time.time() + + def _debug(self, *args): + if not DEBUG: + return + total_elapsed = time.time() - self.debug_start_time + elapsed = time.time() - self.debug_time + print('SMART_DEV (%7.2f:%7.3f) %s'%(total_elapsed, elapsed, + inspect.stack()[1][3]), end='') + for a in args: + try: + prints('', a, end='') + except: + prints('', 'value too long', end='') + print() + self.debug_time = time.time() + + # Various methods required by the plugin architecture + @classmethod + def _default_save_template(cls): + from calibre.library.save_to_disk import config + st = cls.SAVE_TEMPLATE if cls.SAVE_TEMPLATE else \ + config().parse().send_template + if st: + st = os.path.basename(st) + return st + + @classmethod + def save_template(cls): + st = cls.settings().save_template + if st: + st = os.path.basename(st) + else: + st = cls._default_save_template() + return st + + # local utilities + + # copied from USBMS. Perhaps this could be a classmethod in usbms? + def _update_driveinfo_record(self, dinfo, prefix, location_code, name=None): + import uuid + if not isinstance(dinfo, dict): + dinfo = {} + if dinfo.get('device_store_uuid', None) is None: + dinfo['device_store_uuid'] = unicode(uuid.uuid4()) + if dinfo.get('device_name') is None: + dinfo['device_name'] = self.get_gui_name() + if name is not None: + dinfo['device_name'] = name + dinfo['location_code'] = location_code + dinfo['last_library_uuid'] = getattr(self, 'current_library_uuid', None) + dinfo['calibre_version'] = '.'.join([unicode(i) for i in numeric_version]) + dinfo['date_last_connected'] = isoformat(now()) + dinfo['prefix'] = self.PREFIX + return dinfo + + # copied with changes from USBMS.Device. 
In particular, we needed to + # remove the 'path' argument and all its uses. Also removed the calls to + # filename_callback and sanitize_path_components + def _create_upload_path(self, mdata, fname, create_dirs=True): + maxlen = self.MAX_PATH_LEN + + special_tag = None + if mdata.tags: + for t in mdata.tags: + if t.startswith(_('News')) or t.startswith('/'): + special_tag = t + break + + settings = self.settings() + template = self.save_template() + if mdata.tags and _('News') in mdata.tags: + try: + p = mdata.pubdate + date = (p.year, p.month, p.day) + except: + today = time.localtime() + date = (today[0], today[1], today[2]) + template = "{title}_%d-%d-%d" % date + use_subdirs = self.SUPPORTS_SUB_DIRS and settings.use_subdirs + + fname = sanitize(fname) + ext = os.path.splitext(fname)[1] + + from calibre.library.save_to_disk import get_components + from calibre.library.save_to_disk import config + opts = config().parse() + if not isinstance(template, unicode): + template = template.decode('utf-8') + app_id = str(getattr(mdata, 'application_id', '')) + id_ = mdata.get('id', fname) + extra_components = get_components(template, mdata, id_, + timefmt=opts.send_timefmt, length=maxlen-len(app_id)-1) + if not extra_components: + extra_components.append(sanitize(fname)) + else: + extra_components[-1] = sanitize(extra_components[-1]+ext) + + if extra_components[-1] and extra_components[-1][0] in ('.', '_'): + extra_components[-1] = 'x' + extra_components[-1][1:] + + if special_tag is not None: + name = extra_components[-1] + extra_components = [] + tag = special_tag + if tag.startswith(_('News')): + if self.NEWS_IN_FOLDER: + extra_components.append('News') + else: + for c in tag.split('/'): + c = sanitize(c) + if not c: continue + extra_components.append(c) + extra_components.append(name) + + if not use_subdirs: + # Leave this stuff here in case we later decide to use subdirs + extra_components = extra_components[-1:] + + def remove_trailing_periods(x): + ans = x + while 
ans.endswith('.'): + ans = ans[:-1].strip() + if not ans: + ans = 'x' + return ans + + extra_components = list(map(remove_trailing_periods, extra_components)) + components = shorten_components_to(maxlen, extra_components) + filepath = os.path.join(*components) + return filepath + + def _strip_prefix(self, path): + if self.PREFIX and path.startswith(self.PREFIX): + return path[len(self.PREFIX):] + return path + + # JSON booklist encode & decode + + # If the argument is a booklist or contains a book, use the metadata json + # codec to first convert it to a string dict + def _json_encode(self, op, arg): + res = {} + for k,v in arg.iteritems(): + if isinstance(v, (Book, Metadata)): + res[k] = self.json_codec.encode_book_metadata(v) + series = v.get('series', None) + if series: + tsorder = tweaks['save_template_title_series_sorting'] + series = title_sort(v.get('series', ''), order=tsorder) + else: + series = '' + res[k]['_series_sort_'] = series + else: + res[k] = v + return json.dumps([op, res], encoding='utf-8') + + # Network functions + def _read_string_from_net(self): + data = bytes(0) + while True: + dex = data.find(b'[') + if dex >= 0: + break + # recv seems to return a pointer into some internal buffer. + # Things get trashed if we don't make a copy of the data. + self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT) + v = self.device_socket.recv(self.BASE_PACKET_LEN) + self.device_socket.settimeout(None) + if len(v) == 0: + return '' # documentation says the socket is broken permanently. + data += v + total_len = int(data[:dex]) + data = data[dex:] + pos = len(data) + while pos < total_len: + self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT) + v = self.device_socket.recv(total_len - pos) + self.device_socket.settimeout(None) + if len(v) == 0: + return '' # documentation says the socket is broken permanently. 
+ data += v + pos += len(v) + return data + + def _call_client(self, op, arg, print_debug_info=True): + if op != 'NOOP': + self.noop_counter = 0 + extra_debug = self.settings().extra_customization[self.OPT_EXTRA_DEBUG] + if print_debug_info or extra_debug: + if extra_debug: + self._debug(op, arg) + else: + self._debug(op) + if self.device_socket is None: + return None, None + try: + s = self._json_encode(self.opcodes[op], arg) + if print_debug_info and extra_debug: + self._debug('send string', s) + self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT) + self.device_socket.sendall(('%d' % len(s))+s) + self.device_socket.settimeout(None) + v = self._read_string_from_net() + if print_debug_info and extra_debug: + self._debug('received string', v) + if v: + v = json.loads(v, object_hook=from_json) + if print_debug_info and extra_debug: + self._debug('receive after decode') #, v) + return (self.reverse_opcodes[v[0]], v[1]) + self._debug('protocol error -- empty json string') + except socket.timeout: + self._debug('timeout communicating with device') + self.device_socket.close() + self.device_socket = None + raise IOError(_('Device did not respond in reasonable time')) + except socket.error: + self._debug('device went away') + self.device_socket.close() + self.device_socket = None + raise IOError(_('Device closed the network connection')) + except: + self._debug('other exception') + traceback.print_exc() + self.device_socket.close() + self.device_socket = None + raise + raise IOError('Device responded with incorrect information') + + # Write a file as a series of base64-encoded strings. 
+ def _put_file(self, infile, lpath, book_metadata, this_book, total_books): + close_ = False + if not hasattr(infile, 'read'): + infile, close_ = open(infile, 'rb'), True + infile.seek(0, os.SEEK_END) + length = infile.tell() + book_metadata.size = length + infile.seek(0) + self._debug(lpath, length) + self._call_client('SEND_BOOK', {'lpath': lpath, 'length': length, + 'metadata': book_metadata, 'thisBook': this_book, + 'totalBooks': total_books}, print_debug_info=False) + self._set_known_metadata(book_metadata) + pos = 0 + failed = False + with infile: + while True: + b = infile.read(self.max_book_packet_len) + blen = len(b) + if not b: + break + b = b64encode(b) + opcode, result = self._call_client('BOOK_DATA', + {'lpath': lpath, 'position': pos, 'data': b}, + print_debug_info=False) + pos += blen + if opcode != 'OK': + self._debug('protocol error', opcode) + failed = True + break + self._call_client('BOOK_DONE', {'lpath': lpath}) + self.time = None + if close_: + infile.close() + return -1 if failed else length + + def _get_smartdevice_option_number(self, opt_string): + if opt_string == 'password': + return self.OPT_PASSWORD + elif opt_string == 'autostart': + return self.OPT_AUTOSTART + else: + return None + + def _compare_metadata(self, mi1, mi2): + for key in SERIALIZABLE_FIELDS: + if key in ['cover', 'mime']: + continue + if key == 'user_metadata': + meta1 = mi1.get_all_user_metadata(make_copy=False) + meta2 = mi2.get_all_user_metadata(make_copy=False) # compare against mi2, not mi1 with itself + if meta1 != meta2: + self._debug('custom metadata different') + return False + for ckey in meta1: + if mi1.get(ckey) != mi2.get(ckey): + self._debug(ckey, mi1.get(ckey), mi2.get(ckey)) + return False + elif mi1.get(key, None) != mi2.get(key, None): + self._debug(key, mi1.get(key), mi2.get(key)) + return False + return True + + def _metadata_already_on_device(self, book): + v = self.known_metadata.get(book.lpath, None) + if v is not None: + return self._compare_metadata(book, v) + return False + + def 
_set_known_metadata(self, book, remove=False): + lpath = book.lpath + if remove: + self.known_metadata[lpath] = None + else: + self.known_metadata[lpath] = book.deepcopy() + + # The public interface methods. + + + @synchronous('sync_lock') + def is_usb_connected(self, devices_on_system, debug=False, only_presence=False): + if getattr(self, 'listen_socket', None) is None: + self.is_connected = False + if self.is_connected: + self.noop_counter += 1 + if only_presence and (self.noop_counter % 5) != 1: + ans = select.select((self.device_socket,), (), (), 0) + if len(ans[0]) == 0: + return (True, self) + # The socket indicates that something is there. Given the + # protocol, this can only be a disconnect notification. Fall + # through and actually try to talk to the client. + try: + # This will usually toss an exception if the socket is gone. + if self._call_client('NOOP', dict())[0] is None: + self.is_connected = False + except: + self.is_connected = False + if not self.is_connected and self.device_socket is not None: # _call_client closes and clears the socket on failure + self.device_socket.close() + return (self.is_connected, self) + if getattr(self, 'listen_socket', None) is not None: + ans = select.select((self.listen_socket,), (), (), 0) + if len(ans[0]) > 0: + # timeout in 10 ms to detect rare case where the socket went + # way between the select and the accept + try: + self.device_socket = None + self.listen_socket.settimeout(0.010) + self.device_socket, ign = eintr_retry_call( + self.listen_socket.accept) + self.listen_socket.settimeout(None) + self.device_socket.settimeout(None) + self.is_connected = True + except socket.timeout: + if self.device_socket is not None: + self.device_socket.close() + except socket.error: + x = sys.exc_info()[1] + self._debug('unexpected socket exception', x.args[0]) + if self.device_socket is not None: + self.device_socket.close() + raise + return (True, self) + return (False, None) + + @synchronous('sync_lock') + def open(self, connected_device, library_uuid): + self._debug() + self.current_library_uuid = 
library_uuid + self.current_library_name = current_library_name() + try: + password = self.settings().extra_customization[self.OPT_PASSWORD] + if password: + challenge = isoformat(now()) + hasher = hashlib.new('sha1') + hasher.update(password.encode('UTF-8')) + hasher.update(challenge.encode('UTF-8')) + hash_digest = hasher.hexdigest() + else: + challenge = '' + hash_digest = '' + opcode, result = self._call_client('GET_INITIALIZATION_INFO', + {'serverProtocolVersion': self.PROTOCOL_VERSION, + 'validExtensions': self.ALL_FORMATS, + 'passwordChallenge': challenge, + 'currentLibraryName': self.current_library_name, + 'currentLibraryUUID': library_uuid}) + if opcode != 'OK': + # Something wrong with the return. Close the socket + # and continue. + self._debug('Protocol error - Opcode not OK') + self.device_socket.close() + return False + if not result.get('versionOK', False): + # protocol mismatch + self._debug('Protocol error - protocol version mismatch') + self.device_socket.close() + return False + if result.get('maxBookContentPacketLen', 0) <= 0: + # protocol mismatch + self._debug('Protocol error - bogus book packet length') + self.device_socket.close() + return False + self.max_book_packet_len = result.get('maxBookContentPacketLen', + self.BASE_PACKET_LEN) + exts = result.get('acceptedExtensions', None) + if exts is None or not isinstance(exts, list) or len(exts) == 0: + self._debug('Protocol error - bogus accepted extensions') + self.device_socket.close() + return False + self.FORMATS = exts + if password: + returned_hash = result.get('passwordHash', None) + if result.get('passwordHash', None) is None: + # protocol mismatch + self._debug('Protocol error - missing password hash') + self.device_socket.close() + return False + if returned_hash != hash_digest: + # bad password + self._debug('password mismatch') + self._call_client("DISPLAY_MESSAGE", {'messageKind':1}) + self.device_socket.close() + return False + return True + except socket.timeout: + 
self.device_socket.close() + except socket.error: + x = sys.exc_info()[1] + self._debug('unexpected socket exception', x.args[0]) + self.device_socket.close() + raise + return False + + @synchronous('sync_lock') + def get_device_information(self, end_session=True): + self._debug() + self.report_progress(1.0, _('Get device information...')) + opcode, result = self._call_client('GET_DEVICE_INFORMATION', dict()) + if opcode == 'OK': + self.driveinfo = result['device_info'] + self._update_driveinfo_record(self.driveinfo, self.PREFIX, 'main') + self._call_client('SET_CALIBRE_DEVICE_INFO', self.driveinfo) + return (self.get_gui_name(), result['device_version'], + result['version'], '', {'main':self.driveinfo}) + return (self.get_gui_name(), '', '', '') + + @synchronous('sync_lock') + def set_driveinfo_name(self, location_code, name): + self._update_driveinfo_record(self.driveinfo, "main", name) + self._call_client('SET_CALIBRE_DEVICE_NAME', + {'location_code': 'main', 'name':name}) + + @synchronous('sync_lock') + def reset(self, key='-1', log_packets=False, report_progress=None, + detected_device=None) : + self._debug() + self.set_progress_reporter(report_progress) + + @synchronous('sync_lock') + def set_progress_reporter(self, report_progress): + self._debug() + self.report_progress = report_progress + if self.report_progress is None: + self.report_progress = lambda x, y: x + + @synchronous('sync_lock') + def card_prefix(self, end_session=True): + self._debug() + return (None, None) + + @synchronous('sync_lock') + def total_space(self, end_session=True): + self._debug() + opcode, result = self._call_client('TOTAL_SPACE', {}) + if opcode == 'OK': + return (result['total_space_on_device'], 0, 0) + # protocol error if we get here + return (0, 0, 0) + + @synchronous('sync_lock') + def free_space(self, end_session=True): + self._debug() + opcode, result = self._call_client('FREE_SPACE', {}) + if opcode == 'OK': + return (result['free_space_on_device'], 0, 0) + # protocol 
error if we get here + return (0, 0, 0) + + @synchronous('sync_lock') + def books(self, oncard=None, end_session=True): + self._debug(oncard) + if oncard is not None: + return BookList(None, None, None) + opcode, result = self._call_client('GET_BOOK_COUNT', {}) + bl = BookList(None, self.PREFIX, self.settings) + if opcode == 'OK': + count = result['count'] + for i in range(0, count): + self._debug('retrieve metadata book', i) + opcode, result = self._call_client('GET_BOOK_METADATA', {'index': i}, + print_debug_info=False) + if opcode == 'OK': + if '_series_sort_' in result: + del result['_series_sort_'] + book = self.json_codec.raw_to_book(result, Book, self.PREFIX) + self._set_known_metadata(book) + bl.add_book(book, replace_metadata=True) + else: + raise IOError(_('Protocol error -- book metadata not returned')) + return bl + + @synchronous('sync_lock') + def sync_booklists(self, booklists, end_session=True): + self._debug() + # If we ever do device_db plugboards, this is where it will go. We will + # probably need to send two booklists, one with calibre's data that is + # given back by "books", and one that has been plugboarded. 
+ self._call_client('SEND_BOOKLISTS', { 'count': len(booklists[0]) } ) + for i,book in enumerate(booklists[0]): + if not self._metadata_already_on_device(book): + self._set_known_metadata(book) + self._debug('syncing book', book.lpath) + opcode, result = self._call_client('SEND_BOOK_METADATA', + {'index': i, 'data': book}, + print_debug_info=False) + if opcode != 'OK': + self._debug('protocol error', opcode, i) + raise IOError(_('Protocol error -- sync_booklists')) + + @synchronous('sync_lock') + def eject(self): + self._debug() + if self.device_socket: + self.device_socket.close() + self.device_socket = None + self.is_connected = False + + @synchronous('sync_lock') + def post_yank_cleanup(self): + self._debug() + + @synchronous('sync_lock') + def upload_books(self, files, names, on_card=None, end_session=True, + metadata=None): + self._debug(names) + + paths = [] + names = iter(names) + metadata = iter(metadata) + + for i, infile in enumerate(files): + mdata, fname = metadata.next(), names.next() + lpath = self._create_upload_path(mdata, fname, create_dirs=False) + if not hasattr(infile, 'read'): + infile = USBMS.normalize_path(infile) + book = Book(self.PREFIX, lpath, other=mdata) + length = self._put_file(infile, lpath, book, i, len(files)) + if length < 0: + raise IOError(_('Sending book %s to device failed') % lpath) + paths.append((lpath, length)) + # No need to deal with covers. 
The client will get the thumbnails + # in the mi structure + self.report_progress((i+1) / float(len(files)), _('Transferring books to device...')) + + self.report_progress(1.0, _('Transferring books to device...')) + self._debug('finished uploading %d books'%(len(files))) + return paths + + @synchronous('sync_lock') + def add_books_to_metadata(self, locations, metadata, booklists): + self._debug('adding metadata for %d books'%(len(metadata))) + + metadata = iter(metadata) + for i, location in enumerate(locations): + self.report_progress((i+1) / float(len(locations)), + _('Adding books to device metadata listing...')) + info = metadata.next() + lpath = location[0] + length = location[1] + lpath = self._strip_prefix(lpath) + book = Book(self.PREFIX, lpath, other=info) + if book.size is None: + book.size = length + b = booklists[0].add_book(book, replace_metadata=True) + if b: + b._new_book = True + self.report_progress(1.0, _('Adding books to device metadata listing...')) + self._debug('finished adding metadata') + + @synchronous('sync_lock') + def delete_books(self, paths, end_session=True): + self._debug(paths) + for path in paths: + # the path has the prefix on it (I think) + path = self._strip_prefix(path) + opcode, result = self._call_client('DELETE_BOOK', {'lpath': path}) + if opcode == 'OK': + self._debug('removed book with UUID', result['uuid']) + else: + raise IOError(_('Protocol error - delete books')) + + @synchronous('sync_lock') + def remove_books_from_metadata(self, paths, booklists): + self._debug(paths) + for i, path in enumerate(paths): + path = self._strip_prefix(path) + self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...')) + for bl in booklists: + for book in bl: + if path == book.path: + bl.remove_book(book) + self._set_known_metadata(book, remove=True) + self.report_progress(1.0, _('Removing books from device metadata listing...')) + self._debug('finished removing metadata for %d books'%(len(paths))) 
+ + + @synchronous('sync_lock') + def get_file(self, path, outfile, end_session=True): + self._debug(path) + eof = False + position = 0 + while not eof: + opcode, result = self._call_client('GET_BOOK_FILE_SEGMENT', + {'lpath' : path, 'position': position}, + print_debug_info=False ) + if opcode == 'OK': + if not result['eof']: + data = b64decode(result['data']) + if len(data) != result['next_position'] - position: + self._debug('position mismatch', result['next_position'], position) + position = result['next_position'] + outfile.write(data) + else: + eof = True + else: + raise IOError(_('request for book data failed')) + + @synchronous('sync_lock') + def set_plugboards(self, plugboards, pb_func): + self._debug() + self.plugboards = plugboards + self.plugboard_func = pb_func + + @synchronous('sync_lock') + def startup(self): + self.listen_socket = None + + @synchronous('sync_lock') + def startup_on_demand(self): + if getattr(self, 'listen_socket', None) is not None: + # we are already running + return + if len(self.opcodes) != len(self.reverse_opcodes): + self._debug(self.opcodes, self.reverse_opcodes) + self.is_connected = False + self.listen_socket = None + self.device_socket = None + self.json_codec = JsonCodec() + self.known_metadata = {} + self.debug_time = time.time() + self.debug_start_time = time.time() + self.max_book_packet_len = 0 + self.noop_counter = 0 + try: + self.listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + except: + self._debug('creation of listen socket failed') + return + + for i in range(0, 100): # try up to 100 random port numbers + port = random.randint(8192, 32000) + try: + self._debug('try port', port) + self.listen_socket.bind(('', port)) + break + except socket.error: + port = 0 + except: + self._debug('Unknown exception while allocating listen socket') + traceback.print_exc() + raise + if port == 0: + self._debug('Failed to allocate a port'); + self.listen_socket.close() + self.listen_socket = None + return + + try: 
+ self.listen_socket.listen(0) + except: + self._debug('listen on socket failed', port) + self.listen_socket.close() + self.listen_socket = None + return + + try: + publish_zeroconf('calibre smart device client', + '_calibresmartdeviceapp._tcp', port, {}) + except: + self._debug('registration with bonjour failed') + self.listen_socket.close() + self.listen_socket = None + return + + self._debug('listening on port', port) + self.port = port + + @synchronous('sync_lock') + def shutdown(self): + if getattr(self, 'listen_socket', None) is not None: + self.listen_socket.close() + self.listen_socket = None + unpublish_zeroconf('calibre smart device client', + '_calibresmartdeviceapp._tcp', self.port, {}) + + # Methods for dynamic control + + @synchronous('sync_lock') + def is_dynamically_controllable(self): + return 'smartdevice' + + @synchronous('sync_lock') + def start_plugin(self): + self.startup_on_demand() + + @synchronous('sync_lock') + def stop_plugin(self): + self.shutdown() + + @synchronous('sync_lock') + def get_option(self, opt_string, default=None): + opt = self._get_smartdevice_option_number(opt_string) + if opt is not None: + return self.settings().extra_customization[opt] + return default + + @synchronous('sync_lock') + def set_option(self, opt_string, value): + opt = self._get_smartdevice_option_number(opt_string) + if opt is not None: + config = self._configProxy() + ec = config['extra_customization'] + ec[opt] = value + config['extra_customization'] = ec + + @synchronous('sync_lock') + def is_running(self): + return getattr(self, 'listen_socket', None) is not None + + diff --git a/src/calibre/ebooks/metadata/odt.py b/src/calibre/ebooks/metadata/odt.py index bf30dfd5f7..b919885bfd 100644 --- a/src/calibre/ebooks/metadata/odt.py +++ b/src/calibre/ebooks/metadata/odt.py @@ -1,5 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +# # Copyright (C) 2006 Søren Roug, European Environment Agency # # 
This is free software. You may redistribute it under the terms @@ -17,12 +19,20 @@ # # Contributor(s): # +from __future__ import division + import zipfile, re import xml.sax.saxutils from cStringIO import StringIO from odf.namespaces import OFFICENS, DCNS, METANS -from calibre.ebooks.metadata import MetaInformation, string_to_authors +from odf.opendocument import load as odLoad +from odf.draw import Image as odImage, Frame as odFrame + +from calibre.ebooks.metadata import MetaInformation, string_to_authors, check_isbn +from calibre.utils.magick.draw import identify_data +from calibre.utils.date import parse_date +from calibre.utils.localization import canonicalize_lang whitespace = re.compile(r'\s+') @@ -125,6 +135,10 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator): else: texttag = self._tag self.seenfields[texttag] = self.data() + # OpenOffice has the habit to capitalize custom properties, so we add a + # lowercase version for easy access + if texttag[:4].lower() == u'opf.': + self.seenfields[texttag.lower()] = self.data() if field in self.deletefields: self.output.dowrite = True @@ -141,7 +155,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator): def data(self): return normalize(''.join(self._data)) -def get_metadata(stream): +def get_metadata(stream, extract_cover=True): zin = zipfile.ZipFile(stream, 'r') odfs = odfmetaparser() parser = xml.sax.make_parser() @@ -162,7 +176,90 @@ def get_metadata(stream): if data.has_key('language'): mi.language = data['language'] if data.get('keywords', ''): - mi.tags = data['keywords'].split(',') + mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()] + opfmeta = False # we need this later for the cover + opfnocover = False + if data.get('opf.metadata','') == 'true': + # custom metadata contains OPF information + opfmeta = True + if data.get('opf.titlesort', ''): + mi.title_sort = data['opf.titlesort'] + if data.get('opf.authors', ''): + mi.authors = string_to_authors(data['opf.authors']) + if 
data.get('opf.authorsort', ''): + mi.author_sort = data['opf.authorsort'] + if data.get('opf.isbn', ''): + isbn = check_isbn(data['opf.isbn']) + if isbn is not None: + mi.isbn = isbn + if data.get('opf.publisher', ''): + mi.publisher = data['opf.publisher'] + if data.get('opf.pubdate', ''): + mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True) + if data.get('opf.series', ''): + mi.series = data['opf.series'] + if data.get('opf.seriesindex', ''): + try: + mi.series_index = float(data['opf.seriesindex']) + except ValueError: + mi.series_index = 1.0 + if data.get('opf.language', ''): + cl = canonicalize_lang(data['opf.language']) + if cl: + mi.languages = [cl] + opfnocover = data.get('opf.nocover', 'false') == 'true' + if not opfnocover: + try: + read_cover(stream, zin, mi, opfmeta, extract_cover) + except: + pass # Do not let an error reading the cover prevent reading other data return mi +def read_cover(stream, zin, mi, opfmeta, extract_cover): + # search for an draw:image in a draw:frame with the name 'opf.cover' + # if opf.metadata prop is false, just use the first image that + # has a proper size (borrowed from docx) + otext = odLoad(stream) + cover_href = None + cover_data = None + cover_frame = None + for frm in otext.topnode.getElementsByType(odFrame): + img = frm.getElementsByType(odImage) + if len(img) > 0: # there should be only one + i_href = img[0].getAttribute('href') + try: + raw = zin.read(i_href) + except KeyError: + continue + try: + width, height, fmt = identify_data(raw) + except: + continue + else: + continue + if opfmeta and frm.getAttribute('name').lower() == u'opf.cover': + cover_href = i_href + cover_data = (fmt, raw) + cover_frame = frm.getAttribute('name') # could have upper case + break + if cover_href is None and 0.8 <= height/width <= 1.8 and height*width >= 12000: + cover_href = i_href + cover_data = (fmt, raw) + if not opfmeta: + break + + if cover_href is not None: + mi.cover = cover_href + mi.odf_cover_frame = cover_frame + 
if extract_cover: + if not cover_data: + raw = zin.read(cover_href) + try: + width, height, fmt = identify_data(raw) + except: + pass + else: + cover_data = (fmt, raw) + mi.cover_data = cover_data + diff --git a/src/calibre/ebooks/mobi/reader/ncx.py b/src/calibre/ebooks/mobi/reader/ncx.py index ca3255e100..d3747f6a8a 100644 --- a/src/calibre/ebooks/mobi/reader/ncx.py +++ b/src/calibre/ebooks/mobi/reader/ncx.py @@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en' import os +from calibre import replace_entities from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.mobi.reader.headers import NULL_INDEX from calibre.ebooks.mobi.reader.index import read_index @@ -88,7 +89,8 @@ def build_toc(index_entries): for lvl in sorted(levels): for item in level_map[lvl]: parent = num_map[item['parent']] - child = parent.add_item(item['href'], item['idtag'], item['text']) + child = parent.add_item(item['href'], item['idtag'], + replace_entities(item['text'], encoding=None)) num_map[item['num']] = child # Set play orders in depth first order diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py index 5251bf934f..1e8a204ad5 100644 --- a/src/calibre/ebooks/mobi/writer2/serializer.py +++ b/src/calibre/ebooks/mobi/writer2/serializer.py @@ -11,8 +11,9 @@ import re from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize) +from calibre.ebooks import normalize from calibre.ebooks.mobi.mobiml import MBP_NS -from calibre.ebooks.mobi.utils import is_guide_ref_start, utf8_text +from calibre.ebooks.mobi.utils import is_guide_ref_start from collections import defaultdict from urlparse import urldefrag @@ -355,7 +356,7 @@ class Serializer(object): text = text.replace(u'\u00AD', '') # Soft-hyphen if quot: text = text.replace('"', '"') - self.buf.write(utf8_text(text)) + self.buf.write(normalize(text).encode('utf-8')) def fixup_links(self): ''' diff --git 
a/src/calibre/ebooks/mobi/writer8/skeleton.py b/src/calibre/ebooks/mobi/writer8/skeleton.py index 5db6ee0b5c..ae8fdf364c 100644 --- a/src/calibre/ebooks/mobi/writer8/skeleton.py +++ b/src/calibre/ebooks/mobi/writer8/skeleton.py @@ -76,15 +76,13 @@ def tostring(raw, **kwargs): class Chunk(object): - def __init__(self, raw, parent_tag): + def __init__(self, raw, selector): self.raw = raw self.starts_tags = [] self.ends_tags = [] self.insert_pos = None - self.parent_tag = parent_tag - self.parent_is_body = False - self.is_last_chunk = False self.is_first_chunk = False + self.selector = "%s-//*[@aid='%s']"%selector def __len__(self): return len(self.raw) @@ -97,11 +95,6 @@ class Chunk(object): return 'Chunk(len=%r insert_pos=%r starts_tags=%r ends_tags=%r)'%( len(self.raw), self.insert_pos, self.starts_tags, self.ends_tags) - @property - def selector(self): - typ = 'S' if (self.is_last_chunk and not self.parent_is_body) else 'P' - return "%s-//*[@aid='%s']"%(typ, self.parent_tag) - __str__ = __repr__ class Skeleton(object): @@ -251,13 +244,13 @@ class Chunker(object): def step_into_tag(self, tag, chunks): aid = tag.get('aid') - is_body = tag.tag == 'body' + self.chunk_selector = ('P', aid) first_chunk_idx = len(chunks) # First handle any text if tag.text and tag.text.strip(): # Leave pure whitespace in the skel - chunks.extend(self.chunk_up_text(tag.text, aid)) + chunks.extend(self.chunk_up_text(tag.text)) tag.text = None # Now loop over children @@ -266,21 +259,21 @@ class Chunker(object): if child.tag == etree.Entity: chunks.append(raw) if child.tail: - chunks.extend(self.chunk_up_text(child.tail, aid)) + chunks.extend(self.chunk_up_text(child.tail)) continue raw = close_self_closing_tags(raw) if len(raw) > CHUNK_SIZE and child.get('aid', None): self.step_into_tag(child, chunks) if child.tail and child.tail.strip(): # Leave pure whitespace - chunks.extend(self.chunk_up_text(child.tail, aid)) + chunks.extend(self.chunk_up_text(child.tail)) child.tail = None else: if 
len(raw) > CHUNK_SIZE: self.log.warn('Tag %s has no aid and a too large chunk' ' size. Adding anyway.'%child.tag) - chunks.append(Chunk(raw, aid)) + chunks.append(Chunk(raw, self.chunk_selector)) if child.tail: - chunks.extend(self.chunk_up_text(child.tail, aid)) + chunks.extend(self.chunk_up_text(child.tail)) tag.remove(child) if len(chunks) <= first_chunk_idx and chunks: @@ -293,12 +286,9 @@ class Chunker(object): my_chunks = chunks[first_chunk_idx:] if my_chunks: my_chunks[0].is_first_chunk = True - my_chunks[-1].is_last_chunk = True - if is_body: - for chunk in my_chunks: - chunk.parent_is_body = True + self.chunk_selector = ('S', aid) - def chunk_up_text(self, text, parent_tag): + def chunk_up_text(self, text): text = text.encode('utf-8') ans = [] @@ -314,7 +304,7 @@ class Chunker(object): while rest: start, rest = split_multibyte_text(rest) ans.append(b'' + start + '') - return [Chunk(x, parent_tag) for x in ans] + return [Chunk(x, self.chunk_selector) for x in ans] def merge_small_chunks(self, chunks): ans = chunks[:1] diff --git a/src/calibre/ebooks/odt/input.py b/src/calibre/ebooks/odt/input.py index 14e1ff5892..f0d2335a30 100644 --- a/src/calibre/ebooks/odt/input.py +++ b/src/calibre/ebooks/odt/input.py @@ -10,6 +10,9 @@ import os from lxml import etree from odf.odf2xhtml import ODF2XHTML +from odf.opendocument import load as odLoad +from odf.draw import Frame as odFrame, Image as odImage +from odf.namespaces import TEXTNS as odTEXTNS from calibre import CurrentDir, walk @@ -138,22 +141,84 @@ class Extract(ODF2XHTML): r.selectorText = '.'+replace_name return sheet.cssText, sel_map + def search_page_img(self, mi, log): + for frm in self.document.topnode.getElementsByType(odFrame): + try: + if frm.getAttrNS(odTEXTNS,u'anchor-type') == 'page': + log.warn('Document has Pictures anchored to Page, will all end up before first page!') + break + except ValueError: + pass + + def filter_cover(self, mi, log): + # filter the Element tree (remove the detected cover) 
+ if mi.cover and mi.odf_cover_frame: + for frm in self.document.topnode.getElementsByType(odFrame): + # search the right frame + if frm.getAttribute('name') == mi.odf_cover_frame: + img = frm.getElementsByType(odImage) + # only one draw:image allowed in the draw:frame + if len(img) == 1 and img[0].getAttribute('href') == mi.cover: + # ok, this is the right frame with the right image + # check if there are more childs + if len(frm.childNodes) != 1: + break + # check if the parent paragraph more childs + para = frm.parentNode + if para.tagName != 'text:p' or len(para.childNodes) != 1: + break + # now it should be safe to remove the text:p + parent = para.parentNode + parent.removeChild(para) + log("Removed cover image paragraph from document...") + break + + def filter_load(self, odffile, mi, log): + """ This is an adaption from ODF2XHTML. It adds a step between + load and parse of the document where the Element tree can be + modified. + """ + # first load the odf structure + self.lines = [] + self._wfunc = self._wlines + if isinstance(odffile, basestring) \ + or hasattr(odffile, 'read'): # Added by Kovid + self.document = odLoad(odffile) + else: + self.document = odffile + # filter stuff + self.search_page_img(mi, log) + try: + self.filter_cover(mi, log) + except: + pass + # parse the modified tree and generate xhtml + self._walknode(self.document.topnode) + def __call__(self, stream, odir, log): from calibre.utils.zipfile import ZipFile - from calibre.ebooks.metadata.meta import get_metadata + from calibre.ebooks.metadata.odt import get_metadata from calibre.ebooks.metadata.opf2 import OPFCreator - if not os.path.exists(odir): os.makedirs(odir) with CurrentDir(odir): log('Extracting ODT file...') - html = self.odf2xhtml(stream) + stream.seek(0) + mi = get_metadata(stream, 'odt') + if not mi.title: + mi.title = _('Unknown') + if not mi.authors: + mi.authors = [_('Unknown')] + self.filter_load(stream, mi, log) + html = self.xhtml() # A blanket img specification like 
this causes problems # with EPUB output as the containing element often has # an absolute height and width set that is larger than # the available screen real estate html = html.replace('img { width: 100%; height: 100%; }', '') + # odf2xhtml creates empty title tag + html = html.replace('','%s'%(mi.title,)) try: html = self.fix_markup(html, log) except: @@ -162,12 +227,6 @@ class Extract(ODF2XHTML): f.write(html.encode('utf-8')) zf = ZipFile(stream, 'r') self.extract_pictures(zf) - stream.seek(0) - mi = get_metadata(stream, 'odt') - if not mi.title: - mi.title = _('Unknown') - if not mi.authors: - mi.authors = [_('Unknown')] opf = OPFCreator(os.path.abspath(os.getcwdu()), mi) opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwdu())]) diff --git a/src/calibre/gui2/comments_editor.py b/src/calibre/gui2/comments_editor.py index 1d5e914d5f..10bcbf6218 100644 --- a/src/calibre/gui2/comments_editor.py +++ b/src/calibre/gui2/comments_editor.py @@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en' import re, os from lxml import html +import sip from PyQt4.Qt import (QApplication, QFontInfo, QSize, QWidget, QPlainTextEdit, QToolBar, QVBoxLayout, QAction, QIcon, Qt, QTabWidget, QUrl, @@ -42,6 +43,7 @@ class PageAction(QAction): # {{{ self.page_action.trigger() def update_state(self, *args): + if sip.isdeleted(self) or sip.isdeleted(self.page_action): return if self.isCheckable(): self.setChecked(self.page_action.isChecked()) self.setEnabled(self.page_action.isEnabled()) diff --git a/src/calibre/gui2/convert/bulk.py b/src/calibre/gui2/convert/bulk.py index 5324a83865..3a65a4617e 100644 --- a/src/calibre/gui2/convert/bulk.py +++ b/src/calibre/gui2/convert/bulk.py @@ -4,7 +4,7 @@ __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -import shutil, importlib +import shutil from PyQt4.Qt import QString, SIGNAL @@ -86,17 +86,9 @@ class BulkConfig(Config): sd = widget_factory(StructureDetectionWidget) toc = 
widget_factory(TOCWidget) - output_widget = None - name = self.plumber.output_plugin.name.lower().replace(' ', '_') - try: - output_widget = importlib.import_module( - 'calibre.gui2.convert.'+name) - pw = output_widget.PluginWidget - pw.ICON = I('back.png') - pw.HELP = _('Options specific to the output format.') - output_widget = widget_factory(pw) - except ImportError: - pass + output_widget = self.plumber.output_plugin.gui_configuration_widget( + self.stack, self.plumber.get_option_by_name, + self.plumber.get_option_help, self.db) while True: c = self.stack.currentWidget() diff --git a/src/calibre/gui2/convert/single.py b/src/calibre/gui2/convert/single.py index 9160c820bd..4d13ce371b 100644 --- a/src/calibre/gui2/convert/single.py +++ b/src/calibre/gui2/convert/single.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import cPickle, shutil, importlib +import cPickle, shutil from PyQt4.Qt import QString, SIGNAL, QAbstractListModel, Qt, QVariant, QFont @@ -187,29 +187,12 @@ class Config(ResizableDialog, Ui_Dialog): toc = widget_factory(TOCWidget) debug = widget_factory(DebugWidget) - output_widget = None - name = self.plumber.output_plugin.name.lower().replace(' ', '_') - try: - output_widget = importlib.import_module( - 'calibre.gui2.convert.'+name) - pw = output_widget.PluginWidget - pw.ICON = I('back.png') - pw.HELP = _('Options specific to the output format.') - output_widget = widget_factory(pw) - except ImportError: - pass - input_widget = None - name = self.plumber.input_plugin.name.lower().replace(' ', '_') - try: - input_widget = importlib.import_module( - 'calibre.gui2.convert.'+name) - pw = input_widget.PluginWidget - pw.ICON = I('forward.png') - pw.HELP = _('Options specific to the input format.') - input_widget = widget_factory(pw) - except ImportError: - pass - + output_widget = self.plumber.output_plugin.gui_configuration_widget( + self.stack, self.plumber.get_option_by_name, + 
self.plumber.get_option_help, self.db, self.book_id) + input_widget = self.plumber.input_plugin.gui_configuration_widget( + self.stack, self.plumber.get_option_by_name, + self.plumber.get_option_help, self.db, self.book_id) while True: c = self.stack.currentWidget() if not c: break diff --git a/src/calibre/gui2/dnd.py b/src/calibre/gui2/dnd.py index 90b7e1e0ca..c474fed537 100644 --- a/src/calibre/gui2/dnd.py +++ b/src/calibre/gui2/dnd.py @@ -135,21 +135,22 @@ def dnd_has_extension(md, extensions): prints('Debugging DND event') for f in md.formats(): f = unicode(f) - prints(f, repr(data_as_string(f, md))[:300], '\n') + raw = data_as_string(f, md) + prints(f, len(raw), repr(raw[:300]), '\n') print () if has_firefox_ext(md, extensions): return True if md.hasUrls(): urls = [unicode(u.toString()) for u in md.urls()] - purls = [urlparse(u) for u in urls] - paths = [u2p(x) for x in purls] + paths = [urlparse(u).path for u in urls] + exts = frozenset([posixpath.splitext(u)[1][1:].lower() for u in + paths if u]) if DEBUG: prints('URLS:', urls) prints('Paths:', paths) + prints('Extensions:', exts) - exts = frozenset([posixpath.splitext(u)[1][1:].lower() for u in - paths]) return bool(exts.intersection(frozenset(extensions))) return False diff --git a/src/calibre/gui2/store/stores/sony_plugin.py b/src/calibre/gui2/store/stores/sony_plugin.py index 7022287794..2ad344e82c 100644 --- a/src/calibre/gui2/store/stores/sony_plugin.py +++ b/src/calibre/gui2/store/stores/sony_plugin.py @@ -32,6 +32,8 @@ class SonyStore(BasicStoreConfig, StorePlugin): d.setWindowTitle(self.name) d.set_tags(self.config.get('tags', '')) d.exec_() + else: + open_url(QUrl('http://ebookstore.sony.com')) def search(self, query, max_results=10, timeout=60): url = 'http://ebookstore.sony.com/search?keyword=%s'%urllib.quote_plus( diff --git a/src/calibre/gui2/tag_browser/model.py b/src/calibre/gui2/tag_browser/model.py index 0b6a681a72..012f441bea 100644 --- a/src/calibre/gui2/tag_browser/model.py +++ 
b/src/calibre/gui2/tag_browser/model.py @@ -9,7 +9,6 @@ __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' import traceback, cPickle, copy -from itertools import repeat from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt, QMimeData, QModelIndex, pyqtSignal, QObject) @@ -17,7 +16,7 @@ from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt, from calibre.gui2 import NONE, gprefs, config, error_dialog from calibre.library.database2 import Tag from calibre.utils.config import tweaks -from calibre.utils.icu import sort_key, lower, strcmp, contractions +from calibre.utils.icu import sort_key, lower, strcmp, collation_order from calibre.library.field_metadata import TagsIcons, category_icon_map from calibre.gui2.dialogs.confirm_delete import confirm from calibre.utils.formatter import EvalFormatter @@ -258,16 +257,6 @@ class TagsModel(QAbstractItemModel): # {{{ self.hidden_categories.add(cat) db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories)) - conts = contractions() - if len(conts) == 0 or not tweaks['enable_multicharacters_in_tag_browser']: - self.do_contraction = False - else: - self.do_contraction = True - nconts = set() - for s in conts: - nconts.add(icu_upper(s)) - self.contraction_set = frozenset(nconts) - self.db = db self._run_rebuild() self.endResetModel() @@ -416,53 +405,23 @@ class TagsModel(QAbstractItemModel): # {{{ tt = key if in_uc else None if collapse_model == 'first letter': - # Build a list of 'equal' first letters by looking for - # overlapping ranges. If a range overlaps another, then the - # letters are assumed to be equivalent. ICU collating is complex - # beyond belief. This mechanism lets us determine the logical - # first character from ICU's standpoint. - chardict = {} + # Build a list of 'equal' first letters by noticing changes + # in ICU's 'ordinal' for the first letter. In this case, the + # first letter can actually be more than one letter long. 
+ cl_list = [None] * len(data[key]) + last_ordnum = 0 for idx,tag in enumerate(data[key]): if not tag.sort: c = ' ' else: - if not self.do_contraction: - c = icu_upper(tag.sort)[0] - else: - v = icu_upper(tag.sort) - c = v[0] - for s in self.contraction_set: - if len(s) > len(c) and v.startswith(s): - c = s - if c not in chardict: - chardict[c] = [idx, idx] - else: - chardict[c][1] = idx + c = tag.sort + ordnum, ordlen = collation_order(c) + if last_ordnum != ordnum: + last_c = icu_upper(c[0:ordlen]) + last_ordnum = ordnum + cl_list[idx] = last_c + top_level_component = 'z' + data[key][0].original_name - # sort the ranges to facilitate detecting overlap - if len(chardict) == 1 and ' ' in chardict: - # The category could not be partitioned. - collapse_model = 'disable' - else: - ranges = sorted([(v[0], v[1], c) for c,v in chardict.items()]) - # Create a list of 'first letters' to use for each item in - # the category. The list is generated using the ranges. Overlaps - # are filled with the character that first occurs. 
- cl_list = list(repeat(None, len(data[key]))) - for t in ranges: - start = t[0] - c = t[2] - if cl_list[start] is None: - nc = c - else: - nc = cl_list[start] - for i in range(start, t[1]+1): - cl_list[i] = nc - - if len(data[key]) > 0: - top_level_component = 'z' + data[key][0].original_name - else: - top_level_component = '' last_idx = -collapse category_is_hierarchical = not ( key in ['authors', 'publisher', 'news', 'formats', 'rating'] or diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index c6eb76c735..791fda0f93 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -507,8 +507,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer): self.clock_label.setVisible(True) self.clock_label.setText('99:99 AA') self.clock_timer.start(1000) - self.clock_label.setStyleSheet(self.clock_label_style% - tuple(self.view.document.colors())) + self.clock_label.setStyleSheet(self.clock_label_style%( + 'rgba(0, 0, 0, 0)', self.view.document.colors()[1])) self.clock_label.resize(self.clock_label.sizeHint()) sw = QApplication.desktop().screenGeometry(self.view) self.clock_label.move(sw.width() - self.vertical_scrollbar.width() - 15 diff --git a/src/calibre/library/catalogs/epub_mobi_builder.py b/src/calibre/library/catalogs/epub_mobi_builder.py index 7cbd639fd7..76f96a3397 100644 --- a/src/calibre/library/catalogs/epub_mobi_builder.py +++ b/src/calibre/library/catalogs/epub_mobi_builder.py @@ -2637,8 +2637,10 @@ Author '{0}': navLabelTag.insert(0, textTag) navPointByLetterTag.insert(0,navLabelTag) contentTag = Tag(soup, 'content') - contentTag['src'] = "%s#%s_authors" % (HTML_file, self.generateUnicodeName(authors_by_letter[1])) - + if authors_by_letter[1] == self.SYMBOLS: + contentTag['src'] = "%s#%s_authors" % (HTML_file, authors_by_letter[1]) + else: + contentTag['src'] = "%s#%s_authors" % (HTML_file, self.generateUnicodeName(authors_by_letter[1])) navPointByLetterTag.insert(1,contentTag) if self.generateForKindle: diff 
--git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index c451e9cdac..dfaf2dd53e 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -310,6 +311,41 @@ icu_Collator_startswith(icu_Collator *self, PyObject *args, PyObject *kwargs) { Py_RETURN_FALSE; } // }}} +// Collator.collation_order {{{ +static PyObject * +icu_Collator_collation_order(icu_Collator *self, PyObject *args, PyObject *kwargs) { /* Returns the tuple (order, len): order is the first collation element ucol_next() yields for the string, len is the iterator offset after reading it; the result stays (0, -1) when the wide-char conversion or the iterator setup fails. */ + PyObject *a_; + size_t asz; + int32_t actual_a; + UChar *a; + wchar_t *aw; + UErrorCode status = U_ZERO_ERROR; + UCollationElements *iter = NULL; + int order = 0, len = -1; + + if (!PyArg_ParseTuple(args, "U", &a_)) return NULL; + asz = PyUnicode_GetSize(a_); + + a = (UChar*)calloc(asz*4 + 2, sizeof(UChar)); + aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t)); + + if (a == NULL || aw == NULL ) { free(a); free(aw); return PyErr_NoMemory(); } /* free(NULL) is a no-op, so this releases whichever buffer did get allocated instead of leaking it */ + + actual_a = (int32_t)PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1); + if (actual_a > -1) { + u_strFromWCS(a, asz*4 + 1, &actual_a, aw, -1, &status); + iter = ucol_openElements(self->collator, a, actual_a, &status); + if (iter != NULL && U_SUCCESS(status)) { + order = ucol_next(iter, &status); + len = ucol_getOffset(iter); + ucol_closeElements(iter); iter = NULL; + } + } + + free(a); free(aw); + return Py_BuildValue("ii", order, len); +} // }}} + static PyObject* icu_Collator_clone(icu_Collator *self, PyObject *args, PyObject *kwargs); @@ -338,6 +374,10 @@ static PyMethodDef icu_Collator_methods[] = { "startswith(a, b) -> returns True iff a startswith b, following the current collation rules." }, + {"collation_order", (PyCFunction)icu_Collator_collation_order, METH_VARARGS, + "collation_order(string) -> returns (order, length) where order is an integer that gives the position of string in a list. length gives the number of characters used for order."
+ }, + {NULL} /* Sentinel */ }; diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 0dab76cd30..93f4d7b1da 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -75,6 +75,7 @@ def icu_sort_key(collator, obj): except AttributeError: return secondary_collator().sort_key(obj) + def py_find(pattern, source): pos = source.find(pattern) if pos > -1: @@ -126,6 +127,12 @@ def icu_contractions(collator): _cmap[collator] = ans return ans +def icu_collation_order(collator, a): + try: + return collator.collation_order(a) + except TypeError: + return collator.collation_order(unicode(a)) + load_icu() load_collator() _icu_not_ok = _icu is None or _collator is None @@ -205,6 +212,14 @@ def primary_startswith(a, b): except AttributeError: return icu_startswith(primary_collator(), a, b) +def collation_order(a): + if _icu_not_ok: + return (ord(a[0]), 1) if a else (0, 0) + try: + return icu_collation_order(_secondary_collator, a) + except AttributeError: + return icu_collation_order(secondary_collator(), a) + ################################################################################ def test(): # {{{