diff --git a/.bzrignore b/.bzrignore
index 47f754ef3c..0a44159b1e 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -13,6 +13,7 @@ src/calibre/manual/cli/
build
dist
docs
+nbproject/
src/calibre/gui2/pictureflow/Makefile.Debug
src/calibre/gui2/pictureflow/Makefile.Release
src/calibre/gui2/pictureflow/debug/
diff --git a/.pydevproject b/.pydevproject
index aaa4cc3986..509137a36a 100644
--- a/.pydevproject
+++ b/.pydevproject
@@ -2,9 +2,9 @@
-python 2.5
+python 2.6
-/calibre/src
+/calibre-pluginize/srcDefault
diff --git a/setup.py b/setup.py
index cba8c17610..ee2d54cc5a 100644
--- a/setup.py
+++ b/setup.py
@@ -72,6 +72,9 @@ if __name__ == '__main__':
library_dirs=[os.environ.get('PODOFO_LIB_DIR', podofo_lib)],
include_dirs=\
[os.environ.get('PODOFO_INC_DIR', podofo_inc)]))
+ else:
+ print 'WARNING: PoDoFo not found on your system. Various PDF related',
+ print 'functionality will not work.'
ext_modules = optional + [
@@ -88,6 +91,9 @@ if __name__ == '__main__':
'src/calibre/utils/msdes/des.c'],
include_dirs=['src/calibre/utils/msdes']),
+ Extension('calibre.plugins.cPalmdoc',
+ sources=['src/calibre/ebooks/compression/palmdoc.c']),
+
PyQtExtension('calibre.plugins.pictureflow',
['src/calibre/gui2/pictureflow/pictureflow.cpp',
'src/calibre/gui2/pictureflow/pictureflow.h'],
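Note on the setup.py hunk above: it keeps native extensions optional, so PoDoFo-backed PDF support is only built when the library is found, and otherwise a warning is printed while the rest of the build proceeds. A minimal, hypothetical distutils sketch of that guard pattern follows; the module name, source path and default locations are invented for illustration and are not calibre's actual build configuration.

# Illustrative sketch (Python 2 / distutils): build an extension only when its
# dependency is present, mirroring the optional PoDoFo handling above.
import os
from distutils.core import setup, Extension

podofo_inc = os.environ.get('PODOFO_INC_DIR', '/usr/include/podofo')  # assumed default
podofo_lib = os.environ.get('PODOFO_LIB_DIR', '/usr/lib')             # assumed default

optional = []
if os.path.exists(os.path.join(podofo_inc, 'podofo.h')):
    optional.append(Extension('example.plugins.podofo',
        sources=['src/podofo/podofo.cpp'],      # hypothetical source path
        libraries=['podofo'],
        library_dirs=[podofo_lib],
        include_dirs=[podofo_inc]))
else:
    print 'WARNING: PoDoFo not found on your system. Various PDF related',
    print 'functionality will not work.'

setup(name='example', version='0.1', ext_modules=optional)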
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index a0dc41009a..79dc659f34 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -2,11 +2,11 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
-
-import sys, os, re, logging, time, subprocess, atexit, mimetypes, warnings
+import sys, os, re, logging, time, mimetypes, \
+ __builtin__, warnings, multiprocessing
+__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint
from math import floor
-from logging import Formatter
warnings.simplefilter('ignore', DeprecationWarning)
@@ -45,6 +45,13 @@ def to_unicode(raw, encoding='utf-8', errors='strict'):
return raw
return raw.decode(encoding, errors)
+def patheq(p1, p2):
+ p = os.path
+ d = lambda x : p.normcase(p.normpath(p.realpath(p.normpath(x))))
+ if not p1 or not p2:
+ return False
+ return d(p1) == d(p2)
+
def unicode_path(path, abs=False):
if not isinstance(path, unicode):
path = path.decode(sys.getfilesystemencoding())
@@ -71,7 +78,7 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
**WARNING:** This function also replaces path separators, so only pass file names
and not full paths to it.
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
- are encoded in the filesystem encoding of the platform, or UTF-8.
+ are encoded in the filesystem encoding of the platform, or UTF-8.
'''
if isinstance(name, unicode):
name = name.encode(filesystem_encoding, 'ignore')
@@ -83,26 +90,33 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
return one.replace('..', '_')
+def prints(*args, **kwargs):
+ '''
+ Print unicode arguments safely by encoding them to preferred_encoding.
+ Has the same signature as the print function from Python 3.
+ '''
+ file = kwargs.get('file', sys.stdout)
+ sep = kwargs.get('sep', ' ')
+ end = kwargs.get('end', '\n')
+ enc = preferred_encoding
+ if 'CALIBRE_WORKER' in os.environ:
+ enc = 'utf-8'
+ for i, arg in enumerate(args):
+ if isinstance(arg, unicode):
+ arg = arg.encode(enc)
+ if not isinstance(arg, str):
+ arg = str(arg)
+ if not isinstance(arg, unicode):
+ arg = arg.decode(preferred_encoding, 'replace').encode(enc)
+ file.write(arg)
+ if i != len(args)-1:
+ file.write(sep)
+ file.write(end)
+
class CommandLineError(Exception):
pass
-class ColoredFormatter(Formatter):
- def format(self, record):
- ln = record.__dict__['levelname']
- col = ''
- if ln == 'CRITICAL':
- col = terminal_controller.YELLOW
- elif ln == 'ERROR':
- col = terminal_controller.RED
- elif ln in ['WARN', 'WARNING']:
- col = terminal_controller.BLUE
- elif ln == 'INFO':
- col = terminal_controller.GREEN
- elif ln == 'DEBUG':
- col = terminal_controller.CYAN
- record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
- return Formatter.format(self, record)
def setup_cli_handlers(logger, level):
@@ -157,7 +171,7 @@ def extract(path, dir):
def get_proxies():
proxies = {}
-
+
for q in ('http', 'ftp'):
proxy = os.environ.get(q+'_proxy', None)
if not proxy: continue
@@ -192,8 +206,8 @@ def get_proxies():
def browser(honor_time=True, max_time=2, mobile_browser=False):
'''
Create a mechanize browser for web scraping. The browser handles cookies,
- refresh requests and ignores robots.txt. Also uses proxy if avaialable.
-
+ refresh requests and ignores robots.txt. Also uses proxy if available.
+
:param honor_time: If True honors pause time in refresh requests
:param max_time: Maximum time in seconds to wait during a refresh request
'''
@@ -230,21 +244,21 @@ def fit_image(width, height, pwidth, pheight):
return scaled, int(width), int(height)
class CurrentDir(object):
-
+
def __init__(self, path):
self.path = path
self.cwd = None
-
+
def __enter__(self, *args):
self.cwd = os.getcwd()
os.chdir(self.path)
return self.cwd
-
+
def __exit__(self, *args):
os.chdir(self.cwd)
-class FileWrapper(object):
+class StreamReadWrapper(object):
'''
Used primarily with pyPdf to ensure the stream is properly closed.
'''
@@ -263,40 +277,7 @@ class FileWrapper(object):
def detect_ncpus():
"""Detects the number of effective CPUs in the system"""
- try:
- from PyQt4.QtCore import QThread
- ans = QThread.idealThreadCount()
- if ans > 0:
- return ans
- except:
- pass
- #for Linux, Unix and MacOS
- if hasattr(os, "sysconf"):
- if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
- #Linux and Unix
- ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
- if isinstance(ncpus, int) and ncpus > 0:
- return ncpus
- else:
- #MacOS X
- try:
- return int(subprocess.Popen(('sysctl', '-n', 'hw.cpu'), stdout=subprocess.PIPE).stdout.read())
- except IOError: # Occassionally the system call gets interrupted
- try:
- return int(subprocess.Popen(('sysctl', '-n', 'hw.cpu'), stdout=subprocess.PIPE).stdout.read())
- except IOError:
- return 1
- except ValueError: # On some systems the sysctl call fails
- return 1
-
- #for Windows
- if os.environ.has_key("NUMBER_OF_PROCESSORS"):
- ncpus = int(os.environ["NUMBER_OF_PROCESSORS"]);
- if ncpus > 0:
- return ncpus
- #return the default value
- return 1
-
+ return multiprocessing.cpu_count()
def launch(path_or_url):
if os.path.exists(path_or_url):
@@ -343,67 +324,6 @@ def english_sort(x, y):
'''
return cmp(_spat.sub('', x), _spat.sub('', y))
-class LoggingInterface:
-
- def __init__(self, logger):
- self.__logger = self.logger = logger
-
- def setup_cli_handler(self, verbosity):
- for handler in self.__logger.handlers:
- if isinstance(handler, logging.StreamHandler):
- return
- if os.environ.get('CALIBRE_WORKER', None) is not None and self.__logger.handlers:
- return
- stream = sys.stdout
- formatter = logging.Formatter()
- level = logging.INFO
- if verbosity > 0:
- formatter = ColoredFormatter('[%(levelname)s] %(message)s') if verbosity > 1 else \
- ColoredFormatter('%(levelname)s: %(message)s')
- level = logging.DEBUG
- if verbosity > 1:
- stream = sys.stderr
-
- handler = logging.StreamHandler(stream)
- handler.setFormatter(formatter)
- handler.setLevel(level)
- self.__logger.addHandler(handler)
- self.__logger.setLevel(level)
-
-
- def ___log(self, func, msg, args, kwargs):
- args = [msg] + list(args)
- for i in range(len(args)):
- if not isinstance(args[i], basestring):
- continue
- if sys.version_info[:2] > (2, 5):
- if not isinstance(args[i], unicode):
- args[i] = args[i].decode(preferred_encoding, 'replace')
- elif isinstance(args[i], unicode):
- args[i] = args[i].encode(preferred_encoding, 'replace')
- func(*args, **kwargs)
-
- def log_debug(self, msg, *args, **kwargs):
- self.___log(self.__logger.debug, msg, args, kwargs)
-
- def log_info(self, msg, *args, **kwargs):
- self.___log(self.__logger.info, msg, args, kwargs)
-
- def log_warning(self, msg, *args, **kwargs):
- self.___log(self.__logger.warning, msg, args, kwargs)
-
- def log_warn(self, msg, *args, **kwargs):
- self.___log(self.__logger.warning, msg, args, kwargs)
-
- def log_error(self, msg, *args, **kwargs):
- self.___log(self.__logger.error, msg, args, kwargs)
-
- def log_critical(self, msg, *args, **kwargs):
- self.___log(self.__logger.critical, msg, args, kwargs)
-
- def log_exception(self, msg, *args):
- self.___log(self.__logger.exception, msg, args, {})
-
def walk(dir):
''' A nice interface to os.walk '''
for record in os.walk(dir):
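To make the behaviour of the new helpers concrete, here is a small standalone sketch that mirrors patheq() and, in simplified form, prints() from the hunk above. It deliberately avoids importing calibre so it can be run on its own; the sample paths and strings are made up.

# -*- coding: utf-8 -*-
# Illustrative sketch (Python 2): standalone mirrors of patheq() and a
# simplified prints(), so the additions above can be tried in isolation.
import sys, os, locale

preferred_encoding = locale.getpreferredencoding() or 'utf-8'

def patheq(p1, p2):
    # Paths are equal if they resolve to the same normalized, real path
    p = os.path
    d = lambda x: p.normcase(p.normpath(p.realpath(p.normpath(x))))
    if not p1 or not p2:
        return False
    return d(p1) == d(p2)

def prints(*args, **kwargs):
    # print()-style helper: encodes unicode arguments before writing
    out = kwargs.get('file', sys.stdout)
    sep = kwargs.get('sep', ' ')
    end = kwargs.get('end', '\n')
    for i, arg in enumerate(args):
        if isinstance(arg, unicode):
            arg = arg.encode(preferred_encoding, 'replace')
        elif not isinstance(arg, str):
            arg = str(arg)
        out.write(arg)
        if i != len(args) - 1:
            out.write(sep)
    out.write(end)

if __name__ == '__main__':
    prints(u'caf\xe9', 42, sep=', ', end='!\n')   # encodes the unicode argument
    print patheq('/tmp/../tmp/', '/tmp')           # True on typical systems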
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 52f85cc20c..e03d5e5edc 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -53,7 +53,7 @@ if plugins is None:
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
sys.path.insert(0, plugin_path)
- for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo'] + \
+ for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:
diff --git a/src/calibre/customize/__init__.py b/src/calibre/customize/__init__.py
index 3d48f42535..0e6bad8d2e 100644
--- a/src/calibre/customize/__init__.py
+++ b/src/calibre/customize/__init__.py
@@ -220,4 +220,5 @@ class MetadataWriterPlugin(Plugin):
'''
pass
-
+
+
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index d4470b16fd..d107413e38 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -1,8 +1,9 @@
-from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
-import textwrap, os
+import textwrap
+import os
+import glob
from calibre.customize import FileTypePlugin, MetadataReaderPlugin, MetadataWriterPlugin
from calibre.constants import __version__
@@ -18,170 +19,41 @@ every time you add an HTML file to the library.\
file_types = set(['html', 'htm', 'xhtml', 'xhtm'])
supported_platforms = ['windows', 'osx', 'linux']
on_import = True
-
+
def run(self, htmlfile):
- of = self.temporary_file('_plugin_html2zip.zip')
- from calibre.ebooks.html import gui_main as html2oeb
- html2oeb(htmlfile, of)
+ from calibre.ptempfile import TemporaryDirectory
+ from calibre.gui2.convert.gui_conversion import gui_convert
+ from calibre.customize.conversion import OptionRecommendation
+ from calibre.ebooks.epub import initialize_container
+
+ with TemporaryDirectory('_plugin_html2zip') as tdir:
+ gui_convert(htmlfile, tdir, [('debug_input', tdir,
+ OptionRecommendation.HIGH)])
+ of = self.temporary_file('_plugin_html2zip.zip')
+ opf = glob.glob(os.path.join(tdir, '*.opf'))[0]
+ ncx = glob.glob(os.path.join(tdir, '*.ncx'))
+ if ncx:
+ os.remove(ncx[0])
+ epub = initialize_container(of.name, os.path.basename(opf))
+ epub.add_dir(tdir)
+ epub.close()
+
return of.name
-class OPFMetadataReader(MetadataReaderPlugin):
-
- name = 'Read OPF metadata'
- file_types = set(['opf'])
- description = _('Read metadata from %s files')%'OPF'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.opf2 import OPF
- from calibre.ebooks.metadata import MetaInformation
- return MetaInformation(OPF(stream, os.getcwd()))
-
-class RTFMetadataReader(MetadataReaderPlugin):
-
- name = 'Read RTF metadata'
- file_types = set(['rtf'])
- description = _('Read metadata from %s files')%'RTF'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.rtf import get_metadata
- return get_metadata(stream)
-
-class FB2MetadataReader(MetadataReaderPlugin):
-
- name = 'Read FB2 metadata'
- file_types = set(['fb2'])
- description = _('Read metadata from %s files')%'FB2'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.fb2 import get_metadata
- return get_metadata(stream)
-
-
-class LRFMetadataReader(MetadataReaderPlugin):
-
- name = 'Read LRF metadata'
- file_types = set(['lrf'])
- description = _('Read metadata from %s files')%'LRF'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.lrf.meta import get_metadata
- return get_metadata(stream)
-
-class PDFMetadataReader(MetadataReaderPlugin):
-
- name = 'Read PDF metadata'
- file_types = set(['pdf'])
- description = _('Read metadata from %s files')%'PDF'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.pdf import get_metadata
- return get_metadata(stream)
-
-class LITMetadataReader(MetadataReaderPlugin):
-
- name = 'Read LIT metadata'
- file_types = set(['lit'])
- description = _('Read metadata from %s files')%'LIT'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.lit import get_metadata
- return get_metadata(stream)
-
-class IMPMetadataReader(MetadataReaderPlugin):
-
- name = 'Read IMP metadata'
- file_types = set(['imp'])
- description = _('Read metadata from %s files')%'IMP'
- author = 'Ashish Kulkarni'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.imp import get_metadata
- return get_metadata(stream)
-
-class RBMetadataReader(MetadataReaderPlugin):
-
- name = 'Read RB metadata'
- file_types = set(['rb'])
- description = _('Read metadata from %s files')%'RB'
- author = 'Ashish Kulkarni'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.rb import get_metadata
- return get_metadata(stream)
-
-class EPUBMetadataReader(MetadataReaderPlugin):
-
- name = 'Read EPUB metadata'
- file_types = set(['epub'])
- description = _('Read metadata from %s files')%'EPUB'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.epub import get_metadata
- return get_metadata(stream)
-
-class HTMLMetadataReader(MetadataReaderPlugin):
-
- name = 'Read HTML metadata'
- file_types = set(['html'])
- description = _('Read metadata from %s files')%'HTML'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.html import get_metadata
- return get_metadata(stream)
-
-class MOBIMetadataReader(MetadataReaderPlugin):
-
- name = 'Read MOBI metadata'
- file_types = set(['mobi', 'prc', 'azw'])
- description = _('Read metadata from %s files')%'MOBI'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.mobi.reader import get_metadata
- return get_metadata(stream)
-
-
-class TOPAZMetadataReader(MetadataReaderPlugin):
-
- name = 'Read Topaz metadata'
- file_types = set(['tpz', 'azw1'])
- description = _('Read metadata from %s files')%'MOBI'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.topaz import get_metadata
- return get_metadata(stream)
-
-class ODTMetadataReader(MetadataReaderPlugin):
-
- name = 'Read ODT metadata'
- file_types = set(['odt'])
- description = _('Read metadata from %s files')%'ODT'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.odt import get_metadata
- return get_metadata(stream)
-
-class LRXMetadataReader(MetadataReaderPlugin):
-
- name = 'Read LRX metadata'
- file_types = set(['lrx'])
- description = _('Read metadata from %s files')%'LRX'
-
- def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.lrx import get_metadata
- return get_metadata(stream)
class ComicMetadataReader(MetadataReaderPlugin):
-
+
name = 'Read comic metadata'
file_types = set(['cbr', 'cbz'])
description = _('Extract cover from comic files')
-
+
def get_metadata(self, stream, ftype):
if ftype == 'cbr':
from calibre.libunrar import extract_member as extract_first
+ extract_first
else:
from calibre.libunzip import extract_member as extract_first
- from calibre.ebooks.metadata import MetaInformation
+ from calibre.ebooks.metadata import MetaInformation
ret = extract_first(stream)
mi = MetaInformation(None, None)
if ret is not None:
@@ -189,83 +61,346 @@ class ComicMetadataReader(MetadataReaderPlugin):
ext = os.path.splitext(path)[1][1:]
mi.cover_data = (ext.lower(), data)
return mi
-
-class ZipMetadataReader(MetadataReaderPlugin):
-
- name = 'Read ZIP metadata'
- file_types = set(['zip', 'oebzip'])
- description = _('Read metadata from ebooks in ZIP archives')
-
+
+class EPUBMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read EPUB metadata'
+ file_types = set(['epub'])
+ description = _('Read metadata from %s files')%'EPUB'
+
def get_metadata(self, stream, ftype):
- from calibre.ebooks.metadata.zip import get_metadata
+ from calibre.ebooks.metadata.epub import get_metadata
+ return get_metadata(stream)
+
+class FB2MetadataReader(MetadataReaderPlugin):
+
+ name = 'Read FB2 metadata'
+ file_types = set(['fb2'])
+ description = _('Read metadata from %s files')%'FB2'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.fb2 import get_metadata
+ return get_metadata(stream)
+
+class HTMLMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read HTML metadata'
+ file_types = set(['html'])
+ description = _('Read metadata from %s files')%'HTML'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.html import get_metadata
+ return get_metadata(stream)
+
+class IMPMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read IMP metadata'
+ file_types = set(['imp'])
+ description = _('Read metadata from %s files')%'IMP'
+ author = 'Ashish Kulkarni'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.imp import get_metadata
+ return get_metadata(stream)
+
+class LITMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read LIT metadata'
+ file_types = set(['lit'])
+ description = _('Read metadata from %s files')%'LIT'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.lit import get_metadata
+ return get_metadata(stream)
+
+class LRFMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read LRF metadata'
+ file_types = set(['lrf'])
+ description = _('Read metadata from %s files')%'LRF'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.lrf.meta import get_metadata
+ return get_metadata(stream)
+
+class LRXMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read LRX metadata'
+ file_types = set(['lrx'])
+ description = _('Read metadata from %s files')%'LRX'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.lrx import get_metadata
+ return get_metadata(stream)
+
+class MOBIMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read MOBI metadata'
+ file_types = set(['mobi', 'prc', 'azw'])
+ description = _('Read metadata from %s files')%'MOBI'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.mobi.reader import get_metadata
+ return get_metadata(stream)
+
+class ODTMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read ODT metadata'
+ file_types = set(['odt'])
+ description = _('Read metadata from %s files')%'ODT'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.odt import get_metadata
+ return get_metadata(stream)
+
+class OPFMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read OPF metadata'
+ file_types = set(['opf'])
+ description = _('Read metadata from %s files')%'OPF'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.opf2 import OPF
+ from calibre.ebooks.metadata import MetaInformation
+ return MetaInformation(OPF(stream, os.getcwd()))
+
+class PDBMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read PDB metadata'
+ file_types = set(['pdb'])
+ description = _('Read metadata from %s files') % 'PDB'
+ author = 'John Schember'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.pdb import get_metadata
+ return get_metadata(stream)
+
+class PDFMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read PDF metadata'
+ file_types = set(['pdf'])
+ description = _('Read metadata from %s files')%'PDF'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.pdf import get_metadata
return get_metadata(stream)
class RARMetadataReader(MetadataReaderPlugin):
-
+
name = 'Read RAR metadata'
file_types = set(['rar'])
description = _('Read metadata from ebooks in RAR archives')
-
+
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.rar import get_metadata
return get_metadata(stream)
+class RBMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read RB metadata'
+ file_types = set(['rb'])
+ description = _('Read metadata from %s files')%'RB'
+ author = 'Ashish Kulkarni'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.rb import get_metadata
+ return get_metadata(stream)
+
+class RTFMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read RTF metadata'
+ file_types = set(['rtf'])
+ description = _('Read metadata from %s files')%'RTF'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.rtf import get_metadata
+ return get_metadata(stream)
+
+class TOPAZMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read Topaz metadata'
+ file_types = set(['tpz', 'azw1'])
+ description = _('Read metadata from %s files')%'Topaz'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.topaz import get_metadata
+ return get_metadata(stream)
+
+class TXTMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read TXT metadata'
+ file_types = set(['txt'])
+ description = _('Read metadata from %s files') % 'TXT'
+ author = 'John Schember'
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.txt import get_metadata
+ return get_metadata(stream)
+
+class ZipMetadataReader(MetadataReaderPlugin):
+
+ name = 'Read ZIP metadata'
+ file_types = set(['zip', 'oebzip'])
+ description = _('Read metadata from ebooks in ZIP archives')
+
+ def get_metadata(self, stream, ftype):
+ from calibre.ebooks.metadata.zip import get_metadata
+ return get_metadata(stream)
+
class EPUBMetadataWriter(MetadataWriterPlugin):
-
+
name = 'Set EPUB metadata'
file_types = set(['epub'])
description = _('Set metadata in %s files')%'EPUB'
-
+
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.epub import set_metadata
set_metadata(stream, mi)
-
+
class LRFMetadataWriter(MetadataWriterPlugin):
-
+
name = 'Set LRF metadata'
file_types = set(['lrf'])
description = _('Set metadata in %s files')%'LRF'
-
+
def set_metadata(self, stream, mi, type):
from calibre.ebooks.lrf.meta import set_metadata
set_metadata(stream, mi)
-class RTFMetadataWriter(MetadataWriterPlugin):
-
- name = 'Set RTF metadata'
- file_types = set(['rtf'])
- description = _('Set metadata in %s files')%'RTF'
-
- def set_metadata(self, stream, mi, type):
- from calibre.ebooks.metadata.rtf import set_metadata
- set_metadata(stream, mi)
-
class MOBIMetadataWriter(MetadataWriterPlugin):
-
+
name = 'Set MOBI metadata'
file_types = set(['mobi', 'prc', 'azw'])
description = _('Set metadata in %s files')%'MOBI'
author = 'Marshall T. Vandegrift'
-
+
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.mobi import set_metadata
set_metadata(stream, mi)
-
+
+class PDBMetadataWriter(MetadataWriterPlugin):
+
+ name = 'Set PDB metadata'
+ file_types = set(['pdb'])
+ description = _('Set metadata in %s files') % 'PDB'
+ author = 'John Schember'
+
+ def set_metadata(self, stream, mi, type):
+ from calibre.ebooks.metadata.pdb import set_metadata
+ set_metadata(stream, mi)
+
class PDFMetadataWriter(MetadataWriterPlugin):
name = 'Set PDF metadata'
file_types = set(['pdf'])
description = _('Set metadata in %s files') % 'PDF'
- author = 'John Schember'
-
+ author = 'Kovid Goyal'
+
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.pdf import set_metadata
set_metadata(stream, mi)
+class RTFMetadataWriter(MetadataWriterPlugin):
-plugins = [HTML2ZIP]
+ name = 'Set RTF metadata'
+ file_types = set(['rtf'])
+ description = _('Set metadata in %s files')%'RTF'
+
+ def set_metadata(self, stream, mi, type):
+ from calibre.ebooks.metadata.rtf import set_metadata
+ set_metadata(stream, mi)
+
+
+from calibre.ebooks.comic.input import ComicInput
+from calibre.ebooks.epub.input import EPUBInput
+from calibre.ebooks.fb2.input import FB2Input
+from calibre.ebooks.html.input import HTMLInput
+from calibre.ebooks.lit.input import LITInput
+from calibre.ebooks.mobi.input import MOBIInput
+from calibre.ebooks.odt.input import ODTInput
+from calibre.ebooks.pdb.input import PDBInput
+from calibre.ebooks.pdf.input import PDFInput
+from calibre.ebooks.pml.input import PMLInput
+from calibre.ebooks.rb.input import RBInput
+from calibre.web.feeds.input import RecipeInput
+from calibre.ebooks.rtf.input import RTFInput
+from calibre.ebooks.txt.input import TXTInput
+
+from calibre.ebooks.epub.output import EPUBOutput
+from calibre.ebooks.fb2.output import FB2Output
+from calibre.ebooks.lit.output import LITOutput
+from calibre.ebooks.lrf.output import LRFOutput
+from calibre.ebooks.mobi.output import MOBIOutput
+from calibre.ebooks.oeb.output import OEBOutput
+from calibre.ebooks.pdb.output import PDBOutput
+from calibre.ebooks.pdf.output import PDFOutput
+from calibre.ebooks.pml.output import PMLOutput
+from calibre.ebooks.rb.output import RBOutput
+from calibre.ebooks.rtf.output import RTFOutput
+from calibre.ebooks.txt.output import TXTOutput
+
+from calibre.customize.profiles import input_profiles, output_profiles
+
+
+from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
+from calibre.devices.blackberry.driver import BLACKBERRY
+from calibre.devices.cybookg3.driver import CYBOOKG3
+from calibre.devices.eb600.driver import EB600
+from calibre.devices.jetbook.driver import JETBOOK
+from calibre.devices.kindle.driver import KINDLE
+from calibre.devices.kindle.driver import KINDLE2
+from calibre.devices.prs500.driver import PRS500
+from calibre.devices.prs505.driver import PRS505
+from calibre.devices.prs700.driver import PRS700
+
+
+plugins = []
+plugins += [
+ ComicInput,
+ EPUBInput,
+ FB2Input,
+ HTMLInput,
+ LITInput,
+ MOBIInput,
+ ODTInput,
+ PDBInput,
+ PDFInput,
+ PMLInput,
+ RBInput,
+ RecipeInput,
+ RTFInput,
+ TXTInput,
+]
+plugins += [
+ EPUBOutput,
+ FB2Output,
+ LITOutput,
+ LRFOutput,
+ MOBIOutput,
+ OEBOutput,
+ PDBOutput,
+ PDFOutput,
+ PMLOutput,
+ RBOutput,
+ RTFOutput,
+ TXTOutput,
+]
+plugins += [
+ BEBOOK,
+ BEBOOK_MINI,
+ BLACKBERRY,
+ CYBOOKG3,
+ EB600,
+ JETBOOK,
+ KINDLE,
+ KINDLE2,
+ PRS500,
+ PRS505,
+ PRS700,
+]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataWriter')]
+plugins += input_profiles + output_profiles
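Every metadata reader and writer above follows the same shape: class attributes identify the plugin and its file types, and the format-specific module is imported lazily inside get_metadata()/set_metadata(). As a rough sketch, a third-party reader for a hypothetical '.foo' format could look like the following (assumes calibre is importable; the FOO format and the toy parsing are invented for the example):

# Illustrative sketch: a MetadataReaderPlugin for a made-up '.foo' format,
# following the pattern of the builtin readers above.
from calibre.customize import MetadataReaderPlugin

class FOOMetadataReader(MetadataReaderPlugin):

    name        = 'Read FOO metadata'
    file_types  = set(['foo'])
    description = 'Read metadata from %s files' % 'FOO'
    author      = 'Example Author'

    def get_metadata(self, stream, ftype):
        # Import lazily, like the builtin readers, so constructing the
        # plugin registry stays cheap.
        from calibre.ebooks.metadata import MetaInformation
        title = stream.readline().strip() or None   # toy parsing logic
        return MetaInformation(title, None)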
diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py
new file mode 100644
index 0000000000..4d19ba4fad
--- /dev/null
+++ b/src/calibre/customize/conversion.py
@@ -0,0 +1,293 @@
+from __future__ import with_statement
+'''
+Defines the plugin system for conversions.
+'''
+import re, os, shutil
+
+from calibre import CurrentDir
+from calibre.customize import Plugin
+
+class ConversionOption(object):
+
+ '''
+ Class representing conversion options
+ '''
+
+ def __init__(self, name=None, help=None, long_switch=None,
+ short_switch=None, choices=None):
+ self.name = name
+ self.help = help
+ self.long_switch = long_switch
+ self.short_switch = short_switch
+ self.choices = choices
+
+ if self.long_switch is None:
+ self.long_switch = self.name.replace('_', '-')
+
+ self.validate_parameters()
+
+ def validate_parameters(self):
+ '''
+ Validate the parameters passed to :method:`__init__`.
+ '''
+ if re.match(r'[a-zA-Z_]([a-zA-Z0-9_])*', self.name) is None:
+ raise ValueError(self.name + ' is not a valid Python identifier')
+ if not self.help:
+ raise ValueError('You must set the help text')
+
+ def __hash__(self):
+ return hash(self.name)
+
+ def __eq__(self, other):
+ return hash(self) == hash(other)
+
+ def clone(self):
+ return ConversionOption(name=self.name, help=self.help,
+ long_switch=self.long_switch, short_switch=self.short_switch,
+ choices=self.choices)
+
+class OptionRecommendation(object):
+ LOW = 1
+ MED = 2
+ HIGH = 3
+
+ def __init__(self, recommended_value=None, level=LOW, **kwargs):
+ '''
+ An option recommendation. That is, an option as well as its recommended
+ value and the level of the recommendation.
+ '''
+ self.level = level
+ self.recommended_value = recommended_value
+ self.option = kwargs.pop('option', None)
+ if self.option is None:
+ self.option = ConversionOption(**kwargs)
+
+ self.validate_parameters()
+
+ @property
+ def help(self):
+ return self.option.help
+
+ def clone(self):
+ return OptionRecommendation(recommended_value=self.recommended_value,
+ level=self.level, option=self.option.clone())
+
+ def validate_parameters(self):
+ if self.option.choices and self.recommended_value not in \
+ self.option.choices:
+ raise ValueError('OpRec: %s: Recommended value not in choices'%
+ self.option.name)
+ if not (isinstance(self.recommended_value, (int, float, str, unicode))\
+ or self.recommended_value is None):
+ raise ValueError('OpRec: %s:'%self.option.name +
+ repr(self.recommended_value) +
+ ' is not a string or a number')
+
+class DummyReporter(object):
+
+ def __call__(self, percent, msg=''):
+ pass
+
+class InputFormatPlugin(Plugin):
+ '''
+ InputFormatPlugins are responsible for converting a document into
+ HTML+OPF+CSS+etc.
+ The results of the conversion *must* be encoded in UTF-8.
+ The main action happens in :method:`convert`.
+ '''
+
+ type = _('Conversion Input')
+ can_be_disabled = False
+ supported_platforms = ['windows', 'osx', 'linux']
+
+ #: Set of file types for which this plugin should be run
+ #: For example: ``set(['azw', 'mobi', 'prc'])``
+ file_types = set([])
+
+ #: If True, this input plugin generates a collection of images,
+ #: one per HTML file. You can obtain access to the images via
+ #: convenience method, :method:`get_image_collection`.
+ is_image_collection = False
+
+ #: Options shared by all Input format plugins. Do not override
+ #: in sub-classes. Use :member:`options` instead. Every option must be an
+ #: instance of :class:`OptionRecommendation`.
+ common_options = set([
+ OptionRecommendation(name='debug_input',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Save the output from the input plugin to the specified '
+ 'directory. Useful if you are unsure at which stage '
+ 'of the conversion process a bug is occurring. '
+ 'WARNING: This completely deletes the contents of '
+ 'the specified directory.')
+ ),
+
+ OptionRecommendation(name='input_encoding',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Specify the character encoding of the input document. If '
+ 'set this option will override any encoding declared by the '
+ 'document itself. Particularly useful for documents that '
+ 'do not declare an encoding or that have erroneous '
+ 'encoding declarations.')
+ ),
+
+ ])
+
+ #: Options to customize the behavior of this plugin. Every option must be an
+ #: instance of :class:`OptionRecommendation`.
+ options = set([])
+
+ #: A set of 3-tuples of the form
+ #: (option_name, recommended_value, recommendation_level)
+ recommendations = set([])
+
+ def __init__(self, *args):
+ Plugin.__init__(self, *args)
+ self.report_progress = DummyReporter()
+
+ def get_images(self):
+ '''
+ Return a list of absolute paths to the images, if this input plugin
+ represents an image collection. The list of images is in the same order
+ as the spine and the TOC.
+ '''
+ raise NotImplementedError()
+
+ def preprocess_html(self, html):
+ '''
+ This method is called by the conversion pipeline on all HTML before it
+ is parsed. It is meant to be used to do any required preprocessing on
+ the HTML, like removing hard line breaks, etc.
+
+ :param html: A unicode string
+ :return: A unicode string
+ '''
+ return html
+
+
+ def convert(self, stream, options, file_ext, log, accelerators):
+ '''
+ This method must be implemented in sub-classes. It must return
+ the path to the created OPF file or an :class:`OEBBook` instance.
+ All output should be contained in the current directory.
+ If this plugin creates files outside the current
+ directory they must be deleted/marked for deletion before this method
+ returns.
+
+ :param stream: A file like object that contains the input file.
+
+ :param options: Options to customize the conversion process.
+ Guaranteed to have attributes corresponding
+ to all the options declared by this plugin. In
+ addition, it will have a verbose attribute that
+ takes integral values from zero upwards. Higher numbers
+ mean be more verbose. Another useful attribute is
+ ``input_profile`` that is an instance of
+ :class:`calibre.customize.profiles.InputProfile`.
+
+ :param file_ext: The extension (without the .) of the input file. It
+ is guaranteed to be one of the `file_types` supported
+ by this plugin.
+
+ :param log: A :class:`calibre.utils.logging.Log` object. All output
+ should use this object.
+
+ :param accelerators: A dictionary of various information that the input
+ plugin can get easily that would speed up the
+ subsequent stages of the conversion.
+
+ '''
+ raise NotImplementedError
+
+ def __call__(self, stream, options, file_ext, log,
+ accelerators, output_dir):
+ log('InputFormatPlugin: %s running'%self.name, end=' ')
+ if hasattr(stream, 'name'):
+ log('on', stream.name)
+
+ with CurrentDir(output_dir):
+ for x in os.listdir('.'):
+ shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
+
+ ret = self.convert(stream, options, file_ext,
+ log, accelerators)
+
+ if options.debug_input is not None:
+ options.debug_input = os.path.abspath(options.debug_input)
+ if not os.path.exists(options.debug_input):
+ os.makedirs(options.debug_input)
+ if isinstance(ret, basestring):
+ shutil.rmtree(options.debug_input)
+ shutil.copytree(output_dir, options.debug_input)
+ else:
+ from calibre.ebooks.oeb.writer import OEBWriter
+ w = OEBWriter(pretty_print=options.pretty_print)
+ w(ret, options.debug_input)
+
+ log.info('Input debug saved to:', options.debug_input)
+
+ return ret
+
+
+class OutputFormatPlugin(Plugin):
+ '''
+ OutputFormatPlugins are responsible for converting an OEB document
+ (OPF+HTML) into an output ebook.
+
+ The OEB document can be assumed to be encoded in UTF-8.
+ The main action happens in :method:`convert`.
+ '''
+
+ type = _('Conversion Output')
+ can_be_disabled = False
+ supported_platforms = ['windows', 'osx', 'linux']
+
+ #: The file type (extension without leading period) that this
+ #: plugin outputs
+ file_type = None
+
+ #: Options shared by all Input format plugins. Do not override
+ #: in sub-classes. Use :member:`options` instead. Every option must be an
+ #: instance of :class:`OptionRecommendation`.
+ common_options = set([
+ OptionRecommendation(name='pretty_print',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('If specified, the output plugin will try to create output '
+ 'that is as human readable as possible. May not have any effect '
+ 'for some output plugins.')
+ ),
+ ])
+
+ #: Options to customize the behavior of this plugin. Every option must be an
+ #: instance of :class:`OptionRecommendation`.
+ options = set([])
+
+ #: A set of 3-tuples of the form
+ #: (option_name, recommended_value, recommendation_level)
+ recommendations = set([])
+
+ def __init__(self, *args):
+ Plugin.__init__(self, *args)
+ self.report_progress = DummyReporter()
+
+
+ def convert(self, oeb_book, output, input_plugin, opts, log):
+ '''
+ Render the contents of `oeb_book` (which is an instance of
+ :class:`calibre.ebooks.oeb.OEBBook` to the file specified by output.
+
+ :param output: Either a file like object or a string. If it is a string
+ it is the path to a directory that may or may not exist. The output
+ plugin should write its output into that directory. If it is a file like
+ object, the output plugin should write its output into the file.
+
+ :param input_plugin: The input plugin that was used at the beginning of
+ the conversion pipeline.
+
+ :param opts: Conversion options. Guaranteed to have attributes
+ corresponding to the OptionRecommendations of this plugin.
+
+ :param log: The logger. Print debug/info messages etc. using this.
+ '''
+ raise NotImplementedError
+
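To illustrate the contract spelled out in the docstrings above (write everything into the current directory and return the path to an OPF, or an OEBBook instance), here is a deliberately minimal input plugin for a hypothetical '.demo' format. It is a sketch only: it assumes calibre is importable, and the hand-written OPF is far more bare-bones than what real input plugins produce.

# Illustrative sketch: a minimal InputFormatPlugin for a made-up '.demo' format.
from __future__ import with_statement
import os
from calibre.customize.conversion import InputFormatPlugin

class DemoInput(InputFormatPlugin):

    name        = 'DEMO Input'
    author      = 'Example Author'
    description = 'Convert DEMO files to HTML'
    file_types  = set(['demo'])

    def convert(self, stream, options, file_ext, log, accelerators):
        log('Decoding with:', options.input_encoding or 'utf-8')
        text = stream.read().decode(options.input_encoding or 'utf-8')
        with open('index.html', 'wb') as f:
            f.write(('<html><body><pre>%s</pre></body></html>'
                     % text).encode('utf-8'))
        with open('content.opf', 'wb') as f:
            f.write('<?xml version="1.0" encoding="utf-8"?>\n'
                '<package xmlns="http://www.idpf.org/2007/opf" version="2.0">'
                '<manifest><item id="html" href="index.html" '
                'media-type="application/xhtml+xml"/></manifest>'
                '<spine><itemref idref="html"/></spine></package>')
        # All output lives in the current directory; return the OPF path.
        return os.path.abspath('content.opf')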
diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py
new file mode 100644
index 0000000000..4c184ca36d
--- /dev/null
+++ b/src/calibre/customize/profiles.py
@@ -0,0 +1,241 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from itertools import izip
+
+from calibre.customize import Plugin as _Plugin
+
+FONT_SIZES = [('xx-small', 1),
+ ('x-small', None),
+ ('small', 2),
+ ('medium', 3),
+ ('large', 4),
+ ('x-large', 5),
+ ('xx-large', 6),
+ (None, 7)]
+
+
+class Plugin(_Plugin):
+
+ fbase = 12
+ fsizes = [5, 7, 9, 12, 13.5, 17, 20, 22, 24]
+ screen_size = (1600, 1200)
+ dpi = 100
+
+ def __init__(self, *args, **kwargs):
+ _Plugin.__init__(self, *args, **kwargs)
+ self.width, self.height = self.screen_size
+ fsizes = list(self.fsizes)
+ self.fkey = list(self.fsizes)
+ self.fsizes = []
+ for (name, num), size in izip(FONT_SIZES, fsizes):
+ self.fsizes.append((name, num, float(size)))
+ self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
+ self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
+
+
+class InputProfile(Plugin):
+
+ author = 'Kovid Goyal'
+ supported_platforms = set(['windows', 'osx', 'linux'])
+ can_be_disabled = False
+ type = _('Input profile')
+
+ name = 'Default Input Profile'
+ short_name = 'default' # Used in the CLI so don't use spaces etc. in it
+ description = _('This profile tries to provide sane defaults and is useful '
+ 'if you know nothing about the input document.')
+
+
+class SonyReaderInput(InputProfile):
+
+ name = 'Sony Reader'
+ short_name = 'sony'
+ description = _('This profile is intended for the SONY PRS line. '
+ 'The 500/505/700 etc.')
+
+ screen_size = (584, 754)
+ dpi = 168.451
+ fbase = 12
+ fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
+
+
+class MSReaderInput(InputProfile):
+
+ name = 'Microsoft Reader'
+ short_name = 'msreader'
+ description = _('This profile is intended for the Microsoft Reader.')
+
+ screen_size = (480, 652)
+ dpi = 96
+ fbase = 13
+ fsizes = [10, 11, 13, 16, 18, 20, 22, 26]
+
+class MobipocketInput(InputProfile):
+
+ name = 'Mobipocket Books'
+ short_name = 'mobipocket'
+ description = _('This profile is intended for the Mobipocket books.')
+
+ # Unfortunately MOBI books are not narrowly targeted, so this information is
+ # quite likely to be spurious
+ screen_size = (600, 800)
+ dpi = 96
+ fbase = 18
+ fsizes = [14, 14, 16, 18, 20, 22, 24, 26]
+
+class HanlinV3Input(InputProfile):
+
+ name = 'Hanlin V3'
+ short_name = 'hanlinv3'
+ description = _('This profile is intended for the Hanlin V3 and its clones.')
+
+ # Screen size is a best guess
+ screen_size = (584, 754)
+ dpi = 168.451
+ fbase = 16
+ fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class CybookG3Input(InputProfile):
+
+ name = 'Cybook G3'
+ short_name = 'cybookg3'
+ description = _('This profile is intended for the Cybook G3.')
+
+ # Screen size is a best guess
+ screen_size = (600, 800)
+ dpi = 168.451
+ fbase = 16
+ fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class KindleInput(InputProfile):
+
+ name = 'Kindle'
+ short_name = 'kindle'
+ description = _('This profile is intended for the Amazon Kindle.')
+
+ # Screen size is a best guess
+ screen_size = (525, 640)
+ dpi = 168.451
+ fbase = 16
+ fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
+
+
+input_profiles = [InputProfile, SonyReaderInput, MSReaderInput,
+ MobipocketInput, HanlinV3Input, CybookG3Input, KindleInput]
+
+
+class OutputProfile(Plugin):
+
+ author = 'Kovid Goyal'
+ supported_platforms = set(['windows', 'osx', 'linux'])
+ can_be_disabled = False
+ type = _('Output profile')
+
+ name = 'Default Output Profile'
+ short_name = 'default' # Used in the CLI so don't use spaces etc. in it
+ description = _('This profile tries to provide sane defaults and is useful '
+ 'if you want to produce a document intended to be read at a '
+ 'computer or on a range of devices.')
+
+ # The image size for comics
+ comic_screen_size = (584, 754)
+
+ @classmethod
+ def tags_to_string(cls, tags):
+ return ', '.join(tags)
+
+class SonyReaderOutput(OutputProfile):
+
+ name = 'Sony Reader'
+ short_name = 'sony'
+ description = _('This profile is intended for the SONY PRS line. '
+ 'The 500/505/700 etc.')
+
+ screen_size = (600, 775)
+ dpi = 168.451
+ fbase = 12
+ fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
+
+class SonyReaderLandscapeOutput(SonyReaderOutput):
+
+ name = 'Sony Reader Landscape'
+ short_name = 'sony-landscape'
+ description = _('This profile is intended for the SONY PRS line. '
+ 'The 500/505/700 etc, in landscape mode. Mainly useful '
+ 'for comics.')
+
+ screen_size = (784, 1012)
+ comic_screen_size = (784, 1012)
+
+
+class MSReaderOutput(OutputProfile):
+
+ name = 'Microsoft Reader'
+ short_name = 'msreader'
+ description = _('This profile is intended for the Microsoft Reader.')
+
+ screen_size = (480, 652)
+ dpi = 96
+ fbase = 13
+ fsizes = [10, 11, 13, 16, 18, 20, 22, 26]
+
+class MobipocketOutput(OutputProfile):
+
+ name = 'Mobipocket Books'
+ short_name = 'mobipocket'
+ description = _('This profile is intended for the Mobipocket books.')
+
+ # Unfortunately MOBI books are not narrowly targeted, so this information is
+ # quite likely to be spurious
+ screen_size = (600, 800)
+ dpi = 96
+ fbase = 18
+ fsizes = [14, 14, 16, 18, 20, 22, 24, 26]
+
+class HanlinV3Output(OutputProfile):
+
+ name = 'Hanlin V3'
+ short_name = 'hanlinv3'
+ description = _('This profile is intended for the Hanlin V3 and its clones.')
+
+ # Screen size is a best guess
+ screen_size = (584, 754)
+ dpi = 168.451
+ fbase = 16
+ fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class CybookG3Output(OutputProfile):
+
+ name = 'Cybook G3'
+ short_name = 'cybookg3'
+ description = _('This profile is intended for the Cybook G3.')
+
+ # Screen size is a best guess
+ screen_size = (600, 800)
+ dpi = 168.451
+ fbase = 16
+ fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
+
+class KindleOutput(OutputProfile):
+
+ name = 'Kindle'
+ short_name = 'kindle'
+ description = _('This profile is intended for the Amazon Kindle.')
+
+ # Screen size is a best guess
+ screen_size = (525, 640)
+ dpi = 168.451
+ fbase = 16
+ fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
+
+ @classmethod
+ def tags_to_string(cls, tags):
+ return 'ttt '.join(tags)+'ttt '
+
+
+output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
+ MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
+ SonyReaderLandscapeOutput]
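The base Plugin.__init__ above pairs the FONT_SIZES table (CSS keyword, legacy HTML size number) with each profile's fsizes list to build the fnames and fnums lookup maps. A standalone sketch of that pairing, using the Sony Reader values from this file:

# Illustrative sketch (Python 2): how a profile's font tables are derived,
# mirroring Plugin.__init__ above with the Sony Reader sizes.
from itertools import izip

FONT_SIZES = [('xx-small', 1), ('x-small', None), ('small', 2),
              ('medium', 3), ('large', 4), ('x-large', 5),
              ('xx-large', 6), (None, 7)]

sony_fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]

triples = [(name, num, float(size))
           for (name, num), size in izip(FONT_SIZES, sony_fsizes)]
fnames = dict((name, sz) for name, _, sz in triples if name)
fnums  = dict((num, sz)  for _, num, sz in triples if num)

print fnames['medium']   # 12.0: the CSS keyword 'medium' maps to 12pt
print fnums[7]           # 24.0: legacy <font size="7"> maps to 24pt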
diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py
index e19c17a169..f6ab19a910 100644
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@@ -6,13 +6,15 @@ import os, shutil, traceback, functools, sys, re
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin
+from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin
+from calibre.customize.profiles import InputProfile, OutputProfile
from calibre.customize.builtins import plugins as builtin_plugins
from calibre.constants import __version__, iswindows, isosx
+from calibre.devices.interface import DevicePlugin
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser
-
version = tuple([int(x) for x in __version__.split('.')])
platform = 'linux'
@@ -47,7 +49,7 @@ def load_plugin(path_to_zip_file):
:return: A :class:`Plugin` instance.
'''
- print 'Loading plugin from', path_to_zip_file
+ #print 'Loading plugin from', path_to_zip_file
if not os.access(path_to_zip_file, os.R_OK):
raise PluginNotFound
zf = ZipFile(path_to_zip_file)
@@ -77,6 +79,15 @@ _on_import = {}
_on_preprocess = {}
_on_postprocess = {}
+def input_profiles():
+ for plugin in _initialized_plugins:
+ if isinstance(plugin, InputProfile):
+ yield plugin
+
+def output_profiles():
+ for plugin in _initialized_plugins:
+ if isinstance(plugin, OutputProfile):
+ yield plugin
def reread_filetype_plugins():
@@ -121,7 +132,19 @@ def reread_metadata_plugins():
_metadata_writers[ft] = []
_metadata_writers[ft].append(plugin)
+def metadata_readers():
+ ans = set([])
+ for plugins in _metadata_readers.values():
+ for plugin in plugins:
+ ans.add(plugin)
+ return ans
+def metadata_writers():
+ ans = set([])
+ for plugins in _metadata_writers.values():
+ for plugin in plugins:
+ ans.add(plugin)
+ return ans
def get_file_type_metadata(stream, ftype):
mi = MetaInformation(None, None)
@@ -229,6 +252,47 @@ def find_plugin(name):
if plugin.name == name:
return plugin
+def input_format_plugins():
+ for plugin in _initialized_plugins:
+ if isinstance(plugin, InputFormatPlugin):
+ yield plugin
+
+def plugin_for_input_format(fmt):
+ for plugin in input_format_plugins():
+ if fmt.lower() in plugin.file_types:
+ return plugin
+
+def available_input_formats():
+ formats = set([])
+ for plugin in input_format_plugins():
+ if not is_disabled(plugin):
+ for format in plugin.file_types:
+ formats.add(format)
+ return formats
+
+def output_format_plugins():
+ for plugin in _initialized_plugins:
+ if isinstance(plugin, OutputFormatPlugin):
+ yield plugin
+
+def plugin_for_output_format(fmt):
+ for plugin in output_format_plugins():
+ if fmt.lower() == plugin.file_type:
+ return plugin
+
+def available_output_formats():
+ formats = set([])
+ for plugin in output_format_plugins():
+ if not is_disabled(plugin):
+ formats.add(plugin.file_type)
+ return formats
+
+def device_plugins():
+ for plugin in _initialized_plugins:
+ if isinstance(plugin, DevicePlugin):
+ if not is_disabled(plugin):
+ yield plugin
+
def disable_plugin(plugin_or_name):
x = getattr(plugin_or_name, 'name', plugin_or_name)
plugin = find_plugin(x)
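A brief sketch of how calling code might use the new registry helpers added above (it assumes a working calibre install where the builtin plugins have been initialized; the printed formats depend on which plugins are enabled):

# Illustrative sketch: querying the new conversion plugin registry.
from calibre.customize.ui import (plugin_for_input_format,
    plugin_for_output_format, available_input_formats,
    available_output_formats)

print 'Can read: ', sorted(available_input_formats())
print 'Can write:', sorted(available_output_formats())

epub_in  = plugin_for_input_format('epub')
mobi_out = plugin_for_output_format('mobi')
if epub_in is not None and mobi_out is not None:
    print 'EPUB to MOBI conversion is available via', \
        epub_in.name, 'and', mobi_out.name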
diff --git a/src/calibre/devices/__init__.py b/src/calibre/devices/__init__.py
index 06efbc6434..874de7c070 100644
--- a/src/calibre/devices/__init__.py
+++ b/src/calibre/devices/__init__.py
@@ -5,21 +5,6 @@ __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
Device drivers.
'''
-def devices():
- from calibre.devices.prs500.driver import PRS500
- from calibre.devices.prs505.driver import PRS505
- from calibre.devices.prs700.driver import PRS700
- from calibre.devices.cybookg3.driver import CYBOOKG3
- from calibre.devices.kindle.driver import KINDLE
- from calibre.devices.kindle.driver import KINDLE2
- from calibre.devices.bebook.driver import BEBOOK
- from calibre.devices.bebook.driver import BEBOOKMINI
- from calibre.devices.blackberry.driver import BLACKBERRY
- from calibre.devices.eb600.driver import EB600
- from calibre.devices.jetbook.driver import JETBOOK
- return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2,
- BEBOOK, BEBOOKMINI, BLACKBERRY, EB600, JETBOOK)
-
import time
DAY_MAP = dict(Sun=0, Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6)
diff --git a/src/calibre/devices/bebook/driver.py b/src/calibre/devices/bebook/driver.py
index 7313c24253..b6655d37ec 100644
--- a/src/calibre/devices/bebook/driver.py
+++ b/src/calibre/devices/bebook/driver.py
@@ -7,19 +7,25 @@ Device driver for BeBook
from calibre.devices.usbms.driver import USBMS
class BEBOOK(USBMS):
+ name = 'BeBook driver'
+ description = _('Communicate with the BeBook eBook reader.')
+ author = _('Tijmen Ruizendaal')
+ supported_platforms = ['windows', 'osx', 'linux']
+
+
# Ordered list of supported formats
FORMATS = ['mobi', 'epub', 'pdf', 'txt']
VENDOR_ID = [0x0525]
PRODUCT_ID = [0x8803, 0x6803]
- BCD = [0x312]
+ BCD = [0x312]
- VENDOR_NAME = 'LINUX'
+ VENDOR_NAME = 'LINUX'
WINDOWS_MAIN_MEM = 'FILE-STOR_GADGET'
WINDOWS_CARD_MEM = 'FILE-STOR_GADGET'
OSX_MAIN_MEM = 'BeBook Internal Memory'
- OSX_CARD_MEM = 'BeBook Storage Card'
+ OSX_CARD_A_MEM = 'BeBook Storage Card'
MAIN_MEMORY_VOLUME_LABEL = 'BeBook Internal Memory'
STORAGE_CARD_VOLUME_LABEL = 'BeBook Storage Card'
@@ -30,20 +36,22 @@ class BEBOOK(USBMS):
def windows_sort_drives(self, drives):
main = drives.get('main', None)
- card = drives.get('card', None)
+ card = drives.get('carda', None)
if card and main and card < main:
drives['main'] = card
- drives['card'] = main
+ drives['carda'] = main
return drives
+class BEBOOK_MINI(BEBOOK):
+ name = 'BeBook Mini driver'
+ description = _('Communicate with the BeBook Mini eBook reader.')
-class BEBOOKMINI(BEBOOK):
VENDOR_ID = [0x0492]
PRODUCT_ID = [0x8813]
- BCD = [0x319]
+ BCD = [0x319]
OSX_MAIN_MEM = 'BeBook Mini Internal Memory'
OSX_CARD_MEM = 'BeBook Mini Storage Card'
diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py
index f6c615b0de..da2328419a 100644
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@@ -7,6 +7,12 @@ __docformat__ = 'restructuredtext en'
from calibre.devices.usbms.driver import USBMS
class BLACKBERRY(USBMS):
+
+ name = 'Blackberry Device Interface'
+ description = _('Communicate with the Blackberry smart phone.')
+ author = _('Kovid Goyal')
+ supported_platforms = ['windows', 'linux']
+
# Ordered list of supported formats
FORMATS = ['mobi', 'prc']
@@ -16,15 +22,11 @@ class BLACKBERRY(USBMS):
VENDOR_NAME = 'RIM'
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
- #WINDOWS_CARD_MEM = 'CARD_STORAGE'
#OSX_MAIN_MEM = 'Kindle Internal Storage Media'
- #OSX_CARD_MEM = 'Kindle Card Storage Media'
MAIN_MEMORY_VOLUME_LABEL = 'Blackberry Main Memory'
- #STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
EBOOK_DIR_MAIN = 'ebooks'
- #EBOOK_DIR_CARD = "documents"
SUPPORTS_SUB_DIRS = True
diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index dcde8b873c..5dde9ab51d 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -7,11 +7,17 @@ Device driver for Bookeen's Cybook Gen 3
import os, shutil
from itertools import cycle
-from calibre.devices.errors import FreeSpaceError
+from calibre.devices.errors import DeviceError, FreeSpaceError
from calibre.devices.usbms.driver import USBMS
import calibre.devices.cybookg3.t2b as t2b
class CYBOOKG3(USBMS):
+ name = 'Cybook Gen 3 Device Interface'
+ description = _('Communicate with the Cybook eBook reader.')
+ author = _('John Schember')
+ supported_platforms = ['windows', 'osx', 'linux']
+
+
# Ordered list of supported formats
# Be sure these have an entry in calibre.devices.mime
FORMATS = ['mobi', 'prc', 'html', 'pdf', 'rtf', 'txt']
@@ -22,60 +28,45 @@ class CYBOOKG3(USBMS):
VENDOR_NAME = 'BOOKEEN'
WINDOWS_MAIN_MEM = 'CYBOOK_GEN3__-FD'
- WINDOWS_CARD_MEM = 'CYBOOK_GEN3__-SD'
+ WINDOWS_CARD_A_MEM = 'CYBOOK_GEN3__-SD'
OSX_MAIN_MEM = 'Bookeen Cybook Gen3 -FD Media'
- OSX_CARD_MEM = 'Bookeen Cybook Gen3 -SD Media'
+ OSX_CARD_A_MEM = 'Bookeen Cybook Gen3 -SD Media'
MAIN_MEMORY_VOLUME_LABEL = 'Cybook Gen 3 Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card'
EBOOK_DIR_MAIN = "eBooks"
- EBOOK_DIR_CARD = "eBooks"
+ EBOOK_DIR_CARD_A = "eBooks"
THUMBNAIL_HEIGHT = 144
SUPPORTS_SUB_DIRS = True
- def upload_books(self, files, names, on_card=False, end_session=True,
+ def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):
- if on_card and not self._card_prefix:
- raise ValueError(_('The reader has no storage card connected.'))
-
- if not on_card:
- path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
- else:
- path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
-
- def get_size(obj):
- if hasattr(obj, 'seek'):
- obj.seek(0, os.SEEK_END)
- size = obj.tell()
- obj.seek(0)
- return size
- return os.path.getsize(obj)
-
- sizes = [get_size(f) for f in files]
- size = sum(sizes)
-
- if on_card and size > self.free_space()[2] - 1024*1024:
- raise FreeSpaceError(_("There is insufficient free space on the storage card"))
- if not on_card and size > self.free_space()[0] - 2*1024*1024:
- raise FreeSpaceError(_("There is insufficient free space in main memory"))
+ path = self._sanity_check(on_card, files)
paths = []
names = iter(names)
metadata = iter(metadata)
- for infile in files:
+ for i, infile in enumerate(files):
newpath = path
mdata = metadata.next()
- if self.SUPPORTS_SUB_DIRS:
- if 'tags' in mdata.keys():
- for tag in mdata['tags']:
- if tag.startswith('/'):
- newpath += tag
- newpath = os.path.normpath(newpath)
- break
+ if 'tags' in mdata.keys():
+ for tag in mdata['tags']:
+ if tag.startswith(_('News')):
+ newpath = os.path.join(newpath, 'news')
+ newpath = os.path.join(newpath, mdata.get('title', ''))
+ newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+ elif tag.startswith('/'):
+ newpath += tag
+ newpath = os.path.normpath(newpath)
+ break
+
+ if newpath == path:
+ newpath = os.path.join(newpath, mdata.get('authors', _('Unknown')))
+ newpath = os.path.join(newpath, mdata.get('title', _('Unknown')))
if not os.path.exists(newpath):
os.makedirs(newpath)
@@ -103,10 +94,15 @@ class CYBOOKG3(USBMS):
t2b.write_t2b(t2bfile, coverdata)
t2bfile.close()
+ self.report_progress(i / float(len(files)), _('Transferring books to device...'))
+
+ self.report_progress(1.0, _('Transferring books to device...'))
+
return zip(paths, cycle([on_card]))
def delete_books(self, paths, end_session=True):
- for path in paths:
+ for i, path in enumerate(paths):
+ self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
if os.path.exists(path):
os.unlink(path)
@@ -115,6 +111,8 @@ class CYBOOKG3(USBMS):
# Delete the ebook auxiliary file
if os.path.exists(filepath + '.mbp'):
os.unlink(filepath + '.mbp')
+ if os.path.exists(filepath + '.dat'):
+ os.unlink(filepath + '.dat')
# Delete the thumbnails file auto generated for the ebook
if os.path.exists(filepath + '_6090.t2b'):
@@ -124,4 +122,4 @@ class CYBOOKG3(USBMS):
os.removedirs(os.path.dirname(path))
except:
pass
-
+ self.report_progress(1.0, _('Removing books from device...'))
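The reworked upload_books() above derives the on-device folder from the book's metadata: news downloads go under news/<title>/<timestamp>, a tag beginning with '/' is treated as a literal sub-path, and anything else falls back to <author>/<title>. A standalone approximation of just that decision follows, exercised with invented metadata; the real method also checks free space, copies the files and reports progress.

# Illustrative sketch (Python 2): an approximation of the destination-folder
# rules used by upload_books() above, tried out with toy metadata.
import os

def destination(base, mdata):
    newpath = base
    for tag in mdata.get('tags', []):
        if tag.startswith('News'):
            newpath = os.path.join(newpath, 'news',
                                   mdata.get('title', ''),
                                   mdata.get('timestamp', ''))
            break
        elif tag.startswith('/'):
            newpath = os.path.normpath(newpath + tag)
            break
    if newpath == base:   # no special tag matched
        newpath = os.path.join(newpath,
                               mdata.get('authors', 'Unknown'),
                               mdata.get('title', 'Unknown'))
    return newpath

print destination('/media/cybook/eBooks',
                  {'title': 'Example Book', 'authors': 'A. Author', 'tags': []})
print destination('/media/cybook/eBooks',
                  {'title': 'The Daily Example', 'timestamp': '2009-03-01',
                   'tags': ['News']})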
diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py
old mode 100755
new mode 100644
index 4b9204ebd0..b42c77f172
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@@ -14,6 +14,11 @@ Windows PNP strings:
from calibre.devices.usbms.driver import USBMS
class EB600(USBMS):
+ name = 'Netronix EB600 Device Interface'
+ description = _('Communicate with the EB600 eBook reader.')
+ author = _('Kovid Goyal')
+ supported_platforms = ['windows', 'osx', 'linux']
+
# Ordered list of supported formats
FORMATS = ['epub', 'prc', 'chm', 'djvu', 'html', 'rtf', 'txt', 'pdf']
DRM_FORMATS = ['prc', 'mobi', 'html', 'pdf', 'txt']
@@ -24,24 +29,24 @@ class EB600(USBMS):
VENDOR_NAME = 'NETRONIX'
WINDOWS_MAIN_MEM = 'EBOOK'
- WINDOWS_CARD_MEM = 'EBOOK'
+ WINDOWS_CARD_A_MEM = 'EBOOK'
OSX_MAIN_MEM = 'EB600 Internal Storage Media'
- OSX_CARD_MEM = 'EB600 Card Storage Media'
+ OSX_CARD_A_MEM = 'EB600 Card Storage Media'
MAIN_MEMORY_VOLUME_LABEL = 'EB600 Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'EB600 Storage Card'
EBOOK_DIR_MAIN = ''
- EBOOK_DIR_CARD = ''
+ EBOOK_DIR_CARD_A = ''
SUPPORTS_SUB_DIRS = True
def windows_sort_drives(self, drives):
main = drives.get('main', None)
- card = drives.get('card', None)
+ card = drives.get('carda', None)
if card and main and card < main:
drives['main'] = card
- drives['card'] = main
+ drives['carda'] = main
return drives
diff --git a/src/calibre/devices/interface.py b/src/calibre/devices/interface.py
index 21790e3c46..ff9d71f370 100644
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@@ -6,43 +6,46 @@ the GUI. A device backend must subclass the L{Device} class. See prs500.py for
a backend that implement the Device interface for the SONY PRS500 Reader.
"""
+from calibre.customize import Plugin
+
+class DevicePlugin(Plugin):
+ """
+ Defines the interface that should be implemented by backends that
+ communicate with an ebook reader.
-class Device(object):
- """
- Defines the interface that should be implemented by backends that
- communicate with an ebook reader.
-
The C{end_session} variables are used for USB session management. Sometimes
- the front-end needs to call several methods one after another, in which case
+ the front-end needs to call several methods one after another, in which case
the USB session should not be closed after each method call.
"""
+ type = _('Device Interface')
+
# Ordered list of supported formats
FORMATS = ["lrf", "rtf", "pdf", "txt"]
VENDOR_ID = 0x0000
PRODUCT_ID = 0x0000
- # BCD can be either None to not distinguish between devices based on BCD, or
+ # BCD can be either None to not distinguish between devices based on BCD, or
# it can be a list of the BCD numbers of all devices supported by this driver.
BCD = None
THUMBNAIL_HEIGHT = 68 # Height for thumbnails on device
# Whether the metadata on books can be set via the GUI.
CAN_SET_METADATA = True
-
- def __init__(self, key='-1', log_packets=False, report_progress=None) :
- """
+
+ def reset(self, key='-1', log_packets=False, report_progress=None) :
+ """
@param key: The key to unlock the device
- @param log_packets: If true the packet stream to/from the device is logged
- @param report_progress: Function that is called with a % progress
+ @param log_packets: If true the packet stream to/from the device is logged
+ @param report_progress: Function that is called with a % progress
(number between 0 and 100) for various tasks
- If it is called with -1 that means that the
+ If it is called with -1 that means that the
task does not have any progress information
"""
raise NotImplementedError()
-
+
@classmethod
def get_fdi(cls):
'''Return the FDI description of this device for HAL on linux.'''
return ''
-
+
@classmethod
def can_handle(cls, device_info):
'''
@@ -51,60 +54,66 @@ class Device(object):
is only called after the vendor, product ids and the bcd have matched, so
it can do some relatively time intensive checks. The default implementation
returns True.
-
- :param device_info: On windows a device ID string. On Unix a tuple of
- ``(vendor_id, product_id, bcd)``.
+
+ :param device_info: On windows a device ID string. On Unix a tuple of
+ ``(vendor_id, product_id, bcd)``.
'''
return True
-
+
def open(self):
'''
Perform any device specific initialization. Called after the device is
detected but before any other functions that communicate with the device.
For example: For devices that present themselves as USB Mass storage
devices, this method would be responsible for mounting the device or
- if the device has been automounted, for finding out where it has been
+ if the device has been automounted, for finding out where it has been
mounted. The driver for the PRS505 has an implementation of this function
that should serve as a good example for USB Mass storage devices.
'''
raise NotImplementedError()
-
+
def set_progress_reporter(self, report_progress):
'''
- @param report_progress: Function that is called with a % progress
+ @param report_progress: Function that is called with a % progress
(number between 0 and 100) for various tasks
- If it is called with -1 that means that the
+ If it is called with -1 that means that the
task does not have any progress information
'''
raise NotImplementedError()
-
+
def get_device_information(self, end_session=True):
- """
- Ask device for device information. See L{DeviceInfoQuery}.
+ """
+ Ask device for device information. See L{DeviceInfoQuery}.
@return: (device name, device version, software version on device, mime type)
"""
raise NotImplementedError()
-
+
def card_prefix(self, end_session=True):
'''
- Return prefix to paths on the card or '' if no cards present.
+ Return a 2 element list of the prefixes to paths on the cards.
+ If no card is present, None is set for that card's prefix, e.g.:
+ ('/place', '/place2')
+ (None, 'place2')
+ ('place', None)
+ (None, None)
'''
raise NotImplementedError()
-
+
def total_space(self, end_session=True):
- """
+ """
Get total space available on the mountpoints:
1. Main memory
- 2. Memory Stick
- 3. SD Card
+ 2. Memory Card A
+ 3. Memory Card B
@return: A 3 element list with total space in bytes of (1, 2, 3). If a
particular device doesn't have any of these locations it should return 0.
"""
raise NotImplementedError()
-
+
def free_space(self, end_session=True):
- """
+ """
Get free space available on the mountpoints:
1. Main memory
2. Card A
@@ -112,48 +121,49 @@ class Device(object):
@return: A 3 element list with free space in bytes of (1, 2, 3). If a
particular device doesn't have any of these locations it should return -1.
- """
+ """
raise NotImplementedError()
-
- def books(self, oncard=False, end_session=True):
- """
+
+ def books(self, oncard=None, end_session=True):
+ """
Return a list of ebooks on the device.
- @param oncard: If True return a list of ebooks on the storage card,
- otherwise return list of ebooks in main memory of device.
- If True and no books on card return empty list.
- @return: A BookList.
- """
+ @param oncard: If 'carda' or 'cardb' return a list of ebooks on the
+ specific storage card, otherwise return list of ebooks
+ in main memory of device. If a card is specified and no
+ books are on the card return empty list.
+ @return: A BookList.
+ """
raise NotImplementedError()
-
- def upload_books(self, files, names, on_card=False, end_session=True,
+
+ def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):
'''
Upload a list of books to the device. If a file already
exists on the device, it should be replaced.
This method should raise a L{FreeSpaceError} if there is not enough
free space on the device. The text of the FreeSpaceError must contain the
- word "card" if C{on_card} is True otherwise it must contain the word "memory".
+ word "card" if C{on_card} is not None otherwise it must contain the word "memory".
@param files: A list of paths and/or file-like objects.
- @param names: A list of file names that the books should have
+ @param names: A list of file names that the books should have
once uploaded to the device. len(names) == len(files)
- @return: A list of 3-element tuples. The list is meant to be passed
+ @return: A list of 3-element tuples. The list is meant to be passed
to L{add_books_to_metadata}.
- @param metadata: If not None, it is a list of dictionaries. Each dictionary
+ @param metadata: If not None, it is a list of dictionaries. Each dictionary
will have at least the key tags to allow the driver to choose book location
based on tags. len(metadata) == len(files). If your device does not support
hierarchical ebook folders, you can safely ignore this parameter.
'''
raise NotImplementedError()
-
+
@classmethod
def add_books_to_metadata(cls, locations, metadata, booklists):
'''
- Add locations to the booklists. This function must not communicate with
- the device.
+ Add locations to the booklists. This function must not communicate with
+ the device.
@param locations: Result of a call to L{upload_books}
@param metadata: List of dictionaries. Each dictionary must have the
- keys C{title}, C{authors}, C{author_sort}, C{cover}, C{tags}.
- The value of the C{cover}
+ keys C{title}, C{authors}, C{author_sort}, C{cover}, C{tags}.
+ The value of the C{cover}
element can be None or a three element tuple (width, height, data)
where data is the image data in JPEG format as a string. C{tags} must be
a possibly empty list of strings. C{authors} must be a string.
@@ -162,45 +172,72 @@ class Device(object):
The dictionary can also have an optional key "tag order" which should be
another dictionary that maps tag names to lists of book ids. The ids are
ids from the book database.
- @param booklists: A tuple containing the result of calls to
- (L{books}(oncard=False), L{books}(oncard=True)).
+ @param booklists: A tuple containing the result of calls to
+ (L{books}(oncard=None), L{books}(oncard='carda'),
+ L{books}(oncard='cardb')).
'''
raise NotImplementedError
-
+
def delete_books(self, paths, end_session=True):
'''
Delete books at paths on device.
'''
raise NotImplementedError()
-
+
@classmethod
def remove_books_from_metadata(cls, paths, booklists):
'''
- Remove books from the metadata list. This function must not communicate
+ Remove books from the metadata list. This function must not communicate
with the device.
@param paths: paths to books on the device.
- @param booklists: A tuple containing the result of calls to
- (L{books}(oncard=False), L{books}(oncard=True)).
+ @param booklists: A tuple containing the result of calls to
+ (L{books}(oncard=None), L{books}(oncard='carda'),
+ L{books}(oncard='cardb')).
'''
raise NotImplementedError()
-
+
def sync_booklists(self, booklists, end_session=True):
'''
Update metadata on device.
- @param booklists: A tuple containing the result of calls to
- (L{books}(oncard=False), L{books}(oncard=True)).
+ @param booklists: A tuple containing the result of calls to
+ (L{books}(oncard=None), L{books}(oncard='carda'),
+ L{books}(oncard='cardb')).
'''
raise NotImplementedError()
-
- def get_file(self, path, outfile, end_session=True):
+
+ def get_file(self, path, outfile, end_session=True):
'''
Read the file at C{path} on the device and write it to outfile.
@param outfile: file object like C{sys.stdout} or the result of an C{open} call
'''
- raise NotImplementedError()
+ raise NotImplementedError()
+
+ @classmethod
+ def config_widget(cls):
+ '''
+ Should return a QWidget. The QWidget contains the settings for the device interface
+ '''
+ raise NotImplementedError()
+
+ @classmethod
+ def save_settings(cls, settings_widget):
+ '''
+ Should save settings to disk. Takes the widget created in config_widget
+ and saves all settings to disk.
+ '''
+ raise NotImplementedError()
+
+ @classmethod
+ def settings(cls):
+ '''
+ Should return an opts object. The opts object should have one attribute
+ `format_map` which is an ordered list of formats for the device.
+ '''
+ raise NotImplementedError()
+
+
-
class BookList(list):
'''
A list of books. Each Book object must have the fields:
@@ -210,21 +247,21 @@ class BookList(list):
4. datetime (a UTC time tuple)
5. path (path on the device to the book)
6. thumbnail (can be None)
- 7. tags (a list of strings, can be empty).
+ 7. tags (a list of strings, can be empty).
'''
-
+
__getslice__ = None
__setslice__ = None
-
+
def supports_tags(self):
''' Return True if the device supports tags (collections) for this book list. '''
raise NotImplementedError()
-
+
def set_tags(self, book, tags):
'''
- Set the tags for C{book} to C{tags}.
+ Set the tags for C{book} to C{tags}.
@param tags: A list of strings. Can be empty.
- @param book: A book object that is in this BookList.
+ @param book: A book object that is in this BookList.
'''
raise NotImplementedError()
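
The refactor above turns the old Device class into a DevicePlugin and replaces the single-card assumption with explicit 'carda'/'cardb' slots in card_prefix(), books(), upload_books() and the booklists tuples. A minimal, illustrative skeleton against that interface follows; the class, its ids and the returned values are placeholders, not a real calibre driver:

from calibre.devices.interface import DevicePlugin

class ExampleReader(DevicePlugin):
    # Plugin metadata is now required because drivers are calibre plugins.
    name                = 'Example Reader Device Interface'
    description         = 'Illustrative skeleton only.'
    author              = 'Example'
    supported_platforms = ['windows', 'osx', 'linux']

    FORMATS    = ['epub', 'txt']
    VENDOR_ID  = 0x0000   # placeholder ids
    PRODUCT_ID = 0x0000

    def card_prefix(self, end_session=True):
        # One prefix per card slot; None marks an empty slot.
        return ('/media/reader-card-a', None)

    def books(self, oncard=None, end_session=True):
        # oncard is None for main memory, or 'carda'/'cardb' for a card.
        if oncard not in (None, 'carda', 'cardb'):
            return []
        return []   # a real driver returns a populated BookList

Code that consumes such a driver now passes three booklists around, (books(oncard=None), books(oncard='carda'), books(oncard='cardb')), which is the tuple that add_books_to_metadata(), remove_books_from_metadata() and sync_booklists() document above.
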
diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py
index 3fb9b1cdd1..199566357b 100644
--- a/src/calibre/devices/jetbook/driver.py
+++ b/src/calibre/devices/jetbook/driver.py
@@ -7,10 +7,16 @@ Device driver for Ectaco Jetbook firmware >= JL04_v030e
import os, re, sys, shutil
from itertools import cycle
-from calibre.devices.usbms.driver import USBMS, metadata_from_formats
+from calibre.devices.usbms.driver import USBMS
from calibre import sanitize_file_name as sanitize
class JETBOOK(USBMS):
+ name = 'Ectaco JetBook Device Interface'
+ description = _('Communicate with the JetBook eBook reader.')
+ author = _('James Ralston')
+ supported_platforms = ['windows', 'osx', 'linux']
+
+
# Ordered list of supported formats
# Be sure these have an entry in calibre.devices.mime
FORMATS = ['epub', 'mobi', 'prc', 'txt', 'rtf', 'pdf']
@@ -46,27 +52,34 @@ class JETBOOK(USBMS):
names = iter(names)
metadata = iter(metadata)
- for infile in files:
+ for i, infile in enumerate(files):
newpath = path
- if self.SUPPORTS_SUB_DIRS:
- mdata = metadata.next()
+ mdata = metadata.next()
- if 'tags' in mdata.keys():
- for tag in mdata['tags']:
- if tag.startswith('/'):
- newpath += tag
- newpath = os.path.normpath(newpath)
- break
-
- if not os.path.exists(newpath):
- os.makedirs(newpath)
+ if 'tags' in mdata.keys():
+ for tag in mdata['tags']:
+ if tag.startswith(_('News')):
+ newpath = os.path.join(newpath, 'news')
+ newpath = os.path.join(newpath, mdata.get('title', ''))
+ newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+ break
+ elif tag.startswith('/'):
+ newpath += tag
+ newpath = os.path.normpath(newpath)
+ break
author = sanitize(mdata.get('authors','Unknown')).replace(' ', '_')
title = sanitize(mdata.get('title', 'Unknown')).replace(' ', '_')
fileext = os.path.splitext(os.path.basename(names.next()))[1]
fname = '%s#%s%s' % (author, title, fileext)
+ if newpath == path:
+ newpath = os.path.join(newpath, author, title)
+
+ if not os.path.exists(newpath):
+ os.makedirs(newpath)
+
filepath = os.path.join(newpath, fname)
paths.append(filepath)
@@ -81,6 +94,10 @@ class JETBOOK(USBMS):
else:
shutil.copy2(infile, filepath)
+ self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
+
+ self.report_progress(1.0, _('Transferring books to device...'))
+
return zip(paths, cycle([on_card]))
@classmethod
@@ -93,6 +110,7 @@ class JETBOOK(USBMS):
return txt
+ from calibre.devices.usbms.driver import metadata_from_formats
mi = metadata_from_formats([path])
if (mi.title==_('Unknown') or mi.authors==[_('Unknown')]) \
@@ -108,10 +126,10 @@ class JETBOOK(USBMS):
def windows_sort_drives(self, drives):
main = drives.get('main', None)
- card = drives.get('card', None)
+ card = drives.get('carda', None)
if card and main and card < main:
drives['main'] = card
- drives['card'] = main
+ drives['carda'] = main
return drives
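
The reworked JETBOOK.upload_books() above chooses the destination directory from the book's metadata: a News tag places the file under news/<title>/<timestamp>, a tag beginning with '/' is treated as a literal sub-path, and anything else falls back to an author/title folder. A simplified sketch of that decision (it omits the sanitize() and translation steps the driver applies, and the base path is hypothetical):

import os

def jetbook_destination(base, mdata):
    # Mirrors the tag-based placement in JETBOOK.upload_books(); mdata is
    # one of the metadata dictionaries passed to upload_books().
    newpath = base
    for tag in mdata.get('tags', []):
        if tag.startswith('News'):
            newpath = os.path.join(newpath, 'news',
                                   mdata.get('title', ''),
                                   mdata.get('timestamp', ''))
            break
        elif tag.startswith('/'):
            newpath = os.path.normpath(newpath + tag)
            break
    if newpath == base:   # no placement tag matched
        newpath = os.path.join(newpath, mdata.get('authors', 'Unknown'),
                               mdata.get('title', 'Unknown'))
    return newpath

print jetbook_destination('/media/jetbook',
        {'tags': ['News'], 'title': 'Daily', 'timestamp': '2009-03-01'})
# -> /media/jetbook/news/Daily/2009-03-01
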
diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py
old mode 100755
new mode 100644
index a5775dec8a..de8cf0272c
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@@ -6,9 +6,14 @@ Device driver for Amazon's Kindle
import os, re, sys
-from calibre.devices.usbms.driver import USBMS, metadata_from_formats
+from calibre.devices.usbms.driver import USBMS
class KINDLE(USBMS):
+ name = 'Kindle Device Interface'
+ description = _('Communicate with the Kindle eBook reader.')
+ author = _('John Schember')
+ supported_platforms = ['windows', 'osx', 'linux']
+
# Ordered list of supported formats
FORMATS = ['azw', 'mobi', 'prc', 'azw1', 'tpz', 'txt']
@@ -18,23 +23,24 @@ class KINDLE(USBMS):
VENDOR_NAME = 'KINDLE'
WINDOWS_MAIN_MEM = 'INTERNAL_STORAGE'
- WINDOWS_CARD_MEM = 'CARD_STORAGE'
+ WINDOWS_CARD_A_MEM = 'CARD_STORAGE'
OSX_MAIN_MEM = 'Kindle Internal Storage Media'
- OSX_CARD_MEM = 'Kindle Card Storage Media'
+ OSX_CARD_A_MEM = 'Kindle Card Storage Media'
MAIN_MEMORY_VOLUME_LABEL = 'Kindle Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
EBOOK_DIR_MAIN = "documents"
- EBOOK_DIR_CARD = "documents"
+ EBOOK_DIR_CARD_A = "documents"
SUPPORTS_SUB_DIRS = True
WIRELESS_FILE_NAME_PATTERN = re.compile(
r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})-type_(?P<type>\w{4})-v_(?P<index>\d+).*')
def delete_books(self, paths, end_session=True):
- for path in paths:
+ for i, path in enumerate(paths):
+ self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
if os.path.exists(path):
os.unlink(path)
@@ -43,9 +49,11 @@ class KINDLE(USBMS):
# Delete the ebook auxiliary file
if os.path.exists(filepath + '.mbp'):
os.unlink(filepath + '.mbp')
+ self.report_progress(1.0, _('Removing books from device...'))
@classmethod
def metadata_from_path(cls, path):
+ from calibre.ebooks.metadata.meta import metadata_from_formats
mi = metadata_from_formats([path])
if mi.title == _('Unknown') or ('-asin' in mi.title and '-type' in mi.title):
match = cls.WIRELESS_FILE_NAME_PATTERN.match(os.path.basename(path))
@@ -58,6 +66,10 @@ class KINDLE(USBMS):
class KINDLE2(KINDLE):
+ name = 'Kindle 2 Device Interface'
+ description = _('Communicate with the Kindle 2 eBook reader.')
+ author = _('John Schember')
+ supported_platforms = ['windows', 'osx', 'linux']
PRODUCT_ID = [0x0002]
BCD = [0x0100]
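
KINDLE.metadata_from_path() falls back to parsing the Kindle's wireless download file names when the embedded metadata is missing or a placeholder. A small sketch of that parse; the group names title, asin, type and index are a reconstruction (they were lost to markup stripping in this copy of the diff), and the file name is invented for illustration:

import re

# Reconstructed pattern; the named groups are an assumption based on the
# fields visible in WIRELESS_FILE_NAME_PATTERN above.
WIRELESS_FILE_NAME_PATTERN = re.compile(
    r'(?P<title>[^-]+)-asin_(?P<asin>[a-zA-Z\d]{10,})'
    r'-type_(?P<type>\w{4})-v_(?P<index>\d+).*')

match = WIRELESS_FILE_NAME_PATTERN.match(
    'A Sample Book-asin_B000EXAMPLE-type_EBOK-v_0.azw')
if match is not None:
    print match.group('title')   # -> 'A Sample Book'
    print match.group('asin')    # -> 'B000EXAMPLE'
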
diff --git a/src/calibre/devices/libusb.py b/src/calibre/devices/libusb.py
index 226a99f239..09261e10c5 100644
--- a/src/calibre/devices/libusb.py
+++ b/src/calibre/devices/libusb.py
@@ -116,8 +116,8 @@ class Device(Structure):
raise Error("Cannot open device")
return handle.contents
- @apply
- def configurations():
+ @dynamic_property
+ def configurations(self):
doc = """ List of device configurations. See L{ConfigDescriptor} """
def fget(self):
ans = []
@@ -127,8 +127,8 @@ class Device(Structure):
return property(doc=doc, fget=fget)
class Bus(Structure):
- @apply
- def device_list():
+ @dynamic_property
+ def device_list(self):
doc = \
"""
Flat list of devices on this bus.
@@ -360,4 +360,4 @@ def get_devices():
for dev in devices:
device = (dev.device_descriptor.idVendor, dev.device_descriptor.idProduct, dev.device_descriptor.bcdDevice)
ans.append(device)
- return ans
+ return ans
\ No newline at end of file
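
Every @apply property in the device code is rewritten above as @dynamic_property: the decorated method is called once at class-creation time and the property object it returns becomes the class attribute, so the nested fget/fset functions keep working unchanged. A minimal sketch of the pattern; the dynamic_property helper below is an illustrative stand-in for calibre's builtin of the same name:

def dynamic_property(func):
    # Illustrative stand-in: invoke the method once (self is unused at
    # definition time) and bind whatever property it returns.
    return func(None)

class Box(object):
    def __init__(self, value):
        self._value = value

    @dynamic_property
    def value(self):
        doc = 'Example read/write property built when the class is created.'
        def fget(self):
            return self._value
        def fset(self, val):
            self._value = val
        return property(doc=doc, fget=fget, fset=fset)

b = Box(3)
b.value = 4
print b.value   # -> 4
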
diff --git a/src/calibre/devices/prs500/books.py b/src/calibre/devices/prs500/books.py
index 6c57920487..5eb8d7f011 100644
--- a/src/calibre/devices/prs500/books.py
+++ b/src/calibre/devices/prs500/books.py
@@ -1,8 +1,8 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal '
-"""
-This module contains the logic for dealing with XML book lists found
-in the reader cache.
+"""
+This module contains the logic for dealing with XML book lists found
+in the reader cache.
"""
import xml.dom.minidom as dom
from base64 import b64decode as decode
@@ -25,16 +25,16 @@ def sortable_title(title):
class book_metadata_field(object):
""" Represents metadata stored as an attribute """
- def __init__(self, attr, formatter=None, setter=None):
- self.attr = attr
+ def __init__(self, attr, formatter=None, setter=None):
+ self.attr = attr
self.formatter = formatter
self.setter = setter
-
+
def __get__(self, obj, typ=None):
""" Return a string. String may be empty if self.attr is absent """
return self.formatter(obj.elem.getAttribute(self.attr)) if \
self.formatter else obj.elem.getAttribute(self.attr).strip()
-
+
def __set__(self, obj, val):
""" Set the attribute """
val = self.setter(val) if self.setter else val
@@ -44,7 +44,7 @@ class book_metadata_field(object):
class Book(object):
""" Provides a view onto the XML element that represents a book """
-
+
title = book_metadata_field("title")
authors = book_metadata_field("author", \
formatter=lambda x: x if x and x.strip() else "Unknown")
@@ -55,8 +55,8 @@ class Book(object):
size = book_metadata_field("size", formatter=int)
# When setting this attribute you must use an epoch
datetime = book_metadata_field("date", formatter=strptime, setter=strftime)
- @apply
- def title_sorter():
+ @dynamic_property
+ def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
src = self.elem.getAttribute('titleSorter').strip()
@@ -66,12 +66,12 @@ class Book(object):
def fset(self, val):
self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
return property(doc=doc, fget=fget, fset=fset)
-
- @apply
- def thumbnail():
+
+ @dynamic_property
+ def thumbnail(self):
doc = \
- """
- The thumbnail. Should be a height 68 image.
+ """
+ The thumbnail. Should be a height 68 image.
Setting is not supported.
"""
def fget(self):
@@ -83,40 +83,40 @@ class Book(object):
break
rc = ""
for node in th.childNodes:
- if node.nodeType == node.TEXT_NODE:
+ if node.nodeType == node.TEXT_NODE:
rc += node.data
return decode(rc)
return property(fget=fget, doc=doc)
-
- @apply
- def path():
+
+ @dynamic_property
+ def path(self):
doc = """ Absolute path to book on device. Setting not supported. """
- def fget(self):
+ def fget(self):
return self.root + self.rpath
return property(fget=fget, doc=doc)
-
- @apply
- def db_id():
+
+ @dynamic_property
+ def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
if match:
return int(match.group(1))
return property(fget=fget, doc=doc)
-
+
def __init__(self, node, tags=[], prefix="", root="/Data/media/"):
self.elem = node
self.prefix = prefix
self.root = root
self.tags = tags
-
+
def __str__(self):
""" Return a utf-8 encoded string with title author and path information """
return self.title.encode('utf-8') + " by " + \
self.authors.encode('utf-8') + " at " + self.path.encode('utf-8')
-def fix_ids(media, cache):
+def fix_ids(media, cache, *args):
'''
Adjust ids in cache to correspond with media.
'''
@@ -131,16 +131,16 @@ def fix_ids(media, cache):
child.setAttribute("id", str(cid))
cid += 1
media.set_next_id(str(cid))
-
-
+
+
class BookList(_BookList):
- """
- A list of L{Book}s. Created from an XML file. Can write list
+ """
+ A list of L{Book}s. Created from an XML file. Can write list
to an XML file.
"""
__getslice__ = None
__setslice__ = None
-
+
def __init__(self, root="/Data/media/", sfile=None):
_BookList.__init__(self)
self.tag_order = {}
@@ -163,25 +163,25 @@ class BookList(_BookList):
if records:
self.prefix = 'xs1:'
self.root = records[0]
- self.proot = root
-
+ self.proot = root
+
for book in self.document.getElementsByTagName(self.prefix + "text"):
id = book.getAttribute('id')
pl = [i.getAttribute('title') for i in self.get_playlists(id)]
self.append(Book(book, root=root, prefix=self.prefix, tags=pl))
-
+
def supports_tags(self):
return bool(self.prefix)
-
+
def playlists(self):
return self.root.getElementsByTagName(self.prefix+'playlist')
-
- def playlist_items(self):
+
+ def playlist_items(self):
plitems = []
for pl in self.playlists():
plitems.extend(pl.getElementsByTagName(self.prefix+'item'))
return plitems
-
+
def purge_corrupted_files(self):
if not self.root:
return []
@@ -193,32 +193,32 @@ class BookList(_BookList):
c.parentNode.removeChild(c)
c.unlink()
return paths
-
+
def purge_empty_playlists(self):
''' Remove all playlist entries that have no children. '''
for pl in self.playlists():
if not pl.getElementsByTagName(self.prefix + 'item'):
pl.parentNode.removeChild(pl)
pl.unlink()
-
+
def _delete_book(self, node):
nid = node.getAttribute('id')
node.parentNode.removeChild(node)
node.unlink()
self.remove_from_playlists(nid)
-
-
+
+
def delete_book(self, cid):
- '''
+ '''
Remove DOM node corresponding to book with C{id == cid}.
Also remove book from any collections it is part of.
'''
for book in self:
if str(book.id) == str(cid):
self.remove(book)
- self._delete_book(book.elem)
+ self._delete_book(book.elem)
break
-
+
def remove_book(self, path):
'''
Remove DOM node corresponding to book with C{path == path}.
@@ -227,15 +227,15 @@ class BookList(_BookList):
for book in self:
if path.endswith(book.rpath):
self.remove(book)
- self._delete_book(book.elem)
+ self._delete_book(book.elem)
break
-
+
def next_id(self):
return self.document.documentElement.getAttribute('nextID')
-
+
def set_next_id(self, id):
self.document.documentElement.setAttribute('nextID', str(id))
-
+
def max_id(self):
max = 0
for child in self.root.childNodes:
@@ -243,15 +243,15 @@ class BookList(_BookList):
nid = int(child.getAttribute('id'))
if nid > max:
max = nid
- return max
-
+ return max
+
def book_by_path(self, path):
for child in self.root.childNodes:
if child.nodeType == child.ELEMENT_NODE and child.hasAttribute("path"):
if path == child.getAttribute('path'):
return child
return None
-
+
def add_book(self, info, name, size, ctime):
""" Add a node into DOM tree representing a book """
book = self.book_by_path(name)
@@ -262,23 +262,23 @@ class BookList(_BookList):
cid = self.max_id()+1
sourceid = str(self[0].sourceid) if len(self) else "1"
attrs = {
- "title" : info["title"],
+ "title" : info["title"],
'titleSorter' : sortable_title(info['title']),
"author" : info["authors"] if info['authors'] else 'Unknown', \
"page":"0", "part":"0", "scale":"0", \
"sourceid":sourceid, "id":str(cid), "date":"", \
"mime":mime, "path":name, "size":str(size)
- }
+ }
for attr in attrs.keys():
node.setAttributeNode(self.document.createAttribute(attr))
- node.setAttribute(attr, attrs[attr])
+ node.setAttribute(attr, attrs[attr])
try:
- w, h, data = info["cover"]
+ w, h, data = info["cover"]
except TypeError:
w, h, data = None, None, None
-
+
if data:
- th = self.document.createElement(self.prefix + "thumbnail")
+ th = self.document.createElement(self.prefix + "thumbnail")
th.setAttribute("width", str(w))
th.setAttribute("height", str(h))
jpeg = self.document.createElement(self.prefix + "jpeg")
@@ -294,15 +294,15 @@ class BookList(_BookList):
if info.has_key('tag order'):
self.tag_order.update(info['tag order'])
self.set_playlists(book.id, info['tags'])
-
-
+
+
def playlist_by_title(self, title):
for pl in self.playlists():
if pl.getAttribute('title').lower() == title.lower():
return pl
-
+
def add_playlist(self, title):
- cid = self.max_id()+1
+ cid = self.max_id()+1
pl = self.document.createElement(self.prefix+'playlist')
pl.setAttribute('sourceid', '0')
pl.setAttribute('id', str(cid))
@@ -316,18 +316,18 @@ class BookList(_BookList):
except AttributeError:
continue
return pl
-
-
+
+
def remove_from_playlists(self, id):
for pli in self.playlist_items():
if pli.getAttribute('id') == str(id):
pli.parentNode.removeChild(pli)
pli.unlink()
-
+
def set_tags(self, book, tags):
book.tags = tags
self.set_playlists(book.id, tags)
-
+
def set_playlists(self, id, collections):
self.remove_from_playlists(id)
for collection in set(collections):
@@ -337,7 +337,7 @@ class BookList(_BookList):
item = self.document.createElement(self.prefix+'item')
item.setAttribute('id', str(id))
coll.appendChild(item)
-
+
def get_playlists(self, id):
ans = []
for pl in self.playlists():
@@ -346,12 +346,12 @@ class BookList(_BookList):
ans.append(pl)
continue
return ans
-
+
def book_by_id(self, id):
for book in self:
if str(book.id) == str(id):
return book
-
+
def reorder_playlists(self):
for title in self.tag_order.keys():
pl = self.playlist_by_title(title)
@@ -364,7 +364,7 @@ class BookList(_BookList):
map[i] = j
pl_book_ids = [i for i in pl_book_ids if i is not None]
ordered_ids = [i for i in self.tag_order[title] if i in pl_book_ids]
-
+
if len(ordered_ids) < len(pl.childNodes):
continue
children = [i for i in pl.childNodes if hasattr(i, 'getAttribute')]
@@ -374,8 +374,8 @@ class BookList(_BookList):
for id in ordered_ids:
item = self.document.createElement(self.prefix+'item')
item.setAttribute('id', str(map[id]))
- pl.appendChild(item)
-
+ pl.appendChild(item)
+
def write(self, stream):
""" Write XML representation of DOM tree to C{stream} """
stream.write(self.document.toxml('utf-8'))
diff --git a/src/calibre/devices/prs500/cli/main.py b/src/calibre/devices/prs500/cli/main.py
index dfd3eb1ed6..9211fcff41 100755
--- a/src/calibre/devices/prs500/cli/main.py
+++ b/src/calibre/devices/prs500/cli/main.py
@@ -13,7 +13,7 @@ from calibre import __version__, iswindows, __appname__
from calibre.devices.errors import PathError
from calibre.utils.terminfo import TerminalController
from calibre.devices.errors import ArgumentError, DeviceError, DeviceLocked
-from calibre.devices import devices
+from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner
MINIMUM_COL_WIDTH = 12 #: Minimum width of columns in ls output
@@ -39,8 +39,8 @@ class FileFormatter(object):
self.name = file.name
self.path = file.path
- @apply
- def mode_string():
+ @dynamic_property
+ def mode_string(self):
doc=""" The mode string for this file. There are only two modes read-only and read-write """
def fget(self):
mode, x = "-", "-"
@@ -50,8 +50,8 @@ class FileFormatter(object):
return mode
return property(doc=doc, fget=fget)
- @apply
- def isdir_name():
+ @dynamic_property
+ def isdir_name(self):
doc='''Return self.name + '/' if self is a directory'''
def fget(self):
name = self.name
@@ -61,8 +61,8 @@ class FileFormatter(object):
return property(doc=doc, fget=fget)
- @apply
- def name_in_color():
+ @dynamic_property
+ def name_in_color(self):
doc=""" The name in ANSI text. Directories are blue, ebooks are green """
def fget(self):
cname = self.name
@@ -75,22 +75,22 @@ class FileFormatter(object):
return cname
return property(doc=doc, fget=fget)
- @apply
- def human_readable_size():
+ @dynamic_property
+ def human_readable_size(self):
doc=""" File size in human readable form """
def fget(self):
return human_readable(self.size)
return property(doc=doc, fget=fget)
- @apply
- def modification_time():
+ @dynamic_property
+ def modification_time(self):
doc=""" Last modified time in the Linux ls -l format """
def fget(self):
return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.wtime))
return property(doc=doc, fget=fget)
- @apply
- def creation_time():
+ @dynamic_property
+ def creation_time(self):
doc=""" Last modified time in the Linux ls -l format """
def fget(self):
return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.ctime))
@@ -203,9 +203,10 @@ def main():
_wmi = wmi.WMI()
scanner = DeviceScanner(_wmi)
scanner.scan()
- for d in devices():
+ for d in device_plugins():
if scanner.is_device_connected(d):
- dev = d(log_packets=options.log_packets)
+ dev = d
+ dev.reset(log_packets=options.log_packets)
if dev is None:
print >>sys.stderr, 'Unable to find a connected ebook reader.'
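
With drivers registered as plugins, the command line tool no longer instantiates a driver class on the spot: device_plugins() yields ready-made plugin instances and reset() takes over the old constructor arguments. A sketch of that detection pattern (the scanner is assumed to be an already initialised DeviceScanner, as in main() above):

from calibre.customize.ui import device_plugins

def find_connected_device(scanner, log_packets=False):
    # device_plugins() yields plugin instances, so there is no class to call;
    # reset() replaces the arguments the old constructors used to take.
    for plugin in device_plugins():
        if scanner.is_device_connected(plugin):
            plugin.reset(log_packets=log_packets)
            return plugin
    return None
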
diff --git a/src/calibre/devices/prs500/driver.py b/src/calibre/devices/prs500/driver.py
old mode 100755
new mode 100644
index a905a314ae..c6cd120283
--- a/src/calibre/devices/prs500/driver.py
+++ b/src/calibre/devices/prs500/driver.py
@@ -40,13 +40,14 @@ from array import array
from functools import wraps
from StringIO import StringIO
-from calibre.devices.interface import Device
+from calibre.devices.interface import DevicePlugin
from calibre.devices.libusb import Error as USBError
from calibre.devices.libusb import get_device_by_id
from calibre.devices.prs500.prstypes import *
from calibre.devices.errors import *
from calibre.devices.prs500.books import BookList, fix_ids
from calibre import __author__, __appname__
+from calibre.devices.usbms.deviceconfig import DeviceConfig
# Protocol versions this driver has been tested with
KNOWN_USB_PROTOCOL_VERSIONS = [0x3030303030303130L]
@@ -76,12 +77,16 @@ class File(object):
return self.name
-class PRS500(Device):
+class PRS500(DeviceConfig, DevicePlugin):
"""
Implements the backend for communication with the SONY Reader.
Each method decorated by C{safe} performs a task.
"""
+ name = 'PRS-500 Device Interface'
+ description = _('Communicate with the Sony PRS-500 eBook reader.')
+ author = _('Kovid Goyal')
+ supported_platforms = ['windows', 'osx', 'linux']
VENDOR_ID = 0x054c #: SONY Vendor Id
PRODUCT_ID = 0x029b #: Product Id for the PRS-500
@@ -181,7 +186,7 @@ class PRS500(Device):
return run_session
- def __init__(self, key='-1', log_packets=False, report_progress=None) :
+ def reset(self, key='-1', log_packets=False, report_progress=None) :
"""
@param key: The key to unlock the device
@param log_packets: If true the packet stream to/from the device is logged
@@ -620,6 +625,8 @@ class PRS500(Device):
data_type=FreeSpaceAnswer, \
command_number=FreeSpaceQuery.NUMBER)[0]
data.append( pkt.free )
+ data = [x for x in data if x != 0]
+ data.append(0)
return data
def _exists(self, path):
diff --git a/src/calibre/devices/prs500/prstypes.py b/src/calibre/devices/prs500/prstypes.py
index 4e1294fc1c..3efbfcab31 100755
--- a/src/calibre/devices/prs500/prstypes.py
+++ b/src/calibre/devices/prs500/prstypes.py
@@ -284,8 +284,8 @@ class Command(TransferBuffer):
# Length of the data part of this packet
length = field(start=12, fmt=DWORD)
- @apply
- def data():
+ @dynamic_property
+ def data(self):
doc = \
"""
The data part of this command. Returned/set as/by a TransferBuffer.
@@ -447,8 +447,8 @@ class LongCommand(Command):
self.length = 16
self.command = command
- @apply
- def command():
+ @dynamic_property
+ def command(self):
doc = \
"""
Usually carries extra information needed for the command
@@ -568,8 +568,8 @@ class FileOpen(PathCommand):
PathCommand.__init__(self, path, FileOpen.NUMBER, path_len_at_byte=20)
self.mode = mode
- @apply
- def mode():
+ @dynamic_property
+ def mode(self):
doc = \
"""
The file open mode. Is either L{FileOpen.READ}
@@ -651,8 +651,8 @@ class Response(Command):
raise PacketError("Response packets must have their number set to " \
+ hex(0x00001000))
- @apply
- def data():
+ @dynamic_property
+ def data(self):
doc = \
"""
The last 3 DWORDs (12 bytes) of data in this
@@ -681,43 +681,43 @@ class ListResponse(Response):
PATH_NOT_FOUND = 0xffffffd7 #: Queried path is not found
PERMISSION_DENIED = 0xffffffd6 #: Permission denied
- @apply
- def is_file():
+ @dynamic_property
+ def is_file(self):
doc = """ True iff queried path is a file """
def fget(self):
return self.code == ListResponse.IS_FILE
return property(doc=doc, fget=fget)
- @apply
- def is_invalid():
+ @dynamic_property
+ def is_invalid(self):
doc = """ True iff queried path is invalid """
def fget(self):
return self.code == ListResponse.IS_INVALID
return property(doc=doc, fget=fget)
- @apply
- def path_not_found():
+ @dynamic_property
+ def path_not_found(self):
doc = """ True iff queried path is not found """
def fget(self):
return self.code == ListResponse.PATH_NOT_FOUND
return property(doc=doc, fget=fget)
- @apply
- def permission_denied():
+ @dynamic_property
+ def permission_denied(self):
doc = """ True iff permission is denied for path operations """
def fget(self):
return self.code == ListResponse.PERMISSION_DENIED
return property(doc=doc, fget=fget)
- @apply
- def is_unmounted():
+ @dynamic_property
+ def is_unmounted(self):
doc = """ True iff queried path is unmounted (i.e. removed storage card) """
def fget(self):
return self.code == ListResponse.IS_UNMOUNTED
return property(doc=doc, fget=fget)
- @apply
- def is_eol():
+ @dynamic_property
+ def is_eol(self):
doc = """ True iff there are no more items in the list """
def fget(self):
return self.code == ListResponse.IS_EOL
@@ -759,8 +759,8 @@ class FileProperties(Answer):
# 0 = default permissions, 4 = read only
permissions = field(start=36, fmt=DWORD)
- @apply
- def is_dir():
+ @dynamic_property
+ def is_dir(self):
doc = """True if path points to a directory, False if it points to a file."""
def fget(self):
@@ -776,8 +776,8 @@ class FileProperties(Answer):
return property(doc=doc, fget=fget, fset=fset)
- @apply
- def is_readonly():
+ @dynamic_property
+ def is_readonly(self):
doc = """ Whether this file is readonly."""
def fget(self):
@@ -801,8 +801,8 @@ class IdAnswer(Answer):
""" Defines the structure of packets that contain identifiers for queries. """
- @apply
- def id():
+ @dynamic_property
+ def id(self):
doc = \
"""
The identifier. C{unsigned int} stored in 4 bytes
@@ -841,8 +841,8 @@ class ListAnswer(Answer):
name_length = field(start=20, fmt=DWORD)
name = stringfield(name_length, start=24)
- @apply
- def is_dir():
+ @dynamic_property
+ def is_dir(self):
doc = \
"""
True if list item points to a directory, False if it points to a file.
@@ -859,4 +859,3 @@ class ListAnswer(Answer):
return property(doc=doc, fget=fget, fset=fset)
-
diff --git a/src/calibre/devices/prs505/books.py b/src/calibre/devices/prs505/books.py
index 38b708a312..528770d3c5 100644
--- a/src/calibre/devices/prs505/books.py
+++ b/src/calibre/devices/prs505/books.py
@@ -15,11 +15,11 @@ from calibre.devices import strptime
strftime = functools.partial(_strftime, zone=time.gmtime)
-MIME_MAP = {
+MIME_MAP = {
"lrf" : "application/x-sony-bbeb",
- 'lrx' : 'application/x-sony-bbeb',
- "rtf" : "application/rtf",
- "pdf" : "application/pdf",
+ 'lrx' : 'application/x-sony-bbeb',
+ "rtf" : "application/rtf",
+ "pdf" : "application/pdf",
"txt" : "text/plain" ,
'epub': 'application/epub+zip',
}
@@ -32,16 +32,16 @@ def sortable_title(title):
class book_metadata_field(object):
""" Represents metadata stored as an attribute """
- def __init__(self, attr, formatter=None, setter=None):
- self.attr = attr
+ def __init__(self, attr, formatter=None, setter=None):
+ self.attr = attr
self.formatter = formatter
self.setter = setter
-
+
def __get__(self, obj, typ=None):
""" Return a string. String may be empty if self.attr is absent """
return self.formatter(obj.elem.getAttribute(self.attr)) if \
self.formatter else obj.elem.getAttribute(self.attr).strip()
-
+
def __set__(self, obj, val):
""" Set the attribute """
val = self.setter(val) if self.setter else val
@@ -52,7 +52,7 @@ class book_metadata_field(object):
class Book(object):
""" Provides a view onto the XML element that represents a book """
-
+
title = book_metadata_field("title")
authors = book_metadata_field("author", \
formatter=lambda x: x if x and x.strip() else _('Unknown'))
@@ -63,9 +63,9 @@ class Book(object):
size = book_metadata_field("size", formatter=lambda x : int(float(x)))
# When setting this attribute you must use an epoch
datetime = book_metadata_field("date", formatter=strptime, setter=strftime)
-
- @apply
- def title_sorter():
+
+ @dynamic_property
+ def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
src = self.elem.getAttribute('titleSorter').strip()
@@ -75,12 +75,12 @@ class Book(object):
def fset(self, val):
self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
return property(doc=doc, fget=fget, fset=fset)
-
- @apply
- def thumbnail():
+
+ @dynamic_property
+ def thumbnail(self):
doc = \
- """
- The thumbnail. Should be a height 68 image.
+ """
+ The thumbnail. Should be a height 68 image.
Setting is not supported.
"""
def fget(self):
@@ -94,33 +94,33 @@ class Book(object):
break
rc = ""
for node in th.childNodes:
- if node.nodeType == node.TEXT_NODE:
+ if node.nodeType == node.TEXT_NODE:
rc += node.data
return decode(rc)
return property(fget=fget, doc=doc)
-
- @apply
- def path():
+
+ @dynamic_property
+ def path(self):
doc = """ Absolute path to book on device. Setting not supported. """
- def fget(self):
+ def fget(self):
return self.mountpath + self.rpath
return property(fget=fget, doc=doc)
-
- @apply
- def db_id():
+
+ @dynamic_property
+ def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
if match:
return int(match.group(1))
return property(fget=fget, doc=doc)
-
+
def __init__(self, node, mountpath, tags, prefix=""):
self.elem = node
self.prefix = prefix
self.tags = tags
self.mountpath = mountpath
-
+
def __str__(self):
""" Return a utf-8 encoded string with title author and path information """
return self.title.encode('utf-8') + " by " + \
@@ -128,8 +128,8 @@ class Book(object):
class BookList(_BookList):
-
- def __init__(self, xml_file, mountpath):
+
+ def __init__(self, xml_file, mountpath, report_progress=None):
_BookList.__init__(self)
xml_file.seek(0)
self.document = dom.parse(xml_file)
@@ -143,12 +143,15 @@ class BookList(_BookList):
self.root_element = records[0]
else:
self.prefix = ''
-
- for book in self.root_element.childNodes:
+
+ nodes = self.root_element.childNodes
+ for i, book in enumerate(nodes):
+ if report_progress:
+ report_progress((i+1) / float(len(nodes)), _('Getting list of books on device...'))
if hasattr(book, 'tagName') and book.tagName.endswith('text'):
tags = [i.getAttribute('title') for i in self.get_playlists(book.getAttribute('id'))]
self.append(Book(book, mountpath, tags, prefix=self.prefix))
-
+
def max_id(self):
max = 0
for child in self.root_element.childNodes:
@@ -157,7 +160,7 @@ class BookList(_BookList):
if nid > max:
max = nid
return max
-
+
def is_id_valid(self, id):
'''Return True iff there is an element with C{id==id}.'''
id = str(id)
@@ -166,23 +169,23 @@ class BookList(_BookList):
if child.getAttribute('id') == id:
return True
return False
-
+
def supports_tags(self):
return True
-
+
def book_by_path(self, path):
for child in self.root_element.childNodes:
if child.nodeType == child.ELEMENT_NODE and child.hasAttribute("path"):
if path == child.getAttribute('path'):
return child
return None
-
+
def add_book(self, info, name, size, ctime):
""" Add a node into the DOM tree, representing a book """
book = self.book_by_path(name)
if book is not None:
self.remove_book(name)
-
+
node = self.document.createElement(self.prefix + "text")
mime = MIME_MAP[name.rpartition('.')[-1].lower()]
cid = self.max_id()+1
@@ -191,23 +194,23 @@ class BookList(_BookList):
except:
sourceid = '1'
attrs = {
- "title" : info["title"],
+ "title" : info["title"],
'titleSorter' : sortable_title(info['title']),
- "author" : info["authors"] if info['authors'] else _('Unknown'),
+ "author" : info["authors"] if info['authors'] else _('Unknown'),
"page":"0", "part":"0", "scale":"0", \
"sourceid":sourceid, "id":str(cid), "date":"", \
"mime":mime, "path":name, "size":str(size)
- }
+ }
for attr in attrs.keys():
node.setAttributeNode(self.document.createAttribute(attr))
- node.setAttribute(attr, attrs[attr])
+ node.setAttribute(attr, attrs[attr])
try:
- w, h, data = info["cover"]
+ w, h, data = info["cover"]
except TypeError:
w, h, data = None, None, None
-
+
if data:
- th = self.document.createElement(self.prefix + "thumbnail")
+ th = self.document.createElement(self.prefix + "thumbnail")
th.setAttribute("width", str(w))
th.setAttribute("height", str(h))
jpeg = self.document.createElement(self.prefix + "jpeg")
@@ -222,24 +225,24 @@ class BookList(_BookList):
if info.has_key('tag order'):
self.tag_order.update(info['tag order'])
self.set_tags(book, info['tags'])
-
+
def _delete_book(self, node):
nid = node.getAttribute('id')
self.remove_from_playlists(nid)
node.parentNode.removeChild(node)
node.unlink()
-
+
def delete_book(self, cid):
- '''
+ '''
Remove DOM node corresponding to book with C{id == cid}.
Also remove book from any collections it is part of.
'''
for book in self:
if str(book.id) == str(cid):
self.remove(book)
- self._delete_book(book.elem)
+ self._delete_book(book.elem)
break
-
+
def remove_book(self, path):
'''
Remove DOM node corresponding to book with C{path == path}.
@@ -248,24 +251,24 @@ class BookList(_BookList):
for book in self:
if path.endswith(book.rpath):
self.remove(book)
- self._delete_book(book.elem)
+ self._delete_book(book.elem)
break
-
+
def playlists(self):
ans = []
for c in self.root_element.childNodes:
if hasattr(c, 'tagName') and c.tagName.endswith('playlist'):
ans.append(c)
return ans
-
- def playlist_items(self):
+
+ def playlist_items(self):
plitems = []
for pl in self.playlists():
for c in pl.childNodes:
if hasattr(c, 'tagName') and c.tagName.endswith('item'):
plitems.append(c)
return plitems
-
+
def purge_corrupted_files(self):
if not self.root_element:
return []
@@ -276,7 +279,7 @@ class BookList(_BookList):
c.parentNode.removeChild(c)
c.unlink()
return paths
-
+
def purge_empty_playlists(self):
''' Remove all playlists that have no children. Also removes any invalid playlist items.'''
for pli in self.playlist_items():
@@ -295,32 +298,32 @@ class BookList(_BookList):
if empty:
pl.parentNode.removeChild(pl)
pl.unlink()
-
+
def playlist_by_title(self, title):
for pl in self.playlists():
if pl.getAttribute('title').lower() == title.lower():
return pl
-
+
def add_playlist(self, title):
- cid = self.max_id()+1
+ cid = self.max_id()+1
pl = self.document.createElement(self.prefix+'playlist')
pl.setAttribute('id', str(cid))
pl.setAttribute('title', title)
pl.setAttribute('uuid', uuid())
self.root_element.insertBefore(pl, self.root_element.childNodes[-1])
return pl
-
+
def remove_from_playlists(self, id):
for pli in self.playlist_items():
if pli.getAttribute('id') == str(id):
pli.parentNode.removeChild(pli)
pli.unlink()
-
+
def set_tags(self, book, tags):
tags = [t for t in tags if t]
book.tags = tags
self.set_playlists(book.id, tags)
-
+
def set_playlists(self, id, collections):
self.remove_from_playlists(id)
for collection in set(collections):
@@ -330,7 +333,7 @@ class BookList(_BookList):
item = self.document.createElement(self.prefix+'item')
item.setAttribute('id', str(id))
coll.appendChild(item)
-
+
def get_playlists(self, bookid):
ans = []
for pl in self.playlists():
@@ -339,23 +342,23 @@ class BookList(_BookList):
if item.getAttribute('id') == str(bookid):
ans.append(pl)
return ans
-
+
def next_id(self):
return self.document.documentElement.getAttribute('nextID')
-
+
def set_next_id(self, id):
self.document.documentElement.setAttribute('nextID', str(id))
-
+
def write(self, stream):
""" Write XML representation of DOM tree to C{stream} """
src = self.document.toxml('utf-8') + '\n'
stream.write(src.replace("'", '&apos;'))
-
+
def book_by_id(self, id):
for book in self:
if str(book.id) == str(id):
return book
-
+
def reorder_playlists(self):
for title in self.tag_order.keys():
pl = self.playlist_by_title(title)
@@ -368,7 +371,7 @@ class BookList(_BookList):
map[i] = j
pl_book_ids = [i for i in pl_book_ids if i is not None]
ordered_ids = [i for i in self.tag_order[title] if i in pl_book_ids]
-
+
if len(ordered_ids) < len(pl.childNodes):
continue
children = [i for i in pl.childNodes if hasattr(i, 'getAttribute')]
@@ -379,16 +382,18 @@ class BookList(_BookList):
item = self.document.createElement(self.prefix+'item')
item.setAttribute('id', str(map[id]))
pl.appendChild(item)
-
-def fix_ids(main, card):
+
+def fix_ids(main, carda, cardb):
'''
Adjust ids the XML databases.
'''
if hasattr(main, 'purge_empty_playlists'):
main.purge_empty_playlists()
- if hasattr(card, 'purge_empty_playlists'):
- card.purge_empty_playlists()
-
+ if hasattr(carda, 'purge_empty_playlists'):
+ carda.purge_empty_playlists()
+ if hasattr(cardb, 'purge_empty_playlists'):
+ cardb.purge_empty_playlists()
+
def regen_ids(db):
if not hasattr(db, 'root_element'):
return
@@ -397,11 +402,11 @@ def fix_ids(main, card):
cid = 0 if db == main else 1
for child in db.root_element.childNodes:
if child.nodeType == child.ELEMENT_NODE and child.hasAttribute('id'):
- id_map[child.getAttribute('id')] = str(cid)
+ id_map[child.getAttribute('id')] = str(cid)
child.setAttribute("sourceid", '1')
- child.setAttribute('id', str(cid))
+ child.setAttribute('id', str(cid))
cid += 1
-
+
for item in db.playlist_items():
oid = item.getAttribute('id')
try:
@@ -409,10 +414,11 @@ def fix_ids(main, card):
except KeyError:
item.parentNode.removeChild(item)
item.unlink()
-
+
db.reorder_playlists()
-
+
regen_ids(main)
- regen_ids(card)
-
+ regen_ids(carda)
+ regen_ids(cardb)
+
main.set_next_id(str(main.max_id()+1))
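
fix_ids() now takes the main memory, card A and card B booklists separately, matching the three-element booklists tuple used throughout the new device interface. A usage sketch; the mount paths are illustrative and an empty list stands in for a missing card:

from calibre.devices.prs505.books import BookList, fix_ids

# Illustrative mount points; the driver derives these from the detected
# main memory and card prefixes.
main  = BookList(open('/media/PRS505/database/cache/media.xml', 'rb'),
                 '/media/PRS505/')
carda = BookList(open('/media/CARD_A/Sony Reader/database/cache.xml', 'rb'),
                 '/media/CARD_A/')
cardb = []   # empty slot: fix_ids() skips objects without the XML booklist attributes

fix_ids(main, carda, cardb)   # purge empty playlists and renumber ids
main.write(open('/media/PRS505/database/cache/media.xml', 'wb'))
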
diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py
index 00cb78b06b..e75f67223a 100644
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@@ -1,399 +1,120 @@
__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal '
+__copyright__ = '2008, Kovid Goyal ' \
+ '2009, John Schember '
'''
Device driver for the SONY PRS-505
'''
-import sys, os, shutil, time, subprocess, re
+import os, time
from itertools import cycle
-from calibre.devices.interface import Device
+from calibre.devices.usbms.cli import CLI
+from calibre.devices.usbms.device import Device
from calibre.devices.errors import DeviceError, FreeSpaceError
from calibre.devices.prs505.books import BookList, fix_ids
-from calibre import iswindows, islinux, isosx, __appname__
-from calibre.devices.errors import PathError
+from calibre import __appname__
-class File(object):
- def __init__(self, path):
- stats = os.stat(path)
- self.is_dir = os.path.isdir(path)
- self.is_readonly = not os.access(path, os.W_OK)
- self.ctime = stats.st_ctime
- self.wtime = stats.st_mtime
- self.size = stats.st_size
- if path.endswith(os.sep):
- path = path[:-1]
- self.path = path
- self.name = os.path.basename(path)
+class PRS505(CLI, Device):
+ name = 'PRS-505 Device Interface'
+ description = _('Communicate with the Sony PRS-505 eBook reader.')
+ author = _('Kovid Goyal and John Schember')
+ supported_platforms = ['windows', 'osx', 'linux']
-class PRS505(Device):
- VENDOR_ID = 0x054c #: SONY Vendor Id
- PRODUCT_ID = 0x031e #: Product Id for the PRS-505
- BCD = [0x229] #: Needed to disambiguate 505 and 700 on linux
- PRODUCT_NAME = 'PRS-505'
- VENDOR_NAME = 'SONY'
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
- MEDIA_XML = 'database/cache/media.xml'
- CACHE_XML = 'Sony Reader/database/cache.xml'
+ VENDOR_ID = [0x054c] #: SONY Vendor Id
+ PRODUCT_ID = [0x031e] #: Product Id for the PRS-505
+ BCD = [0x229] #: Needed to disambiguate 505 and 700 on linux
+
+ VENDOR_NAME = 'SONY'
+ WINDOWS_MAIN_MEM = 'PRS-505'
+ WINDOWS_CARD_A_MEM = 'PRS-505/UC:MS'
+ WINDOWS_CARD_B_MEM = 'PRS-505/UC:SD'
+
+ OSX_MAIN_MEM = 'Sony PRS-505/UC Media'
+ OSX_CARD_A_MEM = 'Sony PRS-505/UC:MS Media'
+ OSX_CARD_B_MEM = 'Sony PRS-505/UC:SD'
MAIN_MEMORY_VOLUME_LABEL = 'Sony Reader Main Memory'
STORAGE_CARD_VOLUME_LABEL = 'Sony Reader Storage Card'
- OSX_NAME = 'Sony PRS-505'
+ MEDIA_XML = 'database/cache/media.xml'
+ CACHE_XML = 'Sony Reader/database/cache.xml'
CARD_PATH_PREFIX = __appname__
- FDI_TEMPLATE = \
-'''
-
-
-
-
-
-
- %(main_memory)s
- %(deviceclass)s
-
-
-
-
-
-
-
-
-
-
-
-
- %(storage_card)s
- %(deviceclass)s
-
-
-
-
-
-
-'''.replace('%(app)s', __appname__)
-
-
- def __init__(self, log_packets=False):
- self._main_prefix = self._card_prefix = None
-
- @classmethod
- def get_fdi(cls):
- return cls.FDI_TEMPLATE%dict(
- deviceclass=cls.__name__,
- vendor_id=hex(cls.VENDOR_ID),
- product_id=hex(cls.PRODUCT_ID),
- bcd=hex(cls.BCD[0]),
- main_memory=cls.MAIN_MEMORY_VOLUME_LABEL,
- storage_card=cls.STORAGE_CARD_VOLUME_LABEL,
- )
-
- @classmethod
- def is_device(cls, device_id):
- device_id = device_id.upper()
- if 'VEN_'+cls.VENDOR_NAME in device_id and \
- 'PROD_'+cls.PRODUCT_NAME in device_id:
- return True
- vid, pid = hex(cls.VENDOR_ID)[2:], hex(cls.PRODUCT_ID)[2:]
- if len(vid) < 4: vid = '0'+vid
- if len(pid) < 4: pid = '0'+pid
- if 'VID_'+vid in device_id and \
- 'PID_'+pid in device_id:
- return True
- return False
-
- @classmethod
- def get_osx_mountpoints(cls, raw=None):
- if raw is None:
- ioreg = '/usr/sbin/ioreg'
- if not os.access(ioreg, os.X_OK):
- ioreg = 'ioreg'
- raw = subprocess.Popen((ioreg+' -w 0 -S -c IOMedia').split(),
- stdout=subprocess.PIPE).communicate()[0]
- lines = raw.splitlines()
- names = {}
- for i, line in enumerate(lines):
- if line.strip().endswith('<class IOMedia>') and cls.OSX_NAME in line:
- loc = 'stick' if ':MS' in line else 'card' if ':SD' in line else 'main'
- for line in lines[i+1:]:
- line = line.strip()
- if line.endswith('}'):
- break
- match = re.search(r'"BSD Name"\s+=\s+"(.*?)"', line)
- if match is not None:
- names[loc] = match.group(1)
- break
- if len(names.keys()) == 3:
- break
- return names
-
-
- def open_osx(self):
- mount = subprocess.Popen('mount', shell=True,
- stdout=subprocess.PIPE).stdout.read()
- names = self.get_osx_mountpoints()
- dev_pat = r'/dev/%s(\w*)\s+on\s+([^\(]+)\s+'
- if 'main' not in names.keys():
- raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
- main_pat = dev_pat%names['main']
- self._main_prefix = re.search(main_pat, mount).group(2) + os.sep
- card_pat = names['stick'] if 'stick' in names.keys() else names['card'] if 'card' in names.keys() else None
- if card_pat is not None:
- card_pat = dev_pat%card_pat
- self._card_prefix = re.search(card_pat, mount).group(2) + os.sep
-
-
- def open_windows(self):
- time.sleep(6)
- drives = []
- wmi = __import__('wmi', globals(), locals(), [], -1)
- c = wmi.WMI(find_classes=False)
- for drive in c.Win32_DiskDrive():
- if self.__class__.is_device(str(drive.PNPDeviceID)):
- if drive.Partitions == 0:
- continue
- try:
- partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
- logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0]
- prefix = logical_disk.DeviceID+os.sep
- drives.append((drive.Index, prefix))
- except IndexError:
- continue
-
-
- if not drives:
- raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
-
- drives.sort(cmp=lambda a, b: cmp(a[0], b[0]))
- self._main_prefix = drives[0][1]
- if len(drives) > 1:
- self._card_prefix = drives[1][1]
-
-
- def open_linux(self):
- import dbus
- bus = dbus.SystemBus()
- hm = dbus.Interface(bus.get_object("org.freedesktop.Hal", "/org/freedesktop/Hal/Manager"), "org.freedesktop.Hal.Manager")
-
- def conditional_mount(dev, main_mem=True):
- mmo = bus.get_object("org.freedesktop.Hal", dev)
- label = mmo.GetPropertyString('volume.label', dbus_interface='org.freedesktop.Hal.Device')
- is_mounted = mmo.GetPropertyString('volume.is_mounted', dbus_interface='org.freedesktop.Hal.Device')
- mount_point = mmo.GetPropertyString('volume.mount_point', dbus_interface='org.freedesktop.Hal.Device')
- fstype = mmo.GetPropertyString('volume.fstype', dbus_interface='org.freedesktop.Hal.Device')
- if is_mounted:
- return str(mount_point)
- mmo.Mount(label, fstype, ['umask=077', 'uid='+str(os.getuid()), 'sync'],
- dbus_interface='org.freedesktop.Hal.Device.Volume')
- return os.path.normpath('/media/'+label)+'/'
-
-
- mm = hm.FindDeviceStringMatch(__appname__+'.mainvolume', self.__class__.__name__)
- if not mm:
- raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%(self.__class__.__name__,))
- self._main_prefix = None
- for dev in mm:
- try:
- self._main_prefix = conditional_mount(dev)+os.sep
- break
- except dbus.exceptions.DBusException:
- continue
-
-
- if not self._main_prefix:
- raise DeviceError('Could not open device for reading. Try a reboot.')
-
- self._card_prefix = None
- cards = hm.FindDeviceStringMatch(__appname__+'.cardvolume', self.__class__.__name__)
- keys = []
- for card in cards:
- keys.append(int('UC_SD' in bus.get_object("org.freedesktop.Hal", card).GetPropertyString('info.parent', dbus_interface='org.freedesktop.Hal.Device')))
-
- cards = zip(cards, keys)
- cards.sort(cmp=lambda x, y: cmp(x[1], y[1]))
- cards = [i[0] for i in cards]
-
- for dev in cards:
- try:
- self._card_prefix = conditional_mount(dev, False)+os.sep
- break
- except:
- import traceback
- print traceback
- continue
-
-
def open(self):
- time.sleep(5)
- self._main_prefix = self._card_prefix = None
- if islinux:
+ Device.open(self)
+
+ def write_cache(prefix):
try:
- self.open_linux()
- except DeviceError:
- time.sleep(3)
- self.open_linux()
- if iswindows:
- try:
- self.open_windows()
- except DeviceError:
- time.sleep(3)
- self.open_windows()
- if isosx:
- try:
- self.open_osx()
- except DeviceError:
- time.sleep(3)
- self.open_osx()
- if self._card_prefix is not None:
- try:
- cachep = os.path.join(self._card_prefix, self.CACHE_XML)
+ cachep = os.path.join(prefix, self.CACHE_XML)
if not os.path.exists(cachep):
try:
os.makedirs(os.path.dirname(cachep), mode=0777)
except:
time.sleep(5)
os.makedirs(os.path.dirname(cachep), mode=0777)
- f = open(cachep, 'wb')
- f.write(u'''
-
-
-'''.encode('utf8'))
- f.close()
+ with open(cachep, 'wb') as f:
+ f.write(u'''
+
+
+ '''.encode('utf8'))
+ return True
except:
self._card_prefix = None
import traceback
traceback.print_exc()
+ return False
- def set_progress_reporter(self, pr):
- self.report_progress = pr
+ if self._card_a_prefix is not None:
+ if not write_cache(self._card_a_prefix):
+ self._card_a_prefix = None
+ if self._card_b_prefix is not None:
+ if not write_cache(self._card_b_prefix):
+ self._card_b_prefix = None
def get_device_information(self, end_session=True):
+ self.report_progress(1.0, _('Get device information...'))
return (self.__class__.__name__, '', '', '')
- def card_prefix(self, end_session=True):
- return self._card_prefix
-
- @classmethod
- def _windows_space(cls, prefix):
- if prefix is None:
- return 0, 0
- win32file = __import__('win32file', globals(), locals(), [], -1)
- try:
- sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \
- win32file.GetDiskFreeSpace(prefix[:-1])
- except Exception, err:
- if getattr(err, 'args', [None])[0] == 21: # Disk not ready
- time.sleep(3)
- sectors_per_cluster, bytes_per_sector, free_clusters, total_clusters = \
- win32file.GetDiskFreeSpace(prefix[:-1])
- else: raise
- mult = sectors_per_cluster * bytes_per_sector
- return total_clusters * mult, free_clusters * mult
-
- def total_space(self, end_session=True):
- msz = csz = 0
- if not iswindows:
- if self._main_prefix is not None:
- stats = os.statvfs(self._main_prefix)
- msz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
- if self._card_prefix is not None:
- stats = os.statvfs(self._card_prefix)
- csz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
- else:
- msz = self._windows_space(self._main_prefix)[0]
- csz = self._windows_space(self._card_prefix)[0]
-
- return (msz, 0, csz)
-
- def free_space(self, end_session=True):
- msz = csz = 0
- if not iswindows:
- if self._main_prefix is not None:
- stats = os.statvfs(self._main_prefix)
- msz = stats.f_frsize * stats.f_bavail
- if self._card_prefix is not None:
- stats = os.statvfs(self._card_prefix)
- csz = stats.f_frsize * stats.f_bavail
- else:
- msz = self._windows_space(self._main_prefix)[1]
- csz = self._windows_space(self._card_prefix)[1]
-
- return (msz, 0, csz)
-
- def books(self, oncard=False, end_session=True):
- if oncard and self._card_prefix is None:
+ def books(self, oncard=None, end_session=True):
+ if oncard == 'carda' and not self._card_a_prefix:
+ self.report_progress(1.0, _('Getting list of books on device...'))
return []
+ elif oncard == 'cardb' and not self._card_b_prefix:
+ self.report_progress(1.0, _('Getting list of books on device...'))
+ return []
+ elif oncard and oncard != 'carda' and oncard != 'cardb':
+ self.report_progress(1.0, _('Getting list of books on device...'))
+ return []
+
db = self.__class__.CACHE_XML if oncard else self.__class__.MEDIA_XML
- prefix = self._card_prefix if oncard else self._main_prefix
- bl = BookList(open(prefix + db, 'rb'), prefix)
+ prefix = self._card_a_prefix if oncard == 'carda' else self._card_b_prefix if oncard == 'cardb' else self._main_prefix
+ bl = BookList(open(prefix + db, 'rb'), prefix, self.report_progress)
paths = bl.purge_corrupted_files()
for path in paths:
- path = os.path.join(self._card_prefix if oncard else self._main_prefix, path)
+ path = os.path.join(prefix, path)
if os.path.exists(path):
os.unlink(path)
+ self.report_progress(1.0, _('Getting list of books on device...'))
return bl
- def munge_path(self, path):
- if path.startswith('/') and not (path.startswith(self._main_prefix) or \
- (self._card_prefix and path.startswith(self._card_prefix))):
- path = self._main_prefix + path[1:]
- elif path.startswith('card:'):
- path = path.replace('card:', self._card_prefix[:-1])
- return path
-
- def mkdir(self, path, end_session=True):
- """ Make directory """
- path = self.munge_path(path)
- os.mkdir(path)
-
- def list(self, path, recurse=False, end_session=True, munge=True):
- if munge:
- path = self.munge_path(path)
- if os.path.isfile(path):
- return [(os.path.dirname(path), [File(path)])]
- entries = [File(os.path.join(path, f)) for f in os.listdir(path)]
- dirs = [(path, entries)]
- for _file in entries:
- if recurse and _file.is_dir:
- dirs[len(dirs):] = self.list(_file.path, recurse=True, munge=False)
- return dirs
-
- def get_file(self, path, outfile, end_session=True):
- path = self.munge_path(path)
- src = open(path, 'rb')
- shutil.copyfileobj(src, outfile, 10*1024*1024)
-
- def put_file(self, infile, path, replace_file=False, end_session=True):
- path = self.munge_path(path)
- if os.path.isdir(path):
- path = os.path.join(path, infile.name)
- if not replace_file and os.path.exists(path):
- raise PathError('File already exists: '+path)
- dest = open(path, 'wb')
- shutil.copyfileobj(infile, dest, 10*1024*1024)
- dest.flush()
- dest.close()
-
- def rm(self, path, end_session=True):
- path = self.munge_path(path)
- os.unlink(path)
-
- def touch(self, path, end_session=True):
- path = self.munge_path(path)
- if not os.path.exists(path):
- open(path, 'w').close()
- if not os.path.isdir(path):
- os.utime(path, None)
-
- def upload_books(self, files, names, on_card=False, end_session=True,
+ def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):
- if on_card and not self._card_prefix:
- raise ValueError(_('The reader has no storage card connected.'))
- path = os.path.join(self._card_prefix, self.CARD_PATH_PREFIX) if on_card \
- else os.path.join(self._main_prefix, 'database', 'media', 'books')
+ if on_card == 'carda' and not self._card_a_prefix:
+ raise ValueError(_('The reader has no storage card in this slot.'))
+ elif on_card == 'cardb' and not self._card_b_prefix:
+ raise ValueError(_('The reader has no storage card in this slot.'))
+ elif on_card and on_card not in ('carda', 'cardb'):
+ raise DeviceError(_('The reader has no storage card in this slot.'))
+
+ if on_card == 'carda':
+ path = os.path.join(self._card_a_prefix, self.CARD_PATH_PREFIX)
+ elif on_card == 'cardb':
+ path = os.path.join(self._card_b_prefix, self.CARD_PATH_PREFIX)
+ else:
+ path = os.path.join(self._main_prefix, 'database', 'media', 'books')
def get_size(obj):
if hasattr(obj, 'seek'):
@@ -403,34 +124,61 @@ class PRS505(Device):
return size
return os.path.getsize(obj)
- sizes = map(get_size, files)
+ sizes = [get_size(f) for f in files]
size = sum(sizes)
- space = self.free_space()
- mspace = space[0]
- cspace = space[2]
- if on_card and size > cspace - 1024*1024:
- raise FreeSpaceError("There is insufficient free space "+\
- "on the storage card")
- if not on_card and size > mspace - 2*1024*1024:
- raise FreeSpaceError("There is insufficient free space " +\
- "in main memory")
+
+ if not on_card and size > self.free_space()[0] - 2*1024*1024:
+ raise FreeSpaceError(_("There is insufficient free space in main memory"))
+ if on_card == 'carda' and size > self.free_space()[1] - 1024*1024:
+ raise FreeSpaceError(_("There is insufficient free space on the storage card"))
+ if on_card == 'cardb' and size > self.free_space()[2] - 1024*1024:
+ raise FreeSpaceError(_("There is insufficient free space on the storage card"))
paths, ctimes = [], []
names = iter(names)
- for infile in files:
+ metadata = iter(metadata)
+ for i, infile in enumerate(files):
close = False
if not hasattr(infile, 'read'):
infile, close = open(infile, 'rb'), True
infile.seek(0)
- name = names.next()
- paths.append(os.path.join(path, name))
- if not os.path.exists(os.path.dirname(paths[-1])):
- os.makedirs(os.path.dirname(paths[-1]))
+
+ newpath = path
+ mdata = metadata.next()
+
+ if 'tags' in mdata.keys():
+ for tag in mdata['tags']:
+ if tag.startswith(_('News')):
+ newpath = os.path.join(newpath, 'news')
+ newpath = os.path.join(newpath, mdata.get('title', ''))
+ newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+ elif tag.startswith('/'):
+ newpath = path
+ newpath += tag
+ newpath = os.path.normpath(newpath)
+ break
+
+ if newpath == path:
+ newpath = os.path.join(newpath, mdata.get('authors', _('Unknown')))
+ newpath = os.path.join(newpath, mdata.get('title', _('Unknown')))
+
+ if not os.path.exists(newpath):
+ os.makedirs(newpath)
+
+ filepath = os.path.join(newpath, names.next())
+ paths.append(filepath)
+
self.put_file(infile, paths[-1], replace_file=True)
+
if close:
infile.close()
ctimes.append(os.path.getctime(paths[-1]))
+
+ self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
+
+ self.report_progress(1.0, _('Transferring books to device...'))
+
return zip(paths, sizes, ctimes, cycle([on_card]))
@classmethod
@@ -439,17 +187,19 @@ class PRS505(Device):
for location in locations:
info = metadata.next()
path = location[0]
- on_card = 1 if location[3] else 0
+ blist = 2 if location[3] == 'cardb' else 1 if location[3] == 'carda' else 0
name = path.rpartition(os.sep)[2]
- name = (cls.CARD_PATH_PREFIX+'/' if on_card else 'database/media/books/') + name
+ name = (cls.CARD_PATH_PREFIX+'/' if blist else 'database/media/books/') + name
name = name.replace('//', '/')
- booklists[on_card].add_book(info, name, *location[1:-1])
+ booklists[blist].add_book(info, name, *location[1:-1])
fix_ids(*booklists)
def delete_books(self, paths, end_session=True):
- for path in paths:
+ for i, path in enumerate(paths):
+ self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
if os.path.exists(path):
os.unlink(path)
+ self.report_progress(1.0, _('Removing books from device...'))
@classmethod
def remove_books_from_metadata(cls, paths, booklists):
@@ -466,18 +216,15 @@ class PRS505(Device):
f = open(self._main_prefix + self.__class__.MEDIA_XML, 'wb')
booklists[0].write(f)
f.close()
- if self._card_prefix is not None and hasattr(booklists[1], 'write'):
- if not os.path.exists(self._card_prefix):
- os.makedirs(self._card_prefix)
- f = open(self._card_prefix + self.__class__.CACHE_XML, 'wb')
- booklists[1].write(f)
- f.close()
-
-
-
-def main(args=sys.argv):
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
+ def write_card_prefix(prefix, listid):
+ if prefix is not None and hasattr(booklists[listid], 'write'):
+ if not os.path.exists(prefix):
+ os.makedirs(prefix)
+ f = open(prefix + self.__class__.CACHE_XML, 'wb')
+ booklists[listid].write(f)
+ f.close()
+ write_card_prefix(self._card_a_prefix, 1)
+ write_card_prefix(self._card_b_prefix, 2)
+
+ self.report_progress(1.0, _('Sending metadata to device...'))
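
The upload_books() changes above file each transferred book under its own folder: news downloads go to news/<title>/<timestamp>, a tag beginning with '/' is used as an explicit folder, and anything else falls back to <authors>/<title>. A rough standalone sketch of that rule (the helper below is hypothetical and checks a plain 'News' prefix instead of the localized _('News') the driver uses):

    import os

    def prs505_dest_dir(base, mdata):
        # Mirrors the tag handling in PRS505.upload_books; illustrative only.
        newpath = base
        for tag in mdata.get('tags', []):
            if tag.startswith('News'):
                newpath = os.path.join(base, 'news',
                        mdata.get('title', ''), mdata.get('timestamp', ''))
                break
            elif tag.startswith('/'):
                newpath = os.path.normpath(base + tag)
                break
        if newpath == base:
            newpath = os.path.join(base, mdata.get('authors', 'Unknown'),
                    mdata.get('title', 'Unknown'))
        return newpath

    print prs505_dest_dir('/media/PRS505/database/media/books',
            {'tags': ['News'], 'title': 'The Economist', 'timestamp': '2009-01-09'})
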
diff --git a/src/calibre/devices/prs700/driver.py b/src/calibre/devices/prs700/driver.py
index 5db60ef506..a79902fe10 100644
--- a/src/calibre/devices/prs700/driver.py
+++ b/src/calibre/devices/prs700/driver.py
@@ -8,8 +8,19 @@ Device driver for the SONY PRS-700
from calibre.devices.prs505.driver import PRS505
class PRS700(PRS505):
+
+ name = 'PRS-700 Device Interface'
+ description = _('Communicate with the Sony PRS-700 eBook reader.')
+ author = _('Kovid Goyal and John Schember')
+ supported_platforms = ['windows', 'osx', 'linux']
BCD = [0x31a]
- PRODUCT_NAME = 'PRS-700'
- OSX_NAME = 'Sony PRS-700'
-
+
+ WINDOWS_MAIN_MEM = 'PRS-700'
+ WINDOWS_CARD_A_MEM = 'PRS-700/UC:MS'
+ WINDOWS_CARD_B_MEM = 'PRS-700/UC:SD'
+
+ OSX_MAIN_MEM = 'Sony PRS-700/UC Media'
+ OSX_CARD_A_MEM = 'Sony PRS-700/UC:MS Media'
+ OSX_CARD_B_MEM = 'Sony PRS-700/UC:SD'
+
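
With the storage card split into slots A and B, a Sony-style driver now declares separate Windows PNP strings and OS X volume names per slot, as PRS700 does above. A minimal sketch of a further subclass for a hypothetical reader (every identifier string below is invented; a real driver must copy the values the hardware actually reports):

    from calibre.devices.prs505.driver import PRS505

    class HypotheticalReader(PRS505):

        name        = 'Hypothetical Reader Device Interface'
        description = 'Communicate with a hypothetical Sony-style reader.'

        WINDOWS_MAIN_MEM   = 'XY-1000'
        WINDOWS_CARD_A_MEM = 'XY-1000/UC:MS'
        WINDOWS_CARD_B_MEM = 'XY-1000/UC:SD'

        OSX_MAIN_MEM   = 'Sony XY-1000/UC Media'
        OSX_CARD_A_MEM = 'Sony XY-1000/UC:MS Media'
        OSX_CARD_B_MEM = 'Sony XY-1000/UC:SD'
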
diff --git a/src/calibre/devices/usbms/books.py b/src/calibre/devices/usbms/books.py
index fffed41549..2875c04b88 100644
--- a/src/calibre/devices/usbms/books.py
+++ b/src/calibre/devices/usbms/books.py
@@ -21,15 +21,15 @@ class Book(object):
def __eq__(self, other):
return self.path == other.path
- @apply
- def title_sorter():
+ @dynamic_property
+ def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
return property(doc=doc, fget=fget)
- @apply
- def thumbnail():
+ @dynamic_property
+ def thumbnail(self):
return None
def __str__(self):
@@ -44,4 +44,3 @@ class BookList(_BookList):
def set_tags(self, book, tags):
pass
-
diff --git a/src/calibre/devices/usbms/cli.py b/src/calibre/devices/usbms/cli.py
new file mode 100644
index 0000000000..40e2225486
--- /dev/null
+++ b/src/calibre/devices/usbms/cli.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import os, shutil
+
+from calibre.devices.errors import PathError
+
+class File(object):
+
+ def __init__(self, path):
+ stats = os.stat(path)
+ self.is_dir = os.path.isdir(path)
+ self.is_readonly = not os.access(path, os.W_OK)
+ self.ctime = stats.st_ctime
+ self.wtime = stats.st_mtime
+ self.size = stats.st_size
+ if path.endswith(os.sep):
+ path = path[:-1]
+ self.path = path
+ self.name = os.path.basename(path)
+
+
+class CLI(object):
+
+ def get_file(self, path, outfile, end_session=True):
+ path = self.munge_path(path)
+ with open(path, 'rb') as src:
+ shutil.copyfileobj(src, outfile, 10*1024*1024)
+
+ def put_file(self, infile, path, replace_file=False, end_session=True):
+ path = self.munge_path(path)
+ if os.path.isdir(path):
+ path = os.path.join(path, infile.name)
+ if not replace_file and os.path.exists(path):
+ raise PathError('File already exists: ' + path)
+ dest = open(path, 'wb')
+ shutil.copyfileobj(infile, dest, 10*1024*1024)
+ dest.flush()
+ dest.close()
+
+ def munge_path(self, path):
+ if path.startswith('/') and not (path.startswith(self._main_prefix) or \
+ (self._card_a_prefix and path.startswith(self._card_a_prefix)) or \
+ (self._card_b_prefix and path.startswith(self._card_b_prefix))):
+ path = self._main_prefix + path[1:]
+ elif path.startswith('carda:'):
+            path = path.replace('carda:', self._card_a_prefix[:-1])
+        elif path.startswith('cardb:'):
+            path = path.replace('cardb:', self._card_b_prefix[:-1])
+ return path
+
+ def list(self, path, recurse=False, end_session=True, munge=True):
+ if munge:
+ path = self.munge_path(path)
+ if os.path.isfile(path):
+ return [(os.path.dirname(path), [File(path)])]
+ entries = [File(os.path.join(path, f)) for f in os.listdir(path)]
+ dirs = [(path, entries)]
+ for _file in entries:
+ if recurse and _file.is_dir:
+ dirs[len(dirs):] = self.list(_file.path, recurse=True, munge=False)
+ return dirs
+
+ def mkdir(self, path, end_session=True):
+ if self.SUPPORTS_SUB_DIRS:
+ path = self.munge_path(path)
+ os.mkdir(path)
+
+ def rm(self, path, end_session=True):
+ path = self.munge_path(path)
+ self.delete_books([path])
+
+ def touch(self, path, end_session=True):
+ path = self.munge_path(path)
+ if not os.path.exists(path):
+ open(path, 'w').close()
+ if not os.path.isdir(path):
+ os.utime(path, None)
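
A quick illustration of how the CLI mixin resolves the path forms it accepts (an absolute device path, or a carda:/cardb: prefixed path), using a dummy class with made-up mount prefixes in place of a connected device and assuming the per-slot prefix substitution shown above:

    from calibre.devices.usbms.cli import CLI

    class FakeDevice(CLI):
        _main_prefix   = '/media/READER/'
        _card_a_prefix = '/media/CARD_A/'
        _card_b_prefix = None

    d = FakeDevice()
    print d.munge_path('/database/media/books/x.epub')
    # -> /media/READER/database/media/books/x.epub
    print d.munge_path('carda:/books/y.epub')
    # -> /media/CARD_A/books/y.epub
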
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index 9f6b3cbd34..9b56509351 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -8,11 +8,12 @@ device. This class handles device detection.
import os, subprocess, time, re
-from calibre.devices.interface import Device as _Device
+from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import DeviceError
+from calibre.devices.usbms.deviceconfig import DeviceConfig
from calibre import iswindows, islinux, isosx, __appname__
-class Device(_Device):
+class Device(DeviceConfig, DevicePlugin):
'''
This class provides logic common to all drivers for devices that export themselves
as USB Mass Storage devices. If you are writing such a driver, inherit from this
@@ -25,10 +26,12 @@ class Device(_Device):
VENDOR_NAME = None
WINDOWS_MAIN_MEM = None
- WINDOWS_CARD_MEM = None
+ WINDOWS_CARD_A_MEM = None
+ WINDOWS_CARD_B_MEM = None
OSX_MAIN_MEM = None
- OSX_CARD_MEM = None
+ OSX_CARD_A_MEM = None
+ OSX_CARD_B_MEM = None
MAIN_MEMORY_VOLUME_LABEL = ''
STORAGE_CARD_VOLUME_LABEL = ''
@@ -63,18 +66,30 @@ class Device(_Device):
+
+
+
+
+ %(BCD_start)s
+
+ %(storage_card)s
+ %(deviceclass)s
+
+ %(BCD_end)s
+
+
+
+
'''
- FDI_BCD_TEMPLATE = ''
FDI_LUNS = {'lun0':0, 'lun1':1, 'lun2':2}
+ FDI_BCD_TEMPLATE = ''
-
- def __init__(self, key='-1', log_packets=False, report_progress=None) :
- self._main_prefix = self._card_prefix = None
+ def reset(self, key='-1', log_packets=False, report_progress=None) :
+ self._main_prefix = self._card_a_prefix = self._card_b_prefix = None
@classmethod
def get_fdi(cls):
fdi = ''
-
for vid in cls.VENDOR_ID:
for pid in cls.PRODUCT_ID:
fdi_base_values = dict(
@@ -85,7 +100,6 @@ class Device(_Device):
main_memory=cls.MAIN_MEMORY_VOLUME_LABEL,
storage_card=cls.STORAGE_CARD_VOLUME_LABEL,
)
-
fdi_base_values.update(cls.FDI_LUNS)
if cls.BCD is None:
@@ -105,7 +119,7 @@ class Device(_Device):
self.report_progress = report_progress
def card_prefix(self, end_session=True):
- return self._card_prefix
+ return (self._card_a_prefix, self._card_b_prefix)
@classmethod
def _windows_space(cls, prefix):
@@ -125,34 +139,41 @@ class Device(_Device):
return total_clusters * mult, free_clusters * mult
def total_space(self, end_session=True):
- msz = csz = 0
+ msz = casz = cbsz = 0
if not iswindows:
if self._main_prefix is not None:
stats = os.statvfs(self._main_prefix)
msz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
- if self._card_prefix is not None:
- stats = os.statvfs(self._card_prefix)
- csz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
+ if self._card_a_prefix is not None:
+ stats = os.statvfs(self._card_a_prefix)
+ casz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
+ if self._card_b_prefix is not None:
+ stats = os.statvfs(self._card_b_prefix)
+ cbsz = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
else:
msz = self._windows_space(self._main_prefix)[0]
- csz = self._windows_space(self._card_prefix)[0]
+ casz = self._windows_space(self._card_a_prefix)[0]
+ cbsz = self._windows_space(self._card_b_prefix)[0]
- return (msz, 0, csz)
+ return (msz, casz, cbsz)
def free_space(self, end_session=True):
- msz = csz = 0
+ msz = casz = cbsz = 0
if not iswindows:
if self._main_prefix is not None:
stats = os.statvfs(self._main_prefix)
msz = stats.f_frsize * stats.f_bavail
- if self._card_prefix is not None:
- stats = os.statvfs(self._card_prefix)
- csz = stats.f_frsize * stats.f_bavail
+ if self._card_a_prefix is not None:
+ stats = os.statvfs(self._card_a_prefix)
+ casz = stats.f_frsize * stats.f_bavail
+ if self._card_b_prefix is not None:
+ stats = os.statvfs(self._card_b_prefix)
+ cbsz = stats.f_frsize * stats.f_bavail
else:
msz = self._windows_space(self._main_prefix)[1]
-            csz = self._windows_space(self._card_prefix)[1]
+            casz = self._windows_space(self._card_a_prefix)[1]
+            cbsz = self._windows_space(self._card_b_prefix)[1]
- return (msz, 0, csz)
+ return (msz, casz, cbsz)
def windows_match_device(self, pnp_id, device_id):
pnp_id = pnp_id.upper()
@@ -193,10 +214,12 @@ class Device(_Device):
for drive in c.Win32_DiskDrive():
if self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_MAIN_MEM):
drives['main'] = self.windows_get_drive_prefix(drive)
- elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_MEM):
- drives['card'] = self.windows_get_drive_prefix(drive)
+ elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_A_MEM):
+ drives['carda'] = self.windows_get_drive_prefix(drive)
+ elif self.windows_match_device(str(drive.PNPDeviceID), self.WINDOWS_CARD_B_MEM):
+ drives['cardb'] = self.windows_get_drive_prefix(drive)
- if 'main' in drives.keys() and 'card' in drives.keys():
+ if 'main' in drives.keys() and 'carda' in drives.keys() and 'cardb' in drives.keys():
break
if 'main' not in drives:
@@ -206,7 +229,8 @@ class Device(_Device):
drives = self.windows_sort_drives(drives)
self._main_prefix = drives.get('main')
- self._card_prefix = drives.get('card', None)
+ self._card_a_prefix = drives.get('carda', None)
+ self._card_b_prefix = drives.get('cardb', None)
@classmethod
def run_ioreg(cls, raw=None):
@@ -237,9 +261,11 @@ class Device(_Device):
for i, line in enumerate(lines):
if self.OSX_MAIN_MEM is not None and line.strip().endswith('') and self.OSX_MAIN_MEM in line:
get_dev_node(lines[i+1:], 'main')
- if self.OSX_CARD_MEM is not None and line.strip().endswith('') and self.OSX_CARD_MEM in line:
- get_dev_node(lines[i+1:], 'card')
- if len(names.keys()) == 2:
+ if self.OSX_CARD_A_MEM is not None and line.strip().endswith('') and self.OSX_CARD_A_MEM in line:
+ get_dev_node(lines[i+1:], 'carda')
+ if self.OSX_CARD_B_MEM is not None and line.strip().endswith('') and self.OSX_CARD_B_MEM in line:
+ get_dev_node(lines[i+1:], 'cardb')
+ if len(names.keys()) == 3:
break
return names
@@ -251,10 +277,18 @@ class Device(_Device):
raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
main_pat = dev_pat % names['main']
self._main_prefix = re.search(main_pat, mount).group(2) + os.sep
- card_pat = names['card'] if 'card' in names.keys() else None
- if card_pat is not None:
- card_pat = dev_pat % card_pat
- self._card_prefix = re.search(card_pat, mount).group(2) + os.sep
+ card_a_pat = names['carda'] if 'carda' in names.keys() else None
+ card_b_pat = names['cardb'] if 'cardb' in names.keys() else None
+
+ def get_card_prefix(pat):
+ if pat is not None:
+ pat = dev_pat % pat
+ return re.search(pat, mount).group(2) + os.sep
+ else:
+ return None
+
+ self._card_a_prefix = get_card_prefix(card_a_pat)
+ self._card_b_prefix = get_card_prefix(card_b_pat)
def open_linux(self):
import dbus
@@ -287,21 +321,24 @@ class Device(_Device):
if not self._main_prefix:
raise DeviceError('Could not open device for reading. Try a reboot.')
- self._card_prefix = None
+ self._card_a_prefix = self._card_b_prefix = None
cards = hm.FindDeviceStringMatch(__appname__+'.cardvolume', self.__class__.__name__)
- for dev in cards:
+ def mount_card(dev):
try:
- self._card_prefix = conditional_mount(dev)+os.sep
- break
+ return conditional_mount(dev)+os.sep
except:
import traceback
print traceback
- continue
+
+ if len(cards) >= 1:
+ self._card_a_prefix = mount_card(cards[0])
+ if len(cards) >=2:
+ self._card_b_prefix = mount_card(cards[1])
def open(self):
time.sleep(5)
- self._main_prefix = self._card_prefix = None
+ self._main_prefix = self._card_a_prefix = self._card_b_prefix = None
if islinux:
try:
self.open_linux()
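
total_space() and free_space() now return (main memory, card A, card B) triples; on POSIX systems each value comes from os.statvfs, while the Windows branch uses GetDiskFreeSpace. The statvfs arithmetic on its own, as a small helper that can be run against any mounted path (POSIX only):

    import os

    def space(prefix):
        # Same arithmetic as Device.total_space/free_space above.
        stats = os.statvfs(prefix)
        total = stats.f_frsize * (stats.f_blocks + stats.f_bavail - stats.f_bfree)
        free = stats.f_frsize * stats.f_bavail
        return total, free

    print space('/')
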
diff --git a/src/calibre/devices/usbms/deviceconfig.py b/src/calibre/devices/usbms/deviceconfig.py
new file mode 100644
index 0000000000..bbe3a13646
--- /dev/null
+++ b/src/calibre/devices/usbms/deviceconfig.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
+
+from calibre.utils.config import Config, ConfigProxy
+
+class DeviceConfig(object):
+
+ HELP_MESSAGE = _('Ordered list of formats the device will accept')
+
+ @classmethod
+ def _config(cls):
+ klass = cls if isinstance(cls, type) else cls.__class__
+ c = Config('device_drivers_%s' % klass.__name__, _('settings for device drivers'))
+ c.add_opt('format_map', default=cls.FORMATS, help=cls.HELP_MESSAGE)
+ return c
+
+ @classmethod
+ def _configProxy(cls):
+ return ConfigProxy(cls._config())
+
+ @classmethod
+ def config_widget(cls):
+ from calibre.gui2.device_drivers.configwidget import ConfigWidget
+ cw = ConfigWidget(cls.settings(), cls.FORMATS)
+ return cw
+
+ @classmethod
+ def save_settings(cls, config_widget):
+ cls._configProxy()['format_map'] = config_widget.format_map()
+
+ @classmethod
+ def settings(cls):
+ return cls._config().parse()
+
+ def customization_help(cls, gui=False):
+ return cls.HELP_MESSAGE
+
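
DeviceConfig gives each driver class its own persistent 'format_map' option, namespaced by class name (device_drivers_<ClassName>). A sketch of how a driver is expected to use it, based only on the calls visible above; DemoDriver and its format list are invented, and the attribute access on the parsed settings is an assumption:

    from calibre.devices.usbms.deviceconfig import DeviceConfig

    class DemoDriver(DeviceConfig):
        FORMATS = ['epub', 'pdf', 'txt']

    opts = DemoDriver.settings()
    # Assumed to expose the saved format order, defaulting to FORMATS.
    print opts.format_map
    # Persist a new order for this driver only.
    DemoDriver._configProxy()['format_map'] = ['pdf', 'epub']
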
diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py
index fdb553f15b..700a072c5b 100644
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@@ -10,71 +10,89 @@ for a particular device.
import os, fnmatch, shutil
from itertools import cycle
-from calibre.ebooks.metadata.meta import metadata_from_formats, path_to_ext
from calibre.ebooks.metadata import authors_to_string
+from calibre.devices.usbms.cli import CLI
from calibre.devices.usbms.device import Device
from calibre.devices.usbms.books import BookList, Book
-from calibre.devices.errors import FreeSpaceError, PathError
+from calibre.devices.errors import DeviceError, FreeSpaceError
from calibre.devices.mime import mime_type_ext
-class File(object):
- def __init__(self, path):
- stats = os.stat(path)
- self.is_dir = os.path.isdir(path)
- self.is_readonly = not os.access(path, os.W_OK)
- self.ctime = stats.st_ctime
- self.wtime = stats.st_mtime
- self.size = stats.st_size
- if path.endswith(os.sep):
- path = path[:-1]
- self.path = path
- self.name = os.path.basename(path)
+# CLI must come before Device as it implements the CLI functions that
+# are inherited from the device interface in Device.
+class USBMS(CLI, Device):
+
+ name = 'USBMS Base Device Interface'
+ description = _('Communicate with an eBook reader.')
+ author = _('John Schember')
+ supported_platforms = ['windows', 'osx', 'linux']
-class USBMS(Device):
FORMATS = []
EBOOK_DIR_MAIN = ''
- EBOOK_DIR_CARD = ''
+ EBOOK_DIR_CARD_A = ''
+ EBOOK_DIR_CARD_B = ''
SUPPORTS_SUB_DIRS = False
CAN_SET_METADATA = False
- def __init__(self, key='-1', log_packets=False, report_progress=None):
- Device.__init__(self, key=key, log_packets=log_packets,
+ def reset(self, key='-1', log_packets=False, report_progress=None):
+ Device.reset(self, key=key, log_packets=log_packets,
report_progress=report_progress)
def get_device_information(self, end_session=True):
+ self.report_progress(1.0, _('Get device information...'))
return (self.__class__.__name__, '', '', '')
- def books(self, oncard=False, end_session=True):
+ def books(self, oncard=None, end_session=True):
+ from calibre.ebooks.metadata.meta import path_to_ext
bl = BookList()
- if oncard and self._card_prefix is None:
+ if oncard == 'carda' and not self._card_a_prefix:
+ self.report_progress(1.0, _('Getting list of books on device...'))
+ return bl
+ elif oncard == 'cardb' and not self._card_b_prefix:
+ self.report_progress(1.0, _('Getting list of books on device...'))
+ return bl
+ elif oncard and oncard != 'carda' and oncard != 'cardb':
+ self.report_progress(1.0, _('Getting list of books on device...'))
return bl
- prefix = self._card_prefix if oncard else self._main_prefix
- ebook_dir = self.EBOOK_DIR_CARD if oncard else self.EBOOK_DIR_MAIN
+ prefix = self._card_a_prefix if oncard == 'carda' else self._card_b_prefix if oncard == 'cardb' else self._main_prefix
+ ebook_dir = self.EBOOK_DIR_CARD_A if oncard == 'carda' else self.EBOOK_DIR_CARD_B if oncard == 'cardb' else self.EBOOK_DIR_MAIN
# Get all books in the ebook_dir directory
if self.SUPPORTS_SUB_DIRS:
for path, dirs, files in os.walk(os.path.join(prefix, ebook_dir)):
# Filter out anything that isn't in the list of supported ebook types
for book_type in self.FORMATS:
- for filename in fnmatch.filter(files, '*.%s' % (book_type)):
+ match = fnmatch.filter(files, '*.%s' % (book_type))
+ for i, filename in enumerate(match):
+ self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
else:
path = os.path.join(prefix, ebook_dir)
- for filename in os.listdir(path):
+ paths = os.listdir(path)
+ for i, filename in enumerate(paths):
+ self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
if path_to_ext(filename) in self.FORMATS:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
+
+ self.report_progress(1.0, _('Getting list of books on device...'))
+
return bl
def _sanity_check(self, on_card, files):
- if on_card and not self._card_prefix:
- raise ValueError(_('The reader has no storage card connected.'))
+ if on_card == 'carda' and not self._card_a_prefix:
+ raise ValueError(_('The reader has no storage card in this slot.'))
+ elif on_card == 'cardb' and not self._card_b_prefix:
+ raise ValueError(_('The reader has no storage card in this slot.'))
+ elif on_card and on_card not in ('carda', 'cardb'):
+ raise DeviceError(_('The reader has no storage card in this slot.'))
- if not on_card:
- path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
+ if on_card == 'carda':
+ path = os.path.join(self._card_a_prefix, self.EBOOK_DIR_CARD_A)
+ elif on_card == 'cardb':
+ path = os.path.join(self._card_b_prefix, self.EBOOK_DIR_CARD_B)
else:
- path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD)
+ path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN)
def get_size(obj):
if hasattr(obj, 'seek'):
@@ -87,13 +105,15 @@ class USBMS(Device):
sizes = [get_size(f) for f in files]
size = sum(sizes)
- if on_card and size > self.free_space()[2] - 1024*1024:
- raise FreeSpaceError(_("There is insufficient free space on the storage card"))
if not on_card and size > self.free_space()[0] - 2*1024*1024:
raise FreeSpaceError(_("There is insufficient free space in main memory"))
+ if on_card == 'carda' and size > self.free_space()[1] - 1024*1024:
+ raise FreeSpaceError(_("There is insufficient free space on the storage card"))
+ if on_card == 'cardb' and size > self.free_space()[2] - 1024*1024:
+ raise FreeSpaceError(_("There is insufficient free space on the storage card"))
return path
- def upload_books(self, files, names, on_card=False, end_session=True,
+ def upload_books(self, files, names, on_card=None, end_session=True,
metadata=None):
path = self._sanity_check(on_card, files)
@@ -102,7 +122,7 @@ class USBMS(Device):
names = iter(names)
metadata = iter(metadata)
- for infile in files:
+ for i, infile in enumerate(files):
newpath = path
if self.SUPPORTS_SUB_DIRS:
@@ -110,11 +130,21 @@ class USBMS(Device):
if 'tags' in mdata.keys():
for tag in mdata['tags']:
- if tag.startswith('/'):
+ if tag.startswith(_('News')):
+ newpath = os.path.join(newpath, 'news')
+ newpath = os.path.join(newpath, mdata.get('title', ''))
+ newpath = os.path.join(newpath, mdata.get('timestamp', ''))
+ break
+ elif tag.startswith('/'):
newpath += tag
newpath = os.path.normpath(newpath)
break
+ if newpath == path:
+ newpath = os.path.join(newpath,
+ mdata.get('authors', _('Unknown')),
+ mdata.get('title', _('Unknown')))
+
if not os.path.exists(newpath):
os.makedirs(newpath)
@@ -132,22 +162,28 @@ class USBMS(Device):
else:
shutil.copy2(infile, filepath)
+ self.report_progress((i+1) / float(len(files)), _('Transferring books to device...'))
+
+ self.report_progress(1.0, _('Transferring books to device...'))
+
return zip(paths, cycle([on_card]))
- @classmethod
- def add_books_to_metadata(cls, locations, metadata, booklists):
- for location in locations:
+ def add_books_to_metadata(self, locations, metadata, booklists):
+ for i, location in enumerate(locations):
+ self.report_progress((i+1) / float(len(locations)), _('Adding books to device metadata listing...'))
path = location[0]
- on_card = 1 if location[1] else 0
+ blist = 2 if location[1] == 'cardb' else 1 if location[1] == 'carda' else 0
- book = cls.book_from_path(path)
+ book = self.book_from_path(path)
- if not book in booklists[on_card]:
- booklists[on_card].append(book)
+ if not book in booklists[blist]:
+ booklists[blist].append(book)
+ self.report_progress(1.0, _('Adding books to device metadata listing...'))
def delete_books(self, paths, end_session=True):
- for path in paths:
+ for i, path in enumerate(paths):
+ self.report_progress((i+1) / float(len(paths)), _('Removing books from device...'))
if os.path.exists(path):
# Delete the ebook
os.unlink(path)
@@ -156,79 +192,31 @@ class USBMS(Device):
os.removedirs(os.path.dirname(path))
except:
pass
+ self.report_progress(1.0, _('Removing books from device...'))
- @classmethod
- def remove_books_from_metadata(cls, paths, booklists):
- for path in paths:
+ def remove_books_from_metadata(self, paths, booklists):
+ for i, path in enumerate(paths):
+ self.report_progress((i+1) / float(len(paths)), _('Removing books from device metadata listing...'))
for bl in booklists:
for book in bl:
if path.endswith(book.path):
bl.remove(book)
+ self.report_progress(1.0, _('Removing books from device metadata listing...'))
def sync_booklists(self, booklists, end_session=True):
# There is no meta data on the device to update. The device is treated
# as a mass storage device and does not use a meta data xml file like
# the Sony Readers.
- pass
-
- def get_file(self, path, outfile, end_session=True):
- path = self.munge_path(path)
- with open(path, 'rb') as src:
- shutil.copyfileobj(src, outfile, 10*1024*1024)
-
- def put_file(self, infile, path, replace_file=False, end_session=True):
- path = self.munge_path(path)
- if os.path.isdir(path):
- path = os.path.join(path, infile.name)
- if not replace_file and os.path.exists(path):
- raise PathError('File already exists: ' + path)
- dest = open(path, 'wb')
- shutil.copyfileobj(infile, dest, 10*1024*1024)
- dest.flush()
- dest.close()
-
- def munge_path(self, path):
- if path.startswith('/') and not (path.startswith(self._main_prefix) or \
- (self._card_prefix and path.startswith(self._card_prefix))):
- path = self._main_prefix + path[1:]
- elif path.startswith('card:'):
- path = path.replace('card:', self._card_prefix[:-1])
- return path
-
- def list(self, path, recurse=False, end_session=True, munge=True):
- if munge:
- path = self.munge_path(path)
- if os.path.isfile(path):
- return [(os.path.dirname(path), [File(path)])]
- entries = [File(os.path.join(path, f)) for f in os.listdir(path)]
- dirs = [(path, entries)]
- for _file in entries:
- if recurse and _file.is_dir:
- dirs[len(dirs):] = self.list(_file.path, recurse=True, munge=False)
- return dirs
-
- def mkdir(self, path, end_session=True):
- if self.SUPPORTS_SUB_DIRS:
- path = self.munge_path(path)
- os.mkdir(path)
-
- def rm(self, path, end_session=True):
- path = self.munge_path(path)
- self.delete_books([path])
-
- def touch(self, path, end_session=True):
- path = self.munge_path(path)
- if not os.path.exists(path):
- open(path, 'w').close()
- if not os.path.isdir(path):
- os.utime(path, None)
+ self.report_progress(1.0, _('Sending metadata to device...'))
@classmethod
def metadata_from_path(cls, path):
+ from calibre.ebooks.metadata.meta import metadata_from_formats
return metadata_from_formats([path])
@classmethod
def book_from_path(cls, path):
+ from calibre.ebooks.metadata.meta import path_to_ext
fileext = path_to_ext(path)
mi = cls.metadata_from_path(path)
mime = mime_type_ext(fileext)
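
Throughout the new driver code the on_card value (None, 'carda' or 'cardb') is mapped onto an index into the (main, card A, card B) booklists. That rule, pulled out as a tiny helper for clarity (illustrative only, not part of the driver):

    def booklist_index(on_card):
        # None/False -> 0 (main memory), 'carda' -> 1, 'cardb' -> 2
        return 2 if on_card == 'cardb' else 1 if on_card == 'carda' else 0

    assert [booklist_index(x) for x in (None, 'carda', 'cardb')] == [0, 1, 2]
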
diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index 26d2394818..416fe61789 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -60,6 +60,8 @@ class HTMLRenderer(object):
def render_html(path_to_html, width=590, height=750):
from PyQt4.QtWebKit import QWebPage
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
+ from calibre.gui2 import is_ok_to_use_qt
+ if not is_ok_to_use_qt(): return None
path_to_html = os.path.abspath(path_to_html)
with CurrentDir(os.path.dirname(path_to_html)):
page = QWebPage()
diff --git a/src/calibre/ebooks/lrf/comic/__init__.py b/src/calibre/ebooks/comic/__init__.py
similarity index 100%
rename from src/calibre/ebooks/lrf/comic/__init__.py
rename to src/calibre/ebooks/comic/__init__.py
diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py
new file mode 100755
index 0000000000..bf2aac1162
--- /dev/null
+++ b/src/calibre/ebooks/comic/input.py
@@ -0,0 +1,473 @@
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Based on ideas from comiclrf created by FangornUK.
+'''
+
+import os, shutil, traceback, textwrap, time
+from Queue import Empty
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre import extract, CurrentDir, prints
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.ipc.server import Server
+from calibre.utils.ipc.job import ParallelJob
+
+def extract_comic(path_to_comic_file):
+ '''
+ Un-archive the comic file.
+ '''
+ tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
+ extract(path_to_comic_file, tdir)
+ return tdir
+
+def find_pages(dir, sort_on_mtime=False, verbose=False):
+ '''
+ Find valid comic pages in a previously un-archived comic.
+
+ :param dir: Directory in which extracted comic lives
+ :param sort_on_mtime: If True sort pages based on their last modified time.
+ Otherwise, sort alphabetically.
+ '''
+ extensions = ['jpeg', 'jpg', 'gif', 'png']
+ pages = []
+ for datum in os.walk(dir):
+ for name in datum[-1]:
+ path = os.path.join(datum[0], name)
+ if '__MACOSX' in path: continue
+ for ext in extensions:
+ if path.lower().endswith('.'+ext):
+ pages.append(path)
+ break
+ if sort_on_mtime:
+ comparator = lambda x, y : cmp(os.stat(x).st_mtime, os.stat(y).st_mtime)
+ else:
+ comparator = lambda x, y : cmp(os.path.basename(x), os.path.basename(y))
+
+ pages.sort(cmp=comparator)
+ if verbose:
+ prints('Found comic pages...')
+ prints('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
+ return pages
+
+class PageProcessor(list):
+ '''
+ Contains the actual image rendering logic. See :method:`render` and
+ :method:`process_pages`.
+ '''
+
+ def __init__(self, path_to_page, dest, opts, num):
+ list.__init__(self)
+ self.path_to_page = path_to_page
+ self.opts = opts
+ self.num = num
+ self.dest = dest
+ self.rotate = False
+ self.render()
+
+
+ def render(self):
+ import calibre.utils.PythonMagickWand as pw
+ img = pw.NewMagickWand()
+ if img < 0:
+ raise RuntimeError('Cannot create wand.')
+ if not pw.MagickReadImage(img, self.path_to_page):
+            raise IOError('Failed to read image from: %s'%self.path_to_page)
+ width = pw.MagickGetImageWidth(img)
+ height = pw.MagickGetImageHeight(img)
+ if self.num == 0: # First image so create a thumbnail from it
+ thumb = pw.CloneMagickWand(img)
+ if thumb < 0:
+ raise RuntimeError('Cannot create wand.')
+ pw.MagickThumbnailImage(thumb, 60, 80)
+ pw.MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png'))
+ pw.DestroyMagickWand(thumb)
+ self.pages = [img]
+ if width > height:
+ if self.opts.landscape:
+ self.rotate = True
+ else:
+ split1, split2 = map(pw.CloneMagickWand, (img, img))
+ pw.DestroyMagickWand(img)
+ if split1 < 0 or split2 < 0:
+ raise RuntimeError('Cannot create wand.')
+ pw.MagickCropImage(split1, (width/2)-1, height, 0, 0)
+ pw.MagickCropImage(split2, (width/2)-1, height, width/2, 0 )
+ self.pages = [split2, split1] if self.opts.right2left else [split1, split2]
+ self.process_pages()
+
+ def process_pages(self):
+ import calibre.utils.PythonMagickWand as p
+ for i, wand in enumerate(self.pages):
+ pw = p.NewPixelWand()
+ try:
+ if pw < 0:
+ raise RuntimeError('Cannot create wand.')
+ p.PixelSetColor(pw, 'white')
+
+ p.MagickSetImageBorderColor(wand, pw)
+ if self.rotate:
+ p.MagickRotateImage(wand, pw, -90)
+
+ # 25 percent fuzzy trim?
+ if not self.opts.disable_trim:
+ p.MagickTrimImage(wand, 25*65535/100)
+ p.MagickSetImagePage(wand, 0,0,0,0) #Clear page after trim, like a "+repage"
+ # Do the Photoshop "Auto Levels" equivalent
+ if not self.opts.dont_normalize:
+ p.MagickNormalizeImage(wand)
+ sizex = p.MagickGetImageWidth(wand)
+ sizey = p.MagickGetImageHeight(wand)
+
+ SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
+
+ if self.opts.keep_aspect_ratio:
+ # Preserve the aspect ratio by adding border
+ aspect = float(sizex) / float(sizey)
+ if aspect <= (float(SCRWIDTH) / float(SCRHEIGHT)):
+ newsizey = SCRHEIGHT
+ newsizex = int(newsizey * aspect)
+ deltax = (SCRWIDTH - newsizex) / 2
+ deltay = 0
+ else:
+ newsizex = SCRWIDTH
+ newsizey = int(newsizex / aspect)
+ deltax = 0
+ deltay = (SCRHEIGHT - newsizey) / 2
+ p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
+ p.MagickSetImageBorderColor(wand, pw)
+ p.MagickBorderImage(wand, pw, deltax, deltay)
+ elif self.opts.wide:
+                    # Keep the aspect ratio and use the device height as the scaled image width so landscape mode stays clean
+ aspect = float(sizex) / float(sizey)
+ screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT)
+ # Get dimensions of the landscape mode screen
+ # Add 25px back to height for the battery bar.
+ wscreenx = SCRHEIGHT + 25
+ wscreeny = int(wscreenx / screen_aspect)
+ if aspect <= screen_aspect:
+ newsizey = wscreeny
+ newsizex = int(newsizey * aspect)
+ deltax = (wscreenx - newsizex) / 2
+ deltay = 0
+ else:
+ newsizex = wscreenx
+ newsizey = int(newsizex / aspect)
+ deltax = 0
+ deltay = (wscreeny - newsizey) / 2
+ p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
+ p.MagickSetImageBorderColor(wand, pw)
+ p.MagickBorderImage(wand, pw, deltax, deltay)
+ else:
+ p.MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, p.CatromFilter, 1.0)
+
+ if not self.opts.dont_sharpen:
+ p.MagickSharpenImage(wand, 0.0, 1.0)
+
+ p.MagickSetImageType(wand, p.GrayscaleType)
+
+ if self.opts.despeckle:
+ p.MagickDespeckleImage(wand)
+
+ p.MagickQuantizeImage(wand, self.opts.colors, p.RGBColorspace, 0, 1, 0)
+ dest = '%d_%d.png'%(self.num, i)
+ dest = os.path.join(self.dest, dest)
+ p.MagickWriteImage(wand, dest+'8')
+ os.rename(dest+'8', dest)
+ self.append(dest)
+ finally:
+ if pw > 0:
+ p.DestroyPixelWand(pw)
+ p.DestroyMagickWand(wand)
+
+def render_pages(tasks, dest, opts, notification=lambda x, y: x):
+ '''
+ Entry point for the job server.
+ '''
+ failures, pages = [], []
+ from calibre.utils.PythonMagickWand import ImageMagick
+ with ImageMagick():
+ for num, path in tasks:
+ try:
+ pages.extend(PageProcessor(path, dest, opts, num))
+ msg = _('Rendered %s')%path
+ except:
+ failures.append(path)
+ msg = _('Failed %s')%path
+ if opts.verbose:
+ msg += '\n' + traceback.format_exc()
+ prints(msg)
+ notification(0.5, msg)
+
+ return pages, failures
+
+
+class Progress(object):
+
+ def __init__(self, total, update):
+ self.total = total
+ self.update = update
+ self.done = 0
+
+ def __call__(self, percent, msg=''):
+ self.done += 1
+ #msg = msg%os.path.basename(job.args[0])
+ self.update(float(self.done)/self.total, msg)
+
+def process_pages(pages, opts, update, tdir):
+ '''
+ Render all identified comic pages.
+ '''
+ from calibre.utils.PythonMagickWand import ImageMagick
+ ImageMagick
+
+ progress = Progress(len(pages), update)
+ server = Server()
+ jobs = []
+ tasks = [(p, os.path.join(tdir, os.path.basename(p))) for p in pages]
+ tasks = server.split(pages)
+ for task in tasks:
+ jobs.append(ParallelJob('render_pages', '', progress,
+ args=[task, tdir, opts]))
+ server.add_job(jobs[-1])
+ while True:
+ time.sleep(1)
+ running = False
+ for job in jobs:
+ while True:
+ try:
+ x = job.notifications.get_nowait()
+ progress(*x)
+ except Empty:
+ break
+ job.update()
+ if not job.is_finished:
+ running = True
+ if not running:
+ break
+ server.close()
+ ans, failures = [], []
+
+ for job in jobs:
+ if job.failed:
+ raw_input()
+ raise Exception(_('Failed to process comic: \n\n%s')%
+ job.log_file.read())
+ pages, failures_ = job.result
+ ans += pages
+ failures += failures_
+ return ans, failures
+
+
+class ComicInput(InputFormatPlugin):
+
+ name = 'Comic Input'
+ author = 'Kovid Goyal'
+ description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
+ file_types = set(['cbz', 'cbr', 'cbc'])
+ is_image_collection = True
+
+ options = set([
+ OptionRecommendation(name='colors', recommended_value=64,
+ help=_('Number of colors for grayscale image conversion. Default: %default')),
+ OptionRecommendation(name='dont_normalize', recommended_value=False,
+ help=_('Disable normalize (improve contrast) color range '
+ 'for pictures. Default: False')),
+ OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
+ help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
+ OptionRecommendation(name='dont_sharpen', recommended_value=False,
+ help=_('Disable sharpening.')),
+ OptionRecommendation(name='disable_trim', recommended_value=False,
+ help=_('Disable trimming of comic pages. For some comics, '
+ 'trimming might remove content as well as borders.')),
+        OptionRecommendation(name='landscape', recommended_value=False,
+ help=_("Don't split landscape images into two portrait images")),
+ OptionRecommendation(name='wide', recommended_value=False,
+ help=_("Keep aspect ratio and scale image using screen height as "
+ "image width for viewing in landscape mode.")),
+ OptionRecommendation(name='right2left', recommended_value=False,
+ help=_('Used for right-to-left publications like manga. '
+ 'Causes landscape pages to be split into portrait pages '
+ 'from right to left.')),
+ OptionRecommendation(name='despeckle', recommended_value=False,
+ help=_('Enable Despeckle. Reduces speckle noise. '
+ 'May greatly increase processing time.')),
+ OptionRecommendation(name='no_sort', recommended_value=False,
+ help=_("Don't sort the files found in the comic "
+ "alphabetically by name. Instead use the order they were "
+ "added to the comic.")),
+ OptionRecommendation(name='no_process', recommended_value=False,
+ help=_("Apply no processing to the image")),
+ ])
+
+ recommendations = set([
+ ('margin_left', 0, OptionRecommendation.HIGH),
+ ('margin_top', 0, OptionRecommendation.HIGH),
+ ('margin_right', 0, OptionRecommendation.HIGH),
+ ('margin_bottom', 0, OptionRecommendation.HIGH),
+ ('insert_blank_line', False, OptionRecommendation.HIGH),
+ ('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
+ ('dont_justify', True, OptionRecommendation.HIGH),
+ ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
+ ('chapter', None, OptionRecommendation.HIGH),
+ ('use_auto_toc', False, OptionRecommendation.HIGH),
+ ('page_breaks_before', None, OptionRecommendation.HIGH),
+ ('disable_font_rescaling', True, OptionRecommendation.HIGH),
+ ('linearize_tables', False, OptionRecommendation.HIGH),
+ ])
+
+ def get_comics_from_collection(self, stream):
+ from calibre.libunzip import extract as zipextract
+ tdir = PersistentTemporaryDirectory('_comic_collection')
+ zipextract(stream, tdir)
+ comics = []
+ with CurrentDir(tdir):
+ if not os.path.exists('comics.txt'):
+ raise ValueError('%s is not a valid comic collection'
+ %stream.name)
+ for line in open('comics.txt',
+ 'rb').read().decode('utf-8').splitlines():
+ fname, title = line.partition(':')[0], line.partition(':')[-1]
+ fname = os.path.join(tdir, *fname.split('/'))
+ if not title:
+ title = os.path.basename(fname).rpartition('.')[0]
+ if os.access(fname, os.R_OK):
+ comics.append([title, fname])
+ if not comics:
+ raise ValueError('%s has no comics'%stream.name)
+ return comics
+
+ def get_pages(self, comic, tdir2):
+ tdir = extract_comic(comic)
+ new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
+ verbose=self.opts.verbose)
+ thumbnail = None
+ if not new_pages:
+ raise ValueError('Could not find any pages in the comic: %s'
+ %comic)
+ if self.opts.no_process:
+ n2 = []
+ for page in new_pages:
+ n2.append(os.path.join(tdir2, os.path.basename(page)))
+ shutil.copyfile(page, n2[-1])
+ new_pages = n2
+ else:
+ new_pages, failures = process_pages(new_pages, self.opts,
+ self.report_progress, tdir2)
+ if not new_pages:
+ raise ValueError('Could not find any valid pages in comic: %s'
+ % comic)
+ if failures:
+ self.log.warning('Could not process the following pages '
+ '(run with --verbose to see why):')
+ for f in failures:
+ self.log.warning('\t', f)
+ thumbnail = os.path.join(tdir2, 'thumbnail.png')
+ if not os.access(thumbnail, os.R_OK):
+ thumbnail = None
+ return new_pages
+
+ def get_images(self):
+ return self._images
+
+ def convert(self, stream, opts, file_ext, log, accelerators):
+ from calibre.ebooks.metadata import MetaInformation
+ from calibre.ebooks.metadata.opf2 import OPFCreator
+ from calibre.ebooks.metadata.toc import TOC
+
+ self.opts, self.log= opts, log
+ if file_ext == 'cbc':
+ comics_ = self.get_comics_from_collection(stream)
+ else:
+ comics_ = [['Comic', os.path.abspath(stream.name)]]
+ stream.close()
+ comics = []
+ for i, x in enumerate(comics_):
+ title, fname = x
+ cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
+ cdir = os.path.abspath(cdir)
+ if not os.path.exists(cdir):
+ os.makedirs(cdir)
+ pages = self.get_pages(fname, cdir)
+ if not pages: continue
+ wrappers = self.create_wrappers(pages)
+ comics.append((title, pages, wrappers))
+
+ if not comics:
+ raise ValueError('No comic pages found in %s'%stream.name)
+
+ mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
+ [_('Unknown')])
+ opf = OPFCreator(os.path.abspath('.'), mi)
+ entries = []
+
+ def href(x):
+ if len(comics) == 1: return os.path.basename(x)
+ return '/'.join(x.split(os.sep)[-2:])
+
+ for comic in comics:
+ pages, wrappers = comic[1:]
+ entries += [(w, None) for w in map(href, wrappers)] + \
+ [(x, None) for x in map(href, pages)]
+ opf.create_manifest(entries)
+ spine = []
+ for comic in comics:
+ spine.extend(map(href, comic[2]))
+ self._images = []
+ for comic in comics:
+ self._images.extend(comic[1])
+ opf.create_spine(spine)
+ toc = TOC()
+ if len(comics) == 1:
+ wrappers = comics[0][2]
+ for i, x in enumerate(wrappers):
+ toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
+ play_order=i)
+ else:
+ po = 0
+ for comic in comics:
+ po += 1
+ wrappers = comic[2]
+ stoc = toc.add_item(href(wrappers[0]),
+ None, comic[0], play_order=po)
+ for i, x in enumerate(wrappers):
+ stoc.add_item(href(x), None,
+ _('Page')+' %d'%(i+1), play_order=po)
+ po += 1
+ opf.set_toc(toc)
+ m, n = open('metadata.opf', 'wb'), open('toc.ncx', 'wb')
+ opf.render(m, n, 'toc.ncx')
+ return os.path.abspath('metadata.opf')
+
+ def create_wrappers(self, pages):
+ from calibre.ebooks.oeb.base import XHTML_NS
+ wrappers = []
+ WRAPPER = textwrap.dedent('''\
+
+
+ Page #%d
+
+
+
+
+
+
+
+
+ ''')
+ dir = os.path.dirname(pages[0])
+ for i, page in enumerate(pages):
+ wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
+ page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
+ open(page, 'wb').write(wrapper)
+ wrappers.append(page)
+ return wrappers
+
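
The keep_aspect_ratio branch in PageProcessor.process_pages scales a page to the profile's comic screen size and pads the short dimension with a border. The geometry alone, separated from the ImageMagick calls (the page and screen sizes below are made up):

    def fit_with_border(sizex, sizey, scr_w, scr_h):
        # Returns (new width, new height, x border, y border); the scaled page
        # plus a deltax/deltay border on each side roughly fills scr_w x scr_h.
        aspect = float(sizex) / float(sizey)
        if aspect <= float(scr_w) / float(scr_h):
            newsizey = scr_h
            newsizex = int(newsizey * aspect)
            deltax, deltay = (scr_w - newsizex) / 2, 0
        else:
            newsizex = scr_w
            newsizey = int(newsizex / aspect)
            deltax, deltay = 0, (scr_h - newsizey) / 2
        return newsizex, newsizey, deltax, deltay

    print fit_with_border(1000, 1600, 600, 800)   # -> (500, 800, 50, 0)
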
diff --git a/src/calibre/ebooks/compression/__init__.py b/src/calibre/ebooks/compression/__init__.py
new file mode 100644
index 0000000000..9e2aad729c
--- /dev/null
+++ b/src/calibre/ebooks/compression/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
diff --git a/src/calibre/ebooks/compression/palmdoc.c b/src/calibre/ebooks/compression/palmdoc.c
new file mode 100644
index 0000000000..29e9579140
--- /dev/null
+++ b/src/calibre/ebooks/compression/palmdoc.c
@@ -0,0 +1,204 @@
+/*
+:mod:`cPalmdoc` -- Palmdoc compression/decompression
+=====================================================
+
+.. module:: cPalmdoc
+ :platform: All
+ :synopsis: Compression and decompression of Palmdoc, implemented in C for speed
+
+.. moduleauthor:: Kovid Goyal Copyright 2009
+
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdlib.h>
+
+#define DELTA sizeof(Byte)*4096
+
+#define BUFFER 6000
+
+#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) )
+
+typedef unsigned short int Byte;
+typedef struct {
+ Byte *data;
+ Py_ssize_t len;
+} buffer;
+
+#ifdef bool
+#undef bool
+#endif
+#define bool int
+
+#ifdef false
+#undef false
+#endif
+#define false 0
+
+#ifdef true
+#undef true
+#endif
+#define true 1
+
+#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x))
+
+static PyObject *
+cpalmdoc_decompress(PyObject *self, PyObject *args) {
+ const char *_input = NULL; Py_ssize_t input_len = 0;
+ Py_ssize_t i = 0, o = 0, j = 0, di, n;
+ if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
+ return NULL;
+ Byte *input = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
+ if (input == NULL) return PyErr_NoMemory();
+ // Map chars to bytes
+ for (j = 0; j < input_len; j++)
+ input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
+ char *output = (char *)PyMem_Malloc(sizeof(char)*BUFFER);
+ Byte c;
+ PyObject *ans;
+ if (output == NULL) return PyErr_NoMemory();
+
+ while (i < input_len) {
+ c = input[i++];
+ if (c >= 1 && c <= 8) // copy 'c' bytes
+ while (c--) output[o++] = input[i++];
+
+ else if (c <= 0x7F) // 0, 09-7F = self
+ output[o++] = c;
+
+ else if (c >= 0xC0) { // space + ASCII char
+ output[o++] = ' ';
+ output[o++] = c ^ 0x80;
+ }
+ else { // 80-BF repeat sequences
+ c = (c << 8) + input[i++];
+ di = (c & 0x3FFF) >> 3;
+ for ( n = (c & 7) + 3; n--; ++o )
+ output[o] = output[o - di];
+ }
+ }
+ ans = Py_BuildValue("s#", output, o);
+ if (output != NULL) PyMem_Free(output);
+ if (input != NULL) PyMem_Free(input);
+ return ans;
+}
+
+static bool
+cpalmdoc_memcmp( Byte *a, Byte *b, Py_ssize_t len) {
+ Py_ssize_t i;
+ for (i = 0; i < len; i++) if (a[i] != b[i]) return false;
+ return true;
+}
+
+static Py_ssize_t
+cpalmdoc_rfind(Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) {
+ Py_ssize_t i;
+ for (i = pos - chunk_length; i > -1; i--)
+ if (cpalmdoc_memcmp(data+i, data+pos, chunk_length)) return i;
+ return pos;
+}
+
+
+static Py_ssize_t
+cpalmdoc_do_compress(buffer *b, char *output) {
+ Py_ssize_t i = 0, j, chunk_len, dist;
+ unsigned compound;
+ Byte c, n;
+ bool found;
+ char *head;
+ head = output;
+ buffer temp;
+ temp.data = (Byte *)PyMem_Malloc(sizeof(Byte)*8); temp.len = 0;
+ if (temp.data == NULL) return 0;
+ while (i < b->len) {
+ c = b->data[i];
+ //do repeats
+ if ( i > 10 && (b->len - i) > 10) {
+ found = false;
+ for (chunk_len = 10; chunk_len > 2; chunk_len--) {
+ j = cpalmdoc_rfind(b->data, i, chunk_len);
+ dist = i - j;
+ if (j < i && dist <= 2047) {
+ found = true;
+ compound = (dist << 3) + chunk_len-3;
+ *(output++) = CHAR(0x80 + (compound >> 8 ));
+ *(output++) = CHAR(compound & 0xFF);
+ i += chunk_len;
+ break;
+ }
+ }
+ if (found) continue;
+ }
+
+ //write single character
+ i++;
+ if (c == 32 && i < b->len) {
+ n = b->data[i];
+ if ( n >= 0x40 && n <= 0x7F) {
+ *(output++) = CHAR(n^0x80); i++; continue;
+ }
+ }
+ if (c == 0 || (c > 8 && c < 0x80))
+ *(output++) = CHAR(c);
+ else { // Write binary data
+ j = i;
+ temp.data[0] = c; temp.len = 1;
+ while (j < b->len && temp.len < 8) {
+ c = b->data[j];
+ if (c == 0 || (c > 8 && c < 0x80)) break;
+ temp.data[temp.len++] = c; j++;
+ }
+ i += temp.len - 1;
+ *(output++) = temp.len;
+ for (j=0; j < temp.len; j++) *(output++) = temp.data[j];
+ }
+ }
+ return output - head;
+}
+
+static PyObject *
+cpalmdoc_compress(PyObject *self, PyObject *args) {
+ const char *_input = NULL; Py_ssize_t input_len = 0;
+ Py_ssize_t j = 0;
+ buffer b;
+ if (!PyArg_ParseTuple(args, "t#", &_input, &input_len))
+ return NULL;
+ b.data = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
+ if (b.data == NULL) return PyErr_NoMemory();
+ // Map chars to bytes
+ for (j = 0; j < input_len; j++)
+ b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
+ b.len = input_len;
+ char *output = (char *)PyMem_Malloc(sizeof(char) * b.len);
+ if (output == NULL) return PyErr_NoMemory();
+ j = cpalmdoc_do_compress(&b, output);
+ if ( j == 0) return PyErr_NoMemory();
+ PyObject *ans = Py_BuildValue("s#", output, j);
+ PyMem_Free(output);
+ PyMem_Free(b.data);
+ return ans;
+}
+
+static PyMethodDef cPalmdocMethods[] = {
+ {"decompress", cpalmdoc_decompress, METH_VARARGS,
+ "decompress(bytestring) -> decompressed bytestring\n\n"
+ "Decompress a palmdoc compressed byte string. "
+ },
+
+ {"compress", cpalmdoc_compress, METH_VARARGS,
+ "compress(bytestring) -> compressed bytestring\n\n"
+ "Palmdoc compress a byte string. "
+ },
+ {NULL, NULL, 0, NULL}
+};
+
+PyMODINIT_FUNC
+initcPalmdoc(void) {
+ PyObject *m;
+ m = Py_InitModule3("cPalmdoc", cPalmdocMethods,
+ "Compress and decompress palmdoc strings."
+ );
+ if (m == NULL) return;
+}
+
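
For reference, cpalmdoc_decompress above handles four control-byte ranges: 0x00 and 0x09-0x7F stand for themselves, 0x01-0x08 introduce that many literal bytes, 0xC0-0xFF expand to a space plus the byte XOR 0x80, and 0x80-0xBF begin a two-byte back-reference packing an 11-bit distance with a 3-bit (length - 3) field. A small Python sketch of just the back-reference decoding, mirroring the bit arithmetic in the C code:

    def decode_backref(b1, b2):
        # b1 is in 0x80-0xBF; the low 14 bits of the pair hold the
        # distance (11 bits) and length-3 (3 bits).
        compound = (b1 << 8) | b2
        distance = (compound & 0x3FFF) >> 3
        length = (compound & 7) + 3
        return distance, length

    print decode_backref(0x80, 0x43)   # -> (8, 6): copy 6 bytes starting 8 back
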
diff --git a/src/calibre/ebooks/mobi/palmdoc.py b/src/calibre/ebooks/compression/palmdoc.py
similarity index 59%
rename from src/calibre/ebooks/mobi/palmdoc.py
rename to src/calibre/ebooks/compression/palmdoc.py
index ad65967d13..90dabcb5a8 100644
--- a/src/calibre/ebooks/mobi/palmdoc.py
+++ b/src/calibre/ebooks/compression/palmdoc.py
@@ -2,41 +2,46 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal ' \
- 'and Marshall T. Vandegrift '
+__copyright__ = '2008, Kovid Goyal '
from cStringIO import StringIO
from struct import pack
-COUNT_BITS = 3
+from calibre.constants import plugins
+cPalmdoc = plugins['cPalmdoc'][0]
+if not cPalmdoc:
+ raise RuntimeError(('Failed to load required cPalmdoc module: '
+ '%s')%plugins['cPalmdoc'][1])
def decompress_doc(data):
- buffer = [ord(i) for i in data]
- res = []
- i = 0
- while i < len(buffer):
- c = buffer[i]
- i += 1
- if c >= 1 and c <= 8:
- res.extend(buffer[i:i+c])
- i += c
- elif c <= 0x7f:
- res.append(c)
- elif c >= 0xc0:
- res.extend( (ord(' '), c^0x80) )
- else:
- c = (c << 8) + buffer[i]
- i += 1
- di = (c & 0x3fff) >> COUNT_BITS
- j = len(res)
- num = (c & ((1 << COUNT_BITS) - 1)) + 3
-
- for k in range( num ):
- res.append(res[j - di+k])
-
- return ''.join([chr(i) for i in res])
+ return cPalmdoc.decompress(data)
def compress_doc(data):
+ return cPalmdoc.compress(data)
+
+def test():
+ TESTS = [
+ 'abc\x03\x04\x05\x06ms', # Test binary writing
+ 'a b c \xfed ', # Test encoding of spaces
+ '0123456789axyz2bxyz2cdfgfo9iuyerh',
+ '0123456789asd0123456789asd|yyzzxxffhhjjkk',
+ ('ciewacnaq eiu743 r787q 0w% ; sa fd\xef\ffdxosac wocjp acoiecowei '
+ 'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ')
+ ]
+ for test in TESTS:
+ print 'Test:', repr(test)
+ print '\tTesting compression...'
+ good = py_compress_doc(test)
+ x = compress_doc(test)
+ print '\t\tgood:', repr(good)
+ print '\t\tx :', repr(x)
+ assert x == good
+ print '\tTesting decompression...'
+ print '\t\t', repr(decompress_doc(x))
+ assert decompress_doc(x) == test
+ print
+
+def py_compress_doc(data):
out = StringIO()
i = 0
ldata = len(data)
diff --git a/src/calibre/ebooks/conversion/__init__.py b/src/calibre/ebooks/conversion/__init__.py
new file mode 100644
index 0000000000..384ccfb79c
--- /dev/null
+++ b/src/calibre/ebooks/conversion/__init__.py
@@ -0,0 +1,4 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
new file mode 100644
index 0000000000..73e1a1e523
--- /dev/null
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -0,0 +1,224 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+'''
+Command line interface to conversion sub-system
+'''
+
+USAGE = '%prog ' + _('''\
+input_file output_file [options]
+
+Convert an ebook from one format to another.
+
+input_file is the input and output_file is the output. Both must be \
+specified as the first two arguments to the command.
+
+The output ebook format is guessed from the file extension of \
+output_file. output_file can also be of the special format .EXT where \
+EXT is the output file extension. In this case, the name of the output \
+file is derived from the name of the input file. Note that the filenames must \
+not start with a hyphen. Finally, if output_file has no extension, then \
+it is treated as a directory and an "open ebook" (OEB) consisting of HTML \
+files is written to that directory. These files are the files that would \
+normally have been passed to the output plugin.
+
+After specifying the input \
+and output file you can customize the conversion by specifying various \
+options. The available options depend on the input and output file types. \
+To get help on them specify the input and output file and then use the -h \
+option.
+
+For full documentation of the conversion system see
+''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html'
+
+import sys, os
+from optparse import OptionGroup, Option
+
+from calibre.utils.config import OptionParser
+from calibre.utils.logging import Log
+from calibre.constants import preferred_encoding
+from calibre.customize.conversion import OptionRecommendation
+
+def print_help(parser, log):
+ help = parser.format_help().encode(preferred_encoding, 'replace')
+ log(help)
+
+def check_command_line_options(parser, args, log):
+ if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'):
+ print_help(parser, log)
+ log.error('\n\nYou must specify the input AND output files')
+ raise SystemExit(1)
+
+ input = os.path.abspath(args[1])
+ if not input.endswith('.recipe') and not os.access(input, os.R_OK):
+ log.error('Cannot read from', input)
+ raise SystemExit(1)
+
+ output = args[2]
+ if output.startswith('.') and output != '.':
+ output = os.path.splitext(os.path.basename(input))[0]+output
+ output = os.path.abspath(output)
+
+ return input, output
+
+def option_recommendation_to_cli_option(add_option, rec):
+ opt = rec.option
+ switches = ['-'+opt.short_switch] if opt.short_switch else []
+ switches.append('--'+opt.long_switch)
+ attrs = dict(dest=opt.name, help=opt.help,
+ choices=opt.choices, default=rec.recommended_value)
+ if isinstance(rec.recommended_value, type(True)):
+ attrs['action'] = 'store_false' if rec.recommended_value else \
+ 'store_true'
+ add_option(Option(*switches, **attrs))
+
+def add_input_output_options(parser, plumber):
+ input_options, output_options = \
+ plumber.input_options, plumber.output_options
+
+ def add_options(group, options):
+ for opt in options:
+ option_recommendation_to_cli_option(group, opt)
+
+ if input_options:
+ title = _('INPUT OPTIONS')
+ io = OptionGroup(parser, title, _('Options to control the processing'
+ ' of the input %s file')%plumber.input_fmt)
+ add_options(io.add_option, input_options)
+ parser.add_option_group(io)
+
+ if output_options:
+ title = _('OUTPUT OPTIONS')
+ oo = OptionGroup(parser, title, _('Options to control the processing'
+ ' of the output %s')%plumber.output_fmt)
+ add_options(oo.add_option, output_options)
+ parser.add_option_group(oo)
+
+def add_pipeline_options(parser, plumber):
+ groups = {
+ '' : ('',
+ [
+ 'input_profile',
+ 'output_profile',
+ ]
+ ),
+ 'LOOK AND FEEL' : (
+ _('Options to control the look and feel of the output'),
+ [
+ 'base_font_size', 'disable_font_rescaling',
+ 'font_size_mapping',
+ 'line_height',
+ 'linearize_tables',
+ 'extra_css',
+ 'margin_top', 'margin_left', 'margin_right',
+ 'margin_bottom', 'dont_justify',
+ 'insert_blank_line', 'remove_paragraph_spacing',
+ ]
+ ),
+
+ 'STRUCTURE DETECTION' : (
+ _('Control auto-detection of document structure.'),
+ [
+ 'chapter', 'chapter_mark',
+ 'prefer_metadata_cover', 'remove_first_image',
+ 'insert_metadata', 'page_breaks_before',
+ 'preprocess_html',
+ ]
+ ),
+
+ 'TABLE OF CONTENTS' : (
+ _('Control the automatic generation of a Table of Contents. By '
+ 'default, if the source file has a Table of Contents, it will '
+ 'be used in preference to the automatically generated one.'),
+ [
+ 'level1_toc', 'level2_toc', 'level3_toc',
+ 'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
+ 'use_auto_toc', 'toc_filter',
+ ]
+ ),
+
+ 'METADATA' : (_('Options to set metadata in the output'),
+ plumber.metadata_option_names,
+ ),
+ 'DEBUG': (_('Options to help with debugging the conversion'),
+ [
+ 'verbose',
+ ]),
+
+
+ }
+
+ group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
+ 'TABLE OF CONTENTS', 'METADATA', 'DEBUG']
+
+ for group in group_order:
+ desc, options = groups[group]
+ if group:
+ group = OptionGroup(parser, group, desc)
+ parser.add_option_group(group)
+ add_option = group.add_option if group != '' else parser.add_option
+
+ for name in options:
+ rec = plumber.get_option_by_name(name)
+ if rec.level < rec.HIGH:
+ option_recommendation_to_cli_option(add_option, rec)
+
+ option_recommendation_to_cli_option(parser.add_option,
+ plumber.get_option_by_name('list_recipes'))
+
+def option_parser():
+ return OptionParser(usage=USAGE)
+
+
+class ProgressBar(object):
+
+ def __init__(self, log):
+ self.log = log
+
+ def __call__(self, frac, msg=''):
+ if msg:
+ percent = int(frac*100)
+ self.log('%d%% %s'%(percent, msg))
+
+def create_option_parser(args, log):
+ parser = option_parser()
+ if len(args) < 3:
+ print_help(parser, log)
+ raise SystemExit(1)
+
+ input, output = check_command_line_options(parser, args, log)
+
+ from calibre.ebooks.conversion.plumber import Plumber
+
+ reporter = ProgressBar(log)
+ plumber = Plumber(input, output, log, reporter)
+ add_input_output_options(parser, plumber)
+ add_pipeline_options(parser, plumber)
+
+ return parser, plumber
+
+def main(args=sys.argv):
+ log = Log()
+ parser, plumber = create_option_parser(args, log)
+ opts = parser.parse_args(args)[0]
+ y = lambda q : os.path.abspath(os.path.expanduser(q))
+ for x in ('read_metadata_from_opf', 'cover'):
+ if getattr(opts, x, None) is not None:
+ setattr(opts, x, y(getattr(opts, x)))
+ recommendations = [(n.dest, getattr(opts, n.dest),
+ OptionRecommendation.HIGH) \
+ for n in parser.options_iter()
+ if n.dest]
+ plumber.merge_ui_recommendations(recommendations)
+
+ plumber.run()
+
+ if plumber.opts.debug_input is None:
+ log(_('Output saved to'), ' ', plumber.output)
+
+ return 0
+
+if __name__ == '__main__':
+ sys.exit(main())
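The .EXT shorthand described in the usage text is resolved inside check_command_line_options; a small standalone illustration of that rule (sketch only; resolve_output is a hypothetical helper that mirrors the logic above):

    import os

    def resolve_output(input_path, output_arg):
        # '.epub' becomes '<input basename>.epub'; other values are used as given.
        if output_arg.startswith('.') and output_arg != '.':
            output_arg = os.path.splitext(os.path.basename(input_path))[0] + output_arg
        return os.path.abspath(output_arg)

    print resolve_output('/books/old/mybook.mobi', '.epub')   # -> <cwd>/mybook.epub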
diff --git a/src/calibre/ebooks/conversion/config.py b/src/calibre/ebooks/conversion/config.py
new file mode 100644
index 0000000000..e8b923a1d7
--- /dev/null
+++ b/src/calibre/ebooks/conversion/config.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.utils.config import config_dir
+from calibre.utils.lock import ExclusiveFile
+from calibre import sanitize_file_name
+from calibre.customize.conversion import OptionRecommendation
+
+
+config_dir = os.path.join(config_dir, 'conversion')
+if not os.path.exists(config_dir):
+ os.makedirs(config_dir)
+
+def name_to_path(name):
+ return os.path.join(config_dir, sanitize_file_name(name)+'.py')
+
+def save_defaults(name, recs):
+ path = name_to_path(name)
+ raw = str(recs)
+ with open(path, 'wb'):
+ pass
+ with ExclusiveFile(path) as f:
+ f.write(raw)
+
+def load_defaults(name):
+ path = name_to_path(name)
+ if not os.path.exists(path):
+ open(path, 'wb').close()
+ with ExclusiveFile(path) as f:
+ raw = f.read()
+ r = GuiRecommendations()
+ if raw:
+ r.from_string(raw)
+ return r
+
+def save_specifics(db, book_id, recs):
+ raw = str(recs)
+ db.set_conversion_options(book_id, 'PIPE', raw)
+
+def load_specifics(db, book_id):
+ raw = db.conversion_options(book_id, 'PIPE')
+ r = GuiRecommendations()
+ if raw:
+ r.from_string(raw)
+ return r
+
+class GuiRecommendations(dict):
+
+ def __new__(cls, *args):
+ dict.__new__(cls)
+ obj = super(GuiRecommendations, cls).__new__(cls, *args)
+ obj.disabled_options = set([])
+ return obj
+
+ def to_recommendations(self, level=OptionRecommendation.LOW):
+ ans = []
+ for key, val in self.items():
+ ans.append((key, val, level))
+ return ans
+
+ def __str__(self):
+ ans = ['{']
+ for key, val in self.items():
+ ans.append('\t'+repr(key)+' : '+repr(val)+',')
+ ans.append('}')
+ return '\n'.join(ans)
+
+ def from_string(self, raw):
+ try:
+ d = eval(raw)
+ except SyntaxError:
+ d = None
+ if d:
+ self.update(d)
+
+ def merge_recommendations(self, get_option, level, options,
+ only_existing=False):
+ for name in options:
+ if only_existing and name not in self:
+ continue
+ opt = get_option(name)
+ if opt is None: continue
+ if opt.level == OptionRecommendation.HIGH:
+ self[name] = opt.recommended_value
+ self.disabled_options.add(name)
+ elif opt.level > level or name not in self:
+ self[name] = opt.recommended_value
+
+
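Saved defaults are simply the repr of a dict written under the conversion config directory and read back with eval, so a round trip through the helpers above looks like this (sketch only; the plugin name is illustrative):

    from calibre.ebooks.conversion.config import (GuiRecommendations,
            save_defaults, load_defaults)
    from calibre.customize.conversion import OptionRecommendation

    recs = GuiRecommendations()
    recs['base_font_size'] = 12.0
    recs['dont_justify'] = True
    save_defaults('EPUB Output', recs)    # writes <config_dir>/conversion/<sanitized name>.py

    loaded = load_defaults('EPUB Output')
    assert loaded['base_font_size'] == 12.0
    # Hand the values to the pipeline as (name, value, level) tuples.
    ui_recs = loaded.to_recommendations(OptionRecommendation.HIGH)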
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
new file mode 100644
index 0000000000..9bab5d6701
--- /dev/null
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -0,0 +1,690 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import os, re
+
+from calibre.customize.conversion import OptionRecommendation, DummyReporter
+from calibre.customize.ui import input_profiles, output_profiles, \
+ plugin_for_input_format, plugin_for_output_format
+from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre import extract, walk
+
+def supported_input_formats():
+ from calibre.customize.ui import available_input_formats
+ fmts = available_input_formats()
+ for x in ('zip', 'rar', 'oebzip'):
+ fmts.add(x)
+ return fmts
+
+INPUT_FORMAT_PREFERENCES = ['cbr', 'cbz', 'cbc', 'lit', 'mobi', 'prc', 'azw', 'fb2', 'html',
+ 'rtf', 'pdf', 'txt', 'pdb']
+OUTPUT_FORMAT_PREFERENCES = ['epub', 'mobi', 'lit', 'pdf', 'pdb', 'txt']
+
+class OptionValues(object):
+ pass
+
+class CompositeProgressReporter(object):
+
+ def __init__(self, global_min, global_max, global_reporter):
+ self.global_min, self.global_max = global_min, global_max
+ self.global_reporter = global_reporter
+
+ def __call__(self, fraction, msg=''):
+ global_frac = self.global_min + fraction * \
+ (self.global_max - self.global_min)
+ self.global_reporter(global_frac, msg)
+
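CompositeProgressReporter linearly maps a stage's local 0-1 progress onto its slice of the overall conversion; run() below gives the input plugin the slice [0.01, 0.34] and the output plugin [0.67, 1.0]. A tiny illustration (sketch only; the print callback stands in for a real UI reporter):

    import sys
    from calibre.ebooks.conversion.plumber import CompositeProgressReporter

    printer = lambda frac, msg='': sys.stdout.write('%3d%% %s\n' % (int(frac * 100), msg))
    ir = CompositeProgressReporter(0.01, 0.34, printer)
    ir(0.5, 'input plugin halfway done')   # 0.01 + 0.5*(0.34 - 0.01) = 0.175 -> 17%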
+class Plumber(object):
+ '''
+    The `Plumber` manages the conversion pipeline. A UI should call the methods
+ :method:`merge_ui_recommendations` and then :method:`run`. The plumber will
+ take care of the rest.
+ '''
+
+ metadata_option_names = [
+ 'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
+ 'publisher', 'series', 'series_index', 'rating', 'isbn',
+ 'tags', 'book_producer', 'language'
+ ]
+
+ def __init__(self, input, output, log, report_progress=DummyReporter()):
+ '''
+ :param input: Path to input file.
+ :param output: Path to output file/directory
+ '''
+ self.input = os.path.abspath(input)
+ self.output = os.path.abspath(output)
+ self.log = log
+ self.ui_reporter = report_progress
+
+ # Initialize the conversion options that are independent of input and
+ # output formats. The input and output plugins can still disable these
+ # options via recommendations.
+ self.pipeline_options = [
+
+OptionRecommendation(name='verbose',
+ recommended_value=0, level=OptionRecommendation.LOW,
+ short_switch='v',
+ help=_('Level of verbosity. Specify multiple times for greater '
+ 'verbosity.')
+ ),
+
+OptionRecommendation(name='input_profile',
+ recommended_value='default', level=OptionRecommendation.LOW,
+ choices=[x.short_name for x in input_profiles()],
+ help=_('Specify the input profile. The input profile gives the '
+ 'conversion system information on how to interpret '
+ 'various information in the input document. For '
+ 'example resolution dependent lengths (i.e. lengths in '
+ 'pixels). Choices are:')+\
+ ', '.join([x.short_name for x in input_profiles()])
+ ),
+
+OptionRecommendation(name='output_profile',
+ recommended_value='default', level=OptionRecommendation.LOW,
+ choices=[x.short_name for x in output_profiles()],
+ help=_('Specify the output profile. The output profile '
+ 'tells the conversion system how to optimize the '
+ 'created document for the specified device. In some cases, '
+ 'an output profile is required to produce documents that '
+ 'will work on a device. For example EPUB on the SONY reader. '
+ 'Choices are:') + \
+ ', '.join([x.short_name for x in output_profiles()])
+ ),
+
+OptionRecommendation(name='base_font_size',
+ recommended_value=0, level=OptionRecommendation.LOW,
+ help=_('The base font size in pts. All font sizes in the produced book '
+ 'will be rescaled based on this size. By choosing a larger '
+ 'size you can make the fonts in the output bigger and vice '
+ 'versa. By default, the base font size is chosen based on '
+ 'the output profile you chose.'
+ )
+ ),
+
+OptionRecommendation(name='font_size_mapping',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Mapping from CSS font names to font sizes in pts. '
+ 'An example setting is 12,12,14,16,18,20,22,24. '
+ 'These are the mappings for the sizes xx-small to xx-large, '
+ 'with the final size being for huge fonts. The font '
+ 'rescaling algorithm uses these sizes to intelligently '
+ 'rescale fonts. The default is to use a mapping based on '
+ 'the output profile you chose.'
+ )
+ ),
+
+OptionRecommendation(name='disable_font_rescaling',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Disable all rescaling of font sizes.'
+ )
+ ),
+
+
+OptionRecommendation(name='line_height',
+ recommended_value=0, level=OptionRecommendation.LOW,
+ help=_('The line height in pts. Controls spacing between consecutive '
+ 'lines of text. By default no line height manipulation is '
+ 'performed.'
+ )
+ ),
+
+OptionRecommendation(name='linearize_tables',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Some badly designed documents use tables to control the '
+ 'layout of text on the page. When converted these documents '
+ 'often have text that runs off the page and other artifacts. '
+ 'This option will extract the content from the tables and '
+ 'present it in a linear fashion.'
+ )
+ ),
+
+OptionRecommendation(name='level1_toc',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('XPath expression that specifies all tags that '
+ 'should be added to the Table of Contents at level one. If '
+ 'this is specified, it takes precedence over other forms '
+ 'of auto-detection.'
+ )
+ ),
+
+OptionRecommendation(name='level2_toc',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('XPath expression that specifies all tags that should be '
+ 'added to the Table of Contents at level two. Each entry is added '
+ 'under the previous level one entry.'
+ )
+ ),
+
+OptionRecommendation(name='level3_toc',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('XPath expression that specifies all tags that should be '
+ 'added to the Table of Contents at level three. Each entry '
+ 'is added under the previous level two entry.'
+ )
+ ),
+
+OptionRecommendation(name='use_auto_toc',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Normally, if the source file already has a Table of '
+ 'Contents, it is used in preference to the auto-generated one. '
+ 'With this option, the auto-generated one is always used.'
+ )
+ ),
+
+OptionRecommendation(name='no_chapters_in_toc',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_("Don't add auto-detected chapters to the Table of "
+ 'Contents.'
+ )
+ ),
+
+OptionRecommendation(name='toc_threshold',
+ recommended_value=6, level=OptionRecommendation.LOW,
+ help=_(
+ 'If fewer than this number of chapters is detected, then links '
+ 'are added to the Table of Contents. Default: %default')
+ ),
+
+OptionRecommendation(name='max_toc_links',
+ recommended_value=50, level=OptionRecommendation.LOW,
+ help=_('Maximum number of links to insert into the TOC. Set to 0 '
+ 'to disable. Default is: %default. Links are only added to the '
+ 'TOC if less than the threshold number of chapters were detected.'
+ )
+ ),
+
+OptionRecommendation(name='toc_filter',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Remove entries from the Table of Contents whose titles '
+ 'match the specified regular expression. Matching entries and all '
+ 'their children are removed.'
+ )
+ ),
+
+
+OptionRecommendation(name='chapter',
+ recommended_value="//*[((name()='h1' or name()='h2') and "
+ r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
+ "= 'chapter']", level=OptionRecommendation.LOW,
+ help=_('An XPath expression to detect chapter titles. The default '
+        'is to consider <h1> or <h2> tags that contain the words '
+ '"chapter","book","section" or "part" as chapter titles as '
+ 'well as any tags that have class="chapter". The expression '
+ 'used must evaluate to a list of elements. To disable chapter '
+ 'detection, use the expression "/". See the XPath Tutorial '
+ 'in the calibre User Manual for further help on using this '
+ 'feature.'
+ )
+ ),
+
+OptionRecommendation(name='chapter_mark',
+ recommended_value='pagebreak', level=OptionRecommendation.LOW,
+ choices=['pagebreak', 'rule', 'both', 'none'],
+ help=_('Specify how to mark detected chapters. A value of '
+ '"pagebreak" will insert page breaks before chapters. '
+ 'A value of "rule" will insert a line before chapters. '
+ 'A value of "none" will disable chapter marking and a '
+ 'value of "both" will use both page breaks and lines '
+ 'to mark chapters.')
+ ),
+
+OptionRecommendation(name='extra_css',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Either the path to a CSS stylesheet or raw CSS. '
+ 'This CSS will be appended to the style rules from '
+ 'the source file, so it can be used to override those '
+ 'rules.')
+ ),
+
+OptionRecommendation(name='page_breaks_before',
+ recommended_value="//*[name()='h1' or name()='h2']",
+ level=OptionRecommendation.LOW,
+ help=_('An XPath expression. Page breaks are inserted '
+ 'before the specified elements.')
+ ),
+
+OptionRecommendation(name='margin_top',
+ recommended_value=5.0, level=OptionRecommendation.LOW,
+ help=_('Set the top margin in pts. Default is %default. '
+ 'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='margin_bottom',
+ recommended_value=5.0, level=OptionRecommendation.LOW,
+ help=_('Set the bottom margin in pts. Default is %default. '
+ 'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='margin_left',
+ recommended_value=5.0, level=OptionRecommendation.LOW,
+ help=_('Set the left margin in pts. Default is %default. '
+ 'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='margin_right',
+ recommended_value=5.0, level=OptionRecommendation.LOW,
+ help=_('Set the right margin in pts. Default is %default. '
+ 'Note: 72 pts equals 1 inch')),
+
+OptionRecommendation(name='dont_justify',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Do not force text to be justified in output. Whether text '
+ 'is actually displayed justified or not depends on whether '
+ 'the ebook format and reading device support justification.')
+ ),
+
+OptionRecommendation(name='remove_paragraph_spacing',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Remove spacing between paragraphs. Also sets an indent on '
+ 'paragraphs of 1.5em. Spacing removal will not work '
+        'if the source file does not use paragraphs (<p> or <div> tags).')
+ ),
+
+OptionRecommendation(name='prefer_metadata_cover',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Use the cover detected from the source file in preference '
+ 'to the specified cover.')
+ ),
+
+OptionRecommendation(name='insert_blank_line',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Insert a blank line between paragraphs. Will not work '
+        'if the source file does not use paragraphs (<p> or <div> tags).'
+ )
+ ),
+
+OptionRecommendation(name='remove_first_image',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Remove the first image from the input ebook. Useful if the '
+ 'first image in the source file is a cover and you are specifying '
+ 'an external cover.'
+ )
+ ),
+
+OptionRecommendation(name='insert_metadata',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Insert the book metadata at the start of '
+ 'the book. This is useful if your ebook reader does not support '
+ 'displaying/searching metadata directly.'
+ )
+ ),
+
+OptionRecommendation(name='preprocess_html',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Attempt to detect and correct hard line breaks and other '
+ 'problems in the source file. This may make things worse, so use '
+ 'with care.'
+ )
+ ),
+
+
+OptionRecommendation(name='read_metadata_from_opf',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ short_switch='m',
+ help=_('Read metadata from the specified OPF file. Metadata read '
+ 'from this file will override any metadata in the source '
+ 'file.')
+ ),
+
+
+OptionRecommendation(name='title',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the title.')),
+
+OptionRecommendation(name='authors',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the authors. Multiple authors should be separated by '
+ 'ampersands.')),
+
+OptionRecommendation(name='title_sort',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('The version of the title to be used for sorting. ')),
+
+OptionRecommendation(name='author_sort',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('String to be used when sorting by author. ')),
+
+OptionRecommendation(name='cover',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the cover to the specified file.')),
+
+OptionRecommendation(name='comments',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the ebook description.')),
+
+OptionRecommendation(name='publisher',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the ebook publisher.')),
+
+OptionRecommendation(name='series',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the series this ebook belongs to.')),
+
+OptionRecommendation(name='series_index',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the index of the book in this series.')),
+
+OptionRecommendation(name='rating',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the rating. Should be a number between 1 and 5.')),
+
+OptionRecommendation(name='isbn',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the ISBN of the book.')),
+
+OptionRecommendation(name='tags',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the tags for the book. Should be a comma separated list.')),
+
+OptionRecommendation(name='book_producer',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the book producer.')),
+
+OptionRecommendation(name='language',
+ recommended_value=None, level=OptionRecommendation.LOW,
+ help=_('Set the language.')),
+
+OptionRecommendation(name='list_recipes',
+ recommended_value=False, help=_('List available recipes.')),
+
+]
+
+ input_fmt = os.path.splitext(self.input)[1]
+ if not input_fmt:
+ raise ValueError('Input file must have an extension')
+ input_fmt = input_fmt[1:].lower()
+ if input_fmt in ('zip', 'rar', 'oebzip'):
+ self.log('Processing archive...')
+ tdir = PersistentTemporaryDirectory('_plumber')
+ self.input, input_fmt = self.unarchive(self.input, tdir)
+
+ if os.path.exists(self.output) and os.path.isdir(self.output):
+ output_fmt = 'oeb'
+ else:
+ output_fmt = os.path.splitext(self.output)[1]
+ if not output_fmt:
+ output_fmt = '.oeb'
+ output_fmt = output_fmt[1:].lower()
+
+ self.input_plugin = plugin_for_input_format(input_fmt)
+ self.output_plugin = plugin_for_output_format(output_fmt)
+
+ if self.input_plugin is None:
+ raise ValueError('No plugin to handle input format: '+input_fmt)
+
+ if self.output_plugin is None:
+ raise ValueError('No plugin to handle output format: '+output_fmt)
+
+ self.input_fmt = input_fmt
+ self.output_fmt = output_fmt
+
+ # Build set of all possible options. Two options are equal if their
+ # names are the same.
+ self.input_options = self.input_plugin.options.union(
+ self.input_plugin.common_options)
+ self.output_options = self.output_plugin.options.union(
+ self.output_plugin.common_options)
+
+ # Remove the options that have been disabled by recommendations from the
+ # plugins.
+ self.merge_plugin_recommendations()
+
+ @classmethod
+ def unarchive(self, path, tdir):
+ extract(path, tdir)
+ files = list(walk(tdir))
+ from calibre.customize.ui import available_input_formats
+ fmts = available_input_formats()
+ for x in ('htm', 'html', 'xhtm', 'xhtml'): fmts.remove(x)
+
+ for ext in fmts:
+ for f in files:
+ if f.lower().endswith('.'+ext):
+ if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
+ continue
+ return f, ext
+ return self.find_html_index(files)
+
+ @classmethod
+ def find_html_index(self, files):
+ '''
+ Given a list of files, find the most likely root HTML file in the
+ list.
+ '''
+ html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
+ html_files = [f for f in files if html_pat.search(f) is not None]
+ if not html_files:
+ raise ValueError(_('Could not find an ebook inside the archive'))
+ html_files = [(f, os.stat(f).st_size) for f in html_files]
+ html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
+ html_files = [f[0] for f in html_files]
+ for q in ('toc', 'index'):
+ for f in html_files:
+ if os.path.splitext(os.path.basename(f))[0].lower() == q:
+ return f, os.path.splitext(f)[1].lower()[1:]
+ return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
+
+
+
+ def get_option_by_name(self, name):
+ for group in (self.input_options, self.pipeline_options,
+ self.output_options):
+ for rec in group:
+ if rec.option == name:
+ return rec
+
+ def get_option_help(self, name):
+ rec = self.get_option_by_name(name)
+ help = getattr(rec, 'help', None)
+ if help is not None:
+ return help.replace('%default', str(rec.recommended_value))
+
+ def merge_plugin_recommendations(self):
+ for source in (self.input_plugin, self.output_plugin):
+ for name, val, level in source.recommendations:
+ rec = self.get_option_by_name(name)
+ if rec is not None and rec.level <= level:
+ rec.recommended_value = val
+ rec.level = level
+
+ def merge_ui_recommendations(self, recommendations):
+ '''
+ Merge recommendations from the UI. As long as the UI recommendation
+ level is >= the baseline recommended level, the UI value is used,
+ *except* if the baseline has a recommendation level of `HIGH`.
+ '''
+ for name, val, level in recommendations:
+ rec = self.get_option_by_name(name)
+ if rec is not None and rec.level <= level and rec.level < rec.HIGH:
+ rec.recommended_value = val
+ rec.level = level
+
+ def read_user_metadata(self):
+ '''
+ Read all metadata specified by the user. Command line options override
+ metadata from a specified OPF file.
+ '''
+ from calibre.ebooks.metadata import MetaInformation, string_to_authors
+ from calibre.ebooks.metadata.opf2 import OPF
+ mi = MetaInformation(None, [])
+ if self.opts.read_metadata_from_opf is not None:
+ self.opts.read_metadata_from_opf = os.path.abspath(
+ self.opts.read_metadata_from_opf)
+ opf = OPF(open(self.opts.read_metadata_from_opf, 'rb'),
+ os.path.dirname(self.opts.read_metadata_from_opf))
+ mi = MetaInformation(opf)
+ for x in self.metadata_option_names:
+ val = getattr(self.opts, x, None)
+ if val is not None:
+ if x == 'authors':
+ val = string_to_authors(val)
+ elif x == 'tags':
+ val = [i.strip() for i in val.split(',')]
+ elif x in ('rating', 'series_index'):
+ val = float(val)
+ setattr(mi, x, val)
+ if mi.cover:
+ mi.cover_data = ('', open(mi.cover, 'rb').read())
+ mi.cover = None
+ self.user_metadata = mi
+
+ def setup_options(self):
+ '''
+ Setup the `self.opts` object.
+ '''
+ self.opts = OptionValues()
+ for group in (self.input_options, self.pipeline_options,
+ self.output_options):
+ for rec in group:
+ setattr(self.opts, rec.option.name, rec.recommended_value)
+
+ for x in input_profiles():
+ if x.short_name == self.opts.input_profile:
+ self.opts.input_profile = x
+ break
+
+ for x in output_profiles():
+ if x.short_name == self.opts.output_profile:
+ self.opts.output_profile = x
+ break
+
+ self.read_user_metadata()
+
+ def run(self):
+ '''
+ Run the conversion pipeline
+ '''
+ # Setup baseline option values
+ self.setup_options()
+ if self.opts.verbose:
+ self.log.filter_level = self.log.DEBUG
+ if self.opts.list_recipes:
+ from calibre.web.feeds.recipes import titles
+ self.log('Available recipes:')
+ for title in sorted(titles):
+ self.log('\t'+title)
+ self.log('%d recipes available'%len(titles))
+ raise SystemExit(0)
+
+ # Run any preprocess plugins
+ from calibre.customize.ui import run_plugins_on_preprocess
+ self.input = run_plugins_on_preprocess(self.input)
+
+ # Create an OEBBook from the input file. The input plugin does all the
+ # heavy lifting.
+ accelerators = {}
+
+ tdir = PersistentTemporaryDirectory('_plumber')
+ stream = self.input if self.input_fmt == 'recipe' else \
+ open(self.input, 'rb')
+
+ if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
+ self.opts.lrf = True
+
+ self.ui_reporter(0.01, _('Converting input to HTML...'))
+ ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
+ self.input_plugin.report_progress = ir
+ self.oeb = self.input_plugin(stream, self.opts,
+ self.input_fmt, self.log,
+ accelerators, tdir)
+ if self.opts.debug_input is not None:
+ self.log('Debug input called, aborting the rest of the pipeline.')
+ return
+ if not hasattr(self.oeb, 'manifest'):
+ self.oeb = create_oebbook(self.log, self.oeb, self.opts,
+ self.input_plugin)
+ pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
+ pr(0., _('Running transforms on ebook...'))
+
+ from calibre.ebooks.oeb.transforms.guide import Clean
+ Clean()(self.oeb, self.opts)
+ pr(0.1)
+
+ self.opts.source = self.opts.input_profile
+ self.opts.dest = self.opts.output_profile
+
+ from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
+ MergeMetadata()(self.oeb, self.user_metadata,
+ self.opts.prefer_metadata_cover)
+ pr(0.2)
+
+ from calibre.ebooks.oeb.transforms.structure import DetectStructure
+ DetectStructure()(self.oeb, self.opts)
+ pr(0.35)
+
+ from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
+ fbase = self.opts.base_font_size
+ if fbase < 1e-4:
+ fbase = float(self.opts.dest.fbase)
+ fkey = self.opts.font_size_mapping
+ if fkey is None:
+ fkey = self.opts.dest.fkey
+ else:
+ fkey = map(float, fkey.split(','))
+
+ from calibre.ebooks.oeb.transforms.jacket import Jacket
+ Jacket()(self.oeb, self.opts, self.user_metadata)
+ pr(0.4)
+
+ if self.opts.extra_css and os.path.exists(self.opts.extra_css):
+ self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
+
+ oibl = self.opts.insert_blank_line
+ orps = self.opts.remove_paragraph_spacing
+ if self.output_plugin.file_type == 'lrf':
+ self.opts.insert_blank_line = False
+ self.opts.remove_paragraph_spacing = False
+ line_height = self.opts.line_height
+ if line_height < 1e-4:
+ line_height = None
+ flattener = CSSFlattener(fbase=fbase, fkey=fkey,
+ lineh=line_height,
+ untable=self.output_plugin.file_type in ('mobi','lit'),
+ unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
+ flattener(self.oeb, self.opts)
+ self.opts.insert_blank_line = oibl
+ self.opts.remove_paragraph_spacing = orps
+
+ if self.opts.linearize_tables and \
+ self.output_plugin.file_type not in ('mobi', 'lrf'):
+ from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
+ LinearizeTables()(self.oeb, self.opts)
+ pr(0.9)
+
+ from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
+
+ self.log.info('Cleaning up manifest...')
+ trimmer = ManifestTrimmer()
+ trimmer(self.oeb, self.opts)
+
+ self.oeb.toc.rationalize_play_orders()
+ pr(1.)
+
+ self.log.info('Creating %s...'%self.output_plugin.name)
+ our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
+ self.output_plugin.report_progress = our
+ our(0., _('Creating')+' %s'%self.output_plugin.name)
+ self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
+ self.opts, self.log)
+ self.ui_reporter(1.)
+ self.log(self.output_fmt.upper(), 'output written to', self.output)
+
+def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None,
+ encoding='utf-8'):
+ '''
+ Create an OEBBook.
+ '''
+ from calibre.ebooks.oeb.base import OEBBook
+ html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
+ opts.preprocess_html)
+ oeb = OEBBook(log, html_preprocessor,
+ pretty_print=opts.pretty_print, input_encoding=encoding)
+ # Read OEB Book into OEBBook
+ log('Parsing all content...')
+ if reader is None:
+ from calibre.ebooks.oeb.reader import OEBReader
+ reader = OEBReader
+
+ reader()(oeb, path_or_stream)
+ return oeb
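As the Plumber docstring says, a caller builds a Plumber, merges its recommendations and calls run(); cli.py above does exactly this from the command line. A condensed programmatic sketch (paths and metadata values are illustrative):

    from calibre.utils.logging import Log
    from calibre.customize.conversion import OptionRecommendation
    from calibre.ebooks.conversion.plumber import Plumber

    log = Log()
    plumber = Plumber('/tmp/book.mobi', '/tmp/book.epub', log)
    plumber.merge_ui_recommendations([
        ('base_font_size', 14.0, OptionRecommendation.HIGH),
        ('authors', 'John Doe', OptionRecommendation.HIGH),
    ])
    plumber.run()
    log('Output written to', plumber.output)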
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
new file mode 100644
index 0000000000..2dc404e586
--- /dev/null
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import re, functools
+
+from calibre import entity_to_unicode
+
+XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
+SVG_NS = 'http://www.w3.org/2000/svg'
+XLINK_NS = 'http://www.w3.org/1999/xlink'
+
+convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', 'apos', 'lt', 'gt', 'amp'])
+_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
+
+
+def sanitize_head(match):
+ x = match.group(1)
+ x = _span_pat.sub('', x)
+    return '<head>\n'+x+'\n</head>'
+
+def chap_head(match):
+ chap = match.group('chap')
+ title = match.group('title')
+ if not title:
+        return '<h1>'+chap+'</h1><br/>\n'
+    else:
+        return '<h1>'+chap+'<br/>\n'+title+'</h1><br/>\n'
+
+def wrap_lines(match):
+ ital = match.group('ital')
+ if not ital:
+ return ' '
+ else:
+ return ital+' '
+
+def line_length(raw, percent):
+ '''
+ raw is the raw text to find the line length to use for wrapping.
+    percent is a decimal number, 0 - 1, which is used to determine
+ how far in the list of line lengths to use.
+ '''
+    raw = raw.replace('&nbsp;', ' ')
+    linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL)
+ lines = linere.findall(raw)
+
+ lengths = []
+ for line in lines:
+ if len(line) > 0:
+ lengths.append(len(line))
+ total = sum(lengths)
+ avg = total / len(lengths)
+ max_line = avg * 2
+
+ lengths = sorted(lengths)
+ for i in range(len(lengths) - 1, -1, -1):
+ if lengths[i] > max_line:
+ del lengths[i]
+
+ if percent > 1:
+ percent = 1
+ if percent < 0:
+ percent = 0
+
+ index = int(len(lengths) * percent) - 1
+
+ return lengths[index]
+
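line_length feeds the unwrap heuristic applied to pdftohtml output below: it collects the lengths of text runs between <br> tags, drops outliers longer than twice the average, and picks a value at the requested point in the sorted list. A small worked example (sketch only; the markup is illustrative):

    from calibre.ebooks.conversion.preprocess import line_length

    raw = ('<br>one short line<br>'
           'a considerably longer line of text that sets the scale<br>'
           'another reasonably long line of broken pdf text here<br>'
           'and one more medium length line to round things out<br>')
    # __call__ below passes percent=0.3, i.e. a length from the lower end of
    # the distribution, as the threshold for re-joining hard-wrapped lines.
    print line_length(raw, 0.3)   # -> 14 for this sample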
+
+class CSSPreProcessor(object):
+
+ PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
+
+ def __call__(self, data):
+ data = self.PAGE_PAT.sub('', data)
+ return data
+
+class HTMLPreProcessor(object):
+
+ PREPROCESS = [
+ # Some idiotic HTML generators (Frontpage I'm looking at you)
+        # Put all sorts of crap into <head>. This messes up lxml
+        (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
+ sanitize_head),
+ # Convert all entities, since lxml doesn't handle them well
+ (re.compile(r'&(\S+?);'), convert_entities),
+        # Remove the <![if/endif tags inserted by everybody's darling, MS Word
+        (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
+ lambda match: ''),
+ ]
+
+ # Fix pdftohtml markup
+ PDFTOHTML = [
+ # Fix umlauts
+        (re.compile(u'¨\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ö'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ö'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ü'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ü'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ë'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ë'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ï'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
+        (re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
+
+ # Remove page links
+        (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
+        # Remove <hr> tags
+        (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '&nbsp;'),
+        # Replace <br><br> with <p>
+        (re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'),
+        ]
+
+    # Fix Book Designer markup
+    BOOK_DESIGNER = [
+        # Create header tags
+        (re.compile('<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>([^><]*?)</h2>', re.IGNORECASE|re.DOTALL),
+        lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
+        (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+        lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
+        (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+        lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
+ ]
+ def __init__(self, input_plugin_preprocess, plugin_preprocess):
+ self.input_plugin_preprocess = input_plugin_preprocess
+ self.plugin_preprocess = plugin_preprocess
+
+    def is_baen(self, src):
+        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
+                          re.IGNORECASE).search(src) is not None
+
+    def is_book_designer(self, raw):
+        return re.search('<h2[^><]*id=BookTitle', raw) is not None
+
+    def is_pdftohtml(self, src):
+        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
+
+ def __call__(self, html, remove_special_chars=None):
+ if remove_special_chars is not None:
+ html = remove_special_chars.sub('', html)
+ html = html.replace('\0', '')
+ if self.is_baen(html):
+ rules = []
+ elif self.is_book_designer(html):
+ rules = self.BOOK_DESIGNER
+ elif self.is_pdftohtml(html):
+ line_length_rules = [
+ # Un wrap using punctuation
+                (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .3), re.UNICODE), wrap_lines),
+ ]
+
+ rules = self.PDFTOHTML + line_length_rules
+ else:
+ rules = []
+ for rule in self.PREPROCESS + rules:
+ html = rule[0].sub(rule[1], html)
+
+ # Handle broken XHTML w/ SVG (ugh)
+ if 'svg:' in html and SVG_NS not in html:
+ html = html.replace(
-is to consider <h1> or <h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as
-well as any tags that have class="chapter".
-The expression used must evaluate to a list of elements. To disable chapter detection,
-use the expression "/". See the XPath Tutorial in the calibre User Manual for further
-help on using this feature.
-''').replace('\n', ' '))
- structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
- default='pagebreak',
- help=_('Specify how to mark detected chapters. A value of '
- '"pagebreak" will insert page breaks before chapters. '
- 'A value of "rule" will insert a line before chapters. '
- 'A value of "none" will disable chapter marking and a '
- 'value of "both" will use both page breaks and lines '
- 'to mark chapters.'))
- structure('cover', ['--cover'], default=None,
- help=_('Path to the cover to be used for this book'))
- structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
- action='store_true',
- help=_('Use the cover detected from the source file in preference '
- 'to the specified cover.'))
- structure('remove_first_image', ['--remove-first-image'], default=False,
- help=_('Remove the first image from the input ebook. Useful if '
- 'the first image in the source file is a cover and you '
- 'are specifying an external cover.'))
- structure('dont_split_on_page_breaks', ['--dont-split-on-page-breaks'], default=False,
- help=_('Turn off splitting at page breaks. Normally, input files '
- 'are automatically split at every page break into '
- 'two files. This gives an output ebook that can be parsed '
- 'faster and with less resources. However, splitting is '
- 'slow and if your source file contains a very large '
- 'number of page breaks, you should turn off splitting '
- 'on page breaks.'))
- structure('page', ['--page'], default=None,
- help=_('XPath expression to detect page boundaries for building '
- 'a custom pagination map, as used by AdobeDE. Default is '
- 'not to build an explicit pagination map.'))
- structure('page_names', ['--page-names'], default=None,
- help=_('XPath expression to find the name of each page in the '
- 'pagination map relative to its boundary element. '
- 'Default is to number all pages staring with 1.'))
- toc = c.add_group('toc',
- _('''\
-Control the automatic generation of a Table of Contents. If an OPF file is detected
-and it specifies a Table of Contents, then that will be used rather than trying
-to auto-generate a Table of Contents.
-''').replace('\n', ' '))
- toc('max_toc_links', ['--max-toc-links'], default=50,
- help=_('Maximum number of links to insert into the TOC. Set to 0 '
- 'to disable. Default is: %default. Links are only added to the '
- 'TOC if less than the --toc-threshold number of chapters were detected.'))
- toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
- help=_("Don't add auto-detected chapters to the Table of Contents."))
- toc('toc_threshold', ['--toc-threshold'], default=6,
- help=_('If fewer than this number of chapters is detected, then links '
- 'are added to the Table of Contents. Default: %default'))
- toc('level1_toc', ['--level1-toc'], default=None,
- help=_('XPath expression that specifies all tags that should be added '
- 'to the Table of Contents at level one. If this is specified, '
- 'it takes precedence over other forms of auto-detection.'))
- toc('level2_toc', ['--level2-toc'], default=None,
- help=_('XPath expression that specifies all tags that should be added '
- 'to the Table of Contents at level two. Each entry is added '
- 'under the previous level one entry.'))
- toc('level3_toc', ['--level3-toc'], default=None,
- help=_('XPath expression that specifies all tags that should be added '
- 'to the Table of Contents at level three. Each entry is added '
- 'under the previous level two entry.'))
- toc('from_ncx', ['--from-ncx'], default=None,
- help=_('Path to a .ncx file that contains the table of contents to use '
- 'for this ebook. The NCX file should contain links relative to '
- 'the directory it is placed in. See '
- 'http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX for '
- 'an overview of the NCX format.'))
- toc('use_auto_toc', ['--use-auto-toc'], default=False,
- help=_('Normally, if the source file already has a Table of Contents, '
- 'it is used in preference to the auto-generated one. '
- 'With this option, the auto-generated one is always used.'))
-
- layout = c.add_group('page layout', _('Control page layout'))
- layout('margin_top', ['--margin-top'], default=5.0,
- help=_('Set the top margin in pts. Default is %default'))
- layout('margin_bottom', ['--margin-bottom'], default=5.0,
- help=_('Set the bottom margin in pts. Default is %default'))
- layout('margin_left', ['--margin-left'], default=5.0,
- help=_('Set the left margin in pts. Default is %default'))
- layout('margin_right', ['--margin-right'], default=5.0,
- help=_('Set the right margin in pts. Default is %default'))
- layout('base_font_size2', ['--base-font-size'], default=12.0,
- help=_('The base font size in pts. Default is %defaultpt. '
- 'Set to 0 to disable rescaling of fonts.'))
- layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
- help=_('Remove spacing between paragraphs. '
- 'Also sets a indent on paragraphs of 1.5em. '
- 'You can override this by adding p {text-indent: 0cm} to '
- '--override-css. Spacing removal will not work if the source '
- 'file forces inter-paragraph spacing.'))
- layout('no_justification', ['--no-justification'], default=False,
- help=_('Do not force text to be justified in output.'))
- layout('linearize_tables', ['--linearize-tables'], default=False,
- help=_('Remove table markup, converting it into paragraphs. '
- 'This is useful if your source file uses a table to manage layout.'))
- layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
- help=_('Preserve the HTML tag structure while splitting large HTML files. '
- 'This is only neccessary if the HTML files contain CSS that '
- 'uses sibling selectors. Enabling this greatly slows down '
- 'processing of large HTML files.'))
-
- c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
- help=_('Print generated OPF file to stdout'))
- c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
- help=_('Print generated NCX file to stdout'))
- c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
- default=False,
- help=_('Keep intermediate files during processing by html2epub'))
- c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
- help=_('Extract the contents of the produced EPUB file to the '
- 'specified directory.'))
- return c
+
diff --git a/src/calibre/ebooks/epub/fonts.py b/src/calibre/ebooks/epub/fonts.py
deleted file mode 100644
index 5d0887f2d0..0000000000
--- a/src/calibre/ebooks/epub/fonts.py
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/usr/bin/env python
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Font size rationalization. See :function:`relativize`.
-'''
-
-import logging, re, operator, functools, collections, unittest, copy, sys
-from xml.dom import SyntaxErr
-
-from lxml.cssselect import CSSSelector
-from lxml import etree
-from lxml.html import HtmlElement
-
-from calibre.ebooks.html import fromstring
-from calibre.ebooks.epub import rules
-from cssutils import CSSParser
-
-num = r'[-]?\d+|[-]?\d*\.\d+'
-length = r'(?P<zero>0)|(?P<num>{num})(?P<unit>%|em|ex|px|in|cm|mm|pt|pc)'.replace('{num}', num)
-absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
-relative_size = r'(?P<rel>smaller|larger)'
-
-font_size_pat = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
-line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))
-
-PTU = {
- 'in' : 72.,
- 'cm' : 72/2.54,
- 'mm' : 72/25.4,
- 'pt' : 1.0,
- 'pc' : 1/12.,
- }
-
-DEFAULT_FONT_SIZE = 12
-
-class Rationalizer(object):
-
- @classmethod
- def specificity(cls, s):
- '''Map CSS specificity tuple to a single integer'''
- return sum([10**(4-i) + x for i,x in enumerate(s)])
-
- @classmethod
- def compute_font_size(cls, elem):
- '''
- Calculate the effective font size of an element traversing its ancestors as far as
- neccessary.
- '''
- cfs = elem.computed_font_size
- if cfs is not None:
- return
- sfs = elem.specified_font_size
- if callable(sfs):
- parent = elem.getparent()
- cls.compute_font_size(parent)
- elem.computed_font_size = sfs(parent.computed_font_size)
- else:
- elem.computed_font_size = sfs
-
- @classmethod
- def calculate_font_size(cls, style):
- 'Return font size in pts from style object. For relative units returns a callable'
- match = font_size_pat.search(style.font)
- fs = ''
- if match:
- fs = match.group()
- if style.fontSize:
- fs = style.fontSize
-
- match = font_size_pat.search(fs)
- if match is None:
- return None
- match = match.groupdict()
- unit = match.get('unit', '')
- if unit: unit = unit.lower()
- if unit in PTU.keys():
- return PTU[unit] * float(match['num'])
- if unit in ('em', 'ex'):
- return functools.partial(operator.mul, float(match['num']))
- if unit == '%':
- return functools.partial(operator.mul, float(match['num'])/100.)
- abs = match.get('abs', '')
- if abs: abs = abs.lower()
- if abs:
- x = (1.2)**(abs.count('x') * (-1 if 'small' in abs else 1))
- return 12 * x
- if match.get('zero', False):
- return 0.
- return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8)
-
- @classmethod
- def resolve_rules(cls, stylesheets):
- for sheet in stylesheets:
- if hasattr(sheet, 'fs_rules'):
- continue
- sheet.fs_rules = []
- sheet.lh_rules = []
- for r in sheet:
- if r.type == r.STYLE_RULE:
- font_size = cls.calculate_font_size(r.style)
- if font_size is not None:
- for s in r.selectorList:
- sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
- orig = line_height_pat.search(r.style.lineHeight)
- if orig is not None:
- for s in r.selectorList:
- sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * PTU[orig.group(2).lower()]])
-
-
- @classmethod
- def apply_font_size_rules(cls, stylesheets, root):
- 'Add a ``specified_font_size`` attribute to every element that has a specified font size'
- cls.resolve_rules(stylesheets)
- for sheet in stylesheets:
- for selector, font_size in sheet.fs_rules:
- elems = selector(root)
- for elem in elems:
- elem.specified_font_size = font_size
-
- @classmethod
- def remove_font_size_information(cls, stylesheets):
- for r in rules(stylesheets):
- r.style.removeProperty('font-size')
- try:
- new = font_size_pat.sub('', r.style.font).strip()
- if new:
- r.style.font = new
- else:
- r.style.removeProperty('font')
- except SyntaxErr:
- r.style.removeProperty('font')
- if line_height_pat.search(r.style.lineHeight) is not None:
- r.style.removeProperty('line-height')
-
- @classmethod
- def compute_font_sizes(cls, root, stylesheets, base=12):
- stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
- cls.apply_font_size_rules(stylesheets, root)
-
- # Compute the effective font size of all tags
- root.computed_font_size = DEFAULT_FONT_SIZE
- for elem in root.iter(etree.Element):
- cls.compute_font_size(elem)
-
- extra_css = {}
- if base > 0:
- # Calculate the "base" (i.e. most common) font size
- font_sizes = collections.defaultdict(lambda : 0)
- body = root.xpath('//body')[0]
- IGNORE = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
- for elem in body.iter(etree.Element):
- if elem.tag not in IGNORE:
- t = getattr(elem, 'text', '')
- if t: t = t.strip()
- if t:
- font_sizes[elem.computed_font_size] += len(t)
-
- t = getattr(elem, 'tail', '')
- if t: t = t.strip()
- if t:
- parent = elem.getparent()
- if parent.tag not in IGNORE:
- font_sizes[parent.computed_font_size] += len(t)
-
- try:
- most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
- scale = base/most_common if most_common > 0 else 1.
- except ValueError:
- scale = 1.
-
- # rescale absolute line-heights
- counter = 0
- for sheet in stylesheets:
- for selector, lh in sheet.lh_rules:
- for elem in selector(root):
- elem.set('id', elem.get('id', 'cfs_%d'%counter))
- counter += 1
- if not extra_css.has_key(elem.get('id')):
- extra_css[elem.get('id')] = []
- extra_css[elem.get('id')].append('line-height:%fpt'%(lh*scale))
-
-
-
- # Rescale all computed font sizes
- for elem in body.iter(etree.Element):
- if isinstance(elem, HtmlElement):
- elem.computed_font_size *= scale
-
- # Remove all font size specifications from the last stylesheet
- cls.remove_font_size_information(stylesheets[-1:])
-
- # Create the CSS to implement the rescaled font sizes
- for elem in body.iter(etree.Element):
- cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
- if abs(cfs-pcfs) > 1/12. and abs(pcfs) > 1/12.:
- elem.set('id', elem.get('id', 'cfs_%d'%counter))
- counter += 1
- if not extra_css.has_key(elem.get('id')):
- extra_css[elem.get('id')] = []
- extra_css[elem.get('id')].append('font-size: %f%%'%(100*(cfs/pcfs)))
-
- css = CSSParser(loglevel=logging.ERROR).parseString('')
- for id, r in extra_css.items():
- css.add('#%s {%s}'%(id, ';'.join(r)))
- return css
-
- @classmethod
- def rationalize(cls, stylesheets, root, opts):
- logger = logging.getLogger('html2epub')
- logger.info('\t\tRationalizing fonts...')
- extra_css = None
- if opts.base_font_size2 > 0:
- try:
- extra_css = cls.compute_font_sizes(root, stylesheets, base=opts.base_font_size2)
- except:
- logger.warning('Failed to rationalize font sizes.')
- if opts.verbose > 1:
- logger.exception('')
- finally:
- root.remove_font_size_information()
- logger.debug('\t\tDone rationalizing')
- return extra_css
-
-################################################################################
-############## Testing
-################################################################################
-
-class FontTest(unittest.TestCase):
-
- def setUp(self):
- from calibre.ebooks.epub import config
- self.opts = config(defaults='').parse()
- self.html = '''
-
-
- Test document
-
-
-
-
-
Some text
-
-
Some other text.
-
The longest piece of single font size text in this entire file. Used to test resizing.
-
-
- '''
- self.root = fromstring(self.html)
-
- def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
- root1 = copy.deepcopy(self.root)
- root1.computed_font_size = DEFAULT_FONT_SIZE
- stylesheet = CSSParser(loglevel=logging.ERROR).parseString(css)
- stylesheet2 = Rationalizer.compute_font_sizes(root1, [stylesheet], base)
- root2 = copy.deepcopy(root1)
- root2.remove_font_size_information()
- root2.computed_font_size = DEFAULT_FONT_SIZE
- Rationalizer.apply_font_size_rules([stylesheet2], root2)
- for elem in root2.iter(etree.Element):
- Rationalizer.compute_font_size(elem)
- for e1, e2 in zip(root1.xpath('//body')[0].iter(etree.Element), root2.xpath('//body')[0].iter(etree.Element)):
- self.assertAlmostEqual(e1.computed_font_size, e2.computed_font_size,
- msg='Computed font sizes for %s not equal. Original: %f Processed: %f'%\
- (root1.getroottree().getpath(e1), e1.computed_font_size, e2.computed_font_size))
- return stylesheet2.cssText
-
- def testStripping(self):
- 'Test that any original entries are removed from the CSS'
- css = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
- css = CSSParser(loglevel=logging.ERROR).parseString(css)
- Rationalizer.compute_font_sizes(copy.deepcopy(self.root), [css])
- self.assertEqual(css.cssText.replace(' ', '').replace('\n', ''),
- 'p{font:bolditalic}')
-
- def testIdentity(self):
- 'Test that no unnecessary font size changes are made'
- extra_css = self.do_test('div {font-size:12pt} \nspan {font-size:100%}')
- self.assertEqual(extra_css.strip(), '')
-
- def testRelativization(self):
- 'Test conversion of absolute to relative sizes'
- self.do_test('#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}')
-
- def testResizing(self):
- 'Test resizing of fonts'
- self.do_test('#longest {font: 24pt} .it {font:20pt; line-height:22pt}')
-
-
-def suite():
- return unittest.TestLoader().loadTestsFromTestCase(FontTest)
-
-def test():
- unittest.TextTestRunner(verbosity=2).run(suite())
-
-if __name__ == '__main__':
- sys.exit(test())
-
\ No newline at end of file
diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py
deleted file mode 100644
index a3e266991f..0000000000
--- a/src/calibre/ebooks/epub/from_any.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Convert any ebook format to epub.
-'''
-
-import sys, os, re
-from contextlib import nested
-
-from calibre import extract, walk
-from calibre.ebooks import DRMError
-from calibre.ebooks.epub import config as common_config, process_encryption
-from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
-from calibre.utils.zipfile import ZipFile
-from calibre.customize.ui import run_plugins_on_preprocess
-
-def lit2opf(path, tdir, opts):
- from calibre.ebooks.lit.reader import LitReader
- print 'Exploding LIT file:', path
- reader = LitReader(path)
- reader.extract_content(tdir, False)
- opf = None
- for opf in walk(tdir):
- if opf.lower().endswith('.opf'):
- break
- if not opf.endswith('.opf'):
- opf = None
- if opf is not None: # Check for url-quoted filenames
- _opf = OPF(opf, os.path.dirname(opf))
- replacements = []
- for item in _opf.itermanifest():
- href = item.get('href', '')
- path = os.path.join(os.path.dirname(opf), *(href.split('/')))
- if not os.path.exists(path) and os.path.exists(path.replace('&', '%26')):
- npath = path
- path = path.replace('&', '%26')
- replacements.append((path, npath))
- if replacements:
- print 'Fixing quoted filenames...'
- for path, npath in replacements:
- if os.path.exists(path):
- os.rename(path, npath)
- for f in walk(tdir):
- with open(f, 'r+b') as f:
- raw = f.read()
- for path, npath in replacements:
- raw = raw.replace(os.path.basename(path), os.path.basename(npath))
- f.seek(0)
- f.truncate()
- f.write(raw)
- return opf
-
-def mobi2opf(path, tdir, opts):
- from calibre.ebooks.mobi.reader import MobiReader
- print 'Exploding MOBI file:', path.encode('utf-8') if isinstance(path, unicode) else path
- reader = MobiReader(path)
- reader.extract_content(tdir)
- files = list(walk(tdir))
- opts.encoding = 'utf-8'
- for f in files:
- if f.lower().endswith('.opf'):
- return f
- html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
- hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
- mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
- opf = OPFCreator(tdir, mi)
- opf.create_manifest([(hf[0], None)])
- opf.create_spine([hf[0]])
- ans = os.path.join(tdir, 'metadata.opf')
- opf.render(open(ans, 'wb'))
- return ans
-
-def fb22opf(path, tdir, opts):
- from calibre.ebooks.lrf.fb2.convert_from import to_html
- print 'Converting FB2 to HTML...'
- return to_html(path, tdir)
-
-def rtf2opf(path, tdir, opts):
- from calibre.ebooks.lrf.rtf.convert_from import generate_html
- generate_html(path, tdir)
- return os.path.join(tdir, 'metadata.opf')
-
-def txt2opf(path, tdir, opts):
- from calibre.ebooks.lrf.txt.convert_from import generate_html
- generate_html(path, opts.encoding, tdir)
- opts.encoding = 'utf-8'
- return os.path.join(tdir, 'metadata.opf')
-
-def pdf2opf(path, tdir, opts):
- from calibre.ebooks.lrf.pdf.convert_from import generate_html
- generate_html(path, tdir)
- opts.dont_split_on_page_breaks = True
- return os.path.join(tdir, 'metadata.opf')
-
-def epub2opf(path, tdir, opts):
- zf = ZipFile(path)
- zf.extractall(tdir)
- opts.chapter_mark = 'none'
- encfile = os.path.join(tdir, 'META-INF', 'encryption.xml')
- opf = None
- for f in walk(tdir):
- if f.lower().endswith('.opf'):
- opf = f
- break
- if opf and os.path.exists(encfile):
- if not process_encryption(encfile, opf):
- raise DRMError(os.path.basename(path))
-
- if opf is None:
- raise ValueError('%s is not a valid EPUB file'%path)
- return opf
-
-def odt2epub(path, tdir, opts):
- from calibre.ebooks.odt.to_oeb import Extract
- opts.encoding = 'utf-8'
- return Extract()(path, tdir)
-
-MAP = {
- 'lit' : lit2opf,
- 'mobi' : mobi2opf,
- 'prc' : mobi2opf,
- 'azw' : mobi2opf,
- 'fb2' : fb22opf,
- 'rtf' : rtf2opf,
- 'txt' : txt2opf,
- 'pdf' : pdf2opf,
- 'epub' : epub2opf,
- 'odt' : odt2epub,
- }
-SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
- 'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
-
-def unarchive(path, tdir):
- extract(path, tdir)
- files = list(walk(tdir))
-
- for ext in ['opf'] + list(MAP.keys()):
- for f in files:
- if f.lower().endswith('.'+ext):
- if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
- continue
- return f, ext
- return find_html_index(files)
-
-def any2epub(opts, path, notification=None, create_epub=True,
- oeb_cover=False, extract_to=None):
- path = run_plugins_on_preprocess(path)
- ext = os.path.splitext(path)[1]
- if not ext:
- raise ValueError('Unknown file type: '+path)
- ext = ext.lower()[1:]
-
- if opts.output is None:
- opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
-
- with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
- if ext in ['rar', 'zip', 'oebzip']:
- path, ext = unarchive(path, tdir1)
- print 'Found %s file in archive'%(ext.upper())
-
- if ext in MAP.keys():
- path = MAP[ext](path, tdir2, opts)
- ext = 'opf'
-
-
- if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
- raise ValueError('Conversion from %s is not supported'%ext.upper())
-
- print 'Creating EPUB file...'
- html2epub(path, opts, notification=notification,
- create_epub=create_epub, oeb_cover=oeb_cover,
- extract_to=extract_to)
-
-def config(defaults=None):
- return common_config(defaults=defaults)
-
-
-def formats():
- return ['html', 'rar', 'zip', 'oebzip']+list(MAP.keys())
-
-USAGE = _('''\
-%%prog [options] filename
-
-Convert any of a large number of ebook formats to a %s file. Supported formats are: %s
-''')
-
-def option_parser(usage=USAGE):
- return config().option_parser(usage=usage%('EPUB', formats()))
-
-def main(args=sys.argv):
- parser = option_parser()
- opts, args = parser.parse_args(args)
- if len(args) < 2:
- parser.print_help()
- print 'No input file specified.'
- return 1
- any2epub(opts, args[1])
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/src/calibre/ebooks/epub/from_comic.py b/src/calibre/ebooks/epub/from_comic.py
deleted file mode 100644
index c6dff349da..0000000000
--- a/src/calibre/ebooks/epub/from_comic.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'Convert a comic in CBR/CBZ format to epub'
-
-import sys
-from functools import partial
-from calibre.ebooks.lrf.comic.convert_from import do_convert, option_parser, config, main as _main
-
-convert = partial(do_convert, output_format='epub')
-main = partial(_main, output_format='epub')
-
-if __name__ == '__main__':
- sys.exit(main())
-
-if False:
- option_parser
- config
-
\ No newline at end of file
diff --git a/src/calibre/ebooks/epub/from_feeds.py b/src/calibre/ebooks/epub/from_feeds.py
deleted file mode 100644
index 6a12353f50..0000000000
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Convert periodical content into EPUB ebooks.
-'''
-import sys, glob, os
-from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
-from calibre.ebooks.epub.from_html import config as html2epub_config
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.epub.from_html import convert as html2epub
-from calibre import strftime, sanitize_file_name
-
-def config(defaults=None):
- c = feeds2disk_config(defaults=defaults)
- c.remove('lrf')
- c.remove('epub')
- c.remove('output_dir')
- c.update(html2epub_config(defaults=defaults))
- c.remove('chapter_mark')
- return c
-
-def option_parser():
- c = config()
- return c.option_parser(usage=USAGE)
-
-def convert(opts, recipe_arg, notification=None):
- opts.lrf = False
- opts.epub = True
- if opts.debug:
- opts.verbose = 2
- parser = option_parser()
- with TemporaryDirectory('_feeds2epub') as tdir:
- opts.output_dir = tdir
- recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
- c = config()
- recipe_opts = c.parse_string(recipe.html2epub_options)
- c.smart_update(recipe_opts, opts)
- opts = recipe_opts
- opts.chapter_mark = 'none'
- opts.dont_split_on_page_breaks = True
- opf = glob.glob(os.path.join(tdir, '*.opf'))
- if not opf:
- raise Exception('Downloading of recipe: %s failed'%recipe_arg)
- opf = opf[0]
-
- if opts.output is None:
- fname = recipe.title + strftime(recipe.timefmt) + '.epub'
- opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
-
- print 'Generating epub...'
- opts.encoding = 'utf-8'
- opts.remove_paragraph_spacing = True
- html2epub(opf, opts, notification=notification)
-
-
-def main(args=sys.argv, notification=None, handler=None):
- parser = option_parser()
- opts, args = parser.parse_args(args)
- if len(args) != 2 and opts.feeds is None:
- parser.print_help()
- return 1
- recipe_arg = args[1] if len(args) > 1 else None
- convert(opts, recipe_arg, notification=notification)
-
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
\ No newline at end of file
diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
deleted file mode 100644
index 3fd7b082f9..0000000000
--- a/src/calibre/ebooks/epub/from_html.py
+++ /dev/null
@@ -1,547 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Conversion of HTML/OPF files follows several stages:
-
- * All links in the HTML files or in the OPF manifest are
- followed to build up a list of HTML files to be converted.
- This stage is implemented by
- :function:`calibre.ebooks.html.traverse` and
- :class:`calibre.ebooks.html.HTMLFile`.
-
- * The HTML is pre-processed to make it more semantic.
- All links in the HTML files to other resources like images,
- stylesheets, etc. are relativized. The resources are copied
- into the `resources` sub directory. This is accomplished by
- :class:`calibre.ebooks.html.PreProcessor` and
- :class:`calibre.ebooks.html.Parser`.
-
- * The HTML is processed. Various operations are performed.
- All style declarations are extracted and consolidated into
- a single style sheet. Chapters are auto-detected and marked.
- Various font related manipulations are performed. See
- :class:`HTMLProcessor`.
-
- * The processed HTML is saved and the
- :module:`calibre.ebooks.epub.split` module is used to split up
- large HTML files into smaller chunks.
-
- * The EPUB container is created.
-'''
-
-import os, sys, cStringIO, logging, re, functools, shutil
-
-from lxml.etree import XPath
-from lxml import html, etree
-from PyQt4.Qt import QApplication, QPixmap, Qt
-
-from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\
- opf_traverse, create_metadata, rebase_toc, Link, parser
-from calibre.ebooks.epub import config as common_config, tostring
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.ebooks.epub import initialize_container, PROFILES
-from calibre.ebooks.epub.split import split
-from calibre.ebooks.epub.pages import add_page_map
-from calibre.ebooks.epub.fonts import Rationalizer
-from calibre.constants import preferred_encoding
-from calibre.customize.ui import run_plugins_on_postprocess
-from calibre import walk, CurrentDir, to_unicode, fit_image
-
-content = functools.partial(os.path.join, u'content')
-
-def remove_bad_link(element, attribute, link, pos):
- if attribute is not None:
- if element.tag in ['link']:
- element.getparent().remove(element)
- else:
- element.set(attribute, '')
- del element.attrib[attribute]
-
-def check_links(opf_path, pretty_print):
- '''
- Find and remove all invalid links in the HTML files
- '''
- logger = logging.getLogger('html2epub')
- logger.info('\tChecking files for bad links...')
- pathtoopf = os.path.abspath(opf_path)
- with CurrentDir(os.path.dirname(pathtoopf)):
- opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
- html_files = []
- for item in opf.itermanifest():
- if 'html' in item.get('media-type', '').lower():
- f = item.get('href').split('/')[-1]
- if isinstance(f, str):
- f = f.decode('utf-8')
- html_files.append(os.path.abspath(content(f)))
-
- for path in html_files:
- if not os.access(path, os.R_OK):
- continue
- base = os.path.dirname(path)
- root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
- for element, attribute, link, pos in list(root.iterlinks()):
- link = to_unicode(link)
- plink = Link(link, base)
- bad = False
- if plink.path is not None and not os.path.exists(plink.path):
- bad = True
- if bad:
- remove_bad_link(element, attribute, link, pos)
- open(content(path), 'wb').write(tostring(root, pretty_print))
-
-def find_html_index(files):
- '''
- Given a list of files, find the most likely root HTML file in the
- list.
- '''
- html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
- html_files = [f for f in files if html_pat.search(f) is not None]
- if not html_files:
- raise ValueError(_('Could not find an ebook inside the archive'))
- html_files = [(f, os.stat(f).st_size) for f in html_files]
- html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
- html_files = [f[0] for f in html_files]
- for q in ('toc', 'index'):
- for f in html_files:
- if os.path.splitext(os.path.basename(f))[0].lower() == q:
- return f, os.path.splitext(f)[1].lower()[1:]
- return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
-
-def rescale_images(imgdir, screen_size, log):
- pwidth, pheight = screen_size
- if QApplication.instance() is None:
- QApplication([])
- for f in os.listdir(imgdir):
- path = os.path.join(imgdir, f)
- if os.path.splitext(f)[1] in ('.css', '.js'):
- continue
-
- p = QPixmap()
- p.load(path)
- if p.isNull():
- continue
- width, height = p.width(), p.height()
- scaled, new_width, new_height = fit_image(width, height, pwidth,
- pheight)
- if scaled:
- log.info('Rescaling image: '+f)
- p.scaled(new_width, new_height, Qt.IgnoreAspectRatio,
- Qt.SmoothTransformation).save(path, 'JPEG')
-
-
-
-
-
-class HTMLProcessor(Processor, Rationalizer):
-
- def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
- Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
- name='html2epub')
- if opts.verbose > 2:
- self.debug_tree('parsed')
- self.detect_chapters()
-
- self.extract_css(stylesheets)
- if self.opts.base_font_size2 > 0:
- self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
- self.root, self.opts)
- if opts.verbose > 2:
- self.debug_tree('nocss')
-
- if hasattr(self.body, 'xpath'):
- for script in list(self.body.xpath('descendant::script')):
- script.getparent().remove(script)
-
- self.fix_markup()
-
- def convert_image(self, img):
- rpath = img.get('src', '')
- path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
- if os.path.exists(path) and os.path.isfile(path):
- if QApplication.instance() is None:
- app = QApplication([])
- app
- p = QPixmap()
- p.load(path)
- if not p.isNull():
- p.save(path + '_calibre_converted.jpg')
- os.remove(path)
- for key, val in self.resource_map.items():
- if val == rpath:
- self.resource_map[key] = rpath+'_calibre_converted.jpg'
- img.set('src', rpath+'_calibre_converted.jpg')
-
- def fix_markup(self):
- '''
- Perform various markup transforms to get the output to render correctly
- in the quirky ADE.
- '''
-        # Replace <br> tags that are children of <body> as ADE doesn't handle them
- if hasattr(self.body, 'xpath'):
- for br in self.body.xpath('./br'):
- if br.getparent() is None:
- continue
- try:
- sibling = br.itersiblings().next()
- except:
- sibling = None
- br.tag = 'p'
- br.text = u'\u00a0'
- if (br.tail and br.tail.strip()) or sibling is None or \
- getattr(sibling, 'tag', '') != 'br':
- style = br.get('style', '').split(';')
- style = filter(None, map(lambda x: x.strip(), style))
- style.append('margin: 0pt; border:0pt; height:0pt')
- br.set('style', '; '.join(style))
- else:
- sibling.getparent().remove(sibling)
- if sibling.tail:
- if not br.tail:
- br.tail = ''
- br.tail += sibling.tail
-
-
- if self.opts.profile.remove_object_tags:
- for tag in self.root.xpath('//embed'):
- tag.getparent().remove(tag)
- for tag in self.root.xpath('//object'):
- if tag.get('type', '').lower().strip() in ('image/svg+xml',):
- continue
- tag.getparent().remove(tag)
-
-
- for tag in self.root.xpath('//title|//style'):
- if not tag.text:
- tag.getparent().remove(tag)
- for tag in self.root.xpath('//script'):
- if not tag.text and not tag.get('src', False):
- tag.getparent().remove(tag)
-
- for tag in self.root.xpath('//form'):
- tag.getparent().remove(tag)
-
- for tag in self.root.xpath('//center'):
- tag.tag = 'div'
- tag.set('style', 'text-align:center')
-
- if self.opts.linearize_tables:
- for tag in self.root.xpath('//table | //tr | //th | //td'):
- tag.tag = 'div'
-
- # ADE can't handle & in an img url
- for tag in self.root.xpath('//img[@src]'):
- tag.set('src', tag.get('src', '').replace('&', ''))
-
-
- def save(self):
- for meta in list(self.root.xpath('//meta')):
- meta.getparent().remove(meta)
- # Strip all comments since Adobe DE is petrified of them
- Processor.save(self, strip_comments=True)
-
- def remove_first_image(self):
- images = self.root.xpath('//img')
- if images:
- images[0].getparent().remove(images[0])
- return True
- return False
-
-
-
-
-def config(defaults=None):
- return common_config(defaults=defaults)
-
-def option_parser():
- c = config()
- return c.option_parser(usage=_('''\
-%prog [options] file.html|opf
-
-Convert an HTML file to an EPUB ebook. Recursively follows links in the HTML file.
-If you specify an OPF file instead of an HTML file, the list of links is taken from
-the <spine> element of the OPF file.
-'''))
-
-def parse_content(filelist, opts, tdir):
- os.makedirs(os.path.join(tdir, 'content', 'resources'))
- resource_map, stylesheets = {}, {}
- toc = TOC(base_path=tdir, type='root')
- stylesheet_map = {}
- first_image_removed = False
- for htmlfile in filelist:
- logging.getLogger('html2epub').debug('Processing %s...'%htmlfile)
- hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
- resource_map, filelist, stylesheets)
- if not first_image_removed and opts.remove_first_image:
- first_image_removed = hp.remove_first_image()
- hp.populate_toc(toc)
- hp.save()
- stylesheet_map[os.path.basename(hp.save_path())] = \
- [s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None]
-
- logging.getLogger('html2epub').debug('Saving stylesheets...')
- if opts.base_font_size2 > 0:
- Rationalizer.remove_font_size_information(stylesheets.values())
- for path, css in stylesheets.items():
- raw = getattr(css, 'cssText', css)
- if isinstance(raw, unicode):
- raw = raw.encode('utf-8')
- open(path, 'wb').write(raw)
- if toc.count('chapter') > opts.toc_threshold:
- toc.purge(['file', 'link', 'unknown'])
- if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
- toc.purge(['link', 'unknown'])
- toc.purge(['link'], max=opts.max_toc_links)
-
- return resource_map, hp.htmlfile_map, toc, stylesheet_map
-
-TITLEPAGE = '''\
-<?xml version="1.0" encoding="UTF-8"?>
-<html xmlns="http://www.w3.org/1999/xhtml">
-    <head>
-        <title>Cover</title>
-        <style type="text/css">
-            body { margin: 0pt; padding: 0pt; text-align: center; }
-            div { margin: 0pt; padding: 0pt; }
-        </style>
-    </head>
-    <body>
-        <div><img src="%s" alt="cover" style="height: 100%%" /></div>
-    </body>
-</html>
-'''
-
-def create_cover_image(src, dest, screen_size, rescale_cover=True):
- try:
- from PyQt4.Qt import QImage, Qt
- if QApplication.instance() is None:
- QApplication([])
- im = QImage()
- im.load(src)
- if im.isNull():
- raise ValueError('Invalid cover image')
- if rescale_cover and screen_size is not None:
- width, height = im.width(), im.height()
- dw, dh = (screen_size[0]-width)/float(width), (screen_size[1]-height)/float(height)
- delta = min(dw, dh)
- if delta > 0:
- nwidth = int(width + delta*(width))
- nheight = int(height + delta*(height))
- im = im.scaled(int(nwidth), int(nheight), Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
- im.save(dest)
- except:
- import traceback
- traceback.print_exc()
- return False
- return True
-
-def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
- old_title_page = None
- f = lambda x : os.path.normcase(os.path.normpath(x))
- if not isinstance(mi.cover, basestring):
- mi.cover = None
- if mi.cover:
- if f(filelist[0].path) == f(mi.cover):
- old_title_page = htmlfilemap[filelist[0].path]
- #logger = logging.getLogger('html2epub')
- metadata_cover = mi.cover
- if metadata_cover and not os.path.exists(metadata_cover):
- metadata_cover = None
-
- cpath = '/'.join(('resources', '_cover_.jpg'))
- cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
- if metadata_cover is not None:
- if not create_cover_image(metadata_cover, cover_dest,
- opts.profile.screen_size):
- metadata_cover = None
- specified_cover = opts.cover
- if specified_cover and not os.path.exists(specified_cover):
- specified_cover = None
- if specified_cover is not None:
- if not create_cover_image(specified_cover, cover_dest,
- opts.profile.screen_size):
- specified_cover = None
-
- cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
-
- if cover is not None:
- titlepage = TITLEPAGE%cpath
- tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
- tppath = os.path.join(tdir, 'content', tp)
- with open(tppath, 'wb') as f:
- f.write(titlepage)
- return tp if old_title_page is None else None, True
- elif os.path.exists(cover_dest):
- os.remove(cover_dest)
- return None, old_title_page is not None
-
-def find_oeb_cover(htmlfile):
- if os.stat(htmlfile).st_size > 2048:
- return None
-    match = re.search(r'(?i)<img[^>]+src\s*=\s*[\'"](.+?)[\'"]', open(htmlfile, 'rb').read())
- if match:
- return match.group(1)
-
-def condense_ncx(ncx_path):
- tree = etree.parse(ncx_path)
- for tag in tree.getroot().iter(tag=etree.Element):
- if tag.text:
- tag.text = tag.text.strip()
- if tag.tail:
- tag.tail = tag.tail.strip()
- compressed = etree.tostring(tree.getroot(), encoding='utf-8')
- open(ncx_path, 'wb').write(compressed)
-
-def convert(htmlfile, opts, notification=None, create_epub=True,
- oeb_cover=False, extract_to=None):
- htmlfile = os.path.abspath(htmlfile)
- if opts.output is None:
- opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
- opts.profile = PROFILES[opts.profile]
- opts.output = os.path.abspath(opts.output)
- if opts.override_css is not None:
- try:
- opts.override_css = open(opts.override_css, 'rb').read().decode(preferred_encoding, 'replace')
- except:
- opts.override_css = opts.override_css.decode(preferred_encoding, 'replace')
- if opts.from_opf:
- opts.from_opf = os.path.abspath(opts.from_opf)
- if opts.from_ncx:
- opts.from_ncx = os.path.abspath(opts.from_ncx)
- if htmlfile.lower().endswith('.opf'):
- opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
- filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
- if not filelist:
-            # Bad OPF; look for an HTML file instead
- htmlfile = find_html_index(walk(os.path.dirname(htmlfile)))[0]
- if htmlfile is None:
- raise ValueError('Could not find suitable file to convert.')
- filelist = get_filelist(htmlfile, opts)[1]
- mi = merge_metadata(None, opf, opts)
- else:
- opf, filelist = get_filelist(htmlfile, opts)
- mi = merge_metadata(htmlfile, opf, opts)
- opts.chapter = XPath(opts.chapter,
- namespaces={'re':'http://exslt.org/regular-expressions'})
- for x in (1, 2, 3):
- attr = 'level%d_toc'%x
- if getattr(opts, attr):
- setattr(opts, attr, XPath(getattr(opts, attr),
- namespaces={'re':'http://exslt.org/regular-expressions'}))
- else:
- setattr(opts, attr, None)
-
- with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
- if opts.keep_intermediate:
- print 'Intermediate files in', tdir
- resource_map, htmlfile_map, generated_toc, stylesheet_map = \
- parse_content(filelist, opts, tdir)
- logger = logging.getLogger('html2epub')
- resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
-
-
- title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
- spine = [htmlfile_map[f.path] for f in filelist]
- if not oeb_cover and title_page is not None:
- spine = [title_page] + spine
- mi.cover = None
- mi.cover_data = (None, None)
-
-
- mi = create_metadata(tdir, mi, spine, resources)
- buf = cStringIO.StringIO()
- if mi.toc:
- rebase_toc(mi.toc, htmlfile_map, tdir)
- if opts.use_auto_toc or mi.toc is None or len(list(mi.toc.flat())) < 2:
- mi.toc = generated_toc
- if opts.from_ncx:
- toc = TOC()
- toc.read_ncx_toc(opts.from_ncx)
- mi.toc = toc
- for item in mi.manifest:
- if getattr(item, 'mime_type', None) == 'text/html':
- item.mime_type = 'application/xhtml+xml'
- opf_path = os.path.join(tdir, 'metadata.opf')
- with open(opf_path, 'wb') as f:
- mi.render(f, buf, 'toc.ncx')
- toc = buf.getvalue()
- if toc:
- with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
- f.write(toc)
- if opts.show_ncx:
- print toc
- split(opf_path, opts, stylesheet_map)
- if opts.page:
- logger.info('\tBuilding page map...')
- add_page_map(opf_path, opts)
- check_links(opf_path, opts.pretty_print)
-
- opf = OPF(opf_path, tdir)
- opf.remove_guide()
- oeb_cover_file = None
- if oeb_cover and title_page is not None:
- oeb_cover_file = find_oeb_cover(os.path.join(tdir, 'content', title_page))
- if has_title_page or (oeb_cover and oeb_cover_file):
- opf.create_guide_element()
- if has_title_page and not oeb_cover:
- opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
- if oeb_cover and oeb_cover_file:
- opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
-
- cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
- if os.path.exists(cpath):
- opf.add_path_to_manifest(cpath, 'image/jpeg')
- with open(opf_path, 'wb') as f:
- f.write(opf.render())
- ncx_path = os.path.join(os.path.dirname(opf_path), 'toc.ncx')
- if os.path.exists(ncx_path) and os.stat(ncx_path).st_size > opts.profile.flow_size:
- logger.info('Condensing NCX from %d bytes...'%os.stat(ncx_path).st_size)
- condense_ncx(ncx_path)
- if os.stat(ncx_path).st_size > opts.profile.flow_size:
- logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
-
- if opts.profile.screen_size is not None:
- rescale_images(os.path.join(tdir, 'content', 'resources'),
- opts.profile.screen_size, logger)
-
- if create_epub:
- epub = initialize_container(opts.output)
- epub.add_dir(tdir)
- epub.close()
- run_plugins_on_postprocess(opts.output, 'epub')
- logger.info(_('Output written to ')+opts.output)
-
- if opts.show_opf:
- print open(opf_path, 'rb').read()
-
- if opts.extract_to is not None:
- if os.path.exists(opts.extract_to):
- shutil.rmtree(opts.extract_to)
- shutil.copytree(tdir, opts.extract_to)
-
- if extract_to is not None:
- if os.path.exists(extract_to):
- shutil.rmtree(extract_to)
- shutil.copytree(tdir, extract_to)
-
-
-
-def main(args=sys.argv):
- parser = option_parser()
- opts, args = parser.parse_args(args)
- if len(args) < 2:
- parser.print_help()
- print _('You must specify an input HTML file')
- return 1
- convert(args[1], opts)
- return 0
-
-if __name__ == '__main__':
- sys.exit(main())
diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py
new file mode 100644
index 0000000000..b748429725
--- /dev/null
+++ b/src/calibre/ebooks/epub/input.py
@@ -0,0 +1,127 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, re, uuid
+from itertools import cycle
+
+from lxml import etree
+
+from calibre.customize.conversion import InputFormatPlugin
+
+class EPUBInput(InputFormatPlugin):
+
+ name = 'EPUB Input'
+ author = 'Kovid Goyal'
+ description = 'Convert EPUB files (.epub) to HTML'
+ file_types = set(['epub'])
+
+ @classmethod
+ def decrypt_font(cls, key, path):
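+        # Adobe font de-obfuscation: only the first 1024 bytes of the font are
+        # obfuscated, by XORing them with the key (derived from the package
+        # UUID) repeated cyclically; the rest of the file is left untouched.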
+ raw = open(path, 'rb').read()
+ crypt = raw[:1024]
+ key = cycle(iter(key))
+ decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
+ with open(path, 'wb') as f:
+ f.write(decrypt)
+ f.write(raw[1024:])
+
+ @classmethod
+    def process_encryption(cls, encfile, opf, log):
+ key = None
+ m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
+ if m:
+ key = m.group(1)
+ key = list(map(ord, uuid.UUID(key).bytes))
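+        # Only the Adobe font-obfuscation scheme is handled below; any other
+        # EncryptionMethod is treated as real DRM and the conversion is aborted
+        # by the caller.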
+ try:
+ root = etree.parse(encfile)
+ for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
+ algorithm = em.get('Algorithm', '')
+ if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
+ return False
+ cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
+ uri = cr.get('URI')
+ path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
+ if os.path.exists(path):
+ cls.decrypt_font(key, path)
+ return True
+ except:
+ import traceback
+ traceback.print_exc()
+ return False
+
+ @classmethod
+    def rationalize_cover(cls, opf):
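+        # If the guide declares an HTML cover page, drop it from the spine and
+        # point the guide at a raster image rendered from that page instead, so
+        # the rest of the pipeline sees a plain image cover plus a titlepage.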
+ guide_cover, guide_elem = None, None
+ for guide_elem in opf.iterguide():
+ if guide_elem.get('type', '').lower() == 'cover':
+ guide_cover = guide_elem.get('href', '')
+ break
+ if not guide_cover:
+ return
+ spine = list(opf.iterspine())
+ if not spine:
+ return
+ idref = spine[0].get('idref', '')
+ manifest = list(opf.itermanifest())
+ if not manifest:
+ return
+ if manifest[0].get('id', False) != idref:
+ return
+ spine[0].getparent().remove(spine[0])
+ guide_elem.set('href', 'calibre_raster_cover.jpg')
+ for elem in list(opf.iterguide()):
+ if elem.get('type', '').lower() == 'titlepage':
+ elem.getparent().remove(elem)
+ from calibre.ebooks.oeb.base import OPF
+ t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
+ t.set('type', 'titlepage')
+ t.set('href', guide_cover)
+ t.set('title', 'Title Page')
+ from calibre.ebooks import render_html
+ renderer = render_html(guide_cover)
+ if renderer is not None:
+ open('calibre_raster_cover.jpg', 'wb').write(
+ renderer.data)
+
+
+ def convert(self, stream, options, file_ext, log, accelerators):
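+        # Note: the conversion pipeline is assumed to have chdir'd into a
+        # scratch directory; the EPUB is unzipped into the CWD and the
+        # rewritten content.opf is written there as well.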
+ from calibre.utils.zipfile import ZipFile
+ from calibre import walk
+ from calibre.ebooks import DRMError
+ from calibre.ebooks.metadata.opf2 import OPF
+ zf = ZipFile(stream)
+ zf.extractall(os.getcwd())
+ encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
+ opf = None
+ for f in walk(u'.'):
+ if f.lower().endswith('.opf'):
+ opf = os.path.abspath(f)
+ break
+ path = getattr(stream, 'name', 'stream')
+
+ if opf is None:
+ raise ValueError('%s is not a valid EPUB file'%path)
+
+ if os.path.exists(encfile):
+ if not self.process_encryption(encfile, opf, log):
+ raise DRMError(os.path.basename(path))
+
+ opf = os.path.relpath(opf, os.getcwdu())
+ parts = os.path.split(opf)
+ opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
+
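+        # content.opf is written at the extraction root, so if the original OPF
+        # lived in a subdirectory, rebase all manifest and guide hrefs onto it.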
+ if len(parts) > 1 and parts[0]:
+ delta = '/'.join(parts[:-1])+'/'
+ for elem in opf.itermanifest():
+ elem.set('href', delta+elem.get('href'))
+ for elem in opf.iterguide():
+ elem.set('href', delta+elem.get('href'))
+
+ self.rationalize_cover(opf)
+
+ with open('content.opf', 'wb') as nopf:
+ nopf.write(opf.render())
+
+ return os.path.abspath('content.opf')
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
new file mode 100644
index 0000000000..3256e1168a
--- /dev/null
+++ b/src/calibre/ebooks/epub/output.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, shutil, re
+from urllib import unquote
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ptempfile import TemporaryDirectory
+from calibre.constants import __appname__, __version__
+from calibre import strftime, guess_type
+from calibre.customize.conversion import OptionRecommendation
+
+from lxml import etree
+
+
+class EPUBOutput(OutputFormatPlugin):
+
+ name = 'EPUB Output'
+ author = 'Kovid Goyal'
+ file_type = 'epub'
+
+ options = set([
+ OptionRecommendation(name='extract_to',
+ help=_('Extract the contents of the generated EPUB file to the '
+ 'specified directory. The contents of the directory are first '
+ 'deleted, so be careful.')),
+
+ OptionRecommendation(name='dont_split_on_page_breaks',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Turn off splitting at page breaks. Normally, input '
+ 'files are automatically split at every page break into '
+ 'two files. This gives an output ebook that can be '
+ 'parsed faster and with less resources. However, '
+ 'splitting is slow and if your source file contains a '
+ 'very large number of page breaks, you should turn off '
+ 'splitting on page breaks.'
+ )
+ ),
+
+ OptionRecommendation(name='flow_size', recommended_value=260,
+ help=_('Split all HTML files larger than this size (in KB). '
+ 'This is necessary as most EPUB readers cannot handle large '
+ 'file sizes. The default of %defaultKB is the size required '
+ 'for Adobe Digital Editions.')
+ ),
+
+
+ ])
+
+
+ TITLEPAGE_COVER = '''\
+<?xml version="1.0" encoding="UTF-8"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <title>Cover</title>
+        <style type="text/css">
+            body { margin: 0pt; padding: 0pt; text-align: center; }
+            div { margin: 0pt; padding: 0pt; }
+        </style>
+    </head>
+    <body>
+        <div><img src="%s" alt="cover" style="height: 100%%" /></div>
+    </body>
+</html>
+'''
+
+ def convert(self, oeb, output_path, input_plugin, opts, log):
+ self.log, self.opts, self.oeb = log, opts, oeb
+
+ from calibre.ebooks.oeb.transforms.split import Split
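+        # flow_size is specified in KB by the option above; the splitter works
+        # in bytes, hence the *1024.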
+ split = Split(not self.opts.dont_split_on_page_breaks,
+ max_flow_size=self.opts.flow_size*1024
+ )
+ split(self.oeb, self.opts)
+
+
+ self.workaround_ade_quirks()
+
+ from calibre.ebooks.oeb.transforms.rescale import RescaleImages
+ RescaleImages()(oeb, opts)
+ self.insert_cover()
+
+ with TemporaryDirectory('_epub_output') as tdir:
+ from calibre.customize.ui import plugin_for_output_format
+ oeb_output = plugin_for_output_format('oeb')
+ oeb_output.convert(oeb, tdir, input_plugin, opts, log)
+ opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
+ self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
+ if x.endswith('.ncx')][0])
+
+ from calibre.ebooks.epub import initialize_container
+ epub = initialize_container(output_path, os.path.basename(opf))
+ epub.add_dir(tdir)
+ if opts.extract_to is not None:
+ if os.path.exists(opts.extract_to):
+ shutil.rmtree(opts.extract_to)
+ os.mkdir(opts.extract_to)
+ epub.extractall(path=opts.extract_to)
+ self.log.info('EPUB extracted to', opts.extract_to)
+ epub.close()
+
+ def default_cover(self):
+ '''
+        Create a generic cover for books that don't have a cover
+ '''
+ try:
+ from calibre.gui2 import images_rc # Needed for access to logo
+ from PyQt4.Qt import QApplication, QFile, QIODevice
+ except:
+ return None
+ from calibre.ebooks.metadata import authors_to_string
+ images_rc
+ m = self.oeb.metadata
+ title = unicode(m.title[0])
+        a = [unicode(x) for x in m.creators if x.role == 'aut']
+ author = authors_to_string(a)
+ if QApplication.instance() is None: QApplication([])
+ f = QFile(':/library')
+ f.open(QIODevice.ReadOnly)
+ img_data = str(f.readAll())
+ id, href = self.oeb.manifest.generate('calibre-logo',
+ 'calibre-logo.png')
+ self.oeb.manifest.add(id, href, 'image/png', data=img_data)
+ html = self.TITLEPAGE%dict(title=title, author=author,
+ date=strftime('%d %b, %Y'),
+ app=__appname__ +' '+__version__,
+ img=href)
+ id, href = self.oeb.manifest.generate('calibre-titlepage',
+ 'calibre-titlepage.xhtml')
+ return self.oeb.manifest.add(id, href, guess_type('t.xhtml')[0],
+ data=etree.fromstring(html))
+
+
+ def insert_cover(self):
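+        # Ensure the book opens on a cover page: reuse the guide's titlepage if
+        # present, otherwise synthesise one from the cover image, falling back
+        # to a generated default cover as a last resort.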
+ from calibre.ebooks.oeb.base import urldefrag
+ from calibre import guess_type
+ g, m = self.oeb.guide, self.oeb.manifest
+ if 'titlepage' not in g:
+ if 'cover' in g:
+ tp = self.TITLEPAGE_COVER%unquote(g['cover'].href)
+ id, href = m.generate('titlepage', 'titlepage.xhtml')
+ item = m.add(id, href, guess_type('t.xhtml')[0],
+ data=etree.fromstring(tp))
+ else:
+ item = self.default_cover()
+ else:
+ item = self.oeb.manifest.hrefs[
+ urldefrag(self.oeb.guide['titlepage'].href)[0]]
+ if item is not None:
+ self.oeb.spine.insert(0, item, True)
+ if 'cover' not in self.oeb.guide.refs:
+ self.oeb.guide.add('cover', 'Title Page', 'a')
+ self.oeb.guide.refs['cover'].href = item.href
+ if 'titlepage' in self.oeb.guide.refs:
+ self.oeb.guide.refs['titlepage'].href = item.href
+
+
+
+ def condense_ncx(self, ncx_path):
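+        # Strip insignificant whitespace from the NCX to keep it small; some
+        # readers (Adobe Digital Editions in particular) limit how large an
+        # individual flow or NCX file may be.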
+ if not self.opts.pretty_print:
+ tree = etree.parse(ncx_path)
+ for tag in tree.getroot().iter(tag=etree.Element):
+ if tag.text:
+ tag.text = tag.text.strip()
+ if tag.tail:
+ tag.tail = tag.tail.strip()
+ compressed = etree.tostring(tree.getroot(), encoding='utf-8')
+ open(ncx_path, 'wb').write(compressed)
+
+
+
+ def workaround_ade_quirks(self):
+ '''
+ Perform various markup transforms to get the output to render correctly
+ in the quirky ADE.
+ '''
+ from calibre.ebooks.oeb.base import XPNSMAP, XHTML
+ from lxml.etree import XPath as _XPath
+ from functools import partial
+ XPath = partial(_XPath, namespaces=XPNSMAP)
+
+ for x in self.oeb.spine:
+ root = x.data
+ body = XPath('//h:body')(root)
+ if body:
+ body = body[0]
+                # Replace <br> tags that are children of <body> as ADE doesn't handle them
+ if hasattr(body, 'xpath'):
+ for br in XPath('./h:br')(body):
+ if br.getparent() is None:
+ continue
+ try:
+ sibling = br.itersiblings().next()
+ except:
+ sibling = None
+ br.tag = XHTML('p')
+ br.text = u'\u00a0'
+ if (br.tail and br.tail.strip()) or sibling is None or \
+ getattr(sibling, 'tag', '') != XHTML('br'):
+ style = br.get('style', '').split(';')
+ style = filter(None, map(lambda x: x.strip(), style))
+ style.append('margin: 0pt; border:0pt; height:0pt')
+ br.set('style', '; '.join(style))
+ else:
+ sibling.getparent().remove(sibling)
+ if sibling.tail:
+ if not br.tail:
+ br.tail = ''
+ br.tail += sibling.tail
+
+ for tag in XPath('//h:embed')(root):
+ tag.getparent().remove(tag)
+ for tag in XPath('//h:object')(root):
+ if tag.get('type', '').lower().strip() in ('image/svg+xml',):
+ continue
+ tag.getparent().remove(tag)
+
+ for tag in XPath('//h:title|//h:style')(root):
+ if not tag.text:
+ tag.getparent().remove(tag)
+ for tag in XPath('//h:script')(root):
+ if not tag.text and not tag.get('src', False):
+ tag.getparent().remove(tag)
+
+ for tag in XPath('//h:form')(root):
+ tag.getparent().remove(tag)
+
+ for tag in XPath('//h:center')(root):
+ tag.tag = XHTML('div')
+ tag.set('style', 'text-align:center')
+
+ # ADE can't handle & in an img url
+ for tag in XPath('//h:img[@src]')(root):
+ tag.set('src', tag.get('src', '').replace('&', ''))
+
+ stylesheet = self.oeb.manifest.hrefs['stylesheet.css']
+ stylesheet.data.add('a { color: inherit; text-decoration: inherit; '
+ 'cursor: default; }')
+ stylesheet.data.add('a[href] { color: blue; '
+ 'text-decoration: underline; cursor:pointer; }')
+
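+        # Zero-width spaces (U+200B) and soft hyphens (U+00AD) trip up ADE;
+        # non-breaking hyphens (U+2011) are replaced with plain '-' below.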
+ special_chars = re.compile(u'[\u200b\u00ad]')
+ for elem in root.iterdescendants():
+ if getattr(elem, 'text', False):
+ elem.text = special_chars.sub('', elem.text)
+ elem.text = elem.text.replace(u'\u2011', '-')
+ if getattr(elem, 'tail', False):
+ elem.tail = special_chars.sub('', elem.tail)
+ elem.tail = elem.tail.replace(u'\u2011', '-')
+
+
+
diff --git a/src/calibre/ebooks/epub/pages.py b/src/calibre/ebooks/epub/pages.py
index 1ab5edde86..4737107a6c 100644
--- a/src/calibre/ebooks/epub/pages.py
+++ b/src/calibre/ebooks/epub/pages.py
@@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
import os, re
from itertools import count, chain
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
-from calibre.ebooks.oeb.base import OEBBook, DirWriter
+from calibre.ebooks.oeb.base import OEBBook
from lxml import etree, html
from lxml.etree import XPath
diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py
deleted file mode 100644
index a5cc6dfc7d..0000000000
--- a/src/calibre/ebooks/epub/split.py
+++ /dev/null
@@ -1,509 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Split the flows in an epub file to conform to size limitations.
-'''
-
-import os, math, logging, functools, collections, re, copy, sys
-
-from lxml.etree import XPath as _XPath
-from lxml import etree, html
-from lxml.cssselect import CSSSelector
-
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.ebooks.epub import tostring, rules
-from calibre import CurrentDir, LoggingInterface
-
-XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
-content = functools.partial(os.path.join, 'content')
-
-SPLIT_ATTR = 'cs'
-SPLIT_POINT_ATTR = 'csp'
-
-class SplitError(ValueError):
-
- def __init__(self, path, root):
- size = len(tostring(root))/1024.
- ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
- (os.path.basename(path), size))
-
-
-
-class Splitter(LoggingInterface):
-
- def __init__(self, path, opts, stylesheet_map, opf):
- LoggingInterface.__init__(self, logging.getLogger('htmlsplit'))
- self.setup_cli_handler(opts.verbose)
- self.path = path
- self.always_remove = not opts.preserve_tag_structure or \
- os.stat(content(path)).st_size > 5*opts.profile.flow_size
- self.base = (os.path.splitext(path)[0].replace('%', '%%') + '_split_%d.html')
- self.opts = opts
- self.orig_size = os.stat(content(path)).st_size
- self.log_info('\tSplitting %s (%d KB)', path, self.orig_size/1024.)
- root = html.fromstring(open(content(path)).read())
-
- self.page_breaks, self.trees = [], []
- self.split_size = 0
-
- # Split on page breaks
- self.splitting_on_page_breaks = True
- if not opts.dont_split_on_page_breaks:
- self.log_info('\tSplitting on page breaks...')
- if self.path in stylesheet_map:
- self.find_page_breaks(stylesheet_map[self.path], root)
- self.split_on_page_breaks(root.getroottree())
- trees = list(self.trees)
- else:
- self.trees = [root.getroottree()]
- trees = list(self.trees)
-
- # Split any remaining over-sized trees
- self.splitting_on_page_breaks = False
- if self.opts.profile.flow_size < sys.maxint:
- lt_found = False
- self.log_info('\tLooking for large trees...')
- self.tree_map = {}
- for i, tree in enumerate(list(trees)):
- self.split_trees = []
- size = len(tostring(tree.getroot()))
- if size > self.opts.profile.flow_size:
- lt_found = True
- try:
- self.split_to_size(tree)
- self.tree_map[tree] = self.split_trees
- except (SplitError, RuntimeError): # Splitting fails
- if not self.always_remove:
- self.always_remove = True
- self.split_trees = []
- self.split_to_size(tree)
- self.tree_map[tree] = self.split_trees
- else:
- raise
- t = []
- for x in trees:
- t.extend(self.tree_map.get(x, [x]))
- trees = t
- if not lt_found:
- self.log_info('\tNo large trees found')
-
- self.trees = trees
- self.was_split = len(self.trees) > 1
- if self.was_split:
- self.commit()
- self.log_info('\t\tSplit into %d parts.', len(self.trees))
- if self.opts.verbose:
- for f in self.files:
- self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
- self.fix_opf(opf)
-
- self.trees = None
-
-
- def split_text(self, text, root, size):
- self.log_debug('\t\t\tSplitting text of length: %d'%len(text))
- rest = text.replace('\r', '')
- parts = re.split('\n\n', rest)
- self.log_debug('\t\t\t\tFound %d parts'%len(parts))
- if max(map(len, parts)) > size:
-            raise SplitError('Cannot split as file contains a <pre> tag with a very large paragraph', root)
- ans = []
- buf = ''
- for part in parts:
- if len(buf) + len(part) < size:
- buf += '\n\n'+part
- else:
- ans.append(buf)
- buf = part
- return ans
-
-
- def split_to_size(self, tree):
- self.log_debug('\t\tSplitting...')
- root = tree.getroot()
-        # Split large <pre> tags
- for pre in list(root.xpath('//pre')):
- text = u''.join(pre.xpath('descendant::text()'))
- pre.text = text
- for child in list(pre.iterchildren()):
- pre.remove(child)
- if len(pre.text) > self.opts.profile.flow_size*0.5:
- frags = self.split_text(pre.text, root, int(0.2*self.opts.profile.flow_size))
- new_pres = []
- for frag in frags:
- pre2 = copy.copy(pre)
- pre2.text = frag
- pre2.tail = u''
- new_pres.append(pre2)
- new_pres[-1].tail = pre.tail
- p = pre.getparent()
- i = p.index(pre)
- p[i:i+1] = new_pres
-
- split_point, before = self.find_split_point(root)
- if split_point is None or self.split_size > 6*self.orig_size:
- if not self.always_remove:
- self.log_warn(_('\t\tToo much markup. Re-splitting without '
- 'structure preservation. This may cause '
- 'incorrect rendering.'))
- raise SplitError(self.path, root)
-
- for t in self.do_split(tree, split_point, before):
- r = t.getroot()
- if self.is_page_empty(r):
- continue
- size = len(tostring(r))
- if size <= self.opts.profile.flow_size:
- self.split_trees.append(t)
- #print tostring(t.getroot(), pretty_print=True)
- self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
- len(self.split_trees), size/1024.)
- self.split_size += size
- else:
- self.split_to_size(t)
-
- def is_page_empty(self, root):
- body = root.find('body')
- if body is None:
- return False
- txt = re.sub(r'\s+', '', html.tostring(body, method='text', encoding=unicode))
- if len(txt) > 4:
- #if len(txt) < 100:
- # print 1111111, html.tostring(body, method='html', encoding=unicode)
- return False
- for img in root.xpath('//img'):
- if img.get('style', '') != 'display:none':
- return False
- return True
-
- def do_split(self, tree, split_point, before):
- '''
- Split ``tree`` into a *before* and *after* tree at ``split_point``,
- preserving tag structure, but not duplicating any text.
- All tags that have had their text and tail
- removed have the attribute ``calibre_split`` set to 1.
-
- :param before: If True tree is split before split_point, otherwise after split_point
- :return: before_tree, after_tree
- '''
- path = tree.getpath(split_point)
- tree, tree2 = copy.deepcopy(tree), copy.deepcopy(tree)
- root = tree.getroot()
- root2 = tree2.getroot()
- body, body2 = root.body, root2.body
- split_point = root.xpath(path)[0]
- split_point2 = root2.xpath(path)[0]
-
- def nix_element(elem, top=True):
- if self.always_remove:
- parent = elem.getparent()
- index = parent.index(elem)
- if top:
- parent.remove(elem)
- else:
- index = parent.index(elem)
- parent[index:index+1] = list(elem.iterchildren())
-
- else:
- elem.text = u''
- elem.tail = u''
- elem.set(SPLIT_ATTR, '1')
- if elem.tag.lower() in ['ul', 'ol', 'dl', 'table', 'hr', 'img']:
- elem.set('style', 'display:none')
-
- def fix_split_point(sp):
- if not self.splitting_on_page_breaks:
- sp.set('style', sp.get('style', '')+'page-break-before:avoid;page-break-after:avoid')
-
- # Tree 1
- hit_split_point = False
- for elem in list(body.iterdescendants(etree.Element)):
- if elem.get(SPLIT_ATTR, '0') == '1':
- continue
- if elem is split_point:
- hit_split_point = True
- if before:
- nix_element(elem)
- fix_split_point(elem)
- continue
- if hit_split_point:
- nix_element(elem)
-
-
- # Tree 2
- hit_split_point = False
- for elem in list(body2.iterdescendants(etree.Element)):
- if elem.get(SPLIT_ATTR, '0') == '1':
- continue
- if elem is split_point2:
- hit_split_point = True
- if not before:
- nix_element(elem, top=False)
- fix_split_point(elem)
- continue
- if not hit_split_point:
- nix_element(elem, top=False)
-
- return tree, tree2
-
-
- def split_on_page_breaks(self, orig_tree):
- ordered_ids = []
- for elem in orig_tree.xpath('//*[@id]'):
- id = elem.get('id')
- if id in self.page_break_ids:
- ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
-
- self.trees = []
- tree = orig_tree
- for pattern, before in ordered_ids:
- self.log_info('\t\tSplitting on page-break')
- elem = pattern(tree)
- if elem:
- before, after = self.do_split(tree, elem[0], before)
- self.trees.append(before)
- tree = after
- self.trees.append(tree)
- self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
-
-
-
- def find_page_breaks(self, stylesheets, root):
- '''
- Find all elements that have either page-break-before or page-break-after set.
- Populates `self.page_breaks` with id based XPath selectors (for elements that don't
- have ids, an id is created).
- '''
- page_break_selectors = set([])
- for rule in rules(stylesheets):
- before = getattr(rule.style.getPropertyCSSValue('page-break-before'), 'cssText', '').strip().lower()
- after = getattr(rule.style.getPropertyCSSValue('page-break-after'), 'cssText', '').strip().lower()
- try:
- if before and before != 'avoid':
- page_break_selectors.add((CSSSelector(rule.selectorText), True))
- except:
- pass
- try:
- if after and after != 'avoid':
- page_break_selectors.add((CSSSelector(rule.selectorText), False))
- except:
- pass
-
- page_breaks = set([])
- for selector, before in page_break_selectors:
- for elem in selector(root):
- elem.pb_before = before
- page_breaks.add(elem)
-
- for i, elem in enumerate(root.iter()):
- elem.pb_order = i
-
- page_breaks = list(page_breaks)
- page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
- self.page_break_ids = []
- for i, x in enumerate(page_breaks):
- x.set('id', x.get('id', 'calibre_pb_%d'%i))
- id = x.get('id')
- self.page_breaks.append((XPath('//*[@id="%s"]'%id), x.pb_before))
- self.page_break_ids.append(id)
-
-
- def find_split_point(self, root):
- '''
- Find the tag at which to split the tree rooted at `root`.
- Search order is:
- * Heading tags
-            * <div> tags
-            * <pre> tags
-            * <hr> tags
-            * <p> tags
-            * <br> tags
-            * <li> tags
-
-        We try to split in the "middle" of the file (as defined by tag counts).
- '''
- def pick_elem(elems):
- if elems:
- elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') != '1'\
- and i.get(SPLIT_ATTR, '0') != '1']
- if elems:
- i = int(math.floor(len(elems)/2.))
- elems[i].set(SPLIT_POINT_ATTR, '1')
- return elems[i]
-
- for path in (
- '//*[re:match(name(), "h[1-6]", "i")]',
- '/html/body/div',
- '//pre',
- '//hr',
- '//p',
- '//div',
- '//br',
- '//li',
- ):
- elems = root.xpath(path,
- namespaces={'re':'http://exslt.org/regular-expressions'})
- elem = pick_elem(elems)
- if elem is not None:
- try:
- XPath(elem.getroottree().getpath(elem))
- except:
- continue
- return elem, True
-
- return None, True
-
- def commit(self):
- '''
- Commit all changes caused by the split. This removes the previously
- introduced ``calibre_split`` attribute and calculates an *anchor_map* for
- all anchors in the original tree. Internal links are re-directed. The
- original file is deleted and the split files are saved.
- '''
- self.anchor_map = collections.defaultdict(lambda :self.base%0)
- self.files = []
-
- for i, tree in enumerate(self.trees):
- root = tree.getroot()
- self.files.append(self.base%i)
- for elem in root.xpath('//*[@id]'):
- if elem.get(SPLIT_ATTR, '0') == '0':
- self.anchor_map[elem.get('id')] = self.files[-1]
- for elem in root.xpath('//*[@%s or @%s]'%(SPLIT_ATTR, SPLIT_POINT_ATTR)):
- elem.attrib.pop(SPLIT_ATTR, None)
- elem.attrib.pop(SPLIT_POINT_ATTR, '0')
-
- for current, tree in zip(self.files, self.trees):
- for a in tree.getroot().xpath('//a[@href]'):
- href = a.get('href').strip()
- if href.startswith('#'):
- anchor = href[1:]
- file = self.anchor_map[anchor]
- if file != current:
- a.set('href', file+href)
- open(content(current), 'wb').\
- write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
-
- os.remove(content(self.path))
-
-
- def fix_opf(self, opf):
- '''
- Fix references to the split file in the OPF.
- '''
- items = [item for item in opf.itermanifest() if item.get('href') == 'content/'+self.path]
- new_items = [('content/'+f, None) for f in self.files]
- id_map = {}
- for item in items:
- id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
-
- for id in id_map.keys():
- opf.replace_spine_items_by_idref(id, id_map[id])
-
- for ref in opf.iterguide():
- href = ref.get('href', '')
- if href.startswith('content/'+self.path):
- href = href.split('#')
- frag = None
- if len(href) > 1:
- frag = href[1]
- if frag not in self.anchor_map:
- self.log_warning('\t\tUnable to re-map OPF link', href)
- continue
- new_file = self.anchor_map[frag]
- ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
-
-
-
-def fix_content_links(html_files, changes, opts):
- split_files = [f.path for f in changes]
- anchor_maps = [f.anchor_map for f in changes]
- files = list(html_files)
- for j, f in enumerate(split_files):
- try:
- i = files.index(f)
- files[i:i+1] = changes[j].files
- except ValueError:
- continue
-
- for htmlfile in files:
- changed = False
- root = html.fromstring(open(content(htmlfile), 'rb').read())
- for a in root.xpath('//a[@href]'):
- href = a.get('href')
- if not href.startswith('#'):
- href = href.split('#')
- anchor = href[1] if len(href) > 1 else None
- href = href[0]
- if href in split_files:
- try:
- newf = anchor_maps[split_files.index(href)][anchor]
- except:
- print '\t\tUnable to remap HTML link:', href, anchor
- continue
- frag = ('#'+anchor) if anchor else ''
- a.set('href', newf+frag)
- changed = True
-
- if changed:
- open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
-
-def fix_ncx(path, changes):
- split_files = [f.path for f in changes]
- anchor_maps = [f.anchor_map for f in changes]
- tree = etree.parse(path)
- changed = False
- for content in tree.getroot().xpath('//x:content[@src]',
- namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
- href = content.get('src')
- if not href.startswith('#'):
- href = href.split('#')
- anchor = href[1] if len(href) > 1 else None
- href = href[0].split('/')[-1]
- if href in split_files:
- try:
- newf = anchor_maps[split_files.index(href)][anchor]
- except:
- print 'Unable to remap NCX link:', href, anchor
- frag = ('#'+anchor) if anchor else ''
- content.set('src', 'content/'+newf+frag)
- changed = True
- if changed:
- open(path, 'wb').write(etree.tostring(tree.getroot(), encoding='UTF-8', xml_declaration=True))
-
-def find_html_files(opf):
- '''
- Find all HTML files referenced by `opf`.
- '''
- html_files = []
- for item in opf.itermanifest():
- if 'html' in item.get('media-type', '').lower():
- f = item.get('href').split('/')[-1]
- f2 = f.replace('&', '%26')
- if not os.path.exists(content(f)) and os.path.exists(content(f2)):
- f = f2
- item.set('href', item.get('href').replace('&', '%26'))
- if os.path.exists(content(f)):
- html_files.append(f)
- return html_files
-
-
-def split(pathtoopf, opts, stylesheet_map):
- pathtoopf = os.path.abspath(pathtoopf)
- opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
-
- with CurrentDir(os.path.dirname(pathtoopf)):
- html_files = find_html_files(opf)
- changes = [Splitter(f, opts, stylesheet_map, opf) for f in html_files]
- changes = [c for c in changes if c.was_split]
-
- fix_content_links(html_files, changes, opts)
- for item in opf.itermanifest():
- if item.get('media-type', '') == 'application/x-dtbncx+xml':
- fix_ncx(item.get('href'), changes)
- break
-
- open(pathtoopf, 'wb').write(opf.render())
diff --git a/src/calibre/ebooks/lrf/fb2/__init__.py b/src/calibre/ebooks/fb2/__init__.py
similarity index 100%
rename from src/calibre/ebooks/lrf/fb2/__init__.py
rename to src/calibre/ebooks/fb2/__init__.py
diff --git a/src/calibre/ebooks/lrf/fb2/fb2.xsl b/src/calibre/ebooks/fb2/fb2.xsl
similarity index 100%
rename from src/calibre/ebooks/lrf/fb2/fb2.xsl
rename to src/calibre/ebooks/fb2/fb2.xsl
diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
new file mode 100644
index 0000000000..3a5806b143
--- /dev/null
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -0,0 +1,154 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into FB2 markup
+'''
+
+import os
+import re
+from base64 import b64encode
+
+from calibre import entity_to_unicode
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.constants import __appname__, __version__
+
+from BeautifulSoup import BeautifulSoup
+
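+# Map XHTML tags to their nearest FB2 equivalents; anything not listed here
+# falls back to a plain <p> in dump_text().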
+TAG_MAP = {
+ 'b' : 'strong',
+ 'i' : 'emphasis',
+ 'p' : 'p',
+ 'div' : 'p',
+}
+
+STYLES = [
+ ('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
+ ('font-style', {'italic' : 'emphasis'}),
+]
+
+class FB2MLizer(object):
+ def __init__(self, ignore_tables=False):
+ self.ignore_tables = ignore_tables
+
+ def extract_content(self, oeb_book, opts):
+ oeb_book.logger.info('Converting XHTML to FB2 markup...')
+ self.oeb_book = oeb_book
+ self.opts = opts
+ return self.fb2mlize_spine()
+
+ def fb2mlize_spine(self):
+ output = self.fb2_header()
+ if 'titlepage' in self.oeb_book.guide:
+ href = self.oeb_book.guide['titlepage'].href
+ item = self.oeb_book.manifest.hrefs[href]
+ if item.spine_position is None:
+ stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+ output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+ for item in self.oeb_book.spine:
+ stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+ output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+ output += self.fb2_body_footer()
+ output += self.fb2mlize_images()
+ output += self.fb2_footer()
+ output = self.clean_text(output)
+ return BeautifulSoup(output.encode('utf-8')).prettify()
+
+ def fb2_header(self):
+        return u'<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" ' \
+        'xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"><description><title-info> ' \
+        '<book-title>%s</book-title> ' \
+        '</title-info><document-info> ' \
+        '<program-used>%s - %s</program-used> ' \
+        '</document-info></description><body><section>' % (self.oeb_book.metadata.title[0].value, __appname__, __version__)
+
+ def fb2_body_footer(self):
+        return u'</section></body>'
+
+ def fb2_footer(self):
+        return u'</FictionBook>'
+
+ def fb2mlize_images(self):
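+        # FB2 has no external resources: every image in the manifest is
+        # embedded as base64 text inside a <binary> element keyed by file name.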
+ images = u''
+ for item in self.oeb_book.manifest:
+ if item.media_type in OEB_IMAGES:
+ data = b64encode(item.data)
+                images += '<binary id="%s" content-type="%s">%s</binary>' % (os.path.basename(item.href), item.media_type, data)
+ return images
+
+ def clean_text(self, text):
+ for entity in set(re.findall('&.+?;', text)):
+ mo = re.search('(%s)' % entity[1:-1], text)
+ text = text.replace(entity, entity_to_unicode(mo))
+
+ text = text.replace('&', '')
+
+ return text
+
+ def dump_text(self, elem, stylizer, tag_stack=[]):
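+        # Recursively serialise an XHTML subtree into FB2 markup. tag_stack
+        # tracks the FB2 tags currently open so nested elements do not reopen
+        # them; each level closes only the tags it opened itself.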
+ if not isinstance(elem.tag, basestring) \
+ or namespace(elem.tag) != XHTML_NS:
+ return u''
+
+ fb2_text = u''
+ style = stylizer.style(elem)
+
+ if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+ or style['visibility'] == 'hidden':
+ return u''
+
+ tag = barename(elem.tag)
+ tag_count = 0
+
+ if tag == 'img':
+            fb2_text += '<image xlink:href="#%s" />' % os.path.basename(elem.attrib['src'])
+
+
+ fb2_tag = TAG_MAP.get(tag, 'p')
+ if fb2_tag and fb2_tag not in tag_stack:
+ tag_count += 1
+ fb2_text += '<%s>' % fb2_tag
+ tag_stack.append(fb2_tag)
+
+ # Processes style information
+ for s in STYLES:
+ style_tag = s[1].get(style[s[0]], None)
+ if style_tag:
+ tag_count += 1
+ fb2_text += '<%s>' % style_tag
+ tag_stack.append(style_tag)
+
+ if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
+ fb2_text += elem.text
+
+ for item in elem:
+ fb2_text += self.dump_text(item, stylizer, tag_stack)
+
+ close_tag_list = []
+ for i in range(0, tag_count):
+ close_tag_list.insert(0, tag_stack.pop())
+
+ fb2_text += self.close_tags(close_tag_list)
+
+ if hasattr(elem, 'tail') and elem.tail != None and elem.tail.strip() != '':
+ if 'p' not in tag_stack:
+                fb2_text += '<p>%s</p>' % elem.tail
+ else:
+ fb2_text += elem.tail
+
+ return fb2_text
+
+ def close_tags(self, tags):
+ fb2_text = u''
+ for i in range(0, len(tags)):
+ fb2_tag = tags.pop()
+            fb2_text += '</%s>' % fb2_tag
+
+ return fb2_text
+
diff --git a/src/calibre/ebooks/fb2/input.py b/src/calibre/ebooks/fb2/input.py
new file mode 100644
index 0000000000..d96758a4bd
--- /dev/null
+++ b/src/calibre/ebooks/fb2/input.py
@@ -0,0 +1,74 @@
+from __future__ import with_statement
+__license__ = 'GPL v3'
+__copyright__ = '2008, Anatoly Shipitsin '
+"""
+Convert .fb2 files to HTML
+"""
+import os
+from base64 import b64decode
+from lxml import etree
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre import guess_type
+
+FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
+
+class FB2Input(InputFormatPlugin):
+
+ name = 'FB2 Input'
+ author = 'Anatoly Shipitsin'
+ description = 'Convert FB2 files to HTML'
+ file_types = set(['fb2'])
+
+ recommendations = set([
+ ('level1_toc', '//h:h1', OptionRecommendation.MED),
+ ('level2_toc', '//h:h2', OptionRecommendation.MED),
+ ('level3_toc', '//h:h3', OptionRecommendation.MED),
+ ])
+
+ def convert(self, stream, options, file_ext, log,
+ accelerators):
+ from calibre.resources import fb2_xsl
+ from calibre.ebooks.metadata.opf2 import OPFCreator
+ from calibre.ebooks.metadata.meta import get_metadata
+ from calibre.ebooks.oeb.base import XLINK_NS
+ NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
+
+ log.debug('Parsing XML...')
+ parser = etree.XMLParser(recover=True, no_network=True)
+ doc = etree.parse(stream, parser)
+ self.extract_embedded_content(doc)
+ log.debug('Converting XML to HTML...')
+ styledoc = etree.fromstring(fb2_xsl)
+
+ transform = etree.XSLT(styledoc)
+ result = transform(doc)
+ open('index.xhtml', 'wb').write(transform.tostring(result))
+ stream.seek(0)
+ mi = get_metadata(stream, 'fb2')
+ if not mi.title:
+ mi.title = _('Unknown')
+ if not mi.authors:
+ mi.authors = [_('Unknown')]
+ opf = OPFCreator(os.getcwdu(), mi)
+ entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
+ opf.create_manifest(entries)
+ opf.create_spine(['index.xhtml'])
+
+ for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
+ href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
+ if href is not None:
+ if href.startswith('#'):
+ href = href[1:]
+ opf.guide.set_cover(os.path.abspath(href))
+
+ opf.render(open('metadata.opf', 'wb'))
+ return os.path.join(os.getcwd(), 'metadata.opf')
+
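+    # FB2 stores images and other assets as base64 <binary> elements; write
+    # each one to the working directory under its id so the XSLT generated
+    # HTML can reference it.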
+ def extract_embedded_content(self, doc):
+ for elem in doc.xpath('./*'):
+ if 'binary' in elem.tag and elem.attrib.has_key('id'):
+ fname = elem.attrib['id']
+ data = b64decode(elem.text.strip())
+ open(fname, 'wb').write(data)
+
diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py
new file mode 100644
index 0000000000..67ee9f468e
--- /dev/null
+++ b/src/calibre/ebooks/fb2/output.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ebooks.fb2.fb2ml import FB2MLizer
+
+class FB2Output(OutputFormatPlugin):
+
+ name = 'FB2 Output'
+ author = 'John Schember'
+ file_type = 'fb2'
+
+ def convert(self, oeb_book, output_path, input_plugin, opts, log):
+ fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
+ fb2_content = fb2mlizer.extract_content(oeb_book, opts)
+
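+        # output_path may be a filesystem path or an already open stream; only
+        # close the stream if it was opened here.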
+ close = False
+ if not hasattr(output_path, 'write'):
+ close = True
+ if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '':
+ os.makedirs(os.path.dirname(output_path))
+ out_stream = open(output_path, 'wb')
+ else:
+ out_stream = output_path
+
+ out_stream.seek(0)
+ out_stream.truncate()
+ out_stream.write(fb2_content)
+
+ if close:
+ out_stream.close()
+
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
deleted file mode 100644
index 3365df576e..0000000000
--- a/src/calibre/ebooks/html.py
+++ /dev/null
@@ -1,1192 +0,0 @@
-from __future__ import with_statement
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-'''
-Code to recursively parse HTML files and create an open ebook in a specified
-directory or zip file. All the action starts in :function:`create_dir`.
-'''
-
-import sys, re, os, shutil, logging, tempfile, cStringIO, operator, functools
-from urlparse import urlparse, urlunparse
-from urllib import unquote
-
-from lxml import etree
-from lxml.html import HtmlElementClassLookup, HTMLParser as _HTMLParser, \
- fromstring as _fromstring, tostring as _tostring, \
- soupparser, HtmlElement
-from lxml.etree import XPath
-get_text = XPath("//text()")
-
-from calibre import LoggingInterface, unicode_path, entity_to_unicode
-from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS
-from calibre.utils.config import Config, StringConfig
-from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.meta import get_metadata
-from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
-from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
-from calibre.utils.zipfile import ZipFile
-from cssutils import CSSParser
-
-class HTMLElement(HtmlElement):
-
- @apply
- def specified_font_size():
-
- def fget(self):
- ans = self.get('specified_font_size', '')
- if not ans:
- return lambda x: x
- if ans.startswith('f'):
- return functools.partial(operator.mul, float(ans[1:]))
- return float(ans)
-
- def fset(self, val):
- self.set('specified_font_size', ('f'+repr(val(1))) if callable(val) else repr(val))
-
- return property(fget=fget, fset=fset)
-
- @apply
- def computed_font_size():
- def fget(self):
- ans = self.get('computed_font_size', '')
- if ans == '':
- return None
- return float(ans)
-
- def fset(self, val):
- self.set('computed_font_size', repr(val))
-
- return property(fget=fget, fset=fset)
-
- def remove_font_size_information(self):
- for elem in self.iter():
- for p in ('computed', 'specified'):
- elem.attrib.pop(p+'_font_size', None)
-
- def getpath(self):
- return self.getroottree().getpath(self)
-
-class Lookup(HtmlElementClassLookup):
-
- def lookup(self, node_type, document, namespace, name):
- if node_type == 'element':
- return HTMLElement
- return HtmlElementClassLookup.lookup(self, node_type, document, namespace, name)
-
-class HTMLParser(_HTMLParser):
-
- def __init__(self, **kwargs):
- super(HTMLParser, self).__init__(**kwargs)
- self.set_element_class_lookup(Lookup())
-
-parser = HTMLParser()
-
-def fromstring(raw, **kw):
- return _fromstring(raw, parser=parser, **kw)
-
-def tostring(root, pretty_print=False):
- return _tostring(root, encoding='utf-8', method='xml',
- include_meta_content_type=True,
- pretty_print=pretty_print)
-
-class Link(object):
- '''
- Represents a link in a HTML file.
- '''
-
- @classmethod
- def url_to_local_path(cls, url, base):
- path = urlunparse(('', '', url.path, url.params, url.query, ''))
- path = unquote(path)
- if os.path.isabs(path):
- return path
- return os.path.abspath(os.path.join(base, path))
-
- def __init__(self, url, base):
- '''
- :param url: The url this link points to. Must be an unquoted unicode string.
- :param base: The base directory that relative URLs are with respect to.
- Must be a unicode string.
- '''
- assert isinstance(url, unicode) and isinstance(base, unicode)
- self.url = url
- self.parsed_url = urlparse(self.url)
- self.is_local = self.parsed_url.scheme in ('', 'file')
- self.is_internal = self.is_local and not bool(self.parsed_url.path)
- self.path = None
- self.fragment = unquote(self.parsed_url.fragment)
- if self.is_local and not self.is_internal:
- self.path = self.url_to_local_path(self.parsed_url, base)
-
- def __hash__(self):
- if self.path is None:
- return hash(self.url)
- return hash(self.path)
-
- def __eq__(self, other):
- return self.path == getattr(other, 'path', other)
-
- def __str__(self):
- return u'Link: %s --> %s'%(self.url, self.path)
-
-
-class IgnoreFile(Exception):
-
- def __init__(self, msg, errno):
- Exception.__init__(self, msg)
- self.doesnt_exist = errno == 2
- self.errno = errno
-
-class HTMLFile(object):
- '''
- Contains basic information about an HTML file. This
- includes a list of links to other files as well as
- the encoding of each file. Also tries to detect if the file is not a HTML
- file in which case :member:`is_binary` is set to True.
-
- The encoding of the file is available as :member:`encoding`.
- '''
-
- HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
-    TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
- LINK_PAT = re.compile(
-        r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
- re.DOTALL|re.IGNORECASE)
-
- def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
- '''
- :param level: The level of this file. Should be 0 for the root file.
- :param encoding: Use `encoding` to decode HTML.
- :param referrer: The :class:`HTMLFile` that first refers to this file.
- '''
- self.path = unicode_path(path_to_html_file, abs=True)
- self.title = os.path.splitext(os.path.basename(self.path))[0]
- self.base = os.path.dirname(self.path)
- self.level = level
- self.referrer = referrer
- self.links = []
-
- try:
- with open(self.path, 'rb') as f:
- src = f.read()
- except IOError, err:
- msg = 'Could not read from file: %s with error: %s'%(self.path, unicode(err))
- if level == 0:
- raise IOError(msg)
- raise IgnoreFile(msg, err.errno)
-
- self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))
- if not self.is_binary:
- if encoding is None:
- encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
- self.encoding = encoding
- else:
- self.encoding = encoding
-
- src = src.decode(encoding, 'replace')
- match = self.TITLE_PAT.search(src)
- self.title = match.group(1) if match is not None else self.title
- self.find_links(src)
-
-
-
- def __eq__(self, other):
- return self.path == getattr(other, 'path', other)
-
- def __str__(self):
- return u'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
-
- def __repr__(self):
- return str(self)
-
-
- def find_links(self, src):
- for match in self.LINK_PAT.finditer(src):
- url = None
- for i in ('url1', 'url2', 'url3'):
- url = match.group(i)
- if url:
- break
- link = self.resolve(url)
- if link not in self.links:
- self.links.append(link)
-
- def resolve(self, url):
- return Link(url, self.base)
-
-
-def depth_first(root, flat, visited=set([])):
- yield root
- visited.add(root)
- for link in root.links:
- if link.path is not None and link not in visited:
- try:
- index = flat.index(link)
- except ValueError: # Can happen if max_levels is used
- continue
- hf = flat[index]
- if hf not in visited:
- yield hf
- visited.add(hf)
- for hf in depth_first(hf, flat, visited):
- if hf not in visited:
- yield hf
- visited.add(hf)
-
-
-def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None):
- '''
- Recursively traverse all links in the HTML file.
-
- :param max_levels: Maximum levels of recursion. Must be non-negative. 0
- implies that no links in the root HTML file are followed.
- :param encoding: Specify character encoding of HTML files. If `None` it is
- auto-detected.
- :return: A pair of lists (breadth_first, depth_first). Each list contains
- :class:`HTMLFile` objects.
- '''
- assert max_levels >= 0
- level = 0
- flat = [HTMLFile(path_to_html_file, level, encoding, verbose)]
- next_level = list(flat)
- while level < max_levels and len(next_level) > 0:
- level += 1
- nl = []
- for hf in next_level:
- rejects = []
- for link in hf.links:
- if link.path is None or link.path in flat:
- continue
- try:
- nf = HTMLFile(link.path, level, encoding, verbose, referrer=hf)
- if nf.is_binary:
- raise IgnoreFile('%s is a binary file'%nf.path, -1)
- nl.append(nf)
- flat.append(nf)
- except IgnoreFile, err:
- rejects.append(link)
- if not err.doesnt_exist or verbose > 1:
- print repr(err)
- for link in rejects:
- hf.links.remove(link)
-
- next_level = list(nl)
- orec = sys.getrecursionlimit()
- sys.setrecursionlimit(500000)
- try:
- return flat, list(depth_first(flat[0], flat))
- finally:
- sys.setrecursionlimit(orec)
-
-
-def opf_traverse(opf_reader, verbose=0, encoding=None):
- '''
- Return a list of :class:`HTMLFile` objects in the order specified by the
- `` element of the OPF.
-    `<spine>` element of the OPF.
- :param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance.
- :param encoding: Specify character encoding of HTML files. If `None` it is
- auto-detected.
- '''
- if not opf_reader.spine:
- raise ValueError('OPF does not have a spine')
- flat = []
- for path in opf_reader.spine.items():
- path = os.path.abspath(path)
- if path not in flat:
- flat.append(os.path.abspath(path))
- for item in opf_reader.manifest:
- if 'html' in item.mime_type:
- path = os.path.abspath(item.path)
- if path not in flat:
- flat.append(path)
- for i, path in enumerate(flat):
- if not os.path.exists(path):
- path = path.replace('&', '%26')
- if os.path.exists(path):
- flat[i] = path
- for item in opf_reader.itermanifest():
- item.set('href', item.get('href').replace('&', '%26'))
- ans = []
- for path in flat:
- if os.path.exists(path):
- ans.append(HTMLFile(path, 0, encoding, verbose))
- else:
- print 'WARNING: OPF spine item %s does not exist'%path
- ans = [f for f in ans if not f.is_binary]
- return ans
-
-
-convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', 'apos', 'lt', 'gt', 'amp'])
-_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
-
-def sanitize_head(match):
- x = match.group(1)
- x = _span_pat.sub('', x)
-    return '<head>\n'+x+'\n</head>'
-
-class PreProcessor(object):
- PREPROCESS = [
- # Some idiotic HTML generators (Frontpage I'm looking at you)
-                  # Put all sorts of crap into <head>. This messes up lxml
-                  (re.compile(r'<head[^>]*>(.*?)</head>', re.IGNORECASE|re.DOTALL),
- sanitize_head),
- # Convert all entities, since lxml doesn't handle them well
- (re.compile(r'&(\S+?);'), convert_entities),
-                  # Remove the <!DOCTYPE declaration
-                  (re.compile(r'<!DOCTYPE.+?>', re.IGNORECASE),
-                   lambda match: ''),
- ]
-
- # Fix pdftohtml markup
- PDFTOHTML = [
-                 # Remove <hr> tags
-                 (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: ' '),
- # Remove page numbers
-                 (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
-                 # Remove <br> and replace <br><br> with <p>