Sync to pluginize

This commit is contained in:
John Schember 2009-03-07 13:58:12 -05:00
commit 413da29dec
60 changed files with 2646 additions and 1895 deletions

View File

@ -2,9 +2,9 @@
<?eclipse-pydev version="1.0"?> <?eclipse-pydev version="1.0"?>
<pydev_project> <pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/calibre/src</path> <path>/calibre-pluginize/src</path>
</pydev_pathproperty> </pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project> </pydev_project>

View File

@ -2,7 +2,9 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, os, re, logging, time, subprocess, atexit, mimetypes import sys, os, re, logging, time, subprocess, atexit, mimetypes, \
__builtin__
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint from htmlentitydefs import name2codepoint
from math import floor from math import floor
from logging import Formatter from logging import Formatter
@ -73,26 +75,26 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
return one.replace('..', '_') return one.replace('..', '_')
def prints(*args, **kwargs):
    '''
    Print arguments safely by encoding unicode ones to preferred_encoding.
    Has the same signature as the print function from Python 3.

    Every positional argument is written to ``file``, separated by ``sep``
    and followed by ``end``. Arguments that are not strings are converted
    with ``str()`` first (as the print statement does), so numbers and other
    objects can be printed instead of raising a TypeError in ``file.write``.

    :param file: File like object to write to. Defaults to ``sys.stdout``.
    :param sep:  String written between arguments. Defaults to a space.
    :param end:  String written after the last argument. Defaults to a
                 newline.
    '''
    file = kwargs.get('file', sys.stdout)
    sep = kwargs.get('sep', ' ')
    end = kwargs.get('end', '\n')
    last = len(args) - 1
    for i, arg in enumerate(args):
        if not isinstance(arg, basestring):
            # file.write() only accepts strings
            arg = str(arg)
        if isinstance(arg, unicode):
            arg = arg.encode(preferred_encoding)
        file.write(arg)
        if i != last:
            file.write(sep)
    file.write(end)
class CommandLineError(Exception): class CommandLineError(Exception):
pass pass
class ColoredFormatter(Formatter):
    '''
    Log formatter that wraps the level name of a record in the terminal
    color codes provided by ``terminal_controller``.
    '''

    def format(self, record):
        '''
        Return the formatted text of ``record`` with a colored level name.

        The level name is only replaced for the duration of the formatting.
        It is restored afterwards so that other handlers that process the
        same record do not see the escape codes, and so that formatting the
        record a second time still finds a recognizable level name.
        '''
        ln = record.__dict__['levelname']
        col = ''
        if ln == 'CRITICAL':
            col = terminal_controller.YELLOW
        elif ln == 'ERROR':
            col = terminal_controller.RED
        elif ln in ['WARN', 'WARNING']:
            col = terminal_controller.BLUE
        elif ln == 'INFO':
            col = terminal_controller.GREEN
        elif ln == 'DEBUG':
            col = terminal_controller.CYAN
        record.__dict__['levelname'] = col + ln + terminal_controller.NORMAL
        try:
            return Formatter.format(self, record)
        finally:
            # The record object is shared between all handlers of a logger
            record.__dict__['levelname'] = ln
def setup_cli_handlers(logger, level): def setup_cli_handlers(logger, level):
@ -316,66 +318,23 @@ def english_sort(x, y):
''' '''
return cmp(_spat.sub('', x), _spat.sub('', y)) return cmp(_spat.sub('', x), _spat.sub('', y))
class LoggingInterface: class ColoredFormatter(Formatter):
def __init__(self, logger): def format(self, record):
self.__logger = self.logger = logger ln = record.__dict__['levelname']
col = ''
def setup_cli_handler(self, verbosity): if ln == 'CRITICAL':
for handler in self.__logger.handlers: col = terminal_controller.YELLOW
if isinstance(handler, logging.StreamHandler): elif ln == 'ERROR':
return col = terminal_controller.RED
if os.environ.get('CALIBRE_WORKER', None) is not None and self.__logger.handlers: elif ln in ['WARN', 'WARNING']:
return col = terminal_controller.BLUE
stream = sys.stdout elif ln == 'INFO':
formatter = logging.Formatter() col = terminal_controller.GREEN
level = logging.INFO elif ln == 'DEBUG':
if verbosity > 0: col = terminal_controller.CYAN
formatter = ColoredFormatter('[%(levelname)s] %(message)s') if verbosity > 1 else \ record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
ColoredFormatter('%(levelname)s: %(message)s') return Formatter.format(self, record)
level = logging.DEBUG
if verbosity > 1:
stream = sys.stderr
handler = logging.StreamHandler(stream)
handler.setFormatter(formatter)
handler.setLevel(level)
self.__logger.addHandler(handler)
self.__logger.setLevel(level)
def ___log(self, func, msg, args, kwargs):
args = [msg] + list(args)
for i in range(len(args)):
if not isinstance(args[i], basestring):
continue
if sys.version_info[:2] > (2, 5):
if not isinstance(args[i], unicode):
args[i] = args[i].decode(preferred_encoding, 'replace')
elif isinstance(args[i], unicode):
args[i] = args[i].encode(preferred_encoding, 'replace')
func(*args, **kwargs)
def log_debug(self, msg, *args, **kwargs):
self.___log(self.__logger.debug, msg, args, kwargs)
def log_info(self, msg, *args, **kwargs):
self.___log(self.__logger.info, msg, args, kwargs)
def log_warning(self, msg, *args, **kwargs):
self.___log(self.__logger.warning, msg, args, kwargs)
def log_warn(self, msg, *args, **kwargs):
self.___log(self.__logger.warning, msg, args, kwargs)
def log_error(self, msg, *args, **kwargs):
self.___log(self.__logger.error, msg, args, kwargs)
def log_critical(self, msg, *args, **kwargs):
self.___log(self.__logger.critical, msg, args, kwargs)
def log_exception(self, msg, *args):
self.___log(self.__logger.exception, msg, args, {})
def walk(dir): def walk(dir):
''' A nice interface to os.walk ''' ''' A nice interface to os.walk '''

View File

@ -220,4 +220,6 @@ class MetadataWriterPlugin(Plugin):
''' '''
pass pass

View File

@ -242,8 +242,13 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
set_metadata(stream, mi) set_metadata(stream, mi)
plugins = [HTML2ZIP] from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.customize.profiles import input_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')] x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataWriter')] x.__name__.endswith('MetadataWriter')]
plugins += input_profiles

View File

@ -0,0 +1,189 @@
from __future__ import with_statement
'''
Defines the plugin system for conversions.
'''
import re, os, shutil
from lxml import html
from calibre import CurrentDir
from calibre.customize import Plugin
class ConversionOption(object):

    '''
    Class representing conversion options

    :param name: Name of the option. Must be a valid Python identifier.
    :param help: Help text for the option. Must not be empty.
    :param long_switch: Long command line switch. If not specified it is
                        derived from `name` by replacing underscores with
                        hyphens and prefixing ``--``.
    :param short_switch: Optional short command line switch.
    :param choices: Optional collection of allowed values.
    '''

    def __init__(self, name=None, help=None, long_switch=None,
            short_switch=None, choices=None):
        self.name = name
        self.help = help
        self.long_switch = long_switch
        self.short_switch = short_switch
        self.choices = choices

        # Validate before deriving the switch, so that a missing name is
        # reported as a ValueError instead of failing in name.replace()
        self.validate_parameters()

        if self.long_switch is None:
            self.long_switch = '--'+self.name.replace('_', '-')

    def validate_parameters(self):
        '''
        Validate the parameters passed to :method:`__init__`.

        Raises a ValueError if the name is missing or is not a valid Python
        identifier, or if the help text is missing.
        '''
        if not self.name:
            raise ValueError('You must set the name')
        # \Z anchors the match at the end of the string. Without it names
        # like 'foo-bar' are accepted because re.match only anchors the start
        if re.match(r'[a-zA-Z_][a-zA-Z0-9_]*\Z', self.name) is None:
            raise ValueError(self.name + ' is not a valid Python identifier')
        if not self.help:
            raise ValueError('You must set the help text')
class OptionRecommendation(object):
    '''
    An option recommendation. That is, an option as well as its recommended
    value and the level of the recommendation.
    '''

    # Recommendation levels
    LOW  = 1
    MED  = 2
    HIGH = 3

    def __init__(self, recommended_value=None, level=LOW, **kwargs):
        '''
        :param recommended_value: The recommended value. Must be None, a
                                  string or a number.
        :param level: One of `LOW`, `MED` or `HIGH`.
        :param option: An already constructed option object. If it is not
                       specified, the remaining keyword arguments are used
                       to create a :class:`ConversionOption`.
        '''
        # This parameter used to be misspelled, which made it impossible to
        # pass it as recommended_value=... Keep accepting the old spelling.
        if 'recommeded_value' in kwargs:
            recommended_value = kwargs.pop('recommeded_value')
        self.level = level
        self.recommended_value = recommended_value
        self.option = kwargs.pop('option', None)
        if self.option is None:
            self.option = ConversionOption(**kwargs)

        self.validate_parameters()

    def validate_parameters(self):
        '''
        Raise a ValueError if the recommended value is not one of the
        choices of the option (when it has any) or if it is neither None,
        a string nor a number.
        '''
        value = self.recommended_value
        if self.option.choices and value not in self.option.choices:
            raise ValueError('Recommended value not in choices')
        if value is None:
            return
        if not isinstance(value, (int, float, str, unicode)):
            raise ValueError(unicode(value) +
                    ' is not a string or a number')
class InputFormatPlugin(Plugin):
    '''
    InputFormatPlugins are responsible for converting a document into
    HTML+OPF+CSS+etc.
    The results of the conversion *must* be encoded in UTF-8.
    The main action happens in :method:`convert`.
    '''

    type = _('Conversion Input')
    can_be_disabled = False
    supported_platforms = ['windows', 'osx', 'linux']

    #: Set of file types for which this plugin should be run
    #: For example: ``set(['azw', 'mobi', 'prc'])``
    file_types     = set([])

    #: Options shared by all Input format plugins. Do not override
    #: in sub-classes. Use :member:`options` instead. Every option must be an
    #: instance of :class:`OptionRecommendation`.
    common_options = set([
        OptionRecommendation(name='debug_input',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Save the output from the input plugin to the specified '
                   'directory. Useful if you are unsure at which stage '
                   'of the conversion process a bug is occurring. '
                   'WARNING: This completely deletes the contents of '
                   'the specified directory.')
        ),

        OptionRecommendation(name='input_encoding',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Specify the character encoding of the input document. If '
                   'set this option will override any encoding declared by the '
                   'document itself. Particularly useful for documents that '
                   'do not declare an encoding or that have erroneous '
                   'encoding declarations.')
        ),

    ])

    #: Options to customize the behavior of this plugin. Every option must be an
    #: instance of :class:`OptionRecommendation`.
    options = set([])

    def convert(self, stream, options, file_ext, parse_cache, log):
        '''
        This method must be implemented in sub-classes. It must return
        the path to the created OPF file. All output should be contained in
        the current directory. If this plugin creates files outside the current
        directory they must be deleted/marked for deletion before this method
        returns.

        :param stream:   A file like object that contains the input file.

        :param options:  Options to customize the conversion process.
                         Guaranteed to have attributes corresponding
                         to all the options declared by this plugin. In
                         addition, it will have a verbose attribute that
                         takes integral values from zero upwards. Higher numbers
                         mean be more verbose. Another useful attribute is
                         ``input_profile`` that is an instance of
                         :class:`calibre.customize.profiles.InputProfile`.

        :param file_ext: The extension (without the .) of the input file. It
                         is guaranteed to be one of the `file_types` supported
                         by this plugin.

        :param parse_cache: A dictionary that maps absolute file paths to
                            parsed representations of their contents. For
                            HTML the representation is an lxml element of
                            the root of the tree. For CSS it is a cssutils
                            stylesheet. If this plugin parses any of the
                            output files, it should add them to the cache
                            so that later stages of the conversion won't
                            have to re-parse them. If a parsed representation
                            is in the cache, there is no need to actually
                            write the file to disk.

        :param log: A :class:`calibre.utils.logging.Log` object. All output
                    should use this object.
        '''
        raise NotImplementedError

    def __call__(self, stream, options, file_ext, parse_cache, log, output_dir):
        '''
        Run :method:`convert` inside `output_dir` and return its result.

        The contents of `output_dir` are deleted before the conversion
        starts. After the conversion, relative paths in `parse_cache` are
        replaced by absolute ones (with a warning). If ``options.debug_input``
        is set, the cached parsed files are written to disk and the whole
        output directory is copied to the debug directory, replacing
        whatever that directory contained.
        '''
        log('InputFormatPlugin: %s running'%self.name, end=' ')
        if hasattr(stream, 'name'):
            log('on', stream.name)

        with CurrentDir(output_dir):
            # Start from an empty output directory
            for x in os.listdir('.'):
                if os.path.isdir(x):
                    shutil.rmtree(x)
                else:
                    os.remove(x)

            ret = self.convert(stream, options, file_ext, parse_cache, log)

            for key in list(parse_cache.keys()):
                if os.path.abspath(key) != key:
                    log.warn(('InputFormatPlugin: %s returned a '
                              'relative path: %s')%(self.name, key)
                    )
                    parse_cache[os.path.abspath(key)] = parse_cache.pop(key)

            if options.debug_input is not None:
                options.debug_input = os.path.abspath(options.debug_input)
                if not os.path.exists(options.debug_input):
                    os.makedirs(options.debug_input)
                # copytree() below needs a destination that does not exist
                shutil.rmtree(options.debug_input)
                for f, obj in parse_cache.items():
                    if hasattr(obj, 'cssText'):
                        raw = obj.cssText
                    else:
                        raw = html.tostring(obj, encoding='utf-8', method='xml',
                                include_meta_content_type=True, pretty_print=True)
                    if isinstance(raw, unicode):
                        raw = raw.encode('utf-8')
                    # Close the file explicitly so that the data is on disk
                    # before the directory is copied
                    with open(f, 'wb') as out:
                        out.write(raw)
                shutil.copytree('.', options.debug_input)

            return ret

View File

@ -0,0 +1,27 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize import Plugin
class InputProfile(Plugin):

    # TODO: Add some real information to this profile. All other profiles must
    # inherit from this profile and override as needed

    # Identification of the profile
    name        = 'Default Input Profile'
    # Used in the CLI, so it must not contain spaces etc.
    short_name  = 'default'
    description = _('This profile tries to provide sane defaults and is useful '
                    'if you know nothing about the input document.')

    # Generic plugin information
    type                = _('Input profile')
    author              = 'Kovid Goyal'
    can_be_disabled     = False
    supported_platforms = set(['windows', 'osx', 'linux'])


# The input profile classes defined in this module
input_profiles = [InputProfile]

View File

@ -6,13 +6,14 @@ import os, shutil, traceback, functools, sys
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \ from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin MetadataWriterPlugin
from calibre.customize.conversion import InputFormatPlugin
from calibre.customize.profiles import InputProfile
from calibre.customize.builtins import plugins as builtin_plugins from calibre.customize.builtins import plugins as builtin_plugins
from calibre.constants import __version__, iswindows, isosx from calibre.constants import __version__, iswindows, isosx
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \ from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser plugin_dir, OptionParser
version = tuple([int(x) for x in __version__.split('.')]) version = tuple([int(x) for x in __version__.split('.')])
platform = 'linux' platform = 'linux'
@ -70,7 +71,10 @@ _on_import = {}
_on_preprocess = {} _on_preprocess = {}
_on_postprocess = {} _on_postprocess = {}
def input_profiles():
    ''' Yield every initialized plugin that is an :class:`InputProfile`. '''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, InputProfile):
            continue
        yield candidate
def reread_filetype_plugins(): def reread_filetype_plugins():
global _on_import global _on_import
@ -114,7 +118,19 @@ def reread_metadata_plugins():
_metadata_writers[ft] = [] _metadata_writers[ft] = []
_metadata_writers[ft].append(plugin) _metadata_writers[ft].append(plugin)
def metadata_readers():
    ''' Return the set of all plugins present in `_metadata_readers`. '''
    found = set()
    for registered in _metadata_readers.values():
        found.update(registered)
    return found
def metadata_writers():
    ''' Return the set of all plugins present in `_metadata_writers`. '''
    found = set()
    for registered in _metadata_writers.values():
        found.update(registered)
    return found
def get_file_type_metadata(stream, ftype): def get_file_type_metadata(stream, ftype):
mi = MetaInformation(None, None) mi = MetaInformation(None, None)
@ -222,6 +238,17 @@ def find_plugin(name):
if plugin.name == name: if plugin.name == name:
return plugin return plugin
def input_format_plugins():
    ''' Yield every initialized plugin that is an :class:`InputFormatPlugin`. '''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, InputFormatPlugin):
            continue
        yield candidate
def plugin_for_input_format(fmt):
    '''
    Return the first input format plugin whose `file_types` contains `fmt`.
    Returns None if there is no such plugin.
    '''
    for candidate in input_format_plugins():
        if fmt not in candidate.file_types:
            continue
        return candidate
    return None
def disable_plugin(plugin_or_name): def disable_plugin(plugin_or_name):
x = getattr(plugin_or_name, 'name', plugin_or_name) x = getattr(plugin_or_name, 'name', plugin_or_name)
plugin = find_plugin(x) plugin = find_plugin(x)

View File

@ -60,7 +60,7 @@ class CYBOOKG3(USBMS):
if on_card and size > self.free_space()[2] - 1024*1024: if on_card and size > self.free_space()[2] - 1024*1024:
raise FreeSpaceError(_("There is insufficient free space on the storage card")) raise FreeSpaceError(_("There is insufficient free space on the storage card"))
if not on_card and size > self.free_space()[0] - 2*1024*1024: if not on_card and size > self.free_space()[0] - 2*1024*1024:
raise FreeSpaceError(_("There is insufficient free space in main memory")) raise FreeSpaceError(_("There is insufficient free space in main memory"))
paths = [] paths = []
@ -91,7 +91,7 @@ class CYBOOKG3(USBMS):
if not os.path.exists(newpath): if not os.path.exists(newpath):
os.makedirs(newpath) os.makedirs(newpath)
filepath = os.path.join(newpath, names.next()) filepath = os.path.join(newpath, names.next())
paths.append(filepath) paths.append(filepath)
if hasattr(infile, 'read'): if hasattr(infile, 'read'):
@ -100,7 +100,7 @@ class CYBOOKG3(USBMS):
dest = open(filepath, 'wb') dest = open(filepath, 'wb')
shutil.copyfileobj(infile, dest, 10*1024*1024) shutil.copyfileobj(infile, dest, 10*1024*1024)
dest.flush() dest.flush()
dest.close() dest.close()
else: else:
shutil.copy2(infile, filepath) shutil.copy2(infile, filepath)

View File

@ -116,8 +116,8 @@ class Device(Structure):
raise Error("Cannot open device") raise Error("Cannot open device")
return handle.contents return handle.contents
@apply @dynamic_property
def configurations(): def configurations(self):
doc = """ List of device configurations. See L{ConfigDescriptor} """ doc = """ List of device configurations. See L{ConfigDescriptor} """
def fget(self): def fget(self):
ans = [] ans = []
@ -127,8 +127,8 @@ class Device(Structure):
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
class Bus(Structure): class Bus(Structure):
@apply @dynamic_property
def device_list(): def device_list(self):
doc = \ doc = \
""" """
Flat list of devices on this bus. Flat list of devices on this bus.
@ -360,4 +360,4 @@ def get_devices():
for dev in devices: for dev in devices:
device = (dev.device_descriptor.idVendor, dev.device_descriptor.idProduct, dev.device_descriptor.bcdDevice) device = (dev.device_descriptor.idVendor, dev.device_descriptor.idProduct, dev.device_descriptor.bcdDevice)
ans.append(device) ans.append(device)
return ans return ans

View File

@ -55,8 +55,8 @@ class Book(object):
size = book_metadata_field("size", formatter=int) size = book_metadata_field("size", formatter=int)
# When setting this attribute you must use an epoch # When setting this attribute you must use an epoch
datetime = book_metadata_field("date", formatter=strptime, setter=strftime) datetime = book_metadata_field("date", formatter=strptime, setter=strftime)
@apply @dynamic_property
def title_sorter(): def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned''' doc = '''String to sort the title. If absent, title is returned'''
def fget(self): def fget(self):
src = self.elem.getAttribute('titleSorter').strip() src = self.elem.getAttribute('titleSorter').strip()
@ -67,8 +67,8 @@ class Book(object):
self.elem.setAttribute('titleSorter', sortable_title(unicode(val))) self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
return property(doc=doc, fget=fget, fset=fset) return property(doc=doc, fget=fget, fset=fset)
@apply @dynamic_property
def thumbnail(): def thumbnail(self):
doc = \ doc = \
""" """
The thumbnail. Should be a height 68 image. The thumbnail. Should be a height 68 image.
@ -88,15 +88,15 @@ class Book(object):
return decode(rc) return decode(rc)
return property(fget=fget, doc=doc) return property(fget=fget, doc=doc)
@apply @dynamic_property
def path(): def path(self):
doc = """ Absolute path to book on device. Setting not supported. """ doc = """ Absolute path to book on device. Setting not supported. """
def fget(self): def fget(self):
return self.root + self.rpath return self.root + self.rpath
return property(fget=fget, doc=doc) return property(fget=fget, doc=doc)
@apply @dynamic_property
def db_id(): def db_id(self):
doc = '''The database id in the application database that this file corresponds to''' doc = '''The database id in the application database that this file corresponds to'''
def fget(self): def fget(self):
match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0]) match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -378,4 +378,4 @@ class BookList(_BookList):
def write(self, stream): def write(self, stream):
""" Write XML representation of DOM tree to C{stream} """ """ Write XML representation of DOM tree to C{stream} """
stream.write(self.document.toxml('utf-8')) stream.write(self.document.toxml('utf-8'))

View File

@ -39,8 +39,8 @@ class FileFormatter(object):
self.name = file.name self.name = file.name
self.path = file.path self.path = file.path
@apply @dynamic_property
def mode_string(): def mode_string(self):
doc=""" The mode string for this file. There are only two modes read-only and read-write """ doc=""" The mode string for this file. There are only two modes read-only and read-write """
def fget(self): def fget(self):
mode, x = "-", "-" mode, x = "-", "-"
@ -50,8 +50,8 @@ class FileFormatter(object):
return mode return mode
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def isdir_name(): def isdir_name(self):
doc='''Return self.name + '/' if self is a directory''' doc='''Return self.name + '/' if self is a directory'''
def fget(self): def fget(self):
name = self.name name = self.name
@ -61,8 +61,8 @@ class FileFormatter(object):
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def name_in_color(): def name_in_color(self):
doc=""" The name in ANSI text. Directories are blue, ebooks are green """ doc=""" The name in ANSI text. Directories are blue, ebooks are green """
def fget(self): def fget(self):
cname = self.name cname = self.name
@ -75,22 +75,22 @@ class FileFormatter(object):
return cname return cname
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def human_readable_size(): def human_readable_size(self):
doc=""" File size in human readable form """ doc=""" File size in human readable form """
def fget(self): def fget(self):
return human_readable(self.size) return human_readable(self.size)
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def modification_time(): def modification_time(self):
doc=""" Last modified time in the Linux ls -l format """ doc=""" Last modified time in the Linux ls -l format """
def fget(self): def fget(self):
return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.wtime)) return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.wtime))
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def creation_time(): def creation_time(self):
doc=""" Last modified time in the Linux ls -l format """ doc=""" Last modified time in the Linux ls -l format """
def fget(self): def fget(self):
return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.ctime)) return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.ctime))
@ -334,4 +334,4 @@ def main():
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -284,8 +284,8 @@ class Command(TransferBuffer):
# Length of the data part of this packet # Length of the data part of this packet
length = field(start=12, fmt=DWORD) length = field(start=12, fmt=DWORD)
@apply @dynamic_property
def data(): def data(self):
doc = \ doc = \
""" """
The data part of this command. Returned/set as/by a TransferBuffer. The data part of this command. Returned/set as/by a TransferBuffer.
@ -447,8 +447,8 @@ class LongCommand(Command):
self.length = 16 self.length = 16
self.command = command self.command = command
@apply @dynamic_property
def command(): def command(self):
doc = \ doc = \
""" """
Usually carries extra information needed for the command Usually carries extra information needed for the command
@ -568,8 +568,8 @@ class FileOpen(PathCommand):
PathCommand.__init__(self, path, FileOpen.NUMBER, path_len_at_byte=20) PathCommand.__init__(self, path, FileOpen.NUMBER, path_len_at_byte=20)
self.mode = mode self.mode = mode
@apply @dynamic_property
def mode(): def mode(self):
doc = \ doc = \
""" """
The file open mode. Is either L{FileOpen.READ} The file open mode. Is either L{FileOpen.READ}
@ -651,8 +651,8 @@ class Response(Command):
raise PacketError("Response packets must have their number set to " \ raise PacketError("Response packets must have their number set to " \
+ hex(0x00001000)) + hex(0x00001000))
@apply @dynamic_property
def data(): def data(self):
doc = \ doc = \
""" """
The last 3 DWORDs (12 bytes) of data in this The last 3 DWORDs (12 bytes) of data in this
@ -681,43 +681,43 @@ class ListResponse(Response):
PATH_NOT_FOUND = 0xffffffd7 #: Queried path is not found PATH_NOT_FOUND = 0xffffffd7 #: Queried path is not found
PERMISSION_DENIED = 0xffffffd6 #: Permission denied PERMISSION_DENIED = 0xffffffd6 #: Permission denied
@apply @dynamic_property
def is_file(): def is_file(self):
doc = """ True iff queried path is a file """ doc = """ True iff queried path is a file """
def fget(self): def fget(self):
return self.code == ListResponse.IS_FILE return self.code == ListResponse.IS_FILE
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def is_invalid(): def is_invalid(self):
doc = """ True iff queried path is invalid """ doc = """ True iff queried path is invalid """
def fget(self): def fget(self):
return self.code == ListResponse.IS_INVALID return self.code == ListResponse.IS_INVALID
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def path_not_found(): def path_not_found(self):
doc = """ True iff queried path is not found """ doc = """ True iff queried path is not found """
def fget(self): def fget(self):
return self.code == ListResponse.PATH_NOT_FOUND return self.code == ListResponse.PATH_NOT_FOUND
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def permission_denied(): def permission_denied(self):
doc = """ True iff permission is denied for path operations """ doc = """ True iff permission is denied for path operations """
def fget(self): def fget(self):
return self.code == ListResponse.PERMISSION_DENIED return self.code == ListResponse.PERMISSION_DENIED
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def is_unmounted(): def is_unmounted(self):
doc = """ True iff queried path is unmounted (i.e. removed storage card) """ doc = """ True iff queried path is unmounted (i.e. removed storage card) """
def fget(self): def fget(self):
return self.code == ListResponse.IS_UNMOUNTED return self.code == ListResponse.IS_UNMOUNTED
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def is_eol(): def is_eol(self):
doc = """ True iff there are no more items in the list """ doc = """ True iff there are no more items in the list """
def fget(self): def fget(self):
return self.code == ListResponse.IS_EOL return self.code == ListResponse.IS_EOL
@ -759,8 +759,8 @@ class FileProperties(Answer):
# 0 = default permissions, 4 = read only # 0 = default permissions, 4 = read only
permissions = field(start=36, fmt=DWORD) permissions = field(start=36, fmt=DWORD)
@apply @dynamic_property
def is_dir(): def is_dir(self):
doc = """True if path points to a directory, False if it points to a file.""" doc = """True if path points to a directory, False if it points to a file."""
def fget(self): def fget(self):
@ -776,8 +776,8 @@ class FileProperties(Answer):
return property(doc=doc, fget=fget, fset=fset) return property(doc=doc, fget=fget, fset=fset)
@apply @dynamic_property
def is_readonly(): def is_readonly(self):
doc = """ Whether this file is readonly.""" doc = """ Whether this file is readonly."""
def fget(self): def fget(self):
@ -801,8 +801,8 @@ class IdAnswer(Answer):
""" Defines the structure of packets that contain identifiers for queries. """ """ Defines the structure of packets that contain identifiers for queries. """
@apply @dynamic_property
def id(): def id(self):
doc = \ doc = \
""" """
The identifier. C{unsigned int} stored in 4 bytes The identifier. C{unsigned int} stored in 4 bytes
@ -841,8 +841,8 @@ class ListAnswer(Answer):
name_length = field(start=20, fmt=DWORD) name_length = field(start=20, fmt=DWORD)
name = stringfield(name_length, start=24) name = stringfield(name_length, start=24)
@apply @dynamic_property
def is_dir(): def is_dir(self):
doc = \ doc = \
""" """
True if list item points to a directory, False if it points to a file. True if list item points to a directory, False if it points to a file.
@ -859,4 +859,3 @@ class ListAnswer(Answer):
return property(doc=doc, fget=fget, fset=fset) return property(doc=doc, fget=fget, fset=fset)

View File

@ -64,8 +64,8 @@ class Book(object):
# When setting this attribute you must use an epoch # When setting this attribute you must use an epoch
datetime = book_metadata_field("date", formatter=strptime, setter=strftime) datetime = book_metadata_field("date", formatter=strptime, setter=strftime)
@apply @dynamic_property
def title_sorter(): def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned''' doc = '''String to sort the title. If absent, title is returned'''
def fget(self): def fget(self):
src = self.elem.getAttribute('titleSorter').strip() src = self.elem.getAttribute('titleSorter').strip()
@ -76,8 +76,8 @@ class Book(object):
self.elem.setAttribute('titleSorter', sortable_title(unicode(val))) self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
return property(doc=doc, fget=fget, fset=fset) return property(doc=doc, fget=fget, fset=fset)
@apply @dynamic_property
def thumbnail(): def thumbnail(self):
doc = \ doc = \
""" """
The thumbnail. Should be a height 68 image. The thumbnail. Should be a height 68 image.
@ -99,15 +99,15 @@ class Book(object):
return decode(rc) return decode(rc)
return property(fget=fget, doc=doc) return property(fget=fget, doc=doc)
@apply @dynamic_property
def path(): def path(self):
doc = """ Absolute path to book on device. Setting not supported. """ doc = """ Absolute path to book on device. Setting not supported. """
def fget(self): def fget(self):
return self.mountpath + self.rpath return self.mountpath + self.rpath
return property(fget=fget, doc=doc) return property(fget=fget, doc=doc)
@apply @dynamic_property
def db_id(): def db_id(self):
doc = '''The database id in the application database that this file corresponds to''' doc = '''The database id in the application database that this file corresponds to'''
def fget(self): def fget(self):
match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0]) match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -415,4 +415,4 @@ def fix_ids(main, card):
regen_ids(main) regen_ids(main)
regen_ids(card) regen_ids(card)
main.set_next_id(str(main.max_id()+1)) main.set_next_id(str(main.max_id()+1))

View File

@ -21,15 +21,15 @@ class Book(object):
def __eq__(self, other): def __eq__(self, other):
return self.path == other.path return self.path == other.path
@apply @dynamic_property
def title_sorter(): def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned''' doc = '''String to sort the title. If absent, title is returned'''
def fget(self): def fget(self):
return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip() return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
return property(doc=doc, fget=fget) return property(doc=doc, fget=fget)
@apply @dynamic_property
def thumbnail(): def thumbnail(self):
return None return None
def __str__(self): def __str__(self):
@ -44,4 +44,3 @@ class BookList(_BookList):
def set_tags(self, book, tags): def set_tags(self, book, tags):
pass pass

View File

@ -37,7 +37,7 @@ class USBMS(Device):
SUPPORTS_SUB_DIRS = False SUPPORTS_SUB_DIRS = False
def __init__(self, key='-1', log_packets=False, report_progress=None): def __init__(self, key='-1', log_packets=False, report_progress=None):
Device.__init__(self, key=key, log_packets=log_packets, Device.__init__(self, key=key, log_packets=log_packets,
report_progress=report_progress) report_progress=report_progress)
def get_device_information(self, end_session=True): def get_device_information(self, end_session=True):
@ -103,7 +103,7 @@ class USBMS(Device):
if 'tags' in mdata.keys(): if 'tags' in mdata.keys():
for tag in mdata['tags']: for tag in mdata['tags']:
if tag.startswith('News'): if tag.startswith(_('News')):
newpath = os.path.join(newpath, 'news') newpath = os.path.join(newpath, 'news')
newpath = os.path.join(newpath, mdata.get('title', '')) newpath = os.path.join(newpath, mdata.get('title', ''))
newpath = os.path.join(newpath, mdata.get('timestamp', '')) newpath = os.path.join(newpath, mdata.get('timestamp', ''))

View File

@ -0,0 +1,4 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,30 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles
# Options recommended for every conversion pipeline run. Built up one entry at
# a time so each recommendation can carry its own explanatory comment.
pipeline_options = []

# Verbosity counter, exposed on the command line through the short switch 'v'.
pipeline_options.append(
    OptionRecommendation(
        name='verbose',
        recommended_value=0,
        level=OptionRecommendation.LOW,
        short_switch='v',
        help=_('Level of verbosity. Specify multiple times for greater '
               'verbosity.'),
    )
)

# Input profile selector; the valid choices are the short names of the
# profiles returned by input_profiles() at import time.
pipeline_options.append(
    OptionRecommendation(
        name='input_profile',
        recommended_value='default',
        level=OptionRecommendation.LOW,
        choices=[profile.short_name for profile in input_profiles()],
        help=_('Specify the input profile. The input profile gives the '
               'conversion system information on how to interpret '
               'various information in the input document. For '
               'example resolution dependent lengths (i.e. lengths in '
               'pixels).'),
    )
)

View File

@ -40,38 +40,6 @@ def rules(stylesheets):
if r.type == r.STYLE_RULE: if r.type == r.STYLE_RULE:
yield r yield r
def decrypt_font(key, path):
    '''
    De-obfuscate the font file at `path`, rewriting it in place.

    Only the first 1024 bytes of the file are transformed: each byte is XORed
    with the next value taken from `key`, which is repeated cyclically. The
    remainder of the file is written back unchanged.

    :param key: Iterable of integers to XOR against the leading bytes.
    :param path: Path of the font file; its contents are overwritten.
    '''
    # Read through a context manager so the handle is closed before the same
    # path is reopened for writing below.
    with open(path, 'rb') as f:
        raw = f.read()
    crypt = raw[:1024]
    key = cycle(iter(key))
    decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
    with open(path, 'wb') as f:
        f.write(decrypt)
        f.write(raw[1024:])
def process_encryption(encfile, opf):
    '''
    Try to undo font obfuscation described by an EPUB encryption manifest.

    The key is derived from the first ``urn:uuid:`` identifier found in the
    OPF file. Every EncryptionMethod element in `encfile` must use the
    ``http://ns.adobe.com/pdf/enc#RC`` algorithm; the file referenced by its
    CipherReference is then decrypted in place if it exists on disk.

    :param encfile: Path to the encryption manifest (parsed as XML).
    :param opf: Path to the OPF file that is searched for the UUID.
    :return: True if all entries were handled, False if an unsupported
             algorithm was found or an error occurred (the traceback is
             printed in that case).
    '''
    key = None
    # Close the OPF handle deterministically instead of leaking it.
    with open(opf, 'rb') as f:
        m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', f.read())
    if m:
        key = m.group(1)
        key = list(map(ord, uuid.UUID(key).bytes))
    try:
        root = etree.parse(encfile)
        for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
            algorithm = em.get('Algorithm', '')
            if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
                return False
            cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
            uri = cr.get('URI')
            path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
            if os.path.exists(path):
                decrypt_font(key, path)
        return True
    except Exception:
        # Best effort: report the problem and treat the book as undecryptable,
        # but let KeyboardInterrupt/SystemExit propagate.
        import traceback
        traceback.print_exc()
    return False
def initialize_container(path_to_container, opf_name='metadata.opf'): def initialize_container(path_to_container, opf_name='metadata.opf'):
''' '''
Create an empty EPUB document, with a default skeleton. Create an empty EPUB document, with a default skeleton.

View File

@ -0,0 +1,76 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, uuid
from itertools import cycle
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
class EPUBInput(InputFormatPlugin):
    '''
    Input plugin that unpacks an EPUB archive into the current working
    directory and returns the path of the OPF file found inside it.
    '''

    name = 'EPUB Input'
    author = 'Kovid Goyal'
    description = 'Convert EPUB files (.epub) to HTML'
    file_types = set(['epub'])

    @classmethod
    def decrypt_font(cls, key, path):
        '''
        De-obfuscate the font file at `path`, rewriting it in place.

        Only the first 1024 bytes are transformed: each byte is XORed with
        the next value from `key`, repeated cyclically. The rest of the file
        is written back unchanged.

        :param key: Iterable of integers to XOR against the leading bytes.
        :param path: Path of the font file; its contents are overwritten.
        '''
        # Close the read handle before reopening the same path for writing.
        with open(path, 'rb') as f:
            raw = f.read()
        crypt = raw[:1024]
        key = cycle(iter(key))
        decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
        with open(path, 'wb') as f:
            f.write(decrypt)
            f.write(raw[1024:])

    @classmethod
    def process_encryption(cls, encfile, opf, log):
        '''
        Try to undo font obfuscation described by the encryption manifest.

        The key is derived from the first ``urn:uuid:`` identifier found in
        the OPF file. Every EncryptionMethod element in `encfile` must use
        the ``http://ns.adobe.com/pdf/enc#RC`` algorithm; the file referenced
        by its CipherReference is decrypted in place if it exists on disk.

        :param encfile: Path to the encryption manifest (parsed as XML).
        :param opf: Path to the OPF file that is searched for the UUID.
        :param log: Accepted for interface compatibility; not used here.
        :return: True if all entries were handled, False if an unsupported
                 algorithm was found or an error occurred (the traceback is
                 printed in that case).
        '''
        key = None
        with open(opf, 'rb') as f:
            m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', f.read())
        if m:
            key = m.group(1)
            key = list(map(ord, uuid.UUID(key).bytes))
        try:
            root = etree.parse(encfile)
            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
                algorithm = em.get('Algorithm', '')
                if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
                    return False
                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
                uri = cr.get('URI')
                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
                if os.path.exists(path):
                    cls.decrypt_font(key, path)
            return True
        except Exception:
            # Best effort: report the problem and treat the book as
            # undecryptable, but let KeyboardInterrupt/SystemExit propagate.
            import traceback
            traceback.print_exc()
        return False

    # The method was originally defined under this misspelled name while
    # convert() called process_encryption(); keep the old spelling as an alias
    # so any existing caller continues to work.
    process_ecryption = process_encryption

    def convert(self, stream, options, file_ext, parse_cache, log):
        '''
        Extract the EPUB in `stream` into the current working directory.

        :return: Path of the first ``.opf`` file found under the current
                 directory after extraction.
        :raises ValueError: If no ``.opf`` file is found.
        :raises DRMError: If ``META-INF/encryption.xml`` exists and
                          process_encryption() cannot handle it.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        zf = ZipFile(stream)
        zf.extractall(os.getcwd())
        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
        opf = None
        for f in walk('.'):
            if f.lower().endswith('.opf'):
                opf = f
                break
        # Streams without a name attribute are reported as 'stream'.
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError('%s is not a valid EPUB file'%path)

        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))

        return opf

View File

@ -31,8 +31,8 @@ from cssutils import CSSParser
class HTMLElement(HtmlElement): class HTMLElement(HtmlElement):
@apply @dynamic_property
def specified_font_size(): def specified_font_size(self):
def fget(self): def fget(self):
ans = self.get('specified_font_size', '') ans = self.get('specified_font_size', '')
@ -47,8 +47,8 @@ class HTMLElement(HtmlElement):
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@apply @dynamic_property
def computed_font_size(): def computed_font_size(self):
def fget(self): def fget(self):
ans = self.get('computed_font_size', '') ans = self.get('computed_font_size', '')
if ans == '': if ans == '':
@ -1183,4 +1183,4 @@ output = %s
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())

View File

@ -7,21 +7,25 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>' 'and Marshall T. Vandegrift <llasram@gmail.com>'
import sys, struct, cStringIO, os import sys, struct, os
import functools import functools
import re import re
from urlparse import urldefrag from urlparse import urldefrag
from cStringIO import StringIO
from urllib import unquote as urlunquote from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
from calibre.ebooks.lit import LitError from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.mssha1 as mssha1
from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks import DRMError from calibre.ebooks import DRMError
from calibre import plugins from calibre import plugins
lzx, lxzerror = plugins['lzx'] lzx, lxzerror = plugins['lzx']
msdes, msdeserror = plugins['msdes'] msdes, msdeserror = plugins['msdes']
__all__ = ["LitReader"]
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?> XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
""" """
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?> OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
@ -109,6 +113,9 @@ def consume_sized_utf8_string(bytes, zpad=False):
pos += 1 pos += 1
return u''.join(result), bytes[pos:] return u''.join(result), bytes[pos:]
def encode(string):
    '''
    Return `string` converted to unicode text and encoded as ASCII, with
    every non-ASCII character replaced by an XML numeric character reference.
    '''
    text = unicode(string)
    return text.encode('ascii', 'xmlcharrefreplace')
class UnBinary(object): class UnBinary(object):
AMPERSAND_RE = re.compile( AMPERSAND_RE = re.compile(
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)') r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
@ -120,14 +127,16 @@ class UnBinary(object):
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS): def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
self.manifest = manifest self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.is_html = map is HTML_MAP
self.tag_atoms, self.attr_atoms = atoms self.tag_atoms, self.attr_atoms = atoms
self.opf = map is OPF_MAP self.opf = map is OPF_MAP
self.bin = bin self.bin = bin
self.dir = os.path.dirname(path) self.dir = os.path.dirname(path)
self.buf = cStringIO.StringIO() buf = StringIO()
self.binary_to_text() self.binary_to_text(bin, buf)
self.raw = self.buf.getvalue().lstrip().decode('utf-8') self.raw = buf.getvalue().lstrip()
self.escape_reserved() self.escape_reserved()
self._tree = None
def escape_reserved(self): def escape_reserved(self):
raw = self.raw raw = self.raw
@ -154,18 +163,20 @@ class UnBinary(object):
return '/'.join(relpath) return '/'.join(relpath)
def __unicode__(self): def __unicode__(self):
return self.raw.decode('utf-8')
def __str__(self):
return self.raw return self.raw
def binary_to_text(self, base=0, depth=0): def binary_to_text(self, bin, buf, index=0, depth=0):
tag_name = current_map = None tag_name = current_map = None
dynamic_tag = errors = 0 dynamic_tag = errors = 0
in_censorship = is_goingdown = False in_censorship = is_goingdown = False
state = 'text' state = 'text'
index = base
flags = 0 flags = 0
while index < len(self.bin): while index < len(bin):
c, index = read_utf8_char(self.bin, index) c, index = read_utf8_char(bin, index)
oc = ord(c) oc = ord(c)
if state == 'text': if state == 'text':
@ -178,7 +189,7 @@ class UnBinary(object):
c = '>>' c = '>>'
elif c == '<': elif c == '<':
c = '<<' c = '<<'
self.buf.write(c.encode('ascii', 'xmlcharrefreplace')) buf.write(encode(c))
elif state == 'get flags': elif state == 'get flags':
if oc == 0: if oc == 0:
@ -191,7 +202,7 @@ class UnBinary(object):
state = 'text' if oc == 0 else 'get attr' state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING: if flags & FLAG_OPENING:
tag = oc tag = oc
self.buf.write('<') buf.write('<')
if not (flags & FLAG_CLOSING): if not (flags & FLAG_CLOSING):
is_goingdown = True is_goingdown = True
if tag == 0x8000: if tag == 0x8000:
@ -211,7 +222,7 @@ class UnBinary(object):
tag_name = '?'+unichr(tag)+'?' tag_name = '?'+unichr(tag)+'?'
current_map = self.tag_to_attr_map[tag] current_map = self.tag_to_attr_map[tag]
print 'WARNING: tag %s unknown' % unichr(tag) print 'WARNING: tag %s unknown' % unichr(tag)
self.buf.write(unicode(tag_name).encode('utf-8')) buf.write(encode(tag_name))
elif flags & FLAG_CLOSING: elif flags & FLAG_CLOSING:
if depth == 0: if depth == 0:
raise LitError('Extra closing tag') raise LitError('Extra closing tag')
@ -223,15 +234,14 @@ class UnBinary(object):
if not is_goingdown: if not is_goingdown:
tag_name = None tag_name = None
dynamic_tag = 0 dynamic_tag = 0
self.buf.write(' />') buf.write(' />')
else: else:
self.buf.write('>') buf.write('>')
index = self.binary_to_text(base=index, depth=depth+1) index = self.binary_to_text(bin, buf, index, depth+1)
is_goingdown = False is_goingdown = False
if not tag_name: if not tag_name:
raise LitError('Tag ends before it begins.') raise LitError('Tag ends before it begins.')
self.buf.write(u''.join( buf.write(encode(u''.join(('</', tag_name, '>'))))
('</', tag_name, '>')).encode('utf-8'))
dynamic_tag = 0 dynamic_tag = 0
tag_name = None tag_name = None
state = 'text' state = 'text'
@ -251,7 +261,7 @@ class UnBinary(object):
in_censorship = True in_censorship = True
state = 'get value length' state = 'get value length'
continue continue
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=') buf.write(' ' + encode(attr) + '=')
if attr in ['href', 'src']: if attr in ['href', 'src']:
state = 'get href length' state = 'get href length'
else: else:
@ -259,24 +269,24 @@ class UnBinary(object):
elif state == 'get value length': elif state == 'get value length':
if not in_censorship: if not in_censorship:
self.buf.write('"') buf.write('"')
count = oc - 1 count = oc - 1
if count == 0: if count == 0:
if not in_censorship: if not in_censorship:
self.buf.write('"') buf.write('"')
in_censorship = False in_censorship = False
state = 'get attr' state = 'get attr'
continue continue
state = 'get value' state = 'get value'
if oc == 0xffff: if oc == 0xffff:
continue continue
if count < 0 or count > (len(self.bin) - index): if count < 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
elif state == 'get value': elif state == 'get value':
if count == 0xfffe: if count == 0xfffe:
if not in_censorship: if not in_censorship:
self.buf.write('%s"' % (oc - 1)) buf.write('%s"' % (oc - 1))
in_censorship = False in_censorship = False
state = 'get attr' state = 'get attr'
elif count > 0: elif count > 0:
@ -289,13 +299,13 @@ class UnBinary(object):
count -= 1 count -= 1
if count == 0: if count == 0:
if not in_censorship: if not in_censorship:
self.buf.write('"') buf.write('"')
in_censorship = False in_censorship = False
state = 'get attr' state = 'get attr'
elif state == 'get custom length': elif state == 'get custom length':
count = oc - 1 count = oc - 1
if count <= 0 or count > len(self.bin)-index: if count <= 0 or count > len(bin)-index:
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
dynamic_tag += 1 dynamic_tag += 1
state = 'get custom' state = 'get custom'
@ -305,26 +315,26 @@ class UnBinary(object):
tag_name += c tag_name += c
count -= 1 count -= 1
if count == 0: if count == 0:
self.buf.write(unicode(tag_name).encode('utf-8')) buf.write(encode(tag_name))
state = 'get attr' state = 'get attr'
elif state == 'get attr length': elif state == 'get attr length':
count = oc - 1 count = oc - 1
if count <= 0 or count > (len(self.bin) - index): if count <= 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
self.buf.write(' ') buf.write(' ')
state = 'get custom attr' state = 'get custom attr'
elif state == 'get custom attr': elif state == 'get custom attr':
self.buf.write(unicode(c).encode('utf-8')) buf.write(encode(c))
count -= 1 count -= 1
if count == 0: if count == 0:
self.buf.write('=') buf.write('=')
state = 'get value length' state = 'get value length'
elif state == 'get href length': elif state == 'get href length':
count = oc - 1 count = oc - 1
if count <= 0 or count > (len(self.bin) - index): if count <= 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
href = '' href = ''
state = 'get href' state = 'get href'
@ -338,10 +348,11 @@ class UnBinary(object):
if frag: if frag:
path = '#'.join((path, frag)) path = '#'.join((path, frag))
path = urlnormalize(path) path = urlnormalize(path)
self.buf.write((u'"%s"' % path).encode('utf-8')) buf.write(encode(u'"%s"' % path))
state = 'get attr' state = 'get attr'
return index return index
class DirectoryEntry(object): class DirectoryEntry(object):
def __init__(self, name, section, offset, size): def __init__(self, name, section, offset, size):
self.name = name self.name = name
@ -356,6 +367,7 @@ class DirectoryEntry(object):
def __str__(self): def __str__(self):
return repr(self) return repr(self)
class ManifestItem(object): class ManifestItem(object):
def __init__(self, original, internal, mime_type, offset, root, state): def __init__(self, original, internal, mime_type, offset, root, state):
self.original = original self.original = original
@ -383,65 +395,87 @@ class ManifestItem(object):
% (self.internal, self.path, self.mime_type, self.offset, % (self.internal, self.path, self.mime_type, self.offset,
self.root, self.state) self.root, self.state)
def preserve(function): def preserve(function):
def wrapper(self, *args, **kwargs): def wrapper(self, *args, **kwargs):
opos = self._stream.tell() opos = self.stream.tell()
try: try:
return function(self, *args, **kwargs) return function(self, *args, **kwargs)
finally: finally:
self._stream.seek(opos) self.stream.seek(opos)
functools.update_wrapper(wrapper, function) functools.update_wrapper(wrapper, function)
return wrapper return wrapper
class LitReader(object): class LitFile(object):
PIECE_SIZE = 16 PIECE_SIZE = 16
XML_PARSER = etree.XMLParser(
recover=True, resolve_entities=False) def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self.stream = filename_or_stream
else:
self.stream = open(filename_or_stream, 'rb')
try:
self.opf_path = os.path.splitext(
os.path.basename(self.stream.name))[0] + '.opf'
except AttributeError:
self.opf_path = 'content.opf'
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.read_secondary_header()
self.read_header_pieces()
self.read_section_names()
self.read_manifest()
self.read_drm()
def warn(self, msg):
print "WARNING: %s" % (msg,)
def magic(): def magic():
@preserve @preserve
def fget(self): def fget(self):
self._stream.seek(0) self.stream.seek(0)
return self._stream.read(8) return self.stream.read(8)
return property(fget=fget) return property(fget=fget)
magic = magic() magic = magic()
def version(): def version():
def fget(self): def fget(self):
self._stream.seek(8) self.stream.seek(8)
return u32(self._stream.read(4)) return u32(self.stream.read(4))
return property(fget=fget) return property(fget=fget)
version = version() version = version()
def hdr_len(): def hdr_len():
@preserve @preserve
def fget(self): def fget(self):
self._stream.seek(12) self.stream.seek(12)
return int32(self._stream.read(4)) return int32(self.stream.read(4))
return property(fget=fget) return property(fget=fget)
hdr_len = hdr_len() hdr_len = hdr_len()
def num_pieces(): def num_pieces():
@preserve @preserve
def fget(self): def fget(self):
self._stream.seek(16) self.stream.seek(16)
return int32(self._stream.read(4)) return int32(self.stream.read(4))
return property(fget=fget) return property(fget=fget)
num_pieces = num_pieces() num_pieces = num_pieces()
def sec_hdr_len(): def sec_hdr_len():
@preserve @preserve
def fget(self): def fget(self):
self._stream.seek(20) self.stream.seek(20)
return int32(self._stream.read(4)) return int32(self.stream.read(4))
return property(fget=fget) return property(fget=fget)
sec_hdr_len = sec_hdr_len() sec_hdr_len = sec_hdr_len()
def guid(): def guid():
@preserve @preserve
def fget(self): def fget(self):
self._stream.seek(24) self.stream.seek(24)
return self._stream.read(16) return self.stream.read(16)
return property(fget=fget) return property(fget=fget)
guid = guid() guid = guid()
@ -451,44 +485,27 @@ class LitReader(object):
size = self.hdr_len \ size = self.hdr_len \
+ (self.num_pieces * self.PIECE_SIZE) \ + (self.num_pieces * self.PIECE_SIZE) \
+ self.sec_hdr_len + self.sec_hdr_len
self._stream.seek(0) self.stream.seek(0)
return self._stream.read(size) return self.stream.read(size)
return property(fget=fget) return property(fget=fget)
header = header() header = header()
def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self._stream = filename_or_stream
else:
self._stream = open(filename_or_stream, 'rb')
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.entries = {}
self._read_secondary_header()
self._read_header_pieces()
self._read_section_names()
self._read_manifest()
self._read_meta()
self._read_drm()
@preserve @preserve
def __len__(self): def __len__(self):
self._stream.seek(0, 2) self.stream.seek(0, 2)
return self._stream.tell() return self.stream.tell()
@preserve @preserve
def _read_raw(self, offset, size): def read_raw(self, offset, size):
self._stream.seek(offset) self.stream.seek(offset)
return self._stream.read(size) return self.stream.read(size)
def _read_content(self, offset, size): def read_content(self, offset, size):
return self._read_raw(self.content_offset + offset, size) return self.read_raw(self.content_offset + offset, size)
def _read_secondary_header(self): def read_secondary_header(self):
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE) offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
bytes = self._read_raw(offset, self.sec_hdr_len) bytes = self.read_raw(offset, self.sec_hdr_len)
offset = int32(bytes[4:]) offset = int32(bytes[4:])
while offset < len(bytes): while offset < len(bytes):
blocktype = bytes[offset:offset+4] blocktype = bytes[offset:offset+4]
@ -516,21 +533,21 @@ class LitReader(object):
if not hasattr(self, 'content_offset'): if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset') raise LitError('Could not figure out the content offset')
def _read_header_pieces(self): def read_header_pieces(self):
src = self.header[self.hdr_len:] src = self.header[self.hdr_len:]
for i in xrange(self.num_pieces): for i in xrange(self.num_pieces):
piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE] piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece)) raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:]) offset, size = u32(piece), int32(piece[8:])
piece = self._read_raw(offset, size) piece = self.read_raw(offset, size)
if i == 0: if i == 0:
continue # Dont need this piece continue # Dont need this piece
elif i == 1: elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \ if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown: u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece') raise LitError('Secondary header does not match piece')
self._read_directory(piece) self.read_directory(piece)
elif i == 2: elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \ if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown: u32(piece[12:]) != self.count_unknown:
@ -541,12 +558,13 @@ class LitReader(object):
elif i == 4: elif i == 4:
self.piece4_guid = piece self.piece4_guid = piece
def _read_directory(self, piece): def read_directory(self, piece):
if not piece.startswith('IFCM'): if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.') raise LitError('Header piece #1 is not main directory.')
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
if (32 + (num_chunks * chunk_size)) != len(piece): if (32 + (num_chunks * chunk_size)) != len(piece):
raise LitError('IFCM HEADER has incorrect length') raise LitError('IFCM header has incorrect length')
self.entries = {}
for i in xrange(num_chunks): for i in xrange(num_chunks):
offset = 32 + (i * chunk_size) offset = 32 + (i * chunk_size)
chunk = piece[offset:offset + chunk_size] chunk = piece[offset:offset + chunk_size]
@ -580,17 +598,17 @@ class LitReader(object):
entry = DirectoryEntry(name, section, offset, size) entry = DirectoryEntry(name, section, offset, size)
self.entries[name] = entry self.entries[name] = entry
def _read_section_names(self): def read_section_names(self):
if '::DataSpace/NameList' not in self.entries: if '::DataSpace/NameList' not in self.entries:
raise LitError('Lit file does not have a valid NameList') raise LitError('Lit file does not have a valid NameList')
raw = self.get_file('::DataSpace/NameList') raw = self.get_file('::DataSpace/NameList')
if len(raw) < 4: if len(raw) < 4:
raise LitError('Invalid Namelist section') raise LitError('Invalid Namelist section')
pos = 4 pos = 4
self.num_sections = u16(raw[2:pos]) num_sections = u16(raw[2:pos])
self.section_names = [""]*self.num_sections self.section_names = [""] * num_sections
self.section_data = [None]*self.num_sections self.section_data = [None] * num_sections
for section in xrange(self.num_sections): for section in xrange(num_sections):
size = u16(raw[pos:pos+2]) size = u16(raw[pos:pos+2])
pos += 2 pos += 2
size = size*2 + 2 size = size*2 + 2
@ -600,11 +618,12 @@ class LitReader(object):
raw[pos:pos+size].decode('utf-16-le').rstrip('\000') raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
pos += size pos += size
def _read_manifest(self): def read_manifest(self):
if '/manifest' not in self.entries: if '/manifest' not in self.entries:
raise LitError('Lit file does not have a valid manifest') raise LitError('Lit file does not have a valid manifest')
raw = self.get_file('/manifest') raw = self.get_file('/manifest')
self.manifest = {} self.manifest = {}
self.paths = {self.opf_path: None}
while raw: while raw:
slen, raw = ord(raw[0]), raw[1:] slen, raw = ord(raw[0]), raw[1:]
if slen == 0: break if slen == 0: break
@ -645,28 +664,9 @@ class LitReader(object):
for item in mlist: for item in mlist:
if item.path[0] == '/': if item.path[0] == '/':
item.path = os.path.basename(item.path) item.path = os.path.basename(item.path)
self.paths[item.path] = item
def _pretty_print(self, xml): def read_drm(self):
f = cStringIO.StringIO(xml.encode('utf-8'))
doc = etree.parse(f, parser=self.XML_PARSER)
pretty = etree.tostring(doc, encoding='ascii', pretty_print=True)
return XML_DECL + unicode(pretty)
def _read_meta(self):
path = 'content.opf'
raw = self.get_file('/meta')
xml = OPF_DECL
try:
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
self.meta = xml
def _read_drm(self):
self.drmlevel = 0 self.drmlevel = 0
if '/DRMStorage/Licenses/EUL' in self.entries: if '/DRMStorage/Licenses/EUL' in self.entries:
self.drmlevel = 5 self.drmlevel = 5
@ -677,7 +677,7 @@ class LitReader(object):
else: else:
return return
if self.drmlevel < 5: if self.drmlevel < 5:
msdes.deskey(self._calculate_deskey(), msdes.DE1) msdes.deskey(self.calculate_deskey(), msdes.DE1)
bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed')) bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
if bookkey[0] != '\000': if bookkey[0] != '\000':
raise LitError('Unable to decrypt title key!') raise LitError('Unable to decrypt title key!')
@ -685,7 +685,7 @@ class LitReader(object):
else: else:
raise DRMError("Cannot access DRM-protected book") raise DRMError("Cannot access DRM-protected book")
def _calculate_deskey(self): def calculate_deskey(self):
hashfiles = ['/meta', '/DRMStorage/DRMSource'] hashfiles = ['/meta', '/DRMStorage/DRMSource']
if self.drmlevel == 3: if self.drmlevel == 3:
hashfiles.append('/DRMStorage/DRMBookplate') hashfiles.append('/DRMStorage/DRMBookplate')
@ -709,18 +709,18 @@ class LitReader(object):
def get_file(self, name): def get_file(self, name):
entry = self.entries[name] entry = self.entries[name]
if entry.section == 0: if entry.section == 0:
return self._read_content(entry.offset, entry.size) return self.read_content(entry.offset, entry.size)
section = self.get_section(entry.section) section = self.get_section(entry.section)
return section[entry.offset:entry.offset+entry.size] return section[entry.offset:entry.offset+entry.size]
def get_section(self, section): def get_section(self, section):
data = self.section_data[section] data = self.section_data[section]
if not data: if not data:
data = self._get_section(section) data = self.get_section_uncached(section)
self.section_data[section] = data self.section_data[section] = data
return data return data
def _get_section(self, section): def get_section_uncached(self, section):
name = self.section_names[section] name = self.section_names[section]
path = '::DataSpace/Storage/' + name path = '::DataSpace/Storage/' + name
transform = self.get_file(path + '/Transform/List') transform = self.get_file(path + '/Transform/List')
@ -732,29 +732,29 @@ class LitReader(object):
raise LitError("ControlData is too short") raise LitError("ControlData is too short")
guid = msguid(transform) guid = msguid(transform)
if guid == DESENCRYPT_GUID: if guid == DESENCRYPT_GUID:
content = self._decrypt(content) content = self.decrypt(content)
control = control[csize:] control = control[csize:]
elif guid == LZXCOMPRESS_GUID: elif guid == LZXCOMPRESS_GUID:
reset_table = self.get_file( reset_table = self.get_file(
'/'.join(('::DataSpace/Storage', name, 'Transform', '/'.join(('::DataSpace/Storage', name, 'Transform',
LZXCOMPRESS_GUID, 'InstanceData/ResetTable'))) LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
content = self._decompress(content, control, reset_table) content = self.decompress(content, control, reset_table)
control = control[csize:] control = control[csize:]
else: else:
raise LitError("Unrecognized transform: %s." % repr(guid)) raise LitError("Unrecognized transform: %s." % repr(guid))
transform = transform[16:] transform = transform[16:]
return content return content
def _decrypt(self, content): def decrypt(self, content):
length = len(content) length = len(content)
extra = length & 0x7 extra = length & 0x7
if extra > 0: if extra > 0:
self._warn("content length not a multiple of block size") self.warn("content length not a multiple of block size")
content += "\0" * (8 - extra) content += "\0" * (8 - extra)
msdes.deskey(self.bookkey, msdes.DE1) msdes.deskey(self.bookkey, msdes.DE1)
return msdes.des(content) return msdes.des(content)
def _decompress(self, content, control, reset_table): def decompress(self, content, control, reset_table):
if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC": if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
raise LitError("Invalid ControlData tag value") raise LitError("Invalid ControlData tag value")
if len(reset_table) < (RESET_INTERVAL + 8): if len(reset_table) < (RESET_INTERVAL + 8):
@ -795,7 +795,7 @@ class LitReader(object):
result.append( result.append(
lzx.decompress(content[base:size], window_bytes)) lzx.decompress(content[base:size], window_bytes))
except lzx.LZXError: except lzx.LZXError:
self._warn("LZX decompression error; skipping chunk") self.warn("LZX decompression error; skipping chunk")
bytes_remaining -= window_bytes bytes_remaining -= window_bytes
base = size base = size
accum += int32(reset_table[RESET_INTERVAL:]) accum += int32(reset_table[RESET_INTERVAL:])
@ -805,7 +805,7 @@ class LitReader(object):
try: try:
result.append(lzx.decompress(content[base:], bytes_remaining)) result.append(lzx.decompress(content[base:], bytes_remaining))
except lzx.LZXError: except lzx.LZXError:
self._warn("LZX decompression error; skipping chunk") self.warn("LZX decompression error; skipping chunk")
bytes_remaining = 0 bytes_remaining = 0
if bytes_remaining > 0: if bytes_remaining > 0:
raise LitError("Failed to completely decompress section") raise LitError("Failed to completely decompress section")
@ -855,62 +855,51 @@ class LitReader(object):
content = self._pretty_print(content) content = self._pretty_print(content)
content = content.encode('utf-8') content = content.encode('utf-8')
else: else:
name = '/'.join(('/data', entry.internal)) internal = '/'.join(('/data', entry.internal))
content = self.get_file(name) content = self._litfile.get_file(internal)
return content return content
def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
output_dir = os.path.abspath(output_dir) class LitContainer(object):
"""Simple Container-interface, read-only accessor for LIT files."""
def __init__(self, filename_or_stream):
self._litfile = LitFile(filename_or_stream)
def namelist(self):
return self._litfile.paths.keys()
def exists(self, name):
return urlunquote(name) in self._litfile.paths
def read(self, name):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
content = OPF_DECL + self._read_meta()
elif 'spine' in entry.state:
internal = '/'.join(('/data', entry.internal, 'content'))
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
content = HTML_DECL + str(unbin)
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')
try: try:
opf_path = os.path.splitext( unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
os.path.basename(self._stream.name))[0] + '.opf' except LitError:
except AttributeError: if 'PENGUIN group' not in raw: raise
opf_path = 'content.opf' print "WARNING: attempting PENGUIN malformed OPF fix"
opf_path = os.path.join(output_dir, opf_path) raw = raw.replace(
self._ensure_dir(opf_path) 'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
with open(opf_path, 'wb') as f: unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
xml = self.meta return str(unbin)
if pretty_print:
xml = self._pretty_print(xml)
f.write(xml.encode('utf-8'))
for entry in self.manifest.values():
path = os.path.join(output_dir, entry.path)
self._ensure_dir(path)
with open(path, 'wb') as f:
f.write(self.get_entry_content(entry, pretty_print))
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def _warn(self, msg): class LitReader(OEBReader):
print "WARNING: %s" % (msg,) Container = LitContainer
DEFAULT_PROFILE = 'MSReader'
def option_parser():
from calibre.utils.config import OptionParser
parser = OptionParser(usage=_('%prog [options] LITFILE'))
parser.add_option(
'-o', '--output-dir', default='.',
help=_('Output directory. Defaults to current directory.'))
parser.add_option(
'-p', '--pretty-print', default=False, action='store_true',
help=_('Legibly format extracted markup. May modify meaningful whitespace.'))
parser.add_option(
'--verbose', default=False, action='store_true',
help=_('Useful for debugging.'))
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
lr = LitReader(args[1])
lr.extract_content(opts.output_dir, opts.pretty_print)
print _('OEB ebook created in'), opts.output_dir
return 0
try: try:
import psyco import psyco
@ -918,6 +907,3 @@ try:
psyco.bind(UnBinary.binary_to_text) psyco.bind(UnBinary.binary_to_text)
except ImportError: except ImportError:
pass pass
if __name__ == '__main__':
sys.exit(main())

View File

@ -312,7 +312,7 @@ class LitWriter(object):
cover = None cover = None
if oeb.metadata.cover: if oeb.metadata.cover:
id = str(oeb.metadata.cover[0]) id = str(oeb.metadata.cover[0])
cover = oeb.manifest[id] cover = oeb.manifest.ids[id]
for type, title in ALL_MS_COVER_TYPES: for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide: if type not in oeb.guide:
oeb.guide.add(type, title, cover.href) oeb.guide.add(type, title, cover.href)

View File

@ -229,6 +229,9 @@ def get_metadata(stream):
mi.author = lrf.author.strip() mi.author = lrf.author.strip()
mi.comments = lrf.free_text.strip() mi.comments = lrf.free_text.strip()
mi.category = lrf.category.strip()+', '+lrf.classification.strip() mi.category = lrf.category.strip()+', '+lrf.classification.strip()
tags = [x.strip() for x in mi.category.split(',') if x.strip()]
if tags:
mi.tags = tags
mi.publisher = lrf.publisher.strip() mi.publisher = lrf.publisher.strip()
mi.cover_data = lrf.get_cover() mi.cover_data = lrf.get_cover()
try: try:
@ -624,7 +627,9 @@ def set_metadata(stream, mi):
lrf.title = mi.title lrf.title = mi.title
if mi.authors: if mi.authors:
lrf.author = ', '.join(mi.authors) lrf.author = ', '.join(mi.authors)
if mi.category: if mi.tags:
lrf.category = mi.tags[0]
if getattr(mi, 'category', False):
lrf.category = mi.category lrf.category = mi.category
if mi.comments: if mi.comments:
lrf.free_text = mi.comments lrf.free_text = mi.comments

View File

@ -207,32 +207,32 @@ class Tag(object):
s += " at %08X, contents: %s" % (self.offset, repr(self.contents)) s += " at %08X, contents: %s" % (self.offset, repr(self.contents))
return s return s
@apply @dynamic_property
def byte(): def byte(self):
def fget(self): def fget(self):
if len(self.contents) != 1: if len(self.contents) != 1:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id) raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
return struct.unpack("<B", self.contents)[0] return struct.unpack("<B", self.contents)[0]
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def word(): def word(self):
def fget(self): def fget(self):
if len(self.contents) != 2: if len(self.contents) != 2:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id) raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
return struct.unpack("<H", self.contents)[0] return struct.unpack("<H", self.contents)[0]
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def sword(): def sword(self):
def fget(self): def fget(self):
if len(self.contents) != 2: if len(self.contents) != 2:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id) raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
return struct.unpack("<h", self.contents)[0] return struct.unpack("<h", self.contents)[0]
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def dword(): def dword(self):
def fget(self): def fget(self):
if len(self.contents) != 4: if len(self.contents) != 4:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id) raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)

View File

@ -4,9 +4,9 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
""" """
Provides abstraction for metadata reading.writing from a variety of ebook formats. Provides abstraction for metadata reading.writing from a variety of ebook formats.
""" """
import os, mimetypes, sys import os, mimetypes, sys, re
from urllib import unquote, quote from urllib import unquote, quote
from urlparse import urlparse from urlparse import urlparse
@ -36,32 +36,28 @@ def author_to_author_sort(author):
def authors_to_sort_string(authors): def authors_to_sort_string(authors):
return ' & '.join(map(author_to_author_sort, authors)) return ' & '.join(map(author_to_author_sort, authors))
def get_parser(extension): _title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
''' Return an option parser with the basic metadata options already setup''' def title_sort(title):
parser = OptionParser(usage='%prog [options] myfile.'+extension+'\n\nRead and write metadata from an ebook file.') match = _title_pat.search(title)
parser.add_option("-t", "--title", action="store", type="string", \ if match:
dest="title", help=_("Set the book title"), default=None) prep = match.group(1)
parser.add_option("-a", "--authors", action="store", type="string", \ title = title.replace(prep, '') + ', ' + prep
dest="authors", help=_("Set the authors"), default=None) return title.strip()
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help=_("The category this book belongs to. E.g.: History"), default=None)
parser.add_option('--comment', dest='comment', default=None, action='store',
help=_('Set the comment'))
return parser
class Resource(object): class Resource(object):
''' '''
Represents a resource (usually a file on the filesystem or a URL pointing Represents a resource (usually a file on the filesystem or a URL pointing
to the web. Such resources are commonly referred to in OPF files. to the web. Such resources are commonly referred to in OPF files.
They have the interface: They have the interface:
:member:`path` :member:`path`
:member:`mime_type` :member:`mime_type`
:method:`href` :method:`href`
''' '''
def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True): def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
self._href = None self._href = None
self._basedir = basedir self._basedir = basedir
@ -91,13 +87,13 @@ class Resource(object):
pc = unquote(pc).decode('utf-8') pc = unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep))) self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
self.fragment = unquote(url[-1]) self.fragment = unquote(url[-1])
def href(self, basedir=None): def href(self, basedir=None):
''' '''
Return a URL pointing to this resource. If it is a file on the filesystem Return a URL pointing to this resource. If it is a file on the filesystem
the URL is relative to `basedir`. the URL is relative to `basedir`.
`basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`). `basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`).
If this resource has no basedir, then the current working directory is used as the basedir. If this resource has no basedir, then the current working directory is used as the basedir.
''' '''
@ -119,54 +115,54 @@ class Resource(object):
if isinstance(rpath, unicode): if isinstance(rpath, unicode):
rpath = rpath.encode('utf-8') rpath = rpath.encode('utf-8')
return quote(rpath.replace(os.sep, '/'))+frag return quote(rpath.replace(os.sep, '/'))+frag
def set_basedir(self, path): def set_basedir(self, path):
self._basedir = path self._basedir = path
def basedir(self): def basedir(self):
return self._basedir return self._basedir
def __repr__(self): def __repr__(self):
return 'Resource(%s, %s)'%(repr(self.path), repr(self.href())) return 'Resource(%s, %s)'%(repr(self.path), repr(self.href()))
class ResourceCollection(object): class ResourceCollection(object):
def __init__(self): def __init__(self):
self._resources = [] self._resources = []
def __iter__(self): def __iter__(self):
for r in self._resources: for r in self._resources:
yield r yield r
def __len__(self): def __len__(self):
return len(self._resources) return len(self._resources)
def __getitem__(self, index): def __getitem__(self, index):
return self._resources[index] return self._resources[index]
def __bool__(self): def __bool__(self):
return len(self._resources) > 0 return len(self._resources) > 0
def __str__(self): def __str__(self):
resources = map(repr, self) resources = map(repr, self)
return '[%s]'%', '.join(resources) return '[%s]'%', '.join(resources)
def __repr__(self): def __repr__(self):
return str(self) return str(self)
def append(self, resource): def append(self, resource):
if not isinstance(resource, Resource): if not isinstance(resource, Resource):
raise ValueError('Can only append objects of type Resource') raise ValueError('Can only append objects of type Resource')
self._resources.append(resource) self._resources.append(resource)
def remove(self, resource): def remove(self, resource):
self._resources.remove(resource) self._resources.remove(resource)
def replace(self, start, end, items): def replace(self, start, end, items):
'Same as list[start:end] = items' 'Same as list[start:end] = items'
self._resources[start:end] = items self._resources[start:end] = items
@staticmethod @staticmethod
def from_directory_contents(top, topdown=True): def from_directory_contents(top, topdown=True):
collection = ResourceCollection() collection = ResourceCollection()
@ -176,30 +172,30 @@ class ResourceCollection(object):
res.set_basedir(top) res.set_basedir(top)
collection.append(res) collection.append(res)
return collection return collection
def set_basedir(self, path): def set_basedir(self, path):
for res in self: for res in self:
res.set_basedir(path) res.set_basedir(path)
class MetaInformation(object): class MetaInformation(object):
'''Convenient encapsulation of book metadata''' '''Convenient encapsulation of book metadata'''
@staticmethod @staticmethod
def copy(mi): def copy(mi):
ans = MetaInformation(mi.title, mi.authors) ans = MetaInformation(mi.title, mi.authors)
for attr in ('author_sort', 'title_sort', 'comments', 'category', for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating', 'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'cover_data', 'application_id', 'guide', 'isbn', 'tags', 'cover_data', 'application_id', 'guide',
'manifest', 'spine', 'toc', 'cover', 'language', 'manifest', 'spine', 'toc', 'cover', 'language',
'book_producer', 'timestamp'): 'book_producer', 'timestamp'):
if hasattr(mi, attr): if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr)) setattr(ans, attr, getattr(mi, attr))
def __init__(self, title, authors=[_('Unknown')]): def __init__(self, title, authors=[_('Unknown')]):
''' '''
@param title: title or "Unknown" or a MetaInformation object @param title: title or ``_('Unknown')`` or a MetaInformation object
@param authors: List of strings or [] @param authors: List of strings or []
''' '''
mi = None mi = None
@ -214,14 +210,14 @@ class MetaInformation(object):
self.tags = getattr(mi, 'tags', []) self.tags = getattr(mi, 'tags', [])
#: mi.cover_data = (ext, data) #: mi.cover_data = (ext, data)
self.cover_data = getattr(mi, 'cover_data', (None, None)) self.cover_data = getattr(mi, 'cover_data', (None, None))
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
'series', 'series_index', 'rating', 'isbn', 'language', 'series', 'series_index', 'rating', 'isbn', 'language',
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover', 'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
'book_producer', 'timestamp' 'book_producer', 'timestamp'
): ):
setattr(self, x, getattr(mi, x, None)) setattr(self, x, getattr(mi, x, None))
def smart_update(self, mi): def smart_update(self, mi):
''' '''
Merge the information in C{mi} into self. In case of conflicts, the information Merge the information in C{mi} into self. In case of conflicts, the information
@ -229,59 +225,66 @@ class MetaInformation(object):
''' '''
if mi.title and mi.title != _('Unknown'): if mi.title and mi.title != _('Unknown'):
self.title = mi.title self.title = mi.title
if mi.authors and mi.authors[0] != _('Unknown'): if mi.authors and mi.authors[0] != _('Unknown'):
self.authors = mi.authors self.authors = mi.authors
for attr in ('author_sort', 'title_sort', 'comments', 'category', for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating', 'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc', 'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer', 'cover', 'language', 'guide', 'book_producer',
'timestamp'): 'timestamp'):
if hasattr(mi, attr): val = getattr(mi, attr, None)
val = getattr(mi, attr) if val is not None:
if val is not None: setattr(self, attr, val)
setattr(self, attr, val)
if mi.tags:
self.tags += mi.tags self.tags += mi.tags
self.tags = list(set(self.tags)) self.tags = list(set(self.tags))
if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None: if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
self.cover_data = mi.cover_data self.cover_data = mi.cover_data
def format_series_index(self): def format_series_index(self):
try: try:
x = float(self.series_index) x = float(self.series_index)
except ValueError: except ValueError:
x = 1.0 x = 1.0
return '%d'%x if int(x) == x else '%.2f'%x return '%d'%x if int(x) == x else '%.2f'%x
def __unicode__(self): def __unicode__(self):
ans = u'' ans = []
ans += u'Title : ' + unicode(self.title) + u'\n' def fmt(x, y):
ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))
fmt('Title', self.title)
if self.title_sort:
fmt('Title sort', self.title_sort)
if self.authors: if self.authors:
ans += u'Author : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown')) fmt('Author(s)', authors_to_string(self.authors) + \
ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n' ((' [' + self.author_sort + ']') if self.author_sort else ''))
if self.publisher: if self.publisher:
ans += u'Publisher: '+ unicode(self.publisher) + u'\n' fmt('Publisher', self.publisher)
if getattr(self, 'book_producer', False): if getattr(self, 'book_producer', False):
ans += u'Producer : '+ unicode(self.book_producer) + u'\n' fmt('Book Producer', self.book_producer)
if self.category: if self.category:
ans += u'Category : ' + unicode(self.category) + u'\n' ans += u'Category : ' + unicode(self.category) + u'\n'
if self.comments: if self.comments:
ans += u'Comments : ' + unicode(self.comments) + u'\n' fmt('Comments', self.comments)
if self.isbn: if self.isbn:
ans += u'ISBN : ' + unicode(self.isbn) + u'\n' fmt('ISBN', self.isbn)
if self.tags: if self.tags:
ans += u'Tags : ' + u', '.join([unicode(t) for t in self.tags]) + '\n' fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
if self.series: if self.series:
ans += u'Series : '+unicode(self.series) + ' #%s\n'%self.format_series_index() fmt('Series', self.series + ' #%s'%self.format_series_index())
if self.language: if self.language:
ans += u'Language : ' + unicode(self.language) + u'\n' fmt('Language', self.language)
if self.rating is not None:
fmt('Rating', self.rating)
if self.timestamp is not None: if self.timestamp is not None:
ans += u'Timestamp : ' + self.timestamp.isoformat(' ') fmt('Timestamp', self.timestamp.isoformat(' '))
return ans.strip() return u'\n'.join(ans)
def to_html(self): def to_html(self):
ans = [(_('Title'), unicode(self.title))] ans = [(_('Title'), unicode(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))] ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
@ -298,9 +301,9 @@ class MetaInformation(object):
for i, x in enumerate(ans): for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans) return u'<table>%s</table>'%u'\n'.join(ans)
def __str__(self): def __str__(self):
return self.__unicode__().encode('utf-8') return self.__unicode__().encode('utf-8')
def __nonzero__(self): def __nonzero__(self):
return bool(self.title or self.author or self.comments or self.category) return bool(self.title or self.author or self.comments or self.tags)

View File

@ -0,0 +1,198 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
ebook-meta
'''
# Usage text shown by the command line option parser.
# ``%%prog`` is escaped so that it survives the ``%`` formatting applied in
# option_parser() and ends up as a literal ``%prog``. The two ``%s``
# placeholders are filled there with the lists of file types that have
# metadata readers and metadata writers respectively.
USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
_('''
Read/Write metadata from/to ebook files.
Supported formats for reading metadata: %s
Supported formats for writing metadata: %s
Different file types support different kinds of metadata. If you try to set
some metadata on a file type that does not support it, the metadata will be
silently ignored.
''')
import sys, os
from calibre.utils.config import StringConfig
from calibre.customize.ui import metadata_readers, metadata_writers
from calibre.ebooks.metadata.meta import get_metadata, set_metadata
from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
title_sort, MetaInformation
from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre import prints
def config():
    '''
    Build the option set understood by the ebook-meta command line tool.

    One option is registered per metadata field that can be set, plus the
    auxiliary ``get_cover``, ``to_opf``, ``from_opf`` and ``lrf_bookid``
    options. No defaults are passed to ``add_opt`` here; the callers in this
    module treat a value of ``None`` as "not specified".

    :return: The populated ``StringConfig`` object.
    '''
    c = StringConfig('')
    c.add_opt('title', ['-t', '--title'],
              help=_('Set the title.'))
    c.add_opt('authors', ['-a', '--authors'],
              help=_('Set the authors. Multiple authors should be separated '
                     'by the & character. Author names should be in the order '
                     'Firstname Lastname.'))
    c.add_opt('title_sort', ['--title-sort'],
              help=_('The version of the title to be used for sorting. '
                     'If unspecified, and the title is specified, it will '
                     'be auto-generated from the title.'))
    c.add_opt('author_sort', ['--author-sort'],
              help=_('String to be used when sorting by author. '
                     'If unspecified, and the author(s) are specified, it will '
                     'be auto-generated from the author(s).'))
    c.add_opt('cover', ['--cover'],
              help=_('Set the cover to the specified file.'))
    c.add_opt('comments', ['-c', '--comments'],
              help=_('Set the ebook description.'))
    c.add_opt('publisher', ['-p', '--publisher'],
              help=_('Set the ebook publisher.'))
    c.add_opt('category', ['--category'],
              help=_('Set the book category.'))
    c.add_opt('series', ['-s', '--series'],
              help=_('Set the series this ebook belongs to.'))
    c.add_opt('series_index', ['-i', '--index'],
              help=_('Set the index of the book in this series.'))
    c.add_opt('rating', ['-r', '--rating'],
              help=_('Set the rating. Should be a number between 1 and 5.'))
    c.add_opt('isbn', ['--isbn'],
              help=_('Set the ISBN of the book.'))
    c.add_opt('tags', ['--tags'],
              help=_('Set the tags for the book. Should be a comma separated list.'))
    c.add_opt('book_producer', ['-k', '--book-producer'],
              help=_('Set the book producer.'))
    c.add_opt('language', ['-l', '--language'],
              help=_('Set the language.'))

    # Fixed wording: the original help text read "save it at as the".
    c.add_opt('get_cover', ['--get-cover'],
              help=_('Get the cover from the ebook and save it as the '
                     'specified file.'))
    c.add_opt('to_opf', ['--to-opf'],
              help=_('Specify the name of an OPF file. The metadata will '
                     'be written to the OPF file.'))
    # Fixed: the adjacent literals used to join as "on thecommand line"
    # because the trailing space was missing.
    c.add_opt('from_opf', ['--from-opf'],
              help=_('Read metadata from the specified OPF file and use it to '
                     'set metadata in the ebook. Metadata specified on the '
                     'command line will override metadata read from the OPF file'))

    c.add_opt('lrf_bookid', ['--lrf-bookid'],
              help=_('Set the BookID in LRF files'))
    return c
def filetypes():
    '''
    Return the set of all file types declared by the metadata reader plugins.
    '''
    return set(ftype
               for reader in metadata_readers()
               for ftype in reader.file_types)
def option_parser():
    '''
    Create the command line parser for ebook-meta.

    The usage message is USAGE with its two placeholders filled by the list
    of readable file types and the list of writable file types.
    '''
    writable = set(ftype
                   for writer in metadata_writers()
                   for ftype in writer.file_types)
    conf = config()
    usage = USAGE % (list(filetypes()), list(writable))
    return conf.option_parser(usage)
def do_set_metadata(opts, mi, stream, stream_type):
    '''
    Merge the metadata requested on the command line into ``mi`` and write
    the result to ``stream``.

    :param opts: Parsed command line options (see :func:`config`). Options
                 whose value is ``None`` are treated as not specified.
    :param mi: Metadata currently read from the ebook. It is copied, the
               caller's object is not modified.
    :param stream: The ebook file object, passed through to ``set_metadata``.
    :param stream_type: The ebook format, passed through to ``set_metadata``.

    Order of precedence (lowest to highest): existing metadata, metadata read
    from ``--from-opf``, values given explicitly on the command line.
    '''
    mi = MetaInformation(mi)
    # Structural OPF information is never written back by this tool.
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Close the OPF file deterministically instead of leaking the handle.
        with open(from_opf, 'rb') as opf_stream:
            opf_mi = MetaInformation(OPF(opf_stream))
        mi.smart_update(opf_mi)

    # Simple options map one-to-one onto MetaInformation attributes; the
    # ones skipped here need special treatment below or are not metadata.
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit author sort string wins over the auto-generated one.
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort

    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)

    if getattr(opts, 'tags', None) is not None:
        # Drop empty entries so that input such as "a,,b" or a trailing
        # comma does not create a tag that is the empty string.
        mi.tags = [t.strip() for t in opts.tags.split(',') if t.strip()]

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        # Read the cover through a context manager so the file is closed.
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    set_metadata(stream, mi, stream_type)
def main(args=sys.argv):
    '''
    Entry point of the ebook-meta command line tool.

    Prints the metadata of the ebook named by ``args[1]``. If any metadata
    setting option was given, the metadata is updated in the file and the
    changed metadata is printed as well. Optionally writes the metadata to
    an OPF file (``--to-opf``) and saves the cover (``--get-cover``).

    :param args: Command line arguments, including the program name.
    :return: ``0`` on success, ``1`` if no ebook file was specified.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        prints(_('No file specified'), file=sys.stderr)
        return 1
    path = args[1]
    stream_type = os.path.splitext(path)[1].replace('.', '').lower()

    # Any option other than the two pure output options counts as a
    # request to modify the metadata stored in the ebook.
    trying_to_set = False
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'get_cover'):
            continue
        if getattr(opts, pref.name) is not None:
            trying_to_set = True
            break

    # The context manager guarantees that the ebook file is closed even if
    # reading or writing the metadata raises.
    with open(path, 'r+b') as stream:
        mi = get_metadata(stream, stream_type)
        if trying_to_set:
            prints(_('Original metadata')+'::')
        metadata = unicode(mi)
        if trying_to_set:
            metadata = '\t'+'\n\t'.join(metadata.split('\n'))
        prints(metadata)

        if trying_to_set:
            stream.seek(0)
            do_set_metadata(opts, mi, stream, stream_type)
            stream.seek(0)
            stream.flush()
            lrf = None
            if stream_type == 'lrf':
                if opts.lrf_bookid is not None:
                    lrf = LRFMetaFile(stream)
                    lrf.book_id = opts.lrf_bookid
            # Re-read so that what is printed reflects what was stored.
            mi = get_metadata(stream, stream_type)
            prints('\n' + _('Changed metadata') + '::')
            metadata = unicode(mi)
            metadata = '\t'+'\n\t'.join(metadata.split('\n'))
            prints(metadata)
            if lrf is not None:
                prints('\tBookID:', lrf.book_id)

    if opts.to_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPFCreator
        opf = OPFCreator(os.getcwdu(), mi)
        # Fixed: the option is stored as ``to_opf``; ``opts.opf`` does not
        # exist and raised AttributeError whenever --to-opf was used.
        with open(opts.to_opf, 'wb') as f:
            opf.render(f)
        prints(_('OPF created in'), opts.to_opf)

    if opts.get_cover is not None:
        if mi.cover_data and mi.cover_data[1]:
            with open(opts.get_cover, 'wb') as f:
                f.write(mi.cover_data[1])
                prints(_('Cover saved to'), f.name)
        else:
            prints(_('No cover found'), file=sys.stderr)

    return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files''' '''Read meta information from epub files'''
import sys, os, time import os, time
from cStringIO import StringIO from cStringIO import StringIO
from contextlib import closing from contextlib import closing
@ -15,7 +15,7 @@ from PyQt4.QtWebKit import QWebPage
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import get_parser, MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir from calibre import CurrentDir
@ -191,67 +191,10 @@ def get_metadata(stream, extract_cover=True):
def set_metadata(stream, mi): def set_metadata(stream, mi):
stream.seek(0) stream.seek(0)
reader = OCFZipReader(stream, root=os.getcwdu()) reader = OCFZipReader(stream, root=os.getcwdu())
mi = MetaInformation(mi)
for x in ('guide', 'toc', 'manifest', 'spine'):
setattr(mi, x, None)
reader.opf.smart_update(mi) reader.opf.smart_update(mi)
newopf = StringIO(reader.opf.render()) newopf = StringIO(reader.opf.render())
safe_replace(stream, reader.container[OPF.MIMETYPE], newopf) safe_replace(stream, reader.container[OPF.MIMETYPE], newopf)
def option_parser():
parser = get_parser('epub')
parser.remove_option('--category')
parser.add_option('--tags', default=None,
help=_('A comma separated list of tags to set'))
parser.add_option('--series', default=None,
help=_('The series to which this book belongs'))
parser.add_option('--series-index', default=None,
help=_('The series index'))
parser.add_option('--language', default=None,
help=_('The book language'))
parser.add_option('--get-cover', default=False, action='store_true',
help=_('Extract the cover'))
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
with open(args[1], 'r+b') as stream:
mi = get_metadata(stream, extract_cover=opts.get_cover)
changed = False
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if opts.tags:
mi.tags = opts.tags.split(',')
changed = True
if opts.comment:
mi.comments = opts.comment
changed = True
if opts.series:
mi.series = opts.series
changed = True
if opts.series_index:
mi.series_index = opts.series_index
changed = True
if opts.language is not None:
mi.language = opts.language
changed = True
if changed:
set_metadata(stream, mi)
print unicode(get_metadata(stream, extract_cover=False)).encode('utf-8')
if mi.cover_data[1] is not None:
cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
with open(cpath, 'wb') as f:
f.write(mi.cover_data[1])
print 'Cover saved to', f.name
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -48,15 +48,3 @@ def get_metadata(stream):
if cdata: if cdata:
mi.cover_data = cdata mi.cover_data = cdata
return mi return mi
def main(args=sys.argv):
if len(args) != 2 or '--help' in args or '-h' in args:
print >>sys.stderr, _('Usage:'), args[0], 'mybook.fb2'
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print unicode(get_metadata(open(path, 'rb')))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -46,17 +46,3 @@ def get_metadata(stream):
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err)) msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8') print >>sys.stderr, msg.encode('utf8')
return mi return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: imp-meta file.imp')
print >>sys.stderr, _('No filename specified.')
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print get_metadata(open(path, 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -30,21 +30,3 @@ def get_metadata(stream):
mi.cover_data = ('jpg', covers[-1]) mi.cover_data = ('jpg', covers[-1])
return mi return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: %s file.lit') % args[0]
return 1
fname = args[1]
mi = get_metadata(open(fname, 'rb'))
print unicode(mi)
if mi.cover_data[1]:
cover = os.path.abspath(
'.'.join((os.path.splitext(os.path.basename(fname))[0],
mi.cover_data[0])))
open(cover, 'wb').write(mi.cover_data[1])
print _('Cover saved to'), cover
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -80,10 +80,3 @@ def get_metadata(f):
else: else:
raise ValueError('Not a LRX file') raise ValueError('Not a LRX file')
def main(args=sys.argv):
print get_metadata(open(args[1], 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -13,7 +13,6 @@ import sys
import os import os
from struct import pack, unpack from struct import pack, unpack
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks.metadata import get_parser
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader import get_metadata from calibre.ebooks.mobi.reader import get_metadata
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
@ -178,63 +177,3 @@ def set_metadata(stream, mi):
mu = MetadataUpdater(stream) mu = MetadataUpdater(stream)
mu.update(mi) mu.update(mi)
return return
def option_parser():
parser = get_parser('mobi')
parser.remove_option('--category')
parser.add_option('--tags', default=None,
help=_('Set the subject tags'))
parser.add_option('--language', default=None,
help=_('Set the language'))
parser.add_option('--publisher', default=None,
help=_('Set the publisher'))
parser.add_option('--isbn', default=None,
help=_('Set the ISBN'))
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print >>sys.stderr, 'Usage: %s file.mobi' % args[0]
return 1
fname = args[1]
changed = False
with open(fname, 'r+b') as stream:
mi = get_metadata(stream)
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if opts.comment:
mi.comments = opts.comment
changed = True
if opts.tags is not None:
mi.tags = opts.tags.split(',')
changed = True
if opts.language is not None:
mi.language = opts.language
changed = True
if opts.publisher is not None:
mi.publisher = opts.publisher
changed = True
if opts.isbn is not None:
mi.isbn = opts.isbn
changed = True
if changed:
set_metadata(stream, mi)
print unicode(get_metadata(stream))
if not changed and mi.cover_data[1]:
cover = os.path.abspath(
'.'.join((os.path.splitext(os.path.basename(fname))[0],
mi.cover_data[0].lower())))
open(cover, 'wb').write(mi.cover_data[1])
print _('Cover saved to'), cover
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -164,103 +164,3 @@ def get_metadata(stream):
return mi return mi
def main(args=sys.argv):
if len(args) != 2:
print 'Usage: %s file.odt'%args[0]
return 1
mi = get_metadata(open(args[1], 'rb'))
print mi
return 0
if __name__ == '__main__':
sys.exit(main())
#now = time.localtime()[:6]
#outputfile = "-"
#writemeta = False # Do we change any meta data?
#usenormalize = False
#
#try:
# opts, args = getopt.getopt(sys.argv[1:], "cdlI:A:a:o:x:X:")
#except getopt.GetoptError:
# exitwithusage()
#
#if len(opts) == 0:
# opts = [ ('-l','') ]
#
#for o, a in opts:
# if o in ('-a','-A','-I'):
# writemeta = True
# if a.find(":") >= 0:
# k,v = a.split(":",1)
# else:
# k,v = (a, "")
# if len(k) == 0:
# exitwithusage()
# k = fields.get(k,k)
# addfields[k] = unicode(v,'utf-8')
# if o == '-a':
# yieldfields[k] = True
# if o == '-I':
# deletefields[k] = True
# if o == '-d':
# writemeta = True
# addfields[(DCNS,u'date')] = "%04d-%02d-%02dT%02d:%02d:%02d" % now
# deletefields[(DCNS,u'date')] = True
# if o == '-c':
# usenormalize = True
# if o == '-l':
# Xfields = fields.values()
# if o == "-x":
# xfields.append(fields.get(a,a))
# if o == "-X":
# Xfields.append(fields.get(a,a))
# if o == "-o":
# outputfile = a
#
## The specification says we should change the element to our own,
## and must not export the original identifier.
#if writemeta:
# addfields[(METANS,u'generator')] = TOOLSVERSION
# deletefields[(METANS,u'generator')] = True
#
#odfs = odfmetaparser()
#parser = xml.sax.make_parser()
#parser.setFeature(xml.sax.handler.feature_namespaces, 1)
#parser.setContentHandler(odfs)
#
#if len(args) == 0:
# zin = zipfile.ZipFile(sys.stdin,'r')
#else:
# if not zipfile.is_zipfile(args[0]):
# exitwithusage()
# zin = zipfile.ZipFile(args[0], 'r')
#
#content = zin.read('meta.xml')
#parser.parse(StringIO(content))
#
#if writemeta:
# if outputfile == '-':
# if sys.stdout.isatty():
# sys.stderr.write("Won't write ODF file to terminal\n")
# sys.exit(1)
# zout = zipfile.ZipFile(sys.stdout,"w")
# else:
# zout = zipfile.ZipFile(outputfile,"w")
#
#
#
# # Loop through the input zipfile and copy the content to the output until we
# # get to the meta.xml. Then substitute.
# for zinfo in zin.infolist():
# if zinfo.filename == "meta.xml":
# # Write meta
# zi = zipfile.ZipInfo("meta.xml", now)
# zi.compress_type = zipfile.ZIP_DEFLATED
# zout.writestr(zi,odfs.meta() )
# else:
# payload = zin.read(zinfo.filename)
# zout.writestr(zinfo, payload)
#
# zout.close()
#zin.close()

View File

@ -11,7 +11,7 @@ from calibre.constants import __appname__, __version__
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
from calibre.ebooks.lrf import entity_to_unicode from calibre.ebooks.lrf import entity_to_unicode
from calibre.ebooks.metadata import get_parser, Resource, ResourceCollection from calibre.ebooks.metadata import Resource, ResourceCollection
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
class OPFSoup(BeautifulStoneSoup): class OPFSoup(BeautifulStoneSoup):
@ -38,8 +38,8 @@ class ManifestItem(Resource):
res.mime_type = mt res.mime_type = mt
return res return res
@apply @dynamic_property
def media_type(): def media_type(self):
def fget(self): def fget(self):
return self.mime_type return self.mime_type
def fset(self, val): def fset(self, val):
@ -242,14 +242,14 @@ class OPF(MetaInformation):
def __init__(self): def __init__(self):
raise NotImplementedError('Abstract base class') raise NotImplementedError('Abstract base class')
@apply @dynamic_property
def package(): def package(self):
def fget(self): def fget(self):
return self.soup.find(re.compile('package')) return self.soup.find(re.compile('package'))
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def metadata(): def metadata(self):
def fget(self): def fget(self):
return self.package.find(re.compile('metadata')) return self.package.find(re.compile('metadata'))
return property(fget=fget) return property(fget=fget)
@ -540,46 +540,4 @@ class OPFCreator(MetaInformation):
if toc is not None and ncx_stream is not None: if toc is not None and ncx_stream is not None:
toc.render(ncx_stream, self.application_id) toc.render(ncx_stream, self.application_id)
ncx_stream.flush() ncx_stream.flush()
def option_parser():
    '''Return the option parser produced by ``get_parser('opf')``.'''
    opf_parser = get_parser('opf')
    return opf_parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
mi = MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
write = False
if opts.title is not None:
mi.title = opts.title.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if opts.authors is not None:
aus = [i.strip().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') for i in opts.authors.split(',')]
mi.authors = aus
write = True
if opts.category is not None:
mi.category = opts.category.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if opts.comment is not None:
mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if write:
mo = OPFCreator(os.path.dirname(args[1]), mi)
ncx = cStringIO.StringIO()
mo.render(open(args[1], 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
if f:
f = open(f[0], 'wb')
else:
f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
f.write(ncx)
f.close()
print MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -12,7 +12,7 @@
<dc:identifier opf:scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier> <dc:identifier opf:scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
<dc:date py:if="getattr(mi, 'timestamp', None) is not None">${mi.timestamp.isoformat()}</dc:date> <dc:date py:if="getattr(mi, 'timestamp', None) is not None">${mi.timestamp.isoformat()}</dc:date>
<dc:language>${mi.language if mi.language else 'UND'}</dc:language> <dc:language>${mi.language if mi.language else 'UND'}</dc:language>
<dc:type py:if="mi.category">${mi.category}</dc:type> <dc:type py:if="getattr(mi, 'category', False)">${mi.category}</dc:type>
<dc:description py:if="mi.comments">${mi.comments}</dc:description> <dc:description py:if="mi.comments">${mi.comments}</dc:description>
<dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher> <dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
<dc:identifier opf:scheme="ISBN" py:if="mi.isbn">${mi.isbn}</dc:identifier> <dc:identifier opf:scheme="ISBN" py:if="mi.isbn">${mi.isbn}</dc:identifier>

View File

@ -169,8 +169,8 @@ class ManifestItem(Resource):
res.mime_type = mt res.mime_type = mt
return res return res
@apply @dynamic_property
def media_type(): def media_type(self):
def fget(self): def fget(self):
return self.mime_type return self.mime_type
def fset(self, val): def fset(self, val):
@ -608,8 +608,8 @@ class OPF(object):
for item in self.iterguide(): for item in self.iterguide():
item.set('href', get_href(item)) item.set('href', get_href(item))
@apply @dynamic_property
def authors(): def authors(self):
def fget(self): def fget(self):
ans = [] ans = []
@ -628,8 +628,8 @@ class OPF(object):
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@apply @dynamic_property
def author_sort(): def author_sort(self):
def fget(self): def fget(self):
matches = self.authors_path(self.metadata) matches = self.authors_path(self.metadata)
@ -651,8 +651,8 @@ class OPF(object):
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@apply @dynamic_property
def title_sort(): def title_sort(self):
def fget(self): def fget(self):
matches = self.title_path(self.metadata) matches = self.title_path(self.metadata)
@ -674,8 +674,28 @@ class OPF(object):
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@apply @dynamic_property
def tags(): def title_sort(self):
def fget(self):
matches = self.title_path(self.metadata)
if matches:
for match in matches:
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
if not ans:
ans = match.get('file-as', None)
if ans:
return ans
def fset(self, val):
matches = self.title_path(self.metadata)
if matches:
matches[0].set('file-as', unicode(val))
return property(fget=fget, fset=fset)
@dynamic_property
def tags(self):
def fget(self): def fget(self):
ans = [] ans = []
@ -692,8 +712,8 @@ class OPF(object):
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@apply @dynamic_property
def isbn(): def isbn(self):
def fget(self): def fget(self):
for match in self.isbn_path(self.metadata): for match in self.isbn_path(self.metadata):
@ -709,8 +729,8 @@ class OPF(object):
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@apply @dynamic_property
def application_id(): def application_id(self):
def fget(self): def fget(self):
for match in self.application_id_path(self.metadata): for match in self.application_id_path(self.metadata):
@ -726,8 +746,8 @@ class OPF(object):
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@apply @dynamic_property
def book_producer(): def book_producer(self):
def fget(self): def fget(self):
for match in self.bkp_path(self.metadata): for match in self.bkp_path(self.metadata):
@ -764,8 +784,8 @@ class OPF(object):
return cpath return cpath
@apply @dynamic_property
def cover(): def cover(self):
def fget(self): def fget(self):
if self.guide is not None: if self.guide is not None:
@ -1001,62 +1021,19 @@ class OPFTest(unittest.TestCase):
self.opf.smart_update(MetaInformation(self.opf)) self.opf.smart_update(MetaInformation(self.opf))
self.testReading() self.testReading()
def testCreator(self):
    '''
    Render ``self.opf`` through OPFCreator, parse the rendered output
    back with OPF and run the reading checks against the result.
    '''
    creator = OPFCreator(os.getcwd(), self.opf)
    rendered = cStringIO.StringIO()
    creator.render(rendered)
    reparsed = OPF(cStringIO.StringIO(rendered.getvalue()), os.getcwd())
    self.testReading(opf=reparsed)
def testSmartUpdate(self):
self.opf.smart_update(self.opf)
self.testReading()
def suite(): def suite():
return unittest.TestLoader().loadTestsFromTestCase(OPFTest) return unittest.TestLoader().loadTestsFromTestCase(OPFTest)
def test(): def test():
unittest.TextTestRunner(verbosity=2).run(suite()) unittest.TextTestRunner(verbosity=2).run(suite())
def option_parser():
    '''
    Build the option parser for the OPF metadata command line tool.

    This is the parser returned by ``get_parser('opf')`` with an extra
    ``--language`` option that defaults to ``None``.
    '''
    from calibre.ebooks.metadata import get_parser
    opf_parser = get_parser('opf')
    language_help = _('Set the dc:language field')
    opf_parser.add_option('--language', help=language_help, default=None)
    return opf_parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
opfpath = os.path.abspath(args[1])
basedir = os.path.dirname(opfpath)
mi = MetaInformation(OPF(open(opfpath, 'rb'), basedir))
write = False
if opts.title is not None:
mi.title = opts.title
write = True
if opts.authors is not None:
aus = [i.strip() for i in opts.authors.split(',')]
mi.authors = aus
write = True
if opts.category is not None:
mi.category = opts.category
write = True
if opts.comment is not None:
mi.comments = opts.comment
write = True
if opts.language is not None:
mi.language = opts.language
write = True
if write:
mo = OPFCreator(basedir, mi)
ncx = cStringIO.StringIO()
mo.render(open(args[1], 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
if f:
f = open(f[0], 'wb')
else:
f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
f.write(ncx)
f.close()
print MetaInformation(OPF(open(opfpath, 'rb'), basedir))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>' __copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from RB files''' '''Read meta information from RB files'''
import sys, os, struct import sys, struct
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
@ -53,16 +53,4 @@ def get_metadata(stream):
raise raise
return mi return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: rb-meta file.rb')
print >>sys.stderr, _('No filename specified.')
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print get_metadata(open(path, 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -5,7 +5,7 @@ Edit metadata in RTF files.
""" """
import re, cStringIO, sys import re, cStringIO, sys
from calibre.ebooks.metadata import MetaInformation, get_parser from calibre.ebooks.metadata import MetaInformation
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL) title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL) author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@ -166,22 +166,3 @@ def set_metadata(stream, options):
stream.write(src) stream.write(src)
stream.write(after) stream.write(after)
def option_parser():
    '''Return the option parser produced by ``get_parser('rtf')``.'''
    rtf_parser = get_parser('rtf')
    return rtf_parser
def main(args=sys.argv):
    '''
    Command line entry point: write the metadata given on the command line
    into an RTF file and return the metadata read back from it.

    The parsed options object itself is handed to ``set_metadata``; its
    ``authors`` value is split on commas and its ``comment`` value is
    mirrored to ``comments`` first.

    :param args: Argument vector, ``args[0]`` being the program name and
                 ``args[1]`` the path of the RTF file.
    :return: The metadata returned by ``get_metadata`` after the update.
             Exits the process with status 1 if exactly one file was not
             given.
    '''
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)
    stream = open(args[1], 'r+b')
    try:
        if options.authors:
            options.authors = options.authors.split(',')
        options.comments = options.comment
        set_metadata(stream, options)
        mi = get_metadata(stream)
    finally:
        # The file is opened for update; always close it so the changes
        # are flushed and the handle is released
        stream.close()
    return mi

if __name__ == '__main__':
    main()

View File

@ -86,8 +86,8 @@ class TOC(list):
for i in obj.flat(): for i in obj.flat():
yield i yield i
@apply @dynamic_property
def abspath(): def abspath(self):
doc='Return the file this toc entry points to as a absolute path to a file on the system.' doc='Return the file this toc entry points to as a absolute path to a file on the system.'
def fget(self): def fget(self):
if self.href is None: if self.href is None:
@ -208,4 +208,4 @@ class TOC(list):
template = MarkupTemplate(ncx_template) template = MarkupTemplate(ncx_template)
raw = template.generate(uid=uid, toc=self, __appname__=__appname__) raw = template.generate(uid=uid, toc=self, __appname__=__appname__)
raw = raw.render(doctype=doctype) raw = raw.render(doctype=doctype)
stream.write(raw) stream.write(raw)

View File

@ -0,0 +1,29 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin):
    '''Input plugin that unpacks MOBI, PRC and AZW files via MobiReader.'''

    name        = 'MOBI Input'
    author      = 'Kovid Goyal'
    description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
    file_types  = set(['mobi', 'prc', 'azw'])

    def convert(self, stream, options, file_ext, parse_cache, log):
        '''
        Extract the book in ``stream`` into the current working directory.

        :param stream: The MOBI file, handed to MobiReader.
        :param options: Conversion options; ``input_encoding`` and
                        ``debug_input`` are read from it.
        :param file_ext: Not used by this method.
        :param parse_cache: Mapping passed to
                            ``MobiReader.extract_content``. If it ends up
                            holding a truthy ``calibre_raw_mobi_markup``
                            entry, that markup is dumped to
                            ``debug-raw.html``.
        :param log: Logger handed to MobiReader.
        :return: ``created_opf_path`` of the reader.
        '''
        from calibre.ebooks.mobi.reader import MobiReader
        mr = MobiReader(stream, log, options.input_encoding,
                        options.debug_input)
        # Both arguments are passed positionally: a positional argument
        # may not follow a keyword argument in a call
        mr.extract_content(os.getcwdu(), parse_cache)
        raw = parse_cache.get('calibre_raw_mobi_markup', False)
        if raw:
            if isinstance(raw, unicode):
                raw = raw.encode('utf-8')
            # Close the debug dump deterministically
            with open('debug-raw.html', 'wb') as dump:
                dump.write(raw)
        return mr.created_opf_path

View File

@ -80,7 +80,20 @@ class MobiMLizer(object):
def __init__(self, ignore_tables=False): def __init__(self, ignore_tables=False):
self.ignore_tables = ignore_tables self.ignore_tables = ignore_tables
def transform(self, oeb, context): @classmethod
def config(cls, cfg):
    '''
    Register the ``mobiml`` option group, containing the
    ``--ignore-tables`` switch (default ``False``), on ``cfg`` and return
    ``cfg``.
    '''
    mobiml = cfg.add_group('mobiml', _('Mobipocket markup options.'))
    ignore_tables_help = _(
        'Render HTML tables as blocks of text instead of actual '
        'tables. This is neccessary if the HTML contains very '
        'large or complex tables.')
    mobiml('ignore_tables', ['--ignore-tables'], help=ignore_tables_help,
           default=False)
    return cfg
@classmethod
def generate(cls, opts):
    '''Create an instance whose ``ignore_tables`` setting is taken from ``opts``.'''
    skip_tables = opts.ignore_tables
    return cls(ignore_tables=skip_tables)
def __call__(self, oeb, context):
oeb.logger.info('Converting XHTML to Mobipocket markup...') oeb.logger.info('Converting XHTML to Mobipocket markup...')
self.oeb = oeb self.oeb = oeb
self.profile = profile = context.dest self.profile = profile = context.dest

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files Read data from .mobi files
''' '''
import sys, struct, os, cStringIO, re, functools import struct, os, cStringIO, re, functools
try: try:
from PIL import Image as PILImage from PIL import Image as PILImage
@ -35,8 +35,10 @@ class EXTHHeader(object):
pos = 0 pos = 0
self.mi = MetaInformation(_('Unknown'), [_('Unknown')]) self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
self.has_fake_cover = True self.has_fake_cover = True
left = self.num_items
for i in range(self.num_items): while left > 0:
left -= 1
id, size = struct.unpack('>LL', raw[pos:pos+8]) id, size = struct.unpack('>LL', raw[pos:pos+8])
content = raw[pos+8:pos+size] content = raw[pos+8:pos+size]
pos += size pos += size
@ -76,7 +78,8 @@ class EXTHHeader(object):
class BookHeader(object): class BookHeader(object):
def __init__(self, raw, ident): def __init__(self, raw, ident, user_encoding, log):
self.log = log
self.compression_type = raw[:2] self.compression_type = raw[:2]
self.records, self.records_size = struct.unpack('>HH', raw[8:12]) self.records, self.records_size = struct.unpack('>HH', raw[8:12])
self.encryption_type, = struct.unpack('>H', raw[12:14]) self.encryption_type, = struct.unpack('>H', raw[12:14])
@ -92,8 +95,8 @@ class BookHeader(object):
else: else:
self.ancient = False self.ancient = False
self.doctype = raw[16:20] self.doctype = raw[16:20]
self.length, self.type, self.codepage, self.unique_id, self.version = \ self.length, self.type, self.codepage, self.unique_id, \
struct.unpack('>LLLLL', raw[20:40]) self.version = struct.unpack('>LLLLL', raw[20:40])
try: try:
@ -102,8 +105,9 @@ class BookHeader(object):
65001 : 'utf-8', 65001 : 'utf-8',
}[self.codepage] }[self.codepage]
except (IndexError, KeyError): except (IndexError, KeyError):
print '[WARNING] Unknown codepage %d. Assuming cp-1252'%self.codepage self.codec = 'cp1252' if user_encoding is None else user_encoding
self.codec = 'cp1252' log.warn('Unknown codepage %d. Assuming %s'%(self.codepage,
self.codec))
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length: if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
self.extra_flags = 0 self.extra_flags = 0
@ -138,9 +142,24 @@ class MobiReader(object):
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE) PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex') IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
def __init__(self, filename_or_stream, verbose=False): def __init__(self, filename_or_stream, log, user_encoding=None, debug=None):
self.verbose = verbose self.log = log
self.debug = debug
self.embedded_mi = None self.embedded_mi = None
self.base_css_rules = '''
blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
p { margin: 0em; text-align: justify }
.bold { font-weight: bold }
.italic { font-style: italic }
.mbp_pagebreak {
page-break-after: always; margin: 0; display: block
}
'''
self.tag_css_rules = []
if hasattr(filename_or_stream, 'read'): if hasattr(filename_or_stream, 'read'):
stream = filename_or_stream stream = filename_or_stream
@ -177,17 +196,21 @@ class MobiReader(object):
self.sections.append((section(i), self.section_headers[i])) self.sections.append((section(i), self.section_headers[i]))
self.book_header = BookHeader(self.sections[0][0], self.ident) self.book_header = BookHeader(self.sections[0][0], self.ident,
user_encoding, self.log)
self.name = self.name.decode(self.book_header.codec, 'replace') self.name = self.name.decode(self.book_header.codec, 'replace')
def extract_content(self, output_dir=os.getcwdu()): def extract_content(self, output_dir, parse_cache):
output_dir = os.path.abspath(output_dir) output_dir = os.path.abspath(output_dir)
if self.book_header.encryption_type != 0: if self.book_header.encryption_type != 0:
raise DRMError(self.name) raise DRMError(self.name)
processed_records = self.extract_text() processed_records = self.extract_text()
if self.debug is not None:
self.parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
self.add_anchors() self.add_anchors()
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') self.processed_html = self.processed_html.decode(self.book_header.codec,
'ignore')
for pat in ENCODING_PATS: for pat in ENCODING_PATS:
self.processed_html = pat.sub('', self.processed_html) self.processed_html = pat.sub('', self.processed_html)
e2u = functools.partial(entity_to_unicode, e2u = functools.partial(entity_to_unicode,
@ -203,16 +226,10 @@ class MobiReader(object):
self.processed_html = \ self.processed_html = \
re.compile('<head>', re.IGNORECASE).sub( re.compile('<head>', re.IGNORECASE).sub(
'\n<head>\n' '\n<head>\n'
'<style type="text/css">\n' '\t<link type="text/css" href="styles.css" />\n',
'blockquote { margin: 0em 0em 0em 1.25em; text-align: justify; }\n'
'p { margin: 0em; text-align: justify; }\n'
'.bold { font-weight: bold; }\n'
'.italic { font-style: italic; }\n'
'</style>\n',
self.processed_html) self.processed_html)
if self.verbose: self.log.debug('Parsing HTML...')
print 'Parsing HTML...'
root = html.fromstring(self.processed_html) root = html.fromstring(self.processed_html)
self.upshift_markup(root) self.upshift_markup(root)
guides = root.xpath('//guide') guides = root.xpath('//guide')
@ -230,25 +247,23 @@ class MobiReader(object):
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href'] ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
except AttributeError: except AttributeError:
pass pass
if self.verbose: parse_cache[htmlfile] = root
print 'Serializing...'
with open(htmlfile, 'wb') as f:
raw = html.tostring(root, encoding='utf-8', method='xml',
include_meta_content_type=True, pretty_print=True)
raw = raw.replace('<head>',
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
f.write(raw)
self.htmlfile = htmlfile self.htmlfile = htmlfile
self.log.debug('Creating OPF...')
if self.book_header.exth is not None or self.embedded_mi is not None: ncx = cStringIO.StringIO()
if self.verbose: opf = self.create_opf(htmlfile, guide, root)
print 'Creating OPF...' self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
ncx = cStringIO.StringIO() opf.render(open(self.created_opf_path, 'wb'), ncx)
opf = self.create_opf(htmlfile, guide, root) ncx = ncx.getvalue()
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx) if ncx:
ncx = ncx.getvalue() open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) with open('styles.css', 'wb') as s:
s.write(self.base_css_rules+'\n\n')
for rule in self.tag_css_rules:
if isinstance(rule, unicode):
rule = rule.encode('utf-8')
s.write(rule+'\n\n')
def read_embedded_metadata(self, root, elem, guide): def read_embedded_metadata(self, root, elem, guide):
raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>' raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
@ -274,11 +289,9 @@ class MobiReader(object):
elem.getparent().remove(elem) elem.getparent().remove(elem)
break break
break break
def cleanup_html(self): def cleanup_html(self):
if self.verbose: self.log.debug('Cleaning up HTML...')
print 'Cleaning up HTML...'
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html) self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower(): if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>' self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
@ -286,8 +299,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace('> <', '>\n<')
def upshift_markup(self, root): def upshift_markup(self, root):
if self.verbose: self.log.debug('Converting style information to CSS...')
print 'Converting style information to CSS...'
size_map = { size_map = {
'xx-small' : '0.5', 'xx-small' : '0.5',
'x-small' : '1', 'x-small' : '1',
@ -298,7 +310,7 @@ class MobiReader(object):
'xx-large' : '6', 'xx-large' : '6',
} }
mobi_version = self.book_header.mobi_version mobi_version = self.book_header.mobi_version
for tag in root.iter(etree.Element): for i, tag in enumerate(root.iter(etree.Element)):
if tag.tag in ('country-region', 'place', 'placetype', 'placename', if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city'): 'state', 'city'):
tag.tag = 'span' tag.tag = 'span'
@ -352,8 +364,7 @@ class MobiReader(object):
elif tag.tag == 'pre': elif tag.tag == 'pre':
if not tag.text: if not tag.text:
tag.tag = 'div' tag.tag = 'div'
if styles:
attrib['style'] = '; '.join(styles)
if 'filepos-id' in attrib: if 'filepos-id' in attrib:
attrib['id'] = attrib.pop('filepos-id') attrib['id'] = attrib.pop('filepos-id')
if 'filepos' in attrib: if 'filepos' in attrib:
@ -362,15 +373,24 @@ class MobiReader(object):
attrib['href'] = "#filepos%d" % int(filepos) attrib['href'] = "#filepos%d" % int(filepos)
except ValueError: except ValueError:
pass pass
if styles:
attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i)
self.tag_css_rules.append('#%s {%s}'%(attrib['id'],
'; '.join(styles)))
def create_opf(self, htmlfile, guide=None, root=None): def create_opf(self, htmlfile, guide=None, root=None):
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi) mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
if mi is None:
mi = MetaInformation(self.title, [_('Unknown')])
opf = OPFCreator(os.path.dirname(htmlfile), mi) opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'): if hasattr(self.book_header.exth, 'cover_offset'):
opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1) opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
elif mi.cover is not None: elif mi.cover is not None:
opf.cover = mi.cover opf.cover = mi.cover
manifest = [(htmlfile, 'text/x-oeb1-document')] manifest = [(htmlfile, 'text/x-oeb1-document'),
(os.path.abspath('styles.css'), 'text/css')]
bp = os.path.dirname(htmlfile) bp = os.path.dirname(htmlfile)
for i in getattr(self, 'image_names', []): for i in getattr(self, 'image_names', []):
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg')) manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
@ -441,8 +461,7 @@ class MobiReader(object):
return data[:len(data)-trail_size] return data[:len(data)-trail_size]
def extract_text(self): def extract_text(self):
if self.verbose: self.log.debug('Extracting text...')
print 'Extracting text...'
text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)] text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
processed_records = list(range(0, self.book_header.records+1)) processed_records = list(range(0, self.book_header.records+1))
@ -472,12 +491,11 @@ class MobiReader(object):
def replace_page_breaks(self): def replace_page_breaks(self):
self.processed_html = self.PAGE_BREAK_PAT.sub( self.processed_html = self.PAGE_BREAK_PAT.sub(
'<div class="mbp_pagebreak" style="page-break-after: always; margin: 0; display: block" />', '<div class="mbp_pagebreak" />',
self.processed_html) self.processed_html)
def add_anchors(self): def add_anchors(self):
if self.verbose: self.log.debug('Adding anchors...')
print 'Adding anchors...'
positions = set([]) positions = set([])
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
re.IGNORECASE) re.IGNORECASE)
@ -507,8 +525,7 @@ class MobiReader(object):
def extract_images(self, processed_records, output_dir): def extract_images(self, processed_records, output_dir):
if self.verbose: self.log.debug('Extracting images...')
print 'Extracting images...'
output_dir = os.path.abspath(os.path.join(output_dir, 'images')) output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
if not os.path.exists(output_dir): if not os.path.exists(output_dir):
os.makedirs(output_dir) os.makedirs(output_dir)
@ -535,14 +552,17 @@ class MobiReader(object):
im.convert('RGB').save(open(path, 'wb'), format='JPEG') im.convert('RGB').save(open(path, 'wb'), format='JPEG')
def get_metadata(stream): def get_metadata(stream):
mr = MobiReader(stream) from calibre.utils.logging import Log
log = Log()
mr = MobiReader(stream, log)
if mr.book_header.exth is None: if mr.book_header.exth is None:
mi = MetaInformation(mr.name, [_('Unknown')]) mi = MetaInformation(mr.name, [_('Unknown')])
else: else:
mi = mr.create_opf('dummy.html') mi = mr.create_opf('dummy.html')
try: try:
if hasattr(mr.book_header.exth, 'cover_offset'): if hasattr(mr.book_header.exth, 'cover_offset'):
cover_index = mr.book_header.first_image_index + mr.book_header.exth.cover_offset cover_index = mr.book_header.first_image_index + \
mr.book_header.exth.cover_offset
data = mr.sections[int(cover_index)][0] data = mr.sections[int(cover_index)][0]
else: else:
data = mr.sections[mr.book_header.first_image_index][0] data = mr.sections[mr.book_header.first_image_index][0]
@ -552,42 +572,6 @@ def get_metadata(stream):
im.convert('RGBA').save(obuf, format='JPEG') im.convert('RGBA').save(obuf, format='JPEG')
mi.cover_data = ('jpg', obuf.getvalue()) mi.cover_data = ('jpg', obuf.getvalue())
except: except:
import traceback log.exception()
traceback.print_exc()
return mi return mi
def option_parser():
    '''
    Build the option parser for the MOBI extraction command line tool.

    Provides ``-o``/``--output-dir`` (default ``'.'``) and
    ``-v``/``--verbose`` (a flag, default ``False``).
    '''
    from calibre.utils.config import OptionParser
    cli = OptionParser(usage=_('%prog [options] myebook.mobi'))
    output_dir_help = _('Output directory. Defaults to current directory.')
    cli.add_option('-o', '--output-dir', help=output_dir_help, default='.')
    cli.add_option('-v', '--verbose', action='store_true', default=False,
                   help='Useful for debugging.')
    return cli
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
mr = MobiReader(args[1], verbose=opts.verbose)
opts.output_dir = os.path.abspath(opts.output_dir)
mr.extract_content(opts.output_dir)
if opts.verbose:
oname = os.path.join(opts.output_dir, 'debug-raw.html')
dat = mr.mobi_html
if isinstance(dat, unicode):
dat = dat.encode('utf-8')
open(oname, 'wb').write(dat)
print _('Raw MOBI HTML saved in'), oname
print _('OEB ebook created in'), opts.output_dir
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -292,9 +292,29 @@ class Serializer(object):
buffer.seek(hoff) buffer.seek(hoff)
buffer.write('%010d' % ioff) buffer.write('%010d' % ioff)
class MobiFlattener(object):
    '''
    Transform that runs a CSSFlattener built from the destination
    profile in ``context.dest``, with ``unfloat`` and ``untable`` enabled.
    '''

    def config(self, cfg):
        '''Return ``cfg`` unchanged; no options are registered.'''
        return cfg

    def generate(self, opts):
        '''Return this instance; ``opts`` is not consulted.'''
        return self

    def __call__(self, oeb, context):
        '''Build the CSSFlattener and return the result of applying it to ``oeb``.'''
        profile = context.dest
        return CSSFlattener(fbase=profile.fbase,
                            fkey=profile.fnums.values(),
                            unfloat=True, untable=True)(oeb, context)
class MobiWriter(object): class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
DEFAULT_PROFILE = 'CybookG3'
TRANSFORMS = [HTMLTOCAdder, CaseMangler, MobiFlattener(), SVGRasterizer,
ManifestTrimmer, MobiMLizer]
def __init__(self, compression=None, imagemax=None, def __init__(self, compression=None, imagemax=None,
prefer_author_sort=False): prefer_author_sort=False):
@ -302,7 +322,32 @@ class MobiWriter(object):
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
self._prefer_author_sort = prefer_author_sort self._prefer_author_sort = prefer_author_sort
def dump(self, oeb, path): @classmethod
def config(cls, cfg):
"""Add any book-writing options to the :class:`Config` object
:param:`cfg`.
"""
mobi = cfg.add_group('mobipocket', _('Mobipocket-specific options.'))
mobi('compress', ['--compress'], default=False,
help=_('Compress file text using PalmDOC compression. '
'Results in smaller files, but takes a long time to run.'))
mobi('rescale_images', ['--rescale-images'], default=False,
help=_('Modify images to meet Palm device size limitations.'))
mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
help=_('When present, use the author sorting information for '
'generating the Mobipocket author metadata.'))
return cfg
@classmethod
def generate(cls, opts):
"""Generate a Writer instance from command-line options."""
compression = PALMDOC if opts.compress else UNCOMPRESSED
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
prefer_author_sort = opts.prefer_author_sort
return cls(compression=compression, imagemax=imagemax,
prefer_author_sort=prefer_author_sort)
def __call__(self, oeb, path):
if hasattr(path, 'write'): if hasattr(path, 'write'):
return self._dump_stream(oeb, path) return self._dump_stream(oeb, path)
with open(path, 'w+b') as stream: with open(path, 'w+b') as stream:
@ -542,21 +587,6 @@ def config(defaults=None):
else: else:
c = StringConfig(defaults, desc) c = StringConfig(defaults, desc)
mobi = c.add_group('mobipocket', _('Mobipocket-specific options.'))
mobi('compress', ['--compress'], default=False,
help=_('Compress file text using PalmDOC compression. '
'Results in smaller files, but takes a long time to run.'))
mobi('rescale_images', ['--rescale-images'], default=False,
help=_('Modify images to meet Palm device size limitations.'))
mobi('toc_title', ['--toc-title'], default=None,
help=_('Title for any generated in-line table of contents.'))
mobi('ignore_tables', ['--ignore-tables'], default=False,
help=_('Render HTML tables as blocks of text instead of actual '
'tables. This is neccessary if the HTML contains very large '
'or complex tables.'))
mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
help=_('When present, use the author sorting information for '
'generating the Mobipocket author metadata.'))
profiles = c.add_group('profiles', _('Device renderer profiles. ' profiles = c.add_group('profiles', _('Device renderer profiles. '
'Affects conversion of font sizes, image rescaling and rasterization ' 'Affects conversion of font sizes, image rescaling and rasterization '
'of tables. Valid profiles are: %s.') % ', '.join(_profiles)) 'of tables. Valid profiles are: %s.') % ', '.join(_profiles))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,98 @@
'''
Registry associating file extensions with Reader classes.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, logging
from itertools import chain
from calibre.ebooks.oeb.base import OEBError
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.lit.reader import LitReader
from calibre.ebooks.lit.writer import LitWriter
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.mobi.writer import MobiWriter
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.utils.config import Config
# Public names for ``from ... import *``. Every entry must be defined in this
# module: the previous value listed 'get_reader', which does not exist here,
# so a star-import raised AttributeError.
__all__ = ['REGISTRY', 'ReaderFactory', 'WriterFactory']

# Maps a lower-cased file extension to its (Reader, Writer) class pair.
# A ``None`` slot means nothing is registered for that direction.
# NOTE(review): '.opf' has no writer although OEBWriter is imported above --
# confirm that this is intentional.
REGISTRY = {
    '.opf': (OEBReader, None),
    '.lit': (LitReader, LitWriter),
    '.mobi': (MobiReader, MobiWriter),
    }
def ReaderFactory(path):
    """Pick the Reader class able to load the book at ``path``.

    A directory is always handled by :class:`OEBReader`; otherwise the
    lower-cased file extension is looked up in ``REGISTRY``.

    :raises OEBError: if no Reader is registered for the extension.
    """
    if os.path.isdir(path):
        return OEBReader
    extension = os.path.splitext(path)[1].lower()
    try:
        reader_class = REGISTRY[extension][0]
    except KeyError:
        reader_class = None
    if reader_class is not None:
        return reader_class
    raise OEBError('Unknown e-book file extension %r' % extension)
def WriterFactory(path):
    """Pick the Writer class able to produce output at ``path``.

    An existing directory, or a not-yet-existing path without any
    extension, is written with :class:`OEBWriter`. Anything else is
    resolved through the lower-cased file extension in ``REGISTRY``.

    :raises OEBError: if no Writer is registered for the extension.
    """
    if os.path.isdir(path):
        return OEBWriter
    extension = os.path.splitext(path)[1].lower()
    if not os.path.exists(path) and not extension:
        # Extension-less path that does not exist yet: treat it as a new
        # output directory.
        return OEBWriter
    try:
        writer_class = REGISTRY[extension][1]
    except KeyError:
        writer_class = None
    if writer_class is not None:
        return writer_class
    raise OEBError('Unknown e-book file extension %r' % extension)
def option_parser(Reader, Writer):
    """Build the command-line parser for one Reader/Writer combination.

    The Reader, each transform listed in ``Reader.TRANSFORMS`` and
    ``Writer.TRANSFORMS`` (in that order) and finally the Writer get to
    register their options on a shared :class:`Config`; the generic
    conversion options are then added to the parser created from it.
    """
    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
    Reader.config(cfg)
    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
        Transform.config(cfg)
    Writer.config(cfg)
    parser = cfg.option_parser()

    # Options shared by every conversion, as (flags, keyword settings).
    generic_options = (
        (('--encoding',),
         dict(default=None,
              help=_('Character encoding for input. Default is to auto detect.'))),
        (('-o', '--output'),
         dict(default=None,
              help=_('Output file. Default is derived from input filename.'))),
        (('-p', '--pretty-print'),
         dict(action='store_true', default=False,
              help=_('Produce more human-readable XML output.'))),
        (('-v', '--verbose'),
         dict(default=0, action='count',
              help=_('Useful for debugging.'))),
    )
    for flags, settings in generic_options:
        parser.add_option(*flags, **settings)
    return parser
def main(argv=sys.argv):
    """Run an ``ebook-convert INFILE OUTFILE [OPTIONS..]`` conversion.

    The input and output paths select the Reader and Writer classes; the
    remaining arguments are parsed with the parser built for that pair.
    The book is read, passed through every transform of the Reader and
    then of the Writer, and finally written out.

    :return: ``0`` on success, ``1`` on a usage error.
    """
    if len(argv) < 3:
        print(_("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]"))
        return 1
    inpath = argv[1]
    outpath = argv[2]
    Reader = ReaderFactory(inpath)
    Writer = WriterFactory(outpath)
    parser = option_parser(Reader, Writer)
    opts, leftover = parser.parse_args(argv[3:])
    if leftover:
        # Only options may follow the two paths.
        parser.print_help()
        return 1

    logger = Logger(logging.getLogger('ebook-convert'))
    logger.setup_cli_handler(opts.verbose)
    oeb = OEBBook(encoding=opts.encoding, pretty_print=opts.pretty_print,
                  logger=logger)
    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)

    # Instantiate every pipeline stage before any work is done.
    reader = Reader.generate(opts)
    writer = Writer.generate(opts)
    transforms = [Transform.generate(opts)
                  for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS)]

    reader(oeb, inpath)
    for transform in transforms:
        transform(oeb, context)
    writer(oeb, outpath)
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,562 @@
"""
Container-/OPF-based input OEBBook reader.
"""
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, uuid, copy
from itertools import izip, chain
from urlparse import urldefrag, urlparse
from urllib import unquote as urlunquote
from mimetypes import guess_type
from collections import defaultdict
from lxml import etree
from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
DC_NSES, OPF
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, CSSURL_RE, \
ENTITY_RE, LINK_SELECTORS, MS_COVER_TYPE
from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath
from calibre.ebooks.oeb.base import urlnormalize, xml2str
from calibre.ebooks.oeb.base import OEBError, OEBBook, DirContainer
from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
from calibre.ebooks.metadata.epub import CoverRenderer
from calibre.startup import get_lang
from calibre.ptempfile import TemporaryDirectory
__all__ = ['OEBReader']
class OEBReader(object):
    """Read an OEBPS 1.x or OPF/OPS 2.0 file collection."""

    # XPath selectors used to find a cover graphic inside an HTML cover page.
    COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]')
    COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')

    Container = DirContainer
    """Container type used to access book files.  Override in sub-classes."""

    DEFAULT_PROFILE = 'PRS505'
    """Default renderer profile for content read with this Reader."""

    TRANSFORMS = []
    """List of transforms to apply to content read with this Reader."""

    def __init__(self):
        return

    @classmethod
    def config(cls, cfg):
        """Add any book-reading options to the :class:`Config` object
        :param:`cfg`.
        """
        return

    @classmethod
    def generate(cls, opts):
        """Generate a Reader instance from command-line options."""
        return cls()

    def __call__(self, oeb, path):
        """Read the book at :param:`path` into the :class:`OEBBook` object
        :param:`oeb`.

        Returns the populated :param:`oeb`.
        """
        self.oeb = oeb
        self.logger = oeb.logger
        oeb.container = self.Container(path)
        opf = self._read_opf()
        self._all_from_opf(opf)
        return oeb

    def _clean_opf(self, opf):
        """Return a new OPF 2.0 ``package`` tree built from :param:`opf`.

        Elements without a namespace or in the OPF 1 namespace are moved
        into the OPF 2 namespace, Dublin Core elements are renamed into the
        DC 1.1 namespace with lower-cased names, and the legacy
        ``dc-metadata``/``x-metadata`` wrappers are dropped.
        """
        nsmap = {}
        for elem in opf.iter(tag=etree.Element):
            nsmap.update(elem.nsmap)
        for elem in opf.iter(tag=etree.Element):
            if namespace(elem.tag) in ('', OPF1_NS):
                elem.tag = OPF(barename(elem.tag))
        nsmap.update(OPF2_NSMAP)
        attrib = dict(opf.attrib)
        nroot = etree.Element(OPF('package'),
            nsmap={None: OPF2_NS}, attrib=attrib)
        metadata = etree.SubElement(nroot, OPF('metadata'), nsmap=nsmap)
        ignored = (OPF('dc-metadata'), OPF('x-metadata'))
        for elem in xpath(opf, 'o2:metadata//*'):
            if elem.tag in ignored:
                continue
            if namespace(elem.tag) in DC_NSES:
                tag = barename(elem.tag).lower()
                elem.tag = '{%s}%s' % (DC11_NS, tag)
            metadata.append(elem)
        for element in xpath(opf, 'o2:metadata//o2:meta'):
            metadata.append(element)
        for tag in ('o2:manifest', 'o2:spine', 'o2:tours', 'o2:guide'):
            for element in xpath(opf, tag):
                nroot.append(element)
        return nroot

    def _read_opf(self):
        """Load, parse and normalise the OPF document of the container.

        If the first parse fails, known named entities are substituted and
        the parse is retried once. Raises :class:`OEBError` if the root
        element is in an unexpected namespace.
        """
        # NOTE(review): read(None) presumably selects the container's OPF
        # file -- confirm against the Container implementation.
        data = self.oeb.container.read(None)
        data = self.oeb.decode(data)
        data = XMLDECL_RE.sub('', data)
        try:
            opf = etree.fromstring(data)
        except etree.XMLSyntaxError:
            repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
            data = ENTITY_RE.sub(repl, data)
            opf = etree.fromstring(data)
            self.logger.warn('OPF contains invalid HTML named entities')
        ns = namespace(opf.tag)
        if ns not in ('', OPF1_NS, OPF2_NS):
            raise OEBError('Invalid namespace %r for OPF document' % ns)
        opf = self._clean_opf(opf)
        return opf

    def _metadata_from_opf(self, opf):
        """Populate ``oeb.metadata`` and ``oeb.uid`` from :param:`opf`.

        Adds a generated UUID identifier when no identifier is both a UUID
        and carries an ``id``, and fills in defaults for missing language,
        creator and title.
        """
        uid = opf.get('unique-identifier', None)
        self.oeb.uid = None
        metadata = self.oeb.metadata
        for elem in xpath(opf, '/o2:package/o2:metadata//*'):
            term = elem.tag
            value = elem.text
            attrib = dict(elem.attrib)
            nsmap = elem.nsmap
            if term == OPF('meta'):
                # <meta name=... content=...> carries term and value as
                # attributes rather than as tag and text.
                term = qname(attrib.pop('name', None), nsmap)
                value = attrib.pop('content', None)
            if value:
                value = COLLAPSE_RE.sub(' ', value.strip())
            if term and (value or attrib):
                metadata.add(term, value, attrib, nsmap=nsmap)
        haveuuid = haveid = False
        for ident in metadata.identifier:
            if unicode(ident).startswith('urn:uuid:'):
                haveuuid = True
            if 'id' in ident.attrib:
                haveid = True
        if not (haveuuid and haveid):
            bookid = "urn:uuid:%s" % str(uuid.uuid4())
            metadata.add('identifier', bookid, id='calibre-uuid')
        if uid is None:
            self.logger.warn(u'Unique-identifier not specified')
        for item in metadata.identifier:
            if not item.id:
                continue
            if uid is None or item.id == uid:
                self.oeb.uid = item
                break
        else:
            self.logger.warn(u'Unique-identifier %r not found' % uid)
            for ident in metadata.identifier:
                if 'id' in ident.attrib:
                    # Use the identifier that actually has an id; the first
                    # identifier in the list is not guaranteed to have one.
                    self.oeb.uid = ident
                    break
        if not metadata.language:
            self.logger.warn(u'Language not specified')
            metadata.add('language', get_lang())
        if not metadata.creator:
            self.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
        if not metadata.title:
            self.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))

    def _manifest_add_missing(self):
        """Add files that manifest items reference but the manifest omits.

        Links in documents and ``url()`` references in stylesheets are
        followed repeatedly until no new local file is discovered. Files
        that do not exist in the container are only reported.
        """
        # BINARY_MIME is not among the module-level imports; import it here
        # so the fallback below cannot raise NameError.
        from calibre.ebooks.oeb.base import BINARY_MIME
        manifest = self.oeb.manifest
        known = set(manifest.hrefs)
        unchecked = set(manifest.values())
        while unchecked:
            new = set()
            for item in unchecked:
                if (item.media_type in OEB_DOCS or
                    item.media_type[-4:] in ('/xml', '+xml')) and \
                   item.data is not None:
                    hrefs = [sel(item.data) for sel in LINK_SELECTORS]
                    for href in chain(*hrefs):
                        href, _ = urldefrag(href)
                        if not href:
                            continue
                        href = item.abshref(urlnormalize(href))
                        scheme = urlparse(href).scheme
                        # A scheme marks an external URL; skip those.
                        if not scheme and href not in known:
                            new.add(href)
                elif item.media_type in OEB_STYLES:
                    for match in CSSURL_RE.finditer(item.data):
                        href, _ = urldefrag(match.group('url'))
                        href = item.abshref(urlnormalize(href))
                        scheme = urlparse(href).scheme
                        if not scheme and href not in known:
                            new.add(href)
            unchecked.clear()
            for href in new:
                known.add(href)
                if not self.oeb.container.exists(href):
                    self.logger.warn('Referenced file %r not found' % href)
                    continue
                self.logger.warn('Referenced file %r not in manifest' % href)
                id, _ = manifest.generate(id='added')
                guessed = guess_type(href)[0]
                media_type = guessed or BINARY_MIME
                added = manifest.add(id, href, media_type)
                # Newly added items are scanned in the next round.
                unchecked.add(added)

    def _manifest_from_opf(self, opf):
        """Fill ``oeb.manifest`` from the ``<manifest>`` of :param:`opf`.

        Duplicate hrefs and items missing from the container are skipped
        with a warning; duplicate ids get a freshly generated id/href.
        """
        # See _manifest_add_missing: BINARY_MIME is not imported at module
        # level.
        from calibre.ebooks.oeb.base import BINARY_MIME
        manifest = self.oeb.manifest
        for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
            id = elem.get('id')
            href = elem.get('href')
            media_type = elem.get('media-type', None)
            if media_type is None:
                media_type = elem.get('mediatype', None)
            if media_type is None or media_type == 'text/xml':
                guessed = guess_type(href)[0]
                media_type = guessed or media_type or BINARY_MIME
            fallback = elem.get('fallback')
            if href in manifest.hrefs:
                self.logger.warn(u'Duplicate manifest entry for %r' % href)
                continue
            if not self.oeb.container.exists(href):
                self.logger.warn(u'Manifest item %r not found' % href)
                continue
            if id in manifest.ids:
                self.logger.warn(u'Duplicate manifest id %r' % id)
                id, href = manifest.generate(id, href)
            manifest.add(id, href, media_type, fallback)
        self._manifest_add_missing()

    def _spine_add_extra(self):
        """Append documents linked from the spine but absent from it.

        Links are followed transitively; every document found this way is
        added to the spine as non-linear.
        """
        manifest = self.oeb.manifest
        spine = self.oeb.spine
        unchecked = set(spine)
        selector = XPath('h:body//h:a/@href')
        extras = set()
        while unchecked:
            new = set()
            for item in unchecked:
                if item.media_type not in OEB_DOCS:
                    # TODO: handle fallback chains
                    continue
                for href in selector(item.data):
                    href, _ = urldefrag(href)
                    if not href:
                        continue
                    href = item.abshref(urlnormalize(href))
                    if href not in manifest.hrefs:
                        continue
                    found = manifest.hrefs[href]
                    if found.media_type not in OEB_DOCS or \
                       found in spine or found in extras:
                        continue
                    new.add(found)
            extras.update(new)
            unchecked = new
        version = int(self.oeb.version[0])
        for item in sorted(extras):
            if version >= 2:
                self.logger.warn(
                    'Spine-referenced file %r not in spine' % item.href)
            spine.add(item, linear=False)

    def _spine_from_opf(self, opf):
        """Fill ``oeb.spine`` from the ``<spine>`` of :param:`opf`.

        Raises :class:`OEBError` if no spine item could be resolved.
        """
        spine = self.oeb.spine
        manifest = self.oeb.manifest
        for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
            idref = elem.get('idref')
            if idref not in manifest.ids:
                self.logger.warn(u'Spine item %r not found' % idref)
                continue
            item = manifest.ids[idref]
            spine.add(item, elem.get('linear'))
        if len(spine) == 0:
            raise OEBError("Spine is empty")
        self._spine_add_extra()

    def _guide_from_opf(self, opf):
        """Fill ``oeb.guide`` from the ``<guide>`` of :param:`opf`,
        skipping references whose target is not in the manifest.
        """
        guide = self.oeb.guide
        manifest = self.oeb.manifest
        for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
            href = elem.get('href')
            path = urldefrag(href)[0]
            if path not in manifest.hrefs:
                self.logger.warn(u'Guide reference %r not found' % href)
                continue
            guide.add(elem.get('type'), elem.get('title'), href)

    def _find_ncx(self, opf):
        """Locate the NCX item, remove it from the manifest and return it.

        The ``toc`` attribute of ``<spine>`` takes precedence; otherwise the
        first manifest item with the NCX media type is used. Returns
        ``None`` when no NCX can be found.
        """
        result = xpath(opf, '/o2:package/o2:spine/@toc')
        if result:
            id = result[0]
            if id not in self.oeb.manifest.ids:
                return None
            item = self.oeb.manifest.ids[id]
            self.oeb.manifest.remove(item)
            return item
        for item in self.oeb.manifest.values():
            if item.media_type == NCX_MIME:
                self.oeb.manifest.remove(item)
                return item
        return None

    def _toc_from_navpoint(self, item, toc, navpoint):
        """Recursively add the ``navPoint`` children of :param:`navpoint`
        to the TOC node :param:`toc`; hrefs are resolved against
        :param:`item`.
        """
        children = xpath(navpoint, 'ncx:navPoint')
        for child in children:
            title = ''.join(xpath(child, 'ncx:navLabel/ncx:text/text()'))
            title = COLLAPSE_RE.sub(' ', title.strip())
            href = xpath(child, 'ncx:content/@src')
            if not title or not href:
                continue
            href = item.abshref(urlnormalize(href[0]))
            path, _ = urldefrag(href)
            if path not in self.oeb.manifest.hrefs:
                self.logger.warn('TOC reference %r not found' % href)
                continue
            id = child.get('id')
            klass = child.get('class')
            node = toc.add(title, href, id=id, klass=klass)
            self._toc_from_navpoint(item, node, child)

    def _toc_from_ncx(self, item):
        """Build the TOC from the NCX :param:`item`.

        Returns ``False`` if :param:`item` is ``None``, ``True`` otherwise.
        """
        if item is None:
            return False
        ncx = item.data
        title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
        title = COLLAPSE_RE.sub(' ', title.strip())
        title = title or unicode(self.oeb.metadata.title[0])
        toc = self.oeb.toc
        toc.title = title
        navmaps = xpath(ncx, 'ncx:navMap')
        for navmap in navmaps:
            self._toc_from_navpoint(item, toc, navmap)
        return True

    def _toc_from_tour(self, opf):
        """Build the TOC from the first ``<tour>`` of :param:`opf`.

        Returns ``False`` if the OPF has no tour, ``True`` otherwise.
        """
        result = xpath(opf, 'o2:tours/o2:tour')
        if not result:
            return False
        tour = result[0]
        toc = self.oeb.toc
        toc.title = tour.get('title')
        sites = xpath(tour, 'o2:site')
        for site in sites:
            title = site.get('title')
            href = site.get('href')
            if not title or not href:
                continue
            path, _ = urldefrag(urlnormalize(href))
            if path not in self.oeb.manifest.hrefs:
                self.logger.warn('TOC reference %r not found' % href)
                continue
            id = site.get('id')
            toc.add(title, href, id=id)
        return True

    def _toc_from_html(self, opf):
        """Build the TOC from the links of the guide's ``toc`` document.

        When the guide reference has a fragment, only the nearest enclosing
        element that contains links is scanned. Link texts pointing at the
        same href are joined into one entry, in first-seen order. Returns
        ``False`` if the guide has no ``toc`` entry.
        """
        if 'toc' not in self.oeb.guide:
            return False
        itempath, frag = urldefrag(self.oeb.guide['toc'].href)
        item = self.oeb.manifest.hrefs[itempath]
        html = item.data
        if frag:
            elems = xpath(html, './/*[@id="%s"]' % frag)
            if not elems:
                elems = xpath(html, './/*[@name="%s"]' % frag)
            elem = elems[0] if elems else html
            # Climb until an ancestor actually contains links.
            while elem != html and not xpath(elem, './/h:a[@href]'):
                elem = elem.getparent()
            html = elem
        titles = defaultdict(list)
        order = []
        for anchor in xpath(html, './/h:a[@href]'):
            href = anchor.attrib['href']
            href = item.abshref(urlnormalize(href))
            path, frag = urldefrag(href)
            if path not in self.oeb.manifest.hrefs:
                continue
            title = ' '.join(xpath(anchor, './/text()'))
            title = COLLAPSE_RE.sub(' ', title.strip())
            if href not in titles:
                order.append(href)
            titles[href].append(title)
        toc = self.oeb.toc
        for href in order:
            toc.add(' '.join(titles[href]), href)
        return True

    def _toc_from_spine(self, opf):
        """Build a flat TOC with one entry per linear spine item.

        Document ``<title>`` texts are used as labels when every linear
        item has one and they are all distinct; otherwise the first heading
        (or ``<strong>``) text of each document is used. Always returns
        ``True``.
        """
        toc = self.oeb.toc
        # Labels are collected for linear items only, so they must later be
        # paired with exactly this list; pairing with the whole spine would
        # shift every label after a non-linear item.
        linear_items = [item for item in self.oeb.spine if item.linear]
        titles = []
        headers = []
        for item in linear_items:
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = COLLAPSE_RE.sub(' ', title.strip())
            # Keep one slot per item (possibly empty) so that titles stay
            # aligned with linear_items.
            titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = COLLAPSE_RE.sub(' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        use = titles
        if not all(titles) or len(titles) > len(set(titles)):
            # Missing or duplicated titles: fall back to headings.
            use = headers
        for title, item in izip(use, linear_items):
            toc.add(title, item.href)
        return True

    def _toc_from_opf(self, opf, item):
        """Build the TOC from the best available source: NCX, tour, HTML
        TOC document and finally the spine itself.
        """
        if self._toc_from_ncx(item):
            return
        if self._toc_from_tour(opf):
            return
        self.logger.warn('No metadata table of contents found')
        if self._toc_from_html(opf):
            return
        self._toc_from_spine(opf)

    def _pages_from_ncx(self, opf, item):
        """Fill ``oeb.pages`` from the ``pageList`` of the NCX :param:`item`.

        Returns ``False`` if there is no NCX or it has no page targets.
        """
        if item is None:
            return False
        ncx = item.data
        ptargets = xpath(ncx, 'ncx:pageList/ncx:pageTarget')
        if not ptargets:
            return False
        pages = self.oeb.pages
        for ptarget in ptargets:
            name = ''.join(xpath(ptarget, 'ncx:navLabel/ncx:text/text()'))
            name = COLLAPSE_RE.sub(' ', name.strip())
            href = xpath(ptarget, 'ncx:content/@src')
            if not href:
                continue
            href = item.abshref(urlnormalize(href[0]))
            id = ptarget.get('id')
            type = ptarget.get('type', 'normal')
            klass = ptarget.get('class')
            pages.add(name, href, type=type, id=id, klass=klass)
        return True

    def _find_page_map(self, opf):
        """Locate the page-map item, remove it from the manifest and return
        it.

        The ``page-map`` attribute of ``<spine>`` takes precedence;
        otherwise the first manifest item with the page-map media type is
        used. Returns ``None`` when no page map can be found.
        """
        result = xpath(opf, '/o2:package/o2:spine/@page-map')
        if result:
            id = result[0]
            if id not in self.oeb.manifest.ids:
                return None
            item = self.oeb.manifest.ids[id]
            self.oeb.manifest.remove(item)
            return item
        for item in self.oeb.manifest.values():
            if item.media_type == PAGE_MAP_MIME:
                self.oeb.manifest.remove(item)
                return item
        return None

    def _pages_from_page_map(self, opf):
        """Fill ``oeb.pages`` from a page-map file.

        Pages without a name are typed ``special``, pages whose name
        consists only of roman-numeral letters ``front``, and all others
        ``normal``. Returns ``False`` if no page map exists.
        """
        item = self._find_page_map(opf)
        if item is None:
            return False
        pmap = item.data
        pages = self.oeb.pages
        for page in xpath(pmap, 'o2:page'):
            name = page.get('name', '')
            href = page.get('href')
            if not href:
                continue
            name = COLLAPSE_RE.sub(' ', name.strip())
            href = item.abshref(urlnormalize(href))
            type = 'normal'
            if not name:
                type = 'special'
            elif name.lower().strip('ivxlcdm') == '':
                type = 'front'
            pages.add(name, href, type=type)
        return True

    def _pages_from_opf(self, opf, item):
        """Read pagination from the NCX if possible, else from a page map."""
        if self._pages_from_ncx(opf, item):
            return
        if self._pages_from_page_map(opf):
            return
        return

    def _cover_from_html(self, hcover):
        """Render the document :param:`hcover` to an image and add that
        image to the manifest as ``cover.jpeg``; returns the new item.

        The book is first written to a temporary directory so the renderer
        can load the page from disk.
        """
        with TemporaryDirectory('_html_cover') as tdir:
            writer = OEBWriter()
            writer(self.oeb, tdir)
            path = os.path.join(tdir, urlunquote(hcover.href))
            renderer = CoverRenderer(path)
            data = renderer.image_data
        id, href = self.oeb.manifest.generate('cover', 'cover.jpeg')
        item = self.oeb.manifest.add(id, href, JPEG_MIME, data=data)
        return item

    def _locate_cover_image(self):
        """Return a manifest item to be used as the cover image.

        Candidates are tried in this order: the ``cover`` metadata entry,
        the guide's ``cover`` reference, the Microsoft-style cover guide
        entry, an inline SVG or an ``<object>`` image in the cover
        document, and finally a rendering of the cover document itself.
        """
        if self.oeb.metadata.cover:
            id = str(self.oeb.metadata.cover[0])
            item = self.oeb.manifest.ids.get(id, None)
            if item is not None and item.media_type in OEB_IMAGES:
                return item
            else:
                self.logger.warn('Invalid cover image @id %r' % id)
        # Default cover document is the first spine item.
        hcover = self.oeb.spine[0]
        if 'cover' in self.oeb.guide:
            href = self.oeb.guide['cover'].href
            item = self.oeb.manifest.hrefs[href]
            media_type = item.media_type
            if media_type in OEB_IMAGES:
                return item
            elif media_type in OEB_DOCS:
                hcover = item
        html = hcover.data
        if MS_COVER_TYPE in self.oeb.guide:
            href = self.oeb.guide[MS_COVER_TYPE].href
            item = self.oeb.manifest.hrefs.get(href, None)
            if item is not None and item.media_type in OEB_IMAGES:
                return item
        if self.COVER_SVG_XP(html):
            # Copy the inline SVG out into a manifest item of its own.
            svg = copy.deepcopy(self.COVER_SVG_XP(html)[0])
            href = os.path.splitext(hcover.href)[0] + '.svg'
            id, href = self.oeb.manifest.generate(hcover.id, href)
            item = self.oeb.manifest.add(id, href, SVG_MIME, data=svg)
            return item
        if self.COVER_OBJECT_XP(html):
            object = self.COVER_OBJECT_XP(html)[0]
            href = hcover.abshref(object.get('data'))
            item = self.oeb.manifest.hrefs.get(href, None)
            if item is not None and item.media_type in OEB_IMAGES:
                return item
        return self._cover_from_html(hcover)

    def _ensure_cover_image(self):
        """Make the ``cover`` metadata entry point at a cover image item,
        updating an existing entry or adding a new one.
        """
        cover = self._locate_cover_image()
        if self.oeb.metadata.cover:
            self.oeb.metadata.cover[0].value = cover.id
            return
        self.oeb.metadata.add('cover', cover.id)

    def _all_from_opf(self, opf):
        """Populate every part of the book from :param:`opf`.

        The order matters: the manifest must exist before the spine and
        guide, and the NCX item is located once and shared by the TOC and
        page readers.
        """
        self.oeb.version = opf.get('version', '1.2')
        self._metadata_from_opf(opf)
        self._manifest_from_opf(opf)
        self._spine_from_opf(opf)
        self._guide_from_opf(opf)
        item = self._find_ncx(opf)
        self._toc_from_opf(opf, item)
        self._pages_from_opf(opf, item)
        self._ensure_cover_image()
def main(argv=sys.argv):
    """Debug entry point: read each book named on the command line and
    print its OPF 1 documents followed by its OPF 2 documents (with a
    page map) as pretty-printed XML. Always returns ``0``.
    """
    def emit(documents):
        # Each value is a (name, tree) pair; only the tree is printed.
        for _name, tree in documents:
            print(etree.tostring(tree, pretty_print=True))

    reader = OEBReader()
    for book_path in argv[1:]:
        book = reader(OEBBook(), book_path)
        emit(book.to_opf1().values())
        emit(book.to_opf2(page_map=True).values())
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -98,7 +98,15 @@ class CSSFlattener(object):
self.unfloat = unfloat self.unfloat = unfloat
self.untable = untable self.untable = untable
def transform(self, oeb, context): @classmethod
def config(cls, cfg):
return cfg
@classmethod
def generate(cls, opts):
return cls()
def __call__(self, oeb, context):
oeb.logger.info('Flattening CSS and remapping font sizes...') oeb.logger.info('Flattening CSS and remapping font sizes...')
self.oeb = oeb self.oeb = oeb
self.context = context self.context = context

View File

@ -52,7 +52,18 @@ class HTMLTOCAdder(object):
self.title = title self.title = title
self.style = style self.style = style
def transform(self, oeb, context): @classmethod
def config(cls, cfg):
group = cfg.add_group('htmltoc', _('HTML TOC generation options.'))
group('toc_title', ['--toc-title'], default=None,
help=_('Title for any generated in-line table of contents.'))
return cfg
@classmethod
def generate(cls, opts):
return cls(title=opts.toc_title)
def __call__(self, oeb, context):
if 'toc' in oeb.guide: if 'toc' in oeb.guide:
return return
oeb.logger.info('Generating in-line TOC...') oeb.logger.info('Generating in-line TOC...')

View File

@ -29,7 +29,15 @@ CASE_MANGLER_CSS = """
TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase']) TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
class CaseMangler(object): class CaseMangler(object):
def transform(self, oeb, context): @classmethod
def config(cls, cfg):
return cfg
@classmethod
def generate(cls, opts):
return cls()
def __call__(self, oeb, context):
oeb.logger.info('Applying case-transforming CSS...') oeb.logger.info('Applying case-transforming CSS...')
self.oeb = oeb self.oeb = oeb
self.profile = context.source self.profile = context.source

View File

@ -34,7 +34,15 @@ class SVGRasterizer(object):
if QApplication.instance() is None: if QApplication.instance() is None:
QApplication([]) QApplication([])
def transform(self, oeb, context): @classmethod
def config(cls, cfg):
return cfg
@classmethod
def generate(cls, opts):
return cls()
def __call__(self, oeb, context):
oeb.logger.info('Rasterizing SVG images...') oeb.logger.info('Rasterizing SVG images...')
self.oeb = oeb self.oeb = oeb
self.profile = context.dest self.profile = context.dest

View File

@ -13,7 +13,15 @@ from calibre.ebooks.oeb.base import LINK_SELECTORS, CSSURL_RE
from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import urlnormalize
class ManifestTrimmer(object): class ManifestTrimmer(object):
def transform(self, oeb, context): @classmethod
def config(cls, cfg):
return cfg
@classmethod
def generate(cls, opts):
return cls()
def __call__(self, oeb, context):
oeb.logger.info('Trimming unused files from manifest...') oeb.logger.info('Trimming unused files from manifest...')
used = set() used = set()
hrefs = oeb.manifest.hrefs hrefs = oeb.manifest.hrefs

View File

@ -0,0 +1,75 @@
'''
Directory output OEBBook writer.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, logging
from calibre.ebooks.oeb.base import OPF_MIME, xml2str
from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
__all__ = ['OEBWriter']
class OEBWriter(object):
    """Write an :class:`OEBBook` out as a directory of OEB files."""

    DEFAULT_PROFILE = 'PRS505'
    """Default renderer profile for content written with this Writer."""

    TRANSFORMS = []
    """List of transforms to apply to content written with this Writer."""

    def __init__(self, version='2.0', page_map=False, pretty_print=False):
        """Store the output settings.

        :param version: OPF version string; only its first character is
            interpreted when writing (``'1'`` or ``'2'``).
        :param page_map: Passed on as ``page_map`` when OPF 2 metadata is
            generated.
        :param pretty_print: Passed on when metadata documents are
            serialised.
        """
        self.version = version
        self.page_map = page_map
        self.pretty_print = pretty_print

    @classmethod
    def config(cls, cfg):
        """Add any book-writing options to the :class:`Config` object
        :param:`cfg`.
        """
        oeb = cfg.add_group('oeb', _('OPF/NCX/etc. generation options.'))
        versions = ['1.2', '2.0']
        oeb('opf_version', ['--opf-version'], default='2.0', choices=versions,
            help=_('OPF version to generate. Default is %default.'))
        oeb('adobe_page_map', ['--adobe-page-map'], default=False,
            help=_('Generate an Adobe "page-map" file if pagination '
                   'information is avaliable.'))
        return cfg

    @classmethod
    def generate(cls, opts):
        """Generate a Writer instance from command-line options."""
        version = opts.opf_version
        page_map = opts.adobe_page_map
        pretty_print = opts.pretty_print
        return cls(version=version, page_map=page_map,
                   pretty_print=pretty_print)

    def __call__(self, oeb, path):
        """Write the book in the :class:`OEBBook` object :param:`oeb` to the
        directory at :param:`path`.

        If :param:`path` ends in ``.opf``, its directory is used as the
        output directory and its file name as the name of the OPF file.
        The output directory is created if it does not exist.

        Raises :class:`OEBError` for an unsupported OPF version.
        """
        version = int(self.version[0])
        opfname = None
        if os.path.splitext(path)[1].lower() == '.opf':
            opfname = os.path.basename(path)
            # A bare 'book.opf' has an empty dirname, and os.mkdir('')
            # fails; write into the current directory in that case.
            path = os.path.dirname(path) or os.curdir
        if not os.path.isdir(path):
            os.mkdir(path)
        output = DirContainer(path)
        for item in oeb.manifest.values():
            output.write(item.href, str(item))
        if version == 1:
            metadata = oeb.to_opf1()
        elif version == 2:
            metadata = oeb.to_opf2(page_map=self.page_map)
        else:
            # OEBError is not among the module-level imports; import it
            # here so this error path raises the intended exception
            # instead of NameError.
            from calibre.ebooks.oeb.base import OEBError
            raise OEBError("Unrecognized OPF version %r" % self.version)
        pretty_print = self.pretty_print
        for mime, (href, data) in metadata.items():
            if opfname and mime == OPF_MIME:
                # Honour the OPF file name requested by the caller.
                href = opfname
            output.write(href, xml2str(data, pretty_print=pretty_print))
        return

View File

@ -201,14 +201,14 @@ class Document(QWebPage):
def bookmark(self): def bookmark(self):
return self.javascript('calculate_bookmark(%d)'%(self.ypos+25), 'string') return self.javascript('calculate_bookmark(%d)'%(self.ypos+25), 'string')
@apply @dynamic_property
def at_bottom(): def at_bottom(self):
def fget(self): def fget(self):
return self.height - self.ypos <= self.window_height return self.height - self.ypos <= self.window_height
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def at_top(): def at_top(self):
def fget(self): def fget(self):
return self.ypos <= 0 return self.ypos <= 0
return property(fget=fget) return property(fget=fget)
@ -217,32 +217,32 @@ class Document(QWebPage):
def test(self): def test(self):
pass pass
@apply @dynamic_property
def ypos(): def ypos(self):
def fget(self): def fget(self):
return self.javascript('window.pageYOffset', 'int') return self.javascript('window.pageYOffset', 'int')
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def window_height(): def window_height(self):
def fget(self): def fget(self):
return self.javascript('window.innerHeight', 'int') return self.javascript('window.innerHeight', 'int')
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def window_width(): def window_width(self):
def fget(self): def fget(self):
return self.javascript('window.innerWidth', 'int') return self.javascript('window.innerWidth', 'int')
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def xpos(): def xpos(self):
def fget(self): def fget(self):
return self.javascript('window.pageXOffset', 'int') return self.javascript('window.pageXOffset', 'int')
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def scroll_fraction(): def scroll_fraction(self):
def fget(self): def fget(self):
try: try:
return float(self.ypos)/(self.height-self.window_height) return float(self.ypos)/(self.height-self.window_height)
@ -250,20 +250,20 @@ class Document(QWebPage):
return 0. return 0.
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def hscroll_fraction(): def hscroll_fraction(self):
def fget(self): def fget(self):
return float(self.xpos)/self.width return float(self.xpos)/self.width
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def height(): def height(self):
def fget(self): def fget(self):
return self.javascript('document.body.offsetHeight', 'int') # contentsSize gives inaccurate results return self.javascript('document.body.offsetHeight', 'int') # contentsSize gives inaccurate results
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def width(): def width(self):
def fget(self): def fget(self):
return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
return property(fget=fget) return property(fget=fget)
@ -342,20 +342,20 @@ class DocumentView(QWebView):
def sizeHint(self): def sizeHint(self):
return self._size_hint return self._size_hint
@apply @dynamic_property
def scroll_fraction(): def scroll_fraction(self):
def fget(self): def fget(self):
return self.document.scroll_fraction return self.document.scroll_fraction
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def hscroll_fraction(): def hscroll_fraction(self):
def fget(self): def fget(self):
return self.document.hscroll_fraction return self.document.hscroll_fraction
return property(fget=fget) return property(fget=fget)
@apply @dynamic_property
def content_size(): def content_size(self):
def fget(self): def fget(self):
return self.document.width, self.document.height return self.document.width, self.document.height
return property(fget=fget) return property(fget=fget)

View File

@ -1,16 +1,8 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' Code to manage ebook library''' ''' Code to manage ebook library'''
import re
from calibre.utils.config import Config, StringConfig from calibre.utils.config import Config, StringConfig
title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
def title_sort(title):
match = title_pat.search(title)
if match:
prep = match.group(1)
title = title.replace(prep, '') + ', ' + prep
return title.strip()
def server_config(defaults=None): def server_config(defaults=None):
desc=_('Settings to control the calibre content server') desc=_('Settings to control the calibre content server')

View File

@ -814,8 +814,8 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
# _lock_file = None # _lock_file = None
self.conn.close() self.conn.close()
@apply @dynamic_property
def user_version(): def user_version(self):
doc = 'The user version of this database' doc = 'The user version of this database'
def fget(self): def fget(self):
return self.conn.get('pragma user_version;', all=False) return self.conn.get('pragma user_version;', all=False)
@ -1455,4 +1455,4 @@ def text_to_tokens(text):
if __name__ == '__main__': if __name__ == '__main__':
sqlite.enable_callback_tracebacks(True) sqlite.enable_callback_tracebacks(True)
db = LibraryDatabase('/home/kovid/temp/library1.db.orig') db = LibraryDatabase('/home/kovid/temp/library1.db.orig')

View File

@ -33,14 +33,14 @@ from calibre.ebooks import BOOK_EXTENSIONS
copyfile = os.link if hasattr(os, 'link') else shutil.copyfile copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10, 'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,
'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15} 'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15}
INDEX_MAP = dict(zip(FIELD_MAP.values(), FIELD_MAP.keys())) INDEX_MAP = dict(zip(FIELD_MAP.values(), FIELD_MAP.keys()))
class CoverCache(QThread): class CoverCache(QThread):
def __init__(self, library_path, parent=None): def __init__(self, library_path, parent=None):
QThread.__init__(self, parent) QThread.__init__(self, parent)
self.library_path = library_path self.library_path = library_path
@ -52,7 +52,7 @@ class CoverCache(QThread):
self.cache_lock = QReadWriteLock() self.cache_lock = QReadWriteLock()
self.id_map_stale = True self.id_map_stale = True
self.keep_running = True self.keep_running = True
def build_id_map(self): def build_id_map(self):
self.id_map_lock.lockForWrite() self.id_map_lock.lockForWrite()
self.id_map = {} self.id_map = {}
@ -65,8 +65,8 @@ class CoverCache(QThread):
continue continue
self.id_map_lock.unlock() self.id_map_lock.unlock()
self.id_map_stale = False self.id_map_stale = False
def set_cache(self, ids): def set_cache(self, ids):
self.cache_lock.lockForWrite() self.cache_lock.lockForWrite()
already_loaded = set([]) already_loaded = set([])
@ -80,8 +80,8 @@ class CoverCache(QThread):
self.load_queue_lock.lockForWrite() self.load_queue_lock.lockForWrite()
self.load_queue = collections.deque(ids) self.load_queue = collections.deque(ids)
self.load_queue_lock.unlock() self.load_queue_lock.unlock()
def run(self): def run(self):
while self.keep_running: while self.keep_running:
if self.id_map is None or self.id_map_stale: if self.id_map is None or self.id_map_stale:
@ -94,7 +94,7 @@ class CoverCache(QThread):
break break
finally: finally:
self.load_queue_lock.unlock() self.load_queue_lock.unlock()
self.cache_lock.lockForRead() self.cache_lock.lockForRead()
need = True need = True
if id in self.cache.keys(): if id in self.cache.keys():
@ -121,19 +121,19 @@ class CoverCache(QThread):
self.cache_lock.lockForWrite() self.cache_lock.lockForWrite()
self.cache[id] = img self.cache[id] = img
self.cache_lock.unlock() self.cache_lock.unlock()
self.sleep(1) self.sleep(1)
def stop(self): def stop(self):
self.keep_running = False self.keep_running = False
def cover(self, id): def cover(self, id):
val = None val = None
if self.cache_lock.tryLockForRead(50): if self.cache_lock.tryLockForRead(50):
val = self.cache.get(id, None) val = self.cache.get(id, None)
self.cache_lock.unlock() self.cache_lock.unlock()
return val return val
def clear_cache(self): def clear_cache(self):
self.cache_lock.lockForWrite() self.cache_lock.lockForWrite()
self.cache = {} self.cache = {}
@ -148,24 +148,24 @@ class CoverCache(QThread):
for id in ids: for id in ids:
self.load_queue.appendleft(id) self.load_queue.appendleft(id)
self.load_queue_lock.unlock() self.load_queue_lock.unlock()
class ResultCache(SearchQueryParser): class ResultCache(SearchQueryParser):
''' '''
Stores sorted and filtered metadata in memory. Stores sorted and filtered metadata in memory.
''' '''
def __init__(self): def __init__(self):
self._map = self._map_filtered = self._data = [] self._map = self._map_filtered = self._data = []
self.first_sort = True self.first_sort = True
SearchQueryParser.__init__(self) SearchQueryParser.__init__(self)
def __getitem__(self, row): def __getitem__(self, row):
return self._data[self._map_filtered[row]] return self._data[self._map_filtered[row]]
def __len__(self): def __len__(self):
return len(self._map_filtered) return len(self._map_filtered)
def __iter__(self): def __iter__(self):
for id in self._map_filtered: for id in self._map_filtered:
yield self._data[id] yield self._data[id]
@ -194,32 +194,32 @@ class ResultCache(SearchQueryParser):
matches.add(item[0]) matches.add(item[0])
break break
return matches return matches
def remove(self, id): def remove(self, id):
self._data[id] = None self._data[id] = None
if id in self._map: if id in self._map:
self._map.remove(id) self._map.remove(id)
if id in self._map_filtered: if id in self._map_filtered:
self._map_filtered.remove(id) self._map_filtered.remove(id)
def set(self, row, col, val, row_is_id=False): def set(self, row, col, val, row_is_id=False):
id = row if row_is_id else self._map_filtered[row] id = row if row_is_id else self._map_filtered[row]
self._data[id][col] = val self._data[id][col] = val
def index(self, id, cache=False): def index(self, id, cache=False):
x = self._map if cache else self._map_filtered x = self._map if cache else self._map_filtered
return x.index(id) return x.index(id)
def row(self, id): def row(self, id):
return self.index(id) return self.index(id)
def has_id(self, id): def has_id(self, id):
try: try:
return self._data[id] is not None return self._data[id] is not None
except IndexError: except IndexError:
pass pass
return False return False
def refresh_ids(self, conn, ids): def refresh_ids(self, conn, ids):
''' '''
Refresh the data in the cache for books identified by ids. Refresh the data in the cache for books identified by ids.
@ -232,7 +232,7 @@ class ResultCache(SearchQueryParser):
except ValueError: except ValueError:
pass pass
return None return None
def books_added(self, ids, conn): def books_added(self, ids, conn):
if not ids: if not ids:
return return
@ -241,16 +241,16 @@ class ResultCache(SearchQueryParser):
self._data[id] = conn.get('SELECT * from meta WHERE id=?', (id,))[0] self._data[id] = conn.get('SELECT * from meta WHERE id=?', (id,))[0]
self._map[0:0] = ids self._map[0:0] = ids
self._map_filtered[0:0] = ids self._map_filtered[0:0] = ids
def books_deleted(self, ids): def books_deleted(self, ids):
for id in ids: for id in ids:
self._data[id] = None self._data[id] = None
if id in self._map: self._map.remove(id) if id in self._map: self._map.remove(id)
if id in self._map_filtered: self._map_filtered.remove(id) if id in self._map_filtered: self._map_filtered.remove(id)
def count(self): def count(self):
return len(self._map) return len(self._map)
def refresh(self, db, field=None, ascending=True): def refresh(self, db, field=None, ascending=True):
temp = db.conn.get('SELECT * FROM meta') temp = db.conn.get('SELECT * FROM meta')
self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else [] self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
@ -260,7 +260,7 @@ class ResultCache(SearchQueryParser):
if field is not None: if field is not None:
self.sort(field, ascending) self.sort(field, ascending)
self._map_filtered = list(self._map) self._map_filtered = list(self._map)
def seriescmp(self, x, y): def seriescmp(self, x, y):
try: try:
ans = cmp(self._data[x][9].lower(), self._data[y][9].lower()) if str else\ ans = cmp(self._data[x][9].lower(), self._data[y][9].lower()) if str else\
@ -291,28 +291,28 @@ class ResultCache(SearchQueryParser):
subsort = True subsort = True
self.first_sort = False self.first_sort = False
fcmp = self.seriescmp if field == 'series' else \ fcmp = self.seriescmp if field == 'series' else \
functools.partial(self.cmp, FIELD_MAP[field], subsort=subsort, functools.partial(self.cmp, FIELD_MAP[field], subsort=subsort,
str=field not in ('size', 'rating', 'timestamp')) str=field not in ('size', 'rating', 'timestamp'))
self._map.sort(cmp=fcmp, reverse=not ascending) self._map.sort(cmp=fcmp, reverse=not ascending)
self._map_filtered = [id for id in self._map if id in self._map_filtered] self._map_filtered = [id for id in self._map if id in self._map_filtered]
def search(self, query): def search(self, query):
if not query or not query.strip(): if not query or not query.strip():
self._map_filtered = list(self._map) self._map_filtered = list(self._map)
return return
matches = sorted(self.parse(query)) matches = sorted(self.parse(query))
self._map_filtered = [id for id in self._map if id in matches] self._map_filtered = [id for id in self._map if id in matches]
class Tag(unicode): class Tag(unicode):
def __new__(cls, *args): def __new__(cls, *args):
obj = super(Tag, cls).__new__(cls, *args) obj = super(Tag, cls).__new__(cls, *args)
obj.count = 0 obj.count = 0
obj.state = 0 obj.state = 0
return obj return obj
def as_string(self): def as_string(self):
return u'[%d] %s'%(self.count, self) return u'[%d] %s'%(self.count, self)
@ -321,19 +321,19 @@ class LibraryDatabase2(LibraryDatabase):
An ebook metadata database that stores references to ebook files on disk. An ebook metadata database that stores references to ebook files on disk.
''' '''
PATH_LIMIT = 40 if 'win32' in sys.platform else 100 PATH_LIMIT = 40 if 'win32' in sys.platform else 100
@apply @dynamic_property
def user_version(): def user_version(self):
doc = 'The user version of this database' doc = 'The user version of this database'
def fget(self): def fget(self):
return self.conn.get('pragma user_version;', all=False) return self.conn.get('pragma user_version;', all=False)
def fset(self, val): def fset(self, val):
self.conn.execute('pragma user_version=%d'%int(val)) self.conn.execute('pragma user_version=%d'%int(val))
self.conn.commit() self.conn.commit()
return property(doc=doc, fget=fget, fset=fset) return property(doc=doc, fget=fget, fset=fset)
def connect(self): def connect(self):
if 'win32' in sys.platform and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259: if 'win32' in sys.platform and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
raise ValueError('Path to library too long. Must be less than %d characters.'%(259-4*self.PATH_LIMIT-10)) raise ValueError('Path to library too long. Must be less than %d characters.'%(259-4*self.PATH_LIMIT-10))
@ -343,9 +343,9 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.close() self.conn.close()
os.remove(self.dbpath) os.remove(self.dbpath)
self.conn = connect(self.dbpath, self.row_factory) self.conn = connect(self.dbpath, self.row_factory)
if self.user_version == 0: if self.user_version == 0:
self.initialize_database() self.initialize_database()
def __init__(self, library_path, row_factory=False): def __init__(self, library_path, row_factory=False):
if not os.path.exists(library_path): if not os.path.exists(library_path):
os.makedirs(library_path) os.makedirs(library_path)
@ -358,7 +358,7 @@ class LibraryDatabase2(LibraryDatabase):
self.connect() self.connect()
self.is_case_sensitive = not iswindows and not isosx and \ self.is_case_sensitive = not iswindows and not isosx and \
not os.path.exists(self.dbpath.replace('metadata.db', 'MeTAdAtA.dB')) not os.path.exists(self.dbpath.replace('metadata.db', 'MeTAdAtA.dB'))
# Upgrade database # Upgrade database
while True: while True:
meth = getattr(self, 'upgrade_version_%d'%self.user_version, None) meth = getattr(self, 'upgrade_version_%d'%self.user_version, None)
if meth is None: if meth is None:
@ -368,7 +368,7 @@ class LibraryDatabase2(LibraryDatabase):
meth() meth()
self.conn.commit() self.conn.commit()
self.user_version += 1 self.user_version += 1
self.data = ResultCache() self.data = ResultCache()
self.search = self.data.search self.search = self.data.search
self.refresh = functools.partial(self.data.refresh, self) self.refresh = functools.partial(self.data.refresh, self)
@ -378,24 +378,24 @@ class LibraryDatabase2(LibraryDatabase):
self.row = self.data.row self.row = self.data.row
self.has_id = self.data.has_id self.has_id = self.data.has_id
self.count = self.data.count self.count = self.data.count
self.refresh() self.refresh()
def get_property(idx, index_is_id=False, loc=-1): def get_property(idx, index_is_id=False, loc=-1):
row = self.data._data[idx] if index_is_id else self.data[idx] row = self.data._data[idx] if index_is_id else self.data[idx]
return row[loc] return row[loc]
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn', for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags', 'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp'): 'title', 'timestamp'):
setattr(self, prop, functools.partial(get_property, setattr(self, prop, functools.partial(get_property,
loc=FIELD_MAP['comments' if prop == 'comment' else prop])) loc=FIELD_MAP['comments' if prop == 'comment' else prop]))
def initialize_database(self): def initialize_database(self):
from calibre.resources import metadata_sqlite from calibre.resources import metadata_sqlite
self.conn.executescript(metadata_sqlite) self.conn.executescript(metadata_sqlite)
self.user_version = 1 self.user_version = 1
def upgrade_version_1(self): def upgrade_version_1(self):
''' '''
Normalize indices. Normalize indices.
@ -407,7 +407,7 @@ class LibraryDatabase2(LibraryDatabase):
CREATE INDEX series_idx ON series (name COLLATE NOCASE); CREATE INDEX series_idx ON series (name COLLATE NOCASE);
CREATE INDEX series_sort_idx ON books (series_index, id); CREATE INDEX series_sort_idx ON books (series_index, id);
''')) '''))
def upgrade_version_2(self): def upgrade_version_2(self):
''' Fix Foreign key constraints for deleting from link tables. ''' ''' Fix Foreign key constraints for deleting from link tables. '''
script = textwrap.dedent('''\ script = textwrap.dedent('''\
@ -426,7 +426,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.executescript(script%dict(ltable='publishers', table='publishers', ltable_col='publisher')) self.conn.executescript(script%dict(ltable='publishers', table='publishers', ltable_col='publisher'))
self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag')) self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag'))
self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series')) self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series'))
def upgrade_version_3(self): def upgrade_version_3(self):
' Add path to result cache ' ' Add path to result cache '
self.conn.executescript(''' self.conn.executescript('''
@ -450,25 +450,25 @@ class LibraryDatabase2(LibraryDatabase):
FROM books; FROM books;
''') ''')
def last_modified(self): def last_modified(self):
''' Return last modified time as a UTC datetime object''' ''' Return last modified time as a UTC datetime object'''
return datetime.utcfromtimestamp(os.stat(self.dbpath).st_mtime) return datetime.utcfromtimestamp(os.stat(self.dbpath).st_mtime)
def path(self, index, index_is_id=False): def path(self, index, index_is_id=False):
'Return the relative path to the directory containing this books files as a unicode string.' 'Return the relative path to the directory containing this books files as a unicode string.'
row = self.data._data[index] if index_is_id else self.data[index] row = self.data._data[index] if index_is_id else self.data[index]
return row[FIELD_MAP['path']].replace('/', os.sep) return row[FIELD_MAP['path']].replace('/', os.sep)
def abspath(self, index, index_is_id=False): def abspath(self, index, index_is_id=False):
'Return the absolute path to the directory containing this books files as a unicode string.' 'Return the absolute path to the directory containing this books files as a unicode string.'
path = os.path.join(self.library_path, self.path(index, index_is_id=index_is_id)) path = os.path.join(self.library_path, self.path(index, index_is_id=index_is_id))
if not os.path.exists(path): if not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
return path return path
def construct_path_name(self, id): def construct_path_name(self, id):
''' '''
Construct the directory name for this book based on its metadata. Construct the directory name for this book based on its metadata.
@ -480,7 +480,7 @@ class LibraryDatabase2(LibraryDatabase):
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
path = author + '/' + title + ' (%d)'%id path = author + '/' + title + ' (%d)'%id
return path return path
def construct_file_name(self, id): def construct_file_name(self, id):
''' '''
Construct the file name for this book based on its metadata. Construct the file name for this book based on its metadata.
@ -492,17 +492,17 @@ class LibraryDatabase2(LibraryDatabase):
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
name = title + ' - ' + author name = title + ' - ' + author
return name return name
def rmtree(self, path): def rmtree(self, path):
if not self.normpath(self.library_path).startswith(self.normpath(path)): if not self.normpath(self.library_path).startswith(self.normpath(path)):
shutil.rmtree(path) shutil.rmtree(path)
def normpath(self, path): def normpath(self, path):
path = os.path.abspath(os.path.realpath(path)) path = os.path.abspath(os.path.realpath(path))
if not self.is_case_sensitive: if not self.is_case_sensitive:
path = path.lower() path = path.lower()
return path return path
def set_path(self, index, index_is_id=False): def set_path(self, index, index_is_id=False):
''' '''
Set the path to the directory containing this books files based on its Set the path to the directory containing this books files based on its
@ -524,12 +524,12 @@ class LibraryDatabase2(LibraryDatabase):
break break
if path == current_path and not changed: if path == current_path and not changed:
return return
tpath = os.path.join(self.library_path, *path.split('/')) tpath = os.path.join(self.library_path, *path.split('/'))
if not os.path.exists(tpath): if not os.path.exists(tpath):
os.makedirs(tpath) os.makedirs(tpath)
spath = os.path.join(self.library_path, *current_path.split('/')) spath = os.path.join(self.library_path, *current_path.split('/'))
if current_path and os.path.exists(spath): # Migrate existing files if current_path and os.path.exists(spath): # Migrate existing files
cdata = self.cover(id, index_is_id=True) cdata = self.cover(id, index_is_id=True)
if cdata is not None: if cdata is not None:
@ -551,14 +551,14 @@ class LibraryDatabase2(LibraryDatabase):
parent = os.path.dirname(spath) parent = os.path.dirname(spath)
if len(os.listdir(parent)) == 0: if len(os.listdir(parent)) == 0:
self.rmtree(parent) self.rmtree(parent)
def add_listener(self, listener): def add_listener(self, listener):
''' '''
Add a listener. Will be called on change events with two arguments. Add a listener. Will be called on change events with two arguments.
Event name and list of affected ids. Event name and list of affected ids.
''' '''
self.listeners.add(listener) self.listeners.add(listener)
def notify(self, event, ids=[]): def notify(self, event, ids=[]):
'Notify all listeners' 'Notify all listeners'
for listener in self.listeners: for listener in self.listeners:
@ -567,12 +567,12 @@ class LibraryDatabase2(LibraryDatabase):
except: except:
traceback.print_exc() traceback.print_exc()
continue continue
def cover(self, index, index_is_id=False, as_file=False, as_image=False, def cover(self, index, index_is_id=False, as_file=False, as_image=False,
as_path=False): as_path=False):
''' '''
Return the cover image as a bytestring (in JPEG format) or None. Return the cover image as a bytestring (in JPEG format) or None.
`as_file` : If True return the image as an open file object `as_file` : If True return the image as an open file object
`as_image`: If True return the image as a QImage object `as_image`: If True return the image as a QImage object
''' '''
@ -587,7 +587,7 @@ class LibraryDatabase2(LibraryDatabase):
img.loadFromData(f.read()) img.loadFromData(f.read())
return img return img
return f if as_file else f.read() return f if as_file else f.read()
def get_metadata(self, idx, index_is_id=False, get_cover=False): def get_metadata(self, idx, index_is_id=False, get_cover=False):
''' '''
Convenience method to return metadata as a L{MetaInformation} object. Convenience method to return metadata as a L{MetaInformation} object.
@ -612,7 +612,7 @@ class LibraryDatabase2(LibraryDatabase):
if get_cover: if get_cover:
mi.cover = self.cover(id, index_is_id=True, as_path=True) mi.cover = self.cover(id, index_is_id=True, as_path=True)
return mi return mi
def has_book(self, mi): def has_book(self, mi):
title = mi.title title = mi.title
if title: if title:
@ -620,16 +620,16 @@ class LibraryDatabase2(LibraryDatabase):
title = title.decode(preferred_encoding, 'replace') title = title.decode(preferred_encoding, 'replace')
return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False)) return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False))
return False return False
def has_cover(self, index, index_is_id=False): def has_cover(self, index, index_is_id=False):
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
return os.access(path, os.R_OK) return os.access(path, os.R_OK)
def set_cover(self, id, data): def set_cover(self, id, data):
''' '''
Set the cover for this book. Set the cover for this book.
`data`: Can be either a QImage, QPixmap, file object or bytestring `data`: Can be either a QImage, QPixmap, file object or bytestring
''' '''
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
@ -644,13 +644,13 @@ class LibraryDatabase2(LibraryDatabase):
data = data.read() data = data.read()
p.loadFromData(data) p.loadFromData(data)
p.save(path) p.save(path)
def all_formats(self): def all_formats(self):
formats = self.conn.get('SELECT format from data') formats = self.conn.get('SELECT format from data')
if not formats: if not formats:
return set([]) return set([])
return set([f[0] for f in formats]) return set([f[0] for f in formats])
def formats(self, index, index_is_id=False): def formats(self, index, index_is_id=False):
''' Return available formats as a comma separated list or None if there are no available formats ''' ''' Return available formats as a comma separated list or None if there are no available formats '''
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
@ -667,7 +667,7 @@ class LibraryDatabase2(LibraryDatabase):
if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK): if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK):
ans.append(format) ans.append(format)
return ','.join(ans) return ','.join(ans)
def has_format(self, index, format, index_is_id=False): def has_format(self, index, format, index_is_id=False):
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False) name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
@ -677,7 +677,7 @@ class LibraryDatabase2(LibraryDatabase):
path = os.path.join(path, name+format) path = os.path.join(path, name+format)
return os.access(path, os.R_OK|os.W_OK) return os.access(path, os.R_OK|os.W_OK)
return False return False
def format_abspath(self, index, format, index_is_id=False): def format_abspath(self, index, format, index_is_id=False):
'Return absolute path to the ebook file of format `format`' 'Return absolute path to the ebook file of format `format`'
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
@ -688,13 +688,13 @@ class LibraryDatabase2(LibraryDatabase):
path = os.path.join(path, name+format) path = os.path.join(path, name+format)
if os.access(path, os.R_OK|os.W_OK): if os.access(path, os.R_OK|os.W_OK):
return path return path
def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'): def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
''' '''
Return the ebook format as a bytestring or `None` if the format doesn't exist, Return the ebook format as a bytestring or `None` if the format doesn't exist,
or we don't have permission to write to the ebook file. or we don't have permission to write to the ebook file.
`as_file`: If True the ebook format is returned as a file object opened in `mode` `as_file`: If True the ebook format is returned as a file object opened in `mode`
''' '''
path = self.format_abspath(index, format, index_is_id=index_is_id) path = self.format_abspath(index, format, index_is_id=index_is_id)
if path is not None: if path is not None:
@ -702,14 +702,14 @@ class LibraryDatabase2(LibraryDatabase):
return f if as_file else f.read() return f if as_file else f.read()
if self.has_format(index, format, index_is_id): if self.has_format(index, format, index_is_id):
self.remove_format(id, format, index_is_id=True) self.remove_format(id, format, index_is_id=True)
def add_format_with_hooks(self, index, format, fpath, index_is_id=False, def add_format_with_hooks(self, index, format, fpath, index_is_id=False,
path=None, notify=True): path=None, notify=True):
npath = self.run_import_plugins(fpath, format) npath = self.run_import_plugins(fpath, format)
format = os.path.splitext(npath)[-1].lower().replace('.', '').upper() format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
return self.add_format(index, format, open(npath, 'rb'), return self.add_format(index, format, open(npath, 'rb'),
index_is_id=index_is_id, path=path, notify=notify) index_is_id=index_is_id, path=path, notify=notify)
def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True): def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True):
id = index if index_is_id else self.id(index) id = index if index_is_id else self.id(index)
if path is None: if path is None:
@ -768,7 +768,7 @@ class LibraryDatabase2(LibraryDatabase):
self.refresh_ids([id]) self.refresh_ids([id])
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def clean(self): def clean(self):
''' '''
Remove orphaned entries. Remove orphaned entries.
@ -779,13 +779,13 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute(st%dict(ltable='tags', table='tags', ltable_col='tag')) self.conn.execute(st%dict(ltable='tags', table='tags', ltable_col='tag'))
self.conn.execute(st%dict(ltable='series', table='series', ltable_col='series')) self.conn.execute(st%dict(ltable='series', table='series', ltable_col='series'))
self.conn.commit() self.conn.commit()
def get_recipes(self): def get_recipes(self):
return self.conn.get('SELECT id, script FROM feeds') return self.conn.get('SELECT id, script FROM feeds')
def get_recipe(self, id): def get_recipe(self, id):
return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False) return self.conn.get('SELECT script FROM feeds WHERE id=?', (id,), all=False)
def get_categories(self, sort_on_count=False): def get_categories(self, sort_on_count=False):
categories = {} categories = {}
def get(name, category, field='name'): def get(name, category, field='name'):
@ -807,11 +807,11 @@ class LibraryDatabase2(LibraryDatabase):
for tag in tags: for tag in tags:
tag.count = self.conn.get('SELECT COUNT(format) FROM data WHERE format=?', (tag,), all=False) tag.count = self.conn.get('SELECT COUNT(format) FROM data WHERE format=?', (tag,), all=False)
tags.sort(reverse=sort_on_count, cmp=(lambda x,y:cmp(x.count,y.count)) if sort_on_count else cmp) tags.sort(reverse=sort_on_count, cmp=(lambda x,y:cmp(x.count,y.count)) if sort_on_count else cmp)
for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'), for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'),
('series', 'series')): ('series', 'series')):
get(*x) get(*x)
get('data', 'format', 'format') get('data', 'format', 'format')
categories['news'] = [] categories['news'] = []
newspapers = self.conn.get('SELECT name FROM tags WHERE id IN (SELECT DISTINCT tag FROM books_tags_link WHERE book IN (select book from books_tags_link where tag IN (SELECT id FROM tags WHERE name=?)))', (_('News'),)) newspapers = self.conn.get('SELECT name FROM tags WHERE id IN (SELECT DISTINCT tag FROM books_tags_link WHERE book IN (select book from books_tags_link where tag IN (SELECT id FROM tags WHERE name=?)))', (_('News'),))
if newspapers: if newspapers:
@ -823,10 +823,10 @@ class LibraryDatabase2(LibraryDatabase):
categories['news'] = list(map(Tag, newspapers)) categories['news'] = list(map(Tag, newspapers))
for tag in categories['news']: for tag in categories['news']:
tag.count = self.conn.get('SELECT COUNT(id) FROM books_tags_link WHERE tag IN (SELECT DISTINCT id FROM tags WHERE name=?)', (tag,), all=False) tag.count = self.conn.get('SELECT COUNT(id) FROM books_tags_link WHERE tag IN (SELECT DISTINCT id FROM tags WHERE name=?)', (tag,), all=False)
return categories return categories
def tags_older_than(self, tag, delta): def tags_older_than(self, tag, delta):
tag = tag.lower().strip() tag = tag.lower().strip()
now = datetime.now() now = datetime.now()
@ -836,9 +836,9 @@ class LibraryDatabase2(LibraryDatabase):
tags = r[FIELD_MAP['tags']] tags = r[FIELD_MAP['tags']]
if tags and tag in tags.lower(): if tags and tag in tags.lower():
yield r[FIELD_MAP['id']] yield r[FIELD_MAP['id']]
def set(self, row, column, val): def set(self, row, column, val):
''' '''
Convenience method for setting the title, authors, publisher or rating Convenience method for setting the title, authors, publisher or rating
@ -861,10 +861,10 @@ class LibraryDatabase2(LibraryDatabase):
self.data.refresh_ids(self.conn, [id]) self.data.refresh_ids(self.conn, [id])
self.set_path(id, True) self.set_path(id, True)
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_metadata(self, id, mi): def set_metadata(self, id, mi):
''' '''
Set metadata for the book `id` from the `MetaInformation` object `mi` Set metadata for the book `id` from the `MetaInformation` object `mi`
''' '''
if mi.title: if mi.title:
self.set_title(id, mi.title) self.set_title(id, mi.title)
@ -898,7 +898,7 @@ class LibraryDatabase2(LibraryDatabase):
self.set_timestamp(id, mi.timestamp, notify=False) self.set_timestamp(id, mi.timestamp, notify=False)
self.set_path(id, True) self.set_path(id, True)
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_authors(self, id, authors, notify=True): def set_authors(self, id, authors, notify=True):
''' '''
`authors`: A list of authors. `authors`: A list of authors.
@ -929,14 +929,14 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('UPDATE books SET author_sort=? WHERE id=?', self.conn.execute('UPDATE books SET author_sort=? WHERE id=?',
(ss, id)) (ss, id))
self.conn.commit() self.conn.commit()
self.data.set(id, FIELD_MAP['authors'], self.data.set(id, FIELD_MAP['authors'],
','.join([a.replace(',', '|') for a in authors]), ','.join([a.replace(',', '|') for a in authors]),
row_is_id=True) row_is_id=True)
self.data.set(id, FIELD_MAP['author_sort'], ss, row_is_id=True) self.data.set(id, FIELD_MAP['author_sort'], ss, row_is_id=True)
self.set_path(id, True) self.set_path(id, True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_title(self, id, title, notify=True): def set_title(self, id, title, notify=True):
if not title: if not title:
return return
@ -949,7 +949,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.commit() self.conn.commit()
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_timestamp(self, id, dt, notify=True): def set_timestamp(self, id, dt, notify=True):
if dt: if dt:
self.conn.execute('UPDATE books SET timestamp=? WHERE id=?', (dt, id)) self.conn.execute('UPDATE books SET timestamp=? WHERE id=?', (dt, id))
@ -957,7 +957,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.commit() self.conn.commit()
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_publisher(self, id, publisher, notify=True): def set_publisher(self, id, publisher, notify=True):
self.conn.execute('DELETE FROM books_publishers_link WHERE book=?',(id,)) self.conn.execute('DELETE FROM books_publishers_link WHERE book=?',(id,))
self.conn.execute('DELETE FROM publishers WHERE (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) < 1') self.conn.execute('DELETE FROM publishers WHERE (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) < 1')
@ -974,7 +974,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['publisher'], publisher, row_is_id=True) self.data.set(id, FIELD_MAP['publisher'], publisher, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_tags(self, id, tags, append=False, notify=True): def set_tags(self, id, tags, append=False, notify=True):
''' '''
@param tags: list of strings @param tags: list of strings
@ -1018,7 +1018,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['tags'], tags, row_is_id=True) self.data.set(id, FIELD_MAP['tags'], tags, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def unapply_tags(self, book_id, tags, notify=True): def unapply_tags(self, book_id, tags, notify=True):
for tag in tags: for tag in tags:
id = self.conn.get('SELECT id FROM tags WHERE name=?', (tag,), all=False) id = self.conn.get('SELECT id FROM tags WHERE name=?', (tag,), all=False)
@ -1028,7 +1028,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.refresh_ids(self.conn, [book_id]) self.data.refresh_ids(self.conn, [book_id])
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def is_tag_used(self, tag): def is_tag_used(self, tag):
existing_tags = self.all_tags() existing_tags = self.all_tags()
lt = [t.lower() for t in existing_tags] lt = [t.lower() for t in existing_tags]
@ -1037,7 +1037,7 @@ class LibraryDatabase2(LibraryDatabase):
return True return True
except ValueError: except ValueError:
return False return False
def delete_tag(self, tag): def delete_tag(self, tag):
existing_tags = self.all_tags() existing_tags = self.all_tags()
lt = [t.lower() for t in existing_tags] lt = [t.lower() for t in existing_tags]
@ -1052,7 +1052,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('DELETE FROM tags WHERE id=?', (id,)) self.conn.execute('DELETE FROM tags WHERE id=?', (id,))
self.conn.commit() self.conn.commit()
def set_series(self, id, series, notify=True): def set_series(self, id, series, notify=True):
self.conn.execute('DELETE FROM books_series_link WHERE book=?',(id,)) self.conn.execute('DELETE FROM books_series_link WHERE book=?',(id,))
self.conn.execute('DELETE FROM series WHERE (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) < 1') self.conn.execute('DELETE FROM series WHERE (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) < 1')
@ -1075,7 +1075,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['series'], series, row_is_id=True) self.data.set(id, FIELD_MAP['series'], series, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_series_index(self, id, idx, notify=True): def set_series_index(self, id, idx, notify=True):
if idx is None: if idx is None:
idx = 1 idx = 1
@ -1091,7 +1091,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['series_index'], int(idx), row_is_id=True) self.data.set(id, FIELD_MAP['series_index'], int(idx), row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_rating(self, id, rating, notify=True): def set_rating(self, id, rating, notify=True):
rating = int(rating) rating = int(rating)
self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,)) self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,))
@ -1102,7 +1102,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['rating'], rating, row_is_id=True) self.data.set(id, FIELD_MAP['rating'], rating, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_comment(self, id, text, notify=True): def set_comment(self, id, text, notify=True):
self.conn.execute('DELETE FROM comments WHERE book=?', (id,)) self.conn.execute('DELETE FROM comments WHERE book=?', (id,))
self.conn.execute('INSERT INTO comments(book,text) VALUES (?,?)', (id, text)) self.conn.execute('INSERT INTO comments(book,text) VALUES (?,?)', (id, text))
@ -1110,21 +1110,21 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['comments'], text, row_is_id=True) self.data.set(id, FIELD_MAP['comments'], text, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_author_sort(self, id, sort, notify=True): def set_author_sort(self, id, sort, notify=True):
self.conn.execute('UPDATE books SET author_sort=? WHERE id=?', (sort, id)) self.conn.execute('UPDATE books SET author_sort=? WHERE id=?', (sort, id))
self.conn.commit() self.conn.commit()
self.data.set(id, FIELD_MAP['author_sort'], sort, row_is_id=True) self.data.set(id, FIELD_MAP['author_sort'], sort, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_isbn(self, id, isbn, notify=True): def set_isbn(self, id, isbn, notify=True):
self.conn.execute('UPDATE books SET isbn=? WHERE id=?', (isbn, id)) self.conn.execute('UPDATE books SET isbn=? WHERE id=?', (isbn, id))
self.conn.commit() self.conn.commit()
self.data.set(id, FIELD_MAP['isbn'], isbn, row_is_id=True) self.data.set(id, FIELD_MAP['isbn'], isbn, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def add_news(self, path, recipe): def add_news(self, path, recipe):
format = os.path.splitext(path)[1][1:].lower() format = os.path.splitext(path)[1][1:].lower()
stream = path if hasattr(path, 'read') else open(path, 'rb') stream = path if hasattr(path, 'read') else open(path, 'rb')
@ -1133,21 +1133,21 @@ class LibraryDatabase2(LibraryDatabase):
stream.seek(0) stream.seek(0)
mi.series_index = 1 mi.series_index = 1
mi.tags = [_('News'), recipe.title] mi.tags = [_('News'), recipe.title]
obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)', obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
(mi.title, mi.authors[0])) (mi.title, mi.authors[0]))
id = obj.lastrowid id = obj.lastrowid
self.data.books_added([id], self.conn) self.data.books_added([id], self.conn)
self.set_path(id, index_is_id=True) self.set_path(id, index_is_id=True)
self.conn.commit() self.conn.commit()
self.set_metadata(id, mi) self.set_metadata(id, mi)
self.add_format(id, format, stream, index_is_id=True) self.add_format(id, format, stream, index_is_id=True)
if not hasattr(path, 'read'): if not hasattr(path, 'read'):
stream.close() stream.close()
self.conn.commit() self.conn.commit()
self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size
return id return id
def run_import_plugins(self, path_or_stream, format): def run_import_plugins(self, path_or_stream, format):
format = format.lower() format = format.lower()
if hasattr(path_or_stream, 'seek'): if hasattr(path_or_stream, 'seek'):
@ -1185,7 +1185,7 @@ class LibraryDatabase2(LibraryDatabase):
aus = aus.decode(preferred_encoding, 'replace') aus = aus.decode(preferred_encoding, 'replace')
if isinstance(title, str): if isinstance(title, str):
title = title.decode(preferred_encoding) title = title.decode(preferred_encoding)
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)', obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(title, uri, series_index, aus)) (title, uri, series_index, aus))
id = obj.lastrowid id = obj.lastrowid
self.data.books_added([id], self.conn) self.data.books_added([id], self.conn)
@ -1207,7 +1207,7 @@ class LibraryDatabase2(LibraryDatabase):
uris = list(duplicate[3] for duplicate in duplicates) uris = list(duplicate[3] for duplicate in duplicates)
return (paths, formats, metadata, uris), len(ids) return (paths, formats, metadata, uris), len(ids)
return None, len(ids) return None, len(ids)
def import_book(self, mi, formats, notify=True): def import_book(self, mi, formats, notify=True):
series_index = 1 if mi.series_index is None else mi.series_index series_index = 1 if mi.series_index is None else mi.series_index
if not mi.title: if not mi.title:
@ -1234,7 +1234,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size
if notify: if notify:
self.notify('add', [id]) self.notify('add', [id])
def move_library_to(self, newloc, progress=None): def move_library_to(self, newloc, progress=None):
header = _(u'<p>Copying books to %s<br><center>')%newloc header = _(u'<p>Copying books to %s<br><center>')%newloc
books = self.conn.get('SELECT id, path, title FROM books') books = self.conn.get('SELECT id, path, title FROM books')
@ -1263,7 +1263,7 @@ class LibraryDatabase2(LibraryDatabase):
old_dirs.add(srcdir) old_dirs.add(srcdir)
if progress is not None: if progress is not None:
progress.setValue(i+1) progress.setValue(i+1)
dbpath = os.path.join(newloc, os.path.basename(self.dbpath)) dbpath = os.path.join(newloc, os.path.basename(self.dbpath))
shutil.copyfile(self.dbpath, dbpath) shutil.copyfile(self.dbpath, dbpath)
opath = self.dbpath opath = self.dbpath
@ -1279,22 +1279,22 @@ class LibraryDatabase2(LibraryDatabase):
if progress is not None: if progress is not None:
progress.reset() progress.reset()
progress.hide() progress.hide()
def __iter__(self): def __iter__(self):
for record in self.data._data: for record in self.data._data:
if record is not None: if record is not None:
yield record yield record
def all_ids(self): def all_ids(self):
for i in iter(self): for i in iter(self):
yield i['id'] yield i['id']
def get_data_as_dict(self, prefix=None, authors_as_string=False): def get_data_as_dict(self, prefix=None, authors_as_string=False):
''' '''
Return all metadata stored in the database as a dict. Includes paths to Return all metadata stored in the database as a dict. Includes paths to
the cover and each format. the cover and each format.
:param prefix: The prefix for all paths. By default, the prefix is the absolute path :param prefix: The prefix for all paths. By default, the prefix is the absolute path
to the library folder. to the library folder.
''' '''
@ -1325,9 +1325,9 @@ class LibraryDatabase2(LibraryDatabase):
x['formats'].append(path%fmt.lower()) x['formats'].append(path%fmt.lower())
x['fmt_'+fmt.lower()] = path%fmt.lower() x['fmt_'+fmt.lower()] = path%fmt.lower()
x['available_formats'] = [i.upper() for i in formats.split(',')] x['available_formats'] = [i.upper() for i in formats.split(',')]
return data return data
def migrate_old(self, db, progress): def migrate_old(self, db, progress):
header = _(u'<p>Migrating old database to ebook library in %s<br><center>')%self.library_path header = _(u'<p>Migrating old database to ebook library in %s<br><center>')%self.library_path
progress.setValue(0) progress.setValue(0)
@ -1338,23 +1338,23 @@ class LibraryDatabase2(LibraryDatabase):
books = db.conn.get('SELECT id, title, sort, timestamp, uri, series_index, author_sort, isbn FROM books ORDER BY id ASC') books = db.conn.get('SELECT id, title, sort, timestamp, uri, series_index, author_sort, isbn FROM books ORDER BY id ASC')
progress.setAutoReset(False) progress.setAutoReset(False)
progress.setRange(0, len(books)) progress.setRange(0, len(books))
for book in books: for book in books:
self.conn.execute('INSERT INTO books(id, title, sort, timestamp, uri, series_index, author_sort, isbn) VALUES(?, ?, ?, ?, ?, ?, ?, ?);', book) self.conn.execute('INSERT INTO books(id, title, sort, timestamp, uri, series_index, author_sort, isbn) VALUES(?, ?, ?, ?, ?, ?, ?, ?);', book)
tables = ''' tables = '''
authors ratings tags series books_tags_link authors ratings tags series books_tags_link
comments publishers comments publishers
books_authors_link conversion_options books_authors_link conversion_options
books_publishers_link books_publishers_link
books_ratings_link books_ratings_link
books_series_link feeds books_series_link feeds
'''.split() '''.split()
for table in tables: for table in tables:
rows = db.conn.get('SELECT * FROM %s ORDER BY id ASC'%table) rows = db.conn.get('SELECT * FROM %s ORDER BY id ASC'%table)
for row in rows: for row in rows:
self.conn.execute('INSERT INTO %s VALUES(%s)'%(table, ','.join(repeat('?', len(row)))), row) self.conn.execute('INSERT INTO %s VALUES(%s)'%(table, ','.join(repeat('?', len(row)))), row)
self.conn.commit() self.conn.commit()
self.refresh('timestamp', True) self.refresh('timestamp', True)
for i, book in enumerate(books): for i, book in enumerate(books):
@ -1379,7 +1379,7 @@ books_series_link feeds
self.vacuum() self.vacuum()
progress.reset() progress.reset()
return len(books) return len(books)
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False, def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
index_is_id=False, callback=None): index_is_id=False, callback=None):
if not os.path.exists(dir): if not os.path.exists(dir):
@ -1425,7 +1425,7 @@ books_series_link feeds
opf = OPFCreator(base, mi) opf = OPFCreator(base, mi)
opf.render(f) opf.render(f)
f.close() f.close()
fmts = self.formats(idx, index_is_id=index_is_id) fmts = self.formats(idx, index_is_id=index_is_id)
if not fmts: if not fmts:
fmts = '' fmts = ''
@ -1449,7 +1449,7 @@ books_series_link feeds
if not callback(count, mi.title): if not callback(count, mi.title):
return return
def export_single_format_to_dir(self, dir, indices, format, def export_single_format_to_dir(self, dir, indices, format,
index_is_id=False, callback=None): index_is_id=False, callback=None):
dir = os.path.abspath(dir) dir = os.path.abspath(dir)
if not index_is_id: if not index_is_id:
@ -1476,7 +1476,7 @@ books_series_link feeds
f.write(data) f.write(data)
f.seek(0) f.seek(0)
try: try:
set_metadata(f, self.get_metadata(id, index_is_id=True, get_cover=True), set_metadata(f, self.get_metadata(id, index_is_id=True, get_cover=True),
stream_type=format.lower()) stream_type=format.lower())
except: except:
pass pass
@ -1485,7 +1485,7 @@ books_series_link feeds
if not callback(count, title): if not callback(count, title):
break break
return failures return failures
def find_books_in_directory(self, dirpath, single_book_per_directory): def find_books_in_directory(self, dirpath, single_book_per_directory):
dirpath = os.path.abspath(dirpath) dirpath = os.path.abspath(dirpath)
if single_book_per_directory: if single_book_per_directory:
@ -1514,12 +1514,12 @@ books_series_link feeds
ext = ext[1:].lower() ext = ext[1:].lower()
if ext not in BOOK_EXTENSIONS: if ext not in BOOK_EXTENSIONS:
continue continue
key = os.path.splitext(path)[0] key = os.path.splitext(path)[0]
if not books.has_key(key): if not books.has_key(key):
books[key] = [] books[key] = []
books[key].append(path) books[key].append(path)
for formats in books.values(): for formats in books.values():
yield formats yield formats
@ -1543,7 +1543,7 @@ books_series_link feeds
formats = self.find_books_in_directory(dirpath, True) formats = self.find_books_in_directory(dirpath, True)
if not formats: if not formats:
return return
mi = metadata_from_formats(formats) mi = metadata_from_formats(formats)
if mi.title is None: if mi.title is None:
return return
@ -1552,7 +1552,7 @@ books_series_link feeds
self.import_book(mi, formats) self.import_book(mi, formats)
if callable(callback): if callable(callback):
callback(mi.title) callback(mi.title)
def recursive_import(self, root, single_book_per_directory=True, callback=None): def recursive_import(self, root, single_book_per_directory=True, callback=None):
root = os.path.abspath(root) root = os.path.abspath(root)
duplicates = [] duplicates = []
@ -1565,8 +1565,5 @@ books_series_link feeds
if callable(callback): if callable(callback):
if callback(''): if callback(''):
break break
return duplicates return duplicates

View File

@ -14,7 +14,7 @@ from Queue import Queue
from threading import RLock from threading import RLock
from datetime import tzinfo, datetime, timedelta from datetime import tzinfo, datetime, timedelta
from calibre.library import title_sort from calibre.ebooks.metadata import title_sort
global_lock = RLock() global_lock = RLock()

View File

@ -16,66 +16,56 @@ if os.environ.has_key('DESTDIR'):
entry_points = { entry_points = {
'console_scripts': [ \ 'console_scripts': [ \
'prs500 = calibre.devices.prs500.cli.main:main', 'ebook-device = calibre.devices.prs500.cli.main:main',
'lrf-meta = calibre.ebooks.lrf.meta:main', 'ebook-meta = calibre.ebooks.metadata.cli:main',
'rtf-meta = calibre.ebooks.metadata.rtf:main', 'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main',
'pdf-meta = calibre.ebooks.metadata.pdf:main', 'html2lrf = calibre.ebooks.lrf.html.convert_from:main',
'lit-meta = calibre.ebooks.metadata.lit:main', 'html2oeb = calibre.ebooks.html:main',
'imp-meta = calibre.ebooks.metadata.imp:main', 'html2epub = calibre.ebooks.epub.from_html:main',
'rb-meta = calibre.ebooks.metadata.rb:main', 'odt2oeb = calibre.ebooks.odt.to_oeb:main',
'opf-meta = calibre.ebooks.metadata.opf2:main', 'markdown-calibre = calibre.ebooks.markdown.markdown:main',
'odt-meta = calibre.ebooks.metadata.odt:main', 'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main',
'epub-meta = calibre.ebooks.metadata.epub:main', 'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main',
'mobi-meta = calibre.ebooks.metadata.mobi:main', 'rtf2lrf = calibre.ebooks.lrf.rtf.convert_from:main',
'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main', 'web2disk = calibre.web.fetch.simple:main',
'html2lrf = calibre.ebooks.lrf.html.convert_from:main', 'feeds2disk = calibre.web.feeds.main:main',
'html2oeb = calibre.ebooks.html:main', 'calibre-server = calibre.library.server:main',
'html2epub = calibre.ebooks.epub.from_html:main', 'feeds2lrf = calibre.ebooks.lrf.feeds.convert_from:main',
'odt2oeb = calibre.ebooks.odt.to_oeb:main', 'feeds2epub = calibre.ebooks.epub.from_feeds:main',
'markdown-calibre = calibre.ebooks.markdown.markdown:main', 'feeds2mobi = calibre.ebooks.mobi.from_feeds:main',
'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main', 'web2lrf = calibre.ebooks.lrf.web.convert_from:main',
'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main', 'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main',
'rtf2lrf = calibre.ebooks.lrf.rtf.convert_from:main', 'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main',
'web2disk = calibre.web.fetch.simple:main', 'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
'feeds2disk = calibre.web.feeds.main:main', 'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'calibre-server = calibre.library.server:main', 'any2epub = calibre.ebooks.epub.from_any:main',
'feeds2lrf = calibre.ebooks.lrf.feeds.convert_from:main', 'any2lit = calibre.ebooks.lit.from_any:main',
'feeds2epub = calibre.ebooks.epub.from_feeds:main', 'any2mobi = calibre.ebooks.mobi.from_any:main',
'feeds2mobi = calibre.ebooks.mobi.from_feeds:main', 'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'web2lrf = calibre.ebooks.lrf.web.convert_from:main', 'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main', 'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main', 'isbndb = calibre.ebooks.metadata.isbndb:main',
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main', 'librarything = calibre.ebooks.metadata.library_thing:main',
'fb2-meta = calibre.ebooks.metadata.fb2:main', 'mobi2oeb = calibre.ebooks.mobi.reader:main',
'any2lrf = calibre.ebooks.lrf.any.convert_from:main', 'oeb2mobi = calibre.ebooks.mobi.writer:main',
'any2epub = calibre.ebooks.epub.from_any:main', 'lit2oeb = calibre.ebooks.lit.reader:main',
'any2lit = calibre.ebooks.lit.from_any:main', 'oeb2lit = calibre.ebooks.lit.writer:main',
'any2mobi = calibre.ebooks.mobi.from_any:main', 'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'any2pdf = calibre.ebooks.pdf.from_any:main', 'comic2epub = calibre.ebooks.epub.from_comic:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main', 'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main', 'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main', 'calibre-debug = calibre.debug:main',
'isbndb = calibre.ebooks.metadata.isbndb:main', 'calibredb = calibre.library.cli:main',
'librarything = calibre.ebooks.metadata.library_thing:main', 'calibre-fontconfig = calibre.utils.fontconfig:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main', 'calibre-parallel = calibre.parallel:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main', 'calibre-customize = calibre.customize.ui:main',
'lit2oeb = calibre.ebooks.lit.reader:main', 'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
'oeb2lit = calibre.ebooks.lit.writer:main', 'any2pdf = calibre.ebooks.pdf.from_any:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main', ],
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main',
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
],
'gui_scripts' : [ 'gui_scripts' : [
__appname__+' = calibre.gui2.main:main', __appname__+' = calibre.gui2.main:main',
'lrfviewer = calibre.gui2.lrf_renderer.main:main', 'lrfviewer = calibre.gui2.lrf_renderer.main:main',
'ebook-viewer = calibre.gui2.viewer.main:main', 'ebook-viewer = calibre.gui2.viewer.main:main',
], ],
} }
@ -177,7 +167,7 @@ def setup_completion(fatal_errors):
sys.stdout.flush() sys.stdout.flush()
from calibre.ebooks.lrf.html.convert_from import option_parser as htmlop from calibre.ebooks.lrf.html.convert_from import option_parser as htmlop
from calibre.ebooks.lrf.txt.convert_from import option_parser as txtop from calibre.ebooks.lrf.txt.convert_from import option_parser as txtop
from calibre.ebooks.lrf.meta import option_parser as metaop from calibre.ebooks.metadata.cli import option_parser as metaop, filetypes as meta_filetypes
from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
@ -186,7 +176,6 @@ def setup_completion(fatal_errors):
from calibre.web.feeds.main import option_parser as feeds2disk from calibre.web.feeds.main import option_parser as feeds2disk
from calibre.web.feeds.recipes import titles as feed_titles from calibre.web.feeds.recipes import titles as feed_titles
from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf
from calibre.ebooks.metadata.epub import option_parser as epub_meta
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_html import option_parser as html2epub from calibre.ebooks.epub.from_html import option_parser as html2epub
from calibre.ebooks.html import option_parser as html2oeb from calibre.ebooks.html import option_parser as html2oeb
@ -225,15 +214,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('any2mobi', any2mobi, any_formats)) f.write(opts_and_exts('any2mobi', any2mobi, any_formats))
f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf'])) f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf']))
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf'])) f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf'])) f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
f.write(opts_and_exts('imp-meta', metaop, ['imp']))
f.write(opts_and_exts('rb-meta', metaop, ['rb']))
f.write(opts_and_exts('opf-meta', metaop, ['opf']))
f.write(opts_and_exts('odt-meta', metaop, ['odt', 'ods', 'odf', 'odg', 'odp']))
f.write(opts_and_exts('epub-meta', epub_meta, ['epub']))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf'])) f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf'])) f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc'])) f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc']))
@ -423,10 +404,8 @@ def install_man_pages(fatal_errors):
os.environ['PATH'] += ':'+os.path.expanduser('~/bin') os.environ['PATH'] += ':'+os.path.expanduser('~/bin')
for src in entry_points['console_scripts']: for src in entry_points['console_scripts']:
prog = src[:src.index('=')].strip() prog = src[:src.index('=')].strip()
if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta', if prog in ('ebook-device', 'markdown-calibre',
'markdown-calibre', 'calibre-debug', 'fb2-meta', 'calibre-fontconfig', 'calibre-parallel'):
'calibre-fontconfig', 'calibre-parallel', 'odt-meta',
'rb-meta', 'imp-meta', 'mobi-meta'):
continue continue
help2man = ('help2man', prog, '--name', 'part of %s'%__appname__, help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,

View File

@ -0,0 +1,92 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'A simplified logging system'
DEBUG = 0
INFO = 1
WARN = 2
ERROR = 3
import sys, traceback
from functools import partial
from calibre import prints
from calibre.utils.terminfo import TerminalController
class ANSIStream:
    '''
    Log output that brackets every message with terminal colour sequences.

    The sequences are read from a `TerminalController` created for
    `stream`: GREEN for DEBUG, YELLOW for WARN, RED for ERROR and an empty
    prefix for INFO. The controller's NORMAL sequence is written after
    each message.
    '''

    def __init__(self, stream=sys.stdout):
        '''
        :param stream: File-like object that receives the output.
        '''
        self.stream = stream
        controller = TerminalController(stream)
        # Per-level prefix written before the message text.
        palette = {}
        palette[DEBUG] = controller.GREEN
        palette[INFO] = ''
        palette[WARN] = controller.YELLOW
        palette[ERROR] = controller.RED
        self.color = palette
        # Suffix written after the message text.
        self.normal = controller.NORMAL

    def prints(self, level, *args, **kwargs):
        '''
        Write `args` to the stream, wrapped in the colour for `level`.

        `level` must be a key of `self.color`. The remaining arguments are
        handed to the module-level `prints` function; any `file` keyword
        supplied by the caller is replaced with this object's stream.
        '''
        out = self.stream
        out.write(self.color[level])
        kwargs['file'] = out
        # Inside a method body this name resolves to the imported
        # module-level function, not to this method.
        prints(*args, **kwargs)
        out.write(self.normal)

    def flush(self):
        '''Flush the underlying stream.'''
        self.stream.flush()
class HTMLStream:
    '''
    Log output that wraps every message in an HTML ``<span>`` element.

    DEBUG, WARN and ERROR messages get a span with an inline colour style
    (green, yellow and red respectively); INFO messages get a plain span.
    '''

    def __init__(self, stream=sys.stdout):
        '''
        :param stream: File-like object that receives the output.
        '''
        self.stream = stream
        # Opening tag written before the message text, keyed by level.
        self.color = dict([
            (DEBUG, '<span style="color:green">'),
            (INFO, '<span>'),
            (WARN, '<span style="color:yellow">'),
            (ERROR, '<span style="color:red">'),
        ])
        # Closing tag written after the message text.
        self.normal = '</span>'

    def prints(self, level, *args, **kwargs):
        '''
        Write `args` to the stream between the opening tag for `level` and
        the closing tag.

        `level` must be a key of `self.color`. The remaining arguments are
        handed to the module-level `prints` function; any `file` keyword
        supplied by the caller is replaced with this object's stream.
        '''
        out = self.stream
        out.write(self.color[level])
        kwargs['file'] = out
        # Resolves to the imported module-level function, not this method.
        prints(*args, **kwargs)
        out.write(self.normal)

    def flush(self):
        '''Flush the underlying stream.'''
        self.stream.flush()
class Log(object):
    '''
    Minimal logger that forwards messages to a list of output objects.

    Every object in `self.outputs` must provide a
    ``prints(level, *args, **kwargs)`` method. Messages whose level is
    below `self.filter_level` are dropped. Instances are callable; calling
    one logs at INFO level.
    '''

    # Level constants mirrored on the class so callers can write Log.DEBUG etc.
    DEBUG = DEBUG
    INFO = INFO
    WARN = WARN
    ERROR = ERROR

    def __init__(self, level=INFO):
        '''
        :param level: Minimum level a message needs in order to be emitted.
        '''
        self.filter_level = level
        self.outputs = [ANSIStream()]

        # Per-level convenience callables: log.debug(...), log.info(...), ...
        emit = self.prints
        self.debug = partial(emit, DEBUG)
        self.info = partial(emit, INFO)
        warn = partial(emit, WARN)
        # `warn` and `warning` are two names for the same callable.
        self.warn = warn
        self.warning = warn
        self.error = partial(emit, ERROR)

    def prints(self, level, *args, **kwargs):
        '''
        Send the message to every registered output, unless `level` is
        below `self.filter_level`.
        '''
        if not level < self.filter_level:
            for target in self.outputs:
                target.prints(level, *args, **kwargs)

    def exception(self, *args, **kwargs):
        '''
        Log the message at ERROR level, then log the formatted traceback of
        the exception currently being handled at DEBUG level.

        The traceback is therefore only emitted when the filter level
        admits DEBUG messages. The optional `limit` keyword is removed from
        `kwargs` and passed to `traceback.format_exc`.
        '''
        tb_limit = kwargs.pop('limit', None)
        self.prints(ERROR, *args, **kwargs)
        trace = traceback.format_exc(tb_limit)
        self.prints(DEBUG, trace)

    def __call__(self, *args, **kwargs):
        '''Log the message at INFO level.'''
        self.prints(INFO, *args, **kwargs)

View File

@ -33,7 +33,7 @@ class TerminalController:
>>> term = TerminalController() >>> term = TerminalController()
>>> if term.CLEAR_SCREEN: >>> if term.CLEAR_SCREEN:
... print 'This terminal supports clearning the screen.' ... print 'This terminal supports clearing the screen.'
Finally, if the width and height of the terminal are known, then Finally, if the width and height of the terminal are known, then
they will be stored in the `COLS` and `LINES` attributes. they will be stored in the `COLS` and `LINES` attributes.