Sync to pluginize

This commit is contained in:
John Schember 2009-03-07 13:58:12 -05:00
commit 413da29dec
60 changed files with 2646 additions and 1895 deletions

View File

@ -2,9 +2,9 @@
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/calibre/src</path>
<path>/calibre-pluginize/src</path>
</pydev_pathproperty>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>

View File

@ -2,7 +2,9 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, re, logging, time, subprocess, atexit, mimetypes
import sys, os, re, logging, time, subprocess, atexit, mimetypes, \
__builtin__
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint
from math import floor
from logging import Formatter
@ -73,26 +75,26 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
return one.replace('..', '_')
def prints(*args, **kwargs):
    '''
    Print arguments safely by encoding unicode objects to preferred_encoding.

    Has the same signature as the print function from Python 3, except that
    only the ``file``, ``sep`` and ``end`` keyword arguments are looked at.

    :param args: The objects to print. ``unicode`` objects are encoded with
                 ``preferred_encoding``; objects that are not strings are
                 converted with ``str()`` before being written.
    :param file: (keyword) Object with a ``write`` method. Defaults to
                 ``sys.stdout``.
    :param sep:  (keyword) String written between two consecutive arguments.
                 Defaults to a single space.
    :param end:  (keyword) String written after the last argument. Defaults
                 to a newline.
    '''
    file = kwargs.get('file', sys.stdout)
    sep = kwargs.get('sep', ' ')
    end = kwargs.get('end', '\n')
    last = len(args) - 1
    for i, arg in enumerate(args):
        if isinstance(arg, unicode):
            arg = arg.encode(preferred_encoding)
        elif not isinstance(arg, str):
            # write() only accepts strings; print() accepts any object, so
            # convert numbers, None, etc. instead of raising TypeError.
            arg = str(arg)
        file.write(arg)
        if i != last:
            file.write(sep)
    file.write(end)
# Exception subclass that adds no behaviour of its own.
# NOTE(review): presumably raised for invalid command line usage -- confirm
# against the callers, none of which are visible here.
class CommandLineError(Exception):
    pass
class ColoredFormatter(Formatter):
    '''
    Formatter that wraps the level name of every record in terminal colour
    codes taken from ``terminal_controller`` before delegating to
    :class:`logging.Formatter`.
    '''

    def format(self, record):
        '''
        Colourise ``record.levelname`` in place and return the formatted
        record. Unknown level names get no colour prefix, but the ``NORMAL``
        sequence is always appended.
        '''
        # Level name -> name of the terminal_controller attribute that holds
        # the colour sequence for it.
        color_names = {
            'CRITICAL': 'YELLOW',
            'ERROR': 'RED',
            'WARN': 'BLUE',
            'WARNING': 'BLUE',
            'INFO': 'GREEN',
            'DEBUG': 'CYAN',
        }
        attrs = record.__dict__
        level = attrs['levelname']
        prefix = ''
        if level in color_names:
            prefix = getattr(terminal_controller, color_names[level])
        attrs['levelname'] = prefix + level + terminal_controller.NORMAL
        return Formatter.format(self, record)
def setup_cli_handlers(logger, level):
@ -316,66 +318,23 @@ def english_sort(x, y):
'''
return cmp(_spat.sub('', x), _spat.sub('', y))
class LoggingInterface:
class ColoredFormatter(Formatter):
def __init__(self, logger):
self.__logger = self.logger = logger
def setup_cli_handler(self, verbosity):
for handler in self.__logger.handlers:
if isinstance(handler, logging.StreamHandler):
return
if os.environ.get('CALIBRE_WORKER', None) is not None and self.__logger.handlers:
return
stream = sys.stdout
formatter = logging.Formatter()
level = logging.INFO
if verbosity > 0:
formatter = ColoredFormatter('[%(levelname)s] %(message)s') if verbosity > 1 else \
ColoredFormatter('%(levelname)s: %(message)s')
level = logging.DEBUG
if verbosity > 1:
stream = sys.stderr
handler = logging.StreamHandler(stream)
handler.setFormatter(formatter)
handler.setLevel(level)
self.__logger.addHandler(handler)
self.__logger.setLevel(level)
def ___log(self, func, msg, args, kwargs):
args = [msg] + list(args)
for i in range(len(args)):
if not isinstance(args[i], basestring):
continue
if sys.version_info[:2] > (2, 5):
if not isinstance(args[i], unicode):
args[i] = args[i].decode(preferred_encoding, 'replace')
elif isinstance(args[i], unicode):
args[i] = args[i].encode(preferred_encoding, 'replace')
func(*args, **kwargs)
def log_debug(self, msg, *args, **kwargs):
self.___log(self.__logger.debug, msg, args, kwargs)
def log_info(self, msg, *args, **kwargs):
self.___log(self.__logger.info, msg, args, kwargs)
def log_warning(self, msg, *args, **kwargs):
self.___log(self.__logger.warning, msg, args, kwargs)
def log_warn(self, msg, *args, **kwargs):
self.___log(self.__logger.warning, msg, args, kwargs)
def log_error(self, msg, *args, **kwargs):
self.___log(self.__logger.error, msg, args, kwargs)
def log_critical(self, msg, *args, **kwargs):
self.___log(self.__logger.critical, msg, args, kwargs)
def log_exception(self, msg, *args):
self.___log(self.__logger.exception, msg, args, {})
def format(self, record):
ln = record.__dict__['levelname']
col = ''
if ln == 'CRITICAL':
col = terminal_controller.YELLOW
elif ln == 'ERROR':
col = terminal_controller.RED
elif ln in ['WARN', 'WARNING']:
col = terminal_controller.BLUE
elif ln == 'INFO':
col = terminal_controller.GREEN
elif ln == 'DEBUG':
col = terminal_controller.CYAN
record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
return Formatter.format(self, record)
def walk(dir):
''' A nice interface to os.walk '''

View File

@ -220,4 +220,6 @@ class MetadataWriterPlugin(Plugin):
'''
pass

View File

@ -242,8 +242,13 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
set_metadata(stream, mi)
plugins = [HTML2ZIP]
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.customize.profiles import input_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataWriter')]
plugins += input_profiles

View File

@ -0,0 +1,189 @@
from __future__ import with_statement
'''
Defines the plugin system for conversions.
'''
import re, os, shutil
from lxml import html
from calibre import CurrentDir
from calibre.customize import Plugin
class ConversionOption(object):

    '''
    Class representing conversion options

    :param name: Name of the option. Must be a valid Python identifier.
    :param help: Help text for the option. Must not be empty.
    :param long_switch: Long command line switch. When None it is derived
                        from ``name`` by prefixing ``--`` and replacing
                        underscores with hyphens.
    :param short_switch: Short command line switch, stored as given.
    :param choices: Collection of allowed values, stored as given.
    '''

    def __init__(self, name=None, help=None, long_switch=None,
            short_switch=None, choices=None):
        self.name = name
        self.help = help
        self.long_switch = long_switch
        self.short_switch = short_switch
        self.choices = choices

        # Validate before deriving the long switch, so that a missing name is
        # reported as a ValueError rather than an AttributeError on None.
        self.validate_parameters()

        if self.long_switch is None:
            self.long_switch = '--'+self.name.replace('_', '-')

    def validate_parameters(self):
        '''
        Validate the parameters passed to :method:`__init__`.

        :raises ValueError: If the name is missing or is not a valid Python
                            identifier, or if no help text was set.
        '''
        # re.match() only anchors at the start, so the pattern must be
        # anchored at the end as well or names like 'bad-name' slip through.
        if not self.name or \
                re.match(r'[a-zA-Z_][a-zA-Z0-9_]*\Z', self.name) is None:
            raise ValueError('%s is not a valid Python identifier'
                    % (self.name,))
        if not self.help:
            raise ValueError('You must set the help text')
class OptionRecommendation(object):
    # Recommendation levels, in increasing order of strength.
    LOW  = 1
    MED  = 2
    HIGH = 3

    def __init__(self, recommended_value=None, level=LOW, **kwargs):
        '''
        An option recommendation. That is, an option as well as its recommended
        value and the level of the recommendation.

        :param recommended_value: The recommended value. Must be a string, a
                                  number or None.
        :param level: One of :attr:`LOW`, :attr:`MED` or :attr:`HIGH`.
        :param kwargs: Either ``option``, an existing option object, or the
                       keyword arguments used to construct a
                       :class:`ConversionOption`.
        :raises ValueError: If the recommended value is not valid.
        '''
        # The first parameter used to be misspelled. Keep accepting the old
        # spelling as a keyword so existing callers do not break.
        if 'recommeded_value' in kwargs:
            recommended_value = kwargs.pop('recommeded_value')
        self.level = level
        self.recommended_value = recommended_value
        self.option = kwargs.pop('option', None)
        if self.option is None:
            self.option = ConversionOption(**kwargs)

        self.validate_parameters()

    def validate_parameters(self):
        '''
        Check the recommended value against the option.

        :raises ValueError: If the option declares choices and the value is
                            not one of them, or if the value is neither None,
                            a string nor a number.
        '''
        value = self.recommended_value
        if self.option.choices and value not in self.option.choices:
            raise ValueError('Recommended value not in choices')
        if not (isinstance(value, (int, float, str, unicode))
                or value is None):
            raise ValueError(unicode(value) +
                             ' is not a string or a number')
class InputFormatPlugin(Plugin):
    '''
    InputFormatPlugins are responsible for converting a document into
    HTML+OPF+CSS+etc.
    The results of the conversion *must* be encoded in UTF-8.
    The main action happens in :method:`convert`.
    '''

    type = _('Conversion Input')
    can_be_disabled = False
    supported_platforms = ['windows', 'osx', 'linux']

    #: Set of file types for which this plugin should be run
    #: For example: ``set(['azw', 'mobi', 'prc'])``
    file_types = set([])

    #: Options shared by all Input format plugins. Do not override
    #: in sub-classes. Use :member:`options` instead. Every option must be an
    #: instance of :class:`OptionRecommendation`.
    common_options = set([
        OptionRecommendation(name='debug_input',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Save the output from the input plugin to the specified '
                   'directory. Useful if you are unsure at which stage '
                   'of the conversion process a bug is occurring. '
                   'WARNING: This completely deletes the contents of '
                   'the specified directory.')
        ),

        OptionRecommendation(name='input_encoding',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Specify the character encoding of the input document. If '
                   'set this option will override any encoding declared by the '
                   'document itself. Particularly useful for documents that '
                   'do not declare an encoding or that have erroneous '
                   'encoding declarations.')
        ),
    ])

    #: Options to customize the behavior of this plugin. Every option must be an
    #: instance of :class:`OptionRecommendation`.
    options = set([])

    def convert(self, stream, options, file_ext, parse_cache, log):
        '''
        This method must be implemented in sub-classes. It must return
        the path to the created OPF file. All output should be contained in
        the current directory. If this plugin creates files outside the current
        directory they must be deleted/marked for deletion before this method
        returns.

        :param stream:   A file like object that contains the input file.

        :param options:  Options to customize the conversion process.
                         Guaranteed to have attributes corresponding
                         to all the options declared by this plugin. In
                         addition, it will have a verbose attribute that
                         takes integral values from zero upwards. Higher numbers
                         mean be more verbose. Another useful attribute is
                         ``input_profile`` that is an instance of
                         :class:`calibre.customize.profiles.InputProfile`.

        :param file_ext: The extension (without the .) of the input file. It
                         is guaranteed to be one of the `file_types` supported
                         by this plugin.

        :param parse_cache: A dictionary that maps absolute file paths to
                            parsed representations of their contents. For
                            HTML the representation is an lxml element of
                            the root of the tree. For CSS it is a cssutils
                            stylesheet. If this plugin parses any of the
                            output files, it should add them to the cache
                            so that later stages of the conversion won't
                            have to re-parse them. If a parsed representation
                            is in the cache, there is no need to actually
                            write the file to disk.

        :param log: A :class:`calibre.utils.logging.Log` object. All output
                    should use this object.
        '''
        raise NotImplementedError

    def __call__(self, stream, options, file_ext, parse_cache, log, output_dir):
        '''
        Run :method:`convert` inside ``output_dir`` and return its result.

        Every file and directory already present in ``output_dir`` is deleted
        before the conversion starts. After the conversion, relative keys in
        ``parse_cache`` are replaced by absolute paths (with a warning). If
        ``options.debug_input`` is set, the cached parse results are written
        to disk and the directory is copied to that location, replacing
        whatever was there.

        :param output_dir: Directory in which the conversion is run.

        The remaining parameters are passed through to :method:`convert`.
        '''
        log('InputFormatPlugin: %s running'%self.name, end=' ')
        if hasattr(stream, 'name'):
            log('on', stream.name)

        with CurrentDir(output_dir):
            # Start from an empty directory
            for x in os.listdir('.'):
                if os.path.isdir(x):
                    shutil.rmtree(x)
                else:
                    os.remove(x)

            ret = self.convert(stream, options, file_ext, parse_cache, log)

            # Iterate over a copy of the keys, the cache is modified below
            for key in list(parse_cache.keys()):
                if os.path.abspath(key) != key:
                    log.warn(('InputFormatPlugin: %s returned a '
                      'relative path: %s')%(self.name, key)
                    )
                    parse_cache[os.path.abspath(key)] = parse_cache.pop(key)

            if options.debug_input is not None:
                options.debug_input = os.path.abspath(options.debug_input)
                if not os.path.exists(options.debug_input):
                    os.makedirs(options.debug_input)
                # copytree() below needs the destination to not exist
                shutil.rmtree(options.debug_input)
                for f, obj in parse_cache.items():
                    if hasattr(obj, 'cssText'):
                        raw = obj.cssText
                    else:
                        raw = html.tostring(obj, encoding='utf-8', method='xml',
                            include_meta_content_type=True, pretty_print=True)
                    if isinstance(raw, unicode):
                        raw = raw.encode('utf-8')
                    # Close the file explicitly so the data is on disk before
                    # the directory is copied.
                    with open(f, 'wb') as out:
                        out.write(raw)
                shutil.copytree('.', options.debug_input)

            return ret

View File

@ -0,0 +1,27 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize import Plugin
class InputProfile(Plugin):
    # Default input profile plugin. It only declares descriptive attributes;
    # it defines no methods of its own.

    author = 'Kovid Goyal'
    supported_platforms = set(['windows', 'osx', 'linux'])
    can_be_disabled = False
    type = _('Input profile')

    # TODO: Add some real information to this profile. All other profiles must
    # inherit from this profile and override as needed

    name = 'Default Input Profile'
    short_name = 'default' # Used in the CLI, so do not use spaces etc. in it
    description = _('This profile tries to provide sane defaults and is useful '
                    'if you know nothing about the input document.')
input_profiles = [InputProfile]

View File

@ -6,13 +6,14 @@ import os, shutil, traceback, functools, sys
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin
from calibre.customize.conversion import InputFormatPlugin
from calibre.customize.profiles import InputProfile
from calibre.customize.builtins import plugins as builtin_plugins
from calibre.constants import __version__, iswindows, isosx
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
plugin_dir, OptionParser
version = tuple([int(x) for x in __version__.split('.')])
platform = 'linux'
@ -70,7 +71,10 @@ _on_import = {}
_on_preprocess = {}
_on_postprocess = {}
def input_profiles():
    '''
    Generator over the entries of ``_initialized_plugins`` that are instances
    of :class:`InputProfile`, in the order in which they are stored.
    '''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, InputProfile):
            continue
        yield candidate
def reread_filetype_plugins():
global _on_import
@ -114,7 +118,19 @@ def reread_metadata_plugins():
_metadata_writers[ft] = []
_metadata_writers[ft].append(plugin)
def metadata_readers():
    '''
    Return the set of all plugins found in the values of
    ``_metadata_readers``, with duplicates removed.
    '''
    readers = set([])
    for group in _metadata_readers.values():
        readers.update(group)
    return readers
def metadata_writers():
    '''
    Return the set of all plugins found in the values of
    ``_metadata_writers``, with duplicates removed.
    '''
    writers = set([])
    for group in _metadata_writers.values():
        writers.update(group)
    return writers
def get_file_type_metadata(stream, ftype):
mi = MetaInformation(None, None)
@ -222,6 +238,17 @@ def find_plugin(name):
if plugin.name == name:
return plugin
def input_format_plugins():
    '''
    Generator over the entries of ``_initialized_plugins`` that are instances
    of :class:`InputFormatPlugin`, in the order in which they are stored.
    '''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, InputFormatPlugin):
            continue
        yield candidate
def plugin_for_input_format(fmt):
    '''
    Return the first input format plugin whose ``file_types`` contains
    ``fmt``, or None when no plugin declares that format.
    '''
    for candidate in input_format_plugins():
        if fmt not in candidate.file_types:
            continue
        return candidate
    return None
def disable_plugin(plugin_or_name):
x = getattr(plugin_or_name, 'name', plugin_or_name)
plugin = find_plugin(x)

View File

@ -60,7 +60,7 @@ class CYBOOKG3(USBMS):
if on_card and size > self.free_space()[2] - 1024*1024:
raise FreeSpaceError(_("There is insufficient free space on the storage card"))
if not on_card and size > self.free_space()[0] - 2*1024*1024:
if not on_card and size > self.free_space()[0] - 2*1024*1024:
raise FreeSpaceError(_("There is insufficient free space in main memory"))
paths = []
@ -91,7 +91,7 @@ class CYBOOKG3(USBMS):
if not os.path.exists(newpath):
os.makedirs(newpath)
filepath = os.path.join(newpath, names.next())
filepath = os.path.join(newpath, names.next())
paths.append(filepath)
if hasattr(infile, 'read'):
@ -100,7 +100,7 @@ class CYBOOKG3(USBMS):
dest = open(filepath, 'wb')
shutil.copyfileobj(infile, dest, 10*1024*1024)
dest.flush()
dest.flush()
dest.close()
else:
shutil.copy2(infile, filepath)

View File

@ -116,8 +116,8 @@ class Device(Structure):
raise Error("Cannot open device")
return handle.contents
@apply
def configurations():
@dynamic_property
def configurations(self):
doc = """ List of device configurations. See L{ConfigDescriptor} """
def fget(self):
ans = []
@ -127,8 +127,8 @@ class Device(Structure):
return property(doc=doc, fget=fget)
class Bus(Structure):
@apply
def device_list():
@dynamic_property
def device_list(self):
doc = \
"""
Flat list of devices on this bus.
@ -360,4 +360,4 @@ def get_devices():
for dev in devices:
device = (dev.device_descriptor.idVendor, dev.device_descriptor.idProduct, dev.device_descriptor.bcdDevice)
ans.append(device)
return ans
return ans

View File

@ -55,8 +55,8 @@ class Book(object):
size = book_metadata_field("size", formatter=int)
# When setting this attribute you must use an epoch
datetime = book_metadata_field("date", formatter=strptime, setter=strftime)
@apply
def title_sorter():
@dynamic_property
def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
src = self.elem.getAttribute('titleSorter').strip()
@ -67,8 +67,8 @@ class Book(object):
self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
return property(doc=doc, fget=fget, fset=fset)
@apply
def thumbnail():
@dynamic_property
def thumbnail(self):
doc = \
"""
The thumbnail. Should be a height 68 image.
@ -88,15 +88,15 @@ class Book(object):
return decode(rc)
return property(fget=fget, doc=doc)
@apply
def path():
@dynamic_property
def path(self):
doc = """ Absolute path to book on device. Setting not supported. """
def fget(self):
return self.root + self.rpath
return property(fget=fget, doc=doc)
@apply
def db_id():
@dynamic_property
def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -378,4 +378,4 @@ class BookList(_BookList):
def write(self, stream):
""" Write XML representation of DOM tree to C{stream} """
stream.write(self.document.toxml('utf-8'))
stream.write(self.document.toxml('utf-8'))

View File

@ -39,8 +39,8 @@ class FileFormatter(object):
self.name = file.name
self.path = file.path
@apply
def mode_string():
@dynamic_property
def mode_string(self):
doc=""" The mode string for this file. There are only two modes read-only and read-write """
def fget(self):
mode, x = "-", "-"
@ -50,8 +50,8 @@ class FileFormatter(object):
return mode
return property(doc=doc, fget=fget)
@apply
def isdir_name():
@dynamic_property
def isdir_name(self):
doc='''Return self.name + '/' if self is a directory'''
def fget(self):
name = self.name
@ -61,8 +61,8 @@ class FileFormatter(object):
return property(doc=doc, fget=fget)
@apply
def name_in_color():
@dynamic_property
def name_in_color(self):
doc=""" The name in ANSI text. Directories are blue, ebooks are green """
def fget(self):
cname = self.name
@ -75,22 +75,22 @@ class FileFormatter(object):
return cname
return property(doc=doc, fget=fget)
@apply
def human_readable_size():
@dynamic_property
def human_readable_size(self):
doc=""" File size in human readable form """
def fget(self):
return human_readable(self.size)
return property(doc=doc, fget=fget)
@apply
def modification_time():
@dynamic_property
def modification_time(self):
doc=""" Last modified time in the Linux ls -l format """
def fget(self):
return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.wtime))
return property(doc=doc, fget=fget)
@apply
def creation_time():
@dynamic_property
def creation_time(self):
doc=""" Last modified time in the Linux ls -l format """
def fget(self):
return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.ctime))
@ -334,4 +334,4 @@ def main():
return 0
if __name__ == '__main__':
main()
main()

View File

@ -284,8 +284,8 @@ class Command(TransferBuffer):
# Length of the data part of this packet
length = field(start=12, fmt=DWORD)
@apply
def data():
@dynamic_property
def data(self):
doc = \
"""
The data part of this command. Returned/set as/by a TransferBuffer.
@ -447,8 +447,8 @@ class LongCommand(Command):
self.length = 16
self.command = command
@apply
def command():
@dynamic_property
def command(self):
doc = \
"""
Usually carries extra information needed for the command
@ -568,8 +568,8 @@ class FileOpen(PathCommand):
PathCommand.__init__(self, path, FileOpen.NUMBER, path_len_at_byte=20)
self.mode = mode
@apply
def mode():
@dynamic_property
def mode(self):
doc = \
"""
The file open mode. Is either L{FileOpen.READ}
@ -651,8 +651,8 @@ class Response(Command):
raise PacketError("Response packets must have their number set to " \
+ hex(0x00001000))
@apply
def data():
@dynamic_property
def data(self):
doc = \
"""
The last 3 DWORDs (12 bytes) of data in this
@ -681,43 +681,43 @@ class ListResponse(Response):
PATH_NOT_FOUND = 0xffffffd7 #: Queried path is not found
PERMISSION_DENIED = 0xffffffd6 #: Permission denied
@apply
def is_file():
@dynamic_property
def is_file(self):
doc = """ True iff queried path is a file """
def fget(self):
return self.code == ListResponse.IS_FILE
return property(doc=doc, fget=fget)
@apply
def is_invalid():
@dynamic_property
def is_invalid(self):
doc = """ True iff queried path is invalid """
def fget(self):
return self.code == ListResponse.IS_INVALID
return property(doc=doc, fget=fget)
@apply
def path_not_found():
@dynamic_property
def path_not_found(self):
doc = """ True iff queried path is not found """
def fget(self):
return self.code == ListResponse.PATH_NOT_FOUND
return property(doc=doc, fget=fget)
@apply
def permission_denied():
@dynamic_property
def permission_denied(self):
doc = """ True iff permission is denied for path operations """
def fget(self):
return self.code == ListResponse.PERMISSION_DENIED
return property(doc=doc, fget=fget)
@apply
def is_unmounted():
@dynamic_property
def is_unmounted(self):
doc = """ True iff queried path is unmounted (i.e. removed storage card) """
def fget(self):
return self.code == ListResponse.IS_UNMOUNTED
return property(doc=doc, fget=fget)
@apply
def is_eol():
@dynamic_property
def is_eol(self):
doc = """ True iff there are no more items in the list """
def fget(self):
return self.code == ListResponse.IS_EOL
@ -759,8 +759,8 @@ class FileProperties(Answer):
# 0 = default permissions, 4 = read only
permissions = field(start=36, fmt=DWORD)
@apply
def is_dir():
@dynamic_property
def is_dir(self):
doc = """True if path points to a directory, False if it points to a file."""
def fget(self):
@ -776,8 +776,8 @@ class FileProperties(Answer):
return property(doc=doc, fget=fget, fset=fset)
@apply
def is_readonly():
@dynamic_property
def is_readonly(self):
doc = """ Whether this file is readonly."""
def fget(self):
@ -801,8 +801,8 @@ class IdAnswer(Answer):
""" Defines the structure of packets that contain identifiers for queries. """
@apply
def id():
@dynamic_property
def id(self):
doc = \
"""
The identifier. C{unsigned int} stored in 4 bytes
@ -841,8 +841,8 @@ class ListAnswer(Answer):
name_length = field(start=20, fmt=DWORD)
name = stringfield(name_length, start=24)
@apply
def is_dir():
@dynamic_property
def is_dir(self):
doc = \
"""
True if list item points to a directory, False if it points to a file.
@ -859,4 +859,3 @@ class ListAnswer(Answer):
return property(doc=doc, fget=fget, fset=fset)

View File

@ -64,8 +64,8 @@ class Book(object):
# When setting this attribute you must use an epoch
datetime = book_metadata_field("date", formatter=strptime, setter=strftime)
@apply
def title_sorter():
@dynamic_property
def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
src = self.elem.getAttribute('titleSorter').strip()
@ -76,8 +76,8 @@ class Book(object):
self.elem.setAttribute('titleSorter', sortable_title(unicode(val)))
return property(doc=doc, fget=fget, fset=fset)
@apply
def thumbnail():
@dynamic_property
def thumbnail(self):
doc = \
"""
The thumbnail. Should be a height 68 image.
@ -99,15 +99,15 @@ class Book(object):
return decode(rc)
return property(fget=fget, doc=doc)
@apply
def path():
@dynamic_property
def path(self):
doc = """ Absolute path to book on device. Setting not supported. """
def fget(self):
return self.mountpath + self.rpath
return property(fget=fget, doc=doc)
@apply
def db_id():
@dynamic_property
def db_id(self):
doc = '''The database id in the application database that this file corresponds to'''
def fget(self):
match = re.search(r'_(\d+)$', self.rpath.rpartition('.')[0])
@ -415,4 +415,4 @@ def fix_ids(main, card):
regen_ids(main)
regen_ids(card)
main.set_next_id(str(main.max_id()+1))
main.set_next_id(str(main.max_id()+1))

View File

@ -21,15 +21,15 @@ class Book(object):
def __eq__(self, other):
return self.path == other.path
@apply
def title_sorter():
@dynamic_property
def title_sorter(self):
doc = '''String to sort the title. If absent, title is returned'''
def fget(self):
return re.sub('^\s*A\s+|^\s*The\s+|^\s*An\s+', '', self.title).rstrip()
return property(doc=doc, fget=fget)
@apply
def thumbnail():
@dynamic_property
def thumbnail(self):
return None
def __str__(self):
@ -44,4 +44,3 @@ class BookList(_BookList):
def set_tags(self, book, tags):
pass

View File

@ -37,7 +37,7 @@ class USBMS(Device):
SUPPORTS_SUB_DIRS = False
def __init__(self, key='-1', log_packets=False, report_progress=None):
Device.__init__(self, key=key, log_packets=log_packets,
Device.__init__(self, key=key, log_packets=log_packets,
report_progress=report_progress)
def get_device_information(self, end_session=True):
@ -103,7 +103,7 @@ class USBMS(Device):
if 'tags' in mdata.keys():
for tag in mdata['tags']:
if tag.startswith('News'):
if tag.startswith(_('News')):
newpath = os.path.join(newpath, 'news')
newpath = os.path.join(newpath, mdata.get('title', ''))
newpath = os.path.join(newpath, mdata.get('timestamp', ''))

View File

@ -0,0 +1,4 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,30 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles
# Option recommendations for the ``verbose`` and ``input_profile`` options.
# NOTE: the choices for ``input_profile`` are computed once, when this module
# is imported, by calling input_profiles(); profiles that only become
# available later are not offered.
pipeline_options = [

OptionRecommendation(name='verbose',
            recommended_value=0, level=OptionRecommendation.LOW,
            short_switch='v',
            help=_('Level of verbosity. Specify multiple times for greater '
                   'verbosity.')
        ),

OptionRecommendation(name='input_profile',
            recommended_value='default', level=OptionRecommendation.LOW,
            choices=[x.short_name for x in input_profiles()],
            help=_('Specify the input profile. The input profile gives the '
                   'conversion system information on how to interpret '
                   'various information in the input document. For '
                   'example resolution dependent lengths (i.e. lengths in '
                   'pixels).')
        ),

]

View File

@ -40,38 +40,6 @@ def rules(stylesheets):
if r.type == r.STYLE_RULE:
yield r
def decrypt_font(key, path):
    '''
    De-obfuscate the font file at ``path`` in place.

    The first 1024 bytes of the file are XORed with ``key``, which is
    repeated as often as needed. The rest of the file is written back
    unchanged.

    :param key: Iterable of integers used as the XOR key.
    :param path: Path to the font file. The file is overwritten.
    '''
    # Read through a context manager so the handle is closed before the same
    # path is reopened for writing.
    with open(path, 'rb') as f:
        raw = f.read()
    crypt = raw[:1024]
    key = cycle(iter(key))
    decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
    with open(path, 'wb') as f:
        f.write(decrypt)
        f.write(raw[1024:])
def process_encryption(encfile, opf):
    '''
    Try to undo the font encryption described by ``encfile``.

    The key is taken from the first ``urn:uuid:`` identifier found in the OPF
    file. Every resource referenced from an ``EncryptionMethod`` entry is
    decrypted in place if it exists on disk.

    :param encfile: Path to the ``encryption.xml`` file.
    :param opf: Path to the OPF file.
    :return: True if every encryption method uses the algorithm
             ``http://ns.adobe.com/pdf/enc#RC``. False if another algorithm
             is found or if an error occurs (the traceback is printed).
    '''
    key = None
    with open(opf, 'rb') as f:
        m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', f.read())
    if m:
        key = m.group(1)
        key = list(map(ord, uuid.UUID(key).bytes))
    try:
        root = etree.parse(encfile)
        for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
            algorithm = em.get('Algorithm', '')
            if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
                return False
            cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
            uri = cr.get('URI')
            path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
            if os.path.exists(path):
                decrypt_font(key, path)
        return True
    except Exception:
        # Best effort: any failure is reported as "could not decrypt", but
        # KeyboardInterrupt and SystemExit are allowed to propagate.
        import traceback
        traceback.print_exc()
    return False
def initialize_container(path_to_container, opf_name='metadata.opf'):
'''
Create an empty EPUB document, with a default skeleton.

View File

@ -0,0 +1,76 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, uuid
from itertools import cycle
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
class EPUBInput(InputFormatPlugin):
    '''
    Input plugin that unpacks an EPUB file into the current directory and
    returns the path to its OPF file.
    '''

    name        = 'EPUB Input'
    author      = 'Kovid Goyal'
    description = 'Convert EPUB files (.epub) to HTML'
    file_types  = set(['epub'])

    @classmethod
    def decrypt_font(cls, key, path):
        '''
        De-obfuscate the font file at ``path`` in place by XORing its first
        1024 bytes with ``key`` (an iterable of integers, repeated as needed).
        The rest of the file is written back unchanged.
        '''
        # Close the read handle before reopening the same path for writing
        with open(path, 'rb') as f:
            raw = f.read()
        crypt = raw[:1024]
        key = cycle(iter(key))
        decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
        with open(path, 'wb') as f:
            f.write(decrypt)
            f.write(raw[1024:])

    @classmethod
    def process_encryption(cls, encfile, opf, log):
        '''
        Try to undo the font encryption described by ``encfile``.

        The key is taken from the first ``urn:uuid:`` identifier found in the
        OPF file. Every resource referenced from an ``EncryptionMethod``
        entry is decrypted in place if it exists on disk.

        :param encfile: Path to the ``encryption.xml`` file.
        :param opf: Path to the OPF file.
        :param log: Log object. Currently unused.
        :return: True if every encryption method uses the algorithm
                 ``http://ns.adobe.com/pdf/enc#RC``. False if another
                 algorithm is found or if an error occurs (the traceback is
                 printed).
        '''
        key = None
        with open(opf, 'rb') as f:
            m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', f.read())
        if m:
            key = m.group(1)
            key = list(map(ord, uuid.UUID(key).bytes))
        try:
            root = etree.parse(encfile)
            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
                algorithm = em.get('Algorithm', '')
                if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
                    return False
                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
                uri = cr.get('URI')
                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
                if os.path.exists(path):
                    cls.decrypt_font(key, path)
            return True
        except Exception:
            # Best effort: any failure is reported as "could not decrypt",
            # but KeyboardInterrupt and SystemExit are allowed to propagate.
            import traceback
            traceback.print_exc()
        return False

    # The method was originally defined under this misspelled name while
    # convert() called the correct spelling. Keep the old name as an alias.
    process_ecryption = process_encryption

    def convert(self, stream, options, file_ext, parse_cache, log):
        '''
        Extract the EPUB in ``stream`` into the current directory.

        :return: Path to the first file found whose name ends in ``.opf``.
        :raises ValueError: If the archive contains no OPF file.
        :raises DRMError: If the book has an ``encryption.xml`` that cannot
                          be processed by :method:`process_encryption`.
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre import walk
        from calibre.ebooks import DRMError
        zf = ZipFile(stream)
        zf.extractall(os.getcwd())
        encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
        opf = None
        for f in walk('.'):
            if f.lower().endswith('.opf'):
                opf = f
                break
        path = getattr(stream, 'name', 'stream')

        if opf is None:
            raise ValueError('%s is not a valid EPUB file'%path)

        if os.path.exists(encfile):
            if not self.process_encryption(encfile, opf, log):
                raise DRMError(os.path.basename(path))

        return opf

View File

@ -31,8 +31,8 @@ from cssutils import CSSParser
class HTMLElement(HtmlElement):
@apply
def specified_font_size():
@dynamic_property
def specified_font_size(self):
def fget(self):
ans = self.get('specified_font_size', '')
@ -47,8 +47,8 @@ class HTMLElement(HtmlElement):
return property(fget=fget, fset=fset)
@apply
def computed_font_size():
@dynamic_property
def computed_font_size(self):
def fget(self):
ans = self.get('computed_font_size', '')
if ans == '':
@ -1183,4 +1183,4 @@ output = %s
if __name__ == '__main__':
sys.exit(main())
sys.exit(main())

View File

@ -7,21 +7,25 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>'
import sys, struct, cStringIO, os
import sys, struct, os
import functools
import re
from urlparse import urldefrag
from cStringIO import StringIO
from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks import DRMError
from calibre import plugins
lzx, lxzerror = plugins['lzx']
msdes, msdeserror = plugins['msdes']
__all__ = ["LitReader"]
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
"""
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
@ -109,6 +113,9 @@ def consume_sized_utf8_string(bytes, zpad=False):
pos += 1
return u''.join(result), bytes[pos:]
def encode(string):
return unicode(string).encode('ascii', 'xmlcharrefreplace')
class UnBinary(object):
AMPERSAND_RE = re.compile(
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
@ -120,14 +127,16 @@ class UnBinary(object):
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
self.manifest = manifest
self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.is_html = map is HTML_MAP
self.tag_atoms, self.attr_atoms = atoms
self.opf = map is OPF_MAP
self.bin = bin
self.dir = os.path.dirname(path)
self.buf = cStringIO.StringIO()
self.binary_to_text()
self.raw = self.buf.getvalue().lstrip().decode('utf-8')
buf = StringIO()
self.binary_to_text(bin, buf)
self.raw = buf.getvalue().lstrip()
self.escape_reserved()
self._tree = None
def escape_reserved(self):
raw = self.raw
@ -154,18 +163,20 @@ class UnBinary(object):
return '/'.join(relpath)
def __unicode__(self):
return self.raw.decode('utf-8')
def __str__(self):
return self.raw
def binary_to_text(self, base=0, depth=0):
def binary_to_text(self, bin, buf, index=0, depth=0):
tag_name = current_map = None
dynamic_tag = errors = 0
in_censorship = is_goingdown = False
state = 'text'
index = base
flags = 0
while index < len(self.bin):
c, index = read_utf8_char(self.bin, index)
while index < len(bin):
c, index = read_utf8_char(bin, index)
oc = ord(c)
if state == 'text':
@ -178,7 +189,7 @@ class UnBinary(object):
c = '>>'
elif c == '<':
c = '<<'
self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
buf.write(encode(c))
elif state == 'get flags':
if oc == 0:
@ -191,7 +202,7 @@ class UnBinary(object):
state = 'text' if oc == 0 else 'get attr'
if flags & FLAG_OPENING:
tag = oc
self.buf.write('<')
buf.write('<')
if not (flags & FLAG_CLOSING):
is_goingdown = True
if tag == 0x8000:
@ -211,7 +222,7 @@ class UnBinary(object):
tag_name = '?'+unichr(tag)+'?'
current_map = self.tag_to_attr_map[tag]
print 'WARNING: tag %s unknown' % unichr(tag)
self.buf.write(unicode(tag_name).encode('utf-8'))
buf.write(encode(tag_name))
elif flags & FLAG_CLOSING:
if depth == 0:
raise LitError('Extra closing tag')
@ -223,15 +234,14 @@ class UnBinary(object):
if not is_goingdown:
tag_name = None
dynamic_tag = 0
self.buf.write(' />')
buf.write(' />')
else:
self.buf.write('>')
index = self.binary_to_text(base=index, depth=depth+1)
buf.write('>')
index = self.binary_to_text(bin, buf, index, depth+1)
is_goingdown = False
if not tag_name:
raise LitError('Tag ends before it begins.')
self.buf.write(u''.join(
('</', tag_name, '>')).encode('utf-8'))
buf.write(encode(u''.join(('</', tag_name, '>'))))
dynamic_tag = 0
tag_name = None
state = 'text'
@ -251,7 +261,7 @@ class UnBinary(object):
in_censorship = True
state = 'get value length'
continue
self.buf.write(' ' + unicode(attr).encode('utf-8') + '=')
buf.write(' ' + encode(attr) + '=')
if attr in ['href', 'src']:
state = 'get href length'
else:
@ -259,24 +269,24 @@ class UnBinary(object):
elif state == 'get value length':
if not in_censorship:
self.buf.write('"')
buf.write('"')
count = oc - 1
if count == 0:
if not in_censorship:
self.buf.write('"')
buf.write('"')
in_censorship = False
state = 'get attr'
continue
state = 'get value'
if oc == 0xffff:
continue
if count < 0 or count > (len(self.bin) - index):
if count < 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count)
elif state == 'get value':
if count == 0xfffe:
if not in_censorship:
self.buf.write('%s"' % (oc - 1))
buf.write('%s"' % (oc - 1))
in_censorship = False
state = 'get attr'
elif count > 0:
@ -289,13 +299,13 @@ class UnBinary(object):
count -= 1
if count == 0:
if not in_censorship:
self.buf.write('"')
buf.write('"')
in_censorship = False
state = 'get attr'
elif state == 'get custom length':
count = oc - 1
if count <= 0 or count > len(self.bin)-index:
if count <= 0 or count > len(bin)-index:
raise LitError('Invalid character count %d' % count)
dynamic_tag += 1
state = 'get custom'
@ -305,26 +315,26 @@ class UnBinary(object):
tag_name += c
count -= 1
if count == 0:
self.buf.write(unicode(tag_name).encode('utf-8'))
buf.write(encode(tag_name))
state = 'get attr'
elif state == 'get attr length':
count = oc - 1
if count <= 0 or count > (len(self.bin) - index):
if count <= 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count)
self.buf.write(' ')
buf.write(' ')
state = 'get custom attr'
elif state == 'get custom attr':
self.buf.write(unicode(c).encode('utf-8'))
buf.write(encode(c))
count -= 1
if count == 0:
self.buf.write('=')
buf.write('=')
state = 'get value length'
elif state == 'get href length':
count = oc - 1
if count <= 0 or count > (len(self.bin) - index):
if count <= 0 or count > (len(bin) - index):
raise LitError('Invalid character count %d' % count)
href = ''
state = 'get href'
@ -338,10 +348,11 @@ class UnBinary(object):
if frag:
path = '#'.join((path, frag))
path = urlnormalize(path)
self.buf.write((u'"%s"' % path).encode('utf-8'))
buf.write(encode(u'"%s"' % path))
state = 'get attr'
return index
class DirectoryEntry(object):
def __init__(self, name, section, offset, size):
self.name = name
@ -356,6 +367,7 @@ class DirectoryEntry(object):
def __str__(self):
return repr(self)
class ManifestItem(object):
def __init__(self, original, internal, mime_type, offset, root, state):
self.original = original
@ -383,65 +395,87 @@ class ManifestItem(object):
% (self.internal, self.path, self.mime_type, self.offset,
self.root, self.state)
def preserve(function):
def wrapper(self, *args, **kwargs):
opos = self._stream.tell()
opos = self.stream.tell()
try:
return function(self, *args, **kwargs)
finally:
self._stream.seek(opos)
self.stream.seek(opos)
functools.update_wrapper(wrapper, function)
return wrapper
class LitReader(object):
class LitFile(object):
PIECE_SIZE = 16
XML_PARSER = etree.XMLParser(
recover=True, resolve_entities=False)
def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self.stream = filename_or_stream
else:
self.stream = open(filename_or_stream, 'rb')
try:
self.opf_path = os.path.splitext(
os.path.basename(self.stream.name))[0] + '.opf'
except AttributeError:
self.opf_path = 'content.opf'
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.read_secondary_header()
self.read_header_pieces()
self.read_section_names()
self.read_manifest()
self.read_drm()
def warn(self, msg):
print "WARNING: %s" % (msg,)
def magic():
@preserve
def fget(self):
self._stream.seek(0)
return self._stream.read(8)
self.stream.seek(0)
return self.stream.read(8)
return property(fget=fget)
magic = magic()
def version():
def fget(self):
self._stream.seek(8)
return u32(self._stream.read(4))
self.stream.seek(8)
return u32(self.stream.read(4))
return property(fget=fget)
version = version()
def hdr_len():
@preserve
def fget(self):
self._stream.seek(12)
return int32(self._stream.read(4))
self.stream.seek(12)
return int32(self.stream.read(4))
return property(fget=fget)
hdr_len = hdr_len()
def num_pieces():
@preserve
def fget(self):
self._stream.seek(16)
return int32(self._stream.read(4))
self.stream.seek(16)
return int32(self.stream.read(4))
return property(fget=fget)
num_pieces = num_pieces()
def sec_hdr_len():
@preserve
def fget(self):
self._stream.seek(20)
return int32(self._stream.read(4))
self.stream.seek(20)
return int32(self.stream.read(4))
return property(fget=fget)
sec_hdr_len = sec_hdr_len()
def guid():
@preserve
def fget(self):
self._stream.seek(24)
return self._stream.read(16)
self.stream.seek(24)
return self.stream.read(16)
return property(fget=fget)
guid = guid()
@ -451,44 +485,27 @@ class LitReader(object):
size = self.hdr_len \
+ (self.num_pieces * self.PIECE_SIZE) \
+ self.sec_hdr_len
self._stream.seek(0)
return self._stream.read(size)
self.stream.seek(0)
return self.stream.read(size)
return property(fget=fget)
header = header()
def __init__(self, filename_or_stream):
if hasattr(filename_or_stream, 'read'):
self._stream = filename_or_stream
else:
self._stream = open(filename_or_stream, 'rb')
if self.magic != 'ITOLITLS':
raise LitError('Not a valid LIT file')
if self.version != 1:
raise LitError('Unknown LIT version %d' % (self.version,))
self.entries = {}
self._read_secondary_header()
self._read_header_pieces()
self._read_section_names()
self._read_manifest()
self._read_meta()
self._read_drm()
@preserve
def __len__(self):
self._stream.seek(0, 2)
return self._stream.tell()
self.stream.seek(0, 2)
return self.stream.tell()
@preserve
def _read_raw(self, offset, size):
self._stream.seek(offset)
return self._stream.read(size)
def read_raw(self, offset, size):
self.stream.seek(offset)
return self.stream.read(size)
def _read_content(self, offset, size):
return self._read_raw(self.content_offset + offset, size)
def read_content(self, offset, size):
return self.read_raw(self.content_offset + offset, size)
def _read_secondary_header(self):
def read_secondary_header(self):
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
bytes = self._read_raw(offset, self.sec_hdr_len)
bytes = self.read_raw(offset, self.sec_hdr_len)
offset = int32(bytes[4:])
while offset < len(bytes):
blocktype = bytes[offset:offset+4]
@ -516,21 +533,21 @@ class LitReader(object):
if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset')
def _read_header_pieces(self):
def read_header_pieces(self):
src = self.header[self.hdr_len:]
for i in xrange(self.num_pieces):
piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:])
piece = self._read_raw(offset, size)
piece = self.read_raw(offset, size)
if i == 0:
continue # Dont need this piece
elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece')
self._read_directory(piece)
self.read_directory(piece)
elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown:
@ -541,12 +558,13 @@ class LitReader(object):
elif i == 4:
self.piece4_guid = piece
def _read_directory(self, piece):
def read_directory(self, piece):
if not piece.startswith('IFCM'):
raise LitError('Header piece #1 is not main directory.')
chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
if (32 + (num_chunks * chunk_size)) != len(piece):
raise LitError('IFCM HEADER has incorrect length')
raise LitError('IFCM header has incorrect length')
self.entries = {}
for i in xrange(num_chunks):
offset = 32 + (i * chunk_size)
chunk = piece[offset:offset + chunk_size]
@ -580,17 +598,17 @@ class LitReader(object):
entry = DirectoryEntry(name, section, offset, size)
self.entries[name] = entry
def _read_section_names(self):
def read_section_names(self):
if '::DataSpace/NameList' not in self.entries:
raise LitError('Lit file does not have a valid NameList')
raw = self.get_file('::DataSpace/NameList')
if len(raw) < 4:
raise LitError('Invalid Namelist section')
pos = 4
self.num_sections = u16(raw[2:pos])
self.section_names = [""]*self.num_sections
self.section_data = [None]*self.num_sections
for section in xrange(self.num_sections):
num_sections = u16(raw[2:pos])
self.section_names = [""] * num_sections
self.section_data = [None] * num_sections
for section in xrange(num_sections):
size = u16(raw[pos:pos+2])
pos += 2
size = size*2 + 2
@ -600,11 +618,12 @@ class LitReader(object):
raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
pos += size
def _read_manifest(self):
def read_manifest(self):
if '/manifest' not in self.entries:
raise LitError('Lit file does not have a valid manifest')
raw = self.get_file('/manifest')
self.manifest = {}
self.paths = {self.opf_path: None}
while raw:
slen, raw = ord(raw[0]), raw[1:]
if slen == 0: break
@ -645,28 +664,9 @@ class LitReader(object):
for item in mlist:
if item.path[0] == '/':
item.path = os.path.basename(item.path)
self.paths[item.path] = item
def _pretty_print(self, xml):
f = cStringIO.StringIO(xml.encode('utf-8'))
doc = etree.parse(f, parser=self.XML_PARSER)
pretty = etree.tostring(doc, encoding='ascii', pretty_print=True)
return XML_DECL + unicode(pretty)
def _read_meta(self):
path = 'content.opf'
raw = self.get_file('/meta')
xml = OPF_DECL
try:
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
xml += unicode(UnBinary(raw, path, self.manifest, OPF_MAP))
self.meta = xml
def _read_drm(self):
def read_drm(self):
self.drmlevel = 0
if '/DRMStorage/Licenses/EUL' in self.entries:
self.drmlevel = 5
@ -677,7 +677,7 @@ class LitReader(object):
else:
return
if self.drmlevel < 5:
msdes.deskey(self._calculate_deskey(), msdes.DE1)
msdes.deskey(self.calculate_deskey(), msdes.DE1)
bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
if bookkey[0] != '\000':
raise LitError('Unable to decrypt title key!')
@ -685,7 +685,7 @@ class LitReader(object):
else:
raise DRMError("Cannot access DRM-protected book")
def _calculate_deskey(self):
def calculate_deskey(self):
hashfiles = ['/meta', '/DRMStorage/DRMSource']
if self.drmlevel == 3:
hashfiles.append('/DRMStorage/DRMBookplate')
@ -709,18 +709,18 @@ class LitReader(object):
def get_file(self, name):
entry = self.entries[name]
if entry.section == 0:
return self._read_content(entry.offset, entry.size)
return self.read_content(entry.offset, entry.size)
section = self.get_section(entry.section)
return section[entry.offset:entry.offset+entry.size]
def get_section(self, section):
data = self.section_data[section]
if not data:
data = self._get_section(section)
data = self.get_section_uncached(section)
self.section_data[section] = data
return data
def _get_section(self, section):
def get_section_uncached(self, section):
name = self.section_names[section]
path = '::DataSpace/Storage/' + name
transform = self.get_file(path + '/Transform/List')
@ -732,29 +732,29 @@ class LitReader(object):
raise LitError("ControlData is too short")
guid = msguid(transform)
if guid == DESENCRYPT_GUID:
content = self._decrypt(content)
content = self.decrypt(content)
control = control[csize:]
elif guid == LZXCOMPRESS_GUID:
reset_table = self.get_file(
'/'.join(('::DataSpace/Storage', name, 'Transform',
LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
content = self._decompress(content, control, reset_table)
content = self.decompress(content, control, reset_table)
control = control[csize:]
else:
raise LitError("Unrecognized transform: %s." % repr(guid))
transform = transform[16:]
return content
def _decrypt(self, content):
def decrypt(self, content):
length = len(content)
extra = length & 0x7
if extra > 0:
self._warn("content length not a multiple of block size")
self.warn("content length not a multiple of block size")
content += "\0" * (8 - extra)
msdes.deskey(self.bookkey, msdes.DE1)
return msdes.des(content)
def _decompress(self, content, control, reset_table):
def decompress(self, content, control, reset_table):
if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC":
raise LitError("Invalid ControlData tag value")
if len(reset_table) < (RESET_INTERVAL + 8):
@ -795,7 +795,7 @@ class LitReader(object):
result.append(
lzx.decompress(content[base:size], window_bytes))
except lzx.LZXError:
self._warn("LZX decompression error; skipping chunk")
self.warn("LZX decompression error; skipping chunk")
bytes_remaining -= window_bytes
base = size
accum += int32(reset_table[RESET_INTERVAL:])
@ -805,7 +805,7 @@ class LitReader(object):
try:
result.append(lzx.decompress(content[base:], bytes_remaining))
except lzx.LZXError:
self._warn("LZX decompression error; skipping chunk")
self.warn("LZX decompression error; skipping chunk")
bytes_remaining = 0
if bytes_remaining > 0:
raise LitError("Failed to completely decompress section")
@ -855,62 +855,51 @@ class LitReader(object):
content = self._pretty_print(content)
content = content.encode('utf-8')
else:
name = '/'.join(('/data', entry.internal))
content = self.get_file(name)
internal = '/'.join(('/data', entry.internal))
content = self._litfile.get_file(internal)
return content
def extract_content(self, output_dir=os.getcwdu(), pretty_print=False):
output_dir = os.path.abspath(output_dir)
class LitContainer(object):
"""Simple Container-interface, read-only accessor for LIT files."""
def __init__(self, filename_or_stream):
self._litfile = LitFile(filename_or_stream)
def namelist(self):
return self._litfile.paths.keys()
def exists(self, name):
return urlunquote(name) in self._litfile.paths
def read(self, name):
entry = self._litfile.paths[urlunquote(name)] if name else None
if entry is None:
content = OPF_DECL + self._read_meta()
elif 'spine' in entry.state:
internal = '/'.join(('/data', entry.internal, 'content'))
raw = self._litfile.get_file(internal)
unbin = UnBinary(raw, name, self._litfile.manifest, HTML_MAP)
content = HTML_DECL + str(unbin)
def _read_meta(self):
path = 'content.opf'
raw = self._litfile.get_file('/meta')
try:
opf_path = os.path.splitext(
os.path.basename(self._stream.name))[0] + '.opf'
except AttributeError:
opf_path = 'content.opf'
opf_path = os.path.join(output_dir, opf_path)
self._ensure_dir(opf_path)
with open(opf_path, 'wb') as f:
xml = self.meta
if pretty_print:
xml = self._pretty_print(xml)
f.write(xml.encode('utf-8'))
for entry in self.manifest.values():
path = os.path.join(output_dir, entry.path)
self._ensure_dir(path)
with open(path, 'wb') as f:
f.write(self.get_entry_content(entry, pretty_print))
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
except LitError:
if 'PENGUIN group' not in raw: raise
print "WARNING: attempting PENGUIN malformed OPF fix"
raw = raw.replace(
'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
return str(unbin)
def _ensure_dir(self, path):
dir = os.path.dirname(path)
if not os.path.isdir(dir):
os.makedirs(dir)
def _warn(self, msg):
print "WARNING: %s" % (msg,)
class LitReader(OEBReader):
Container = LitContainer
DEFAULT_PROFILE = 'MSReader'
def option_parser():
from calibre.utils.config import OptionParser
parser = OptionParser(usage=_('%prog [options] LITFILE'))
parser.add_option(
'-o', '--output-dir', default='.',
help=_('Output directory. Defaults to current directory.'))
parser.add_option(
'-p', '--pretty-print', default=False, action='store_true',
help=_('Legibly format extracted markup. May modify meaningful whitespace.'))
parser.add_option(
'--verbose', default=False, action='store_true',
help=_('Useful for debugging.'))
return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
lr = LitReader(args[1])
lr.extract_content(opts.output_dir, opts.pretty_print)
print _('OEB ebook created in'), opts.output_dir
return 0
try:
import psyco
@ -918,6 +907,3 @@ try:
psyco.bind(UnBinary.binary_to_text)
except ImportError:
pass
if __name__ == '__main__':
sys.exit(main())

View File

@ -312,7 +312,7 @@ class LitWriter(object):
cover = None
if oeb.metadata.cover:
id = str(oeb.metadata.cover[0])
cover = oeb.manifest[id]
cover = oeb.manifest.ids[id]
for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide:
oeb.guide.add(type, title, cover.href)

View File

@ -229,6 +229,9 @@ def get_metadata(stream):
mi.author = lrf.author.strip()
mi.comments = lrf.free_text.strip()
mi.category = lrf.category.strip()+', '+lrf.classification.strip()
tags = [x.strip() for x in mi.category.split(',') if x.strip()]
if tags:
mi.tags = tags
mi.publisher = lrf.publisher.strip()
mi.cover_data = lrf.get_cover()
try:
@ -624,7 +627,9 @@ def set_metadata(stream, mi):
lrf.title = mi.title
if mi.authors:
lrf.author = ', '.join(mi.authors)
if mi.category:
if mi.tags:
lrf.category = mi.tags[0]
if getattr(mi, 'category', False):
lrf.category = mi.category
if mi.comments:
lrf.free_text = mi.comments

View File

@ -207,32 +207,32 @@ class Tag(object):
s += " at %08X, contents: %s" % (self.offset, repr(self.contents))
return s
@apply
def byte():
@dynamic_property
def byte(self):
def fget(self):
if len(self.contents) != 1:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
return struct.unpack("<B", self.contents)[0]
return property(fget=fget)
@apply
def word():
@dynamic_property
def word(self):
def fget(self):
if len(self.contents) != 2:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
return struct.unpack("<H", self.contents)[0]
return property(fget=fget)
@apply
def sword():
@dynamic_property
def sword(self):
def fget(self):
if len(self.contents) != 2:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
return struct.unpack("<h", self.contents)[0]
return property(fget=fget)
@apply
def dword():
@dynamic_property
def dword(self):
def fget(self):
if len(self.contents) != 4:
raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)

View File

@ -4,9 +4,9 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
"""
Provides abstraction for metadata reading/writing from a variety of ebook formats.
Provides abstraction for metadata reading/writing from a variety of ebook formats.
"""
import os, mimetypes, sys
import os, mimetypes, sys, re
from urllib import unquote, quote
from urlparse import urlparse
@ -36,32 +36,28 @@ def author_to_author_sort(author):
def authors_to_sort_string(authors):
return ' & '.join(map(author_to_author_sort, authors))
def get_parser(extension):
''' Return an option parser with the basic metadata options already setup'''
parser = OptionParser(usage='%prog [options] myfile.'+extension+'\n\nRead and write metadata from an ebook file.')
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help=_("Set the book title"), default=None)
parser.add_option("-a", "--authors", action="store", type="string", \
dest="authors", help=_("Set the authors"), default=None)
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help=_("The category this book belongs to. E.g.: History"), default=None)
parser.add_option('--comment', dest='comment', default=None, action='store',
help=_('Set the comment'))
return parser
_title_pat = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)


def title_sort(title):
    '''
    Return a version of `title` suitable for sorting.

    If the title starts with one of the articles "A", "An" or "The"
    (case-insensitive, followed by whitespace), the article is moved to the
    end after a comma, e.g. ``'The Hobbit'`` -> ``'Hobbit, The'``. Titles
    without a leading article are returned stripped of surrounding
    whitespace.
    '''
    match = _title_pat.search(title)
    if match:
        prep = match.group(1)
        # Only drop the *leading* article. Using str.replace() here would
        # also delete every later occurrence of the same letters (turning
        # 'A Man Apart' into 'Man part, A').
        title = title[len(prep):] + ', ' + prep
    return title.strip()
class Resource(object):
'''
Represents a resource (usually a file on the filesystem or a URL pointing
Represents a resource (usually a file on the filesystem or a URL pointing
to the web). Such resources are commonly referred to in OPF files.
They have the interface:
:member:`path`
:member:`mime_type`
:method:`href`
'''
def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
self._href = None
self._basedir = basedir
@ -91,13 +87,13 @@ class Resource(object):
pc = unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
self.fragment = unquote(url[-1])
def href(self, basedir=None):
'''
Return a URL pointing to this resource. If it is a file on the filesystem
the URL is relative to `basedir`.
`basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`).
If this resource has no basedir, then the current working directory is used as the basedir.
'''
@ -119,54 +115,54 @@ class Resource(object):
if isinstance(rpath, unicode):
rpath = rpath.encode('utf-8')
return quote(rpath.replace(os.sep, '/'))+frag
def set_basedir(self, path):
self._basedir = path
def basedir(self):
return self._basedir
def __repr__(self):
return 'Resource(%s, %s)'%(repr(self.path), repr(self.href()))
class ResourceCollection(object):
def __init__(self):
self._resources = []
def __iter__(self):
for r in self._resources:
yield r
def __len__(self):
return len(self._resources)
def __getitem__(self, index):
return self._resources[index]
def __bool__(self):
return len(self._resources) > 0
def __str__(self):
resources = map(repr, self)
return '[%s]'%', '.join(resources)
def __repr__(self):
return str(self)
def append(self, resource):
if not isinstance(resource, Resource):
raise ValueError('Can only append objects of type Resource')
self._resources.append(resource)
def remove(self, resource):
self._resources.remove(resource)
def replace(self, start, end, items):
'Same as list[start:end] = items'
self._resources[start:end] = items
@staticmethod
def from_directory_contents(top, topdown=True):
collection = ResourceCollection()
@ -176,30 +172,30 @@ class ResourceCollection(object):
res.set_basedir(top)
collection.append(res)
return collection
def set_basedir(self, path):
for res in self:
res.set_basedir(path)
class MetaInformation(object):
'''Convenient encapsulation of book metadata'''
@staticmethod
def copy(mi):
    '''
    Return a new MetaInformation object built from `mi`.

    The copy is created from ``mi.title`` and ``mi.authors``; every other
    known metadata attribute that is present on `mi` is then assigned onto
    the copy. Attribute values are shared with `mi`, not deep-copied.
    '''
    ans = MetaInformation(mi.title, mi.authors)
    for attr in ('author_sort', 'title_sort', 'comments', 'category',
                 'publisher', 'series', 'series_index', 'rating',
                 'isbn', 'tags', 'cover_data', 'application_id', 'guide',
                 'manifest', 'spine', 'toc', 'cover', 'language',
                 'book_producer', 'timestamp'):
        if hasattr(mi, attr):
            setattr(ans, attr, getattr(mi, attr))
    # Without this return the method silently produced None and the freshly
    # built copy was thrown away.
    return ans
def __init__(self, title, authors=[_('Unknown')]):
'''
@param title: title or "Unknown" or a MetaInformation object
@param title: title or ``_('Unknown')`` or a MetaInformation object
@param authors: List of strings or []
'''
mi = None
@ -214,14 +210,14 @@ class MetaInformation(object):
self.tags = getattr(mi, 'tags', [])
#: mi.cover_data = (ext, data)
self.cover_data = getattr(mi, 'cover_data', (None, None))
for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
'series', 'series_index', 'rating', 'isbn', 'language',
'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
'book_producer', 'timestamp'
):
setattr(self, x, getattr(mi, x, None))
def smart_update(self, mi):
'''
Merge the information in C{mi} into self. In case of conflicts, the information
@ -229,59 +225,66 @@ class MetaInformation(object):
'''
if mi.title and mi.title != _('Unknown'):
self.title = mi.title
if mi.authors and mi.authors[0] != _('Unknown'):
self.authors = mi.authors
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer',
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide', 'book_producer',
'timestamp'):
if hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
setattr(self, attr, val)
self.tags += mi.tags
val = getattr(mi, attr, None)
if val is not None:
setattr(self, attr, val)
if mi.tags:
self.tags += mi.tags
self.tags = list(set(self.tags))
if getattr(mi, 'cover_data', None) and mi.cover_data[0] is not None:
self.cover_data = mi.cover_data
def format_series_index(self):
    '''
    Return ``self.series_index`` formatted as a string.

    Whole numbers are rendered without a fractional part (``'3'``), other
    values with two decimal places (``'1.50'``). If the index cannot be
    converted to a number, ``'1'`` is returned.
    '''
    try:
        x = float(self.series_index)
    except (ValueError, TypeError):
        # float() raises TypeError (not ValueError) for None, which is a
        # possible value when no series index has been set.
        x = 1.0
    return '%d'%x if int(x) == x else '%.2f'%x
def __unicode__(self):
    '''
    Return a multi-line unicode summary of the metadata.

    Each field that is set contributes one line of the form
    ``<label padded to 20 columns>: <value>``; the title line is always
    present. Lines are joined with newlines.
    '''
    ans = []

    def fmt(x, y):
        ans.append(u'%-20s: %s'%(unicode(x), unicode(y)))

    fmt('Title', self.title)
    if self.title_sort:
        fmt('Title sort', self.title_sort)
    if self.authors:
        fmt('Author(s)', authors_to_string(self.authors) + \
            ((' [' + self.author_sort + ']') if self.author_sort else ''))
    if self.publisher:
        fmt('Publisher', self.publisher)
    if getattr(self, 'book_producer', False):
        fmt('Book Producer', self.book_producer)
    if self.category:
        # `ans` is a list: `ans += u'...'` would extend it with the
        # individual characters of the string, so go through fmt() like
        # every other field.
        fmt('Category', self.category)
    if self.comments:
        fmt('Comments', self.comments)
    if self.isbn:
        fmt('ISBN', self.isbn)
    if self.tags:
        fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
    if self.series:
        fmt('Series', self.series + ' #%s'%self.format_series_index())
    if self.language:
        fmt('Language', self.language)
    if self.rating is not None:
        fmt('Rating', self.rating)
    if self.timestamp is not None:
        fmt('Timestamp', self.timestamp.isoformat(' '))
    return u'\n'.join(ans)
def to_html(self):
ans = [(_('Title'), unicode(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
@ -298,9 +301,9 @@ class MetaInformation(object):
for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
return u'<table>%s</table>'%u'\n'.join(ans)
def __str__(self):
return self.__unicode__().encode('utf-8')
def __nonzero__(self):
return bool(self.title or self.author or self.comments or self.category)
return bool(self.title or self.author or self.comments or self.tags)

View File

@ -0,0 +1,198 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
ebook-meta
'''
USAGE='%%prog ebook_file [' + _('options') + ']\n' + \
_('''
Read/Write metadata from/to ebook files.
Supported formats for reading metadata: %s
Supported formats for writing metadata: %s
Different file types support different kinds of metadata. If you try to set
some metadata on a file type that does not support it, the metadata will be
silently ignored.
''')
import sys, os
from calibre.utils.config import StringConfig
from calibre.customize.ui import metadata_readers, metadata_writers
from calibre.ebooks.metadata.meta import get_metadata, set_metadata
from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
title_sort, MetaInformation
from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre import prints
def config():
    '''
    Build and return the option set of the ebook-meta command line tool.

    A fresh ``StringConfig`` is created on every call and populated with one
    option per settable metadata field, plus the options controlling cover
    extraction, OPF import/export and the LRF BookID.
    '''
    c = StringConfig('')
    c.add_opt('title', ['-t', '--title'],
              help=_('Set the title.'))
    c.add_opt('authors', ['-a', '--authors'],
              help=_('Set the authors. Multiple authors should be separated '
                     'by the & character. Author names should be in the order '
                     'Firstname Lastname.'))
    c.add_opt('title_sort', ['--title-sort'],
              help=_('The version of the title to be used for sorting. '
                     'If unspecified, and the title is specified, it will '
                     'be auto-generated from the title.'))
    c.add_opt('author_sort', ['--author-sort'],
              help=_('String to be used when sorting by author. '
                     'If unspecified, and the author(s) are specified, it will '
                     'be auto-generated from the author(s).'))
    c.add_opt('cover', ['--cover'],
              help=_('Set the cover to the specified file.'))
    c.add_opt('comments', ['-c', '--comments'],
              help=_('Set the ebook description.'))
    c.add_opt('publisher', ['-p', '--publisher'],
              help=_('Set the ebook publisher.'))
    c.add_opt('category', ['--category'],
              help=_('Set the book category.'))
    c.add_opt('series', ['-s', '--series'],
              help=_('Set the series this ebook belongs to.'))
    c.add_opt('series_index', ['-i', '--index'],
              help=_('Set the index of the book in this series.'))
    c.add_opt('rating', ['-r', '--rating'],
              help=_('Set the rating. Should be a number between 1 and 5.'))
    c.add_opt('isbn', ['--isbn'],
              help=_('Set the ISBN of the book.'))
    c.add_opt('tags', ['--tags'],
              help=_('Set the tags for the book. Should be a comma separated list.'))
    c.add_opt('book_producer', ['-k', '--book-producer'],
              help=_('Set the book producer.'))
    c.add_opt('language', ['-l', '--language'],
              help=_('Set the language.'))

    c.add_opt('get_cover', ['--get-cover'],
              help=_('Get the cover from the ebook and save it as the '
                     'specified file.'))
    c.add_opt('to_opf', ['--to-opf'],
              help=_('Specify the name of an OPF file. The metadata will '
                     'be written to the OPF file.'))
    # Note the trailing space after "on the": adjacent string literals are
    # concatenated verbatim.
    c.add_opt('from_opf', ['--from-opf'],
              help=_('Read metadata from the specified OPF file and use it to '
                     'set metadata in the ebook. Metadata specified on the '
                     'command line will override metadata read from the OPF file'))

    c.add_opt('lrf_bookid', ['--lrf-bookid'],
              help=_('Set the BookID in LRF files'))
    return c
def filetypes():
    '''
    Return the set of file types declared by all metadata reader plugins
    yielded by ``metadata_readers()``.
    '''
    supported = set()
    for reader in metadata_readers():
        supported.update(set(reader.file_types))
    return supported
def option_parser():
    '''
    Return the command line option parser for ebook-meta.

    The usage text is filled in with the file types supported for reading
    (from ``filetypes()``) and for writing (collected from
    ``metadata_writers()``).
    '''
    writable = set()
    for writer in metadata_writers():
        writable.update(set(writer.file_types))
    conf = config()
    usage = USAGE % (list(filetypes()), list(writable))
    return conf.option_parser(usage)
def do_set_metadata(opts, mi, stream, stream_type):
    '''
    Apply the metadata requested on the command line to an ebook.

    :param opts: Parsed command line options (see :func:`config`).
    :param mi: The metadata currently stored in the ebook; it is copied, not
               modified.
    :param stream: The ebook file, opened for reading and writing.
    :param stream_type: The ebook format, passed through to ``set_metadata``.

    Precedence, lowest to highest: existing metadata, metadata read from the
    ``--from-opf`` file, individual command line options.
    '''
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)

    from_opf = getattr(opts, 'from_opf', None)
    if from_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPF
        # Keep the OPF file open only while it is being read, and make sure
        # the handle is closed afterwards.
        with open(from_opf, 'rb') as opf_stream:
            opf_mi = MetaInformation(OPF(opf_stream))
        mi.smart_update(opf_mi)

    # Options that map one-to-one onto MetaInformation attributes; the ones
    # skipped here need special handling below or are not metadata at all.
    for pref in config().option_set.preferences:
        if pref.name in ('to_opf', 'from_opf', 'authors', 'title_sort',
                         'author_sort', 'get_cover', 'cover', 'tags',
                         'lrf_bookid'):
            continue
        val = getattr(opts, pref.name, None)
        if val is not None:
            setattr(mi, pref.name, val)

    if getattr(opts, 'authors', None) is not None:
        mi.authors = string_to_authors(opts.authors)
        mi.author_sort = authors_to_sort_string(mi.authors)
    # An explicit --author-sort overrides the auto-generated value.
    if getattr(opts, 'author_sort', None) is not None:
        mi.author_sort = opts.author_sort
    if getattr(opts, 'title_sort', None) is not None:
        mi.title_sort = opts.title_sort
    elif getattr(opts, 'title', None) is not None:
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
        with open(opts.cover, 'rb') as cover_file:
            mi.cover_data = (ext, cover_file.read())

    set_metadata(stream, mi, stream_type)
def main(args=sys.argv):
    '''
    Command line entry point: print the metadata of an ebook file and, when
    any metadata option was given, update the file and print the result.

    :param args: Argument list; ``args[1]`` is the path of the ebook.
    :return: 1 when no file was specified, otherwise 0.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        prints(_('No file specified'), file=sys.stderr)
        return 1
    path = args[1]
    stream = open(path, 'r+b')
    try:
        stream_type = os.path.splitext(path)[1].replace('.', '').lower()

        # Any option other than the two pure "export" options means the
        # user wants to change the metadata.
        trying_to_set = False
        for pref in config().option_set.preferences:
            if pref.name in ('to_opf', 'get_cover'):
                continue
            if getattr(opts, pref.name) is not None:
                trying_to_set = True
                break

        mi = get_metadata(stream, stream_type)
        if trying_to_set:
            prints(_('Original metadata')+'::')
        metadata = unicode(mi)
        if trying_to_set:
            metadata = '\t'+'\n\t'.join(metadata.split('\n'))
        prints(metadata)

        if trying_to_set:
            stream.seek(0)
            do_set_metadata(opts, mi, stream, stream_type)
            stream.seek(0)
            stream.flush()
            lrf = None
            if stream_type == 'lrf':
                if opts.lrf_bookid is not None:
                    lrf = LRFMetaFile(stream)
                    lrf.book_id = opts.lrf_bookid
            # Re-read so that what is printed is what is now in the file
            mi = get_metadata(stream, stream_type)
            prints('\n' + _('Changed metadata') + '::')
            metadata = unicode(mi)
            metadata = '\t'+'\n\t'.join(metadata.split('\n'))
            prints(metadata)
            if lrf is not None:
                prints('\tBookID:', lrf.book_id)
    finally:
        # Only `mi` is needed from here on
        stream.close()

    if opts.to_opf is not None:
        from calibre.ebooks.metadata.opf2 import OPFCreator
        opf = OPFCreator(os.getcwdu(), mi)
        # The option is named to_opf; the original code read opts.opf here
        with open(opts.to_opf, 'wb') as f:
            opf.render(f)
        prints(_('OPF created in'), opts.to_opf)

    if opts.get_cover is not None:
        if mi.cover_data and mi.cover_data[1]:
            with open(opts.get_cover, 'wb') as f:
                f.write(mi.cover_data[1])
                prints(_('Cover saved to'), f.name)
        else:
            prints(_('No cover found'), file=sys.stderr)

    return 0

if __name__ == '__main__':
    sys.exit(main())

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from epub files'''
import sys, os, time
import os, time
from cStringIO import StringIO
from contextlib import closing
@ -15,7 +15,7 @@ from PyQt4.QtWebKit import QWebPage
from calibre.utils.zipfile import ZipFile, BadZipfile, safe_replace
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import get_parser, MetaInformation
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
@ -191,67 +191,10 @@ def get_metadata(stream, extract_cover=True):
def set_metadata(stream, mi):
    '''
    Update the OPF of the EPUB in `stream` from `mi` and replace the OPF
    entry in the archive with the re-rendered document.
    '''
    stream.seek(0)
    ocf = OCFZipReader(stream, root=os.getcwdu())
    # Build a fresh MetaInformation and blank its structural attributes
    # before merging it into the OPF.
    mi = MetaInformation(mi)
    for attr in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, attr, None)
    ocf.opf.smart_update(mi)
    rendered = ocf.opf.render()
    opf_name = ocf.container[OPF.MIMETYPE]
    safe_replace(stream, opf_name, StringIO(rendered))
def option_parser():
    '''
    Return the parser from ``get_parser('epub')`` with ``--category`` removed
    and the EPUB specific options added.
    '''
    parser = get_parser('epub')
    parser.remove_option('--category')
    value_options = (
        ('--tags', _('A comma separated list of tags to set')),
        ('--series', _('The series to which this book belongs')),
        ('--series-index', _('The series index')),
        ('--language', _('The book language')),
    )
    for flag, help_text in value_options:
        parser.add_option(flag, default=None, help=help_text)
    parser.add_option('--get-cover', default=False, action='store_true',
                      help=_('Extract the cover'))
    return parser
# Command line entry point for EPUB metadata: reads the metadata of the file
# given as the single positional argument, applies the options that were set,
# prints the resulting metadata and saves the cover data if there is any.
# Returns 1 (after printing the help) when the argument count is wrong and 0
# otherwise.
# NOTE(review): indentation is not visible in this view, so the nesting of
# the statements after the option checks should be confirmed against the file.
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
with open(args[1], 'r+b') as stream:
mi = get_metadata(stream, extract_cover=opts.get_cover)
# Track whether any option actually modified the metadata
changed = False
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if opts.tags:
mi.tags = opts.tags.split(',')
changed = True
if opts.comment:
mi.comments = opts.comment
changed = True
if opts.series:
mi.series = opts.series
changed = True
if opts.series_index:
mi.series_index = opts.series_index
changed = True
if opts.language is not None:
mi.language = opts.language
changed = True
# The file is only rewritten when something changed
if changed:
set_metadata(stream, mi)
# Re-read the metadata (without the cover) and print it UTF-8 encoded
print unicode(get_metadata(stream, extract_cover=False)).encode('utf-8')
if mi.cover_data[1] is not None:
# The cover is written to the relative path <input base name>_cover.jpg
cpath = os.path.splitext(os.path.basename(args[1]))[0] + '_cover.jpg'
with open(cpath, 'wb') as f:
f.write(mi.cover_data[1])
print 'Cover saved to', f.name
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -48,15 +48,3 @@ def get_metadata(stream):
if cdata:
mi.cover_data = cdata
return mi
def main(args=sys.argv):
if len(args) != 2 or '--help' in args or '-h' in args:
print >>sys.stderr, _('Usage:'), args[0], 'mybook.fb2'
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print unicode(get_metadata(open(path, 'rb')))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -46,17 +46,3 @@ def get_metadata(stream):
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8')
return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: imp-meta file.imp')
print >>sys.stderr, _('No filename specified.')
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print get_metadata(open(path, 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -30,21 +30,3 @@ def get_metadata(stream):
mi.cover_data = ('jpg', covers[-1])
return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: %s file.lit') % args[0]
return 1
fname = args[1]
mi = get_metadata(open(fname, 'rb'))
print unicode(mi)
if mi.cover_data[1]:
cover = os.path.abspath(
'.'.join((os.path.splitext(os.path.basename(fname))[0],
mi.cover_data[0])))
open(cover, 'wb').write(mi.cover_data[1])
print _('Cover saved to'), cover
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -80,10 +80,3 @@ def get_metadata(f):
else:
raise ValueError('Not a LRX file')
def main(args=sys.argv):
print get_metadata(open(args[1], 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -13,7 +13,6 @@ import sys
import os
from struct import pack, unpack
from cStringIO import StringIO
from calibre.ebooks.metadata import get_parser
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader import get_metadata
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
@ -178,63 +177,3 @@ def set_metadata(stream, mi):
mu = MetadataUpdater(stream)
mu.update(mi)
return
def option_parser():
    '''
    Return the parser from ``get_parser('mobi')`` with ``--category`` removed
    and the MOBI specific options added.
    '''
    parser = get_parser('mobi')
    parser.remove_option('--category')
    extra_options = (
        ('--tags', _('Set the subject tags')),
        ('--language', _('Set the language')),
        ('--publisher', _('Set the publisher')),
        ('--isbn', _('Set the ISBN')),
    )
    for flag, help_text in extra_options:
        parser.add_option(flag, default=None, help=help_text)
    return parser
# Command line entry point for MOBI metadata: reads the metadata of the file
# given as the single positional argument, applies the options that were
# set, prints the metadata and, when nothing was changed, saves the cover.
# Returns 1 (after printing help and a usage line) when the argument count
# is wrong and 0 otherwise.
# NOTE(review): indentation is not visible in this view, so the nesting of
# the statements after the option checks should be confirmed against the file.
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print >>sys.stderr, 'Usage: %s file.mobi' % args[0]
return 1
fname = args[1]
# Track whether any option actually modified the metadata
changed = False
with open(fname, 'r+b') as stream:
mi = get_metadata(stream)
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if opts.comment:
mi.comments = opts.comment
changed = True
if opts.tags is not None:
mi.tags = opts.tags.split(',')
changed = True
if opts.language is not None:
mi.language = opts.language
changed = True
if opts.publisher is not None:
mi.publisher = opts.publisher
changed = True
if opts.isbn is not None:
mi.isbn = opts.isbn
changed = True
# The file is only rewritten when something changed
if changed:
set_metadata(stream, mi)
print unicode(get_metadata(stream))
# The cover is only extracted on a read-only run
if not changed and mi.cover_data[1]:
# Cover file name: <input base name>.<lower-cased cover type>, made absolute
cover = os.path.abspath(
'.'.join((os.path.splitext(os.path.basename(fname))[0],
mi.cover_data[0].lower())))
open(cover, 'wb').write(mi.cover_data[1])
print _('Cover saved to'), cover
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -164,103 +164,3 @@ def get_metadata(stream):
return mi
def main(args=sys.argv):
if len(args) != 2:
print 'Usage: %s file.odt'%args[0]
return 1
mi = get_metadata(open(args[1], 'rb'))
print mi
return 0
if __name__ == '__main__':
sys.exit(main())
#now = time.localtime()[:6]
#outputfile = "-"
#writemeta = False # Do we change any meta data?
#usenormalize = False
#
#try:
# opts, args = getopt.getopt(sys.argv[1:], "cdlI:A:a:o:x:X:")
#except getopt.GetoptError:
# exitwithusage()
#
#if len(opts) == 0:
# opts = [ ('-l','') ]
#
#for o, a in opts:
# if o in ('-a','-A','-I'):
# writemeta = True
# if a.find(":") >= 0:
# k,v = a.split(":",1)
# else:
# k,v = (a, "")
# if len(k) == 0:
# exitwithusage()
# k = fields.get(k,k)
# addfields[k] = unicode(v,'utf-8')
# if o == '-a':
# yieldfields[k] = True
# if o == '-I':
# deletefields[k] = True
# if o == '-d':
# writemeta = True
# addfields[(DCNS,u'date')] = "%04d-%02d-%02dT%02d:%02d:%02d" % now
# deletefields[(DCNS,u'date')] = True
# if o == '-c':
# usenormalize = True
# if o == '-l':
# Xfields = fields.values()
# if o == "-x":
# xfields.append(fields.get(a,a))
# if o == "-X":
# Xfields.append(fields.get(a,a))
# if o == "-o":
# outputfile = a
#
## The specification says we should change the element to our own,
## and must not export the original identifier.
#if writemeta:
# addfields[(METANS,u'generator')] = TOOLSVERSION
# deletefields[(METANS,u'generator')] = True
#
#odfs = odfmetaparser()
#parser = xml.sax.make_parser()
#parser.setFeature(xml.sax.handler.feature_namespaces, 1)
#parser.setContentHandler(odfs)
#
#if len(args) == 0:
# zin = zipfile.ZipFile(sys.stdin,'r')
#else:
# if not zipfile.is_zipfile(args[0]):
# exitwithusage()
# zin = zipfile.ZipFile(args[0], 'r')
#
#content = zin.read('meta.xml')
#parser.parse(StringIO(content))
#
#if writemeta:
# if outputfile == '-':
# if sys.stdout.isatty():
# sys.stderr.write("Won't write ODF file to terminal\n")
# sys.exit(1)
# zout = zipfile.ZipFile(sys.stdout,"w")
# else:
# zout = zipfile.ZipFile(outputfile,"w")
#
#
#
# # Loop through the input zipfile and copy the content to the output until we
# # get to the meta.xml. Then substitute.
# for zinfo in zin.infolist():
# if zinfo.filename == "meta.xml":
# # Write meta
# zi = zipfile.ZipInfo("meta.xml", now)
# zi.compress_type = zipfile.ZIP_DEFLATED
# zout.writestr(zi,odfs.meta() )
# else:
# payload = zin.read(zinfo.filename)
# zout.writestr(zinfo, payload)
#
# zout.close()
#zin.close()

View File

@ -11,7 +11,7 @@ from calibre.constants import __appname__, __version__
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
from calibre.ebooks.lrf import entity_to_unicode
from calibre.ebooks.metadata import get_parser, Resource, ResourceCollection
from calibre.ebooks.metadata import Resource, ResourceCollection
from calibre.ebooks.metadata.toc import TOC
class OPFSoup(BeautifulStoneSoup):
@ -38,8 +38,8 @@ class ManifestItem(Resource):
res.mime_type = mt
return res
@apply
def media_type():
@dynamic_property
def media_type(self):
def fget(self):
return self.mime_type
def fset(self, val):
@ -242,14 +242,14 @@ class OPF(MetaInformation):
def __init__(self):
raise NotImplementedError('Abstract base class')
@apply
def package():
@dynamic_property
def package(self):
def fget(self):
return self.soup.find(re.compile('package'))
return property(fget=fget)
@apply
def metadata():
@dynamic_property
def metadata(self):
def fget(self):
return self.package.find(re.compile('metadata'))
return property(fget=fget)
@ -540,46 +540,4 @@ class OPFCreator(MetaInformation):
if toc is not None and ncx_stream is not None:
toc.render(ncx_stream, self.application_id)
ncx_stream.flush()
# Return the command line parser for this tool, i.e. the result of
# get_parser('opf').
def option_parser():
return get_parser('opf')
# Command line entry point for OPF metadata: reads the OPF file given as the
# single positional argument, applies the title/authors/category/comment
# options that were set, rewrites the OPF (and an NCX file when one is
# rendered) if anything changed, then prints the metadata re-read from disk.
# Returns 1 (after printing the help) when the argument count is wrong and 0
# otherwise.
# NOTE(review): indentation is not visible in this view, so the nesting of
# the statements should be confirmed against the file.
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
mi = MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
write = False
# Each value has &, < and > replaced by the corresponding XML entity
if opts.title is not None:
mi.title = opts.title.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if opts.authors is not None:
aus = [i.strip().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') for i in opts.authors.split(',')]
mi.authors = aus
write = True
if opts.category is not None:
mi.category = opts.category.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if opts.comment is not None:
mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if write:
mo = OPFCreator(os.path.dirname(args[1]), mi)
# The NCX is rendered into memory first; it is only written out if non-empty
ncx = cStringIO.StringIO()
mo.render(open(args[1], 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
# Reuse the first existing *.ncx next to the OPF, else <opf base name>.ncx
f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
if f:
f = open(f[0], 'wb')
else:
f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
f.write(ncx)
f.close()
print MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -12,7 +12,7 @@
<dc:identifier opf:scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
<dc:date py:if="getattr(mi, 'timestamp', None) is not None">${mi.timestamp.isoformat()}</dc:date>
<dc:language>${mi.language if mi.language else 'UND'}</dc:language>
<dc:type py:if="mi.category">${mi.category}</dc:type>
<dc:type py:if="getattr(mi, 'category', False)">${mi.category}</dc:type>
<dc:description py:if="mi.comments">${mi.comments}</dc:description>
<dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
<dc:identifier opf:scheme="ISBN" py:if="mi.isbn">${mi.isbn}</dc:identifier>

View File

@ -169,8 +169,8 @@ class ManifestItem(Resource):
res.mime_type = mt
return res
@apply
def media_type():
@dynamic_property
def media_type(self):
def fget(self):
return self.mime_type
def fset(self, val):
@ -608,8 +608,8 @@ class OPF(object):
for item in self.iterguide():
item.set('href', get_href(item))
@apply
def authors():
@dynamic_property
def authors(self):
def fget(self):
ans = []
@ -628,8 +628,8 @@ class OPF(object):
return property(fget=fget, fset=fset)
@apply
def author_sort():
@dynamic_property
def author_sort(self):
def fget(self):
matches = self.authors_path(self.metadata)
@ -651,8 +651,8 @@ class OPF(object):
return property(fget=fget, fset=fset)
@apply
def title_sort():
@dynamic_property
def title_sort(self):
def fget(self):
matches = self.title_path(self.metadata)
@ -674,8 +674,28 @@ class OPF(object):
return property(fget=fget, fset=fset)
@apply
def tags():
# Property exposing the sort form of the title, kept in the ``file-as``
# attribute of the elements matched by ``self.title_path(self.metadata)``.
# NOTE(review): a ``title_sort`` property that begins the same way appears
# immediately before this one in the same class; this looks like a
# duplicated definition -- confirm and keep only one.
@dynamic_property
def title_sort(self):
# Getter: return the first non-empty file-as value, trying the
# opf-namespaced attribute before the plain one. Falls through (returning
# None implicitly) when nothing matches.
def fget(self):
matches = self.title_path(self.metadata)
if matches:
for match in matches:
ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
if not ans:
ans = match.get('file-as', None)
if ans:
return ans
# Setter: store the value as the plain ``file-as`` attribute of the first
# match; does nothing when there is no match.
def fset(self, val):
matches = self.title_path(self.metadata)
if matches:
matches[0].set('file-as', unicode(val))
return property(fget=fget, fset=fset)
@dynamic_property
def tags(self):
def fget(self):
ans = []
@ -692,8 +712,8 @@ class OPF(object):
return property(fget=fget, fset=fset)
@apply
def isbn():
@dynamic_property
def isbn(self):
def fget(self):
for match in self.isbn_path(self.metadata):
@ -709,8 +729,8 @@ class OPF(object):
return property(fget=fget, fset=fset)
@apply
def application_id():
@dynamic_property
def application_id(self):
def fget(self):
for match in self.application_id_path(self.metadata):
@ -726,8 +746,8 @@ class OPF(object):
return property(fget=fget, fset=fset)
@apply
def book_producer():
@dynamic_property
def book_producer(self):
def fget(self):
for match in self.bkp_path(self.metadata):
@ -764,8 +784,8 @@ class OPF(object):
return cpath
@apply
def cover():
@dynamic_property
def cover(self):
def fget(self):
if self.guide is not None:
@ -1001,62 +1021,19 @@ class OPFTest(unittest.TestCase):
self.opf.smart_update(MetaInformation(self.opf))
self.testReading()
def testCreator(self):
    '''
    Render ``self.opf`` with `OPFCreator`, parse the output again as an
    `OPF` and run `testReading` on the result.
    '''
    creator = OPFCreator(os.getcwd(), self.opf)
    rendered = cStringIO.StringIO()
    creator.render(rendered)
    reparsed = OPF(cStringIO.StringIO(rendered.getvalue()), os.getcwd())
    self.testReading(opf=reparsed)
# Update self.opf from itself via smart_update and then run testReading again.
def testSmartUpdate(self):
self.opf.smart_update(self.opf)
self.testReading()
# Return a test suite holding all the test methods of OPFTest.
def suite():
return unittest.TestLoader().loadTestsFromTestCase(OPFTest)
# Run the suite returned by suite() with a text runner at verbosity 2.
def test():
unittest.TextTestRunner(verbosity=2).run(suite())
def option_parser():
    '''
    Return the parser from ``get_parser('opf')`` extended with a
    ``--language`` option.
    '''
    from calibre.ebooks.metadata import get_parser
    opf_parser = get_parser('opf')
    language_help = _('Set the dc:language field')
    opf_parser.add_option('--language', default=None, help=language_help)
    return opf_parser
# Command line entry point for OPF metadata: reads the OPF file given as the
# single positional argument, applies the title/authors/category/comment/
# language options that were set, rewrites the OPF (and an NCX file when one
# is rendered) if anything changed, then prints the metadata re-read from
# disk. Returns 1 (after printing the help) when the argument count is wrong
# and 0 otherwise.
# NOTE(review): indentation is not visible in this view, so the nesting of
# the statements should be confirmed against the file.
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
opfpath = os.path.abspath(args[1])
basedir = os.path.dirname(opfpath)
mi = MetaInformation(OPF(open(opfpath, 'rb'), basedir))
write = False
if opts.title is not None:
mi.title = opts.title
write = True
if opts.authors is not None:
aus = [i.strip() for i in opts.authors.split(',')]
mi.authors = aus
write = True
if opts.category is not None:
mi.category = opts.category
write = True
if opts.comment is not None:
mi.comments = opts.comment
write = True
if opts.language is not None:
mi.language = opts.language
write = True
if write:
mo = OPFCreator(basedir, mi)
# The NCX is rendered into memory first; it is only written out if non-empty
ncx = cStringIO.StringIO()
mo.render(open(args[1], 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
# Reuse the first existing *.ncx next to the OPF, else <opf base name>.ncx
f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
if f:
f = open(f[0], 'wb')
else:
f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
f.write(ncx)
f.close()
print MetaInformation(OPF(open(opfpath, 'rb'), basedir))
return 0
if __name__ == '__main__':
sys.exit(main())
unittest.TextTestRunner(verbosity=2).run(suite())

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from RB files'''
import sys, os, struct
import sys, struct
from calibre.ebooks.metadata import MetaInformation
@ -53,16 +53,4 @@ def get_metadata(stream):
raise
return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: rb-meta file.rb')
print >>sys.stderr, _('No filename specified.')
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print get_metadata(open(path, 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -5,7 +5,7 @@ Edit metadata in RTF files.
"""
import re, cStringIO, sys
from calibre.ebooks.metadata import MetaInformation, get_parser
from calibre.ebooks.metadata import MetaInformation
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@ -166,22 +166,3 @@ def set_metadata(stream, options):
stream.write(src)
stream.write(after)
# Return the command line parser for this tool, i.e. the result of
# get_parser('rtf').
def option_parser():
return get_parser('rtf')
def main(args=sys.argv):
    '''
    Set the metadata of the RTF file named by ``args[1]`` from the command
    line options and return the metadata read back from the file.

    Exits with status 1 (after printing the help) when the argument count
    is not exactly two.
    '''
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)
    rtf_stream = open(args[1], 'r+b')
    # set_metadata receives the options object itself, so shape it first:
    # authors become a list and the comment is exposed as ``comments``.
    if options.authors:
        options.authors = options.authors.split(',')
    options.comments = options.comment
    set_metadata(rtf_stream, options)
    return get_metadata(rtf_stream)

if __name__ == '__main__':
    main()

View File

@ -86,8 +86,8 @@ class TOC(list):
for i in obj.flat():
yield i
@apply
def abspath():
@dynamic_property
def abspath(self):
doc='Return the file this toc entry points to as a absolute path to a file on the system.'
def fget(self):
if self.href is None:
@ -208,4 +208,4 @@ class TOC(list):
template = MarkupTemplate(ncx_template)
raw = template.generate(uid=uid, toc=self, __appname__=__appname__)
raw = raw.render(doctype=doctype)
stream.write(raw)
stream.write(raw)

View File

@ -0,0 +1,29 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin):
    '''Input plugin that converts MOBI files by running `MobiReader` on them.'''

    name        = 'MOBI Input'
    author      = 'Kovid Goyal'
    description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
    file_types  = set(['mobi', 'prc', 'azw'])

    def convert(self, stream, options, file_ext, parse_cache, log):
        '''
        Extract the book in `stream` into the current working directory.

        :param stream: The MOBI file, passed to `MobiReader`.
        :param options: Conversion options; ``input_encoding`` and
                        ``debug_input`` are passed to `MobiReader`.
        :param file_ext: Not used by this plugin.
        :param parse_cache: Mapping handed to ``extract_content``. If it
                            afterwards holds a true value under
                            ``'calibre_raw_mobi_markup'``, that markup is
                            written to ``debug-raw.html``.
        :param log: Logger passed to `MobiReader`.
        :return: ``created_opf_path`` of the reader.
        '''
        from calibre.ebooks.mobi.reader import MobiReader
        mr = MobiReader(stream, log, options.input_encoding,
                        options.debug_input)
        # Both arguments are passed positionally: a positional argument
        # after a keyword argument is a syntax error.
        mr.extract_content(os.getcwdu(), parse_cache)
        raw = parse_cache.get('calibre_raw_mobi_markup', False)
        if raw:
            if isinstance(raw, unicode):
                raw = raw.encode('utf-8')
            with open('debug-raw.html', 'wb') as debug_file:
                debug_file.write(raw)
        return mr.created_opf_path

View File

@ -80,7 +80,20 @@ class MobiMLizer(object):
def __init__(self, ignore_tables=False):
self.ignore_tables = ignore_tables
def transform(self, oeb, context):
@classmethod
def config(cls, cfg):
    '''
    Add the ``mobiml`` option group, holding the ``--ignore-tables`` option,
    to `cfg` and return `cfg`.
    '''
    mobiml_group = cfg.add_group('mobiml', _('Mobipocket markup options.'))
    ignore_tables_help = _(
        'Render HTML tables as blocks of text instead of actual '
        'tables. This is neccessary if the HTML contains very '
        'large or complex tables.')
    mobiml_group('ignore_tables', ['--ignore-tables'], default=False,
                 help=ignore_tables_help)
    return cfg
@classmethod
def generate(cls, opts):
    '''Return an instance whose ``ignore_tables`` is taken from `opts`.'''
    skip_tables = opts.ignore_tables
    return cls(ignore_tables=skip_tables)
def __call__(self, oeb, context):
oeb.logger.info('Converting XHTML to Mobipocket markup...')
self.oeb = oeb
self.profile = profile = context.dest

View File

@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files
'''
import sys, struct, os, cStringIO, re, functools
import struct, os, cStringIO, re, functools
try:
from PIL import Image as PILImage
@ -35,8 +35,10 @@ class EXTHHeader(object):
pos = 0
self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
self.has_fake_cover = True
left = self.num_items
for i in range(self.num_items):
while left > 0:
left -= 1
id, size = struct.unpack('>LL', raw[pos:pos+8])
content = raw[pos+8:pos+size]
pos += size
@ -76,7 +78,8 @@ class EXTHHeader(object):
class BookHeader(object):
def __init__(self, raw, ident):
def __init__(self, raw, ident, user_encoding, log):
self.log = log
self.compression_type = raw[:2]
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
self.encryption_type, = struct.unpack('>H', raw[12:14])
@ -92,8 +95,8 @@ class BookHeader(object):
else:
self.ancient = False
self.doctype = raw[16:20]
self.length, self.type, self.codepage, self.unique_id, self.version = \
struct.unpack('>LLLLL', raw[20:40])
self.length, self.type, self.codepage, self.unique_id, \
self.version = struct.unpack('>LLLLL', raw[20:40])
try:
@ -102,8 +105,9 @@ class BookHeader(object):
65001 : 'utf-8',
}[self.codepage]
except (IndexError, KeyError):
print '[WARNING] Unknown codepage %d. Assuming cp-1252'%self.codepage
self.codec = 'cp1252'
self.codec = 'cp1252' if user_encoding is None else user_encoding
log.warn('Unknown codepage %d. Assuming %s'%(self.codepage,
self.codec))
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
self.extra_flags = 0
@ -138,9 +142,24 @@ class MobiReader(object):
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
def __init__(self, filename_or_stream, verbose=False):
self.verbose = verbose
def __init__(self, filename_or_stream, log, user_encoding=None, debug=None):
self.log = log
self.debug = debug
self.embedded_mi = None
self.base_css_rules = '''
blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
p { margin: 0em; text-align: justify }
.bold { font-weight: bold }
.italic { font-style: italic }
.mbp_pagebreak {
page-break-after: always; margin: 0; display: block
}
'''
self.tag_css_rules = []
if hasattr(filename_or_stream, 'read'):
stream = filename_or_stream
@ -177,17 +196,21 @@ class MobiReader(object):
self.sections.append((section(i), self.section_headers[i]))
self.book_header = BookHeader(self.sections[0][0], self.ident)
self.book_header = BookHeader(self.sections[0][0], self.ident,
user_encoding, self.log)
self.name = self.name.decode(self.book_header.codec, 'replace')
def extract_content(self, output_dir=os.getcwdu()):
def extract_content(self, output_dir, parse_cache):
output_dir = os.path.abspath(output_dir)
if self.book_header.encryption_type != 0:
raise DRMError(self.name)
processed_records = self.extract_text()
if self.debug is not None:
self.parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
self.add_anchors()
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
self.processed_html = self.processed_html.decode(self.book_header.codec,
'ignore')
for pat in ENCODING_PATS:
self.processed_html = pat.sub('', self.processed_html)
e2u = functools.partial(entity_to_unicode,
@ -203,16 +226,10 @@ class MobiReader(object):
self.processed_html = \
re.compile('<head>', re.IGNORECASE).sub(
'\n<head>\n'
'<style type="text/css">\n'
'blockquote { margin: 0em 0em 0em 1.25em; text-align: justify; }\n'
'p { margin: 0em; text-align: justify; }\n'
'.bold { font-weight: bold; }\n'
'.italic { font-style: italic; }\n'
'</style>\n',
'\t<link type="text/css" href="styles.css" />\n',
self.processed_html)
if self.verbose:
print 'Parsing HTML...'
self.log.debug('Parsing HTML...')
root = html.fromstring(self.processed_html)
self.upshift_markup(root)
guides = root.xpath('//guide')
@ -230,25 +247,23 @@ class MobiReader(object):
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
except AttributeError:
pass
if self.verbose:
print 'Serializing...'
with open(htmlfile, 'wb') as f:
raw = html.tostring(root, encoding='utf-8', method='xml',
include_meta_content_type=True, pretty_print=True)
raw = raw.replace('<head>',
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
f.write(raw)
parse_cache[htmlfile] = root
self.htmlfile = htmlfile
if self.book_header.exth is not None or self.embedded_mi is not None:
if self.verbose:
print 'Creating OPF...'
ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
self.log.debug('Creating OPF...')
ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root)
self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
opf.render(open(self.created_opf_path, 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
with open('styles.css', 'wb') as s:
s.write(self.base_css_rules+'\n\n')
for rule in self.tag_css_rules:
if isinstance(rule, unicode):
rule = rule.encode('utf-8')
s.write(rule+'\n\n')
def read_embedded_metadata(self, root, elem, guide):
raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
@ -274,11 +289,9 @@ class MobiReader(object):
elem.getparent().remove(elem)
break
break
def cleanup_html(self):
if self.verbose:
print 'Cleaning up HTML...'
self.log.debug('Cleaning up HTML...')
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
@ -286,8 +299,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('> <', '>\n<')
def upshift_markup(self, root):
if self.verbose:
print 'Converting style information to CSS...'
self.log.debug('Converting style information to CSS...')
size_map = {
'xx-small' : '0.5',
'x-small' : '1',
@ -298,7 +310,7 @@ class MobiReader(object):
'xx-large' : '6',
}
mobi_version = self.book_header.mobi_version
for tag in root.iter(etree.Element):
for i, tag in enumerate(root.iter(etree.Element)):
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city'):
tag.tag = 'span'
@ -352,8 +364,7 @@ class MobiReader(object):
elif tag.tag == 'pre':
if not tag.text:
tag.tag = 'div'
if styles:
attrib['style'] = '; '.join(styles)
if 'filepos-id' in attrib:
attrib['id'] = attrib.pop('filepos-id')
if 'filepos' in attrib:
@ -362,15 +373,24 @@ class MobiReader(object):
attrib['href'] = "#filepos%d" % int(filepos)
except ValueError:
pass
if styles:
attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i)
self.tag_css_rules.append('#%s {%s}'%(attrib['id'],
'; '.join(styles)))
def create_opf(self, htmlfile, guide=None, root=None):
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
if mi is None:
mi = MetaInformation(self.title, [_('Unknown')])
opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'):
opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
elif mi.cover is not None:
opf.cover = mi.cover
manifest = [(htmlfile, 'text/x-oeb1-document')]
manifest = [(htmlfile, 'text/x-oeb1-document'),
(os.path.abspath('styles.css'), 'text/css')]
bp = os.path.dirname(htmlfile)
for i in getattr(self, 'image_names', []):
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
@ -441,8 +461,7 @@ class MobiReader(object):
return data[:len(data)-trail_size]
def extract_text(self):
if self.verbose:
print 'Extracting text...'
self.log.debug('Extracting text...')
text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
processed_records = list(range(0, self.book_header.records+1))
@ -472,12 +491,11 @@ class MobiReader(object):
def replace_page_breaks(self):
self.processed_html = self.PAGE_BREAK_PAT.sub(
'<div class="mbp_pagebreak" style="page-break-after: always; margin: 0; display: block" />',
'<div class="mbp_pagebreak" />',
self.processed_html)
def add_anchors(self):
if self.verbose:
print 'Adding anchors...'
self.log.debug('Adding anchors...')
positions = set([])
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
re.IGNORECASE)
@ -507,8 +525,7 @@ class MobiReader(object):
def extract_images(self, processed_records, output_dir):
if self.verbose:
print 'Extracting images...'
self.log.debug('Extracting images...')
output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
if not os.path.exists(output_dir):
os.makedirs(output_dir)
@ -535,14 +552,17 @@ class MobiReader(object):
im.convert('RGB').save(open(path, 'wb'), format='JPEG')
def get_metadata(stream):
mr = MobiReader(stream)
from calibre.utils.logging import Log
log = Log()
mr = MobiReader(stream, log)
if mr.book_header.exth is None:
mi = MetaInformation(mr.name, [_('Unknown')])
else:
mi = mr.create_opf('dummy.html')
try:
if hasattr(mr.book_header.exth, 'cover_offset'):
cover_index = mr.book_header.first_image_index + mr.book_header.exth.cover_offset
cover_index = mr.book_header.first_image_index + \
mr.book_header.exth.cover_offset
data = mr.sections[int(cover_index)][0]
else:
data = mr.sections[mr.book_header.first_image_index][0]
@ -552,42 +572,6 @@ def get_metadata(stream):
im.convert('RGBA').save(obuf, format='JPEG')
mi.cover_data = ('jpg', obuf.getvalue())
except:
import traceback
traceback.print_exc()
log.exception()
return mi
def option_parser():
    '''
    Return the command line parser with the ``--output-dir`` and
    ``--verbose`` options.
    '''
    from calibre.utils.config import OptionParser
    usage = _('%prog [options] myebook.mobi')
    parser = OptionParser(usage=usage)
    output_dir_help = _('Output directory. Defaults to current directory.')
    parser.add_option('-o', '--output-dir', default='.',
                      help=output_dir_help)
    parser.add_option('-v', '--verbose', default=False, action='store_true',
                      help='Useful for debugging.')
    return parser
# Command line entry point: extract the MOBI file given as the single
# positional argument into the (absolutized) output directory. With
# --verbose the raw MOBI markup is additionally saved as debug-raw.html in
# that directory. Returns 1 (after printing the help) when the argument
# count is wrong and 0 otherwise.
# NOTE(review): indentation is not visible in this view; the final
# "OEB ebook created in" message is presumably printed regardless of
# --verbose -- confirm against the file.
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
mr = MobiReader(args[1], verbose=opts.verbose)
opts.output_dir = os.path.abspath(opts.output_dir)
mr.extract_content(opts.output_dir)
if opts.verbose:
oname = os.path.join(opts.output_dir, 'debug-raw.html')
dat = mr.mobi_html
# Encode to UTF-8 bytes before writing if the markup is a unicode object
if isinstance(dat, unicode):
dat = dat.encode('utf-8')
open(oname, 'wb').write(dat)
print _('Raw MOBI HTML saved in'), oname
print _('OEB ebook created in'), opts.output_dir
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -292,9 +292,29 @@ class Serializer(object):
buffer.seek(hoff)
buffer.write('%010d' % ioff)
class MobiFlattener(object):
    '''
    Transform that runs a `CSSFlattener` parameterised from the destination
    profile of the conversion context.
    '''

    def config(self, cfg):
        '''Adds no options; `cfg` is returned unchanged.'''
        return cfg

    def generate(self, opts):
        '''Ignores `opts` and returns this instance.'''
        return self

    def __call__(self, oeb, context):
        '''
        Build a `CSSFlattener` from ``context.dest`` (``fbase`` and the
        values of ``fnums``) with ``unfloat`` and ``untable`` enabled, and
        return the result of applying it to `oeb` and `context`.
        '''
        dest = context.dest
        css_flattener = CSSFlattener(
            fbase=dest.fbase, fkey=dest.fnums.values(),
            unfloat=True, untable=True)
        return css_flattener(oeb, context)
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
DEFAULT_PROFILE = 'CybookG3'
TRANSFORMS = [HTMLTOCAdder, CaseMangler, MobiFlattener(), SVGRasterizer,
ManifestTrimmer, MobiMLizer]
def __init__(self, compression=None, imagemax=None,
prefer_author_sort=False):
@ -302,7 +322,32 @@ class MobiWriter(object):
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
self._prefer_author_sort = prefer_author_sort
def dump(self, oeb, path):
@classmethod
def config(cls, cfg):
    """Register the Mobipocket writer options on the :class:`Config`
    object :param:`cfg` and return it.
    """
    group = cfg.add_group('mobipocket', _('Mobipocket-specific options.'))
    group(
        'compress', ['--compress'],
        default=False,
        help=_('Compress file text using PalmDOC compression. '
               'Results in smaller files, but takes a long time to run.'))
    group(
        'rescale_images', ['--rescale-images'],
        default=False,
        help=_('Modify images to meet Palm device size limitations.'))
    group(
        'prefer_author_sort', ['--prefer-author-sort'],
        default=False,
        help=_('When present, use the author sorting information for '
               'generating the Mobipocket author metadata.'))
    return cfg
@classmethod
def generate(cls, opts):
    """Build a Writer instance from parsed command-line options."""
    if opts.compress:
        compression = PALMDOC
    else:
        compression = UNCOMPRESSED
    imagemax = None
    if opts.rescale_images:
        imagemax = PALM_MAX_IMAGE_SIZE
    return cls(
        compression=compression,
        imagemax=imagemax,
        prefer_author_sort=opts.prefer_author_sort)
def __call__(self, oeb, path):
if hasattr(path, 'write'):
return self._dump_stream(oeb, path)
with open(path, 'w+b') as stream:
@ -542,21 +587,6 @@ def config(defaults=None):
else:
c = StringConfig(defaults, desc)
mobi = c.add_group('mobipocket', _('Mobipocket-specific options.'))
mobi('compress', ['--compress'], default=False,
help=_('Compress file text using PalmDOC compression. '
'Results in smaller files, but takes a long time to run.'))
mobi('rescale_images', ['--rescale-images'], default=False,
help=_('Modify images to meet Palm device size limitations.'))
mobi('toc_title', ['--toc-title'], default=None,
help=_('Title for any generated in-line table of contents.'))
mobi('ignore_tables', ['--ignore-tables'], default=False,
help=_('Render HTML tables as blocks of text instead of actual '
'tables. This is neccessary if the HTML contains very large '
'or complex tables.'))
mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
help=_('When present, use the author sorting information for '
'generating the Mobipocket author metadata.'))
profiles = c.add_group('profiles', _('Device renderer profiles. '
'Affects conversion of font sizes, image rescaling and rasterization '
'of tables. Valid profiles are: %s.') % ', '.join(_profiles))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,98 @@
'''
Registry associating file extensions with Reader classes.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, logging
from itertools import chain
from calibre.ebooks.oeb.base import OEBError
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.lit.reader import LitReader
from calibre.ebooks.lit.writer import LitWriter
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.mobi.writer import MobiWriter
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.utils.config import Config
# Public API of this module. Every name listed here must actually be defined
# below, otherwise ``from <module> import *`` raises AttributeError.
__all__ = ['ReaderFactory', 'WriterFactory']
# Maps a lower-cased file extension to its ``(Reader, Writer)`` classes.
# A ``None`` slot means that direction is unsupported for the extension.
# OEBWriter accepts a path ending in ``.opf`` (it writes the package next to
# that file), so ``.opf`` is registered as a valid output target as well.
REGISTRY = {
    '.opf': (OEBReader, OEBWriter),
    '.lit': (LitReader, LitWriter),
    '.mobi': (MobiReader, MobiWriter),
    }
def ReaderFactory(path):
    """Return the Reader class able to read the book at ``path``.

    Directories are always read with :class:`OEBReader`; files are looked up
    in ``REGISTRY`` by lower-cased extension.

    Raises :class:`OEBError` when no reader is registered for the extension.
    """
    if os.path.isdir(path):
        return OEBReader
    ext = os.path.splitext(path)[1].lower()
    reader_class = REGISTRY.get(ext, (None, None))[0]
    if reader_class is not None:
        return reader_class
    raise OEBError('Unknown e-book file extension %r' % ext)
def WriterFactory(path):
    """Return the Writer class to use for the output location ``path``.

    An existing directory, or a not-yet-existing path without an extension,
    is written with :class:`OEBWriter`; anything else is looked up in
    ``REGISTRY`` by lower-cased extension.

    Raises :class:`OEBError` when no writer is registered for the extension.
    """
    if os.path.isdir(path):
        return OEBWriter
    ext = os.path.splitext(path)[1].lower()
    if not (ext or os.path.exists(path)):
        return OEBWriter
    writer_class = REGISTRY.get(ext, (None, None))[1]
    if writer_class is not None:
        return writer_class
    raise OEBError('Unknown e-book file extension %r' % ext)
def option_parser(Reader, Writer):
    """Build the ``ebook-convert`` option parser for one Reader/Writer pair.

    The Reader, then every transform of both classes, then the Writer each
    get to register options on a shared :class:`Config`; the generic
    conversion options are added to the resulting parser afterwards.
    """
    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
    contributors = [Reader]
    contributors.extend(chain(Reader.TRANSFORMS, Writer.TRANSFORMS))
    contributors.append(Writer)
    for contributor in contributors:
        contributor.config(cfg)

    parser = cfg.option_parser()
    generic = (
        (('--encoding',), dict(default=None,
            help=_('Character encoding for input. Default is to auto detect.'))),
        (('-o', '--output'), dict(default=None,
            help=_('Output file. Default is derived from input filename.'))),
        (('-p', '--pretty-print'), dict(action='store_true', default=False,
            help=_('Produce more human-readable XML output.'))),
        (('-v', '--verbose'), dict(default=0, action='count',
            help=_('Useful for debugging.'))),
    )
    for flags, settings in generic:
        parser.add_option(*flags, **settings)
    return parser
def main(argv=sys.argv):
if len(argv) < 3:
print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]")
return 1
inpath, outpath = argv[1], argv[2]
Reader = ReaderFactory(inpath)
Writer = WriterFactory(outpath)
parser = option_parser(Reader, Writer)
opts, args = parser.parse_args(argv[3:])
if len(args) != 0:
parser.print_help()
return 1
logger = Logger(logging.getLogger('ebook-convert'))
logger.setup_cli_handler(opts.verbose)
encoding = opts.encoding
pretty_print = opts.pretty_print
oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
reader = Reader.generate(opts)
writer = Writer.generate(opts)
transforms = []
for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
transforms.append(Transform.generate(opts))
reader(oeb, inpath)
for transform in transforms:
transform(oeb, context)
writer(oeb, outpath)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,562 @@
"""
Container-/OPF-based input OEBBook reader.
"""
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, uuid, copy
from itertools import izip, chain
from urlparse import urldefrag, urlparse
from urllib import unquote as urlunquote
from mimetypes import guess_type
from collections import defaultdict
from lxml import etree
from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
DC_NSES, OPF
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, CSSURL_RE, \
ENTITY_RE, LINK_SELECTORS, MS_COVER_TYPE
from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath
from calibre.ebooks.oeb.base import urlnormalize, xml2str
from calibre.ebooks.oeb.base import OEBError, OEBBook, DirContainer
from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
from calibre.ebooks.metadata.epub import CoverRenderer
from calibre.startup import get_lang
from calibre.ptempfile import TemporaryDirectory
__all__ = ['OEBReader']
class OEBReader(object):
"""Read an OEBPS 1.x or OPF/OPS 2.0 file collection."""
COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]')
COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
Container = DirContainer
"""Container type used to access book files. Override in sub-classes."""
DEFAULT_PROFILE = 'PRS505'
"""Default renderer profile for content read with this Reader."""
TRANSFORMS = []
"""List of transforms to apply to content read with this Reader."""
def __init__(self):
    """Create a reader; no per-instance state is set up here."""
    pass
@classmethod
def config(cls, cfg):
    """Hook for adding book-reading options to the :class:`Config` object
    :param:`cfg`. This reader adds none and returns ``None``.
    """
    return None
@classmethod
def generate(cls, opts):
    """Build a Reader instance from command-line options.

    ``opts`` is accepted for interface symmetry but not consulted.
    """
    reader = cls()
    return reader
def __call__(self, oeb, path):
    """Read the book at :param:`path` into the :class:`OEBBook` object
    :param:`oeb` and return that same object.
    """
    self.oeb = oeb
    self.logger = oeb.logger
    oeb.container = self.Container(path)
    self._all_from_opf(self._read_opf())
    return oeb
def _clean_opf(self, opf):
# Rebuild the parsed OPF tree `opf` as a fresh `package` element and return
# the new root. The input elements are re-tagged and moved, not copied.
nsmap = {}
# Collect the namespace declarations of every element in the document.
for elem in opf.iter(tag=etree.Element):
nsmap.update(elem.nsmap)
# Re-tag elements that have no namespace or the OPF 1.x namespace via OPF().
for elem in opf.iter(tag=etree.Element):
if namespace(elem.tag) in ('', OPF1_NS):
elem.tag = OPF(barename(elem.tag))
nsmap.update(OPF2_NSMAP)
attrib = dict(opf.attrib)
# New root carries the original root attributes; OPF 2 is its default
# namespace.
nroot = etree.Element(OPF('package'),
nsmap={None: OPF2_NS}, attrib=attrib)
metadata = etree.SubElement(nroot, OPF('metadata'), nsmap=nsmap)
# The dc-metadata / x-metadata wrapper elements themselves are dropped; their
# descendants are still visited by the `//*` query below.
ignored = (OPF('dc-metadata'), OPF('x-metadata'))
for elem in xpath(opf, 'o2:metadata//*'):
if elem.tag in ignored:
continue
# Dublin Core elements get a lower-cased name in the DC 1.1 namespace.
if namespace(elem.tag) in DC_NSES:
tag = barename(elem.tag).lower()
elem.tag = '{%s}%s' % (DC11_NS, tag)
metadata.append(elem)
for element in xpath(opf, 'o2:metadata//o2:meta'):
metadata.append(element)
# Carry the remaining top-level sections over to the new root.
for tag in ('o2:manifest', 'o2:spine', 'o2:tours', 'o2:guide'):
for element in xpath(opf, tag):
nroot.append(element)
return nroot
def _read_opf(self):
    """Read, parse and normalise the book's OPF document.

    If the first parse fails with an XML syntax error, named entities known
    to ``ENTITYDEFS`` are substituted and the parse is retried once.

    Raises :class:`OEBError` when the root element's namespace is neither
    empty nor one of the OPF namespaces.
    """
    oeb = self.oeb
    raw = oeb.decode(oeb.container.read(None))
    raw = XMLDECL_RE.sub('', raw)
    try:
        opf = etree.fromstring(raw)
    except etree.XMLSyntaxError:
        def expand(match):
            # Unknown entities are left exactly as written.
            return ENTITYDEFS.get(match.group(1), match.group(0))
        raw = ENTITY_RE.sub(expand, raw)
        opf = etree.fromstring(raw)
        self.logger.warn('OPF contains invalid HTML named entities')
    ns = namespace(opf.tag)
    if ns not in ('', OPF1_NS, OPF2_NS):
        raise OEBError('Invalid namespace %r for OPF document' % ns)
    return self._clean_opf(opf)
def _metadata_from_opf(self, opf):
# Fill self.oeb.metadata and self.oeb.uid from the cleaned OPF tree `opf`,
# adding fallback identifier, language, creator and title entries when the
# document does not provide them.
uid = opf.get('unique-identifier', None)
self.oeb.uid = None
metadata = self.oeb.metadata
# Record every element below <metadata>. A <meta> element contributes its
# name/content attribute pair instead of its tag and text.
for elem in xpath(opf, '/o2:package/o2:metadata//*'):
term = elem.tag
value = elem.text
attrib = dict(elem.attrib)
nsmap = elem.nsmap
if term == OPF('meta'):
term = qname(attrib.pop('name', None), nsmap)
value = attrib.pop('content', None)
if value:
value = COLLAPSE_RE.sub(' ', value.strip())
# Entries with neither a value nor any remaining attribute are skipped.
if term and (value or attrib):
metadata.add(term, value, attrib, nsmap=nsmap)
# Add a generated urn:uuid identifier unless both a 'urn:uuid:' identifier
# and an identifier carrying an id attribute were seen (not necessarily on
# the same entry).
haveuuid = haveid = False
for ident in metadata.identifier:
if unicode(ident).startswith('urn:uuid:'):
haveuuid = True
if 'id' in ident.attrib:
haveid = True
if not (haveuuid and haveid):
bookid = "urn:uuid:%s" % str(uuid.uuid4())
metadata.add('identifier', bookid, id='calibre-uuid')
if uid is None:
self.logger.warn(u'Unique-identifier not specified')
# Choose the identifier whose id equals the package's unique-identifier, or
# the first identifier that has an id when none was specified.
for item in metadata.identifier:
if not item.id:
continue
if uid is None or item.id == uid:
self.oeb.uid = item
break
else:
self.logger.warn(u'Unique-identifier %r not found' % uid)
# NOTE(review): the fallback stores metadata.identifier[0] rather than the
# `ident` that carries the id attribute -- confirm this is intended.
for ident in metadata.identifier:
if 'id' in ident.attrib:
self.oeb.uid = metadata.identifier[0]
break
# Defaults for metadata the rest of the pipeline expects to exist.
if not metadata.language:
self.logger.warn(u'Language not specified')
metadata.add('language', get_lang())
if not metadata.creator:
self.logger.warn('Creator not specified')
metadata.add('creator', self.oeb.translate(__('Unknown')))
if not metadata.title:
self.logger.warn('Title not specified')
metadata.add('title', self.oeb.translate(__('Unknown')))
def _manifest_add_missing(self):
    """Add files that manifest items reference but the manifest omits.

    Starting from every manifest item, links in documents/XML items (via
    ``LINK_SELECTORS``) and ``url()`` references in stylesheets are
    followed. Each local reference that is not yet known and exists in the
    container is added to the manifest and scanned in turn, until no new
    files turn up. References to missing files are only logged.
    """
    # BINARY_MIME is not among this module's top-level imports, so it is
    # brought into scope here; without it the fallback below would raise
    # NameError for any file whose type cannot be guessed.
    from calibre.ebooks.oeb.base import BINARY_MIME

    manifest = self.oeb.manifest
    known = set(manifest.hrefs)
    unchecked = set(manifest.values())
    while unchecked:
        new = set()
        for item in unchecked:
            if (item.media_type in OEB_DOCS or
                item.media_type[-4:] in ('/xml', '+xml')) and \
               item.data is not None:
                hrefs = [sel(item.data) for sel in LINK_SELECTORS]
                for href in chain(*hrefs):
                    href = urldefrag(href)[0]
                    if not href:
                        continue
                    href = item.abshref(urlnormalize(href))
                    # Only scheme-less (local) references are candidates.
                    scheme = urlparse(href).scheme
                    if not scheme and href not in known:
                        new.add(href)
            elif item.media_type in OEB_STYLES:
                for match in CSSURL_RE.finditer(item.data):
                    href = urldefrag(match.group('url'))[0]
                    # Pure-fragment urls (e.g. ``url(#id)``) name no file;
                    # skip them exactly as the document branch does.
                    if not href:
                        continue
                    href = item.abshref(urlnormalize(href))
                    scheme = urlparse(href).scheme
                    if not scheme and href not in known:
                        new.add(href)
        unchecked.clear()
        for href in new:
            # Remember the href even when the file is missing so that it is
            # reported only once.
            known.add(href)
            if not self.oeb.container.exists(href):
                self.logger.warn('Referenced file %r not found' % href)
                continue
            self.logger.warn('Referenced file %r not in manifest' % href)
            id = manifest.generate(id='added')[0]
            guessed = guess_type(href)[0]
            media_type = guessed or BINARY_MIME
            added = manifest.add(id, href, media_type)
            unchecked.add(added)
def _manifest_from_opf(self, opf):
    """Populate the book manifest from the ``<manifest>`` items of ``opf``.

    Duplicate hrefs and items whose file is missing from the container are
    skipped with a warning; a duplicate id is replaced by a generated one.
    Afterwards referenced-but-unlisted files are added.
    """
    # BINARY_MIME is not among this module's top-level imports, so it is
    # brought into scope here; without it the media-type fallback below
    # would raise NameError.
    from calibre.ebooks.oeb.base import BINARY_MIME

    manifest = self.oeb.manifest
    for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
        id = elem.get('id')
        href = elem.get('href')
        media_type = elem.get('media-type', None)
        if media_type is None:
            # Some documents spell the attribute without the hyphen.
            media_type = elem.get('mediatype', None)
        if media_type is None or media_type == 'text/xml':
            # Missing or generic type: prefer a guess from the file name.
            guessed = guess_type(href)[0]
            media_type = guessed or media_type or BINARY_MIME
        fallback = elem.get('fallback')
        if href in manifest.hrefs:
            self.logger.warn(u'Duplicate manifest entry for %r' % href)
            continue
        if not self.oeb.container.exists(href):
            self.logger.warn(u'Manifest item %r not found' % href)
            continue
        if id in manifest.ids:
            self.logger.warn(u'Duplicate manifest id %r' % id)
            id, href = manifest.generate(id, href)
        manifest.add(id, href, media_type, fallback)
    self._manifest_add_missing()
def _spine_add_extra(self):
    """Append documents that spine items link to but the spine omits.

    ``<a href>`` links are followed transitively from the spine; every
    manifest document reached that is not already in the spine is added as
    a non-linear spine item (with a warning for OPF version 2 and above).
    """
    manifest = self.oeb.manifest
    spine = self.oeb.spine
    selector = XPath('h:body//h:a/@href')
    extras = set()
    pending = set(spine)
    while pending:
        discovered = set()
        for item in pending:
            if item.media_type not in OEB_DOCS:
                # TODO: handle fallback chains
                continue
            for href in selector(item.data):
                href = urldefrag(href)[0]
                if not href:
                    continue
                href = item.abshref(urlnormalize(href))
                if href in manifest.hrefs:
                    found = manifest.hrefs[href]
                    if found.media_type in OEB_DOCS and \
                       found not in spine and found not in extras:
                        discovered.add(found)
        extras.update(discovered)
        pending = discovered
    warn_missing = int(self.oeb.version[0]) >= 2
    for item in sorted(extras):
        if warn_missing:
            self.logger.warn(
                'Spine-referenced file %r not in spine' % item.href)
        spine.add(item, linear=False)
def _spine_from_opf(self, opf):
    """Build the spine from the ``<itemref>`` elements of ``opf``.

    References to ids missing from the manifest are skipped with a warning.
    Raises :class:`OEBError` when no item ends up in the spine.
    """
    spine = self.oeb.spine
    by_id = self.oeb.manifest.ids
    for ref in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
        idref = ref.get('idref')
        if idref in by_id:
            spine.add(by_id[idref], ref.get('linear'))
        else:
            self.logger.warn(u'Spine item %r not found' % idref)
    if not len(spine):
        raise OEBError("Spine is empty")
    self._spine_add_extra()
def _guide_from_opf(self, opf):
    """Copy ``<guide>`` references into the book's guide.

    A reference is kept only if its target, ignoring any fragment, is in
    the manifest; the href is stored as written, fragment included.
    """
    guide = self.oeb.guide
    known = self.oeb.manifest.hrefs
    for ref in xpath(opf, '/o2:package/o2:guide/o2:reference'):
        href = ref.get('href')
        if urldefrag(href)[0] in known:
            guide.add(ref.get('type'), ref.get('title'), href)
        else:
            self.logger.warn(u'Guide reference %r not found' % href)
def _find_ncx(self, opf):
    """Locate the NCX item, remove it from the manifest and return it.

    The ``toc`` attribute of ``<spine>`` decides when present (``None`` is
    returned if it names an unknown id); otherwise the first manifest item
    with the NCX media type is used. Returns ``None`` when nothing matches.
    """
    manifest = self.oeb.manifest
    declared = xpath(opf, '/o2:package/o2:spine/@toc')
    if declared:
        ncx_id = declared[0]
        if ncx_id in manifest.ids:
            item = manifest.ids[ncx_id]
            manifest.remove(item)
            return item
        return None
    for item in manifest.values():
        if item.media_type == NCX_MIME:
            manifest.remove(item)
            return item
    return None
def _toc_from_navpoint(self, item, toc, navpoint):
    """Recursively mirror the ``navPoint`` children of ``navpoint`` as
    children of the TOC node ``toc``.

    Points lacking a label or a source, or whose target file is not in the
    manifest, are skipped together with their descendants.
    """
    known = self.oeb.manifest.hrefs
    for child in xpath(navpoint, 'ncx:navPoint'):
        label = ''.join(xpath(child, 'ncx:navLabel/ncx:text/text()'))
        label = COLLAPSE_RE.sub(' ', label.strip())
        src = xpath(child, 'ncx:content/@src')
        if not (label and src):
            continue
        href = item.abshref(urlnormalize(src[0]))
        if urldefrag(href)[0] not in known:
            self.logger.warn('TOC reference %r not found' % href)
            continue
        node = toc.add(label, href,
                       id=child.get('id'), klass=child.get('class'))
        self._toc_from_navpoint(item, node, child)
def _toc_from_ncx(self, item):
    """Build the table of contents from the NCX manifest item ``item``.

    Returns ``False`` when ``item`` is ``None`` and ``True`` otherwise. The
    TOC title falls back to the book's first title when the NCX has none.
    """
    if item is None:
        return False
    ncx = item.data
    doc_title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
    doc_title = COLLAPSE_RE.sub(' ', doc_title.strip())
    if not doc_title:
        doc_title = unicode(self.oeb.metadata.title[0])
    toc = self.oeb.toc
    toc.title = doc_title
    for navmap in xpath(ncx, 'ncx:navMap'):
        self._toc_from_navpoint(item, toc, navmap)
    return True
def _toc_from_tour(self, opf):
    """Build the table of contents from the first ``<tour>`` in ``opf``.

    Returns ``False`` when the document has no tour, ``True`` otherwise.
    Sites without a title or href, or pointing outside the manifest, are
    skipped.
    """
    tours = xpath(opf, 'o2:tours/o2:tour')
    if not tours:
        return False
    tour = tours[0]
    toc = self.oeb.toc
    toc.title = tour.get('title')
    known = self.oeb.manifest.hrefs
    for site in xpath(tour, 'o2:site'):
        title = site.get('title')
        href = site.get('href')
        if not (title and href):
            continue
        if urldefrag(urlnormalize(href))[0] not in known:
            self.logger.warn('TOC reference %r not found' % href)
            continue
        toc.add(title, href, id=site.get('id'))
    return True
def _toc_from_html(self, opf):
    """Build the table of contents from the HTML page the guide marks as
    ``toc``.

    Returns ``False`` when the guide has no ``toc`` entry. If the guide href
    has a fragment, the search starts at the element with that id (or name)
    and widens to its ancestors until links are found. Link texts sharing
    one href are merged into a single entry, in first-seen order.
    """
    guide = self.oeb.guide
    if 'toc' not in guide:
        return False
    known = self.oeb.manifest.hrefs
    itempath, frag = urldefrag(guide['toc'].href)
    item = known[itempath]
    root = item.data
    scope = root
    if frag:
        matches = xpath(root, './/*[@id="%s"]' % frag)
        if not matches:
            matches = xpath(root, './/*[@name="%s"]' % frag)
        scope = matches[0] if matches else root
        while scope != root and not xpath(scope, './/h:a[@href]'):
            scope = scope.getparent()
    titles = defaultdict(list)
    order = []
    for anchor in xpath(scope, './/h:a[@href]'):
        href = item.abshref(urlnormalize(anchor.attrib['href']))
        if urldefrag(href)[0] not in known:
            continue
        text = ' '.join(xpath(anchor, './/text()'))
        text = COLLAPSE_RE.sub(' ', text.strip())
        if href not in titles:
            order.append(href)
        titles[href].append(text)
    toc = self.oeb.toc
    for href in order:
        toc.add(' '.join(titles[href]), href)
    return True
def _toc_from_spine(self, opf):
    """Build a fallback table of contents with one entry per linear spine
    item. Always returns ``True``.

    Each entry is labelled with the document's ``<title>`` when every linear
    item has a title and all titles are distinct; otherwise with the first
    non-empty h1..h5/strong text of the document (``'(unlabled)'`` if there
    is none).

    Labels are kept paired with the item they came from. Pairing a
    separately built label list against the full spine attaches labels to
    the wrong documents as soon as an item is non-linear or lacks a title.
    """
    toc = self.oeb.toc
    entries = []  # (item, title, header) for every linear spine item
    for item in self.oeb.spine:
        if not item.linear:
            continue
        html = item.data
        title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
        title = COLLAPSE_RE.sub(' ', title.strip())
        header = '(unlabled)'
        for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
            expr = '/h:html/h:body//h:%s[position()=1]/text()'
            text = ''.join(xpath(html, expr % tag))
            text = COLLAPSE_RE.sub(' ', text.strip())
            if text:
                header = text
                break
        entries.append((item, title, header))
    titles = [title for _item, title, _header in entries]
    # Titles are only useful when complete and unambiguous.
    use_titles = False not in [bool(title) for title in titles] and \
        len(titles) == len(set(titles))
    for item, title, header in entries:
        if use_titles:
            toc.add(title, item.href)
        else:
            toc.add(header, item.href)
    return True
def _toc_from_opf(self, opf, item):
    """Build the table of contents from the best available source.

    Tried in order: the NCX ``item``, an OPF tour, the guide's HTML TOC
    page, and finally the spine. A warning is logged before falling back
    to the last two.
    """
    if self._toc_from_ncx(item) or self._toc_from_tour(opf):
        return
    self.logger.warn('No metadata table of contents found')
    if not self._toc_from_html(opf):
        self._toc_from_spine(opf)
def _pages_from_ncx(self, opf, item):
    """Read page targets from the ``pageList`` of the NCX item ``item``.

    Returns ``False`` when there is no NCX item or it has no page targets,
    ``True`` otherwise. Targets without a content source are skipped.
    """
    if item is None:
        return False
    targets = xpath(item.data, 'ncx:pageList/ncx:pageTarget')
    if not targets:
        return False
    pages = self.oeb.pages
    for target in targets:
        label = ''.join(xpath(target, 'ncx:navLabel/ncx:text/text()'))
        label = COLLAPSE_RE.sub(' ', label.strip())
        src = xpath(target, 'ncx:content/@src')
        if not src:
            continue
        href = item.abshref(urlnormalize(src[0]))
        pages.add(label, href,
                  type=target.get('type', 'normal'),
                  id=target.get('id'),
                  klass=target.get('class'))
    return True
def _find_page_map(self, opf):
    """Locate the page-map item, remove it from the manifest and return it.

    The ``page-map`` attribute of ``<spine>`` decides when present (``None``
    is returned if it names an unknown id); otherwise the first manifest
    item with the page-map media type is used. Returns ``None`` when
    nothing matches.
    """
    manifest = self.oeb.manifest
    declared = xpath(opf, '/o2:package/o2:spine/@page-map')
    if declared:
        map_id = declared[0]
        if map_id in manifest.ids:
            item = manifest.ids[map_id]
            manifest.remove(item)
            return item
        return None
    for item in manifest.values():
        if item.media_type == PAGE_MAP_MIME:
            manifest.remove(item)
            return item
    return None
def _pages_from_page_map(self, opf):
    """Read pagination from a page-map file.

    Returns ``False`` when the book has no page-map item, ``True``
    otherwise. A page with an empty name is typed ``'special'``, one whose
    name consists only of the letters ``ivxlcdm`` (any case) ``'front'``,
    and every other page ``'normal'``.
    """
    item = self._find_page_map(opf)
    if item is None:
        return False
    pages = self.oeb.pages
    for page in xpath(item.data, 'o2:page'):
        href = page.get('href')
        if not href:
            continue
        name = COLLAPSE_RE.sub(' ', page.get('name', '').strip())
        href = item.abshref(urlnormalize(href))
        if not name:
            kind = 'special'
        elif not name.lower().strip('ivxlcdm'):
            kind = 'front'
        else:
            kind = 'normal'
        pages.add(name, href, type=kind)
    return True
def _pages_from_opf(self, opf, item):
    """Load pagination from the NCX ``item`` if it provides any, otherwise
    from a page-map file if the book has one.
    """
    if not self._pages_from_ncx(opf, item):
        self._pages_from_page_map(opf)
def _cover_from_html(self, hcover):
    """Render the document ``hcover`` to an image and add it to the manifest.

    The book is written to a temporary directory so that the renderer can
    open the page from disk; the rendered data is stored as a new JPEG
    manifest item, which is returned.
    """
    with TemporaryDirectory('_html_cover') as tdir:
        OEBWriter()(self.oeb, tdir)
        page_path = os.path.join(tdir, urlunquote(hcover.href))
        image = CoverRenderer(page_path).image_data
    manifest = self.oeb.manifest
    id, href = manifest.generate('cover', 'cover.jpeg')
    return manifest.add(id, href, JPEG_MIME, data=image)
def _locate_cover_image(self):
    """Return a manifest item holding the cover image, creating one if
    necessary.

    Sources are tried in order: the ``cover`` metadata entry, the guide's
    ``cover`` reference, the guide's ``MS_COVER_TYPE`` reference, the first
    SVG embedded in the cover page, the first ``<object data=...>`` on the
    cover page, and finally a rendering of the cover page itself. The cover
    page is the first spine item unless the guide's ``cover`` reference
    points at a document.
    """
    manifest = self.oeb.manifest
    guide = self.oeb.guide
    if self.oeb.metadata.cover:
        id = str(self.oeb.metadata.cover[0])
        item = manifest.ids.get(id, None)
        if item is not None and item.media_type in OEB_IMAGES:
            return item
        self.logger.warn('Invalid cover image @id %r' % id)
    hcover = self.oeb.spine[0]
    if 'cover' in guide:
        # Guide references are admitted by their defragmented path and may
        # carry a fragment, so strip it before the lookup; a direct
        # ``hrefs[href]`` raises KeyError for e.g. ``cover.html#top``.
        path = urldefrag(guide['cover'].href)[0]
        item = manifest.hrefs.get(path, None)
        if item is not None:
            if item.media_type in OEB_IMAGES:
                return item
            if item.media_type in OEB_DOCS:
                hcover = item
    html = hcover.data
    if MS_COVER_TYPE in guide:
        href = guide[MS_COVER_TYPE].href
        item = manifest.hrefs.get(href, None)
        if item is not None and item.media_type in OEB_IMAGES:
            return item
    svgs = self.COVER_SVG_XP(html)
    if svgs:
        # Copy the element so the cover page itself is left untouched.
        svg = copy.deepcopy(svgs[0])
        href = os.path.splitext(hcover.href)[0] + '.svg'
        id, href = manifest.generate(hcover.id, href)
        return manifest.add(id, href, SVG_MIME, data=svg)
    objects = self.COVER_OBJECT_XP(html)
    if objects:
        href = hcover.abshref(objects[0].get('data'))
        item = manifest.hrefs.get(href, None)
        if item is not None and item.media_type in OEB_IMAGES:
            return item
    return self._cover_from_html(hcover)
def _ensure_cover_image(self):
    """Make the ``cover`` metadata entry name the located cover image,
    updating the first existing entry or adding one when none exists.
    """
    cover = self._locate_cover_image()
    if self.oeb.metadata.cover:
        self.oeb.metadata.cover[0].value = cover.id
    else:
        self.oeb.metadata.add('cover', cover.id)
def _all_from_opf(self, opf):
    """Populate the whole book from the cleaned OPF tree ``opf``.

    The steps run in a fixed order: version, metadata, manifest, spine,
    guide, table of contents, pages, cover. The NCX item is looked up once
    and shared by the TOC and page steps.
    """
    self.oeb.version = opf.get('version', '1.2')
    for load in (self._metadata_from_opf, self._manifest_from_opf,
                 self._spine_from_opf, self._guide_from_opf):
        load(opf)
    ncx = self._find_ncx(opf)
    self._toc_from_opf(opf, ncx)
    self._pages_from_opf(opf, ncx)
    self._ensure_cover_image()
def main(argv=sys.argv):
reader = OEBReader()
for arg in argv[1:]:
oeb = reader(OEBBook(), arg)
for name, doc in oeb.to_opf1().values():
print etree.tostring(doc, pretty_print=True)
for name, doc in oeb.to_opf2(page_map=True).values():
print etree.tostring(doc, pretty_print=True)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -98,7 +98,15 @@ class CSSFlattener(object):
self.unfloat = unfloat
self.untable = untable
def transform(self, oeb, context):
@classmethod
def config(cls, cfg):
    """Contribute no options; return ``cfg`` unchanged."""
    return cfg
@classmethod
def generate(cls, opts):
    """Return a default-constructed instance; ``opts`` is not consulted."""
    instance = cls()
    return instance
def __call__(self, oeb, context):
oeb.logger.info('Flattening CSS and remapping font sizes...')
self.oeb = oeb
self.context = context

View File

@ -52,7 +52,18 @@ class HTMLTOCAdder(object):
self.title = title
self.style = style
def transform(self, oeb, context):
@classmethod
def config(cls, cfg):
    """Register the in-line TOC options on ``cfg`` and return it."""
    htmltoc = cfg.add_group('htmltoc', _('HTML TOC generation options.'))
    htmltoc(
        'toc_title', ['--toc-title'],
        default=None,
        help=_('Title for any generated in-line table of contents.'))
    return cfg
@classmethod
def generate(cls, opts):
    """Build an instance titled with the ``toc_title`` option."""
    toc_title = opts.toc_title
    return cls(title=toc_title)
def __call__(self, oeb, context):
if 'toc' in oeb.guide:
return
oeb.logger.info('Generating in-line TOC...')

View File

@ -29,7 +29,15 @@ CASE_MANGLER_CSS = """
TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
class CaseMangler(object):
def transform(self, oeb, context):
@classmethod
def config(cls, cfg):
    """Contribute no options; return ``cfg`` unchanged."""
    return cfg
@classmethod
def generate(cls, opts):
    """Return a default-constructed instance; ``opts`` is not consulted."""
    instance = cls()
    return instance
def __call__(self, oeb, context):
oeb.logger.info('Applying case-transforming CSS...')
self.oeb = oeb
self.profile = context.source

View File

@ -34,7 +34,15 @@ class SVGRasterizer(object):
if QApplication.instance() is None:
QApplication([])
def transform(self, oeb, context):
@classmethod
def config(cls, cfg):
    """Contribute no options; return ``cfg`` unchanged."""
    return cfg
@classmethod
def generate(cls, opts):
    """Return a default-constructed instance; ``opts`` is not consulted."""
    instance = cls()
    return instance
def __call__(self, oeb, context):
oeb.logger.info('Rasterizing SVG images...')
self.oeb = oeb
self.profile = context.dest

View File

@ -13,7 +13,15 @@ from calibre.ebooks.oeb.base import LINK_SELECTORS, CSSURL_RE
from calibre.ebooks.oeb.base import urlnormalize
class ManifestTrimmer(object):
def transform(self, oeb, context):
@classmethod
def config(cls, cfg):
    """Contribute no options; return ``cfg`` unchanged."""
    return cfg
@classmethod
def generate(cls, opts):
    """Return a default-constructed instance; ``opts`` is not consulted."""
    instance = cls()
    return instance
def __call__(self, oeb, context):
oeb.logger.info('Trimming unused files from manifest...')
used = set()
hrefs = oeb.manifest.hrefs

View File

@ -0,0 +1,75 @@
'''
Directory output OEBBook writer.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, logging
from calibre.ebooks.oeb.base import OPF_MIME, xml2str
from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
__all__ = ['OEBWriter']
class OEBWriter(object):
    """Write an :class:`OEBBook` out as a directory of OEB files."""

    # Default renderer profile for content written with this Writer.
    DEFAULT_PROFILE = 'PRS505'

    # List of transforms to apply to content written with this Writer.
    TRANSFORMS = []

    def __init__(self, version='2.0', page_map=False, pretty_print=False):
        """Store the output settings.

        :param version: OPF version string; only its first character is
            interpreted (``'1...'`` or ``'2...'``).
        :param page_map: passed on to ``OEBBook.to_opf2``.
        :param pretty_print: passed on to ``xml2str`` for the metadata files.
        """
        self.version = version
        self.page_map = page_map
        self.pretty_print = pretty_print

    @classmethod
    def config(cls, cfg):
        """Add any book-writing options to the :class:`Config` object
        :param:`cfg`.
        """
        oeb = cfg.add_group('oeb', _('OPF/NCX/etc. generation options.'))
        versions = ['1.2', '2.0']
        oeb('opf_version', ['--opf-version'], default='2.0', choices=versions,
            help=_('OPF version to generate. Default is %default.'))
        oeb('adobe_page_map', ['--adobe-page-map'], default=False,
            help=_('Generate an Adobe "page-map" file if pagination '
                   'information is avaliable.'))
        return cfg

    @classmethod
    def generate(cls, opts):
        """Generate a Writer instance from command-line options."""
        version = opts.opf_version
        page_map = opts.adobe_page_map
        pretty_print = opts.pretty_print
        return cls(version=version, page_map=page_map,
                   pretty_print=pretty_print)

    def __call__(self, oeb, path):
        """Write the book in the :class:`OEBBook` object :param:`oeb` to the
        directory at :param:`path`.

        If :param:`path` ends in ``.opf`` the book is written to the
        directory containing that file and the OPF document takes that file
        name. The target directory is created when missing.

        Raises :class:`OEBError` for an unsupported OPF version.
        """
        version = int(self.version[0])
        if version not in (1, 2):
            # Reject a bad version before anything is written to disk.
            # OEBError is not among this module's top-level imports, so it
            # is imported here.
            from calibre.ebooks.oeb.base import OEBError
            raise OEBError("Unrecognized OPF version %r" % self.version)
        opfname = None
        if os.path.splitext(path)[1].lower() == '.opf':
            opfname = os.path.basename(path)
            # A bare file name has an empty dirname; that means the current
            # directory, which os.mkdir('') could not create.
            path = os.path.dirname(path) or os.curdir
        if not os.path.isdir(path):
            os.mkdir(path)
        output = DirContainer(path)
        for item in oeb.manifest.values():
            output.write(item.href, str(item))
        if version == 1:
            metadata = oeb.to_opf1()
        else:
            metadata = oeb.to_opf2(page_map=self.page_map)
        pretty_print = self.pretty_print
        for mime, (href, data) in metadata.items():
            if opfname and mime == OPF_MIME:
                href = opfname
            output.write(href, xml2str(data, pretty_print=pretty_print))
        return

View File

@ -201,14 +201,14 @@ class Document(QWebPage):
def bookmark(self):
return self.javascript('calculate_bookmark(%d)'%(self.ypos+25), 'string')
@apply
def at_bottom():
@dynamic_property
def at_bottom(self):
def fget(self):
return self.height - self.ypos <= self.window_height
return property(fget=fget)
@apply
def at_top():
@dynamic_property
def at_top(self):
def fget(self):
return self.ypos <= 0
return property(fget=fget)
@ -217,32 +217,32 @@ class Document(QWebPage):
def test(self):
pass
@apply
def ypos():
@dynamic_property
def ypos(self):
def fget(self):
return self.javascript('window.pageYOffset', 'int')
return property(fget=fget)
@apply
def window_height():
@dynamic_property
def window_height(self):
def fget(self):
return self.javascript('window.innerHeight', 'int')
return property(fget=fget)
@apply
def window_width():
@dynamic_property
def window_width(self):
def fget(self):
return self.javascript('window.innerWidth', 'int')
return property(fget=fget)
@apply
def xpos():
@dynamic_property
def xpos(self):
def fget(self):
return self.javascript('window.pageXOffset', 'int')
return property(fget=fget)
@apply
def scroll_fraction():
@dynamic_property
def scroll_fraction(self):
def fget(self):
try:
return float(self.ypos)/(self.height-self.window_height)
@ -250,20 +250,20 @@ class Document(QWebPage):
return 0.
return property(fget=fget)
@apply
def hscroll_fraction():
@dynamic_property
def hscroll_fraction(self):
def fget(self):
return float(self.xpos)/self.width
return property(fget=fget)
@apply
def height():
@dynamic_property
def height(self):
def fget(self):
return self.javascript('document.body.offsetHeight', 'int') # contentsSize gives inaccurate results
return property(fget=fget)
@apply
def width():
@dynamic_property
def width(self):
def fget(self):
return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
return property(fget=fget)
@ -342,20 +342,20 @@ class DocumentView(QWebView):
def sizeHint(self):
return self._size_hint
@apply
def scroll_fraction():
@dynamic_property
def scroll_fraction(self):
def fget(self):
return self.document.scroll_fraction
return property(fget=fget)
@apply
def hscroll_fraction():
@dynamic_property
def hscroll_fraction(self):
def fget(self):
return self.document.hscroll_fraction
return property(fget=fget)
@apply
def content_size():
@dynamic_property
def content_size(self):
def fget(self):
return self.document.width, self.document.height
return property(fget=fget)

View File

@ -1,16 +1,8 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' Code to manage ebook library'''
import re
from calibre.utils.config import Config, StringConfig
# Matches a leading English article followed by whitespace.
title_pat = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)


def title_sort(title):
    """Return ``title`` with a leading article moved to the end.

    ``'The Hobbit'`` becomes ``'Hobbit, The'``; titles that do not start
    with "A", "An" or "The" (any case) are only stripped of surrounding
    whitespace.
    """
    match = title_pat.search(title)
    if match:
        prep = match.group(1)
        # Cut off only the matched prefix. ``str.replace`` would delete
        # every occurrence of the article's letters anywhere in the title
        # (turning 'The Theory' into 'ory, The').
        title = title[match.end():] + ', ' + prep
    return title.strip()
def server_config(defaults=None):
desc=_('Settings to control the calibre content server')

View File

@ -814,8 +814,8 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
# _lock_file = None
self.conn.close()
@apply
def user_version():
@dynamic_property
def user_version(self):
doc = 'The user version of this database'
def fget(self):
return self.conn.get('pragma user_version;', all=False)
@ -1455,4 +1455,4 @@ def text_to_tokens(text):
if __name__ == '__main__':
sqlite.enable_callback_tracebacks(True)
db = LibraryDatabase('/home/kovid/temp/library1.db.orig')
db = LibraryDatabase('/home/kovid/temp/library1.db.orig')

View File

@ -33,14 +33,14 @@ from calibre.ebooks import BOOK_EXTENSIONS
copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,
'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15}
INDEX_MAP = dict(zip(FIELD_MAP.values(), FIELD_MAP.keys()))
class CoverCache(QThread):
def __init__(self, library_path, parent=None):
QThread.__init__(self, parent)
self.library_path = library_path
@ -52,7 +52,7 @@ class CoverCache(QThread):
self.cache_lock = QReadWriteLock()
self.id_map_stale = True
self.keep_running = True
def build_id_map(self):
self.id_map_lock.lockForWrite()
self.id_map = {}
@ -65,8 +65,8 @@ class CoverCache(QThread):
continue
self.id_map_lock.unlock()
self.id_map_stale = False
def set_cache(self, ids):
self.cache_lock.lockForWrite()
already_loaded = set([])
@ -80,8 +80,8 @@ class CoverCache(QThread):
self.load_queue_lock.lockForWrite()
self.load_queue = collections.deque(ids)
self.load_queue_lock.unlock()
def run(self):
while self.keep_running:
if self.id_map is None or self.id_map_stale:
@ -94,7 +94,7 @@ class CoverCache(QThread):
break
finally:
self.load_queue_lock.unlock()
self.cache_lock.lockForRead()
need = True
if id in self.cache.keys():
@ -121,19 +121,19 @@ class CoverCache(QThread):
self.cache_lock.lockForWrite()
self.cache[id] = img
self.cache_lock.unlock()
self.sleep(1)
def stop(self):
self.keep_running = False
def cover(self, id):
val = None
if self.cache_lock.tryLockForRead(50):
val = self.cache.get(id, None)
self.cache_lock.unlock()
return val
def clear_cache(self):
self.cache_lock.lockForWrite()
self.cache = {}
@ -148,24 +148,24 @@ class CoverCache(QThread):
for id in ids:
self.load_queue.appendleft(id)
self.load_queue_lock.unlock()
class ResultCache(SearchQueryParser):
'''
Stores sorted and filtered metadata in memory.
'''
def __init__(self):
    '''
    Start with an empty cache.

    `_map`, `_map_filtered` and `_data` are created as three separate
    lists. A chained assignment would bind all three names to the same
    list object, so that changing one of them in place would silently
    change the others as well.
    '''
    self._map = []
    self._map_filtered = []
    self._data = []
    self.first_sort = True
    SearchQueryParser.__init__(self)
def __getitem__(self, row):
return self._data[self._map_filtered[row]]
def __len__(self):
return len(self._map_filtered)
def __iter__(self):
for id in self._map_filtered:
yield self._data[id]
@ -194,32 +194,32 @@ class ResultCache(SearchQueryParser):
matches.add(item[0])
break
return matches
def remove(self, id):
    '''
    Blank out the record stored for `id` and drop `id` from both the full
    and the filtered id lists, if present.
    '''
    self._data[id] = None
    for id_list in (self._map, self._map_filtered):
        if id in id_list:
            id_list.remove(id)
def set(self, row, col, val, row_is_id=False):
    '''
    Store `val` in column `col` of a record. `row` is a position in the
    filtered view unless `row_is_id` is True, in which case it is used
    directly as the record id.
    '''
    if row_is_id:
        book_id = row
    else:
        book_id = self._map_filtered[row]
    record = self._data[book_id]
    record[col] = val
def index(self, id, cache=False):
    '''
    Return the position of `id` in the full id list when `cache` is True,
    otherwise its position in the filtered view. Raises ValueError (from
    list.index) when `id` is not in the chosen list.
    '''
    if cache:
        return self._map.index(id)
    return self._map_filtered.index(id)
def row(self, id):
    'Position of `id` in the filtered view; same as index(id).'
    position = self.index(id)
    return position
def has_id(self, id):
    '''
    True if a record is stored for `id`; False when the slot holds None or
    `id` is outside the data list.
    '''
    try:
        record = self._data[id]
    except IndexError:
        return False
    return record is not None
def refresh_ids(self, conn, ids):
'''
Refresh the data in the cache for books identified by ids.
@ -232,7 +232,7 @@ class ResultCache(SearchQueryParser):
except ValueError:
pass
return None
def books_added(self, ids, conn):
if not ids:
return
@ -241,16 +241,16 @@ class ResultCache(SearchQueryParser):
self._data[id] = conn.get('SELECT * from meta WHERE id=?', (id,))[0]
self._map[0:0] = ids
self._map_filtered[0:0] = ids
def books_deleted(self, ids):
    '''
    For every id in `ids`: blank out its record and drop it from the full
    and the filtered id lists, if present.
    '''
    for id in ids:
        self._data[id] = None
        for id_list in (self._map, self._map_filtered):
            if id in id_list:
                id_list.remove(id)
def count(self):
    'Number of ids in the full (unfiltered) id list.'
    all_ids = self._map
    return len(all_ids)
def refresh(self, db, field=None, ascending=True):
temp = db.conn.get('SELECT * FROM meta')
self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
@ -260,7 +260,7 @@ class ResultCache(SearchQueryParser):
if field is not None:
self.sort(field, ascending)
self._map_filtered = list(self._map)
def seriescmp(self, x, y):
try:
ans = cmp(self._data[x][9].lower(), self._data[y][9].lower()) if str else\
@ -291,28 +291,28 @@ class ResultCache(SearchQueryParser):
subsort = True
self.first_sort = False
fcmp = self.seriescmp if field == 'series' else \
functools.partial(self.cmp, FIELD_MAP[field], subsort=subsort,
functools.partial(self.cmp, FIELD_MAP[field], subsort=subsort,
str=field not in ('size', 'rating', 'timestamp'))
self._map.sort(cmp=fcmp, reverse=not ascending)
self._map_filtered = [id for id in self._map if id in self._map_filtered]
def search(self, query):
    '''
    Restrict the filtered view to the ids matched by `query`.

    An empty or whitespace-only query resets the filtered view to a copy
    of the full id list. Otherwise the view keeps, in the order of the
    full id list, the ids returned by self.parse(query).
    '''
    if not query or not query.strip():
        self._map_filtered = list(self._map)
        return
    # A set gives constant time membership tests; testing against a list
    # for every id in _map is quadratic on large libraries.
    matches = set(self.parse(query))
    self._map_filtered = [id for id in self._map if id in matches]
class Tag(unicode):
    '''
    A unicode string that also carries a `count` and a `state` attribute,
    both set to 0 on creation.
    '''

    def __new__(cls, *args):
        tag = super(Tag, cls).__new__(cls, *args)
        tag.count = 0
        tag.state = 0
        return tag

    def as_string(self):
        'Return the tag text prefixed with its count in square brackets.'
        return u'[%d] %s'%(self.count, self)
@ -321,19 +321,19 @@ class LibraryDatabase2(LibraryDatabase):
An ebook metadata database that stores references to ebook files on disk.
'''
PATH_LIMIT = 40 if 'win32' in sys.platform else 100
@dynamic_property
def user_version(self):
    # dynamic_property calls this function once (with None) and binds the
    # property object it returns to the class attribute `user_version`.
    def read_version(self):
        return self.conn.get('pragma user_version;', all=False)

    def write_version(self, val):
        self.conn.execute('pragma user_version=%d'%int(val))
        self.conn.commit()

    return property(read_version, write_version,
            doc='The user version of this database')
def connect(self):
if 'win32' in sys.platform and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
raise ValueError('Path to library too long. Must be less than %d characters.'%(259-4*self.PATH_LIMIT-10))
@ -343,9 +343,9 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.close()
os.remove(self.dbpath)
self.conn = connect(self.dbpath, self.row_factory)
if self.user_version == 0:
if self.user_version == 0:
self.initialize_database()
def __init__(self, library_path, row_factory=False):
if not os.path.exists(library_path):
os.makedirs(library_path)
@ -358,7 +358,7 @@ class LibraryDatabase2(LibraryDatabase):
self.connect()
self.is_case_sensitive = not iswindows and not isosx and \
not os.path.exists(self.dbpath.replace('metadata.db', 'MeTAdAtA.dB'))
# Upgrade database
# Upgrade database
while True:
meth = getattr(self, 'upgrade_version_%d'%self.user_version, None)
if meth is None:
@ -368,7 +368,7 @@ class LibraryDatabase2(LibraryDatabase):
meth()
self.conn.commit()
self.user_version += 1
self.data = ResultCache()
self.search = self.data.search
self.refresh = functools.partial(self.data.refresh, self)
@ -378,24 +378,24 @@ class LibraryDatabase2(LibraryDatabase):
self.row = self.data.row
self.has_id = self.data.has_id
self.count = self.data.count
self.refresh()
def get_property(idx, index_is_id=False, loc=-1):
row = self.data._data[idx] if index_is_id else self.data[idx]
return row[loc]
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags',
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp'):
setattr(self, prop, functools.partial(get_property,
setattr(self, prop, functools.partial(get_property,
loc=FIELD_MAP['comments' if prop == 'comment' else prop]))
def initialize_database(self):
    '''
    Run the `metadata_sqlite` script from calibre.resources on the
    connection and set the user version to 1.
    '''
    from calibre.resources import metadata_sqlite as schema_script
    self.conn.executescript(schema_script)
    self.user_version = 1
def upgrade_version_1(self):
'''
Normalize indices.
@ -407,7 +407,7 @@ class LibraryDatabase2(LibraryDatabase):
CREATE INDEX series_idx ON series (name COLLATE NOCASE);
CREATE INDEX series_sort_idx ON books (series_index, id);
'''))
def upgrade_version_2(self):
''' Fix Foreign key constraints for deleting from link tables. '''
script = textwrap.dedent('''\
@ -426,7 +426,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.executescript(script%dict(ltable='publishers', table='publishers', ltable_col='publisher'))
self.conn.executescript(script%dict(ltable='tags', table='tags', ltable_col='tag'))
self.conn.executescript(script%dict(ltable='series', table='series', ltable_col='series'))
def upgrade_version_3(self):
' Add path to result cache '
self.conn.executescript('''
@ -450,25 +450,25 @@ class LibraryDatabase2(LibraryDatabase):
FROM books;
''')
def last_modified(self):
    '''
    Return the modification time of the database file as a UTC datetime
    object.
    '''
    mtime = os.stat(self.dbpath).st_mtime
    return datetime.utcfromtimestamp(mtime)
def path(self, index, index_is_id=False):
    '''
    Return the relative path to the directory containing this book's files
    as a unicode string, using the separator of the current platform.
    '''
    if index_is_id:
        record = self.data._data[index]
    else:
        record = self.data[index]
    stored = record[FIELD_MAP['path']]
    # Paths are stored with '/' as separator.
    return stored.replace('/', os.sep)
def abspath(self, index, index_is_id=False):
    '''
    Return the absolute path to the directory containing this book's files
    as a unicode string. The directory is created if it does not exist.
    '''
    root = self.library_path
    relative = self.path(index, index_is_id=index_is_id)
    book_dir = os.path.join(root, relative)
    if not os.path.exists(book_dir):
        os.makedirs(book_dir)
    return book_dir
def construct_path_name(self, id):
'''
Construct the directory name for this book based on its metadata.
@ -480,7 +480,7 @@ class LibraryDatabase2(LibraryDatabase):
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
path = author + '/' + title + ' (%d)'%id
return path
def construct_file_name(self, id):
'''
Construct the file name for this book based on its metadata.
@ -492,17 +492,17 @@ class LibraryDatabase2(LibraryDatabase):
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
name = title + ' - ' + author
return name
def rmtree(self, path):
    '''
    Delete the directory tree at `path`, unless the normalized library
    path starts with the normalized `path`. This keeps the library folder
    itself (and anything its path is prefixed by) from being removed.
    '''
    library = self.normpath(self.library_path)
    target = self.normpath(path)
    if library.startswith(target):
        return
    shutil.rmtree(path)
def normpath(self, path):
    '''
    Return `path` with symlinks resolved and made absolute. The result is
    lower-cased when `is_case_sensitive` is false.
    '''
    resolved = os.path.abspath(os.path.realpath(path))
    if self.is_case_sensitive:
        return resolved
    return resolved.lower()
def set_path(self, index, index_is_id=False):
'''
Set the path to the directory containing this books files based on its
@ -524,12 +524,12 @@ class LibraryDatabase2(LibraryDatabase):
break
if path == current_path and not changed:
return
tpath = os.path.join(self.library_path, *path.split('/'))
if not os.path.exists(tpath):
os.makedirs(tpath)
spath = os.path.join(self.library_path, *current_path.split('/'))
if current_path and os.path.exists(spath): # Migrate existing files
cdata = self.cover(id, index_is_id=True)
if cdata is not None:
@ -551,14 +551,14 @@ class LibraryDatabase2(LibraryDatabase):
parent = os.path.dirname(spath)
if len(os.listdir(parent)) == 0:
self.rmtree(parent)
def add_listener(self, listener):
    '''
    Register `listener`. On change events it is called with two arguments:
    the event name and the list of affected ids.
    '''
    registered = self.listeners
    registered.add(listener)
def notify(self, event, ids=[]):
'Notify all listeners'
for listener in self.listeners:
@ -567,12 +567,12 @@ class LibraryDatabase2(LibraryDatabase):
except:
traceback.print_exc()
continue
def cover(self, index, index_is_id=False, as_file=False, as_image=False,
def cover(self, index, index_is_id=False, as_file=False, as_image=False,
as_path=False):
'''
Return the cover image as a bytestring (in JPEG format) or None.
`as_file` : If True return the image as an open file object
`as_image`: If True return the image as a QImage object
'''
@ -587,7 +587,7 @@ class LibraryDatabase2(LibraryDatabase):
img.loadFromData(f.read())
return img
return f if as_file else f.read()
def get_metadata(self, idx, index_is_id=False, get_cover=False):
'''
Convenience method to return metadata as a L{MetaInformation} object.
@ -612,7 +612,7 @@ class LibraryDatabase2(LibraryDatabase):
if get_cover:
mi.cover = self.cover(id, index_is_id=True, as_path=True)
return mi
def has_book(self, mi):
title = mi.title
if title:
@ -620,16 +620,16 @@ class LibraryDatabase2(LibraryDatabase):
title = title.decode(preferred_encoding, 'replace')
return bool(self.conn.get('SELECT id FROM books where title=?', (title,), all=False))
return False
def has_cover(self, index, index_is_id=False):
    '''
    True if a readable cover.jpg exists in the book's directory.
    '''
    if index_is_id:
        id = index
    else:
        id = self.id(index)
    book_dir = self.path(id, index_is_id=True)
    cover_file = os.path.join(self.library_path, book_dir, 'cover.jpg')
    return os.access(cover_file, os.R_OK)
def set_cover(self, id, data):
'''
Set the cover for this book.
`data`: Can be either a QImage, QPixmap, file object or bytestring
'''
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
@ -644,13 +644,13 @@ class LibraryDatabase2(LibraryDatabase):
data = data.read()
p.loadFromData(data)
p.save(path)
def all_formats(self):
    '''
    Return the set of all values in the `format` column of the `data`
    table; an empty set when the query yields nothing.
    '''
    rows = self.conn.get('SELECT format from data')
    return set(row[0] for row in (rows or ()))
def formats(self, index, index_is_id=False):
''' Return available formats as a comma separated list or None if there are no available formats '''
id = index if index_is_id else self.id(index)
@ -667,7 +667,7 @@ class LibraryDatabase2(LibraryDatabase):
if os.access(os.path.join(path, name+_format), os.R_OK|os.W_OK):
ans.append(format)
return ','.join(ans)
def has_format(self, index, format, index_is_id=False):
id = index if index_is_id else self.id(index)
name = self.conn.get('SELECT name FROM data WHERE book=? AND format=?', (id, format), all=False)
@ -677,7 +677,7 @@ class LibraryDatabase2(LibraryDatabase):
path = os.path.join(path, name+format)
return os.access(path, os.R_OK|os.W_OK)
return False
def format_abspath(self, index, format, index_is_id=False):
'Return absolute path to the ebook file of format `format`'
id = index if index_is_id else self.id(index)
@ -688,13 +688,13 @@ class LibraryDatabase2(LibraryDatabase):
path = os.path.join(path, name+format)
if os.access(path, os.R_OK|os.W_OK):
return path
def format(self, index, format, index_is_id=False, as_file=False, mode='r+b'):
'''
Return the ebook format as a bytestring or `None` if the format doesn't exist,
or we don't have permission to write to the ebook file.
`as_file`: If True the ebook format is returned as a file object opened in `mode`
or we don't have permission to write to the ebook file.
`as_file`: If True the ebook format is returned as a file object opened in `mode`
'''
path = self.format_abspath(index, format, index_is_id=index_is_id)
if path is not None:
@ -702,14 +702,14 @@ class LibraryDatabase2(LibraryDatabase):
return f if as_file else f.read()
if self.has_format(index, format, index_is_id):
self.remove_format(id, format, index_is_id=True)
def add_format_with_hooks(self, index, format, fpath, index_is_id=False,
                          path=None, notify=True):
    '''
    Run the import plugins on the file at `fpath` and add the resulting
    file to the book via `add_format`.

    The format that is stored is taken from the extension of the file
    returned by the plugins, not from the `format` argument. Returns
    whatever `add_format` returns.
    '''
    npath = self.run_import_plugins(fpath, format)
    format = os.path.splitext(npath)[-1].lower().replace('.', '').upper()
    stream = open(npath, 'rb')
    try:
        return self.add_format(index, format, stream,
                index_is_id=index_is_id, path=path, notify=notify)
    finally:
        # Do not leak the file handle, whether add_format succeeds or not.
        # NOTE(review): assumes add_format is done with the stream once it
        # returns - confirm against add_format.
        stream.close()
def add_format(self, index, format, stream, index_is_id=False, path=None, notify=True):
id = index if index_is_id else self.id(index)
if path is None:
@ -768,7 +768,7 @@ class LibraryDatabase2(LibraryDatabase):
self.refresh_ids([id])
if notify:
self.notify('metadata', [id])
def clean(self):
'''
Remove orphaned entries.
@ -779,13 +779,13 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute(st%dict(ltable='tags', table='tags', ltable_col='tag'))
self.conn.execute(st%dict(ltable='series', table='series', ltable_col='series'))
self.conn.commit()
def get_recipes(self):
    'Return the (id, script) rows of the `feeds` table.'
    recipes = self.conn.get('SELECT id, script FROM feeds')
    return recipes
def get_recipe(self, id):
    'Return the script stored in the `feeds` table for the given `id`.'
    query = 'SELECT script FROM feeds WHERE id=?'
    return self.conn.get(query, (id,), all=False)
def get_categories(self, sort_on_count=False):
categories = {}
def get(name, category, field='name'):
@ -807,11 +807,11 @@ class LibraryDatabase2(LibraryDatabase):
for tag in tags:
tag.count = self.conn.get('SELECT COUNT(format) FROM data WHERE format=?', (tag,), all=False)
tags.sort(reverse=sort_on_count, cmp=(lambda x,y:cmp(x.count,y.count)) if sort_on_count else cmp)
for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'),
for x in (('authors', 'author'), ('tags', 'tag'), ('publishers', 'publisher'),
('series', 'series')):
get(*x)
get('data', 'format', 'format')
categories['news'] = []
newspapers = self.conn.get('SELECT name FROM tags WHERE id IN (SELECT DISTINCT tag FROM books_tags_link WHERE book IN (select book from books_tags_link where tag IN (SELECT id FROM tags WHERE name=?)))', (_('News'),))
if newspapers:
@ -823,10 +823,10 @@ class LibraryDatabase2(LibraryDatabase):
categories['news'] = list(map(Tag, newspapers))
for tag in categories['news']:
tag.count = self.conn.get('SELECT COUNT(id) FROM books_tags_link WHERE tag IN (SELECT DISTINCT id FROM tags WHERE name=?)', (tag,), all=False)
return categories
def tags_older_than(self, tag, delta):
tag = tag.lower().strip()
now = datetime.now()
@ -836,9 +836,9 @@ class LibraryDatabase2(LibraryDatabase):
tags = r[FIELD_MAP['tags']]
if tags and tag in tags.lower():
yield r[FIELD_MAP['id']]
def set(self, row, column, val):
'''
Convenience method for setting the title, authors, publisher or rating
@ -861,10 +861,10 @@ class LibraryDatabase2(LibraryDatabase):
self.data.refresh_ids(self.conn, [id])
self.set_path(id, True)
self.notify('metadata', [id])
def set_metadata(self, id, mi):
'''
Set metadata for the book `id` from the `MetaInformation` object `mi`
Set metadata for the book `id` from the `MetaInformation` object `mi`
'''
if mi.title:
self.set_title(id, mi.title)
@ -898,7 +898,7 @@ class LibraryDatabase2(LibraryDatabase):
self.set_timestamp(id, mi.timestamp, notify=False)
self.set_path(id, True)
self.notify('metadata', [id])
def set_authors(self, id, authors, notify=True):
'''
`authors`: A list of authors.
@ -929,14 +929,14 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('UPDATE books SET author_sort=? WHERE id=?',
(ss, id))
self.conn.commit()
self.data.set(id, FIELD_MAP['authors'],
self.data.set(id, FIELD_MAP['authors'],
','.join([a.replace(',', '|') for a in authors]),
row_is_id=True)
self.data.set(id, FIELD_MAP['author_sort'], ss, row_is_id=True)
self.set_path(id, True)
if notify:
self.notify('metadata', [id])
def set_title(self, id, title, notify=True):
if not title:
return
@ -949,7 +949,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.commit()
if notify:
self.notify('metadata', [id])
def set_timestamp(self, id, dt, notify=True):
if dt:
self.conn.execute('UPDATE books SET timestamp=? WHERE id=?', (dt, id))
@ -957,7 +957,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.commit()
if notify:
self.notify('metadata', [id])
def set_publisher(self, id, publisher, notify=True):
self.conn.execute('DELETE FROM books_publishers_link WHERE book=?',(id,))
self.conn.execute('DELETE FROM publishers WHERE (SELECT COUNT(id) FROM books_publishers_link WHERE publisher=publishers.id) < 1')
@ -974,7 +974,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['publisher'], publisher, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_tags(self, id, tags, append=False, notify=True):
'''
@param tags: list of strings
@ -1018,7 +1018,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['tags'], tags, row_is_id=True)
if notify:
self.notify('metadata', [id])
def unapply_tags(self, book_id, tags, notify=True):
for tag in tags:
id = self.conn.get('SELECT id FROM tags WHERE name=?', (tag,), all=False)
@ -1028,7 +1028,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.refresh_ids(self.conn, [book_id])
if notify:
self.notify('metadata', [id])
def is_tag_used(self, tag):
existing_tags = self.all_tags()
lt = [t.lower() for t in existing_tags]
@ -1037,7 +1037,7 @@ class LibraryDatabase2(LibraryDatabase):
return True
except ValueError:
return False
def delete_tag(self, tag):
existing_tags = self.all_tags()
lt = [t.lower() for t in existing_tags]
@ -1052,7 +1052,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('DELETE FROM tags WHERE id=?', (id,))
self.conn.commit()
def set_series(self, id, series, notify=True):
self.conn.execute('DELETE FROM books_series_link WHERE book=?',(id,))
self.conn.execute('DELETE FROM series WHERE (SELECT COUNT(id) FROM books_series_link WHERE series=series.id) < 1')
@ -1075,7 +1075,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['series'], series, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_series_index(self, id, idx, notify=True):
if idx is None:
idx = 1
@ -1091,7 +1091,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['series_index'], int(idx), row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_rating(self, id, rating, notify=True):
rating = int(rating)
self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,))
@ -1102,7 +1102,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['rating'], rating, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_comment(self, id, text, notify=True):
self.conn.execute('DELETE FROM comments WHERE book=?', (id,))
self.conn.execute('INSERT INTO comments(book,text) VALUES (?,?)', (id, text))
@ -1110,21 +1110,21 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(id, FIELD_MAP['comments'], text, row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_author_sort(self, id, sort, notify=True):
    '''
    Store `sort` as the author_sort of book `id` in the database and in
    the in-memory cache. Listeners get a 'metadata' event unless `notify`
    is false.
    '''
    conn = self.conn
    conn.execute('UPDATE books SET author_sort=? WHERE id=?', (sort, id))
    conn.commit()
    self.data.set(id, FIELD_MAP['author_sort'], sort, row_is_id=True)
    if not notify:
        return
    self.notify('metadata', [id])
def set_isbn(self, id, isbn, notify=True):
    '''
    Store `isbn` for book `id` in the database and in the in-memory cache.
    Listeners get a 'metadata' event unless `notify` is false.
    '''
    conn = self.conn
    conn.execute('UPDATE books SET isbn=? WHERE id=?', (isbn, id))
    conn.commit()
    self.data.set(id, FIELD_MAP['isbn'], isbn, row_is_id=True)
    if not notify:
        return
    self.notify('metadata', [id])
def add_news(self, path, recipe):
format = os.path.splitext(path)[1][1:].lower()
stream = path if hasattr(path, 'read') else open(path, 'rb')
@ -1133,21 +1133,21 @@ class LibraryDatabase2(LibraryDatabase):
stream.seek(0)
mi.series_index = 1
mi.tags = [_('News'), recipe.title]
obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
(mi.title, mi.authors[0]))
id = obj.lastrowid
self.data.books_added([id], self.conn)
self.set_path(id, index_is_id=True)
self.conn.commit()
self.set_metadata(id, mi)
self.add_format(id, format, stream, index_is_id=True)
if not hasattr(path, 'read'):
stream.close()
self.conn.commit()
self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size
return id
def run_import_plugins(self, path_or_stream, format):
format = format.lower()
if hasattr(path_or_stream, 'seek'):
@ -1185,7 +1185,7 @@ class LibraryDatabase2(LibraryDatabase):
aus = aus.decode(preferred_encoding, 'replace')
if isinstance(title, str):
title = title.decode(preferred_encoding)
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(title, uri, series_index, aus))
id = obj.lastrowid
self.data.books_added([id], self.conn)
@ -1207,7 +1207,7 @@ class LibraryDatabase2(LibraryDatabase):
uris = list(duplicate[3] for duplicate in duplicates)
return (paths, formats, metadata, uris), len(ids)
return None, len(ids)
def import_book(self, mi, formats, notify=True):
series_index = 1 if mi.series_index is None else mi.series_index
if not mi.title:
@ -1234,7 +1234,7 @@ class LibraryDatabase2(LibraryDatabase):
self.data.refresh_ids(self.conn, [id]) # Needed to update format list and size
if notify:
self.notify('add', [id])
def move_library_to(self, newloc, progress=None):
header = _(u'<p>Copying books to %s<br><center>')%newloc
books = self.conn.get('SELECT id, path, title FROM books')
@ -1263,7 +1263,7 @@ class LibraryDatabase2(LibraryDatabase):
old_dirs.add(srcdir)
if progress is not None:
progress.setValue(i+1)
dbpath = os.path.join(newloc, os.path.basename(self.dbpath))
shutil.copyfile(self.dbpath, dbpath)
opath = self.dbpath
@ -1279,22 +1279,22 @@ class LibraryDatabase2(LibraryDatabase):
if progress is not None:
progress.reset()
progress.hide()
def __iter__(self):
for record in self.data._data:
if record is not None:
yield record
def all_ids(self):
    "Yield the 'id' entry of every record obtained by iterating over self."
    for record in self:
        yield record['id']
def get_data_as_dict(self, prefix=None, authors_as_string=False):
'''
Return all metadata stored in the database as a dict. Includes paths to
the cover and each format.
:param prefix: The prefix for all paths. By default, the prefix is the absolute path
to the library folder.
'''
@ -1325,9 +1325,9 @@ class LibraryDatabase2(LibraryDatabase):
x['formats'].append(path%fmt.lower())
x['fmt_'+fmt.lower()] = path%fmt.lower()
x['available_formats'] = [i.upper() for i in formats.split(',')]
return data
def migrate_old(self, db, progress):
header = _(u'<p>Migrating old database to ebook library in %s<br><center>')%self.library_path
progress.setValue(0)
@ -1338,23 +1338,23 @@ class LibraryDatabase2(LibraryDatabase):
books = db.conn.get('SELECT id, title, sort, timestamp, uri, series_index, author_sort, isbn FROM books ORDER BY id ASC')
progress.setAutoReset(False)
progress.setRange(0, len(books))
for book in books:
self.conn.execute('INSERT INTO books(id, title, sort, timestamp, uri, series_index, author_sort, isbn) VALUES(?, ?, ?, ?, ?, ?, ?, ?);', book)
tables = '''
authors ratings tags series books_tags_link
authors ratings tags series books_tags_link
comments publishers
books_authors_link conversion_options
books_publishers_link
books_ratings_link
books_authors_link conversion_options
books_publishers_link
books_ratings_link
books_series_link feeds
'''.split()
for table in tables:
rows = db.conn.get('SELECT * FROM %s ORDER BY id ASC'%table)
rows = db.conn.get('SELECT * FROM %s ORDER BY id ASC'%table)
for row in rows:
self.conn.execute('INSERT INTO %s VALUES(%s)'%(table, ','.join(repeat('?', len(row)))), row)
self.conn.commit()
self.refresh('timestamp', True)
for i, book in enumerate(books):
@ -1379,7 +1379,7 @@ books_series_link feeds
self.vacuum()
progress.reset()
return len(books)
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
index_is_id=False, callback=None):
if not os.path.exists(dir):
@ -1425,7 +1425,7 @@ books_series_link feeds
opf = OPFCreator(base, mi)
opf.render(f)
f.close()
fmts = self.formats(idx, index_is_id=index_is_id)
if not fmts:
fmts = ''
@ -1449,7 +1449,7 @@ books_series_link feeds
if not callback(count, mi.title):
return
def export_single_format_to_dir(self, dir, indices, format,
def export_single_format_to_dir(self, dir, indices, format,
index_is_id=False, callback=None):
dir = os.path.abspath(dir)
if not index_is_id:
@ -1476,7 +1476,7 @@ books_series_link feeds
f.write(data)
f.seek(0)
try:
set_metadata(f, self.get_metadata(id, index_is_id=True, get_cover=True),
set_metadata(f, self.get_metadata(id, index_is_id=True, get_cover=True),
stream_type=format.lower())
except:
pass
@ -1485,7 +1485,7 @@ books_series_link feeds
if not callback(count, title):
break
return failures
def find_books_in_directory(self, dirpath, single_book_per_directory):
dirpath = os.path.abspath(dirpath)
if single_book_per_directory:
@ -1514,12 +1514,12 @@ books_series_link feeds
ext = ext[1:].lower()
if ext not in BOOK_EXTENSIONS:
continue
key = os.path.splitext(path)[0]
if not books.has_key(key):
books[key] = []
books[key].append(path)
for formats in books.values():
yield formats
@ -1543,7 +1543,7 @@ books_series_link feeds
formats = self.find_books_in_directory(dirpath, True)
if not formats:
return
mi = metadata_from_formats(formats)
if mi.title is None:
return
@ -1552,7 +1552,7 @@ books_series_link feeds
self.import_book(mi, formats)
if callable(callback):
callback(mi.title)
def recursive_import(self, root, single_book_per_directory=True, callback=None):
root = os.path.abspath(root)
duplicates = []
@ -1565,8 +1565,5 @@ books_series_link feeds
if callable(callback):
if callback(''):
break
return duplicates

View File

@ -14,7 +14,7 @@ from Queue import Queue
from threading import RLock
from datetime import tzinfo, datetime, timedelta
from calibre.library import title_sort
from calibre.ebooks.metadata import title_sort
global_lock = RLock()

View File

@ -16,66 +16,56 @@ if os.environ.has_key('DESTDIR'):
entry_points = {
'console_scripts': [ \
'prs500 = calibre.devices.prs500.cli.main:main',
'lrf-meta = calibre.ebooks.lrf.meta:main',
'rtf-meta = calibre.ebooks.metadata.rtf:main',
'pdf-meta = calibre.ebooks.metadata.pdf:main',
'lit-meta = calibre.ebooks.metadata.lit:main',
'imp-meta = calibre.ebooks.metadata.imp:main',
'rb-meta = calibre.ebooks.metadata.rb:main',
'opf-meta = calibre.ebooks.metadata.opf2:main',
'odt-meta = calibre.ebooks.metadata.odt:main',
'epub-meta = calibre.ebooks.metadata.epub:main',
'mobi-meta = calibre.ebooks.metadata.mobi:main',
'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main',
'html2lrf = calibre.ebooks.lrf.html.convert_from:main',
'html2oeb = calibre.ebooks.html:main',
'html2epub = calibre.ebooks.epub.from_html:main',
'odt2oeb = calibre.ebooks.odt.to_oeb:main',
'markdown-calibre = calibre.ebooks.markdown.markdown:main',
'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main',
'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main',
'rtf2lrf = calibre.ebooks.lrf.rtf.convert_from:main',
'web2disk = calibre.web.fetch.simple:main',
'feeds2disk = calibre.web.feeds.main:main',
'calibre-server = calibre.library.server:main',
'feeds2lrf = calibre.ebooks.lrf.feeds.convert_from:main',
'feeds2epub = calibre.ebooks.epub.from_feeds:main',
'feeds2mobi = calibre.ebooks.mobi.from_feeds:main',
'web2lrf = calibre.ebooks.lrf.web.convert_from:main',
'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main',
'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main',
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
'fb2-meta = calibre.ebooks.metadata.fb2:main',
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'any2epub = calibre.ebooks.epub.from_any:main',
'any2lit = calibre.ebooks.lit.from_any:main',
'any2mobi = calibre.ebooks.mobi.from_any:main',
'any2pdf = calibre.ebooks.pdf.from_any:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main',
'lit2oeb = calibre.ebooks.lit.reader:main',
'oeb2lit = calibre.ebooks.lit.writer:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main',
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
],
'ebook-device = calibre.devices.prs500.cli.main:main',
'ebook-meta = calibre.ebooks.metadata.cli:main',
'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main',
'html2lrf = calibre.ebooks.lrf.html.convert_from:main',
'html2oeb = calibre.ebooks.html:main',
'html2epub = calibre.ebooks.epub.from_html:main',
'odt2oeb = calibre.ebooks.odt.to_oeb:main',
'markdown-calibre = calibre.ebooks.markdown.markdown:main',
'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main',
'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main',
'rtf2lrf = calibre.ebooks.lrf.rtf.convert_from:main',
'web2disk = calibre.web.fetch.simple:main',
'feeds2disk = calibre.web.feeds.main:main',
'calibre-server = calibre.library.server:main',
'feeds2lrf = calibre.ebooks.lrf.feeds.convert_from:main',
'feeds2epub = calibre.ebooks.epub.from_feeds:main',
'feeds2mobi = calibre.ebooks.mobi.from_feeds:main',
'web2lrf = calibre.ebooks.lrf.web.convert_from:main',
'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main',
'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main',
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'any2epub = calibre.ebooks.epub.from_any:main',
'any2lit = calibre.ebooks.lit.from_any:main',
'any2mobi = calibre.ebooks.mobi.from_any:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main',
'lit2oeb = calibre.ebooks.lit.reader:main',
'oeb2lit = calibre.ebooks.lit.writer:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main',
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
'any2pdf = calibre.ebooks.pdf.from_any:main',
],
'gui_scripts' : [
__appname__+' = calibre.gui2.main:main',
'lrfviewer = calibre.gui2.lrf_renderer.main:main',
'ebook-viewer = calibre.gui2.viewer.main:main',
__appname__+' = calibre.gui2.main:main',
'lrfviewer = calibre.gui2.lrf_renderer.main:main',
'ebook-viewer = calibre.gui2.viewer.main:main',
],
}
@ -177,7 +167,7 @@ def setup_completion(fatal_errors):
sys.stdout.flush()
from calibre.ebooks.lrf.html.convert_from import option_parser as htmlop
from calibre.ebooks.lrf.txt.convert_from import option_parser as txtop
from calibre.ebooks.lrf.meta import option_parser as metaop
from calibre.ebooks.metadata.cli import option_parser as metaop, filetypes as meta_filetypes
from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
@ -186,7 +176,6 @@ def setup_completion(fatal_errors):
from calibre.web.feeds.main import option_parser as feeds2disk
from calibre.web.feeds.recipes import titles as feed_titles
from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf
from calibre.ebooks.metadata.epub import option_parser as epub_meta
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_html import option_parser as html2epub
from calibre.ebooks.html import option_parser as html2oeb
@ -225,15 +214,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('any2mobi', any2mobi, any_formats))
f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf']))
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
f.write(opts_and_exts('imp-meta', metaop, ['imp']))
f.write(opts_and_exts('rb-meta', metaop, ['rb']))
f.write(opts_and_exts('opf-meta', metaop, ['opf']))
f.write(opts_and_exts('odt-meta', metaop, ['odt', 'ods', 'odf', 'odg', 'odp']))
f.write(opts_and_exts('epub-meta', epub_meta, ['epub']))
f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc']))
@ -423,10 +404,8 @@ def install_man_pages(fatal_errors):
os.environ['PATH'] += ':'+os.path.expanduser('~/bin')
for src in entry_points['console_scripts']:
prog = src[:src.index('=')].strip()
if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta',
'markdown-calibre', 'calibre-debug', 'fb2-meta',
'calibre-fontconfig', 'calibre-parallel', 'odt-meta',
'rb-meta', 'imp-meta', 'mobi-meta'):
if prog in ('ebook-device', 'markdown-calibre',
'calibre-fontconfig', 'calibre-parallel'):
continue
help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,

View File

@ -0,0 +1,92 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'A simplified logging system'
# Message severity levels, in increasing numeric order. Log.prints() compares
# a message's level against the Log's filter_level and drops lower ones.
DEBUG = 0
INFO = 1
WARN = 2
ERROR = 3
import sys, traceback
from functools import partial
from calibre import prints
from calibre.utils.terminfo import TerminalController
class ANSIStream:
    '''
    Log output that wraps every message in terminal control sequences.

    The sequences are taken from a TerminalController created for `stream`.
    INFO messages get an empty prefix; DEBUG, WARN and ERROR get the
    controller's GREEN, YELLOW and RED sequences respectively.
    '''

    def __init__(self, stream=sys.stdout):
        '''
        :param stream: File-like object that messages are written to.
        '''
        self.stream = stream
        tc = TerminalController(stream)
        # Prefix written before a message of the given level.
        self.color = {
            DEBUG: tc.GREEN,
            INFO: '',
            WARN: tc.YELLOW,
            ERROR: tc.RED
        }
        # Suffix written after every message, whatever its level.
        self.normal = tc.NORMAL

    def prints(self, level, *args, **kwargs):
        '''
        Write `args` to the stream, surrounded by the prefix for `level` and
        the `normal` suffix.

        `args` and `kwargs` are forwarded to :func:`calibre.prints`; any
        `file` keyword supplied by the caller is replaced by this object's
        stream. Raises KeyError if `level` is not one of the known levels.
        '''
        self.stream.write(self.color[level])
        kwargs['file'] = self.stream
        try:
            prints(*args, **kwargs)
        finally:
            # Always emit the suffix, even if printing failed part way, so
            # that the colour prefix written above is never left dangling.
            self.stream.write(self.normal)

    def flush(self):
        '''Flush the underlying stream.'''
        self.stream.flush()
class HTMLStream:
    '''
    Log output that wraps every message in an HTML ``<span>`` element.

    DEBUG, WARN and ERROR messages get an inline ``color`` style (green,
    yellow and red respectively); INFO messages get a plain ``<span>``.
    The message text itself is written as-is.
    '''

    def __init__(self, stream=sys.stdout):
        '''
        :param stream: File-like object that the markup is written to.
        '''
        self.stream = stream
        # Opening tag written before a message of the given level.
        self.color = {
            DEBUG: '<span style="color:green">',
            INFO: '<span>',
            WARN: '<span style="color:yellow">',
            ERROR: '<span style="color:red">'
        }
        # Closing tag written after every message.
        self.normal = '</span>'

    def prints(self, level, *args, **kwargs):
        '''
        Write `args` to the stream, enclosed in the span for `level`.

        `args` and `kwargs` are forwarded to :func:`calibre.prints`; any
        `file` keyword supplied by the caller is replaced by this object's
        stream. Raises KeyError if `level` is not one of the known levels.
        '''
        self.stream.write(self.color[level])
        kwargs['file'] = self.stream
        try:
            prints(*args, **kwargs)
        finally:
            # Close the span even if printing failed part way, so the
            # opening tag written above never stays unbalanced.
            self.stream.write(self.normal)

    def flush(self):
        '''Flush the underlying stream.'''
        self.stream.flush()
class Log(object):
    '''
    Minimal logger that fans each message out to a list of output objects.

    Every object in `outputs` must provide ``prints(level, *args, **kwargs)``.
    Messages whose level is below `filter_level` are discarded. Calling the
    instance directly logs at INFO level.
    '''

    # Re-export the module level constants so callers can write Log.DEBUG etc.
    DEBUG = DEBUG
    INFO = INFO
    WARN = WARN
    ERROR = ERROR

    def __init__(self, level=INFO):
        '''
        :param level: Lowest level that is passed on to the outputs.
        '''
        self.filter_level = level
        self.outputs = [ANSIStream()]

        # Per-level shortcuts: log.debug(...), log.info(...), and so on.
        shortcuts = (
            ('debug', DEBUG),
            ('info', INFO),
            ('warn', WARN),
            ('error', ERROR),
        )
        for attr, lvl in shortcuts:
            setattr(self, attr, partial(self.prints, lvl))
        # `warning` is an alias for the very same callable as `warn`.
        self.warning = self.warn

    def prints(self, level, *args, **kwargs):
        '''
        Send the message to every output, unless `level` is below
        `filter_level`.
        '''
        if level >= self.filter_level:
            for sink in self.outputs:
                sink.prints(level, *args, **kwargs)

    def exception(self, *args, **kwargs):
        '''
        Log the message at ERROR level, followed by the traceback of the
        exception currently being handled at DEBUG level.

        The optional `limit` keyword is removed from `kwargs` and handed to
        traceback.format_exc().
        '''
        tb_limit = kwargs.pop('limit', None)
        self.prints(ERROR, *args, **kwargs)
        trace = traceback.format_exc(tb_limit)
        self.prints(DEBUG, trace)

    def __call__(self, *args, **kwargs):
        '''Log the message at INFO level.'''
        self.prints(INFO, *args, **kwargs)

View File

@ -33,7 +33,7 @@ class TerminalController:
>>> term = TerminalController()
>>> if term.CLEAR_SCREEN:
... print 'This terminal supports clearning the screen.'
... print 'This terminal supports clearing the screen.'
Finally, if the width and height of the terminal are known, then
they will be stored in the `COLS` and `LINES` attributes.