mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Beginnings of the new conversion framework. Input plugins for MOBI and EPUB.
This commit is contained in:
parent
30bd23ee38
commit
925a86fb0c
@ -90,28 +90,11 @@ def prints(*args, **kwargs):
|
|||||||
if i != len(args)-1:
|
if i != len(args)-1:
|
||||||
file.write(sep)
|
file.write(sep)
|
||||||
file.write(end)
|
file.write(end)
|
||||||
file.flush()
|
|
||||||
|
|
||||||
class CommandLineError(Exception):
|
class CommandLineError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class ColoredFormatter(Formatter):
|
|
||||||
|
|
||||||
def format(self, record):
|
|
||||||
ln = record.__dict__['levelname']
|
|
||||||
col = ''
|
|
||||||
if ln == 'CRITICAL':
|
|
||||||
col = terminal_controller.YELLOW
|
|
||||||
elif ln == 'ERROR':
|
|
||||||
col = terminal_controller.RED
|
|
||||||
elif ln in ['WARN', 'WARNING']:
|
|
||||||
col = terminal_controller.BLUE
|
|
||||||
elif ln == 'INFO':
|
|
||||||
col = terminal_controller.GREEN
|
|
||||||
elif ln == 'DEBUG':
|
|
||||||
col = terminal_controller.CYAN
|
|
||||||
record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
|
|
||||||
return Formatter.format(self, record)
|
|
||||||
|
|
||||||
|
|
||||||
def setup_cli_handlers(logger, level):
|
def setup_cli_handlers(logger, level):
|
||||||
@ -335,66 +318,23 @@ def english_sort(x, y):
|
|||||||
'''
|
'''
|
||||||
return cmp(_spat.sub('', x), _spat.sub('', y))
|
return cmp(_spat.sub('', x), _spat.sub('', y))
|
||||||
|
|
||||||
class LoggingInterface:
|
class ColoredFormatter(Formatter):
|
||||||
|
|
||||||
def __init__(self, logger):
|
def format(self, record):
|
||||||
self.__logger = self.logger = logger
|
ln = record.__dict__['levelname']
|
||||||
|
col = ''
|
||||||
def setup_cli_handler(self, verbosity):
|
if ln == 'CRITICAL':
|
||||||
for handler in self.__logger.handlers:
|
col = terminal_controller.YELLOW
|
||||||
if isinstance(handler, logging.StreamHandler):
|
elif ln == 'ERROR':
|
||||||
return
|
col = terminal_controller.RED
|
||||||
if os.environ.get('CALIBRE_WORKER', None) is not None and self.__logger.handlers:
|
elif ln in ['WARN', 'WARNING']:
|
||||||
return
|
col = terminal_controller.BLUE
|
||||||
stream = sys.stdout
|
elif ln == 'INFO':
|
||||||
formatter = logging.Formatter()
|
col = terminal_controller.GREEN
|
||||||
level = logging.INFO
|
elif ln == 'DEBUG':
|
||||||
if verbosity > 0:
|
col = terminal_controller.CYAN
|
||||||
formatter = ColoredFormatter('[%(levelname)s] %(message)s') if verbosity > 1 else \
|
record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
|
||||||
ColoredFormatter('%(levelname)s: %(message)s')
|
return Formatter.format(self, record)
|
||||||
level = logging.DEBUG
|
|
||||||
if verbosity > 1:
|
|
||||||
stream = sys.stderr
|
|
||||||
|
|
||||||
handler = logging.StreamHandler(stream)
|
|
||||||
handler.setFormatter(formatter)
|
|
||||||
handler.setLevel(level)
|
|
||||||
self.__logger.addHandler(handler)
|
|
||||||
self.__logger.setLevel(level)
|
|
||||||
|
|
||||||
|
|
||||||
def ___log(self, func, msg, args, kwargs):
|
|
||||||
args = [msg] + list(args)
|
|
||||||
for i in range(len(args)):
|
|
||||||
if not isinstance(args[i], basestring):
|
|
||||||
continue
|
|
||||||
if sys.version_info[:2] > (2, 5):
|
|
||||||
if not isinstance(args[i], unicode):
|
|
||||||
args[i] = args[i].decode(preferred_encoding, 'replace')
|
|
||||||
elif isinstance(args[i], unicode):
|
|
||||||
args[i] = args[i].encode(preferred_encoding, 'replace')
|
|
||||||
func(*args, **kwargs)
|
|
||||||
|
|
||||||
def log_debug(self, msg, *args, **kwargs):
|
|
||||||
self.___log(self.__logger.debug, msg, args, kwargs)
|
|
||||||
|
|
||||||
def log_info(self, msg, *args, **kwargs):
|
|
||||||
self.___log(self.__logger.info, msg, args, kwargs)
|
|
||||||
|
|
||||||
def log_warning(self, msg, *args, **kwargs):
|
|
||||||
self.___log(self.__logger.warning, msg, args, kwargs)
|
|
||||||
|
|
||||||
def log_warn(self, msg, *args, **kwargs):
|
|
||||||
self.___log(self.__logger.warning, msg, args, kwargs)
|
|
||||||
|
|
||||||
def log_error(self, msg, *args, **kwargs):
|
|
||||||
self.___log(self.__logger.error, msg, args, kwargs)
|
|
||||||
|
|
||||||
def log_critical(self, msg, *args, **kwargs):
|
|
||||||
self.___log(self.__logger.critical, msg, args, kwargs)
|
|
||||||
|
|
||||||
def log_exception(self, msg, *args):
|
|
||||||
self.___log(self.__logger.exception, msg, args, {})
|
|
||||||
|
|
||||||
def walk(dir):
|
def walk(dir):
|
||||||
''' A nice interface to os.walk '''
|
''' A nice interface to os.walk '''
|
||||||
|
@ -242,8 +242,13 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
|
|||||||
set_metadata(stream, mi)
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
|
|
||||||
plugins = [HTML2ZIP]
|
from calibre.ebooks.epub.input import EPUBInput
|
||||||
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
|
from calibre.customize.profiles import input_profiles
|
||||||
|
|
||||||
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataWriter')]
|
x.__name__.endswith('MetadataWriter')]
|
||||||
|
plugins += input_profiles
|
@ -1,28 +1,30 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
'''
|
'''
|
||||||
Defines the plugin system for conversions.
|
Defines the plugin system for conversions.
|
||||||
'''
|
'''
|
||||||
import re
|
import re, os, shutil
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from calibre import CurrentDir
|
||||||
from calibre.customize import Plugin
|
from calibre.customize import Plugin
|
||||||
|
|
||||||
|
|
||||||
class ConversionOption(object):
|
class ConversionOption(object):
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Class representing conversion options
|
Class representing conversion options
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, name=None, default=None, help=None, long_switch=None,
|
def __init__(self, name=None, help=None, long_switch=None,
|
||||||
short_switch=None, choices=None, gui_label=None,
|
short_switch=None, choices=None):
|
||||||
category=None):
|
|
||||||
self.name = name
|
self.name = name
|
||||||
self.default = default
|
|
||||||
self.help = help
|
self.help = help
|
||||||
self.long_switch = long_switch
|
self.long_switch = long_switch
|
||||||
self.short_switch = short_switch
|
self.short_switch = short_switch
|
||||||
self.choices = choices
|
self.choices = choices
|
||||||
self.gui_label = gui_label
|
|
||||||
self.category = category
|
if self.long_switch is None:
|
||||||
|
self.long_switch = '--'+self.name.replace('_', '-')
|
||||||
|
|
||||||
self.validate_parameters()
|
self.validate_parameters()
|
||||||
|
|
||||||
@ -32,41 +34,156 @@ class ConversionOption(object):
|
|||||||
'''
|
'''
|
||||||
if re.match(r'[a-zA-Z_]([a-zA-Z0-9_])*', self.name) is None:
|
if re.match(r'[a-zA-Z_]([a-zA-Z0-9_])*', self.name) is None:
|
||||||
raise ValueError(self.name + ' is not a valid Python identifier')
|
raise ValueError(self.name + ' is not a valid Python identifier')
|
||||||
if not (isinstance(self.default, (int, float, str, unicode)) or \
|
if not self.help:
|
||||||
self.default is None):
|
raise ValueError('You must set the help text')
|
||||||
|
|
||||||
|
|
||||||
|
class OptionRecommendation(object):
|
||||||
|
LOW = 1
|
||||||
|
MED = 2
|
||||||
|
HIGH = 3
|
||||||
|
|
||||||
|
def __init__(self, recommeded_value, level=LOW, **kwargs):
|
||||||
|
'''
|
||||||
|
An option recommendation. That is, an option as well as its recommended
|
||||||
|
value and the level of the recommendation.
|
||||||
|
'''
|
||||||
|
self.level = level
|
||||||
|
self.recommended_value = recommeded_value
|
||||||
|
self.option = kwargs.pop('option', None)
|
||||||
|
if self.option is None:
|
||||||
|
self.option = ConversionOption(**kwargs)
|
||||||
|
|
||||||
|
self.validate_parameters()
|
||||||
|
|
||||||
|
def validate_parameters(self):
|
||||||
|
if self.option.choices and self.recommended_value not in \
|
||||||
|
self.option.choices:
|
||||||
|
raise ValueError('Recommended value not in choices')
|
||||||
|
if not (isinstance(self.recommended_value, (int, float, str, unicode))\
|
||||||
|
or self.default is None):
|
||||||
raise ValueError(unicode(self.default) +
|
raise ValueError(unicode(self.default) +
|
||||||
' is not a string or a number')
|
' is not a string or a number')
|
||||||
if not self.help:
|
|
||||||
raise ValueError('You must set the help text')
|
|
||||||
|
|
||||||
class ConversionPlugin(Plugin):
|
class InputFormatPlugin(Plugin):
|
||||||
|
|
||||||
'''
|
'''
|
||||||
The base class for all conversion related plugins.
|
InputFormatPlugins are responsible for converting a document into
|
||||||
|
HTML+OPF+CSS+etc.
|
||||||
|
The results of the conversion *must* be encoded in UTF-8.
|
||||||
|
The main action happens in :meth:`convert`.
|
||||||
'''
|
'''
|
||||||
#: List of options
|
|
||||||
#: Each option must be a dictionary. The dictionary can contain several
|
|
||||||
#: keys defining the option. The ones marked by a * are required, the rest
|
|
||||||
#: are optional. The keys are::
|
|
||||||
#:
|
|
||||||
#: *'name' : A valid python identifier.
|
|
||||||
#: *'default' : The default value for this option.
|
|
||||||
#: *'help' :
|
|
||||||
#: 'short_switch' : A suggestion for a short form of the command line
|
|
||||||
#: switch (for example if name is 'title', this
|
|
||||||
#: could be 't'). It is only used if no prior
|
|
||||||
#: conversion plugin has claimed it.
|
|
||||||
options = []
|
|
||||||
|
|
||||||
type = _('Conversion')
|
type = _('Conversion Input')
|
||||||
can_be_disabled = False
|
can_be_disabled = False
|
||||||
supported_platforms = ['windows', 'osx', 'linux']
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
|
||||||
|
|
||||||
class InputFormatPlugin(ConversionPlugin):
|
|
||||||
|
|
||||||
#: Set of file types for which this plugin should be run
|
#: Set of file types for which this plugin should be run
|
||||||
#: For example: ``set(['lit', 'mobi', 'prc'])``
|
#: For example: ``set(['azw', 'mobi', 'prc'])``
|
||||||
file_types = set([])
|
file_types = set([])
|
||||||
|
|
||||||
|
#: Options shared by all Input format plugins. Do not override
|
||||||
|
#: in sub-classes. Use :attr:`options` instead. Every option must be an
|
||||||
|
#: instance of :class:`OptionRecommendation`.
|
||||||
|
common_options = set([
|
||||||
|
OptionRecommendation(name='debug_input',
|
||||||
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Save the output from the input plugin to the specified '
|
||||||
|
'directory. Useful if you are unsure at which stage '
|
||||||
|
'of the conversion process a bug is occurring. '
|
||||||
|
'WARNING: This completely deletes the contents of '
|
||||||
|
'the specified directory.')
|
||||||
|
),
|
||||||
|
|
||||||
|
OptionRecommendation(name='input_encoding',
|
||||||
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Specify the character encoding of the input document. If '
|
||||||
|
'set this option will override any encoding declared by the '
|
||||||
|
'document itself. Particularly useful for documents that '
|
||||||
|
'do not declare an encoding or that have erroneous '
|
||||||
|
'encoding declarations.')
|
||||||
|
),
|
||||||
|
|
||||||
|
])
|
||||||
|
|
||||||
|
#: Options to customize the behavior of this plugin. Every option must be an
|
||||||
|
#: instance of :class:`OptionRecommendation`.
|
||||||
|
options = set([])
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, parse_cache, log):
|
||||||
|
'''
|
||||||
|
This method must be implemented in sub-classes. It must return
|
||||||
|
the path to the created OPF file. All output should be contained in
|
||||||
|
the current directory. If this plugin creates files outside the current
|
||||||
|
directory they must be deleted/marked for deletion before this method
|
||||||
|
returns.
|
||||||
|
|
||||||
|
:param stream: A file like object that contains the input file.
|
||||||
|
|
||||||
|
:param options: Options to customize the conversion process.
|
||||||
|
Guaranteed to have attributes corresponding
|
||||||
|
to all the options declared by this plugin. In
|
||||||
|
addition, it will have a verbose attribute that
|
||||||
|
takes integral values from zero upwards. Higher numbers
|
||||||
|
mean more verbose output. Another useful attribute is
|
||||||
|
``input_profile`` that is an instance of
|
||||||
|
:class:`calibre.customize.profiles.InputProfile`.
|
||||||
|
|
||||||
|
:param file_ext: The extension (without the .) of the input file. It
|
||||||
|
is guaranteed to be one of the `file_types` supported
|
||||||
|
by this plugin.
|
||||||
|
|
||||||
|
:param parse_cache: A dictionary that maps absolute file paths to
|
||||||
|
parsed representations of their contents. For
|
||||||
|
HTML the representation is an lxml element of
|
||||||
|
the root of the tree. For CSS it is a cssutils
|
||||||
|
stylesheet. If this plugin parses any of the
|
||||||
|
output files, it should add them to the cache
|
||||||
|
so that later stages of the conversion won't
|
||||||
|
have to re-parse them. If a parsed representation
|
||||||
|
is in the cache, there is no need to actually
|
||||||
|
write the file to disk.
|
||||||
|
|
||||||
|
:param log: A :class:`calibre.utils.logging.Log` object. All output
|
||||||
|
should use this object.
|
||||||
|
'''
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def __call__(self, stream, options, file_ext, parse_cache, log, output_dir):
|
||||||
|
log('InputFormatPlugin: %s running'%self.name, end=' ')
|
||||||
|
if hasattr(stream, 'name'):
|
||||||
|
log('on', stream.name)
|
||||||
|
|
||||||
|
with CurrentDir(output_dir):
|
||||||
|
for x in os.listdir('.'):
|
||||||
|
shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
|
||||||
|
|
||||||
|
|
||||||
|
ret = self.convert(stream, options, file_ext, parse_cache, log)
|
||||||
|
for key in list(parse_cache.keys()):
|
||||||
|
if os.path.abspath(key) != key:
|
||||||
|
log.warn(('InputFormatPlugin: %s returned a '
|
||||||
|
'relative path: %s')%(self.name, key)
|
||||||
|
)
|
||||||
|
parse_cache[os.path.abspath(key)] = parse_cache.pop(key)
|
||||||
|
|
||||||
|
if options.debug_input is not None:
|
||||||
|
options.debug_input = os.path.abspath(options.debug_input)
|
||||||
|
if not os.path.exists(options.debug_input):
|
||||||
|
os.makedirs(options.debug_input)
|
||||||
|
shutil.rmtree(options.debug_input)
|
||||||
|
for f, obj in parse_cache.items():
|
||||||
|
if hasattr(obj, 'cssText'):
|
||||||
|
raw = obj.cssText
|
||||||
|
else:
|
||||||
|
raw = html.tostring(obj, encoding='utf-8', method='xml',
|
||||||
|
include_meta_content_type=True, pretty_print=True)
|
||||||
|
if isinstance(raw, unicode):
|
||||||
|
raw = raw.encode('utf-8')
|
||||||
|
open(f, 'wb').write(raw)
|
||||||
|
shutil.copytree('.', options.debug_input)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
27
src/calibre/customize/profiles.py
Normal file
27
src/calibre/customize/profiles.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.customize import Plugin
|
||||||
|
|
||||||
|
class InputProfile(Plugin):
|
||||||
|
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
supported_platforms = set(['windows', 'osx', 'linux'])
|
||||||
|
can_be_disabled = False
|
||||||
|
type = _('Input profile')
|
||||||
|
|
||||||
|
# TODO: Add some real information to this profile. All other profiles must
|
||||||
|
# inherit from this profile and override as needed
|
||||||
|
|
||||||
|
name = 'Default Input Profile'
|
||||||
|
short_name = 'default' # Used in the CLI so dont spaces etc. in it
|
||||||
|
description = _('This profile tries to provide sane defaults and is useful '
|
||||||
|
'if you know nothing about the input document.')
|
||||||
|
|
||||||
|
input_profiles = [InputProfile]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -6,13 +6,14 @@ import os, shutil, traceback, functools, sys
|
|||||||
|
|
||||||
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
|
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
|
||||||
MetadataWriterPlugin
|
MetadataWriterPlugin
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from calibre.customize.profiles import InputProfile
|
||||||
from calibre.customize.builtins import plugins as builtin_plugins
|
from calibre.customize.builtins import plugins as builtin_plugins
|
||||||
from calibre.constants import __version__, iswindows, isosx
|
from calibre.constants import __version__, iswindows, isosx
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
|
from calibre.utils.config import make_config_dir, Config, ConfigProxy, \
|
||||||
plugin_dir, OptionParser
|
plugin_dir, OptionParser
|
||||||
|
|
||||||
|
|
||||||
version = tuple([int(x) for x in __version__.split('.')])
|
version = tuple([int(x) for x in __version__.split('.')])
|
||||||
|
|
||||||
platform = 'linux'
|
platform = 'linux'
|
||||||
@ -70,7 +71,10 @@ _on_import = {}
|
|||||||
_on_preprocess = {}
|
_on_preprocess = {}
|
||||||
_on_postprocess = {}
|
_on_postprocess = {}
|
||||||
|
|
||||||
|
def input_profiles():
|
||||||
|
for plugin in _initialized_plugins:
|
||||||
|
if isinstance(plugin, InputProfile):
|
||||||
|
yield plugin
|
||||||
|
|
||||||
def reread_filetype_plugins():
|
def reread_filetype_plugins():
|
||||||
global _on_import
|
global _on_import
|
||||||
@ -234,6 +238,17 @@ def find_plugin(name):
|
|||||||
if plugin.name == name:
|
if plugin.name == name:
|
||||||
return plugin
|
return plugin
|
||||||
|
|
||||||
|
def input_format_plugins():
|
||||||
|
for plugin in _initialized_plugins:
|
||||||
|
if isinstance(plugin, InputFormatPlugin):
|
||||||
|
yield plugin
|
||||||
|
|
||||||
|
def plugin_for_input_format(fmt):
|
||||||
|
for plugin in input_format_plugins():
|
||||||
|
if fmt in plugin.file_types:
|
||||||
|
return plugin
|
||||||
|
|
||||||
|
|
||||||
def disable_plugin(plugin_or_name):
|
def disable_plugin(plugin_or_name):
|
||||||
x = getattr(plugin_or_name, 'name', plugin_or_name)
|
x = getattr(plugin_or_name, 'name', plugin_or_name)
|
||||||
plugin = find_plugin(x)
|
plugin = find_plugin(x)
|
||||||
|
4
src/calibre/ebooks/conversion/__init__.py
Normal file
4
src/calibre/ebooks/conversion/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
30
src/calibre/ebooks/conversion/plumber.py
Normal file
30
src/calibre/ebooks/conversion/plumber.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
|
from calibre.customize.ui import input_profiles
|
||||||
|
|
||||||
|
pipeline_options = [
|
||||||
|
|
||||||
|
OptionRecommendation(name='verbose',
|
||||||
|
recommended_value=0, level=OptionRecommendation.LOW,
|
||||||
|
short_switch='v',
|
||||||
|
help=_('Level of verbosity. Specify multiple times for greater '
|
||||||
|
'verbosity.')
|
||||||
|
),
|
||||||
|
|
||||||
|
|
||||||
|
OptionRecommendation(name='input_profile',
|
||||||
|
recommended_value='default', level=OptionRecommendation.LOW,
|
||||||
|
choices=[x.short_name for x in input_profiles()],
|
||||||
|
help=_('Specify the input profile. The input profile gives the '
|
||||||
|
'conversion system information on how to interpret '
|
||||||
|
'various information in the input document. For '
|
||||||
|
'example resolution dependent lengths (i.e. lengths in '
|
||||||
|
'pixels).')
|
||||||
|
),
|
||||||
|
|
||||||
|
]
|
@ -40,38 +40,6 @@ def rules(stylesheets):
|
|||||||
if r.type == r.STYLE_RULE:
|
if r.type == r.STYLE_RULE:
|
||||||
yield r
|
yield r
|
||||||
|
|
||||||
def decrypt_font(key, path):
|
|
||||||
raw = open(path, 'rb').read()
|
|
||||||
crypt = raw[:1024]
|
|
||||||
key = cycle(iter(key))
|
|
||||||
decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
|
|
||||||
with open(path, 'wb') as f:
|
|
||||||
f.write(decrypt)
|
|
||||||
f.write(raw[1024:])
|
|
||||||
|
|
||||||
def process_encryption(encfile, opf):
|
|
||||||
key = None
|
|
||||||
m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
|
|
||||||
if m:
|
|
||||||
key = m.group(1)
|
|
||||||
key = list(map(ord, uuid.UUID(key).bytes))
|
|
||||||
try:
|
|
||||||
root = etree.parse(encfile)
|
|
||||||
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
|
|
||||||
algorithm = em.get('Algorithm', '')
|
|
||||||
if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
|
|
||||||
return False
|
|
||||||
cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
|
|
||||||
uri = cr.get('URI')
|
|
||||||
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
|
|
||||||
if os.path.exists(path):
|
|
||||||
decrypt_font(key, path)
|
|
||||||
return True
|
|
||||||
except:
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
return False
|
|
||||||
|
|
||||||
def initialize_container(path_to_container, opf_name='metadata.opf'):
|
def initialize_container(path_to_container, opf_name='metadata.opf'):
|
||||||
'''
|
'''
|
||||||
Create an empty EPUB document, with a default skeleton.
|
Create an empty EPUB document, with a default skeleton.
|
||||||
|
76
src/calibre/ebooks/epub/input.py
Normal file
76
src/calibre/ebooks/epub/input.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, re, uuid
|
||||||
|
from itertools import cycle
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
class EPUBInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'EPUB Input'
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
description = 'Convert EPUB files (.epub) to HTML'
|
||||||
|
file_types = set(['epub'])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def decrypt_font(cls, key, path):
|
||||||
|
raw = open(path, 'rb').read()
|
||||||
|
crypt = raw[:1024]
|
||||||
|
key = cycle(iter(key))
|
||||||
|
decrypt = ''.join([chr(ord(x)^key.next()) for x in crypt])
|
||||||
|
with open(path, 'wb') as f:
|
||||||
|
f.write(decrypt)
|
||||||
|
f.write(raw[1024:])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def process_ecryption(cls, encfile, opf, log):
|
||||||
|
key = None
|
||||||
|
m = re.search(r'(?i)(urn:uuid:[0-9a-f-]+)', open(opf, 'rb').read())
|
||||||
|
if m:
|
||||||
|
key = m.group(1)
|
||||||
|
key = list(map(ord, uuid.UUID(key).bytes))
|
||||||
|
try:
|
||||||
|
root = etree.parse(encfile)
|
||||||
|
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
|
||||||
|
algorithm = em.get('Algorithm', '')
|
||||||
|
if algorithm != 'http://ns.adobe.com/pdf/enc#RC':
|
||||||
|
return False
|
||||||
|
cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
|
||||||
|
uri = cr.get('URI')
|
||||||
|
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
|
||||||
|
if os.path.exists(path):
|
||||||
|
cls.decrypt_font(key, path)
|
||||||
|
return True
|
||||||
|
except:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
return False
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, parse_cache, log):
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
from calibre import walk
|
||||||
|
from calibre.ebooks import DRMError
|
||||||
|
zf = ZipFile(stream)
|
||||||
|
zf.extractall(os.getcwd())
|
||||||
|
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
|
||||||
|
opf = None
|
||||||
|
for f in walk('.'):
|
||||||
|
if f.lower().endswith('.opf'):
|
||||||
|
opf = f
|
||||||
|
break
|
||||||
|
path = getattr(stream, 'name', 'stream')
|
||||||
|
|
||||||
|
if opf is None:
|
||||||
|
raise ValueError('%s is not a valid EPUB file'%path)
|
||||||
|
|
||||||
|
if os.path.exists(encfile):
|
||||||
|
if not self.process_encryption(encfile, opf, log):
|
||||||
|
raise DRMError(os.path.basename(path))
|
||||||
|
|
||||||
|
return opf
|
||||||
|
|
29
src/calibre/ebooks/mobi/input.py
Normal file
29
src/calibre/ebooks/mobi/input.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
class MOBIInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'MOBI Input'
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
|
||||||
|
file_types = set(['mobi', 'prc', 'azw'])
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, parse_cache, log):
|
||||||
|
from calibre.ebooks.mobi.reader import MobiReader
|
||||||
|
mr = MobiReader(stream, log, options.input_encoding,
|
||||||
|
options.debug_input)
|
||||||
|
mr.extract_content(output_dir=os.getcwdu(), parse_cache)
|
||||||
|
raw = parse_cache.get('calibre_raw_mobi_markup', False)
|
||||||
|
if raw:
|
||||||
|
if isinstance(raw, unicode):
|
||||||
|
raw = raw.encode('utf-8')
|
||||||
|
open('debug-raw.html', 'wb').write(raw)
|
||||||
|
|
||||||
|
return mr.created_opf_path
|
||||||
|
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
Read data from .mobi files
|
Read data from .mobi files
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, struct, os, cStringIO, re, functools
|
import struct, os, cStringIO, re, functools
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image as PILImage
|
from PIL import Image as PILImage
|
||||||
@ -35,8 +35,10 @@ class EXTHHeader(object):
|
|||||||
pos = 0
|
pos = 0
|
||||||
self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
self.mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||||
self.has_fake_cover = True
|
self.has_fake_cover = True
|
||||||
|
left = self.num_items
|
||||||
|
|
||||||
for i in range(self.num_items):
|
while left > 0:
|
||||||
|
left -= 1
|
||||||
id, size = struct.unpack('>LL', raw[pos:pos+8])
|
id, size = struct.unpack('>LL', raw[pos:pos+8])
|
||||||
content = raw[pos+8:pos+size]
|
content = raw[pos+8:pos+size]
|
||||||
pos += size
|
pos += size
|
||||||
@ -76,7 +78,8 @@ class EXTHHeader(object):
|
|||||||
|
|
||||||
class BookHeader(object):
|
class BookHeader(object):
|
||||||
|
|
||||||
def __init__(self, raw, ident):
|
def __init__(self, raw, ident, user_encoding, log):
|
||||||
|
self.log = log
|
||||||
self.compression_type = raw[:2]
|
self.compression_type = raw[:2]
|
||||||
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
|
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
|
||||||
self.encryption_type, = struct.unpack('>H', raw[12:14])
|
self.encryption_type, = struct.unpack('>H', raw[12:14])
|
||||||
@ -92,8 +95,8 @@ class BookHeader(object):
|
|||||||
else:
|
else:
|
||||||
self.ancient = False
|
self.ancient = False
|
||||||
self.doctype = raw[16:20]
|
self.doctype = raw[16:20]
|
||||||
self.length, self.type, self.codepage, self.unique_id, self.version = \
|
self.length, self.type, self.codepage, self.unique_id, \
|
||||||
struct.unpack('>LLLLL', raw[20:40])
|
self.version = struct.unpack('>LLLLL', raw[20:40])
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -102,8 +105,9 @@ class BookHeader(object):
|
|||||||
65001 : 'utf-8',
|
65001 : 'utf-8',
|
||||||
}[self.codepage]
|
}[self.codepage]
|
||||||
except (IndexError, KeyError):
|
except (IndexError, KeyError):
|
||||||
print '[WARNING] Unknown codepage %d. Assuming cp-1252'%self.codepage
|
self.codec = 'cp1252' if user_encoding is None else user_encoding
|
||||||
self.codec = 'cp1252'
|
log.warn('Unknown codepage %d. Assuming %s'%(self.codepage,
|
||||||
|
self.codec))
|
||||||
|
|
||||||
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
|
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
|
||||||
self.extra_flags = 0
|
self.extra_flags = 0
|
||||||
@ -138,9 +142,24 @@ class MobiReader(object):
|
|||||||
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
|
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
|
||||||
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
|
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
|
||||||
|
|
||||||
def __init__(self, filename_or_stream, verbose=False):
|
def __init__(self, filename_or_stream, log, user_encoding=None, debug=None):
|
||||||
self.verbose = verbose
|
self.log = log
|
||||||
|
self.debug = debug
|
||||||
self.embedded_mi = None
|
self.embedded_mi = None
|
||||||
|
self.base_css_rules = '''
|
||||||
|
blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
|
||||||
|
|
||||||
|
p { margin: 0em; text-align: justify }
|
||||||
|
|
||||||
|
.bold { font-weight: bold }
|
||||||
|
|
||||||
|
.italic { font-style: italic }
|
||||||
|
|
||||||
|
.mbp_pagebreak {
|
||||||
|
page-break-after: always; margin: 0; display: block
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
self.tag_css_rules = []
|
||||||
|
|
||||||
if hasattr(filename_or_stream, 'read'):
|
if hasattr(filename_or_stream, 'read'):
|
||||||
stream = filename_or_stream
|
stream = filename_or_stream
|
||||||
@ -177,17 +196,21 @@ class MobiReader(object):
|
|||||||
self.sections.append((section(i), self.section_headers[i]))
|
self.sections.append((section(i), self.section_headers[i]))
|
||||||
|
|
||||||
|
|
||||||
self.book_header = BookHeader(self.sections[0][0], self.ident)
|
self.book_header = BookHeader(self.sections[0][0], self.ident,
|
||||||
|
user_encoding, self.log)
|
||||||
self.name = self.name.decode(self.book_header.codec, 'replace')
|
self.name = self.name.decode(self.book_header.codec, 'replace')
|
||||||
|
|
||||||
def extract_content(self, output_dir=os.getcwdu()):
|
def extract_content(self, output_dir, parse_cache):
|
||||||
output_dir = os.path.abspath(output_dir)
|
output_dir = os.path.abspath(output_dir)
|
||||||
if self.book_header.encryption_type != 0:
|
if self.book_header.encryption_type != 0:
|
||||||
raise DRMError(self.name)
|
raise DRMError(self.name)
|
||||||
|
|
||||||
processed_records = self.extract_text()
|
processed_records = self.extract_text()
|
||||||
|
if self.debug is not None:
|
||||||
|
self.parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
|
||||||
self.add_anchors()
|
self.add_anchors()
|
||||||
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
|
self.processed_html = self.processed_html.decode(self.book_header.codec,
|
||||||
|
'ignore')
|
||||||
for pat in ENCODING_PATS:
|
for pat in ENCODING_PATS:
|
||||||
self.processed_html = pat.sub('', self.processed_html)
|
self.processed_html = pat.sub('', self.processed_html)
|
||||||
e2u = functools.partial(entity_to_unicode,
|
e2u = functools.partial(entity_to_unicode,
|
||||||
@ -203,16 +226,10 @@ class MobiReader(object):
|
|||||||
self.processed_html = \
|
self.processed_html = \
|
||||||
re.compile('<head>', re.IGNORECASE).sub(
|
re.compile('<head>', re.IGNORECASE).sub(
|
||||||
'\n<head>\n'
|
'\n<head>\n'
|
||||||
'<style type="text/css">\n'
|
'\t<link type="text/css" href="styles.css" />\n',
|
||||||
'blockquote { margin: 0em 0em 0em 1.25em; text-align: justify; }\n'
|
|
||||||
'p { margin: 0em; text-align: justify; }\n'
|
|
||||||
'.bold { font-weight: bold; }\n'
|
|
||||||
'.italic { font-style: italic; }\n'
|
|
||||||
'</style>\n',
|
|
||||||
self.processed_html)
|
self.processed_html)
|
||||||
|
|
||||||
if self.verbose:
|
self.log.debug('Parsing HTML...')
|
||||||
print 'Parsing HTML...'
|
|
||||||
root = html.fromstring(self.processed_html)
|
root = html.fromstring(self.processed_html)
|
||||||
self.upshift_markup(root)
|
self.upshift_markup(root)
|
||||||
guides = root.xpath('//guide')
|
guides = root.xpath('//guide')
|
||||||
@ -230,25 +247,24 @@ class MobiReader(object):
|
|||||||
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
|
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
if self.verbose:
|
parse_cache[htmlfile] = root
|
||||||
print 'Serializing...'
|
|
||||||
with open(htmlfile, 'wb') as f:
|
|
||||||
raw = html.tostring(root, encoding='utf-8', method='xml',
|
|
||||||
include_meta_content_type=True, pretty_print=True)
|
|
||||||
raw = raw.replace('<head>',
|
|
||||||
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
|
|
||||||
f.write(raw)
|
|
||||||
self.htmlfile = htmlfile
|
self.htmlfile = htmlfile
|
||||||
|
|
||||||
if self.book_header.exth is not None or self.embedded_mi is not None:
|
self.log.debug('Creating OPF...')
|
||||||
if self.verbose:
|
ncx = cStringIO.StringIO()
|
||||||
print 'Creating OPF...'
|
opf = self.create_opf(htmlfile, guide, root)
|
||||||
ncx = cStringIO.StringIO()
|
self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
|
||||||
opf = self.create_opf(htmlfile, guide, root)
|
opf.render(open(self.created_opf_path, 'wb'), ncx)
|
||||||
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
|
ncx = ncx.getvalue()
|
||||||
ncx = ncx.getvalue()
|
if ncx:
|
||||||
if ncx:
|
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
|
||||||
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
|
|
||||||
|
with open('styles.css', 'wb') as s:
|
||||||
|
s.write(self.base_css_rules+'\n\n')
|
||||||
|
for rule in self.tag_css_rules:
|
||||||
|
if isinstance(rule, unicode):
|
||||||
|
rule = rule.encode('utf-8')
|
||||||
|
s.write(rule+'\n\n')
|
||||||
|
|
||||||
def read_embedded_metadata(self, root, elem, guide):
|
def read_embedded_metadata(self, root, elem, guide):
|
||||||
raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
|
raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
|
||||||
@ -277,8 +293,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
|
|
||||||
def cleanup_html(self):
|
def cleanup_html(self):
|
||||||
if self.verbose:
|
self.log.debug('Cleaning up HTML...')
|
||||||
print 'Cleaning up HTML...'
|
|
||||||
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
|
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
|
||||||
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
|
||||||
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
|
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
|
||||||
@ -286,8 +301,7 @@ class MobiReader(object):
|
|||||||
self.processed_html = self.processed_html.replace('> <', '>\n<')
|
self.processed_html = self.processed_html.replace('> <', '>\n<')
|
||||||
|
|
||||||
def upshift_markup(self, root):
|
def upshift_markup(self, root):
|
||||||
if self.verbose:
|
self.log.debug('Converting style information to CSS...')
|
||||||
print 'Converting style information to CSS...'
|
|
||||||
size_map = {
|
size_map = {
|
||||||
'xx-small' : '0.5',
|
'xx-small' : '0.5',
|
||||||
'x-small' : '1',
|
'x-small' : '1',
|
||||||
@ -298,7 +312,7 @@ class MobiReader(object):
|
|||||||
'xx-large' : '6',
|
'xx-large' : '6',
|
||||||
}
|
}
|
||||||
mobi_version = self.book_header.mobi_version
|
mobi_version = self.book_header.mobi_version
|
||||||
for tag in root.iter(etree.Element):
|
for i, tag in enumerate(root.iter(etree.Element)):
|
||||||
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
|
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
|
||||||
'state', 'city'):
|
'state', 'city'):
|
||||||
tag.tag = 'span'
|
tag.tag = 'span'
|
||||||
@ -352,8 +366,7 @@ class MobiReader(object):
|
|||||||
elif tag.tag == 'pre':
|
elif tag.tag == 'pre':
|
||||||
if not tag.text:
|
if not tag.text:
|
||||||
tag.tag = 'div'
|
tag.tag = 'div'
|
||||||
if styles:
|
|
||||||
attrib['style'] = '; '.join(styles)
|
|
||||||
if 'filepos-id' in attrib:
|
if 'filepos-id' in attrib:
|
||||||
attrib['id'] = attrib.pop('filepos-id')
|
attrib['id'] = attrib.pop('filepos-id')
|
||||||
if 'filepos' in attrib:
|
if 'filepos' in attrib:
|
||||||
@ -362,15 +375,24 @@ class MobiReader(object):
|
|||||||
attrib['href'] = "#filepos%d" % int(filepos)
|
attrib['href'] = "#filepos%d" % int(filepos)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
if styles:
|
||||||
|
attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i)
|
||||||
|
self.tag_css_rules.append('#%s {%s}'%(attrib['id'],
|
||||||
|
'; '.join(styles)))
|
||||||
|
|
||||||
|
|
||||||
def create_opf(self, htmlfile, guide=None, root=None):
|
def create_opf(self, htmlfile, guide=None, root=None):
|
||||||
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
|
mi = getattr(self.book_header.exth, 'mi', self.embedded_mi)
|
||||||
|
if mi is None:
|
||||||
|
mi = MetaInformation(self.title, [_('Unknown')])
|
||||||
opf = OPFCreator(os.path.dirname(htmlfile), mi)
|
opf = OPFCreator(os.path.dirname(htmlfile), mi)
|
||||||
if hasattr(self.book_header.exth, 'cover_offset'):
|
if hasattr(self.book_header.exth, 'cover_offset'):
|
||||||
opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
|
opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
|
||||||
elif mi.cover is not None:
|
elif mi.cover is not None:
|
||||||
opf.cover = mi.cover
|
opf.cover = mi.cover
|
||||||
manifest = [(htmlfile, 'text/x-oeb1-document')]
|
manifest = [(htmlfile, 'text/x-oeb1-document'),
|
||||||
|
(os.path.abspath('styles.css'), 'text/css')]
|
||||||
bp = os.path.dirname(htmlfile)
|
bp = os.path.dirname(htmlfile)
|
||||||
for i in getattr(self, 'image_names', []):
|
for i in getattr(self, 'image_names', []):
|
||||||
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
|
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
|
||||||
@ -441,8 +463,7 @@ class MobiReader(object):
|
|||||||
return data[:len(data)-trail_size]
|
return data[:len(data)-trail_size]
|
||||||
|
|
||||||
def extract_text(self):
|
def extract_text(self):
|
||||||
if self.verbose:
|
self.log.debug('Extracting text...')
|
||||||
print 'Extracting text...'
|
|
||||||
text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
|
text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
|
||||||
processed_records = list(range(0, self.book_header.records+1))
|
processed_records = list(range(0, self.book_header.records+1))
|
||||||
|
|
||||||
@ -472,12 +493,11 @@ class MobiReader(object):
|
|||||||
|
|
||||||
def replace_page_breaks(self):
|
def replace_page_breaks(self):
|
||||||
self.processed_html = self.PAGE_BREAK_PAT.sub(
|
self.processed_html = self.PAGE_BREAK_PAT.sub(
|
||||||
'<div class="mbp_pagebreak" style="page-break-after: always; margin: 0; display: block" />',
|
'<div class="mbp_pagebreak" />',
|
||||||
self.processed_html)
|
self.processed_html)
|
||||||
|
|
||||||
def add_anchors(self):
|
def add_anchors(self):
|
||||||
if self.verbose:
|
self.log.debug('Adding anchors...')
|
||||||
print 'Adding anchors...'
|
|
||||||
positions = set([])
|
positions = set([])
|
||||||
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
|
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
|
||||||
re.IGNORECASE)
|
re.IGNORECASE)
|
||||||
@ -507,8 +527,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
|
|
||||||
def extract_images(self, processed_records, output_dir):
|
def extract_images(self, processed_records, output_dir):
|
||||||
if self.verbose:
|
self.log.debug('Extracting images...')
|
||||||
print 'Extracting images...'
|
|
||||||
output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
|
output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
|
||||||
if not os.path.exists(output_dir):
|
if not os.path.exists(output_dir):
|
||||||
os.makedirs(output_dir)
|
os.makedirs(output_dir)
|
||||||
@ -535,14 +554,17 @@ class MobiReader(object):
|
|||||||
im.convert('RGB').save(open(path, 'wb'), format='JPEG')
|
im.convert('RGB').save(open(path, 'wb'), format='JPEG')
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
mr = MobiReader(stream)
|
from calibre.utils.logging import Log
|
||||||
|
log = Log()
|
||||||
|
mr = MobiReader(stream, log)
|
||||||
if mr.book_header.exth is None:
|
if mr.book_header.exth is None:
|
||||||
mi = MetaInformation(mr.name, [_('Unknown')])
|
mi = MetaInformation(mr.name, [_('Unknown')])
|
||||||
else:
|
else:
|
||||||
mi = mr.create_opf('dummy.html')
|
mi = mr.create_opf('dummy.html')
|
||||||
try:
|
try:
|
||||||
if hasattr(mr.book_header.exth, 'cover_offset'):
|
if hasattr(mr.book_header.exth, 'cover_offset'):
|
||||||
cover_index = mr.book_header.first_image_index + mr.book_header.exth.cover_offset
|
cover_index = mr.book_header.first_image_index + \
|
||||||
|
mr.book_header.exth.cover_offset
|
||||||
data = mr.sections[int(cover_index)][0]
|
data = mr.sections[int(cover_index)][0]
|
||||||
else:
|
else:
|
||||||
data = mr.sections[mr.book_header.first_image_index][0]
|
data = mr.sections[mr.book_header.first_image_index][0]
|
||||||
@ -552,42 +574,7 @@ def get_metadata(stream):
|
|||||||
im.convert('RGBA').save(obuf, format='JPEG')
|
im.convert('RGBA').save(obuf, format='JPEG')
|
||||||
mi.cover_data = ('jpg', obuf.getvalue())
|
mi.cover_data = ('jpg', obuf.getvalue())
|
||||||
except:
|
except:
|
||||||
import traceback
|
log.exception()
|
||||||
traceback.print_exc()
|
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
|
||||||
def option_parser():
|
|
||||||
from calibre.utils.config import OptionParser
|
|
||||||
parser = OptionParser(usage=_('%prog [options] myebook.mobi'))
|
|
||||||
parser.add_option('-o', '--output-dir', default='.',
|
|
||||||
help=_('Output directory. Defaults to current directory.'))
|
|
||||||
parser.add_option('-v', '--verbose', default=False, action='store_true',
|
|
||||||
help='Useful for debugging.')
|
|
||||||
return parser
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
|
||||||
parser = option_parser()
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
if len(args) != 2:
|
|
||||||
parser.print_help()
|
|
||||||
return 1
|
|
||||||
|
|
||||||
mr = MobiReader(args[1], verbose=opts.verbose)
|
|
||||||
opts.output_dir = os.path.abspath(opts.output_dir)
|
|
||||||
mr.extract_content(opts.output_dir)
|
|
||||||
if opts.verbose:
|
|
||||||
oname = os.path.join(opts.output_dir, 'debug-raw.html')
|
|
||||||
dat = mr.mobi_html
|
|
||||||
if isinstance(dat, unicode):
|
|
||||||
dat = dat.encode('utf-8')
|
|
||||||
open(oname, 'wb').write(dat)
|
|
||||||
print _('Raw MOBI HTML saved in'), oname
|
|
||||||
|
|
||||||
print _('OEB ebook created in'), opts.output_dir
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
||||||
|
92
src/calibre/utils/logging.py
Normal file
92
src/calibre/utils/logging.py
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'A simplified logging system'
|
||||||
|
|
||||||
|
DEBUG = 0
|
||||||
|
INFO = 1
|
||||||
|
WARN = 2
|
||||||
|
ERROR = 3
|
||||||
|
|
||||||
|
import sys, traceback
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from calibre import prints
|
||||||
|
from calibre.utils.terminfo import TerminalController
|
||||||
|
|
||||||
|
class ANSIStream:
|
||||||
|
|
||||||
|
def __init__(self, stream=sys.stdout):
|
||||||
|
self.stream = stream
|
||||||
|
tc = TerminalController(stream)
|
||||||
|
self.color = {
|
||||||
|
DEBUG: tc.GREEN,
|
||||||
|
INFO:'',
|
||||||
|
WARN: tc.YELLOW,
|
||||||
|
ERROR: tc.RED
|
||||||
|
}
|
||||||
|
self.normal = tc.NORMAL
|
||||||
|
|
||||||
|
def prints(self, level, *args, **kwargs):
|
||||||
|
self.stream.write(self.color[level])
|
||||||
|
kwargs['file'] = self.stream
|
||||||
|
prints(*args, **kwargs)
|
||||||
|
self.stream.write(self.normal)
|
||||||
|
|
||||||
|
def flush(self):
|
||||||
|
self.stream.flush()
|
||||||
|
|
||||||
|
class HTMLStream:
|
||||||
|
|
||||||
|
def __init__(self, stream=sys.stdout):
|
||||||
|
self.stream = stream
|
||||||
|
self.color = {
|
||||||
|
DEBUG: '<span style="color:green">',
|
||||||
|
INFO:'<span>',
|
||||||
|
WARN: '<span style="color:yellow">',
|
||||||
|
ERROR: '<span style="color:red">'
|
||||||
|
}
|
||||||
|
self.normal = '</span>'
|
||||||
|
|
||||||
|
def prints(self, level, *args, **kwargs):
|
||||||
|
self.stream.write(self.color[level])
|
||||||
|
kwargs['file'] = self.stream
|
||||||
|
prints(*args, **kwargs)
|
||||||
|
self.stream.write(self.normal)
|
||||||
|
|
||||||
|
def flush(self):
|
||||||
|
self.stream.flush()
|
||||||
|
|
||||||
|
class Log(object):
|
||||||
|
|
||||||
|
DEBUG = DEBUG
|
||||||
|
INFO = INFO
|
||||||
|
WARN = WARN
|
||||||
|
ERROR = ERROR
|
||||||
|
|
||||||
|
def __init__(self, level=INFO):
|
||||||
|
self.filter_level = level
|
||||||
|
default_output = ANSIStream()
|
||||||
|
self.outputs = [default_output]
|
||||||
|
|
||||||
|
self.debug = partial(self.prints, DEBUG)
|
||||||
|
self.info = partial(self.prints, INFO)
|
||||||
|
self.warn = self.warning = partial(self.prints, WARN)
|
||||||
|
self.error = partial(self.prints, ERROR)
|
||||||
|
|
||||||
|
|
||||||
|
def prints(self, level, *args, **kwargs):
|
||||||
|
if level < self.filter_level:
|
||||||
|
return
|
||||||
|
for output in self.outputs:
|
||||||
|
output.prints(level, *args, **kwargs)
|
||||||
|
|
||||||
|
def exception(self, *args, **kwargs):
|
||||||
|
limit = kwargs.pop('limit', None)
|
||||||
|
self.prints(ERROR, *args, **kwargs)
|
||||||
|
self.prints(DEBUG, traceback.format_exc(limit))
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
self.prints(INFO, *args, **kwargs)
|
@ -33,7 +33,7 @@ class TerminalController:
|
|||||||
|
|
||||||
>>> term = TerminalController()
|
>>> term = TerminalController()
|
||||||
>>> if term.CLEAR_SCREEN:
|
>>> if term.CLEAR_SCREEN:
|
||||||
... print 'This terminal supports clearning the screen.'
|
... print 'This terminal supports clearing the screen.'
|
||||||
|
|
||||||
Finally, if the width and height of the terminal are known, then
|
Finally, if the width and height of the terminal are known, then
|
||||||
they will be stored in the `COLS` and `LINES` attributes.
|
they will be stored in the `COLS` and `LINES` attributes.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user