Commit so I can pull from trunk

This commit is contained in:
Kovid Goyal 2009-03-08 13:45:54 -07:00
parent 02a940d33b
commit 4e128c1073
12 changed files with 356 additions and 52 deletions

View File

@ -7,7 +7,6 @@ import sys, os, re, logging, time, subprocess, atexit, mimetypes, \
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None) __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint from htmlentitydefs import name2codepoint
from math import floor from math import floor
from logging import Formatter
from PyQt4.QtCore import QUrl from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QDesktopServices from PyQt4.QtGui import QDesktopServices
@ -318,24 +317,6 @@ def english_sort(x, y):
''' '''
return cmp(_spat.sub('', x), _spat.sub('', y)) return cmp(_spat.sub('', x), _spat.sub('', y))
class ColoredFormatter(Formatter):
def format(self, record):
ln = record.__dict__['levelname']
col = ''
if ln == 'CRITICAL':
col = terminal_controller.YELLOW
elif ln == 'ERROR':
col = terminal_controller.RED
elif ln in ['WARN', 'WARNING']:
col = terminal_controller.BLUE
elif ln == 'INFO':
col = terminal_controller.GREEN
elif ln == 'DEBUG':
col = terminal_controller.CYAN
record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
return Formatter.format(self, record)
def walk(dir): def walk(dir):
''' A nice interface to os.walk ''' ''' A nice interface to os.walk '''
for record in os.walk(dir): for record in os.walk(dir):

View File

@ -244,9 +244,10 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.customize.profiles import input_profiles from calibre.customize.profiles import input_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput] plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')] x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -37,19 +37,24 @@ class ConversionOption(object):
if not self.help: if not self.help:
raise ValueError('You must set the help text') raise ValueError('You must set the help text')
def __hash__(self):
return hash(self.name)
def __eq__(self, other):
return hash(self) == hash(other)
class OptionRecommendation(object): class OptionRecommendation(object):
LOW = 1 LOW = 1
MED = 2 MED = 2
HIGH = 3 HIGH = 3
def __init__(self, recommeded_value, level=LOW, **kwargs): def __init__(self, recommended_value=None, level=LOW, **kwargs):
''' '''
An option recommendation. That is, an option as well as its recommended An option recommendation. That is, an option as well as its recommended
value and the level of the recommendation. value and the level of the recommendation.
''' '''
self.level = level self.level = level
self.recommended_value = recommeded_value self.recommended_value = recommended_value
self.option = kwargs.pop('option', None) self.option = kwargs.pop('option', None)
if self.option is None: if self.option is None:
self.option = ConversionOption(**kwargs) self.option = ConversionOption(**kwargs)
@ -59,10 +64,12 @@ class OptionRecommendation(object):
def validate_parameters(self): def validate_parameters(self):
if self.option.choices and self.recommended_value not in \ if self.option.choices and self.recommended_value not in \
self.option.choices: self.option.choices:
raise ValueError('Recommended value not in choices') raise ValueError('OpRec: %s: Recommended value not in choices'%
self.option.name)
if not (isinstance(self.recommended_value, (int, float, str, unicode))\ if not (isinstance(self.recommended_value, (int, float, str, unicode))\
or self.default is None): or self.recommended_value is None):
raise ValueError(unicode(self.default) + raise ValueError('OpRec: %s:'%self.option.name +
repr(self.recommended_value) +
' is not a string or a number') ' is not a string or a number')
@ -186,4 +193,34 @@ class InputFormatPlugin(Plugin):
return ret return ret
class OutputFormatPlugin(Plugin):
'''
OutputFormatPlugins are responsible for converting an OEB document
(OPF+HTML) into an output ebook.
The OEB document can be assumed to be encoded in UTF-8.
The main action happens in :method:`convert`.
'''
type = _('Conversion Output')
can_be_disabled = False
supported_platforms = ['windows', 'osx', 'linux']
#: The file type (extension without leading period) that this
#: plugin outputs
file_type = None
#: Options shared by all Input format plugins. Do not override
#: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`.
common_options = set([])
#: Options to customize the behavior of this plugin. Every option must be an
#: instance of :class:`OptionRecommendation`.
options = set([])
def convert(self, oeb_book, input_plugin, options, parse_cache, log):
raise NotImplementedError

View File

@ -3,6 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, re
from calibre.customize import Plugin from calibre.customize import Plugin
class InputProfile(Plugin): class InputProfile(Plugin):
@ -16,12 +17,43 @@ class InputProfile(Plugin):
# inherit from this profile and override as needed # inherit from this profile and override as needed
name = 'Default Input Profile' name = 'Default Input Profile'
short_name = 'default' # Used in the CLI so dont spaces etc. in it short_name = 'default' # Used in the CLI so dont use spaces etc. in it
description = _('This profile tries to provide sane defaults and is useful ' description = _('This profile tries to provide sane defaults and is useful '
'if you know nothing about the input document.') 'if you know nothing about the input document.')
input_profiles = [InputProfile] input_profiles = [InputProfile]
class OutputProfile(Plugin):
author = 'Kovid Goyal'
supported_platforms = set(['windows', 'osx', 'linux'])
can_be_disabled = False
type = _('Output profile')
name = 'Default Output Profile'
short_name = 'default' # Used in the CLI so dont use spaces etc. in it
description = _('This profile tries to provide sane defaults and is useful '
'if you want to produce a document intended to be read at a '
'computer or on a range of devices.')
epub_flow_size = sys.maxint
screen_size = None
remove_special_chars = False
remove_object_tags = False
class SonyReader(OutputProfile):
name = 'Sony Reader'
short_name = 'sony'
description = _('This profile is intended for the SONY PRS line. '
'The 500/505/700 etc.')
epub_flow_size = 270000
screen_size = (590, 765)
remove_special_chars = re.compile(u'[\u200b\u00ad]')
remove_object_tags = True
output_profiles = [OutputProfile, SonyReader]

View File

@ -6,8 +6,8 @@ import os, shutil, traceback, functools, sys
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \ from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin MetadataWriterPlugin
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin
from calibre.customize.profiles import InputProfile from calibre.customize.profiles import InputProfile, OutputProfile
from calibre.customize.builtins import plugins as builtin_plugins from calibre.customize.builtins import plugins as builtin_plugins
from calibre.constants import __version__, iswindows, isosx from calibre.constants import __version__, iswindows, isosx
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
@ -76,6 +76,12 @@ def input_profiles():
if isinstance(plugin, InputProfile): if isinstance(plugin, InputProfile):
yield plugin yield plugin
def output_profiles():
for plugin in _initialized_plugins:
if isinstance(plugin, OutputProfile):
yield plugin
def reread_filetype_plugins(): def reread_filetype_plugins():
global _on_import global _on_import
global _on_preprocess global _on_preprocess
@ -245,9 +251,19 @@ def input_format_plugins():
def plugin_for_input_format(fmt): def plugin_for_input_format(fmt):
for plugin in input_format_plugins(): for plugin in input_format_plugins():
if fmt in plugin.file_types: if fmt.lower() in plugin.file_types:
return plugin return plugin
def output_format_plugins():
for plugin in _initialized_plugins:
if isinstance(plugin, OutputFormatPlugin):
yield plugin
def plugin_for_output_format(fmt):
for plugin in output_format_plugins():
if fmt.lower() == plugin.file_type:
return plugin
def disable_plugin(plugin_or_name): def disable_plugin(plugin_or_name):
x = getattr(plugin_or_name, 'name', plugin_or_name) x = getattr(plugin_or_name, 'name', plugin_or_name)

View File

@ -0,0 +1,146 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Command line interface to conversion sub-system
'''
USAGE = '%prog ' + _('''\
input_file output_file [options]
Convert an ebook from one format to another.
input_file is the input and output_file is the output. Both must be
specified as the first two arguments to the command.
The output ebook format is guessed from the file extension of
output_file. output_file can also be of the special format .EXT where
EXT is the output file extension. In this case, the name of the output
file is derived the name of the input file. Note that the filenames must
not start with a hyphen. Finally, if output_file has no extension, then
it is treated as a directory and an "open ebook" (OEB) consisting of HTML files
is written to that directory. These files are the files that would normally
have been passed to the output plugin.
After specifying the input
and output file you can customize the conversion by specifying various
options, listed below.
For full documentation of the conversion system see
''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html'
import sys, os
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace')
log(help)
def check_command_line_options(parser, args, log):
if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'):
print_help(parser)
log.error('\n\nYou must specify the input AND output files')
raise SystemExit(1)
input = os.path.abspath(args[1])
if not os.access(input, os.R_OK):
log.error('Cannot read from', input)
raise SystemExit(1)
output = args[2]
if output.startswith('.'):
output = os.path.splitext(os.path.basename(input))[0]+output
output = os.path.abspath(output)
if '.' in output:
if os.path.exists(output):
log.warn('WARNING:', output, 'exists. Deleting.')
os.remove(output)
return input, output
def option_recommendation_to_cli_option(add_option, rec):
opt = rec.option
switches = [opt.short_switch] if opt.short_switch else []
switches.append(opt.long_switch)
add_option(opt.name, switches=switches, help=opt.help,
choices=opt.choices, default=rec.recommended_value)
def add_input_output_options(parser, plumber):
input_options, output_options = \
plumber.input_options, plumber.output_options
def add_options(group, options):
for opt in options:
option_recommendation_to_cli_option(group, opt)
if input_options:
io = parser.add_group(plumber.input_fmt.upper() + ' ' + _('OPTIONS'))
add_options(io, input_options)
if output_options:
oo = parser.add_group(plumber.output_fmt.upper() + ' ' + _('OPTIONS'))
add_options(oo, output_options)
def add_pipeline_options(parser, plumber):
groups = {
'' : ('',
[
'input_profile',
'output_profile',
]
),
'DEBUG': (_('Options to help with debugging the conversion'),
[
'verbose',
]),
}
for group, spec in groups.items():
desc, options = spec
if group:
group = parser.add_option_group(group, desc)
add_option = group if group != '' else parser.add_option
for name in options:
rec = plumber.get_option_by_name(name)
if rec.level < rec.HIGH:
option_recommendation_to_cli_option(add_option, rec)
def main(args=sys.argv):
log = Log()
parser = OptionParser(usage=USAGE)
fargs = parser.parse_args(args)[1]
input, output = check_command_line_options(parser, fargs, log)
from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(input, output, log)
add_input_output_options(parser, plumber)
add_pipeline_options(parser, plumber)
opts = parser.parse_args(args)[0]
recommendations = [(n.dest, getattr(opts, n.dest)) \
for n in parser.options_iter()]
plumber.merge_ui_recommendations(recommendations)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -3,11 +3,15 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format
pipeline_options = [ class Plumber(object):
pipeline_options = [
OptionRecommendation(name='verbose', OptionRecommendation(name='verbose',
recommended_value=0, level=OptionRecommendation.LOW, recommended_value=0, level=OptionRecommendation.LOW,
@ -16,7 +20,6 @@ OptionRecommendation(name='verbose',
'verbosity.') 'verbosity.')
), ),
OptionRecommendation(name='input_profile', OptionRecommendation(name='input_profile',
recommended_value='default', level=OptionRecommendation.LOW, recommended_value='default', level=OptionRecommendation.LOW,
choices=[x.short_name for x in input_profiles()], choices=[x.short_name for x in input_profiles()],
@ -27,4 +30,66 @@ OptionRecommendation(name='input_profile',
'pixels).') 'pixels).')
), ),
] OptionRecommendation(name='output_profile',
recommended_value='default', level=OptionRecommendation.LOW,
choices=[x.short_name for x in output_profiles()],
help=_('Specify the output profile. The output profile '
'tells the conversion system how to optimize the '
'created document for the specified device. In some cases, '
'an output profile is required to produce documents that '
'will work on a device. For example EPUB on the SONY reader.'
)
),
]
def __init__(self, input, output, log):
self.input = input
self.output = output
self.log = log
input_fmt = os.path.splitext(input)[1]
if not input_fmt:
raise ValueError('Input file must have and extension')
input_fmt = input_fmt[1:].lower()
output_fmt = os.path.splitext(input)[1]
if not output_fmt:
output_fmt = '.oeb'
output_fmt = output_fmt[1:].lower()
self.input_plugin = plugin_for_input_format(input_fmt)
self.output_plugin = plugin_for_output_format(output_fmt)
if self.input_plugin is None:
raise ValueError('No plugin to handle input format: '+input_fmt)
if self.output_plugin is None:
raise ValueError('No plugin to handle output format: '+output_fmt)
self.input_fmt = input_fmt
self.output_fmt = output_fmt
self.input_options = self.input_plugin.options.union(
self.input_plugin.common_options)
self.output_options = self.output_plugin.options.union(
self.output_plugin.common_options)
self.merge_plugin_recommendations()
def get_option_by_name(self, name):
for group in (self.input_options, self.pipeline_options,
self.output_options):
for rec in group:
if rec.option == name:
return rec
def merge_plugin_recommendations(self):
pass
def merge_ui_recommendations(self, recommendations):
pass

View File

@ -19,11 +19,10 @@ from lxml.html import HtmlElementClassLookup, HTMLParser as _HTMLParser, \
from lxml.etree import XPath from lxml.etree import XPath
get_text = XPath("//text()") get_text = XPath("//text()")
from calibre import LoggingInterface, unicode_path, entity_to_unicode from calibre import unicode_path, entity_to_unicode
from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS
from calibre.utils.config import Config, StringConfig from calibre.utils.config import Config, StringConfig
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
@ -401,7 +400,7 @@ class PreProcessor(object):
html = rule[0].sub(rule[1], html) html = rule[0].sub(rule[1], html)
return html return html
class Parser(PreProcessor, LoggingInterface): class Parser(PreProcessor):
# SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont' # SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont'
# SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in # SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in
# [ # [
@ -412,7 +411,6 @@ class Parser(PreProcessor, LoggingInterface):
# ] # ]
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'): def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'):
LoggingInterface.__init__(self, logging.getLogger(name))
self.setup_cli_handler(opts.verbose) self.setup_cli_handler(opts.verbose)
self.htmlfile = htmlfile self.htmlfile = htmlfile
self.opts = opts self.opts = opts
@ -1038,6 +1036,7 @@ def merge_metadata(htmlfile, opf, opts):
if opf: if opf:
mi = MetaInformation(opf) mi = MetaInformation(opf)
elif htmlfile: elif htmlfile:
from calibre.ebooks.metadata.meta import get_metadata
try: try:
mi = get_metadata(open(htmlfile, 'rb'), 'html') mi = get_metadata(open(htmlfile, 'rb'), 'html')
except: except:

View File

@ -3,8 +3,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin): class MOBIInput(InputFormatPlugin):
@ -18,12 +16,11 @@ class MOBIInput(InputFormatPlugin):
from calibre.ebooks.mobi.reader import MobiReader from calibre.ebooks.mobi.reader import MobiReader
mr = MobiReader(stream, log, options.input_encoding, mr = MobiReader(stream, log, options.input_encoding,
options.debug_input) options.debug_input)
mr.extract_content(output_dir=os.getcwdu(), parse_cache) mr.extract_content('.', parse_cache)
raw = parse_cache.get('calibre_raw_mobi_markup', False) raw = parse_cache.get('calibre_raw_mobi_markup', False)
if raw: if raw:
if isinstance(raw, unicode): if isinstance(raw, unicode):
raw = raw.encode('utf-8') raw = raw.encode('utf-8')
open('debug-raw.html', 'wb').write(raw) open('debug-raw.html', 'wb').write(raw)
return mr.created_opf_path return mr.created_opf_path

View File

@ -0,0 +1,17 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import OutputFormatPlugin
class OEBOutput(OutputFormatPlugin):
name = 'OEB Output'
author = 'Kovid Goyal'
file_type = 'oeb'
def convert(self, oeb_book, input_plugin, options, parse_cache, log):
pass

View File

@ -18,6 +18,7 @@ entry_points = {
'console_scripts': [ \ 'console_scripts': [ \
'ebook-device = calibre.devices.prs500.cli.main:main', 'ebook-device = calibre.devices.prs500.cli.main:main',
'ebook-meta = calibre.ebooks.metadata.cli:main', 'ebook-meta = calibre.ebooks.metadata.cli:main',
'ebook-convert = calibre.ebooks.convert.cli:main',
'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main', 'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main',
'html2lrf = calibre.ebooks.lrf.html.convert_from:main', 'html2lrf = calibre.ebooks.lrf.html.convert_from:main',
'html2oeb = calibre.ebooks.html:main', 'html2oeb = calibre.ebooks.html:main',

View File

@ -13,13 +13,25 @@ ERROR = 3
import sys, traceback import sys, traceback
from functools import partial from functools import partial
from calibre import prints
from calibre.utils.terminfo import TerminalController
class ANSIStream:
class Stream(object):
def __init__(self, stream):
from calibre import prints
self._prints = prints
self.stream = stream
def flush(self):
self.stream.flush()
class ANSIStream(Stream):
def __init__(self, stream=sys.stdout): def __init__(self, stream=sys.stdout):
self.stream = stream Stream.__init__(self, stream)
from calibre.utils.terminfo import TerminalController
tc = TerminalController(stream) tc = TerminalController(stream)
self.color = { self.color = {
DEBUG: tc.GREEN, DEBUG: tc.GREEN,
@ -32,16 +44,16 @@ class ANSIStream:
def prints(self, level, *args, **kwargs): def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level]) self.stream.write(self.color[level])
kwargs['file'] = self.stream kwargs['file'] = self.stream
prints(*args, **kwargs) self._prints(*args, **kwargs)
self.stream.write(self.normal) self.stream.write(self.normal)
def flush(self): def flush(self):
self.stream.flush() self.stream.flush()
class HTMLStream: class HTMLStream(Stream):
def __init__(self, stream=sys.stdout): def __init__(self, stream=sys.stdout):
self.stream = stream Stream.__init__(self, stream)
self.color = { self.color = {
DEBUG: '<span style="color:green">', DEBUG: '<span style="color:green">',
INFO:'<span>', INFO:'<span>',
@ -53,7 +65,7 @@ class HTMLStream:
def prints(self, level, *args, **kwargs): def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level]) self.stream.write(self.color[level])
kwargs['file'] = self.stream kwargs['file'] = self.stream
prints(*args, **kwargs) self._prints(*args, **kwargs)
self.stream.write(self.normal) self.stream.write(self.normal)
def flush(self): def flush(self):