diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index de133ddb57..942df667e9 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -3,11 +3,13 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' __docformat__ = 'restructuredtext en' import sys, os, re, logging, time, subprocess, atexit, mimetypes, \ - __builtin__ + __builtin__, warnings __builtin__.__dict__['dynamic_property'] = lambda(func): func(None) from htmlentitydefs import name2codepoint from math import floor -from logging import Formatter + +warnings.simplefilter('ignore', DeprecationWarning) + from PyQt4.QtCore import QUrl from PyQt4.QtGui import QDesktopServices @@ -86,6 +88,8 @@ def prints(*args, **kwargs): for i, arg in enumerate(args): if isinstance(arg, unicode): arg = arg.encode(preferred_encoding) + if not isinstance(arg, str): + arg = str(arg) file.write(arg) if i != len(args)-1: file.write(sep) @@ -318,24 +322,6 @@ def english_sort(x, y): ''' return cmp(_spat.sub('', x), _spat.sub('', y)) -class ColoredFormatter(Formatter): - - def format(self, record): - ln = record.__dict__['levelname'] - col = '' - if ln == 'CRITICAL': - col = terminal_controller.YELLOW - elif ln == 'ERROR': - col = terminal_controller.RED - elif ln in ['WARN', 'WARNING']: - col = terminal_controller.BLUE - elif ln == 'INFO': - col = terminal_controller.GREEN - elif ln == 'DEBUG': - col = terminal_controller.CYAN - record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL - return Formatter.format(self, record) - def walk(dir): ''' A nice interface to os.walk ''' for record in os.walk(dir): diff --git a/src/calibre/constants.py b/src/calibre/constants.py index aebcb35dc0..913c54bffe 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.4.143' +__version__ = 
'0.5.0' __author__ = "Kovid Goyal " ''' Various run time constants. diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index fafe8e5afa..b6a6141612 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -244,11 +244,12 @@ class MOBIMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.mobi.input import MOBIInput -from calibre.customize.profiles import input_profiles +from calibre.ebooks.oeb.output import OEBOutput +from calibre.customize.profiles import input_profiles, output_profiles -plugins = [HTML2ZIP, EPUBInput, MOBIInput] +plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataWriter')] -plugins += input_profiles \ No newline at end of file +plugins += input_profiles + output_profiles \ No newline at end of file diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index aa7b0c1dea..a77e32beee 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -24,7 +24,7 @@ class ConversionOption(object): self.choices = choices if self.long_switch is None: - self.long_switch = '--'+self.name.replace('_', '-') + self.long_switch = self.name.replace('_', '-') self.validate_parameters() @@ -37,19 +37,24 @@ class ConversionOption(object): if not self.help: raise ValueError('You must set the help text') + def __hash__(self): + return hash(self.name) + + def __eq__(self, other): + return hash(self) == hash(other) class OptionRecommendation(object): LOW = 1 MED = 2 HIGH = 3 - def __init__(self, recommeded_value, level=LOW, **kwargs): + def __init__(self, recommended_value=None, level=LOW, **kwargs): ''' An option recommendation. 
That is, an option as well as its recommended value and the level of the recommendation. ''' self.level = level - self.recommended_value = recommeded_value + self.recommended_value = recommended_value self.option = kwargs.pop('option', None) if self.option is None: self.option = ConversionOption(**kwargs) @@ -59,10 +64,12 @@ class OptionRecommendation(object): def validate_parameters(self): if self.option.choices and self.recommended_value not in \ self.option.choices: - raise ValueError('Recommended value not in choices') + raise ValueError('OpRec: %s: Recommended value not in choices'% + self.option.name) if not (isinstance(self.recommended_value, (int, float, str, unicode))\ - or self.default is None): - raise ValueError(unicode(self.default) + + or self.recommended_value is None): + raise ValueError('OpRec: %s:'%self.option.name + + repr(self.recommended_value) + ' is not a string or a number') @@ -110,7 +117,11 @@ class InputFormatPlugin(Plugin): #: instance of :class:`OptionRecommendation`. options = set([]) - def convert(self, stream, options, file_ext, parse_cache, log): + #: A set of 3-tuples of the form + #: (option_name, recommended_value, recommendation_level) + recommendations = set([]) + + def convert(self, stream, options, file_ext, parse_cache, log, accelerators): ''' This method must be implemented in sub-classes. It must return the path to the created OPF file. All output should be contained in @@ -146,10 +157,16 @@ class InputFormatPlugin(Plugin): :param log: A :class:`calibre.utils.logging.Log` object. All output should use this object. + + :param accelerators: A dictionary of various information that the input + plugin can get easily that would speed up the + subsequent stages of the conversion. 
+ ''' raise NotImplementedError - def __call__(self, stream, options, file_ext, parse_cache, log, output_dir): + def __call__(self, stream, options, file_ext, parse_cache, log, + accelerators, output_dir): log('InputFormatPlugin: %s running'%self.name, end=' ') if hasattr(stream, 'name'): log('on', stream.name) @@ -159,7 +176,8 @@ class InputFormatPlugin(Plugin): shutil.rmtree(x) if os.path.isdir(x) else os.remove(x) - ret = self.convert(stream, options, file_ext, parse_cache, log) + ret = self.convert(stream, options, file_ext, parse_cache, + log, accelerators) for key in list(parse_cache.keys()): if os.path.abspath(key) != key: log.warn(('InputFormatPlugin: %s returned a ' @@ -186,4 +204,38 @@ class InputFormatPlugin(Plugin): return ret + + +class OutputFormatPlugin(Plugin): + ''' + OutputFormatPlugins are responsible for converting an OEB document + (OPF+HTML) into an output ebook. + + The OEB document can be assumed to be encoded in UTF-8. + The main action happens in :method:`convert`. + ''' + + type = _('Conversion Output') + can_be_disabled = False + supported_platforms = ['windows', 'osx', 'linux'] + + #: The file type (extension without leading period) that this + #: plugin outputs + file_type = None + + #: Options shared by all Output format plugins. Do not override + #: in sub-classes. Use :member:`options` instead. Every option must be an + #: instance of :class:`OptionRecommendation`. + common_options = set([]) + + #: Options to customize the behavior of this plugin. Every option must be an + #: instance of :class:`OptionRecommendation`. 
+ options = set([]) + + #: A set of 3-tuples of the form + #: (option_name, recommended_value, recommendation_level) + recommendations = set([]) + + def convert(self, oeb_book, input_plugin, options, parse_cache, log): + raise NotImplementedError diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 002f56879f..a3a7e22298 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -3,6 +3,7 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import sys, re from calibre.customize import Plugin class InputProfile(Plugin): @@ -16,12 +17,43 @@ class InputProfile(Plugin): # inherit from this profile and override as needed name = 'Default Input Profile' - short_name = 'default' # Used in the CLI so dont spaces etc. in it + short_name = 'default' # Used in the CLI so dont use spaces etc. in it description = _('This profile tries to provide sane defaults and is useful ' 'if you know nothing about the input document.') input_profiles = [InputProfile] - +class OutputProfile(Plugin): + author = 'Kovid Goyal' + supported_platforms = set(['windows', 'osx', 'linux']) + can_be_disabled = False + type = _('Output profile') + + name = 'Default Output Profile' + short_name = 'default' # Used in the CLI so dont use spaces etc. in it + description = _('This profile tries to provide sane defaults and is useful ' + 'if you want to produce a document intended to be read at a ' + 'computer or on a range of devices.') + + epub_flow_size = sys.maxint + screen_size = None + remove_special_chars = False + remove_object_tags = False + +class SonyReader(OutputProfile): + + name = 'Sony Reader' + short_name = 'sony' + description = _('This profile is intended for the SONY PRS line. 
' + 'The 500/505/700 etc.') + + epub_flow_size = 270000 + screen_size = (590, 765) + remove_special_chars = re.compile(u'[\u200b\u00ad]') + remove_object_tags = True + + + +output_profiles = [OutputProfile, SonyReader] \ No newline at end of file diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 1cdafae4f0..d8b7ebf6d8 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -6,8 +6,8 @@ import os, shutil, traceback, functools, sys from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \ MetadataWriterPlugin -from calibre.customize.conversion import InputFormatPlugin -from calibre.customize.profiles import InputProfile +from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin +from calibre.customize.profiles import InputProfile, OutputProfile from calibre.customize.builtins import plugins as builtin_plugins from calibre.constants import __version__, iswindows, isosx from calibre.ebooks.metadata import MetaInformation @@ -76,6 +76,12 @@ def input_profiles(): if isinstance(plugin, InputProfile): yield plugin +def output_profiles(): + for plugin in _initialized_plugins: + if isinstance(plugin, OutputProfile): + yield plugin + + def reread_filetype_plugins(): global _on_import global _on_preprocess @@ -245,9 +251,19 @@ def input_format_plugins(): def plugin_for_input_format(fmt): for plugin in input_format_plugins(): - if fmt in plugin.file_types: + if fmt.lower() in plugin.file_types: return plugin - + +def output_format_plugins(): + for plugin in _initialized_plugins: + if isinstance(plugin, OutputFormatPlugin): + yield plugin + +def plugin_for_output_format(fmt): + for plugin in output_format_plugins(): + if fmt.lower() == plugin.file_type: + return plugin + def disable_plugin(plugin_or_name): x = getattr(plugin_or_name, 'name', plugin_or_name) diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py index b1f5d36d32..0998a60451 100644 --- 
a/src/calibre/devices/cybookg3/driver.py +++ b/src/calibre/devices/cybookg3/driver.py @@ -74,7 +74,7 @@ class CYBOOKG3(USBMS): if self.SUPPORTS_SUB_DIRS: if 'tags' in mdata.keys(): for tag in mdata['tags']: - if tag.startswith('News'): + if tag.startswith(_('News')): newpath = os.path.join(newpath, 'news') newpath = os.path.join(newpath, mdata.get('title', '')) newpath = os.path.join(newpath, mdata.get('timestamp', '')) diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index af6d724883..971ac9bc9a 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, try: raw = raw.decode(encoding, 'replace') except LookupError: - raw = raw.decode('utf-8', 'replace') + encoding = 'utf-8' + raw = raw.decode(encoding, 'replace') if strip_encoding_pats: raw = strip_encoding_declarations(raw) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py new file mode 100644 index 0000000000..f52264f8d0 --- /dev/null +++ b/src/calibre/ebooks/conversion/cli.py @@ -0,0 +1,167 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +''' +Command line interface to conversion sub-system +''' + +USAGE = '%prog ' + _('''\ +input_file output_file [options] + +Convert an ebook from one format to another. + +input_file is the input and output_file is the output. Both must be \ +specified as the first two arguments to the command. + +The output ebook format is guessed from the file extension of \ +output_file. output_file can also be of the special format .EXT where \ +EXT is the output file extension. In this case, the name of the output \ +file is derived the name of the input file. Note that the filenames must \ +not start with a hyphen. 
Finally, if output_file has no extension, then \ +it is treated as a directory and an "open ebook" (OEB) consisting of HTML \ +files is written to that directory. These files are the files that would \ +normally have been passed to the output plugin. + +After specifying the input \ +and output file you can customize the conversion by specifying various \ +options. the available options depend on the input and output file types. \ +To get help on them specify the input and output file and then use the -h \ +option. + +For full documentation of the conversion system see +''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html' + +import sys, os +from optparse import OptionGroup, Option + +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) + +def check_command_line_options(parser, args, log): + if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'): + print_help(parser) + log.error('\n\nYou must specify the input AND output files') + raise SystemExit(1) + + input = os.path.abspath(args[1]) + if not os.access(input, os.R_OK): + log.error('Cannot read from', input) + raise SystemExit(1) + + output = args[2] + if output.startswith('.'): + output = os.path.splitext(os.path.basename(input))[0]+output + output = os.path.abspath(output) + + if '.' in output: + if os.path.exists(output): + log.warn('WARNING:', output, 'exists. 
Deleting.') + os.remove(output) + + return input, output + +def option_recommendation_to_cli_option(add_option, rec): + opt = rec.option + switches = ['-'+opt.short_switch] if opt.short_switch else [] + switches.append('--'+opt.long_switch) + attrs = dict(dest=opt.name, help=opt.help, + choices=opt.choices, default=rec.recommended_value) + add_option(Option(*switches, **attrs)) + +def add_input_output_options(parser, plumber): + input_options, output_options = \ + plumber.input_options, plumber.output_options + + def add_options(group, options): + for opt in options: + option_recommendation_to_cli_option(group, opt) + + if input_options: + title = _('INPUT OPTIONS') + io = OptionGroup(parser, title, _('Options to control the processing' + ' of the input %s file')%plumber.input_fmt) + add_options(io.add_option, input_options) + parser.add_option_group(io) + + if output_options: + title = plumber.output_fmt.upper() + ' ' + _('OPTIONS') + oo = OptionGroup(parser, title, _('Options to control the processing' + ' of the output %s file')%plumber.input_fmt) + add_options(oo.add_option, output_options) + parser.add_option_group(oo) + +def add_pipeline_options(parser, plumber): + groups = { + '' : ('', + [ + 'input_profile', + 'output_profile', + ] + ), + + 'METADATA' : (_('Options to set metadata in the output'), + plumber.metadata_option_names, + ), + 'DEBUG': (_('Options to help with debugging the conversion'), + [ + 'verbose', + ]), + + + } + + group_order = ['', 'METADATA', 'DEBUG'] + + for group in group_order: + desc, options = groups[group] + if group: + group = OptionGroup(parser, group, desc) + parser.add_option_group(group) + add_option = group.add_option if group != '' else parser.add_option + + for name in options: + rec = plumber.get_option_by_name(name) + if rec.level < rec.HIGH: + option_recommendation_to_cli_option(add_option, rec) + +def option_parser(): + return OptionParser(usage=USAGE) + +def main(args=sys.argv): + log = Log() + parser = option_parser() 
+ if len(args) < 3: + print_help(parser, log) + return 1 + + input, output = check_command_line_options(parser, args, log) + + from calibre.ebooks.conversion.plumber import Plumber + + plumber = Plumber(input, output, log) + add_input_output_options(parser, plumber) + add_pipeline_options(parser, plumber) + + opts = parser.parse_args(args)[0] + recommendations = [(n.dest, getattr(opts, n.dest), + OptionRecommendation.HIGH) \ + for n in parser.options_iter() + if n.dest] + plumber.merge_ui_recommendations(recommendations) + + plumber.run() + + log(_('Output saved to'), ' ', plumber.output) + + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index ac7490bd39..75a6687c4e 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -3,11 +3,29 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import os from calibre.customize.conversion import OptionRecommendation -from calibre.customize.ui import input_profiles +from calibre.customize.ui import input_profiles, output_profiles, \ + plugin_for_input_format, plugin_for_output_format -pipeline_options = [ +class OptionValues(object): + pass + +class Plumber(object): + + metadata_option_names = [ + 'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments', + 'publisher', 'series', 'series_index', 'rating', 'isbn', + 'tags', 'book_producer', 'language' + ] + + def __init__(self, input, output, log): + self.input = input + self.output = output + self.log = log + + self.pipeline_options = [ OptionRecommendation(name='verbose', recommended_value=0, level=OptionRecommendation.LOW, @@ -16,7 +34,6 @@ OptionRecommendation(name='verbose', 'verbosity.') ), - OptionRecommendation(name='input_profile', recommended_value='default', level=OptionRecommendation.LOW, choices=[x.short_name for x in input_profiles()], @@ -27,4 +44,193 
@@ OptionRecommendation(name='input_profile', 'pixels).') ), -] \ No newline at end of file +OptionRecommendation(name='output_profile', + recommended_value='default', level=OptionRecommendation.LOW, + choices=[x.short_name for x in output_profiles()], + help=_('Specify the output profile. The output profile ' + 'tells the conversion system how to optimize the ' + 'created document for the specified device. In some cases, ' + 'an output profile is required to produce documents that ' + 'will work on a device. For example EPUB on the SONY reader.' + ) + ), + +OptionRecommendation(name='read_metadata_from_opf', + recommended_value=None, level=OptionRecommendation.LOW, + short_switch='m', + help=_('Read metadata from the specified OPF file. Metadata read ' + 'from this file will override any metadata in the source ' + 'file.') + ), + +OptionRecommendation(name='title', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the title.')), + +OptionRecommendation(name='authors', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the authors. Multiple authors should be separated ')), + +OptionRecommendation(name='title_sort', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('The version of the title to be used for sorting. ')), + +OptionRecommendation(name='author_sort', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('String to be used when sorting by author. 
')), + +OptionRecommendation(name='cover', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the cover to the specified file.')), + +OptionRecommendation(name='comments', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the ebook description.')), + +OptionRecommendation(name='publisher', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the ebook publisher.')), + +OptionRecommendation(name='series', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the series this ebook belongs to.')), + +OptionRecommendation(name='series_index', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the index of the book in this series.')), + +OptionRecommendation(name='rating', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the rating. Should be a number between 1 and 5.')), + +OptionRecommendation(name='isbn', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the ISBN of the book.')), + +OptionRecommendation(name='tags', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the tags for the book. 
Should be a comma separated list.')), + +OptionRecommendation(name='book_producer', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the book producer.')), + +OptionRecommendation(name='language', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the language.')), +] + + + input_fmt = os.path.splitext(input)[1] + if not input_fmt: + raise ValueError('Input file must have an extension') + input_fmt = input_fmt[1:].lower() + + output_fmt = os.path.splitext(output)[1] + if not output_fmt: + output_fmt = '.oeb' + output_fmt = output_fmt[1:].lower() + + self.input_plugin = plugin_for_input_format(input_fmt) + self.output_plugin = plugin_for_output_format(output_fmt) + + if self.input_plugin is None: + raise ValueError('No plugin to handle input format: '+input_fmt) + + if self.output_plugin is None: + raise ValueError('No plugin to handle output format: '+output_fmt) + + self.input_fmt = input_fmt + self.output_fmt = output_fmt + + self.input_options = self.input_plugin.options.union( + self.input_plugin.common_options) + self.output_options = self.output_plugin.options.union( + self.output_plugin.common_options) + + self.merge_plugin_recommendations() + + def get_option_by_name(self, name): + for group in (self.input_options, self.pipeline_options, + self.output_options): + for rec in group: + if rec.option == name: + return rec + + def merge_plugin_recommendations(self): + for source in (self.input_plugin, self.output_plugin): + for name, val, level in source.recommendations: + rec = self.get_option_by_name(name) + if rec is not None and rec.level <= level: + rec.recommended_value = val + + def merge_ui_recommendations(self, recommendations): + for name, val, level in recommendations: + rec = self.get_option_by_name(name) + if rec is not None and rec.level <= level and rec.level < rec.HIGH: + rec.recommended_value = val + + def read_user_metadata(self): + from calibre.ebooks.metadata import MetaInformation, 
string_to_authors + from calibre.ebooks.metadata.opf2 import OPF + mi = MetaInformation(None, []) + if self.opts.read_metadata_from_opf is not None: + self.opts.read_metadata_from_opf = os.path.abspath( + self.opts.read_metadata_from_opf) + opf = OPF(open(self.opts.read_metadata_from_opf, 'rb'), + os.path.dirname(self.opts.read_metadata_from_opf)) + mi = MetaInformation(opf) + for x in self.metadata_option_names: + val = getattr(self.opts, x, None) + if val is not None: + if x == 'authors': + val = string_to_authors(val) + elif x == 'tags': + val = [i.strip() for i in val.split(',')] + elif x in ('rating', 'series_index'): + val = float(val) + setattr(mi, x, val) + if mi.cover: + mi.cover_data = ('', open(mi.cover, 'rb').read()) + mi.cover = None + self.user_metadata = mi + + + def setup_options(self): + self.opts = OptionValues() + for group in (self.input_options, self.pipeline_options, + self.output_options): + for rec in group: + setattr(self.opts, rec.option.name, rec.recommended_value) + + for x in input_profiles(): + if x.short_name == self.opts.input_profile: + self.opts.input_profile = x + break + + for x in output_profiles(): + if x.short_name == self.opts.output_profile: + self.opts.output_profile = x + break + + self.read_user_metadata() + + def run(self): + self.setup_options() + from calibre.customize.ui import run_plugins_on_preprocess + self.input = run_plugins_on_preprocess(self.input) + + from calibre.ebooks.oeb.reader import OEBReader + from calibre.ebooks.oeb.base import OEBBook + parse_cache, accelerators = {}, {} + + opfpath = self.input_plugin(open(self.input, 'rb'), self.opts, + self.input_fmt, parse_cache, self.log, + accelerators) + + self.reader = OEBReader() + self.oeb = OEBBook(self.log, parse_cache=parse_cache) + self.reader(self.oeb, opfpath) + + + \ No newline at end of file diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py index 9a8e251108..b3e5281525 100644 --- 
a/src/calibre/ebooks/epub/from_any.py +++ b/src/calibre/ebooks/epub/from_any.py @@ -12,7 +12,7 @@ from contextlib import nested from calibre import extract, walk from calibre.ebooks import DRMError -from calibre.ebooks.epub import config as common_config, process_encryption +from calibre.ebooks.epub import config as common_config from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index from calibre.ptempfile import TemporaryDirectory from calibre.ebooks.metadata import MetaInformation diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index ffe402538f..47d278a2b6 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer): if not tag.text and not tag.get('src', False): tag.getparent().remove(tag) + for tag in self.root.xpath('//form'): + tag.getparent().remove(tag) + if self.opts.linearize_tables: for tag in self.root.xpath('//table | //tr | //th | //td'): tag.tag = 'div' diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index 1b69424a9e..4c1cdbfcf5 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -51,7 +51,8 @@ class EPUBInput(InputFormatPlugin): traceback.print_exc() return False - def convert(self, stream, options, file_ext, parse_cache, log): + def convert(self, stream, options, file_ext, parse_cache, log, + accelerators): from calibre.utils.zipfile import ZipFile from calibre import walk from calibre.ebooks import DRMError diff --git a/src/calibre/ebooks/epub/pages.py b/src/calibre/ebooks/epub/pages.py index 1ab5edde86..4737107a6c 100644 --- a/src/calibre/ebooks/epub/pages.py +++ b/src/calibre/ebooks/epub/pages.py @@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en' import os, re from itertools import count, chain from calibre.ebooks.oeb.base import XHTML, XHTML_NS -from calibre.ebooks.oeb.base import OEBBook, DirWriter +from 
calibre.ebooks.oeb.base import OEBBook from lxml import etree, html from lxml.etree import XPath diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py index 9814c40df5..c3099c1682 100644 --- a/src/calibre/ebooks/epub/split.py +++ b/src/calibre/ebooks/epub/split.py @@ -15,7 +15,7 @@ from lxml.cssselect import CSSSelector from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.epub import tostring, rules -from calibre import CurrentDir, LoggingInterface +from calibre import CurrentDir XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'}) content = functools.partial(os.path.join, 'content') @@ -32,10 +32,9 @@ class SplitError(ValueError): -class Splitter(LoggingInterface): +class Splitter(object): def __init__(self, path, opts, stylesheet_map, opf): - LoggingInterface.__init__(self, logging.getLogger('htmlsplit')) self.setup_cli_handler(opts.verbose) self.path = path self.always_remove = not opts.preserve_tag_structure or \ diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index 710b544007..da64e58684 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -19,11 +19,10 @@ from lxml.html import HtmlElementClassLookup, HTMLParser as _HTMLParser, \ from lxml.etree import XPath get_text = XPath("//text()") -from calibre import LoggingInterface, unicode_path, entity_to_unicode +from calibre import unicode_path, entity_to_unicode from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS from calibre.utils.config import Config, StringConfig from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.opf2 import OPF, OPFCreator from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.utils.zipfile import ZipFile @@ -401,7 +400,7 @@ class PreProcessor(object): html = rule[0].sub(rule[1], html) return html -class Parser(PreProcessor, 
LoggingInterface): +class Parser(PreProcessor): # SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont' # SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in # [ @@ -412,7 +411,6 @@ class Parser(PreProcessor, LoggingInterface): # ] def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'): - LoggingInterface.__init__(self, logging.getLogger(name)) self.setup_cli_handler(opts.verbose) self.htmlfile = htmlfile self.opts = opts @@ -859,7 +857,7 @@ class Processor(Parser): except ValueError: setting = '' face = font.attrib.pop('face', None) - if face is not None: + if face: faces = [] for face in face.split(','): face = face.strip() @@ -1038,6 +1036,7 @@ def merge_metadata(htmlfile, opf, opts): if opf: mi = MetaInformation(opf) elif htmlfile: + from calibre.ebooks.metadata.meta import get_metadata try: mi = get_metadata(open(htmlfile, 'rb'), 'html') except: diff --git a/src/calibre/ebooks/lrf/comic/convert_from.py b/src/calibre/ebooks/lrf/comic/convert_from.py index 45254f7b87..50f5e1e72e 100755 --- a/src/calibre/ebooks/lrf/comic/convert_from.py +++ b/src/calibre/ebooks/lrf/comic/convert_from.py @@ -143,7 +143,8 @@ class PageProcessor(list): MagickRotateImage(wand, pw, -90) # 25 percent fuzzy trim? - MagickTrimImage(wand, 25*65535/100) + if not self.opts.disable_trim: + MagickTrimImage(wand, 25*65535/100) MagickSetImagePage(wand, 0,0,0,0) #Clear page after trim, like a "+repage" # Do the Photoshop "Auto Levels" equivalent if not self.opts.dont_normalize: @@ -303,6 +304,9 @@ def config(defaults=None,output_format='lrf'): help=_('Maintain picture aspect ratio. Default is to fill the screen.')) c.add_opt('dont_sharpen', ['-s', '--disable-sharpen'], default=False, help=_('Disable sharpening.')) + c.add_opt('disable_trim', ['--disable-trim'], default=False, + help=_('Disable trimming of comic pages. 
For some comics, ' + 'trimming might remove content as well as borders.')) c.add_opt('landscape', ['-l', '--landscape'], default=False, help=_("Don't split landscape images into two portrait images")) c.add_opt('wide', ['-w', '--wide-aspect'], default=False, diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 2bd63d1d8f..9ec4857126 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -31,7 +31,7 @@ from calibre.ebooks.lrf import option_parser as lrf_option_parser from calibre.ebooks import ConversionError from calibre.ebooks.lrf.html.table import Table from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \ - fit_image, LoggingInterface, preferred_encoding + fit_image, preferred_encoding from calibre.ptempfile import PersistentTemporaryFile from calibre.devices.interface import Device from calibre.ebooks.lrf.html.color_map import lrs_color @@ -78,7 +78,7 @@ def tag_regex(tagname): return dict(open=r'(?:<\s*%(t)s\s+[^<>]*?>|<\s*%(t)s\s*>)'%dict(t=tagname), \ close=r''%dict(t=tagname)) -class HTMLConverter(object, LoggingInterface): +class HTMLConverter(object): SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE) IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) @@ -99,6 +99,10 @@ class HTMLConverter(object, LoggingInterface): # Replace common line break patterns with line breaks (re.compile(r'

( |\s)*

', re.IGNORECASE), lambda m: '
'), + # Replace empty headers with line breaks + (re.compile(r'( |\s)*', + re.IGNORECASE), lambda m: '
'), + # Replace entities (re.compile(ur'&(\S+?);'), partial(entity_to_unicode, exceptions=['lt', 'gt', 'amp'])), @@ -209,7 +213,6 @@ class HTMLConverter(object, LoggingInterface): ''' # Defaults for various formatting tags object.__setattr__(self, 'options', options) - LoggingInterface.__init__(self, logger) self.fonts = fonts #: dict specifying font families to use # Memory self.scaled_images = {} #: Temporary files with scaled version of images diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py index 495d9adb50..86a97aa70b 100644 --- a/src/calibre/ebooks/lrf/lrs/convert_from.py +++ b/src/calibre/ebooks/lrf/lrs/convert_from.py @@ -28,8 +28,9 @@ class LrsParser(object): def __init__(self, stream, logger): self.logger = logger src = stream.read() - self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0], - selfClosingTags=self.SELF_CLOSING_TAGS) + self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0], + convertEntities=BeautifulStoneSoup.XML_ENTITIES, + selfClosingTags=self.SELF_CLOSING_TAGS) self.objects = {} for obj in self.soup.findAll(objid=True): self.objects[obj['objid']] = obj diff --git a/src/calibre/ebooks/lrf/meta.py b/src/calibre/ebooks/lrf/meta.py index 322835f470..6ec87892d6 100644 --- a/src/calibre/ebooks/lrf/meta.py +++ b/src/calibre/ebooks/lrf/meta.py @@ -530,7 +530,7 @@ class LRFMetaFile(object): """ See L{file.write} """ self._file.write(val) - def objects(self): + def _objects(self): self._file.seek(self.object_index_offset) c = self.number_of_objects while c > 0: @@ -543,7 +543,7 @@ class LRFMetaFile(object): def get_objects_by_type(self, type): from calibre.ebooks.lrf.tags import Tag objects = [] - for id, offset, size in self.objects(): + for id, offset, size in self._objects(): self._file.seek(offset) tag = Tag(self._file) if tag.id == 0xF500: @@ -554,7 +554,7 @@ class LRFMetaFile(object): def get_object_by_id(self, tid): from calibre.ebooks.lrf.tags import Tag - for id, offset, size in 
self.objects(): + for id, offset, size in self._objects(): self._file.seek(offset) tag = Tag(self._file) if tag.id == 0xF500: diff --git a/src/calibre/ebooks/metadata/isbndb.py b/src/calibre/ebooks/metadata/isbndb.py index 3cf5f92eaf..487a52335b 100644 --- a/src/calibre/ebooks/metadata/isbndb.py +++ b/src/calibre/ebooks/metadata/isbndb.py @@ -112,7 +112,8 @@ key is the account key you generate after signing up for a free account from isb default=None, help=_('The title of the book to search for.')) parser.add_option('-p', '--publisher', default=None, dest='publisher', help=_('The publisher of the book to search for.')) - parser.add_option('--verbose', default=False, action='store_true', help=_('Verbose processing')) + parser.add_option('-v', '--verbose', default=False, + action='store_true', help=_('Verbose processing')) return parser diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 7b3c873b38..071111e0f7 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -19,14 +19,22 @@ def get_metadata(stream): for item in opf.iterguide(): if 'cover' not in item.get('type', '').lower(): continue + ctype = item.get('type') href = item.get('href', '') candidates = [href, href.replace('&', '%26')] for item in litfile.manifest.values(): if item.path in candidates: - covers.append(item.internal) + try: + covers.append((litfile.get_file('/data/'+item.internal), + ctype)) + except: + pass break - covers = [litfile.get_file('/data/' + i) for i in covers] - covers.sort(cmp=lambda x, y:cmp(len(x), len(y))) - mi.cover_data = ('jpg', covers[-1]) + covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True) + idx = 0 + if len(covers) > 1: + if covers[1][1] == covers[1][0]+'-standard': + idx = 1 + mi.cover_data = ('jpg', covers[idx][0]) return mi diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 1ce9950677..b3400c54e1 100644 --- a/src/calibre/ebooks/mobi/input.py 
+++ b/src/calibre/ebooks/mobi/input.py @@ -3,8 +3,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os - from calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): @@ -14,16 +12,19 @@ class MOBIInput(InputFormatPlugin): description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML' file_types = set(['mobi', 'prc', 'azw']) - def convert(self, stream, options, file_ext, parse_cache, log): + def convert(self, stream, options, file_ext, parse_cache, log, + accelerators): from calibre.ebooks.mobi.reader import MobiReader mr = MobiReader(stream, log, options.input_encoding, options.debug_input) - mr.extract_content(output_dir=os.getcwdu(), parse_cache) + mr.extract_content('.', parse_cache) raw = parse_cache.get('calibre_raw_mobi_markup', False) if raw: if isinstance(raw, unicode): raw = raw.encode('utf-8') open('debug-raw.html', 'wb').write(raw) - - return mr.created_opf_path - + for f, root in parse_cache.items(): + if '.' 
in f: + accelerators[f] = {'pagebreaks':root.xpath( + '//div[@class="mbp_pagebreak"]')} + return mr.created_opf_path \ No newline at end of file diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index a72eb7716a..85057017a6 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -312,7 +312,7 @@ class MobiReader(object): mobi_version = self.book_header.mobi_version for i, tag in enumerate(root.iter(etree.Element)): if tag.tag in ('country-region', 'place', 'placetype', 'placename', - 'state', 'city'): + 'state', 'city', 'street', 'address'): tag.tag = 'span' for key in tag.attrib.keys(): tag.attrib.pop(key) @@ -389,7 +389,13 @@ class MobiReader(object): opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1) elif mi.cover is not None: opf.cover = mi.cover - manifest = [(htmlfile, 'text/x-oeb1-document'), + else: + opf.cover = 'images/%05d.jpg'%1 + if not os.path.exists(os.path.join(os.path.dirname(htmlfile), + *opf.cover.split('/'))): + opf.cover = None + + manifest = [(htmlfile, 'text/x-oeb1-document'), (os.path.abspath('styles.css'), 'text/css')] bp = os.path.dirname(htmlfile) for i in getattr(self, 'image_names', []): diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index fdabfaa618..6ebeba3739 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -9,7 +9,6 @@ __copyright__ = '2008, Marshall T. 
Vandegrift ' import sys import os from struct import pack -import functools import time import random from cStringIO import StringIO @@ -18,13 +17,12 @@ from itertools import izip, count from collections import defaultdict from urlparse import urldefrag import logging -from lxml import etree from PIL import Image from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \ OEB_RASTER_IMAGES -from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname +from calibre.ebooks.oeb.base import namespace, prefixname from calibre.ebooks.oeb.base import urlnormalize -from calibre.ebooks.oeb.base import Logger, OEBBook +from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.profile import Context from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 2e160d1571..59ce1f7b95 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. 
Vandegrift ' __docformat__ = 'restructuredtext en' -import os, sys, re, uuid +import os, re, uuid from mimetypes import types_map from collections import defaultdict from itertools import count @@ -15,7 +15,6 @@ from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote from lxml import etree, html import calibre -from calibre import LoggingInterface from calibre.translations.dynamic import translate from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.entitydefs import ENTITYDEFS @@ -204,22 +203,6 @@ class OEBError(Exception): """Generic OEB-processing error.""" pass - -class FauxLogger(object): - """Fake logging interface.""" - def __getattr__(self, name): - return self - def __call__(self, message): - print message - -class Logger(LoggingInterface, object): - """A logging object which provides both the standard `logging.Logger` and - calibre-specific interfaces. - """ - def __getattr__(self, name): - return object.__getattribute__(self, 'log_' + name) - - class NullContainer(object): """An empty container. @@ -1233,16 +1216,20 @@ class PageList(object): class OEBBook(object): """Representation of a book in the IDPF OEB data model.""" - def __init__(self, encoding=None, pretty_print=False, logger=FauxLogger()): + def __init__(self, logger, parse_cache={}, encoding='utf-8', + pretty_print=False): """Create empty book. Optional arguments: + :param parse_cache: A cache of parsed XHTML/CSS. Keys are absolute + paths to te cached files and values are lxml root objects and + cssutils stylesheets. :param:`encoding`: Default encoding for textual content read from an external container. :param:`pretty_print`: Whether or not the canonical string form of XML markup is pretty-printed. - :prama:`logger`: A Logger object to use for logging all messages + :param:`logger`: A Log object to use for logging all messages related to the processing of this book. It is accessible - via the instance data member :attr:`logger`. 
+ via the instance data members :attr:`logger,log`. It provides the following public instance data members for accessing various parts of the OEB data model: @@ -1260,7 +1247,7 @@ class OEBBook(object): """ self.encoding = encoding self.pretty_print = pretty_print - self.logger = logger + self.logger = self.log = logger self.version = '2.0' self.container = NullContainer() self.metadata = Metadata(self) diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py new file mode 100644 index 0000000000..0a74f488cf --- /dev/null +++ b/src/calibre/ebooks/oeb/output.py @@ -0,0 +1,17 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.customize.conversion import OutputFormatPlugin + +class OEBOutput(OutputFormatPlugin): + + name = 'OEB Output' + author = 'Kovid Goyal' + file_type = 'oeb' + + + def convert(self, oeb_book, input_plugin, options, parse_cache, log): + pass + diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 0fce1c2b0d..dbafa5afac 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -19,9 +19,9 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \ PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, CSSURL_RE, \ ENTITY_RE, LINK_SELECTORS, MS_COVER_TYPE -from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath -from calibre.ebooks.oeb.base import urlnormalize, xml2str -from calibre.ebooks.oeb.base import OEBError, OEBBook, DirContainer +from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath, \ + urlnormalize, BINARY_MIME, \ + OEBError, OEBBook, DirContainer from calibre.ebooks.oeb.writer import OEBWriter from calibre.ebooks.oeb.entitydefs import ENTITYDEFS from calibre.ebooks.metadata.epub import CoverRenderer @@ -45,9 +45,6 @@ class OEBReader(object): 
TRANSFORMS = [] """List of transforms to apply to content read with this Reader.""" - def __init__(self): - return - @classmethod def config(cls, cfg): """Add any book-reading options to the :class:`Config` object @@ -65,7 +62,7 @@ class OEBReader(object): :param:`oeb`. """ self.oeb = oeb - self.logger = oeb.logger + self.logger = self.log = oeb.logger oeb.container = self.Container(path) opf = self._read_opf() self._all_from_opf(opf) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index ede2a027ed..9833b3b4d0 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -6,18 +6,14 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import sys -import os import re import operator import math -from itertools import chain from collections import defaultdict from lxml import etree from calibre.ebooks.oeb.base import XHTML, XHTML_NS from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import namespace, barename -from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.stylizer import Stylizer COLLAPSE = re.compile(r'[ \t\r\n\v]+') diff --git a/src/calibre/ebooks/oeb/transforms/htmltoc.py b/src/calibre/ebooks/oeb/transforms/htmltoc.py index 0040f39c14..4504059531 100644 --- a/src/calibre/ebooks/oeb/transforms/htmltoc.py +++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py @@ -6,9 +6,6 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. 
Vandegrift ' -import sys -import os -from lxml import etree from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME from calibre.ebooks.oeb.base import element diff --git a/src/calibre/ebooks/oeb/transforms/manglecase.py b/src/calibre/ebooks/oeb/transforms/manglecase.py index c819475a4d..4b852db6c4 100644 --- a/src/calibre/ebooks/oeb/transforms/manglecase.py +++ b/src/calibre/ebooks/oeb/transforms/manglecase.py @@ -6,13 +6,6 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import sys -import os -import re -import operator -import math -from itertools import chain -from collections import defaultdict from lxml import etree from calibre.ebooks.oeb.base import XHTML, XHTML_NS from calibre.ebooks.oeb.base import CSS_MIME diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index aef5c2c98b..2d86fe63b5 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -6,7 +6,6 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. 
Vandegrift ' -import sys import os from urlparse import urldefrag import base64 @@ -20,9 +19,9 @@ from PyQt4.QtGui import QImage from PyQt4.QtGui import QPainter from PyQt4.QtSvg import QSvgRenderer from PyQt4.QtGui import QApplication -from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK -from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME -from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename +from calibre.ebooks.oeb.base import XHTML, XLINK +from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME +from calibre.ebooks.oeb.base import xml2str, xpath from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer @@ -88,7 +87,7 @@ class SVGRasterizer(object): hrefs = self.oeb.manifest.hrefs for elem in xpath(svg, '//svg:*[@xl:href]'): href = urlnormalize(elem.attrib[XLINK('href')]) - path, frag = urldefrag(href) + path = urldefrag(href)[0] if not path: continue abshref = item.abshref(path) diff --git a/src/calibre/gui2/dialogs/comicconf.ui b/src/calibre/gui2/dialogs/comicconf.ui index 36af85764a..acab125d57 100644 --- a/src/calibre/gui2/dialogs/comicconf.ui +++ b/src/calibre/gui2/dialogs/comicconf.ui @@ -1,154 +1,162 @@ - + + Dialog - - + + 0 0 646 - 468 + 503 - + Dialog - - + + :/images/convert.svg:/images/convert.svg - - - - + + + + &Title: - + opt_title - - + + - - - + + + &Author(s): - + opt_author - - + + - - - + + + &Number of Colors: - + opt_colors - - - + + + 8 - + 3200000 - + 8 - - - + + + &Profile: - + opt_profile - - + + - - - + + + Disable &normalize - - - + + + Keep &aspect ratio - - - + + + Disable &Sharpening - - - + + + &Landscape - - - + + + Don't so&rt - - - + + + Qt::Horizontal - + QDialogButtonBox::Cancel|QDialogButtonBox::Ok - - - + + + &Right to left - - - + + + De&speckle - - - + + + &Wide + + + + Disable &Trimming + + + - + @@ -157,11 +165,11 @@ Dialog accept() - + 248 254 - + 157 274 @@ -173,11 +181,11 @@ Dialog reject() - + 316 260 - + 286 274 diff --git 
a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py index 3014d3dbbd..9958ce53fa 100644 --- a/src/calibre/gui2/dialogs/config.py +++ b/src/calibre/gui2/dialogs/config.py @@ -194,7 +194,11 @@ class ConfigDialog(QDialog, Ui_Dialog): lang = get_lang() if lang is not None and language_codes.has_key(lang): self.language.addItem(language_codes[lang], QVariant(lang)) - items = [(l, language_codes[l]) for l in translations.keys() if l != lang] + else: + lang = 'en' + self.language.addItem('English', QVariant('en')) + items = [(l, language_codes[l]) for l in translations.keys() \ + if l != lang] if lang != 'en': items.append(('en', 'English')) items.sort(cmp=lambda x, y: cmp(x[1], y[1])) diff --git a/src/calibre/gui2/images/news/wikinews_en.png b/src/calibre/gui2/images/news/wikinews_en.png new file mode 100644 index 0000000000..489061b923 Binary files /dev/null and b/src/calibre/gui2/images/news/wikinews_en.png differ diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 163a9d8bd0..4ecfc08f58 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow): dir = os.path.expanduser('~/Library') self.library_path = os.path.abspath(dir) if not os.path.exists(self.library_path): - os.makedirs(self.library_path) + try: + os.makedirs(self.library_path) + except: + self.library_path = os.path.expanduser('~/Library') + error_dialog(self, _('Invalid library location'), + _('Could not access %s. 
Using %s as the library.')% + (repr(self.library_path), repr(self.library_path)) + ).exec_() + os.makedirs(self.library_path) def read_settings(self): diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index f8b63f1124..cb823e6c73 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -15,7 +15,7 @@ from PyQt4.QtCore import QCoreApplication, QThread, QReadWriteLock from PyQt4.QtGui import QApplication, QImage __app = None -from calibre.library import title_sort +from calibre.ebooks.metadata import title_sort from calibre.library.database import LibraryDatabase from calibre.library.sqlite import connect, IntegrityError from calibre.utils.search_query_parser import SearchQueryParser diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 427b41ca5f..e08222ed3a 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -1,9 +1,8 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' Post installation script for linux ''' -import sys, os, re, shutil +import sys, os, shutil from subprocess import check_call, call -from tempfile import NamedTemporaryFile from calibre import __version__, __appname__ from calibre.devices import devices @@ -18,15 +17,8 @@ entry_points = { 'console_scripts': [ \ 'ebook-device = calibre.devices.prs500.cli.main:main', 'ebook-meta = calibre.ebooks.metadata.cli:main', - 'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main', - 'html2lrf = calibre.ebooks.lrf.html.convert_from:main', - 'html2oeb = calibre.ebooks.html:main', - 'html2epub = calibre.ebooks.epub.from_html:main', - 'odt2oeb = calibre.ebooks.odt.to_oeb:main', + 'ebook-convert = calibre.ebooks.conversion.cli:main', 'markdown-calibre = calibre.ebooks.markdown.markdown:main', - 'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main', - 'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main', - 'rtf2lrf = calibre.ebooks.lrf.rtf.convert_from:main', 'web2disk = calibre.web.fetch.simple:main', 'feeds2disk = 
calibre.web.feeds.main:main', 'calibre-server = calibre.library.server:main', @@ -34,22 +26,10 @@ entry_points = { 'feeds2epub = calibre.ebooks.epub.from_feeds:main', 'feeds2mobi = calibre.ebooks.mobi.from_feeds:main', 'web2lrf = calibre.ebooks.lrf.web.convert_from:main', - 'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main', - 'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main', - 'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main', - 'any2lrf = calibre.ebooks.lrf.any.convert_from:main', - 'any2epub = calibre.ebooks.epub.from_any:main', - 'any2lit = calibre.ebooks.lit.from_any:main', - 'any2mobi = calibre.ebooks.mobi.from_any:main', 'lrf2lrs = calibre.ebooks.lrf.lrfparser:main', 'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main', - 'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main', 'isbndb = calibre.ebooks.metadata.isbndb:main', 'librarything = calibre.ebooks.metadata.library_thing:main', - 'mobi2oeb = calibre.ebooks.mobi.reader:main', - 'oeb2mobi = calibre.ebooks.mobi.writer:main', - 'lit2oeb = calibre.ebooks.lit.reader:main', - 'oeb2lit = calibre.ebooks.lit.writer:main', 'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main', 'comic2epub = calibre.ebooks.epub.from_comic:main', 'comic2mobi = calibre.ebooks.mobi.from_comic:main', @@ -60,7 +40,6 @@ entry_points = { 'calibre-parallel = calibre.parallel:main', 'calibre-customize = calibre.customize.ui:main', 'pdftrim = calibre.ebooks.pdf.pdftrim:main' , - 'any2pdf = calibre.ebooks.pdf.from_any:main', ], 'gui_scripts' : [ __appname__+' = calibre.gui2.main:main', @@ -171,25 +150,16 @@ def setup_completion(fatal_errors): from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop - from calibre.ebooks.mobi.reader import option_parser as mobioeb - from calibre.ebooks.lit.reader import option_parser as lit2oeb from calibre.web.feeds.main import option_parser as 
feeds2disk from calibre.web.feeds.recipes import titles as feed_titles from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop - from calibre.ebooks.epub.from_html import option_parser as html2epub - from calibre.ebooks.html import option_parser as html2oeb - from calibre.ebooks.odt.to_oeb import option_parser as odt2oeb from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub from calibre.ebooks.mobi.from_feeds import option_parser as feeds2mobi - from calibre.ebooks.epub.from_any import option_parser as any2epub - from calibre.ebooks.lit.from_any import option_parser as any2lit from calibre.ebooks.epub.from_comic import option_parser as comic2epub - from calibre.ebooks.mobi.from_any import option_parser as any2mobi - from calibre.ebooks.mobi.writer import option_parser as oeb2mobi - from calibre.gui2.main import option_parser as guiop + from calibre.gui2.main import option_parser as guiop any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', - 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt'] + 'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt'] f = open_file('/etc/bash_completion.d/libprs500') f.close() os.remove(f.name) @@ -209,16 +179,10 @@ def setup_completion(fatal_errors): f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf'])) f.write(opts_and_exts('any2lrf', htmlop, any_formats)) f.write(opts_and_exts('calibre', guiop, any_formats)) - f.write(opts_and_exts('any2epub', any2epub, any_formats)) - f.write(opts_and_exts('any2lit', any2lit, any_formats)) - f.write(opts_and_exts('any2mobi', any2mobi, any_formats)) - f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf'])) f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf'])) f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes()))) f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf'])) f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf'])) - 
f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc'])) - f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit'])) f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr'])) f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr'])) @@ -227,9 +191,6 @@ def setup_completion(fatal_errors): f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles)) f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles)) f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles)) - f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf'])) - f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml'])) - f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt'])) f.write(''' _prs500_ls() { @@ -392,43 +353,27 @@ def option_parser(): help='Save a manifest of all installed files to the specified location') return parser -def install_man_pages(fatal_errors): - from bz2 import compress - import subprocess +def install_man_pages(fatal_errors, use_destdir=False): + from calibre.utils.help2man import create_man_page + prefix = os.environ.get('DESTDIR', '/') if use_destdir else '/' + manpath = os.path.join(prefix, 'usr/share/man/man1') + if not os.path.exists(manpath): + os.makedirs(manpath) print 'Installing MAN pages...' 
- manpath = '/usr/share/man/man1' - f = NamedTemporaryFile() - f.write('[see also]\nhttp://%s.kovidgoyal.net\n'%__appname__) - f.flush() manifest = [] - os.environ['PATH'] += ':'+os.path.expanduser('~/bin') for src in entry_points['console_scripts']: - prog = src[:src.index('=')].strip() - if prog in ('ebook-device', 'markdown-calibre', - 'calibre-fontconfig', 'calibre-parallel'): + prog, right = src.split('=') + prog = prog.strip() + module = __import__(right.split(':')[0].strip(), fromlist=['a']) + parser = getattr(module, 'option_parser', None) + if parser is None: continue - - help2man = ('help2man', prog, '--name', 'part of %s'%__appname__, - '--section', '1', '--no-info', '--include', - f.name, '--manual', __appname__) + parser = parser() + raw = create_man_page(prog, parser) manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2') print '\tInstalling MAN page for', prog - try: - p = subprocess.Popen(help2man, stdout=subprocess.PIPE) - except OSError, err: - import errno - if err.errno != errno.ENOENT: - raise - print 'Failed to install MAN pages as help2man is missing from your system' - break - o = p.stdout.read() - raw = re.compile(r'^\.IP\s*^([A-Z :]+)$', re.MULTILINE).sub(r'.SS\n\1', o) - if not raw.strip(): - print 'Unable to create MAN page for', prog - continue - f2 = open_file(manfile) - manifest.append(f2.name) - f2.write(compress(raw)) + open(manfile, 'wb').write(raw) + manifest.append(manfile) return manifest def post_install(): @@ -440,9 +385,9 @@ def post_install(): manifest = [] setup_desktop_integration(opts.fatal_errors) if opts.no_root or os.geteuid() == 0: + manifest += install_man_pages(opts.fatal_errors, use_destdir) manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors) manifest += setup_completion(opts.fatal_errors) - manifest += install_man_pages(opts.fatal_errors) else: print "Skipping udev, completion, and man-page install for non-root user." 
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index bb1eb9ba02..c069842e53 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -34,6 +34,8 @@ What formats does |app| support conversion to/from? | | | | | | | | ODT | ✔ | ✔ | ✔ | | | | | | | +| | FB2 | ✔ | ✔ | ✔ | +| | | | | | | | HTML | ✔ | ✔ | ✔ | | | | | | | | **Input formats** | CBR | ✔ | ✔ | ✔ | diff --git a/src/calibre/trac/donations/server.py b/src/calibre/trac/donations/server.py index 8e7a096353..24174db801 100644 --- a/src/calibre/trac/donations/server.py +++ b/src/calibre/trac/donations/server.py @@ -196,7 +196,7 @@ class Server(object): def calculate_month_trend(self, days=31): stats = self.get_slice(date.today()-timedelta(days=days-1), date.today()) - fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) + fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass) fig.clear() ax = fig.add_subplot(111) x = list(range(days-1, -1, -1)) @@ -216,7 +216,7 @@ Donors per day: %(dpd).2f ad=stats.average_deviation, dpd=len(stats.totals)/float(stats.period.days), ) - text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction') + text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction') fig.savefig(self.MONTH_TRENDS) def calculate_trend(self): diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py index 9c852c554e..020c0a0e3d 100644 --- a/src/calibre/trac/plugins/download.py +++ b/src/calibre/trac/plugins/download.py @@ -18,7 +18,6 @@ DEPENDENCIES = [ ('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'), ('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'), ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'), - ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'), ] diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html index 066f3c9b6d..96881aa108 
100644 --- a/src/calibre/trac/plugins/templates/linux.html +++ b/src/calibre/trac/plugins/templates/linux.html @@ -88,7 +88,7 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal. be ignored.
  • - You must have help2man and xdg-utils installed + You must have xdg-utils installed on your system before running the installer.
  • diff --git a/src/calibre/utils/help2man.py b/src/calibre/utils/help2man.py new file mode 100644 index 0000000000..9777ea24cd --- /dev/null +++ b/src/calibre/utils/help2man.py @@ -0,0 +1,59 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import time, bz2 + +from calibre.constants import __version__, __appname__, __author__ + + +def create_man_page(prog, parser): + usage = parser.usage.splitlines() + for i, line in enumerate(list(usage)): + if not line.strip(): + usage[i] = '.PP' + else: + usage[i] = line.replace('%prog', prog) + lines = [ + '.TH ' + prog.upper() + ' "1" ' + time.strftime('"%B %Y"') + + ' "%s (%s %s)" "%s"'%(prog, __appname__, __version__, __appname__), + '.SH NAME', + prog + r' \- part of '+__appname__, + '.SH SYNOPSIS', + '.B "%s"'%prog + r'\fR '+' '.join(usage[0].split()[1:]), + '.SH DESCRIPTION', + ] + lines += usage[1:] + + lines += [ + '.SH OPTIONS' + ] + def format_option(opt): + ans = ['.TP'] + opts = [] + opts += opt._short_opts + opts.append(opt.get_opt_string()) + opts = [r'\fB'+x.replace('-', r'\-')+r'\fR' for x in opts] + ans.append(', '.join(opts)) + help = opt.help if opt.help else '' + ans.append(help.replace('%prog', prog).replace('%default', str(opt.default))) + return ans + + for opt in parser.option_list: + lines.extend(format_option(opt)) + for group in parser.option_groups: + lines.append('.SS '+group.title) + if group.description: + lines.extend(['.PP', group.description]) + for opt in group.option_list: + lines.extend(format_option(opt)) + + lines += ['.SH SEE ALSO', + 'The User Manual is available at ' + 'http://calibre.kovidgoyal.net/user_manual', + '.PP', '.B Created by '+__author__] + + return bz2.compress('\n'.join(lines)) + + diff --git a/src/calibre/utils/logging.py b/src/calibre/utils/logging.py index ae2e1a792b..d5a55ac48b 100644 --- a/src/calibre/utils/logging.py +++ b/src/calibre/utils/logging.py @@ -13,13 +13,25 @@ 
ERROR = 3 import sys, traceback from functools import partial -from calibre import prints -from calibre.utils.terminfo import TerminalController -class ANSIStream: + + +class Stream(object): + + def __init__(self, stream): + from calibre import prints + self._prints = prints + self.stream = stream + + def flush(self): + self.stream.flush() + + +class ANSIStream(Stream): def __init__(self, stream=sys.stdout): - self.stream = stream + Stream.__init__(self, stream) + from calibre.utils.terminfo import TerminalController tc = TerminalController(stream) self.color = { DEBUG: tc.GREEN, @@ -32,16 +44,16 @@ class ANSIStream: def prints(self, level, *args, **kwargs): self.stream.write(self.color[level]) kwargs['file'] = self.stream - prints(*args, **kwargs) + self._prints(*args, **kwargs) self.stream.write(self.normal) def flush(self): self.stream.flush() -class HTMLStream: +class HTMLStream(Stream): def __init__(self, stream=sys.stdout): - self.stream = stream + Stream.__init__(self, stream) self.color = { DEBUG: '', INFO:'', @@ -53,7 +65,7 @@ class HTMLStream: def prints(self, level, *args, **kwargs): self.stream.write(self.color[level]) kwargs['file'] = self.stream - prints(*args, **kwargs) + self._prints(*args, **kwargs) self.stream.write(self.normal) def flush(self): diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 3f0ec414a2..4a0f6b47f7 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -98,7 +98,7 @@ class Feed(object): if len(self.articles) >= max_articles_per_feed: break self.parse_article(item) - + def populate_from_preparsed_feed(self, title, articles, oldest_article=7, max_articles_per_feed=100): @@ -156,7 +156,6 @@ class Feed(object): content = None if not link and not content: return - article = Article(id, title, link, description, published, content) delta = datetime.utcnow() - article.utctime if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: diff --git 
a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4773d551c3..bcc3cb050d 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -17,7 +17,7 @@ from PyQt4.Qt import QApplication, QFile, Qt, QPalette, QSize, QImage, QPainter, from PyQt4.QtWebKit import QWebPage -from calibre import browser, __appname__, iswindows, LoggingInterface, \ +from calibre import browser, __appname__, iswindows, \ strftime, __version__, preferred_encoding from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.metadata.opf2 import OPFCreator @@ -32,7 +32,7 @@ from calibre.ptempfile import PersistentTemporaryFile from calibre.gui2 import images_rc # Needed for default cover -class BasicNewsRecipe(object, LoggingInterface): +class BasicNewsRecipe(object): ''' Abstract base class that contains logic needed in all feed fetchers. ''' @@ -444,7 +444,6 @@ class BasicNewsRecipe(object, LoggingInterface): :param parser: Command line option parser. Used to intelligently merge options. :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional. 
''' - LoggingInterface.__init__(self, logging.getLogger('feeds2disk')) if not isinstance(self.title, unicode): self.title = unicode(self.title, 'utf-8', 'replace') @@ -1012,7 +1011,8 @@ class BasicNewsRecipe(object, LoggingInterface): feed.description = unicode(err) parsed_feeds.append(feed) self.log_exception(msg) - + + return parsed_feeds @classmethod diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 6eb24e162b..793d5cf45d 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in ( 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', 'lamujerdemivida', 'soldiers', 'theonion', 'news_times', - 'el_universal', + 'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_daily_mail.py b/src/calibre/web/feeds/recipes/recipe_daily_mail.py new file mode 100644 index 0000000000..c64e328bf2 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py @@ -0,0 +1,33 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class TheDailyMail(BasicNewsRecipe): + title = u'The Daily Mail' + oldest_article = 2 + language = _('English') + author = 'RufusA' + simultaneous_downloads= 1 + max_articles_per_feed = 50 + + extra_css = 'h1 {text-align: left;}' + + remove_tags = [ dict(name='ul', attrs={'class':'article-icons-links'}) ] + remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'}) + remove_tags_before = dict(name='div', attrs={'id':'content'}) + no_stylesheets = True + + feeds = [ + (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), + (u'News', u'http://www.dailymail.co.uk/news/index.rss'), + (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), + (u'TV and Showbiz', 
u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), + (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), + (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), + (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'), + (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), + (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), + (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), + (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')] + + def print_version(self, url): + main = url.partition('?')[0] + return main + '?printingPage=true' diff --git a/src/calibre/web/feeds/recipes/recipe_ecogeek.py b/src/calibre/web/feeds/recipes/recipe_ecogeek.py new file mode 100644 index 0000000000..7695763295 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_ecogeek.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +EcoGeek.org +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class EcoGeek(BasicNewsRecipe): + title = 'EcoGeek' + __author__ = 'Darko Miletic' + description = 'EcoGeek - Technology for the Environment Blog Feed' + publisher = 'EcoGeek' + language = _('English') + category = 'news, ecology, blog' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = True + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + feeds = [(u'Posts', u'http://feeds2.feedburner.com/EcoGeek')] diff --git a/src/calibre/web/feeds/recipes/recipe_iht.py b/src/calibre/web/feeds/recipes/recipe_iht.py index c30be70dea..1bee27d061 100644 --- a/src/calibre/web/feeds/recipes/recipe_iht.py +++ b/src/calibre/web/feeds/recipes/recipe_iht.py @@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald' ''' iht.com ''' +import re from 
calibre.web.feeds.news import BasicNewsRecipe from calibre.ptempfile import PersistentTemporaryFile @@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe): max_articles_per_feed = 10 no_stylesheets = True - remove_tags = [dict(name='div', attrs={'class':'footer'})] + remove_tags = [dict(name='div', attrs={'class':'footer'}), + dict(name=['form'])] + preprocess_regexps = [ + (re.compile(r'