Sync to pluginize

This commit is contained in:
John Schember 2009-03-11 07:55:11 -04:00
commit a4b78d10ee
56 changed files with 1011 additions and 302 deletions

View File

@ -3,11 +3,13 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, re, logging, time, subprocess, atexit, mimetypes, \
__builtin__
__builtin__, warnings
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
from htmlentitydefs import name2codepoint
from math import floor
from logging import Formatter
warnings.simplefilter('ignore', DeprecationWarning)
from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QDesktopServices
@ -86,6 +88,8 @@ def prints(*args, **kwargs):
for i, arg in enumerate(args):
if isinstance(arg, unicode):
arg = arg.encode(preferred_encoding)
if not isinstance(arg, str):
arg = str(arg)
file.write(arg)
if i != len(args)-1:
file.write(sep)
@ -318,24 +322,6 @@ def english_sort(x, y):
'''
return cmp(_spat.sub('', x), _spat.sub('', y))
class ColoredFormatter(Formatter):
def format(self, record):
ln = record.__dict__['levelname']
col = ''
if ln == 'CRITICAL':
col = terminal_controller.YELLOW
elif ln == 'ERROR':
col = terminal_controller.RED
elif ln in ['WARN', 'WARNING']:
col = terminal_controller.BLUE
elif ln == 'INFO':
col = terminal_controller.GREEN
elif ln == 'DEBUG':
col = terminal_controller.CYAN
record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL
return Formatter.format(self, record)
def walk(dir):
''' A nice interface to os.walk '''
for record in os.walk(dir):

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.4.143'
__version__ = '0.5.0'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -244,11 +244,12 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.customize.profiles import input_profiles
from calibre.ebooks.oeb.output import OEBOutput
from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput]
plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataWriter')]
plugins += input_profiles
plugins += input_profiles + output_profiles

View File

@ -24,7 +24,7 @@ class ConversionOption(object):
self.choices = choices
if self.long_switch is None:
self.long_switch = '--'+self.name.replace('_', '-')
self.long_switch = self.name.replace('_', '-')
self.validate_parameters()
@ -37,19 +37,24 @@ class ConversionOption(object):
if not self.help:
raise ValueError('You must set the help text')
def __hash__(self):
return hash(self.name)
def __eq__(self, other):
return hash(self) == hash(other)
class OptionRecommendation(object):
LOW = 1
MED = 2
HIGH = 3
def __init__(self, recommeded_value, level=LOW, **kwargs):
def __init__(self, recommended_value=None, level=LOW, **kwargs):
'''
An option recommendation. That is, an option as well as its recommended
value and the level of the recommendation.
'''
self.level = level
self.recommended_value = recommeded_value
self.recommended_value = recommended_value
self.option = kwargs.pop('option', None)
if self.option is None:
self.option = ConversionOption(**kwargs)
@ -59,10 +64,12 @@ class OptionRecommendation(object):
def validate_parameters(self):
if self.option.choices and self.recommended_value not in \
self.option.choices:
raise ValueError('Recommended value not in choices')
raise ValueError('OpRec: %s: Recommended value not in choices'%
self.option.name)
if not (isinstance(self.recommended_value, (int, float, str, unicode))\
or self.default is None):
raise ValueError(unicode(self.default) +
or self.recommended_value is None):
raise ValueError('OpRec: %s:'%self.option.name +
repr(self.recommended_value) +
' is not a string or a number')
@ -110,7 +117,11 @@ class InputFormatPlugin(Plugin):
#: instance of :class:`OptionRecommendation`.
options = set([])
def convert(self, stream, options, file_ext, parse_cache, log):
#: A set of 3-tuples of the form
#: (option_name, recommended_value, recommendation_level)
recommendations = set([])
def convert(self, stream, options, file_ext, parse_cache, log, accelerators):
'''
This method must be implemented in sub-classes. It must return
the path to the created OPF file. All output should be contained in
@ -146,10 +157,16 @@ class InputFormatPlugin(Plugin):
:param log: A :class:`calibre.utils.logging.Log` object. All output
should use this object.
:param accelerators: A dictionary of various information that the input
plugin can get easily that would speed up the
subsequent stages of the conversion.
'''
raise NotImplementedError
def __call__(self, stream, options, file_ext, parse_cache, log, output_dir):
def __call__(self, stream, options, file_ext, parse_cache, log,
accelerators, output_dir):
log('InputFormatPlugin: %s running'%self.name, end=' ')
if hasattr(stream, 'name'):
log('on', stream.name)
@ -159,7 +176,8 @@ class InputFormatPlugin(Plugin):
shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
ret = self.convert(stream, options, file_ext, parse_cache, log)
ret = self.convert(stream, options, file_ext, parse_cache,
log, accelerators)
for key in list(parse_cache.keys()):
if os.path.abspath(key) != key:
log.warn(('InputFormatPlugin: %s returned a '
@ -186,4 +204,38 @@ class InputFormatPlugin(Plugin):
return ret
class OutputFormatPlugin(Plugin):
    '''
    Base class for plugins that turn an OEB document (OPF+HTML) into an
    output ebook.

    The OEB document can be assumed to be encoded in UTF-8.

    The conversion itself is performed by :meth:`convert`.
    '''

    supported_platforms = ['windows', 'osx', 'linux']
    can_be_disabled = False
    type = _('Conversion Output')

    #: The file type (extension without leading period) that this
    #: plugin outputs
    file_type = None

    #: Options shared by all Output format plugins. Do not override
    #: in sub-classes. Use :attr:`options` instead. Every option must be an
    #: instance of :class:`OptionRecommendation`.
    common_options = set()

    #: Options to customize the behavior of this plugin. Every option must
    #: be an instance of :class:`OptionRecommendation`.
    options = set()

    #: A set of 3-tuples of the form
    #: (option_name, recommended_value, recommendation_level)
    recommendations = set()

    def convert(self, oeb_book, input_plugin, options, parse_cache, log):
        '''
        Perform the conversion. The base implementation always raises
        :class:`NotImplementedError`.
        '''
        raise NotImplementedError()

View File

@ -3,6 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, re
from calibre.customize import Plugin
class InputProfile(Plugin):
@ -16,12 +17,43 @@ class InputProfile(Plugin):
# inherit from this profile and override as needed
name = 'Default Input Profile'
short_name = 'default' # Used in the CLI so dont spaces etc. in it
short_name = 'default' # Used in the CLI so dont use spaces etc. in it
description = _('This profile tries to provide sane defaults and is useful '
'if you know nothing about the input document.')
input_profiles = [InputProfile]
class OutputProfile(Plugin):
    '''
    The default output profile. Device specific profiles sub-class this
    and override the settings at the bottom of the class.
    '''

    # Plugin metadata
    name = 'Default Output Profile'
    short_name = 'default' # Used in the CLI so dont use spaces etc. in it
    author = 'Kovid Goyal'
    type = _('Output profile')
    description = _('This profile tries to provide sane defaults and is useful '
                    'if you want to produce a document intended to be read at a '
                    'computer or on a range of devices.')
    supported_platforms = set(['windows', 'osx', 'linux'])
    can_be_disabled = False

    # Settings that sub-classes override per device
    epub_flow_size = sys.maxint
    screen_size = None
    remove_special_chars = False
    remove_object_tags = False
class SonyReader(OutputProfile):
    '''Output profile for the SONY PRS line of readers.'''

    name = 'Sony Reader'
    short_name = 'sony'
    description = _('This profile is intended for the SONY PRS line. '
                    'The 500/505/700 etc.')

    # Overrides of the OutputProfile defaults
    screen_size = (590, 765)
    epub_flow_size = 270000
    remove_object_tags = True
    remove_special_chars = re.compile(u'[\u200b\u00ad]')
output_profiles = [OutputProfile, SonyReader]

View File

@ -6,8 +6,8 @@ import os, shutil, traceback, functools, sys
from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \
MetadataWriterPlugin
from calibre.customize.conversion import InputFormatPlugin
from calibre.customize.profiles import InputProfile
from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin
from calibre.customize.profiles import InputProfile, OutputProfile
from calibre.customize.builtins import plugins as builtin_plugins
from calibre.constants import __version__, iswindows, isosx
from calibre.ebooks.metadata import MetaInformation
@ -76,6 +76,12 @@ def input_profiles():
if isinstance(plugin, InputProfile):
yield plugin
def output_profiles():
    '''Yield every initialized plugin that is an :class:`OutputProfile`.'''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, OutputProfile):
            continue
        yield candidate
def reread_filetype_plugins():
global _on_import
global _on_preprocess
@ -245,9 +251,19 @@ def input_format_plugins():
def plugin_for_input_format(fmt):
for plugin in input_format_plugins():
if fmt in plugin.file_types:
if fmt.lower() in plugin.file_types:
return plugin
def output_format_plugins():
    '''Yield every initialized plugin that is an :class:`OutputFormatPlugin`.'''
    for candidate in _initialized_plugins:
        if not isinstance(candidate, OutputFormatPlugin):
            continue
        yield candidate
def plugin_for_output_format(fmt):
    '''
    Return the first output format plugin whose ``file_type`` equals the
    lower-cased ``fmt``, or ``None`` when no plugin matches.
    '''
    for candidate in output_format_plugins():
        if fmt.lower() != candidate.file_type:
            continue
        return candidate
    return None
def disable_plugin(plugin_or_name):
x = getattr(plugin_or_name, 'name', plugin_or_name)

View File

@ -74,7 +74,7 @@ class CYBOOKG3(USBMS):
if self.SUPPORTS_SUB_DIRS:
if 'tags' in mdata.keys():
for tag in mdata['tags']:
if tag.startswith('News'):
if tag.startswith(_('News')):
newpath = os.path.join(newpath, 'news')
newpath = os.path.join(newpath, mdata.get('title', ''))
newpath = os.path.join(newpath, mdata.get('timestamp', ''))

View File

@ -99,7 +99,8 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
try:
raw = raw.decode(encoding, 'replace')
except LookupError:
raw = raw.decode('utf-8', 'replace')
encoding = 'utf-8'
raw = raw.decode(encoding, 'replace')
if strip_encoding_pats:
raw = strip_encoding_declarations(raw)

View File

@ -0,0 +1,167 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Command line interface to conversion sub-system
'''
# Usage text handed to the option parser. Only the description is passed
# through _(); the '%prog ' prefix and the documentation URL are not.
USAGE = '%prog ' + _('''\
input_file output_file [options]
Convert an ebook from one format to another.
input_file is the input and output_file is the output. Both must be \
specified as the first two arguments to the command.
The output ebook format is guessed from the file extension of \
output_file. output_file can also be of the special format .EXT where \
EXT is the output file extension. In this case, the name of the output \
file is derived from the name of the input file. Note that the filenames must \
not start with a hyphen. Finally, if output_file has no extension, then \
it is treated as a directory and an "open ebook" (OEB) consisting of HTML \
files is written to that directory. These files are the files that would \
normally have been passed to the output plugin.
After specifying the input \
and output file you can customize the conversion by specifying various \
options. The available options depend on the input and output file types. \
To get help on them specify the input and output file and then use the -h \
option.
For full documentation of the conversion system see
''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html'
import sys, os
from optparse import OptionGroup, Option
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
def print_help(parser, log):
    '''
    Send the parser's formatted help text to ``log``.

    The text is encoded with ``preferred_encoding``; characters that cannot
    be represented in that encoding are replaced instead of raising.
    '''
    log(parser.format_help().encode(preferred_encoding, 'replace'))
def check_command_line_options(parser, args, log):
    '''
    Validate the positional command line arguments and resolve the input
    and output paths.

    :param parser: Option parser, used only to print the help text when the
                   arguments are unusable.
    :param args: The full argument list. ``args[1]`` is the input file and
                 ``args[2]`` is the output file, or the special form ``.EXT``.
    :param log: Log object through which errors and warnings are reported.
    :return: ``(input_path, output_path)``, both absolute.
    :raises SystemExit: If either file argument is missing or starts with a
                        hyphen, or if the input file cannot be read.
    '''
    if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'):
        # print_help() takes the log it should write to as second argument
        print_help(parser, log)
        log.error('\n\nYou must specify the input AND output files')
        raise SystemExit(1)

    input_path = os.path.abspath(args[1])
    if not os.access(input_path, os.R_OK):
        log.error('Cannot read from', input_path)
        raise SystemExit(1)

    output_path = args[2]
    if output_path.startswith('.'):
        # Special ".EXT" form: reuse the base name of the input file
        base = os.path.splitext(os.path.basename(input_path))[0]
        output_path = base + output_path
    output_path = os.path.abspath(output_path)

    # Only an output with a file extension names a single file that may be
    # replaced. Test the extension rather than looking for a dot anywhere
    # in the absolute path, so that a dotted parent directory does not
    # cause an extension-less output to be deleted.
    if os.path.splitext(output_path)[1] and os.path.exists(output_path):
        log.warn('WARNING:', output_path, 'exists. Deleting.')
        os.remove(output_path)

    return input_path, output_path
def option_recommendation_to_cli_option(add_option, rec):
    '''
    Create the command line option for the recommendation ``rec`` and hand
    it to ``add_option``.

    The short switch is only included when the option defines one. The
    recommended value becomes the default of the command line option.
    '''
    option = rec.option
    flags = []
    if option.short_switch:
        flags.append('-' + option.short_switch)
    flags.append('--' + option.long_switch)
    cli_option = Option(*flags, dest=option.name, help=option.help,
                        choices=option.choices,
                        default=rec.recommended_value)
    add_option(cli_option)
def add_input_output_options(parser, plumber):
    '''
    Add the options of the input and output plugins to ``parser``, each in
    an option group of its own.

    A group is only created when the corresponding collection of options
    is non-empty.

    :param parser: The option parser to extend.
    :param plumber: Object providing ``input_options``, ``output_options``,
                    ``input_fmt`` and ``output_fmt``.
    '''
    def add_group(title, description, options):
        # Build the group, fill it, then register it with the parser
        group = OptionGroup(parser, title, description)
        for rec in options:
            option_recommendation_to_cli_option(group.add_option, rec)
        parser.add_option_group(group)

    if plumber.input_options:
        add_group(_('INPUT OPTIONS'),
                  _('Options to control the processing'
                    ' of the input %s file')%plumber.input_fmt,
                  plumber.input_options)

    if plumber.output_options:
        # The description names the output format (it used to be formatted
        # with the input format)
        add_group(plumber.output_fmt.upper() + ' ' + _('OPTIONS'),
                  _('Options to control the processing'
                    ' of the output %s file')%plumber.output_fmt,
                  plumber.output_options)
def add_pipeline_options(parser, plumber):
    '''
    Add the pipeline options known to ``plumber`` to ``parser``.

    The profile options are added directly to the parser, the metadata and
    debug options go into option groups of their own. Options whose
    recommendation level is ``HIGH`` are not added.
    '''
    # (title, description, option names). An empty title means the options
    # are added to the parser itself rather than to a group.
    sections = (
        ('', '', ['input_profile', 'output_profile']),
        ('METADATA', _('Options to set metadata in the output'),
            plumber.metadata_option_names),
        ('DEBUG', _('Options to help with debugging the conversion'),
            ['verbose']),
    )

    for title, desc, names in sections:
        if title:
            container = OptionGroup(parser, title, desc)
            parser.add_option_group(container)
        else:
            container = parser

        for name in names:
            rec = plumber.get_option_by_name(name)
            if rec.level < rec.HIGH:
                option_recommendation_to_cli_option(container.add_option, rec)
def option_parser():
    '''Return a new :class:`OptionParser` that uses ``USAGE`` as its usage text.'''
    parser = OptionParser(usage=USAGE)
    return parser
def main(args=sys.argv):
    '''
    Command line entry point.

    Prints the help and returns 1 when fewer than three arguments are
    given. Otherwise the input and output arguments are checked, a Plumber
    is created for them, its options are added to the parser, the parsed
    values are merged into the plumber as ``HIGH`` level recommendations,
    the conversion is run and 0 is returned.
    '''
    log = Log()
    parser = option_parser()
    if len(args) < 3:
        print_help(parser, log)
        return 1

    input, output = check_command_line_options(parser, args, log)

    from calibre.ebooks.conversion.plumber import Plumber

    plumber = Plumber(input, output, log)
    # The available options depend on the plugins the plumber selected
    add_input_output_options(parser, plumber)
    add_pipeline_options(parser, plumber)

    opts = parser.parse_args(args)[0]

    # Every option that stores a value becomes a HIGH level recommendation
    recommendations = []
    for option in parser.options_iter():
        if option.dest:
            recommendations.append((option.dest, getattr(opts, option.dest),
                                    OptionRecommendation.HIGH))
    plumber.merge_ui_recommendations(recommendations)

    plumber.run()
    log(_('Output saved to'), ' ', plumber.output)
    return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -3,11 +3,29 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles
from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format
pipeline_options = [
class OptionValues(object):
pass
class Plumber(object):
metadata_option_names = [
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
'publisher', 'series', 'series_index', 'rating', 'isbn',
'tags', 'book_producer', 'language'
]
def __init__(self, input, output, log):
self.input = input
self.output = output
self.log = log
self.pipeline_options = [
OptionRecommendation(name='verbose',
recommended_value=0, level=OptionRecommendation.LOW,
@ -16,7 +34,6 @@ OptionRecommendation(name='verbose',
'verbosity.')
),
OptionRecommendation(name='input_profile',
recommended_value='default', level=OptionRecommendation.LOW,
choices=[x.short_name for x in input_profiles()],
@ -27,4 +44,193 @@ OptionRecommendation(name='input_profile',
'pixels).')
),
]
OptionRecommendation(name='output_profile',
recommended_value='default', level=OptionRecommendation.LOW,
choices=[x.short_name for x in output_profiles()],
help=_('Specify the output profile. The output profile '
'tells the conversion system how to optimize the '
'created document for the specified device. In some cases, '
'an output profile is required to produce documents that '
'will work on a device. For example EPUB on the SONY reader.'
)
),
OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW,
short_switch='m',
help=_('Read metadata from the specified OPF file. Metadata read '
'from this file will override any metadata in the source '
'file.')
),
OptionRecommendation(name='title',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the title.')),
OptionRecommendation(name='authors',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the authors. Multiple authors should be separated ')),
OptionRecommendation(name='title_sort',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('The version of the title to be used for sorting. ')),
OptionRecommendation(name='author_sort',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('String to be used when sorting by author. ')),
OptionRecommendation(name='cover',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the cover to the specified file.')),
OptionRecommendation(name='comments',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the ebook description.')),
OptionRecommendation(name='publisher',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the ebook publisher.')),
OptionRecommendation(name='series',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the series this ebook belongs to.')),
OptionRecommendation(name='series_index',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the index of the book in this series.')),
OptionRecommendation(name='rating',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the rating. Should be a number between 1 and 5.')),
OptionRecommendation(name='isbn',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the ISBN of the book.')),
OptionRecommendation(name='tags',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the tags for the book. Should be a comma separated list.')),
OptionRecommendation(name='book_producer',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the book producer.')),
OptionRecommendation(name='language',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the language.')),
]
input_fmt = os.path.splitext(input)[1]
if not input_fmt:
raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower()
output_fmt = os.path.splitext(output)[1]
if not output_fmt:
output_fmt = '.oeb'
output_fmt = output_fmt[1:].lower()
self.input_plugin = plugin_for_input_format(input_fmt)
self.output_plugin = plugin_for_output_format(output_fmt)
if self.input_plugin is None:
raise ValueError('No plugin to handle input format: '+input_fmt)
if self.output_plugin is None:
raise ValueError('No plugin to handle output format: '+output_fmt)
self.input_fmt = input_fmt
self.output_fmt = output_fmt
self.input_options = self.input_plugin.options.union(
self.input_plugin.common_options)
self.output_options = self.output_plugin.options.union(
self.output_plugin.common_options)
self.merge_plugin_recommendations()
def get_option_by_name(self, name):
    '''
    Return the recommendation for the option called ``name``.

    The input options are searched first, then the pipeline options, then
    the output options; the first match wins.

    :param name: The option name to look for.
    :return: The matching recommendation, or ``None`` if there is none.
    '''
    for group in (self.input_options, self.pipeline_options,
                  self.output_options):
        for rec in group:
            # Compare the names directly instead of comparing the option
            # object with a string, which only matches through the
            # hash-based equality of the option class.
            if rec.option.name == name:
                return rec
    return None
def merge_plugin_recommendations(self):
    '''
    Apply the recommendations of the input plugin and then those of the
    output plugin to the known options.

    A recommendation is applied when the option exists and the level of
    the recommendation is at least the option's current level. Unknown
    option names are ignored.
    '''
    for plugin in (self.input_plugin, self.output_plugin):
        for name, val, level in plugin.recommendations:
            rec = self.get_option_by_name(name)
            if rec is not None and rec.level <= level:
                rec.recommended_value = val
                # Remember how strong the accepted recommendation was, so
                # that a later, weaker recommendation cannot override it
                rec.level = level
def merge_ui_recommendations(self, recommendations):
    '''
    Apply recommendations coming from the user interface.

    :param recommendations: Iterable of ``(name, value, level)`` tuples.

    A value is applied only when the option exists, its current level is
    not above the level of the recommendation and is below ``HIGH``.
    '''
    for name, val, level in recommendations:
        rec = self.get_option_by_name(name)
        if rec is None:
            continue
        if rec.level > level or rec.level >= rec.HIGH:
            continue
        rec.recommended_value = val
def read_user_metadata(self):
    '''
    Collect the metadata supplied by the user and store it in
    ``self.user_metadata``.

    If ``self.opts.read_metadata_from_opf`` is set, it is made absolute and
    the metadata is first read from that OPF file. Every option named in
    ``metadata_option_names`` that is not ``None`` is then set on the
    metadata object: authors are parsed with ``string_to_authors``, tags
    are split on commas and stripped, rating and series index are
    converted to float. If a cover is set, the file is read into
    ``cover_data`` and ``cover`` is reset to ``None``.
    '''
    from calibre.ebooks.metadata import MetaInformation, string_to_authors
    from calibre.ebooks.metadata.opf2 import OPF

    mi = MetaInformation(None, [])
    if self.opts.read_metadata_from_opf is not None:
        self.opts.read_metadata_from_opf = os.path.abspath(
                                        self.opts.read_metadata_from_opf)
        opf_path = self.opts.read_metadata_from_opf
        stream = open(opf_path, 'rb')
        try:
            opf = OPF(stream, os.path.dirname(opf_path))
            mi = MetaInformation(opf)
        finally:
            # Do not leak the file handle, even if parsing fails
            stream.close()

    for x in self.metadata_option_names:
        val = getattr(self.opts, x, None)
        if val is not None:
            if x == 'authors':
                val = string_to_authors(val)
            elif x == 'tags':
                val = [i.strip() for i in val.split(',')]
            elif x in ('rating', 'series_index'):
                val = float(val)
            setattr(mi, x, val)

    if mi.cover:
        cover = open(mi.cover, 'rb')
        try:
            mi.cover_data = ('', cover.read())
        finally:
            cover.close()
        mi.cover = None

    self.user_metadata = mi
def setup_options(self):
    '''
    Create ``self.opts`` from the recommended values of all input,
    pipeline and output options.

    The ``input_profile`` and ``output_profile`` values are replaced by the
    profile whose ``short_name`` matches; a value without a matching
    profile is left unchanged. Finally the user metadata is read.
    '''
    self.opts = OptionValues()
    all_groups = (self.input_options, self.pipeline_options,
                  self.output_options)
    for group in all_groups:
        for rec in group:
            setattr(self.opts, rec.option.name, rec.recommended_value)

    def select_profile(attr, profiles):
        # Swap the profile short name stored in attr for the profile itself
        for profile in profiles:
            if profile.short_name == getattr(self.opts, attr):
                setattr(self.opts, attr, profile)
                break

    select_profile('input_profile', input_profiles())
    select_profile('output_profile', output_profiles())

    self.read_user_metadata()
def run(self):
    '''
    Run the conversion as far as it is implemented here: set up the
    options, run the preprocess plugins on the input file, call the input
    plugin on it and read the OPF it returns into ``self.oeb``.
    '''
    self.setup_options()

    from calibre.customize.ui import run_plugins_on_preprocess
    from calibre.ebooks.oeb.reader import OEBReader
    from calibre.ebooks.oeb.base import OEBBook

    # The preprocess plugins return the path that is converted from here on
    self.input = run_plugins_on_preprocess(self.input)

    parse_cache = {}
    accelerators = {}

    # NOTE(review): InputFormatPlugin.__call__ is declared with a trailing
    # output_dir parameter that is not passed here -- confirm what the
    # input plugin expects.
    opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
                                self.input_fmt, parse_cache, self.log,
                                accelerators)

    self.reader = OEBReader()
    self.oeb = OEBBook(self.log, parse_cache=parse_cache)
    self.reader(self.oeb, opfpath)

View File

@ -12,7 +12,7 @@ from contextlib import nested
from calibre import extract, walk
from calibre.ebooks import DRMError
from calibre.ebooks.epub import config as common_config, process_encryption
from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import MetaInformation

View File

@ -197,6 +197,9 @@ class HTMLProcessor(Processor, Rationalizer):
if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag)
for tag in self.root.xpath('//form'):
tag.getparent().remove(tag)
if self.opts.linearize_tables:
for tag in self.root.xpath('//table | //tr | //th | //td'):
tag.tag = 'div'

View File

@ -51,7 +51,8 @@ class EPUBInput(InputFormatPlugin):
traceback.print_exc()
return False
def convert(self, stream, options, file_ext, parse_cache, log):
def convert(self, stream, options, file_ext, parse_cache, log,
accelerators):
from calibre.utils.zipfile import ZipFile
from calibre import walk
from calibre.ebooks import DRMError

View File

@ -11,7 +11,7 @@ __docformat__ = 'restructuredtext en'
import os, re
from itertools import count, chain
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
from calibre.ebooks.oeb.base import OEBBook, DirWriter
from calibre.ebooks.oeb.base import OEBBook
from lxml import etree, html
from lxml.etree import XPath

View File

@ -15,7 +15,7 @@ from lxml.cssselect import CSSSelector
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.epub import tostring, rules
from calibre import CurrentDir, LoggingInterface
from calibre import CurrentDir
XPath = functools.partial(_XPath, namespaces={'re':'http://exslt.org/regular-expressions'})
content = functools.partial(os.path.join, 'content')
@ -32,10 +32,9 @@ class SplitError(ValueError):
class Splitter(LoggingInterface):
class Splitter(object):
def __init__(self, path, opts, stylesheet_map, opf):
LoggingInterface.__init__(self, logging.getLogger('htmlsplit'))
self.setup_cli_handler(opts.verbose)
self.path = path
self.always_remove = not opts.preserve_tag_structure or \

View File

@ -19,11 +19,10 @@ from lxml.html import HtmlElementClassLookup, HTMLParser as _HTMLParser, \
from lxml.etree import XPath
get_text = XPath("//text()")
from calibre import LoggingInterface, unicode_path, entity_to_unicode
from calibre import unicode_path, entity_to_unicode
from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS
from calibre.utils.config import Config, StringConfig
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.zipfile import ZipFile
@ -401,7 +400,7 @@ class PreProcessor(object):
html = rule[0].sub(rule[1], html)
return html
class Parser(PreProcessor, LoggingInterface):
class Parser(PreProcessor):
# SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont'
# SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in
# [
@ -412,7 +411,6 @@ class Parser(PreProcessor, LoggingInterface):
# ]
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'):
LoggingInterface.__init__(self, logging.getLogger(name))
self.setup_cli_handler(opts.verbose)
self.htmlfile = htmlfile
self.opts = opts
@ -859,7 +857,7 @@ class Processor(Parser):
except ValueError:
setting = ''
face = font.attrib.pop('face', None)
if face is not None:
if face:
faces = []
for face in face.split(','):
face = face.strip()
@ -1038,6 +1036,7 @@ def merge_metadata(htmlfile, opf, opts):
if opf:
mi = MetaInformation(opf)
elif htmlfile:
from calibre.ebooks.metadata.meta import get_metadata
try:
mi = get_metadata(open(htmlfile, 'rb'), 'html')
except:

View File

@ -143,7 +143,8 @@ class PageProcessor(list):
MagickRotateImage(wand, pw, -90)
# 25 percent fuzzy trim?
MagickTrimImage(wand, 25*65535/100)
if not self.opts.disable_trim:
MagickTrimImage(wand, 25*65535/100)
MagickSetImagePage(wand, 0,0,0,0) #Clear page after trim, like a "+repage"
# Do the Photoshop "Auto Levels" equivalent
if not self.opts.dont_normalize:
@ -303,6 +304,9 @@ def config(defaults=None,output_format='lrf'):
help=_('Maintain picture aspect ratio. Default is to fill the screen.'))
c.add_opt('dont_sharpen', ['-s', '--disable-sharpen'], default=False,
help=_('Disable sharpening.'))
c.add_opt('disable_trim', ['--disable-trim'], default=False,
help=_('Disable trimming of comic pages. For some comics, '
'trimming might remove content as well as borders.'))
c.add_opt('landscape', ['-l', '--landscape'], default=False,
help=_("Don't split landscape images into two portrait images"))
c.add_opt('wide', ['-w', '--wide-aspect'], default=False,

View File

@ -31,7 +31,7 @@ from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks import ConversionError
from calibre.ebooks.lrf.html.table import Table
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
fit_image, LoggingInterface, preferred_encoding
fit_image, preferred_encoding
from calibre.ptempfile import PersistentTemporaryFile
from calibre.devices.interface import Device
from calibre.ebooks.lrf.html.color_map import lrs_color
@ -78,7 +78,7 @@ def tag_regex(tagname):
return dict(open=r'(?:<\s*%(t)s\s+[^<>]*?>|<\s*%(t)s\s*>)'%dict(t=tagname), \
close=r'</\s*%(t)s\s*>'%dict(t=tagname))
class HTMLConverter(object, LoggingInterface):
class HTMLConverter(object):
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
@ -99,6 +99,10 @@ class HTMLConverter(object, LoggingInterface):
# Replace common line break patterns with line breaks
(re.compile(r'<p>(&nbsp;|\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
# Replace empty headers with line breaks
(re.compile(r'<h[0-5]?>(&nbsp;|\s)*</h[0-5]?>',
re.IGNORECASE), lambda m: '<br />'),
# Replace entities
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
exceptions=['lt', 'gt', 'amp'])),
@ -209,7 +213,6 @@ class HTMLConverter(object, LoggingInterface):
'''
# Defaults for various formatting tags
object.__setattr__(self, 'options', options)
LoggingInterface.__init__(self, logger)
self.fonts = fonts #: dict specifying font families to use
# Memory
self.scaled_images = {} #: Temporary files with scaled version of images

View File

@ -28,8 +28,9 @@ class LrsParser(object):
def __init__(self, stream, logger):
self.logger = logger
src = stream.read()
self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0],
selfClosingTags=self.SELF_CLOSING_TAGS)
self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0],
convertEntities=BeautifulStoneSoup.XML_ENTITIES,
selfClosingTags=self.SELF_CLOSING_TAGS)
self.objects = {}
for obj in self.soup.findAll(objid=True):
self.objects[obj['objid']] = obj

View File

@ -530,7 +530,7 @@ class LRFMetaFile(object):
""" See L{file.write} """
self._file.write(val)
def objects(self):
def _objects(self):
self._file.seek(self.object_index_offset)
c = self.number_of_objects
while c > 0:
@ -543,7 +543,7 @@ class LRFMetaFile(object):
def get_objects_by_type(self, type):
from calibre.ebooks.lrf.tags import Tag
objects = []
for id, offset, size in self.objects():
for id, offset, size in self._objects():
self._file.seek(offset)
tag = Tag(self._file)
if tag.id == 0xF500:
@ -554,7 +554,7 @@ class LRFMetaFile(object):
def get_object_by_id(self, tid):
from calibre.ebooks.lrf.tags import Tag
for id, offset, size in self.objects():
for id, offset, size in self._objects():
self._file.seek(offset)
tag = Tag(self._file)
if tag.id == 0xF500:

View File

@ -112,7 +112,8 @@ key is the account key you generate after signing up for a free account from isb
default=None, help=_('The title of the book to search for.'))
parser.add_option('-p', '--publisher', default=None, dest='publisher',
help=_('The publisher of the book to search for.'))
parser.add_option('--verbose', default=False, action='store_true', help=_('Verbose processing'))
parser.add_option('-v', '--verbose', default=False,
action='store_true', help=_('Verbose processing'))
return parser

View File

@ -19,14 +19,22 @@ def get_metadata(stream):
for item in opf.iterguide():
if 'cover' not in item.get('type', '').lower():
continue
ctype = item.get('type')
href = item.get('href', '')
candidates = [href, href.replace('&', '%26')]
for item in litfile.manifest.values():
if item.path in candidates:
covers.append(item.internal)
try:
covers.append((litfile.get_file('/data/'+item.internal),
ctype))
except:
pass
break
covers = [litfile.get_file('/data/' + i) for i in covers]
covers.sort(cmp=lambda x, y:cmp(len(x), len(y)))
mi.cover_data = ('jpg', covers[-1])
covers.sort(cmp=lambda x, y:cmp(len(x[0]), len(y[0])), reverse=True)
idx = 0
if len(covers) > 1:
if covers[1][1] == covers[1][0]+'-standard':
idx = 1
mi.cover_data = ('jpg', covers[idx][0])
return mi

View File

@ -3,8 +3,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin):
@ -14,16 +12,19 @@ class MOBIInput(InputFormatPlugin):
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
file_types = set(['mobi', 'prc', 'azw'])
def convert(self, stream, options, file_ext, parse_cache, log):
def convert(self, stream, options, file_ext, parse_cache, log,
accelerators):
from calibre.ebooks.mobi.reader import MobiReader
mr = MobiReader(stream, log, options.input_encoding,
options.debug_input)
mr.extract_content(output_dir=os.getcwdu(), parse_cache)
mr.extract_content('.', parse_cache)
raw = parse_cache.get('calibre_raw_mobi_markup', False)
if raw:
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
open('debug-raw.html', 'wb').write(raw)
return mr.created_opf_path
for f, root in parse_cache.items():
if '.' in f:
accelerators[f] = {'pagebreaks':root.xpath(
'//div[@class="mbp_pagebreak"]')}
return mr.created_opf_path

View File

@ -312,7 +312,7 @@ class MobiReader(object):
mobi_version = self.book_header.mobi_version
for i, tag in enumerate(root.iter(etree.Element)):
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city'):
'state', 'city', 'street', 'address'):
tag.tag = 'span'
for key in tag.attrib.keys():
tag.attrib.pop(key)
@ -389,7 +389,13 @@ class MobiReader(object):
opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
elif mi.cover is not None:
opf.cover = mi.cover
manifest = [(htmlfile, 'text/x-oeb1-document'),
else:
opf.cover = 'images/%05d.jpg'%1
if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
*opf.cover.split('/'))):
opf.cover = None
manifest = [(htmlfile, 'text/x-oeb1-document'),
(os.path.abspath('styles.css'), 'text/css')]
bp = os.path.dirname(htmlfile)
for i in getattr(self, 'image_names', []):

View File

@ -9,7 +9,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
import sys
import os
from struct import pack
import functools
import time
import random
from cStringIO import StringIO
@ -18,13 +17,12 @@ from itertools import izip, count
from collections import defaultdict
from urlparse import urldefrag
import logging
from lxml import etree
from PIL import Image
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
from calibre.ebooks.oeb.base import namespace, prefixname
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'
import os, sys, re, uuid
import os, re, uuid
from mimetypes import types_map
from collections import defaultdict
from itertools import count
@ -15,7 +15,6 @@ from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote
from lxml import etree, html
import calibre
from calibre import LoggingInterface
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -204,22 +203,6 @@ class OEBError(Exception):
"""Generic OEB-processing error."""
pass
class FauxLogger(object):
"""Fake logging interface."""
def __getattr__(self, name):
return self
def __call__(self, message):
print message
class Logger(LoggingInterface, object):
"""A logging object which provides both the standard `logging.Logger` and
calibre-specific interfaces.
"""
def __getattr__(self, name):
return object.__getattribute__(self, 'log_' + name)
class NullContainer(object):
"""An empty container.
@ -1233,16 +1216,20 @@ class PageList(object):
class OEBBook(object):
"""Representation of a book in the IDPF OEB data model."""
def __init__(self, encoding=None, pretty_print=False, logger=FauxLogger()):
def __init__(self, logger, parse_cache={}, encoding='utf-8',
pretty_print=False):
"""Create empty book. Optional arguments:
:param parse_cache: A cache of parsed XHTML/CSS. Keys are absolute
paths to the cached files and values are lxml root objects and
cssutils stylesheets.
:param:`encoding`: Default encoding for textual content read
from an external container.
:param:`pretty_print`: Whether or not the canonical string form
of XML markup is pretty-printed.
:prama:`logger`: A Logger object to use for logging all messages
:param:`logger`: A Log object to use for logging all messages
related to the processing of this book. It is accessible
via the instance data member :attr:`logger`.
via the instance data members :attr:`logger,log`.
It provides the following public instance data members for
accessing various parts of the OEB data model:
@ -1260,7 +1247,7 @@ class OEBBook(object):
"""
self.encoding = encoding
self.pretty_print = pretty_print
self.logger = logger
self.logger = self.log = logger
self.version = '2.0'
self.container = NullContainer()
self.metadata = Metadata(self)

View File

@ -0,0 +1,17 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import OutputFormatPlugin
class OEBOutput(OutputFormatPlugin):
    '''
    Output format plugin declared for the ``oeb`` file type.

    The conversion step is not implemented yet, see :meth:`convert`.
    '''

    name = 'OEB Output'
    author = 'Kovid Goyal'
    file_type = 'oeb'

    def convert(self, oeb_book, input_plugin, options, parse_cache, log):
        '''
        Placeholder conversion hook: does no work and returns ``None``.
        None of the arguments are used at present.
        '''
        return None

View File

@ -19,9 +19,9 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, CSSURL_RE, \
ENTITY_RE, LINK_SELECTORS, MS_COVER_TYPE
from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath
from calibre.ebooks.oeb.base import urlnormalize, xml2str
from calibre.ebooks.oeb.base import OEBError, OEBBook, DirContainer
from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath, \
urlnormalize, BINARY_MIME, \
OEBError, OEBBook, DirContainer
from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
from calibre.ebooks.metadata.epub import CoverRenderer
@ -45,9 +45,6 @@ class OEBReader(object):
TRANSFORMS = []
"""List of transforms to apply to content read with this Reader."""
def __init__(self):
return
@classmethod
def config(cls, cfg):
"""Add any book-reading options to the :class:`Config` object
@ -65,7 +62,7 @@ class OEBReader(object):
:param:`oeb`.
"""
self.oeb = oeb
self.logger = oeb.logger
self.logger = self.log = oeb.logger
oeb.container = self.Container(path)
opf = self._read_opf()
self._all_from_opf(opf)

View File

@ -6,18 +6,14 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
import re
import operator
import math
from itertools import chain
from collections import defaultdict
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import namespace, barename
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.stylizer import Stylizer
COLLAPSE = re.compile(r'[ \t\r\n\v]+')

View File

@ -6,9 +6,6 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from lxml import etree
from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
from calibre.ebooks.oeb.base import element

View File

@ -6,13 +6,6 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
import re
import operator
import math
from itertools import chain
from collections import defaultdict
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
from calibre.ebooks.oeb.base import CSS_MIME

View File

@ -6,7 +6,6 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from urlparse import urldefrag
import base64
@ -20,9 +19,9 @@ from PyQt4.QtGui import QImage
from PyQt4.QtGui import QPainter
from PyQt4.QtSvg import QSvgRenderer
from PyQt4.QtGui import QApplication
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
from calibre.ebooks.oeb.base import XHTML, XLINK
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
from calibre.ebooks.oeb.base import xml2str, xpath
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.stylizer import Stylizer
@ -88,7 +87,7 @@ class SVGRasterizer(object):
hrefs = self.oeb.manifest.hrefs
for elem in xpath(svg, '//svg:*[@xl:href]'):
href = urlnormalize(elem.attrib[XLINK('href')])
path, frag = urldefrag(href)
path = urldefrag(href)[0]
if not path:
continue
abshref = item.abshref(path)

View File

@ -1,154 +1,162 @@
<ui version="4.0" >
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog" >
<property name="geometry" >
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>646</width>
<height>468</height>
<height>503</height>
</rect>
</property>
<property name="windowTitle" >
<property name="windowTitle">
<string>Dialog</string>
</property>
<property name="windowIcon" >
<iconset resource="../images.qrc" >
<property name="windowIcon">
<iconset resource="../images.qrc">
<normaloff>:/images/convert.svg</normaloff>:/images/convert.svg</iconset>
</property>
<layout class="QGridLayout" name="gridLayout" >
<item row="0" column="0" >
<widget class="QLabel" name="title_label" >
<property name="text" >
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="title_label">
<property name="text">
<string>&amp;Title:</string>
</property>
<property name="buddy" >
<property name="buddy">
<cstring>opt_title</cstring>
</property>
</widget>
</item>
<item row="0" column="1" >
<widget class="QLineEdit" name="opt_title" />
<item row="0" column="1">
<widget class="QLineEdit" name="opt_title"/>
</item>
<item row="1" column="0" >
<widget class="QLabel" name="author_label" >
<property name="text" >
<item row="1" column="0">
<widget class="QLabel" name="author_label">
<property name="text">
<string>&amp;Author(s):</string>
</property>
<property name="buddy" >
<property name="buddy">
<cstring>opt_author</cstring>
</property>
</widget>
</item>
<item row="1" column="1" >
<widget class="QLineEdit" name="opt_author" />
<item row="1" column="1">
<widget class="QLineEdit" name="opt_author"/>
</item>
<item row="2" column="0" >
<widget class="QLabel" name="label_3" >
<property name="text" >
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>&amp;Number of Colors:</string>
</property>
<property name="buddy" >
<property name="buddy">
<cstring>opt_colors</cstring>
</property>
</widget>
</item>
<item row="2" column="1" >
<widget class="QSpinBox" name="opt_colors" >
<property name="minimum" >
<item row="2" column="1">
<widget class="QSpinBox" name="opt_colors">
<property name="minimum">
<number>8</number>
</property>
<property name="maximum" >
<property name="maximum">
<number>3200000</number>
</property>
<property name="singleStep" >
<property name="singleStep">
<number>8</number>
</property>
</widget>
</item>
<item row="3" column="0" >
<widget class="QLabel" name="label_4" >
<property name="text" >
<item row="3" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
<string>&amp;Profile:</string>
</property>
<property name="buddy" >
<property name="buddy">
<cstring>opt_profile</cstring>
</property>
</widget>
</item>
<item row="3" column="1" >
<widget class="QComboBox" name="opt_profile" />
<item row="3" column="1">
<widget class="QComboBox" name="opt_profile"/>
</item>
<item row="4" column="0" >
<widget class="QCheckBox" name="opt_dont_normalize" >
<property name="text" >
<item row="4" column="0">
<widget class="QCheckBox" name="opt_dont_normalize">
<property name="text">
<string>Disable &amp;normalize</string>
</property>
</widget>
</item>
<item row="5" column="0" >
<widget class="QCheckBox" name="opt_keep_aspect_ratio" >
<property name="text" >
<item row="5" column="0">
<widget class="QCheckBox" name="opt_keep_aspect_ratio">
<property name="text">
<string>Keep &amp;aspect ratio</string>
</property>
</widget>
</item>
<item row="6" column="0" >
<widget class="QCheckBox" name="opt_dont_sharpen" >
<property name="text" >
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_sharpen">
<property name="text">
<string>Disable &amp;Sharpening</string>
</property>
</widget>
</item>
<item row="8" column="0" >
<widget class="QCheckBox" name="opt_landscape" >
<property name="text" >
<item row="9" column="0">
<widget class="QCheckBox" name="opt_landscape">
<property name="text">
<string>&amp;Landscape</string>
</property>
</widget>
</item>
<item row="10" column="0" >
<widget class="QCheckBox" name="opt_no_sort" >
<property name="text" >
<item row="11" column="0">
<widget class="QCheckBox" name="opt_no_sort">
<property name="text">
<string>Don't so&amp;rt</string>
</property>
</widget>
</item>
<item row="12" column="1" >
<widget class="QDialogButtonBox" name="buttonBox" >
<property name="orientation" >
<item row="13" column="1">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons" >
<property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>
<item row="9" column="0" >
<widget class="QCheckBox" name="opt_right2left" >
<property name="text" >
<item row="10" column="0">
<widget class="QCheckBox" name="opt_right2left">
<property name="text">
<string>&amp;Right to left</string>
</property>
</widget>
</item>
<item row="11" column="0" >
<widget class="QCheckBox" name="opt_despeckle" >
<property name="text" >
<item row="12" column="0">
<widget class="QCheckBox" name="opt_despeckle">
<property name="text">
<string>De&amp;speckle</string>
</property>
</widget>
</item>
<item row="7" column="0" >
<widget class="QCheckBox" name="opt_wide" >
<property name="text" >
<item row="8" column="0">
<widget class="QCheckBox" name="opt_wide">
<property name="text">
<string>&amp;Wide</string>
</property>
</widget>
</item>
<item row="7" column="0">
<widget class="QCheckBox" name="opt_disable_trim">
<property name="text">
<string>Disable &amp;Trimming</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources>
<include location="../images.qrc" />
<include location="../images.qrc"/>
</resources>
<connections>
<connection>
@ -157,11 +165,11 @@
<receiver>Dialog</receiver>
<slot>accept()</slot>
<hints>
<hint type="sourcelabel" >
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
</hint>
<hint type="destinationlabel" >
<hint type="destinationlabel">
<x>157</x>
<y>274</y>
</hint>
@ -173,11 +181,11 @@
<receiver>Dialog</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel" >
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel" >
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>

View File

@ -194,7 +194,11 @@ class ConfigDialog(QDialog, Ui_Dialog):
lang = get_lang()
if lang is not None and language_codes.has_key(lang):
self.language.addItem(language_codes[lang], QVariant(lang))
items = [(l, language_codes[l]) for l in translations.keys() if l != lang]
else:
lang = 'en'
self.language.addItem('English', QVariant('en'))
items = [(l, language_codes[l]) for l in translations.keys() \
if l != lang]
if lang != 'en':
items.append(('en', 'English'))
items.sort(cmp=lambda x, y: cmp(x[1], y[1]))

Binary file not shown.

After

Width:  |  Height:  |  Size: 951 B

View File

@ -1406,7 +1406,15 @@ class Main(MainWindow, Ui_MainWindow):
dir = os.path.expanduser('~/Library')
self.library_path = os.path.abspath(dir)
if not os.path.exists(self.library_path):
os.makedirs(self.library_path)
try:
os.makedirs(self.library_path)
except:
self.library_path = os.path.expanduser('~/Library')
error_dialog(self, _('Invalid library location'),
_('Could not access %s. Using %s as the library.')%
(repr(self.library_path), repr(self.library_path))
).exec_()
os.makedirs(self.library_path)
def read_settings(self):

View File

@ -15,7 +15,7 @@ from PyQt4.QtCore import QCoreApplication, QThread, QReadWriteLock
from PyQt4.QtGui import QApplication, QImage
__app = None
from calibre.library import title_sort
from calibre.ebooks.metadata import title_sort
from calibre.library.database import LibraryDatabase
from calibre.library.sqlite import connect, IntegrityError
from calibre.utils.search_query_parser import SearchQueryParser

View File

@ -1,9 +1,8 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' Post installation script for linux '''
import sys, os, re, shutil
import sys, os, shutil
from subprocess import check_call, call
from tempfile import NamedTemporaryFile
from calibre import __version__, __appname__
from calibre.devices import devices
@ -18,15 +17,8 @@ entry_points = {
'console_scripts': [ \
'ebook-device = calibre.devices.prs500.cli.main:main',
'ebook-meta = calibre.ebooks.metadata.cli:main',
'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main',
'html2lrf = calibre.ebooks.lrf.html.convert_from:main',
'html2oeb = calibre.ebooks.html:main',
'html2epub = calibre.ebooks.epub.from_html:main',
'odt2oeb = calibre.ebooks.odt.to_oeb:main',
'ebook-convert = calibre.ebooks.conversion.cli:main',
'markdown-calibre = calibre.ebooks.markdown.markdown:main',
'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main',
'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main',
'rtf2lrf = calibre.ebooks.lrf.rtf.convert_from:main',
'web2disk = calibre.web.fetch.simple:main',
'feeds2disk = calibre.web.feeds.main:main',
'calibre-server = calibre.library.server:main',
@ -34,22 +26,10 @@ entry_points = {
'feeds2epub = calibre.ebooks.epub.from_feeds:main',
'feeds2mobi = calibre.ebooks.mobi.from_feeds:main',
'web2lrf = calibre.ebooks.lrf.web.convert_from:main',
'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main',
'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main',
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'any2epub = calibre.ebooks.epub.from_any:main',
'any2lit = calibre.ebooks.lit.from_any:main',
'any2mobi = calibre.ebooks.mobi.from_any:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main',
'lit2oeb = calibre.ebooks.lit.reader:main',
'oeb2lit = calibre.ebooks.lit.writer:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
@ -60,7 +40,6 @@ entry_points = {
'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main',
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
'any2pdf = calibre.ebooks.pdf.from_any:main',
],
'gui_scripts' : [
__appname__+' = calibre.gui2.main:main',
@ -171,25 +150,16 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
from calibre.ebooks.mobi.reader import option_parser as mobioeb
from calibre.ebooks.lit.reader import option_parser as lit2oeb
from calibre.web.feeds.main import option_parser as feeds2disk
from calibre.web.feeds.recipes import titles as feed_titles
from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_html import option_parser as html2epub
from calibre.ebooks.html import option_parser as html2oeb
from calibre.ebooks.odt.to_oeb import option_parser as odt2oeb
from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
from calibre.ebooks.mobi.from_feeds import option_parser as feeds2mobi
from calibre.ebooks.epub.from_any import option_parser as any2epub
from calibre.ebooks.lit.from_any import option_parser as any2lit
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.ebooks.mobi.from_any import option_parser as any2mobi
from calibre.ebooks.mobi.writer import option_parser as oeb2mobi
from calibre.gui2.main import option_parser as guiop
from calibre.gui2.main import option_parser as guiop
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
'txt', 'lit', 'rtf', 'pdf', 'prc', 'mobi', 'fb2', 'odt']
f = open_file('/etc/bash_completion.d/libprs500')
f.close()
os.remove(f.name)
@ -209,16 +179,10 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
f.write(opts_and_exts('any2lrf', htmlop, any_formats))
f.write(opts_and_exts('calibre', guiop, any_formats))
f.write(opts_and_exts('any2epub', any2epub, any_formats))
f.write(opts_and_exts('any2lit', any2lit, any_formats))
f.write(opts_and_exts('any2mobi', any2mobi, any_formats))
f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf']))
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc']))
f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit']))
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
@ -227,9 +191,6 @@ def setup_completion(fatal_errors):
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt']))
f.write('''
_prs500_ls()
{
@ -392,43 +353,27 @@ def option_parser():
help='Save a manifest of all installed files to the specified location')
return parser
def install_man_pages(fatal_errors):
from bz2 import compress
import subprocess
def install_man_pages(fatal_errors, use_destdir=False):
from calibre.utils.help2man import create_man_page
prefix = os.environ.get('DESTDIR', '/') if use_destdir else '/'
manpath = os.path.join(prefix, 'usr/share/man/man1')
if not os.path.exists(manpath):
os.makedirs(manpath)
print 'Installing MAN pages...'
manpath = '/usr/share/man/man1'
f = NamedTemporaryFile()
f.write('[see also]\nhttp://%s.kovidgoyal.net\n'%__appname__)
f.flush()
manifest = []
os.environ['PATH'] += ':'+os.path.expanduser('~/bin')
for src in entry_points['console_scripts']:
prog = src[:src.index('=')].strip()
if prog in ('ebook-device', 'markdown-calibre',
'calibre-fontconfig', 'calibre-parallel'):
prog, right = src.split('=')
prog = prog.strip()
module = __import__(right.split(':')[0].strip(), fromlist=['a'])
parser = getattr(module, 'option_parser', None)
if parser is None:
continue
help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,
'--section', '1', '--no-info', '--include',
f.name, '--manual', __appname__)
parser = parser()
raw = create_man_page(prog, parser)
manfile = os.path.join(manpath, prog+'.1'+__appname__+'.bz2')
print '\tInstalling MAN page for', prog
try:
p = subprocess.Popen(help2man, stdout=subprocess.PIPE)
except OSError, err:
import errno
if err.errno != errno.ENOENT:
raise
print 'Failed to install MAN pages as help2man is missing from your system'
break
o = p.stdout.read()
raw = re.compile(r'^\.IP\s*^([A-Z :]+)$', re.MULTILINE).sub(r'.SS\n\1', o)
if not raw.strip():
print 'Unable to create MAN page for', prog
continue
f2 = open_file(manfile)
manifest.append(f2.name)
f2.write(compress(raw))
open(manfile, 'wb').write(raw)
manifest.append(manfile)
return manifest
def post_install():
@ -440,9 +385,9 @@ def post_install():
manifest = []
setup_desktop_integration(opts.fatal_errors)
if opts.no_root or os.geteuid() == 0:
manifest += install_man_pages(opts.fatal_errors, use_destdir)
manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
manifest += setup_completion(opts.fatal_errors)
manifest += install_man_pages(opts.fatal_errors)
else:
print "Skipping udev, completion, and man-page install for non-root user."

View File

@ -34,6 +34,8 @@ What formats does |app| support conversion to/from?
| | | | | |
| | ODT | ✔ | ✔ | ✔ |
| | | | | |
| | FB2 | ✔ | ✔ | ✔ |
| | | | | |
| | HTML | ✔ | ✔ | ✔ |
| | | | | |
| **Input formats** | CBR | ✔ | ✔ | ✔ |

View File

@ -196,7 +196,7 @@ class Server(object):
def calculate_month_trend(self, days=31):
stats = self.get_slice(date.today()-timedelta(days=days-1), date.today())
fig = plt.figure(2, (12, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
fig = plt.figure(2, (10, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
fig.clear()
ax = fig.add_subplot(111)
x = list(range(days-1, -1, -1))
@ -216,7 +216,7 @@ Donors per day: %(dpd).2f
ad=stats.average_deviation,
dpd=len(stats.totals)/float(stats.period.days),
)
text = ax.annotate(text, (0.6, 0.65), textcoords='axes fraction')
text = ax.annotate(text, (0.5, 0.65), textcoords='axes fraction')
fig.savefig(self.MONTH_TRENDS)
def calculate_trend(self):

View File

@ -18,7 +18,6 @@ DEPENDENCIES = [
('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'),
('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'),
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
]

View File

@ -88,7 +88,7 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
be ignored.
</li>
<li>
You must have help2man and xdg-utils installed
You must have xdg-utils installed
on your system before running the installer.
</li>
<li>

View File

@ -0,0 +1,59 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time, bz2
from calibre.constants import __version__, __appname__, __author__
def create_man_page(prog, parser):
    '''
    Build a section 1 man page for a command line program from its option
    parser.

    :param prog: Name of the program. It replaces ``%prog`` in the usage and
        help texts and is used for the page title.
    :param parser: An :mod:`optparse` style parser. Its ``usage``,
        ``option_list`` and ``option_groups`` attributes are read.
    :return: The roff source of the man page compressed with bz2.
    '''
    from optparse import NO_DEFAULT, SUPPRESS_HELP

    # parser.usage is None when the usage message has been suppressed
    usage = (parser.usage or '').splitlines()
    for i, line in enumerate(usage):
        if not line.strip():
            # A blank line in the usage text becomes a paragraph break
            usage[i] = '.PP'
        else:
            usage[i] = line.replace('%prog', prog)

    # The first usage line minus its first word (the program name) is the
    # synopsis; an empty usage simply yields an empty synopsis
    synopsis = usage[0].split()[1:] if usage else []

    lines = [
        '.TH ' + prog.upper() + ' "1" ' + time.strftime('"%B %Y"') +
        ' "%s (%s %s)" "%s"'%(prog, __appname__, __version__, __appname__),
        '.SH NAME',
        prog + r' \- part of '+__appname__,
        '.SH SYNOPSIS',
        '.B "%s"'%prog + r'\fR '+' '.join(synopsis),
        '.SH DESCRIPTION',
    ]
    lines += usage[1:]
    lines += [
        '.SH OPTIONS'
    ]

    def format_option(opt):
        # Return the roff lines describing a single option
        if opt.help == SUPPRESS_HELP:
            # Hidden from --help, so keep it out of the man page as well
            return []
        # List every spelling exactly once. get_opt_string() only returns
        # the first long flag and repeats the short flag of options that
        # have no long form.
        flags = list(opt._short_opts) + list(opt._long_opts)
        flags = [r'\fB'+x.replace('-', r'\-')+r'\fR' for x in flags]
        default = opt.default
        if default is None or default == NO_DEFAULT:
            # Same text optparse itself prints for a missing default
            default = 'none'
        help_text = opt.help if opt.help else ''
        help_text = help_text.replace('%prog', prog).replace('%default',
                str(default))
        return ['.TP', ', '.join(flags), help_text]

    for opt in parser.option_list:
        lines.extend(format_option(opt))
    for group in parser.option_groups:
        lines.append('.SS '+group.title)
        if group.description:
            lines.extend(['.PP', group.description])
        for opt in group.option_list:
            lines.extend(format_option(opt))

    lines += ['.SH SEE ALSO',
            'The User Manual is available at '
            'http://calibre.kovidgoyal.net/user_manual',
            '.PP', '.B Created by '+__author__]

    raw = '\n'.join(lines)
    if isinstance(raw, unicode):
        # Translated help strings make the joined text unicode; encode it
        # explicitly instead of relying on an implicit ASCII conversion
        raw = raw.encode('utf-8')
    return bz2.compress(raw)

View File

@ -13,13 +13,25 @@ ERROR = 3
import sys, traceback
from functools import partial
from calibre import prints
from calibre.utils.terminfo import TerminalController
class ANSIStream:
class Stream(object):
def __init__(self, stream):
from calibre import prints
self._prints = prints
self.stream = stream
def flush(self):
self.stream.flush()
class ANSIStream(Stream):
def __init__(self, stream=sys.stdout):
self.stream = stream
Stream.__init__(self, stream)
from calibre.utils.terminfo import TerminalController
tc = TerminalController(stream)
self.color = {
DEBUG: tc.GREEN,
@ -32,16 +44,16 @@ class ANSIStream:
def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level])
kwargs['file'] = self.stream
prints(*args, **kwargs)
self._prints(*args, **kwargs)
self.stream.write(self.normal)
def flush(self):
self.stream.flush()
class HTMLStream:
class HTMLStream(Stream):
def __init__(self, stream=sys.stdout):
self.stream = stream
Stream.__init__(self, stream)
self.color = {
DEBUG: '<span style="color:green">',
INFO:'<span>',
@ -53,7 +65,7 @@ class HTMLStream:
def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level])
kwargs['file'] = self.stream
prints(*args, **kwargs)
self._prints(*args, **kwargs)
self.stream.write(self.normal)
def flush(self):

View File

@ -98,7 +98,7 @@ class Feed(object):
if len(self.articles) >= max_articles_per_feed:
break
self.parse_article(item)
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
max_articles_per_feed=100):
@ -156,7 +156,6 @@ class Feed(object):
content = None
if not link and not content:
return
article = Article(id, title, link, description, published, content)
delta = datetime.utcnow() - article.utctime
if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article:

View File

@ -17,7 +17,7 @@ from PyQt4.Qt import QApplication, QFile, Qt, QPalette, QSize, QImage, QPainter,
from PyQt4.QtWebKit import QWebPage
from calibre import browser, __appname__, iswindows, LoggingInterface, \
from calibre import browser, __appname__, iswindows, \
strftime, __version__, preferred_encoding
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
@ -32,7 +32,7 @@ from calibre.ptempfile import PersistentTemporaryFile
from calibre.gui2 import images_rc # Needed for default cover
class BasicNewsRecipe(object, LoggingInterface):
class BasicNewsRecipe(object):
'''
Abstract base class that contains logic needed in all feed fetchers.
'''
@ -444,7 +444,6 @@ class BasicNewsRecipe(object, LoggingInterface):
:param parser: Command line option parser. Used to intelligently merge options.
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
'''
LoggingInterface.__init__(self, logging.getLogger('feeds2disk'))
if not isinstance(self.title, unicode):
self.title = unicode(self.title, 'utf-8', 'replace')
@ -1012,7 +1011,8 @@ class BasicNewsRecipe(object, LoggingInterface):
feed.description = unicode(err)
parsed_feeds.append(feed)
self.log_exception(msg)
return parsed_feeds
@classmethod

View File

@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in (
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
'el_universal',
'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,33 @@
from calibre.web.feeds.news import BasicNewsRecipe
class TheDailyMail(BasicNewsRecipe):
    '''
    News recipe for The Daily Mail (dailymail.co.uk), built from the
    site's per-section RSS feeds.
    '''

    title                  = u'The Daily Mail'
    # Recipes elsewhere in this changeset credit their author through
    # ``__author__``; use the same attribute name here for consistency.
    __author__             = 'RufusA'
    # The attribute name this recipe originally used; kept as an alias so
    # anything reading ``author`` continues to work.
    author                 = __author__
    language               = _('English')
    oldest_article         = 2
    simultaneous_downloads = 1
    max_articles_per_feed  = 50
    no_stylesheets         = True
    extra_css              = 'h1 {text-align: left;}'

    # Markup selectors used to trim each downloaded article page.
    remove_tags_before = dict(name='div', attrs={'id':'content'})
    remove_tags_after  = dict(name='h3', attrs={'class':'social-links-title'})
    remove_tags        = [dict(name='ul', attrs={'class':'article-icons-links'})]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
        (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
        (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
        (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
        (u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
        (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
        (u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
        (u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss'),
    ]

    def print_version(self, url):
        '''
        Return the print-view address for ``url``: everything from the
        first ``?`` onwards is dropped and ``?printingPage=true`` appended.
        '''
        main = url.partition('?')[0]
        return main + '?printingPage=true'

View File

@ -0,0 +1,31 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
EcoGeek.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class EcoGeek(BasicNewsRecipe):
    '''
    News recipe for the EcoGeek.org blog feed.
    '''

    title       = 'EcoGeek'
    __author__  = 'Darko Miletic'
    publisher   = 'EcoGeek'
    description = 'EcoGeek - Technology for the Environment Blog Feed'
    category    = 'news, ecology, blog'
    language    = _('English')

    oldest_article        = 7
    max_articles_per_feed = 100
    use_embedded_content  = True
    no_stylesheets        = True

    # Metadata passed on as html2lrf command line style options.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # The same metadata as newline separated key="value" pairs.
    html2epub_options = ''.join([
        'publisher="', publisher,
        '"\ncomments="', description,
        '"\ntags="', category,
        '"',
    ])

    feeds = [(u'Posts', u'http://feeds2.feedburner.com/EcoGeek')]

View File

@ -3,6 +3,7 @@ __copyright__ = '2008, Derry FitzGerald'
'''
iht.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
@ -16,7 +17,12 @@ class InternationalHeraldTribune(BasicNewsRecipe):
max_articles_per_feed = 10
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':'footer'})]
remove_tags = [dict(name='div', attrs={'class':'footer'}),
dict(name=['form'])]
preprocess_regexps = [
(re.compile(r'<!-- webtrends.*', re.DOTALL),
lambda m:'</body></html>')
]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
feeds = [

View File

@ -0,0 +1,53 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>'
'''
Mediapart
'''
import re, string
from datetime import date
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class Mediapart(BasicNewsRecipe):
    '''
    News recipe for Mediapart (mediapart.fr). Articles are taken from the
    site feed and fetched through their ``/print/<id>`` page.
    '''

    title                 = 'Mediapart'
    __author__            = 'Mathieu Godlewski <mathieu at godlewski.fr>'
    description           = 'Global news in french from online newspapers'
    oldest_article        = 7
    language              = _('French')
    max_articles_per_feed = 50
    no_stylesheets        = True

    html2lrf_options = ['--base-font-size', '10']

    feeds = [
        ('Les articles', 'http://www.mediapart.fr/articles/feed'),
    ]

    # (pattern, replacement callable) pairs; every pattern is compiled
    # case-insensitively and with DOTALL.
    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
        [
            # Turn the print title div into a heading
            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
            # Drop the site name paragraph
            (r'<p>Mediapart\.fr</p>', lambda match : ''),
            # Drop paragraphs containing only whitespace
            (r'<p[^>]*>[\s]*</p>', lambda match : ''),
            # Drop paragraphs that only link to a PDF
            (r'<p><a href="[^\.]+\.pdf">[^>]*</a></p>', lambda match : ''),
        ]
    ]

    remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}),
                    dict(name='div', attrs={'class':'print-links'}),
                    dict(name='img', attrs={'src':'entete_article.png'}),
                  ]

    def print_version(self, url):
        '''
        Download the article page at ``url``, read the node id from its
        ``node node-type-article`` div and return the matching
        ``http://www.mediapart.fr/print/<id>`` address.

        Returns ``None`` when the div is absent, has no ``id`` attribute,
        or the id is empty once the ``node-`` prefix is removed.
        '''
        raw = self.browser.open(url).read()
        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
        div = soup.find('div', {'class':'node node-type-article'})
        if div is None:
            return None
        # .get() instead of div['id'] so a div without an id is treated as
        # "no print version" rather than raising KeyError.
        node_id = div.get('id')
        if not node_id:
            return None
        # str.replace never returns None, so test for an empty result
        # instead of the previous, unreachable ``is None`` check.
        article_id = node_id.replace('node-', '')
        if not article_id:
            return None
        return 'http://www.mediapart.fr/print/'+article_id

View File

@ -70,10 +70,14 @@ class NYTimesMobile(BasicNewsRecipe):
def find_articles(self, root):
    '''
    Yield one article dict for every ``<a accesskey=...>`` element found
    under ``root``.

    :param root: Parsed document exposing ``xpath()``; the matched
                 elements must provide ``get('href')`` and ``text``.
    :return: Generator of dicts with the keys ``title``, ``date``,
             ``url`` and ``description``. ``url`` is always a string.
    '''
    for a in root.xpath('//a[@accesskey]'):
        href = a.get('href')
        if href.startswith('http://'):
            # Already an absolute link, use it unchanged
            url = href
        else:
            # Relative link: keep its query string and request the single
            # page view. There must be no trailing comma here, otherwise
            # url becomes a 1-tuple instead of a string.
            url = 'http://mobile.nytimes.com/article' + href[href.find('?'):]+'&single=1'
        yield {
               'title': a.text.strip(),
               'date' : '',
               'url'  : url,
               'description': '',
               }

View File

@ -75,7 +75,9 @@ class NYTimes(BasicNewsRecipe):
dict(title=title, url=url, date=pubdate,
description=description,
content=''))
ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
ans = self.sort_index_by(ans, {'The Front Page':-1,
'Dining In, Dining Out':1,
'Obituaries':2})
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -45,3 +45,7 @@ class USAToday(BasicNewsRecipe):
def print_version(self, url):
    '''
    Return the clickability "printThis" address for ``url``; the article
    address is appended unmodified as the ``url`` query parameter.
    '''
    prefix = 'http://www.printthis.clickability.com/pt/printThis?clickMap=printThis&fb=Y&url='
    return prefix + url
def postprocess_html(self, soup, first_fetch):
    '''
    Rename every ``table``, ``tr`` and ``td`` tag in ``soup`` to ``div``
    and return the same (modified) soup. ``first_fetch`` is not used.
    '''
    table_markup = ['table', 'tr', 'td']
    for tag in soup.findAll(table_markup):
        tag.name = 'div'
    return soup

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
en.wikinews.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class WikiNews(BasicNewsRecipe):
    '''
    News recipe for the English Wikinews (en.wikinews.org). Articles come
    from the feedburner feed and are fetched through the ``printable=yes``
    view of the wiki.
    '''

    title                 = 'Wikinews'
    __author__            = 'Darko Miletic'
    description           = 'News from wikipedia'
    category              = 'news, world'
    oldest_article        = 7
    max_articles_per_feed = 100
    publisher             = 'Wiki'
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    remove_javascript     = True
    language              = _('English')

    # Metadata passed on as html2lrf command line style options.
    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    # The same metadata as newline separated key="value" pairs.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [
                        dict(name='h1', attrs={'id':'firstHeading'})
                       ,dict(name='div', attrs={'id':'bodyContent'})
                     ]

    remove_tags = [
                    dict(name='link')
                   ,dict(name='div',attrs={'id':['printfooter','catlinks','footer']})
                   ,dict(name='div',attrs={'class':['thumb left','thumb right']})
                  ]

    remove_tags_after = dict(name='h2')

    feeds = [(u'News', u'http://feeds.feedburner.com/WikinewsLatestNews')]

    def get_article_url(self, article):
        '''
        Build the en.wikinews.org address of ``article`` from the last
        path component of its ``link`` entry.
        '''
        artl = article.get('link', None)
        article_id = artl.rpartition('/')[2]
        return 'http://en.wikinews.org/wiki/' + article_id

    def print_version(self, url):
        '''
        Return the ``printable=yes`` address for the article whose title
        is the last path component of ``url``.
        '''
        article_id = url.rpartition('/')[2]
        return 'http://en.wikinews.org/w/index.php?title=' + article_id + '&printable=yes'

    def preprocess_html(self, soup):
        '''
        Clean up a downloaded article: add language/charset meta tags to
        the head, remove all ``div`` and ``h2`` elements from the
        ``bodyContent`` div and strip ``style`` and ``font`` attributes
        from every tag. Returns the modified ``soup``.

        A page without a ``<head>`` or without the ``bodyContent`` div is
        passed through with the remaining clean-up still applied, instead
        of failing with an AttributeError.
        '''
        mtag = '<meta http-equiv="Content-Language" content="en"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        if soup.head is not None:
            soup.head.insert(0,mtag)
        btag = soup.find('div',attrs={'id':'bodyContent'})
        if btag is not None:
            for item in btag.findAll('div'):
                item.extract()
            for item in btag.findAll('h2'):
                item.extract()
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(font=True):
            del item['font']
        return soup

View File

@ -15,7 +15,7 @@ from PIL import Image
from cStringIO import StringIO
from calibre import setup_cli_handlers, browser, sanitize_file_name, \
relpath, LoggingInterface, unicode_path
relpath, unicode_path
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
@ -80,7 +80,7 @@ class DummyLock(object):
# Context manager entry for DummyLock: hands back the instance itself.
def __enter__(self, *args): return self
# Context manager exit for DummyLock: does nothing and returns None, so an
# exception raised inside the ``with`` body is not suppressed.
def __exit__(self, *args): pass
class RecursiveFetcher(object, LoggingInterface):
class RecursiveFetcher(object):
LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$'))
#ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in
@ -93,7 +93,6 @@ class RecursiveFetcher(object, LoggingInterface):
DUMMY_LOCK = DummyLock()
def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
LoggingInterface.__init__(self, logger)
self.base_dir = os.path.abspath(os.path.expanduser(options.dir))
if not os.path.exists(self.base_dir):
os.makedirs(self.base_dir)