Sync to pluginize

This commit is contained in:
John Schember 2009-04-27 18:48:55 -04:00
commit 9a363a02cc
25 changed files with 750 additions and 722 deletions

View File

@ -286,6 +286,8 @@ from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.odt.input import ODTInput from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.comic.input import ComicInput
from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
@ -294,8 +296,8 @@ from calibre.ebooks.pdb.ereader.output import EREADEROutput
from calibre.customize.profiles import input_profiles, output_profiles from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput] FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')] x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -96,6 +96,11 @@ class InputFormatPlugin(Plugin):
#: For example: ``set(['azw', 'mobi', 'prc'])`` #: For example: ``set(['azw', 'mobi', 'prc'])``
file_types = set([]) file_types = set([])
#: If True, this input plugin generates a collection of images,
#: one per HTML file. You can obtain access to the images via
#: the convenience method :method:`get_images`.
is_image_collection = False
#: Options shared by all Input format plugins. Do not override #: Options shared by all Input format plugins. Do not override
#: in sub-classes. Use :member:`options` instead. Every option must be an #: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`. #: instance of :class:`OptionRecommendation`.
@ -128,6 +133,14 @@ class InputFormatPlugin(Plugin):
#: (option_name, recommended_value, recommendation_level) #: (option_name, recommended_value, recommendation_level)
recommendations = set([]) recommendations = set([])
def get_images(self):
    '''
    Return the absolute paths of the images as a list, for input plugins
    that represent an image collection.

    The list follows the same order as the spine and the TOC. This base
    implementation always raises :class:`NotImplementedError`.
    '''
    raise NotImplementedError()
def convert(self, stream, options, file_ext, log, accelerators): def convert(self, stream, options, file_ext, log, accelerators):
''' '''
This method must be implemented in sub-classes. It must return This method must be implemented in sub-classes. It must return

View File

@ -148,6 +148,8 @@ class OutputProfile(Plugin):
remove_special_chars = re.compile(u'[\u200b\u00ad]') remove_special_chars = re.compile(u'[\u200b\u00ad]')
# ADE falls to the ground in a dead faint when it sees an <object> # ADE falls to the ground in a dead faint when it sees an <object>
remove_object_tags = True remove_object_tags = True
# The image size for comics
comic_screen_size = (584, 754)
class SonyReaderOutput(OutputProfile): class SonyReaderOutput(OutputProfile):
@ -162,6 +164,18 @@ class SonyReaderOutput(OutputProfile):
fbase = 12 fbase = 12
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24] fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
class SonyReaderLandscapeOutput(SonyReaderOutput):
    # Landscape variant of the SONY reader profile: overrides the
    # identifying strings and uses the same dimensions for the regular
    # screen size and for comics.

    name = 'Sony Reader Landscape'
    short_name = 'sony-landscape'
    description = _(
        'This profile is intended for the SONY PRS line. '
        'The 500/505/700 etc, in landscape mode. '
        'Mainly useful for comics.')

    screen_size = (784, 1012)
    comic_screen_size = (784, 1012)
class MSReaderOutput(OutputProfile): class MSReaderOutput(OutputProfile):
name = 'Microsoft Reader' name = 'Microsoft Reader'
@ -223,4 +237,5 @@ class KindleOutput(OutputProfile):
fsizes = [12, 12, 14, 16, 18, 20, 22, 24] fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput, output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput] MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
SonyReaderLandscapeOutput]

460
src/calibre/ebooks/comic/input.py Executable file
View File

@ -0,0 +1,460 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Based on ideas from comiclrf created by FangornUK.
'''
import os, shutil, traceback, textwrap
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import extract, CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.parallel import Server, ParallelJob
def extract_comic(path_to_comic_file):
    '''
    Unpack the comic archive at `path_to_comic_file` into a newly created
    persistent temporary directory and return that directory.
    '''
    destination = PersistentTemporaryDirectory(suffix='_comic_extract')
    extract(path_to_comic_file, destination)
    return destination
def find_pages(dir, sort_on_mtime=False, verbose=False):
    '''
    Find valid comic pages in a previously un-archived comic.

    :param dir: Directory in which extracted comic lives
    :param sort_on_mtime: If True sort pages based on their last modified time.
                          Otherwise, sort alphabetically by file name.
    :param verbose: If True, print the file names of the pages found.
    :return: Sorted list of paths to every file below `dir` whose name ends
             in .jpeg, .jpg, .gif or .png (case-insensitive).
    '''
    extensions = ('.jpeg', '.jpg', '.gif', '.png')
    pages = []
    for dirpath, _dirnames, filenames in os.walk(dir):
        for name in filenames:
            path = os.path.join(dirpath, name)
            if path.lower().endswith(extensions):
                pages.append(path)
    # A key function is evaluated once per page, so each file is stat'ed a
    # single time instead of on every comparison. The sort stays stable,
    # which keeps the resulting order identical to the comparator version.
    if sort_on_mtime:
        sort_key = lambda x: os.stat(x).st_mtime
    else:
        sort_key = os.path.basename
    pages.sort(key=sort_key)
    if verbose:
        print('Found comic pages...')
        print('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
    return pages
class PageProcessor(list):
    '''
    Contains the actual image rendering logic. See :method:`render` and
    :method:`process_pages`.

    Creating an instance renders the page immediately. Afterwards the
    instance (a list) holds the paths of the image files that were written.
    '''

    def __init__(self, path_to_page, dest, opts, num):
        '''
        :param path_to_page: Path of the source image.
        :param dest: Directory the rendered images are written to.
        :param opts: Conversion options. The attributes read are landscape,
                     right2left, disable_trim, dont_normalize,
                     keep_aspect_ratio, wide, dont_sharpen, despeckle,
                     colors and output_profile.comic_screen_size.
        :param num: Index of the page. It is used in the output file names
                    and page 0 additionally produces ``thumbnail.png``.
        '''
        list.__init__(self)
        self.path_to_page = path_to_page
        self.opts = opts
        self.num = num
        self.dest = dest
        self.rotate = False
        self.render()

    def render(self):
        '''
        Load the source image, write a thumbnail when this is page 0, then
        either mark a wide image for rotation or split it into two halves,
        and finally hand over to :method:`process_pages`.

        :raises RuntimeError: If a wand cannot be created.
        :raises IOError: If the image cannot be read.
        '''
        import calibre.utils.PythonMagickWand as pw
        img = pw.NewMagickWand()
        if img < 0:
            raise RuntimeError('Cannot create wand.')
        if not pw.MagickReadImage(img, self.path_to_page):
            # Release the wand, and use a complete '%s' format so the
            # intended IOError is raised rather than a ValueError.
            pw.DestroyMagickWand(img)
            raise IOError('Failed to read image from: %s'%self.path_to_page)
        width = pw.MagickGetImageWidth(img)
        height = pw.MagickGetImageHeight(img)
        if self.num == 0:  # First image so create a thumbnail from it
            thumb = pw.CloneMagickWand(img)
            if thumb < 0:
                raise RuntimeError('Cannot create wand.')
            pw.MagickThumbnailImage(thumb, 60, 80)
            pw.MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png'))
            pw.DestroyMagickWand(thumb)
        self.pages = [img]
        if width > height:
            if self.opts.landscape:
                self.rotate = True
            else:
                split1, split2 = map(pw.CloneMagickWand, (img, img))
                pw.DestroyMagickWand(img)
                if split1 < 0 or split2 < 0:
                    raise RuntimeError('Cannot create wand.')
                # Explicit floor division keeps the crop geometry integral.
                pw.MagickCropImage(split1, (width//2)-1, height, 0, 0)
                pw.MagickCropImage(split2, (width//2)-1, height, width//2, 0)
                if self.opts.right2left:
                    self.pages = [split2, split1]
                else:
                    self.pages = [split1, split2]
        self.process_pages()

    def process_pages(self):
        '''
        Apply rotation, trimming, normalisation, resizing, sharpening,
        grayscale conversion, despeckling and quantisation (each subject to
        the corresponding option) to every wand in ``self.pages``, write the
        result to ``<dest>/<num>_<i>.png`` and append that path to self.
        '''
        import calibre.utils.PythonMagickWand as p
        for i, wand in enumerate(self.pages):
            pw = p.NewPixelWand()
            try:
                if pw < 0:
                    raise RuntimeError('Cannot create wand.')
                p.PixelSetColor(pw, 'white')
                p.MagickSetImageBorderColor(wand, pw)
                if self.rotate:
                    p.MagickRotateImage(wand, pw, -90)
                # 25 percent fuzzy trim?
                if not self.opts.disable_trim:
                    p.MagickTrimImage(wand, 25*65535//100)
                # Clear page after trim, like a "+repage"
                p.MagickSetImagePage(wand, 0, 0, 0, 0)
                # Do the Photoshop "Auto Levels" equivalent
                if not self.opts.dont_normalize:
                    p.MagickNormalizeImage(wand)
                sizex = p.MagickGetImageWidth(wand)
                sizey = p.MagickGetImageHeight(wand)
                SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
                if self.opts.keep_aspect_ratio:
                    # Preserve the aspect ratio by adding border
                    aspect = float(sizex) / float(sizey)
                    if aspect <= (float(SCRWIDTH) / float(SCRHEIGHT)):
                        newsizey = SCRHEIGHT
                        newsizex = int(newsizey * aspect)
                        deltax = (SCRWIDTH - newsizex) // 2
                        deltay = 0
                    else:
                        newsizex = SCRWIDTH
                        newsizey = int(newsizex / aspect)
                        deltax = 0
                        deltay = (SCRHEIGHT - newsizey) // 2
                    p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
                    p.MagickSetImageBorderColor(wand, pw)
                    p.MagickBorderImage(wand, pw, deltax, deltay)
                elif self.opts.wide:
                    # Keep aspect and Use device height as scaled image width so landscape mode is clean
                    aspect = float(sizex) / float(sizey)
                    screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT)
                    # Get dimensions of the landscape mode screen
                    # Add 25px back to height for the battery bar.
                    wscreenx = SCRHEIGHT + 25
                    wscreeny = int(wscreenx / screen_aspect)
                    if aspect <= screen_aspect:
                        newsizey = wscreeny
                        newsizex = int(newsizey * aspect)
                        deltax = (wscreenx - newsizex) // 2
                        deltay = 0
                    else:
                        newsizex = wscreenx
                        newsizey = int(newsizex / aspect)
                        deltax = 0
                        deltay = (wscreeny - newsizey) // 2
                    p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
                    p.MagickSetImageBorderColor(wand, pw)
                    p.MagickBorderImage(wand, pw, deltax, deltay)
                else:
                    p.MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, p.CatromFilter, 1.0)
                if not self.opts.dont_sharpen:
                    p.MagickSharpenImage(wand, 0.0, 1.0)
                p.MagickSetImageType(wand, p.GrayscaleType)
                if self.opts.despeckle:
                    p.MagickDespeckleImage(wand)
                p.MagickQuantizeImage(wand, self.opts.colors, p.RGBColorspace, 0, 1, 0)
                dest = '%d_%d.png'%(self.num, i)
                dest = os.path.join(self.dest, dest)
                # The image is written under a name ending in 'png8' and then
                # renamed. NOTE(review): presumably the suffix selects 8-bit
                # PNG output in ImageMagick -- confirm.
                p.MagickWriteImage(wand, dest+'8')
                os.rename(dest+'8', dest)
                self.append(dest)
            finally:
                if pw > 0:
                    p.DestroyPixelWand(pw)
                p.DestroyMagickWand(wand)
def render_pages(tasks, dest, opts, notification=None):
    '''
    Entry point for the job server.

    :param tasks: Iterable of ``(num, path)`` pairs, one per page to render.
    :param dest: Directory handed to :class:`PageProcessor` for its output.
    :param opts: Conversion options; ``opts.verbose`` additionally controls
                 whether tracebacks are included in failure messages.
    :param notification: Optional callable, invoked as
                         ``notification(0.5, msg)`` after every page.
    :return: ``(pages, failures)``: the rendered image paths and the source
             paths that could not be rendered.
    '''
    from calibre.utils.PythonMagickWand import ImageMagick
    failures, pages = [], []
    with ImageMagick():
        for num, path in tasks:
            try:
                pages.extend(PageProcessor(path, dest, opts, num))
            except Exception:
                # Best effort: a broken page is recorded, not fatal.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                failures.append(path)
                # Interpolate the path *before* appending the traceback.
                # Tracebacks quote source lines, and any '%' in them would
                # otherwise be interpreted as a format directive.
                msg = _('Failed %s')%path
                if opts.verbose:
                    msg += '\n' + traceback.format_exc()
            else:
                msg = _('Rendered %s')%path
            if notification is not None:
                notification(0.5, msg)
    return pages, failures
class JobManager(object):
    '''
    Minimal job manager whose only real duty is tracking overall progress.

    `update` is called as ``update(fraction_done, message)`` each time
    :method:`status_update` is invoked.
    '''

    def __init__(self, total, update):
        self.total, self.update, self.done = total, update, 0
        # The remaining hooks are no-ops that hand back their argument.
        for hook in ('add_job', 'output', 'start_work', 'job_done'):
            setattr(self, hook, lambda j: j)

    def status_update(self, job):
        # Count one more finished unit and report the completed fraction.
        self.done += 1
        fraction = float(self.done)/self.total
        self.update(fraction, job.msg)
def process_pages(pages, opts, update, tdir):
    '''
    Render all identified comic pages.

    The pages are split into tasks by the job server, one ``render_pages``
    job is queued per task, and the per-job results are merged.

    :return: ``(rendered, failures)`` accumulated over all jobs.
    :raises Exception: If any job finished without a result.
    '''
    # An import failure surfaces here, before any job is queued.
    from calibre.utils.PythonMagickWand import ImageMagick
    ImageMagick  # bare reference; presumably keeps the import from being flagged as unused
    manager = JobManager(len(pages), update)
    server = Server()
    jobs = []
    for task in server.split(pages):
        job = ParallelJob('render_pages', lambda s:s, job_manager=manager,
                          args=[task, tdir, opts])
        jobs.append(job)
        server.add_job(job)
    server.wait()
    server.killall()
    server.close()

    rendered, failed = [], []
    for job in jobs:
        if job.result is None:
            raise Exception(_('Failed to process comic: %s\n\n%s')%(job.exception, job.traceback))
        job_pages, job_failures = job.result
        rendered += job_pages
        failed += job_failures
    return rendered, failed
class ComicInput(InputFormatPlugin):
    # Input plugin for comic archives (.cbz, .cbr) and comic collections
    # (.cbc). convert() produces page images, one XHTML wrapper per image,
    # an OPF file and an NCX table of contents.

    name = 'Comic Input'
    author = 'Kovid Goyal'
    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
    file_types = set(['cbz', 'cbr', 'cbc'])
    is_image_collection = True

    options = set([
        OptionRecommendation(name='colors', recommended_value=64,
            help=_('Number of colors for grayscale image conversion. Default: %default')),
        OptionRecommendation(name='dont_normalize', recommended_value=False,
            help=_('Disable normalize (improve contrast) color range '
                   'for pictures. Default: False')),
        OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
            help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
        OptionRecommendation(name='dont_sharpen', recommended_value=False,
            help=_('Disable sharpening.')),
        OptionRecommendation(name='disable_trim', recommended_value=False,
            help=_('Disable trimming of comic pages. For some comics, '
                   'trimming might remove content as well as borders.')),
        # The page renderer reads ``opts.landscape``, so the option has to
        # be declared under exactly that name.
        OptionRecommendation(name='landscape', recommended_value=False,
            help=_("Don't split landscape images into two portrait images")),
        OptionRecommendation(name='wide', recommended_value=False,
            help=_("Keep aspect ratio and scale image using screen height as "
                   "image width for viewing in landscape mode.")),
        OptionRecommendation(name='right2left', recommended_value=False,
            help=_('Used for right-to-left publications like manga. '
                   'Causes landscape pages to be split into portrait pages '
                   'from right to left.')),
        OptionRecommendation(name='despeckle', recommended_value=False,
            help=_('Enable Despeckle. Reduces speckle noise. '
                   'May greatly increase processing time.')),
        OptionRecommendation(name='no_sort', recommended_value=False,
            help=_("Don't sort the files found in the comic "
                   "alphabetically by name. Instead use the order they were "
                   "added to the comic.")),
        OptionRecommendation(name='no_process', recommended_value=False,
            help=_("Apply no processing to the image")),
        ])

    recommendations = set([
        ('margin_left', 0, OptionRecommendation.HIGH),
        ('margin_top', 0, OptionRecommendation.HIGH),
        ('margin_right', 0, OptionRecommendation.HIGH),
        ('margin_bottom', 0, OptionRecommendation.HIGH),
        ('insert_blank_line', False, OptionRecommendation.HIGH),
        ('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
        ('dont_justify', True, OptionRecommendation.HIGH),
        ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
        ('chapter', None, OptionRecommendation.HIGH),
        ('page_breaks_before', None, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ])

    def get_comics_from_collection(self, stream):
        '''
        Unzip a comic collection and return its readable comics.

        The archive must contain a UTF-8 ``comics.txt`` whose lines have the
        form ``relative/path[:title]``. A missing title defaults to the
        file name without its extension.

        :return: List of ``[title, absolute_path]`` entries.
        :raises ValueError: If ``comics.txt`` is missing or no listed comic
                            is readable.
        '''
        from calibre.libunzip import extract as zipextract
        tdir = PersistentTemporaryDirectory('_comic_collection')
        zipextract(stream, tdir)
        comics = []
        with CurrentDir(tdir):
            if not os.path.exists('comics.txt'):
                raise ValueError('%s is not a valid comic collection'
                        %stream.name)
            with open('comics.txt', 'rb') as f:
                raw = f.read()
            for line in raw.decode('utf-8').splitlines():
                fname, _sep, title = line.partition(':')
                fname = os.path.join(tdir, *fname.split('/'))
                if not title:
                    title = os.path.basename(fname).rpartition('.')[0]
                if os.access(fname, os.R_OK):
                    comics.append([title, fname])
        if not comics:
            raise ValueError('%s has no comics'%stream.name)
        return comics

    def get_pages(self, comic, tdir2):
        '''
        Extract `comic` and place its pages in the directory `tdir2`.

        With the ``no_process`` option the page files are copied unchanged;
        otherwise they are rendered via :func:`process_pages` and pages that
        could not be rendered are logged as warnings.

        :return: List of paths of the resulting page images.
        :raises ValueError: If the comic contains no pages, or none of them
                            could be rendered.
        '''
        tdir = extract_comic(comic)
        new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
                verbose=self.opts.verbose)
        if not new_pages:
            raise ValueError('Could not find any pages in the comic: %s'
                    %comic)
        if self.opts.no_process:
            copied = []
            for page in new_pages:
                target = os.path.join(tdir2, os.path.basename(page))
                shutil.copyfile(page, target)
                copied.append(target)
            return copied
        new_pages, failures = process_pages(new_pages, self.opts,
                self.progress, tdir2)
        if not new_pages:
            raise ValueError('Could not find any valid pages in comic: %s'
                    % comic)
        if failures:
            self.log.warning('Could not process the following pages '
                '(run with --verbose to see why):')
            for f in failures:
                self.log.warning('\t', f)
        return new_pages

    def get_images(self):
        # Page image paths collected by the most recent convert() call.
        return self._images

    def convert(self, stream, opts, file_ext, log, accelerators,
                progress=lambda p, m : m):
        '''
        Convert the comic (or, for ``cbc``, the comic collection) in
        `stream`. Pages are written to the current directory, or to one
        ``comic_N`` sub-directory per comic when there are several, and
        ``metadata.opf`` and ``toc.ncx`` are written to the current
        directory.

        :return: Absolute path to the generated ``metadata.opf``.
        :raises ValueError: If no comic yields any pages.
        '''
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log, self.progress = opts, log, progress
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages:
                continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s'%stream.name)

        mi = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
            [_('Unknown')])
        opf = OPFCreator(os.path.abspath('.'), mi)
        entries = []

        def href(x):
            # Single comic: files sit next to the OPF. Several comics: keep
            # the trailing "<comic dir>/<file>" part of the path.
            if len(comics) == 1:
                return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                        play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                            _('Page')+' %d'%(i+1), play_order=po)
                    po += 1
        opf.set_toc(toc)
        # Close both files deterministically so their contents are on disk
        # before the OPF path is handed back to the caller.
        with open('metadata.opf', 'wb') as m:
            with open('toc.ncx', 'wb') as n:
                opf.render(m, n, 'toc.ncx')
        return os.path.abspath('metadata.opf')

    def create_wrappers(self, pages):
        '''
        Write one ``page_N.xhtml`` file per image, next to the first image,
        each embedding the corresponding image by file name.

        :param pages: Non-empty list of image paths.
        :return: List of the wrapper file paths, in page order.
        '''
        from calibre.ebooks.oeb.base import XHTML_NS
        wrappers = []
        WRAPPER = textwrap.dedent('''\
        <html xmlns="%s">
            <head>
                <title>Page #%d</title>
                <style type="text/css">
                    @page { margin:0pt; padding: 0pt}
                    body { margin: 0pt; padding: 0pt}
                    div { text-align: center }
                </style>
            </head>
            <body>
                <div>
                    <img src="%s" alt="comic page #%d" />
                </div>
            </body>
        </html>
        ''')
        base = os.path.dirname(pages[0])
        for i, page in enumerate(pages):
            wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
            wrapper_path = os.path.join(base, 'page_%d.xhtml'%(i+1))
            with open(wrapper_path, 'wb') as f:
                f.write(wrapper)
            wrappers.append(wrapper_path)
        return wrappers

View File

@ -47,12 +47,12 @@ def print_help(parser, log):
def check_command_line_options(parser, args, log): def check_command_line_options(parser, args, log):
if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'): if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'):
print_help(parser) print_help(parser, log)
log.error('\n\nYou must specify the input AND output files') log.error('\n\nYou must specify the input AND output files')
raise SystemExit(1) raise SystemExit(1)
input = os.path.abspath(args[1]) input = os.path.abspath(args[1])
if not os.access(input, os.R_OK): if not input.endswith('.recipe') and not os.access(input, os.R_OK):
log.error('Cannot read from', input) log.error('Cannot read from', input)
raise SystemExit(1) raise SystemExit(1)
@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber):
if rec.level < rec.HIGH: if rec.level < rec.HIGH:
option_recommendation_to_cli_option(add_option, rec) option_recommendation_to_cli_option(add_option, rec)
option_recommendation_to_cli_option(parser.add_option,
plumber.get_option_by_name('list_recipes'))
def option_parser(): def option_parser():
return OptionParser(usage=USAGE) return OptionParser(usage=USAGE)

View File

@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer',
OptionRecommendation(name='language', OptionRecommendation(name='language',
recommended_value=None, level=OptionRecommendation.LOW, recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the language.')), help=_('Set the language.')),
OptionRecommendation(name='list_recipes',
recommended_value=False, help=_('List available recipes.')),
] ]
input_fmt = os.path.splitext(self.input)[1] input_fmt = os.path.splitext(self.input)[1]
@ -525,6 +529,13 @@ OptionRecommendation(name='language',
self.setup_options() self.setup_options()
if self.opts.verbose: if self.opts.verbose:
self.log.filter_level = self.log.DEBUG self.log.filter_level = self.log.DEBUG
if self.opts.list_recipes:
from calibre.web.feeds.recipes import titles
self.log('Available recipes:')
for title in sorted(titles):
self.log('\t'+title)
self.log('%d recipes available'%len(titles))
raise SystemExit(0)
# Run any preprocess plugins # Run any preprocess plugins
from calibre.customize.ui import run_plugins_on_preprocess from calibre.customize.ui import run_plugins_on_preprocess
@ -535,8 +546,13 @@ OptionRecommendation(name='language',
accelerators = {} accelerators = {}
tdir = PersistentTemporaryDirectory('_plumber') tdir = PersistentTemporaryDirectory('_plumber')
stream = self.input if self.input_fmt == 'recipe' else \
open(self.input, 'rb')
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts, if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
self.opts.lrf = True
self.oeb = self.input_plugin(stream, self.opts,
self.input_fmt, self.log, self.input_fmt, self.log,
accelerators, tdir) accelerators, tdir)
if self.opts.debug_input is not None: if self.opts.debug_input is not None:

View File

@ -882,6 +882,9 @@ class LitContainer(object):
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP) unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
return str(unbin) return str(unbin)
def get_metadata(self):
    # Public accessor that forwards to the container's _read_meta().
    metadata = self._read_meta()
    return metadata
class LitReader(OEBReader): class LitReader(OEBReader):
Container = LitContainer Container = LitContainer

View File

@ -1,562 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Based on ideas from comiclrf created by FangornUK.
'''
import os, sys, shutil, traceback, textwrap, fnmatch
from uuid import uuid4
from calibre import extract, terminal_controller, __appname__, __version__
from calibre.utils.config import Config, StringConfig
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.parallel import Server, ParallelJob
from calibre.utils.terminfo import ProgressBar
from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, ImageStream, ImageBlock
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.epub.from_html import config as html2epub_config, convert as html2epub
from calibre.customize.ui import run_plugins_on_preprocess
try:
from calibre.utils.PythonMagickWand import \
NewMagickWand, NewPixelWand, \
MagickSetImageBorderColor, \
MagickReadImage, MagickRotateImage, \
MagickTrimImage, PixelSetColor,\
MagickNormalizeImage, MagickGetImageWidth, \
MagickGetImageHeight, \
MagickResizeImage, MagickSetImageType, \
GrayscaleType, CatromFilter, MagickSetImagePage, \
MagickBorderImage, MagickSharpenImage, MagickDespeckleImage, \
MagickQuantizeImage, RGBColorspace, \
MagickWriteImage, DestroyPixelWand, \
DestroyMagickWand, CloneMagickWand, \
MagickThumbnailImage, MagickCropImage, ImageMagick
_imagemagick_loaded = True
except:
_imagemagick_loaded = False
# Screen sizes of the supported devices, keyed by profile name.
# Each value is (width, height) in pixels.
PROFILES = {
    'prs500': (584, 754),
    # The SONY's LRF renderer (on the PRS500) only uses the first 800x600 block of the image
    'prs500-landscape': (784, 1012),
}
def extract_comic(path_to_comic_file):
    '''
    Un-archive the comic file into a fresh persistent temporary directory
    and return the directory.
    '''
    target_dir = PersistentTemporaryDirectory(suffix='_comic_extract')
    extract(path_to_comic_file, target_dir)
    return target_dir
def find_pages(dir, sort_on_mtime=False, verbose=False):
    '''
    Find valid comic pages in a previously un-archived comic.

    :param dir: Directory in which extracted comic lives
    :param sort_on_mtime: If True sort pages based on their last modified time.
                          Otherwise, sort alphabetically by file name.
    :param verbose: If True, print the file names of the pages found.
    :return: Sorted list of paths to every file below `dir` whose name ends
             in .jpeg, .jpg, .gif or .png (case-insensitive).
    '''
    extensions = ('.jpeg', '.jpg', '.gif', '.png')
    pages = []
    for dirpath, _dirnames, filenames in os.walk(dir):
        for name in filenames:
            path = os.path.join(dirpath, name)
            if path.lower().endswith(extensions):
                pages.append(path)
    # Sorting with a key stats every file once rather than once per
    # comparison; the sort is stable, so the order is unchanged.
    if sort_on_mtime:
        sort_key = lambda x: os.stat(x).st_mtime
    else:
        sort_key = os.path.basename
    pages.sort(key=sort_key)
    if verbose:
        print('Found comic pages...')
        print('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
    return pages
class PageProcessor(list):
    '''
    Contains the actual image rendering logic. See :method:`render` and
    :method:`process_pages`.

    Creating an instance renders the page immediately. Afterwards the
    instance (a list) holds the paths of the image files that were written.
    '''

    def __init__(self, path_to_page, dest, opts, num):
        '''
        :param path_to_page: Path of the source image.
        :param dest: Directory the rendered images are written to.
        :param opts: Conversion options. The attributes read are landscape,
                     right2left, disable_trim, dont_normalize, profile,
                     keep_aspect_ratio, wide, dont_sharpen, despeckle and
                     colors.
        :param num: Index of the page. It is used in the output file names
                    and page 0 additionally produces ``thumbnail.png``.
        '''
        list.__init__(self)
        self.path_to_page = path_to_page
        self.opts = opts
        self.num = num
        self.dest = dest
        self.rotate = False
        self.render()

    def render(self):
        '''
        Load the source image, write a thumbnail when this is page 0, then
        either mark a wide image for rotation or split it into two halves,
        and finally hand over to :method:`process_pages`.

        :raises RuntimeError: If a wand cannot be created.
        :raises IOError: If the image cannot be read.
        '''
        img = NewMagickWand()
        if img < 0:
            raise RuntimeError('Cannot create wand.')
        if not MagickReadImage(img, self.path_to_page):
            # Release the wand, and use a complete '%s' format so the
            # intended IOError is raised rather than a ValueError.
            DestroyMagickWand(img)
            raise IOError('Failed to read image from: %s'%self.path_to_page)
        width = MagickGetImageWidth(img)
        height = MagickGetImageHeight(img)
        if self.num == 0:  # First image so create a thumbnail from it
            thumb = CloneMagickWand(img)
            if thumb < 0:
                raise RuntimeError('Cannot create wand.')
            MagickThumbnailImage(thumb, 60, 80)
            MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png'))
            DestroyMagickWand(thumb)
        self.pages = [img]
        if width > height:
            if self.opts.landscape:
                self.rotate = True
            else:
                split1, split2 = map(CloneMagickWand, (img, img))
                DestroyMagickWand(img)
                if split1 < 0 or split2 < 0:
                    raise RuntimeError('Cannot create wand.')
                # Explicit floor division keeps the crop geometry integral.
                MagickCropImage(split1, (width//2)-1, height, 0, 0)
                MagickCropImage(split2, (width//2)-1, height, width//2, 0)
                if self.opts.right2left:
                    self.pages = [split2, split1]
                else:
                    self.pages = [split1, split2]
        self.process_pages()

    def process_pages(self):
        '''
        Apply rotation, trimming, normalisation, resizing, sharpening,
        grayscale conversion, despeckling and quantisation (each subject to
        the corresponding option) to every wand in ``self.pages``, write the
        result to ``<dest>/<num>_<i>.png`` and append that path to self.
        '''
        for i, wand in enumerate(self.pages):
            pw = NewPixelWand()
            try:
                if pw < 0:
                    raise RuntimeError('Cannot create wand.')
                PixelSetColor(pw, 'white')
                MagickSetImageBorderColor(wand, pw)
                if self.rotate:
                    MagickRotateImage(wand, pw, -90)
                # 25 percent fuzzy trim?
                if not self.opts.disable_trim:
                    MagickTrimImage(wand, 25*65535//100)
                # Clear page after trim, like a "+repage"
                MagickSetImagePage(wand, 0, 0, 0, 0)
                # Do the Photoshop "Auto Levels" equivalent
                if not self.opts.dont_normalize:
                    MagickNormalizeImage(wand)
                sizex = MagickGetImageWidth(wand)
                sizey = MagickGetImageHeight(wand)
                SCRWIDTH, SCRHEIGHT = PROFILES[self.opts.profile]
                if self.opts.keep_aspect_ratio:
                    # Preserve the aspect ratio by adding border
                    aspect = float(sizex) / float(sizey)
                    if aspect <= (float(SCRWIDTH) / float(SCRHEIGHT)):
                        newsizey = SCRHEIGHT
                        newsizex = int(newsizey * aspect)
                        deltax = (SCRWIDTH - newsizex) // 2
                        deltay = 0
                    else:
                        newsizex = SCRWIDTH
                        newsizey = int(newsizex / aspect)
                        deltax = 0
                        deltay = (SCRHEIGHT - newsizey) // 2
                    MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0)
                    MagickSetImageBorderColor(wand, pw)
                    MagickBorderImage(wand, pw, deltax, deltay)
                elif self.opts.wide:
                    # Keep aspect and Use device height as scaled image width so landscape mode is clean
                    aspect = float(sizex) / float(sizey)
                    screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT)
                    # Get dimensions of the landscape mode screen
                    # Add 25px back to height for the battery bar.
                    wscreenx = SCRHEIGHT + 25
                    wscreeny = int(wscreenx / screen_aspect)
                    if aspect <= screen_aspect:
                        newsizey = wscreeny
                        newsizex = int(newsizey * aspect)
                        deltax = (wscreenx - newsizex) // 2
                        deltay = 0
                    else:
                        newsizex = wscreenx
                        newsizey = int(newsizex / aspect)
                        deltax = 0
                        deltay = (wscreeny - newsizey) // 2
                    MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0)
                    MagickSetImageBorderColor(wand, pw)
                    MagickBorderImage(wand, pw, deltax, deltay)
                else:
                    MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, CatromFilter, 1.0)
                if not self.opts.dont_sharpen:
                    MagickSharpenImage(wand, 0.0, 1.0)
                MagickSetImageType(wand, GrayscaleType)
                if self.opts.despeckle:
                    MagickDespeckleImage(wand)
                MagickQuantizeImage(wand, self.opts.colors, RGBColorspace, 0, 1, 0)
                dest = '%d_%d.png'%(self.num, i)
                dest = os.path.join(self.dest, dest)
                # The image is written under a name ending in 'png8' and then
                # renamed. NOTE(review): presumably the suffix selects 8-bit
                # PNG output in ImageMagick -- confirm.
                MagickWriteImage(wand, dest+'8')
                os.rename(dest+'8', dest)
                self.append(dest)
            finally:
                if pw > 0:
                    DestroyPixelWand(pw)
                DestroyMagickWand(wand)
def render_pages(tasks, dest, opts, notification=None):
    '''
    Entry point for the job server.

    :param tasks: Iterable of ``(num, path)`` pairs, one per page to render.
    :param dest: Directory handed to :class:`PageProcessor` for its output.
    :param opts: Conversion options; ``opts.verbose`` additionally controls
                 whether tracebacks are included in failure messages.
    :param notification: Optional callable, invoked as
                         ``notification(0.5, msg)`` after every page.
    :return: ``(pages, failures)``: the rendered image paths and the source
             paths that could not be rendered.
    '''
    failures, pages = [], []
    with ImageMagick():
        for num, path in tasks:
            try:
                pages.extend(PageProcessor(path, dest, opts, num))
            except Exception:
                # Best effort: a broken page is recorded, not fatal.
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                failures.append(path)
                # Format the path first; a '%' inside the traceback text
                # must not be treated as a format directive.
                msg = _('Failed %s')%path
                if opts.verbose:
                    msg += '\n' + traceback.format_exc()
            else:
                msg = _('Rendered %s')%path
            if notification is not None:
                notification(0.5, msg)
    return pages, failures
class JobManager(object):
    '''
    Bare-bones job manager that only keeps track of overall progress and
    reports it through the `update` callable.
    '''

    def __init__(self, total, update):
        self.done = 0
        self.total = total
        self.update = update
        # Every other hook simply returns the job it was given.
        passthrough = lambda j: j
        self.add_job = self.output = passthrough
        self.start_work = self.job_done = passthrough

    def status_update(self, job):
        # One more unit finished: report the fraction done with the job's message.
        self.done += 1
        self.update(float(self.done)/self.total, job.msg)
def process_pages(pages, opts, update):
    '''
    Render all identified comic pages.

    The pages are split into tasks by the job server, one ``render_pages``
    job is queued per task, and the per-job results are merged.

    :return: ``(rendered, failures, tdir)`` where `tdir` is the persistent
             temporary directory the jobs were told to write to.
    :raises RuntimeError: If the ImageMagick bindings failed to import.
    :raises Exception: If any job finished without a result.
    '''
    if not _imagemagick_loaded:
        raise RuntimeError('Failed to load ImageMagick')

    tdir = PersistentTemporaryDirectory('_comic2lrf_pp')
    manager = JobManager(len(pages), update)
    server = Server()
    jobs = []
    for task in server.split(pages):
        job = ParallelJob('render_pages', lambda s:s, job_manager=manager,
                          args=[task, tdir, opts])
        jobs.append(job)
        server.add_job(job)
    server.wait()
    server.killall()
    server.close()

    rendered, failed = [], []
    for job in jobs:
        if job.result is None:
            raise Exception(_('Failed to process comic: %s\n\n%s')%(job.exception, job.traceback))
        job_pages, job_failures = job.result
        rendered += job_pages
        failed += job_failures
    return rendered, failed, tdir
def config(defaults=None,output_format='lrf'):
    '''
    Build the configuration object describing every comic conversion option.

    :param defaults: If None a ``Config`` named 'comic' is created,
                     otherwise a ``StringConfig`` built from `defaults`.
    :param output_format: The ``no_process`` option is only added for 'pdf'.
    :return: The populated configuration object.
    '''
    desc = _('Options to control the conversion of comics (CBR, CBZ) files into ebooks')
    c = Config('comic', desc) if defaults is None else StringConfig(defaults, desc)
    add = c.add_opt

    # Metadata and output location
    add('title', ['-t', '--title'],
        help=_('Title for generated ebook. Default is to use the filename.'))
    add('author', ['-a', '--author'],
        help=_('Set the author in the metadata of the generated ebook. Default is %default'),
        default=_('Unknown'))
    add('output', ['-o', '--output'],
        help=_('Path to output file. By default a file is created in the current directory.'))

    # Image processing
    add('colors', ['-c', '--colors'], type='int', default=64,
        help=_('Number of colors for grayscale image conversion. Default: %default'))
    add('dont_normalize', ['-n', '--disable-normalize'], default=False,
        help=_('Disable normalize (improve contrast) color range for pictures. Default: False'))
    add('keep_aspect_ratio', ['-r', '--keep-aspect-ratio'], default=False,
        help=_('Maintain picture aspect ratio. Default is to fill the screen.'))
    add('dont_sharpen', ['-s', '--disable-sharpen'], default=False,
        help=_('Disable sharpening.'))
    add('disable_trim', ['--disable-trim'], default=False,
        help=_('Disable trimming of comic pages. For some comics, '
               'trimming might remove content as well as borders.'))
    add('landscape', ['-l', '--landscape'], default=False,
        help=_("Don't split landscape images into two portrait images"))
    add('wide', ['-w', '--wide-aspect'], default=False,
        help=_("Keep aspect ratio and scale image using screen height as image width for viewing in landscape mode."))
    add('right2left', ['--right2left'], default=False, action='store_true',
        help=_('Used for right-to-left publications like manga. Causes landscape pages to be split into portrait pages from right to left.'))
    add('despeckle', ['-d', '--despeckle'], default=False,
        help=_('Enable Despeckle. Reduces speckle noise. May greatly increase processing time.'))
    add('no_sort', ['--no-sort'], default=False,
        help=_("Don't sort the files found in the comic alphabetically by name. Instead use the order they were added to the comic."))

    # Device profile and reporting
    add('profile', ['-p', '--profile'], default='prs500', choices=PROFILES.keys(),
        help=_('Choose a profile for the device you are generating this file for. The default is the SONY PRS-500 with a screen size of 584x754 pixels. This is suitable for any reader with the same screen size. Choices are %s')%PROFILES.keys())
    add('verbose', ['-v', '--verbose'], default=0, action='count',
        help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
    add('no_progress_bar', ['--no-progress-bar'], default=False,
        help=_("Don't show progress bar."))

    if output_format == 'pdf':
        add('no_process',['--no_process'], default=False,
            help=_("Apply no processing to the image"))
    return c
def option_parser(output_format='lrf'):
    '''
    Build the command line parser for the comic converter.

    :param output_format: Forwarded unchanged to :func:`config`.
    :return: The result of calling ``option_parser(usage=...)`` on the
             object returned by :func:`config`.
    '''
    conf = config(output_format=output_format)
    usage_text = _('''\
%prog [options] comic.cb[z|r]
Convert a comic in a CBZ or CBR file to an ebook.
''')
    return conf.option_parser(usage=usage_text)
def create_epub(pages, profile, opts, thumbnail=None):
    '''
    Create an EPUB from a list of page images.

    Every image gets a minimal HTML wrapper page written next to it, the
    wrappers are listed in a ``metadata.opf`` in the same directory and that
    OPF is handed to ``html2epub`` with all margins set to zero.

    :param pages: Non-empty list of paths to the page images. The wrapper
                  files and the OPF are written into the directory of the
                  first page.
    :param profile: Not used by this function.
    :param opts: Options object; ``title``, ``author`` and ``output`` are read.
    :param thumbnail: Not used by this function.
    '''
    WRAPPER = textwrap.dedent('''\
        <html>
        <head>
        <title>Page #%d</title>
        <style type="text/css">@page {margin:0pt; padding: 0pt;}</style>
        </head>
        <body style="margin: 0pt; padding: 0pt">
        <div style="text-align:center">
        <img src="%s" alt="comic page #%d" />
        </div>
        </body>
        </html>
        ''')
    out_dir = os.path.dirname(pages[0])

    wrappers = []
    for i, page in enumerate(pages):
        num = i + 1
        html = WRAPPER%(num, os.path.basename(page), num)
        wrapper_path = os.path.join(out_dir, 'page_%d.html'%num)
        # Close explicitly so the wrapper is fully on disk before html2epub
        # reads it, instead of relying on garbage collection of the handle.
        wrapper_file = open(wrapper_path, 'wb')
        try:
            wrapper_file.write(html)
        finally:
            wrapper_file.close()
        wrappers.append(wrapper_path)

    mi = MetaInformation(opts.title, [opts.author])
    opf = OPFCreator(out_dir, mi)
    opf.create_manifest([(w, None) for w in wrappers])
    opf.create_spine(wrappers)
    metadata = os.path.join(out_dir, 'metadata.opf')
    # Same reasoning as above: the OPF must be flushed before it is converted.
    opf_file = open(metadata, 'wb')
    try:
        opf.render(opf_file)
    finally:
        opf_file.close()

    opts2 = html2epub_config('margin_left=0\nmargin_right=0\nmargin_top=0\nmargin_bottom=0').parse()
    opts2.output = opts.output
    html2epub(metadata, opts2)
def create_lrf(pages, profile, opts, thumbnail=None):
width, height = PROFILES[profile]
ps = {}
ps['topmargin'] = 0
ps['evensidemargin'] = 0
ps['oddsidemargin'] = 0
ps['textwidth'] = width
ps['textheight'] = height
book = Book(title=opts.title, author=opts.author,
bookid=uuid4().hex,
publisher='%s %s'%(__appname__, __version__), thumbnail=thumbnail,
category='Comic', pagestyledefault=ps,
booksetting=BookSetting(screenwidth=width, screenheight=height))
for page in pages:
imageStream = ImageStream(page)
_page = book.create_page()
_page.append(ImageBlock(refstream=imageStream,
blockwidth=width, blockheight=height, xsize=width,
ysize=height, x1=width, y1=height))
book.append(_page)
book.renderLrf(open(opts.output, 'wb'))
print _('Output written to'), opts.output
def create_pdf(pages, profile, opts, thumbnail=None,toc=None):
    '''
    Create a PDF with one page per image and write it to ``opts.output``.

    :param pages: Paths of the page images, in reading order.
    :param profile: Key into ``PROFILES``; the value is unpacked as
                    ``(width, height)``.
    :param opts: Options object; ``output``, ``author``, ``title`` and
                 ``keep_aspect_ratio`` are read.
    :param thumbnail: Not used by this function.
    :param toc: Optional list of ``(source_path, first_page_index)`` tuples,
                one per comic. With two or more entries a PDF outline is
                generated from the parts of the paths that differ. The list
                passed in is not modified.
    :raises RuntimeError: If no wand could be created.
    :raises IOError: If an image could not be read.
    '''
    width, height = PROFILES[profile]

    from reportlab.pdfgen import canvas

    cur_page = 0
    heading = []    # outline title currently open at each nesting level
    toc_index = 0
    rem = 0
    if toc is not None:
        if len(toc) <= 1:
            # Nothing to distinguish, so no outline is generated.
            toc = None
        else:
            # rem is the position of the last path separator shared by all
            # sources (0 if there is none); everything before it is dropped
            # from the outline titles.
            prefix = os.path.commonprefix([entry[0] for entry in toc])
            rem = max(prefix.rfind(os.sep), 0)
            # Sentinel that never matches a page number, so toc[toc_index]
            # stays valid after the last real entry. Built as a new list to
            # leave the caller's list untouched.
            toc = list(toc) + [("Not seen", -1)]

    # NOTE(review): the page is 15 units taller than the profile height; the
    # reason is not visible here -- confirm before changing.
    pdf = canvas.Canvas(filename=opts.output, pagesize=(width,height+15))
    pdf.setAuthor(opts.author)
    pdf.setTitle(opts.title)

    for page in pages:
        if opts.keep_aspect_ratio:
            # NOTE(review): the wand is never released in this function;
            # presumably it should be destroyed after reading the size --
            # confirm which destroy call this module has available.
            img = NewMagickWand()
            if img < 0:
                raise RuntimeError('Cannot create wand.')
            if not MagickReadImage(img, page):
                raise IOError('Failed to read image from: %s'%page)
            sizex = MagickGetImageWidth(img)
            sizey = MagickGetImageHeight(img)
            # Preserve the aspect ratio by adding border
            aspect = float(sizex) / float(sizey)
            if aspect <= (float(width) / float(height)):
                newsizey = height
                newsizex = int(newsizey * aspect)
                deltax = (width - newsizex) / 2
                deltay = 0
            else:
                newsizex = width
                newsizey = int(newsizex / aspect)
                deltax = 0
                deltay = (height - newsizey) / 2
            pdf.drawImage(page, x=deltax,y=deltay,width=newsizex, height=newsizey)
        else:
            pdf.drawImage(page, x=0,y=0,width=width, height=height)

        if toc is not None and toc[toc_index][1] == cur_page:
            source_path = toc[toc_index][0]
            # Drop the shared prefix and the last four characters (the
            # '.cbz'/'.cbr' extension of the sources collected by do_convert).
            # NOTE(review): the slice starts at the separator itself, so the
            # first component is the empty string -- confirm this is intended.
            remaining = source_path[rem:len(source_path)-4]
            level = 0
            while True:
                key = 'page%d-%d' % (cur_page, level)
                pdf.bookmarkPage(key)
                head, sep, remaining = remaining.partition(os.sep)
                if level >= len(heading) or heading[level] != head:
                    # A changed heading invalidates everything remembered
                    # below it; otherwise an equally named sub-heading of the
                    # previous branch would be mistaken for the current one
                    # and its outline entry skipped.
                    del heading[level:]
                    heading.append(head)
                    pdf.addOutlineEntry(title=head,key=key,level=level)
                level += 1
                if sep == "":
                    break
            toc_index += 1

        cur_page += 1
        pdf.showPage()
    # Write the document to disk
    pdf.save()
def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='lrf'):
path_to_file = run_plugins_on_preprocess(path_to_file)
source = path_to_file
to_delete = []
toc = []
list = []
pages = []
if not opts.title:
opts.title = os.path.splitext(os.path.basename(source))[0]
if not opts.output:
opts.output = os.path.abspath(os.path.splitext(os.path.basename(source))[0]+'.'+output_format)
if os.path.isdir(source):
for path in all_files( source , '*.cbr|*.cbz' ):
list.append( path )
else:
list= [ os.path.abspath(source) ]
for source in list:
tdir = extract_comic(source)
new_pages = find_pages(tdir, sort_on_mtime=opts.no_sort, verbose=opts.verbose)
thumbnail = None
if not new_pages:
raise ValueError('Could not find any pages in the comic: %s'%source)
if not getattr(opts, 'no_process', False):
new_pages, failures, tdir2 = process_pages(new_pages, opts, notification)
if not new_pages:
raise ValueError('Could not find any valid pages in the comic: %s'%source)
if failures:
print 'Could not process the following pages (run with --verbose to see why):'
for f in failures:
print '\t', f
thumbnail = os.path.join(tdir2, 'thumbnail.png')
if not os.access(thumbnail, os.R_OK):
thumbnail = None
toc.append((source,len(pages)))
pages.extend(new_pages)
to_delete.append(tdir)
if output_format == 'lrf':
create_lrf(pages, opts.profile, opts, thumbnail=thumbnail)
if output_format == 'epub':
create_epub(pages, opts.profile, opts, thumbnail=thumbnail)
if output_format == 'pdf':
create_pdf(pages, opts.profile, opts, thumbnail=thumbnail,toc=toc)
for tdir in to_delete:
shutil.rmtree(tdir)
def all_files(root, patterns='*'):
    '''
    Yield the path of every file below `root` whose name matches a pattern.

    Directories are walked top-down. Within each directory the files are
    yielded in sorted order and sub-directories are entered in sorted order,
    so the overall sequence is the same on every run.

    :param root: Directory to search recursively.
    :param patterns: One or more ``fnmatch`` style patterns separated by
                     ``|``, for example ``'*.cbr|*.cbz'``.
    :return: Generator of paths, each being `root` joined with the relative
             location of a matching file. A file is yielded once even if it
             matches several patterns.
    '''
    # Expand patterns from a '|'-separated string to a list
    patterns = patterns.split('|')
    for path, subdirs, files in os.walk(root):
        # os.walk descends into subdirs in the order of this very list, so
        # sorting it in place fixes the otherwise arbitrary traversal order.
        subdirs.sort()
        files.sort()
        for name in files:
            if any(fnmatch.fnmatch(name, pattern) for pattern in patterns):
                yield os.path.join(path, name)
def main(args=sys.argv, notification=None, output_format='lrf'):
parser = option_parser(output_format=output_format)
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print '\nYou must specify a file to convert'
return 1
if not callable(notification):
pb = ProgressBar(terminal_controller, _('Rendering comic pages...'),
no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))
notification = pb.update
source = os.path.abspath(args[1])
do_convert(source, opts, notification, output_format=output_format)
return 0
if __name__ == '__main__':
    # Run as a script: the value returned by main() becomes the exit status.
    exit_status = main()
    sys.exit(exit_status)

View File

@ -5,16 +5,16 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, array, os, re, codecs, logging import sys, array, os, re, codecs, logging
from calibre import setup_cli_handlers, sanitize_file_name from calibre import setup_cli_handlers, sanitize_file_name
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.lrf.meta import LRFMetaFile from calibre.ebooks.lrf.meta import LRFMetaFile
from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \ from calibre.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
Font, Text, TOCObject, BookAttr, ruby_tags Font, Text, TOCObject, BookAttr, ruby_tags
class LRFDocument(LRFMetaFile): class LRFDocument(LRFMetaFile):
class temp(object): pass class temp(object): pass
def __init__(self, stream): def __init__(self, stream):
LRFMetaFile.__init__(self, stream) LRFMetaFile.__init__(self, stream)
self.scramble_key = self.xor_key self.scramble_key = self.xor_key
@ -23,11 +23,11 @@ class LRFDocument(LRFMetaFile):
self.image_map = {} self.image_map = {}
self.toc = '' self.toc = ''
self.keep_parsing = True self.keep_parsing = True
def parse(self): def parse(self):
self._parse_objects() self._parse_objects()
self.metadata = LRFDocument.temp() self.metadata = LRFDocument.temp()
for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id', for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id',
'classification', 'free_text', 'publisher', 'label', 'category'): 'classification', 'free_text', 'publisher', 'label', 'category'):
setattr(self.metadata, a, getattr(self, a)) setattr(self.metadata, a, getattr(self, a))
self.doc_info = LRFDocument.temp() self.doc_info = LRFDocument.temp()
@ -37,7 +37,7 @@ class LRFDocument(LRFMetaFile):
self.device_info = LRFDocument.temp() self.device_info = LRFDocument.temp()
for a in ('dpi', 'width', 'height'): for a in ('dpi', 'width', 'height'):
setattr(self.device_info, a, getattr(self, a)) setattr(self.device_info, a, getattr(self, a))
def _parse_objects(self): def _parse_objects(self):
self.objects = {} self.objects = {}
self._file.seek(self.object_index_offset) self._file.seek(self.object_index_offset)
@ -68,15 +68,15 @@ class LRFDocument(LRFMetaFile):
attr = h[0] attr = h[0]
if hasattr(obj, attr): if hasattr(obj, attr):
self.ruby_tags[attr] = getattr(obj, attr) self.ruby_tags[attr] = getattr(obj, attr)
def __iter__(self): def __iter__(self):
for pt in self.page_trees: for pt in self.page_trees:
yield pt yield pt
def write_files(self): def write_files(self):
for obj in self.image_map.values() + self.font_map.values(): for obj in self.image_map.values() + self.font_map.values():
open(obj.file, 'wb').write(obj.stream) open(obj.file, 'wb').write(obj.stream)
def to_xml(self, write_files=True): def to_xml(self, write_files=True):
bookinfo = u'<BookInformation>\n<Info version="1.1">\n<BookInfo>\n' bookinfo = u'<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
bookinfo += u'<Title reading="%s">%s</Title>\n'%(self.metadata.title_reading, self.metadata.title) bookinfo += u'<Title reading="%s">%s</Title>\n'%(self.metadata.title_reading, self.metadata.title)
@ -113,7 +113,7 @@ class LRFDocument(LRFMetaFile):
pages += unicode(page) pages += unicode(page)
pages += close pages += close
traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id] traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
objects = u'\n<Objects>\n' objects = u'\n<Objects>\n'
styles = u'\n<Style>\n' styles = u'\n<Style>\n'
for obj in self.objects: for obj in self.objects:
@ -131,16 +131,16 @@ class LRFDocument(LRFMetaFile):
if write_files: if write_files:
self.write_files() self.write_files()
return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>' return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
def option_parser(): def option_parser():
parser = OptionParser(usage=_('%prog book.lrf\nConvert an LRF file into an LRS (XML UTF-8 encoded) file')) parser = OptionParser(usage=_('%prog book.lrf\nConvert an LRF file into an LRS (XML UTF-8 encoded) file'))
parser.add_option('--output', '-o', default=None, help=_('Output LRS file'), dest='out') parser.add_option('--output', '-o', default=None, help=_('Output LRS file'), dest='out')
parser.add_option('--dont-output-resources', default=True, action='store_false', parser.add_option('--dont-output-resources', default=True, action='store_false',
help=_('Do not save embedded image and font files to disk'), help=_('Do not save embedded image and font files to disk'),
dest='output_resources') dest='output_resources')
parser.add_option('--verbose', default=False, action='store_true', dest='verbose') parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
return parser return parser
def main(args=sys.argv, logger=None): def main(args=sys.argv, logger=None):
parser = option_parser() parser = option_parser()
opts, args = parser.parse_args(args) opts, args = parser.parse_args(args)

View File

@ -970,7 +970,12 @@ class Canvas(LRFStream):
stream = cStringIO.StringIO(self.stream) stream = cStringIO.StringIO(self.stream)
while stream.tell() < len(self.stream): while stream.tell() < len(self.stream):
tag = Tag(stream) tag = Tag(stream)
self._contents.append(PutObj(self._document.objects, *struct.unpack("<HHI", tag.contents))) try:
self._contents.append(
PutObj(self._document.objects,
*struct.unpack("<HHI", tag.contents)))
except struct.error:
print 'Canvas object has errors, skipping.'
def __unicode__(self): def __unicode__(self):
s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,) s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)

View File

@ -4,15 +4,16 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Support for reading the metadata from a LIT file. Support for reading the metadata from a LIT file.
''' '''
import sys, cStringIO, os import cStringIO, os
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.lit.reader import LitReader
def get_metadata(stream): def get_metadata(stream):
litfile = LitReader(stream) from calibre.ebooks.lit.reader import LitContainer
src = litfile.meta.encode('utf-8') litfile = LitContainer(stream)
src = litfile.get_metadata().encode('utf-8')
litfile = litfile._litfile
opf = OPF(cStringIO.StringIO(src), os.getcwd()) opf = OPF(cStringIO.StringIO(src), os.getcwd())
mi = MetaInformation(opf) mi = MetaInformation(opf)
covers = [] covers = []

View File

@ -313,8 +313,10 @@ class MobiReader(object):
self.read_embedded_metadata(root, metadata_elems[0], guide) self.read_embedded_metadata(root, metadata_elems[0], guide)
for elem in guides + metadata_elems: for elem in guides + metadata_elems:
elem.getparent().remove(elem) elem.getparent().remove(elem)
fname = self.name.encode('ascii', 'replace')
fname = re.sub(r'[\x08\x15\0]+', '', fname)
htmlfile = os.path.join(output_dir, htmlfile = os.path.join(output_dir,
sanitize_file_name(self.name)+'.html') sanitize_file_name(fname)+'.html')
try: try:
for ref in guide.xpath('descendant::reference'): for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'): if ref.attrib.has_key('href'):
@ -396,8 +398,8 @@ class MobiReader(object):
'xx-large' : '6', 'xx-large' : '6',
} }
mobi_version = self.book_header.mobi_version mobi_version = self.book_header.mobi_version
style_map = {}
for i, tag in enumerate(root.iter(etree.Element)): for i, tag in enumerate(root.iter(etree.Element)):
tag.attrib.pop('xmlns', '')
if tag.tag in ('country-region', 'place', 'placetype', 'placename', if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content'): 'state', 'city', 'street', 'address', 'content'):
tag.tag = 'div' if tag.tag == 'content' else 'span' tag.tag = 'div' if tag.tag == 'content' else 'span'

View File

@ -1578,15 +1578,15 @@ class OEBBook(object):
return data.decode('utf-16') return data.decode('utf-16')
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
try:
return data.decode('utf-8')
except UnicodeDecodeError:
pass
if self.encoding is not None: if self.encoding is not None:
try: try:
return data.decode(self.encoding) return data.decode(self.encoding)
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
try:
return data.decode('utf-8')
except UnicodeDecodeError:
pass
data, _ = xml_to_unicode(data) data, _ = xml_to_unicode(data)
data = data.replace('\r\n', '\n') data = data.replace('\r\n', '\n')
data = data.replace('\r', '\n') data = data.replace('\r', '\n')

View File

@ -128,6 +128,8 @@ class EbookIterator(object):
plumber.setup_options() plumber.setup_options()
if hasattr(plumber.opts, 'dont_package'): if hasattr(plumber.opts, 'dont_package'):
plumber.opts.dont_package = True plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'), self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log, plumber.opts, plumber.input_fmt, self.log,
{}, self.base) {}, self.base)

View File

@ -59,6 +59,7 @@ class Split(object):
self.fix_links() self.fix_links()
def split_item(self, item): def split_item(self, item):
page_breaks, page_break_ids = [], []
if self.split_on_page_breaks: if self.split_on_page_breaks:
page_breaks, page_break_ids = self.find_page_breaks(item) page_breaks, page_break_ids = self.find_page_breaks(item)

View File

@ -40,7 +40,7 @@ class PDFOutput(OutputFormatPlugin):
OptionRecommendation(name='margin_right', recommended_value='1', OptionRecommendation(name='margin_right', recommended_value='1',
level=OptionRecommendation.LOW, level=OptionRecommendation.LOW,
help=_('The right margin around the document.')), help=_('The right margin around the document.')),
OptionRecommendation(name='unit', recommended_value='inch', OptionRecommendation(name='unit', recommended_value='inch',
level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(), level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(),
help=_('The unit of measure. Default is inch. Choices ' help=_('The unit of measure. Default is inch. Choices '
@ -58,15 +58,18 @@ class PDFOutput(OutputFormatPlugin):
help=_('The orientation of the page. Default is portrait. Choices ' help=_('The orientation of the page. Default is portrait. Choices '
'are %s' % ORIENTATIONS.keys())), 'are %s' % ORIENTATIONS.keys())),
]) ])
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):
self.opts, self.log = opts, log
if input_plugin.is_image_collection:
self.convert_images(input_plugin.get_images())
with TemporaryDirectory('_pdf_out') as oebdir: with TemporaryDirectory('_pdf_out') as oebdir:
OEBOutput(None).convert(oeb_book, oebdir, input_plugin, opts, log) OEBOutput(None).convert(oeb_book, oebdir, input_plugin, opts, log)
opf = glob.glob(os.path.join(oebdir, '*.opf'))[0] opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
writer = PDFWriter(log, opts) writer = PDFWriter(log, opts)
close = False close = False
if not hasattr(output_path, 'write'): if not hasattr(output_path, 'write'):
close = True close = True
@ -75,10 +78,10 @@ class PDFOutput(OutputFormatPlugin):
out_stream = open(output_path, 'wb') out_stream = open(output_path, 'wb')
else: else:
out_stream = output_path out_stream = output_path
out_stream.seek(0) out_stream.seek(0)
out_stream.truncate() out_stream.truncate()
writer.dump(opf, out_stream, PDFMetadata(oeb_book.metadata)) writer.dump(opf, out_stream, PDFMetadata(oeb_book.metadata))
if close: if close:
out_stream.close() out_stream.close()

View File

@ -9,12 +9,11 @@ __docformat__ = 'restructuredtext en'
Write content to PDF. Write content to PDF.
''' '''
import os, shutil, sys import os, shutil
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.customize.profiles import OutputProfile
from calibre.ebooks.pdf.pageoptions import unit, paper_size, \ from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
orientation, size orientation, size
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
@ -24,12 +23,12 @@ from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, \
from PyQt4.QtWebKit import QWebView from PyQt4.QtWebKit import QWebView
from pyPdf import PdfFileWriter, PdfFileReader from pyPdf import PdfFileWriter, PdfFileReader
class PDFMetadata(object): class PDFMetadata(object):
def __init__(self, oeb_metadata=None): def __init__(self, oeb_metadata=None):
self.title = _('Unknown') self.title = _('Unknown')
self.author = _('Unknown') self.author = _('Unknown')
if oeb_metadata != None: if oeb_metadata != None:
if len(oeb_metadata.title) >= 1: if len(oeb_metadata.title) >= 1:
self.title = oeb_metadata.title[0].value self.title = oeb_metadata.title[0].value
@ -42,16 +41,16 @@ class PDFWriter(QObject):
if QApplication.instance() is None: if QApplication.instance() is None:
QApplication([]) QApplication([])
QObject.__init__(self) QObject.__init__(self)
self.logger = log self.logger = log
self.loop = QEventLoop() self.loop = QEventLoop()
self.view = QWebView() self.view = QWebView()
self.connect(self.view, SIGNAL('loadFinished(bool)'), self._render_html) self.connect(self.view, SIGNAL('loadFinished(bool)'), self._render_html)
self.render_queue = [] self.render_queue = []
self.combine_queue = [] self.combine_queue = []
self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts') self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts')
self.custom_size = None self.custom_size = None
if opts.custom_size != None: if opts.custom_size != None:
width, sep, height = opts.custom_size.partition('x') width, sep, height = opts.custom_size.partition('x')
@ -62,44 +61,44 @@ class PDFWriter(QObject):
self.custom_size = (width, height) self.custom_size = (width, height)
except: except:
self.custom_size = None self.custom_size = None
self.opts = opts self.opts = opts
def dump(self, opfpath, out_stream, pdf_metadata): def dump(self, opfpath, out_stream, pdf_metadata):
self.metadata = pdf_metadata self.metadata = pdf_metadata
self._delete_tmpdir() self._delete_tmpdir()
opf = OPF(opfpath, os.path.dirname(opfpath)) opf = OPF(opfpath, os.path.dirname(opfpath))
self.render_queue = [i.path for i in opf.spine] self.render_queue = [i.path for i in opf.spine]
self.combine_queue = [] self.combine_queue = []
self.out_stream = out_stream self.out_stream = out_stream
QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection) QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
self.loop.exec_() self.loop.exec_()
@QtCore.pyqtSignature('_render_book()') @QtCore.pyqtSignature('_render_book()')
def _render_book(self): def _render_book(self):
if len(self.render_queue) == 0: if len(self.render_queue) == 0:
self._write() self._write()
else: else:
self._render_next() self._render_next()
def _render_next(self): def _render_next(self):
item = str(self.render_queue.pop(0)) item = str(self.render_queue.pop(0))
self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1))) self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1)))
self.logger.info('Processing %s...' % item) self.logger.info('Processing %s...' % item)
self.view.load(QUrl(item)) self.view.load(QUrl(item))
def _render_html(self, ok): def _render_html(self, ok):
if ok: if ok:
item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue)) item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue))
self.logger.debug('\tRendering item %s as %i' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue))) self.logger.debug('\tRendering item %s as %i' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue)))
printer = QPrinter(QPrinter.HighResolution) printer = QPrinter(QPrinter.HighResolution)
if self.opts.output_profile.short_name == 'default': if self.opts.output_profile.short_name == 'default':
if self.custom_size == None: if self.custom_size == None:
printer.setPaperSize(paper_size(self.opts.paper_size)) printer.setPaperSize(paper_size(self.opts.paper_size))
@ -107,7 +106,7 @@ class PDFWriter(QObject):
printer.setPaperSize(QSizeF(self.custom_size[0], self.custom_size[1]), unit(self.opts.unit)) printer.setPaperSize(QSizeF(self.custom_size[0], self.custom_size[1]), unit(self.opts.unit))
else: else:
printer.setPaperSize(QSizeF(self.opts.output_profile.width / self.opts.output_profile.dpi, self.opts.output_profile.height / self.opts.output_profile.dpi), QPrinter.Inch) printer.setPaperSize(QSizeF(self.opts.output_profile.width / self.opts.output_profile.dpi, self.opts.output_profile.height / self.opts.output_profile.dpi), QPrinter.Inch)
printer.setPageMargins(size(self.opts.margin_left), size(self.opts.margin_top), size(self.opts.margin_right), size(self.opts.margin_bottom), unit(self.opts.unit)) printer.setPageMargins(size(self.opts.margin_left), size(self.opts.margin_top), size(self.opts.margin_right), size(self.opts.margin_bottom), unit(self.opts.unit))
printer.setOrientation(orientation(self.opts.orientation)) printer.setOrientation(orientation(self.opts.orientation))
printer.setOutputFormat(QPrinter.PdfFormat) printer.setOutputFormat(QPrinter.PdfFormat)
@ -122,7 +121,7 @@ class PDFWriter(QObject):
def _write(self): def _write(self):
self.logger.info('Combining individual PDF parts...') self.logger.info('Combining individual PDF parts...')
try: try:
outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author) outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
for item in self.combine_queue: for item in self.combine_queue:

View File

@ -3,19 +3,19 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, re import re
from calibre.utils import zipfile from calibre.utils import zipfile
def update(pathtozip, patterns, filepaths, names, compression=zipfile.ZIP_DEFLATED, verbose=True): def update(pathtozip, patterns, filepaths, names, compression=zipfile.ZIP_DEFLATED, verbose=True):
''' '''
Update files in the zip file at `pathtozip` matching the given Update files in the zip file at `pathtozip` matching the given
`patterns` with the given `filepaths`. If more than `patterns` with the given `filepaths`. If more than
one file matches, all of the files are replaced. one file matches, all of the files are replaced.
:param patterns: A list of compiled regular expressions :param patterns: A list of compiled regular expressions
:param filepaths: A list of paths to the replacement files. Must have the :param filepaths: A list of paths to the replacement files. Must have the
same length as `patterns`. same length as `patterns`.
:param names: A list of archive names for each file in filepaths. :param names: A list of archive names for each file in filepaths.
A name can be `None` in which case the name of the existing A name can be `None` in which case the name of the existing
file in the archive is used. file in the archive is used.
:param compression: The compression to use when replacing files. Can be :param compression: The compression to use when replacing files. Can be
@ -48,4 +48,4 @@ def extract_member(filename, match=re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)
names = zf.namelist() names = zf.namelist()
for name in names: for name in names:
if match.search(name): if match.search(name):
return name, zf.read(name) return name, zf.read(name)

View File

@ -43,7 +43,7 @@ PARALLEL_FUNCS = {
'lrfviewer' : 'lrfviewer' :
('calibre.gui2.lrf_renderer.main', 'main', {}, None), ('calibre.gui2.lrf_renderer.main', 'main', {}, None),
'ebook-viewer' : 'ebook-viewer' :
('calibre.gui2.viewer.main', 'main', {}, None), ('calibre.gui2.viewer.main', 'main', {}, None),
@ -52,34 +52,34 @@ PARALLEL_FUNCS = {
'render_table' : 'render_table' :
('calibre.ebooks.lrf.html.table_as_image', 'do_render', {}, None), ('calibre.ebooks.lrf.html.table_as_image', 'do_render', {}, None),
'render_pages' : 'render_pages' :
('calibre.ebooks.lrf.comic.convert_from', 'render_pages', {}, 'notification'), ('calibre.ebooks.comic.input', 'render_pages', {}, 'notification'),
'comic2lrf' : 'comic2lrf' :
('calibre.ebooks.lrf.comic.convert_from', 'do_convert', {}, 'notification'), ('calibre.ebooks.lrf.comic.convert_from', 'do_convert', {}, 'notification'),
'any2epub' : 'any2epub' :
('calibre.ebooks.epub.from_any', 'any2epub', {}, None), ('calibre.ebooks.epub.from_any', 'any2epub', {}, None),
'feeds2epub' : 'feeds2epub' :
('calibre.ebooks.epub.from_feeds', 'main', {}, 'notification'), ('calibre.ebooks.epub.from_feeds', 'main', {}, 'notification'),
'comic2epub' : 'comic2epub' :
('calibre.ebooks.epub.from_comic', 'convert', {}, 'notification'), ('calibre.ebooks.epub.from_comic', 'convert', {}, 'notification'),
'any2mobi' : 'any2mobi' :
('calibre.ebooks.mobi.from_any', 'any2mobi', {}, None), ('calibre.ebooks.mobi.from_any', 'any2mobi', {}, None),
'any2pdf' : 'any2pdf' :
('calibre.ebooks.pdf.from_any', 'any2pdf', {}, None), ('calibre.ebooks.pdf.from_any', 'any2pdf', {}, None),
'feeds2mobi' : 'feeds2mobi' :
('calibre.ebooks.mobi.from_feeds', 'main', {}, 'notification'), ('calibre.ebooks.mobi.from_feeds', 'main', {}, 'notification'),
'comic2mobi' : 'comic2mobi' :
('calibre.ebooks.mobi.from_comic', 'convert', {}, 'notification'), ('calibre.ebooks.mobi.from_comic', 'convert', {}, 'notification'),
'ebook-convert' : 'ebook-convert' :
('calibre.ebooks.conversion.cli', 'main', {}, None), ('calibre.ebooks.conversion.cli', 'main', {}, None),
} }
@ -174,7 +174,7 @@ class WorkerMother(object):
contents = os.path.join(contents, 'console.app', 'Contents') contents = os.path.join(contents, 'console.app', 'Contents')
self.executable = os.path.join(contents, 'MacOS', self.executable = os.path.join(contents, 'MacOS',
os.path.basename(sys.executable)) os.path.basename(sys.executable))
resources = os.path.join(contents, 'Resources') resources = os.path.join(contents, 'Resources')
fd = os.path.join(contents, 'Frameworks') fd = os.path.join(contents, 'Frameworks')
sp = os.path.join(resources, 'lib', 'python'+sys.version[:3], 'site-packages.zip') sp = os.path.join(resources, 'lib', 'python'+sys.version[:3], 'site-packages.zip')
@ -198,7 +198,7 @@ class WorkerMother(object):
for func in ('spawn_free_spirit', 'spawn_worker'): for func in ('spawn_free_spirit', 'spawn_worker'):
setattr(self, func, getattr(self, func+'_'+ext)) setattr(self, func, getattr(self, func+'_'+ext))
def cleanup_child_windows(self, child, name=None, fd=None): def cleanup_child_windows(self, child, name=None, fd=None):
try: try:
child.kill() child.kill()
@ -526,8 +526,8 @@ class JobKilled(Exception):
pass pass
class Job(object): class Job(object):
def __init__(self, job_done, job_manager=None, def __init__(self, job_done, job_manager=None,
args=[], kwargs={}, description=None): args=[], kwargs={}, description=None):
self.args = args self.args = args
self.kwargs = kwargs self.kwargs = kwargs
@ -540,9 +540,9 @@ class Job(object):
self.description = description self.description = description
self.start_time = None self.start_time = None
self.running_time = None self.running_time = None
self.result = self.exception = self.traceback = self.log = None self.result = self.exception = self.traceback = self.log = None
def __cmp__(self, other): def __cmp__(self, other):
sstatus, ostatus = self.status(), other.status() sstatus, ostatus = self.status(), other.status()
if sstatus == ostatus or (self.has_run and other.has_run): if sstatus == ostatus or (self.has_run and other.has_run):
@ -557,8 +557,8 @@ class Job(object):
return -1 return -1
if ostatus == 'WAITING': if ostatus == 'WAITING':
return 1 return 1
def job_done(self): def job_done(self):
self.is_running, self.has_run = False, True self.is_running, self.has_run = False, True
self.running_time = (time.time() - self.start_time) if \ self.running_time = (time.time() - self.start_time) if \
@ -566,14 +566,14 @@ class Job(object):
if self.job_manager is not None: if self.job_manager is not None:
self.job_manager.job_done(self) self.job_manager.job_done(self)
self._job_done(self) self._job_done(self)
def start_work(self): def start_work(self):
self.is_running = True self.is_running = True
self.has_run = False self.has_run = False
self.start_time = time.time() self.start_time = time.time()
if self.job_manager is not None: if self.job_manager is not None:
self.job_manager.start_work(self) self.job_manager.start_work(self)
def update_status(self, percent, msg=None): def update_status(self, percent, msg=None):
self.percent = percent self.percent = percent
self.msg = msg self.msg = msg
@ -582,7 +582,7 @@ class Job(object):
self.job_manager.status_update(self) self.job_manager.status_update(self)
except: except:
traceback.print_exc() traceback.print_exc()
def status(self): def status(self):
if self.is_running: if self.is_running:
return 'WORKING' return 'WORKING'
@ -592,7 +592,7 @@ class Job(object):
if self.exception is None: if self.exception is None:
return 'DONE' return 'DONE'
return 'ERROR' return 'ERROR'
def console_text(self): def console_text(self):
ans = [u'Job: '] ans = [u'Job: ']
if self.description: if self.description:
@ -610,13 +610,13 @@ class Job(object):
if self.traceback: if self.traceback:
ans.append(u'**Traceback**:') ans.append(u'**Traceback**:')
ans.extend(self.traceback.split('\n')) ans.extend(self.traceback.split('\n'))
if self.log: if self.log:
if isinstance(self.log, str): if isinstance(self.log, str):
self.log = unicode(self.log, 'utf-8', 'replace') self.log = unicode(self.log, 'utf-8', 'replace')
ans.append(self.log) ans.append(self.log)
return (u'\n'.join(ans)).encode('utf-8') return (u'\n'.join(ans)).encode('utf-8')
def gui_text(self): def gui_text(self):
ans = [u'Job: '] ans = [u'Job: ']
if self.description: if self.description:
@ -641,19 +641,19 @@ class Job(object):
if isinstance(self.log, str): if isinstance(self.log, str):
self.log = unicode(self.log, 'utf-8', 'replace') self.log = unicode(self.log, 'utf-8', 'replace')
ans.extend(self.log.split('\n')) ans.extend(self.log.split('\n'))
ans = [x.decode(preferred_encoding, 'replace') if isinstance(x, str) else x for x in ans] ans = [x.decode(preferred_encoding, 'replace') if isinstance(x, str) else x for x in ans]
return u'<br>'.join(ans) return u'<br>'.join(ans)
class ParallelJob(Job): class ParallelJob(Job):
def __init__(self, func, *args, **kwargs): def __init__(self, func, *args, **kwargs):
Job.__init__(self, *args, **kwargs) Job.__init__(self, *args, **kwargs)
self.func = func self.func = func
self.done = self.job_done self.done = self.job_done
def output(self, msg): def output(self, msg):
if not self.log: if not self.log:
self.log = u'' self.log = u''
@ -663,7 +663,7 @@ class ParallelJob(Job):
self.log += msg self.log += msg
if self.job_manager is not None: if self.job_manager is not None:
self.job_manager.output(self) self.job_manager.output(self)
def remove_ipc_socket(path): def remove_ipc_socket(path):
os = __import__('os') os = __import__('os')
@ -702,7 +702,7 @@ class Server(Thread):
self.result_lock = RLock() self.result_lock = RLock()
self.pool_lock = RLock() self.pool_lock = RLock()
self.start() self.start()
def split(self, tasks): def split(self, tasks):
''' '''
Split a list into a list of sub lists, with the number of sub lists being Split a list into a list of sub lists, with the number of sub lists being
@ -720,7 +720,7 @@ class Server(Thread):
ans.append(section) ans.append(section)
pos += delta pos += delta
return ans return ans
def close(self): def close(self):
try: try:
@ -733,7 +733,7 @@ class Server(Thread):
self.jobs.append(job) self.jobs.append(job)
if job.job_manager is not None: if job.job_manager is not None:
job.job_manager.add_job(job) job.job_manager.add_job(job)
def poll(self): def poll(self):
''' '''
Return True if the server has either working or queued jobs Return True if the server has either working or queued jobs
@ -741,14 +741,14 @@ class Server(Thread):
with self.job_lock: with self.job_lock:
with self.working_lock: with self.working_lock:
return len(self.jobs) + len(self.working) > 0 return len(self.jobs) + len(self.working) > 0
def wait(self, sleep=1): def wait(self, sleep=1):
''' '''
Wait until job queue is empty Wait until job queue is empty
''' '''
while self.poll(): while self.poll():
time.sleep(sleep) time.sleep(sleep)
def run(self): def run(self):
while True: while True:
job = None job = None
@ -935,7 +935,7 @@ def work(client_socket, func, args, kwdargs):
func(*args, **kwargs) func(*args, **kwargs)
except (Exception, SystemExit): except (Exception, SystemExit):
continue continue
time.sleep(5) # Give any in progress BufferedSend time to complete time.sleep(5) # Give any in progress BufferedSend time to complete
@ -948,7 +948,7 @@ def worker(host, port):
if msg != 'OK': if msg != 'OK':
return 1 return 1
write(client_socket, 'WAITING') write(client_socket, 'WAITING')
sys.stdout = BufferedSender(client_socket) sys.stdout = BufferedSender(client_socket)
sys.stderr = sys.stdout sys.stderr = sys.stdout

View File

@ -81,7 +81,12 @@ def sendmail(msg, from_, to, localhost=None, verbose=0, timeout=30,
for x in to: for x in to:
return sendmail_direct(from_, x, msg, timeout, localhost, verbose) return sendmail_direct(from_, x, msg, timeout, localhost, verbose)
import smtplib import smtplib
cls = smtplib.SMTP if encryption == 'TLS' else smtplib.SMTP_SSL class SMTP_SSL(smtplib.SMTP_SSL): # Workaround for bug in smtplib.py
def _get_socket(self, host, port, timeout):
smtplib.SMTP_SSL._get_socket(self, host, port, timeout)
return self.sock
cls = smtplib.SMTP if encryption == 'TLS' else SMTP_SSL
timeout = None # Non-blocking sockets sometimes don't work timeout = None # Non-blocking sockets sometimes don't work
port = int(port) port = int(port)
s = cls(timeout=timeout, local_hostname=localhost) s = cls(timeout=timeout, local_hostname=localhost)
@ -93,6 +98,8 @@ def sendmail(msg, from_, to, localhost=None, verbose=0, timeout=30,
s.starttls() s.starttls()
s.ehlo() s.ehlo()
if username is not None and password is not None: if username is not None and password is not None:
if encryption == 'SSL':
s.sock = s.file.sslobj
s.login(username, password) s.login(username, password)
s.sendmail(from_, to, msg) s.sendmail(from_, to, msg)
return s.quit() return s.quit()

View File

@ -7,22 +7,22 @@ import sys, re, os
class TerminalController: class TerminalController:
""" """
A class that can be used to portably generate formatted output to A class that can be used to portably generate formatted output to
a terminal. a terminal.
`TerminalController` defines a set of instance variables whose `TerminalController` defines a set of instance variables whose
values are initialized to the control sequence necessary to values are initialized to the control sequence necessary to
perform a given action. These can be simply included in normal perform a given action. These can be simply included in normal
output to the terminal: output to the terminal:
>>> term = TerminalController() >>> term = TerminalController()
>>> print 'This is '+term.GREEN+'green'+term.NORMAL >>> print 'This is '+term.GREEN+'green'+term.NORMAL
Alternatively, the `render()` method can be used, which replaces Alternatively, the `render()` method can be used, which replaces
'${action}' with the string required to perform 'action': '${action}' with the string required to perform 'action':
>>> term = TerminalController() >>> term = TerminalController()
>>> print term.render('This is ${GREEN}green${NORMAL}') >>> print term.render('This is ${GREEN}green${NORMAL}')
If the terminal doesn't support a given action, then the value of If the terminal doesn't support a given action, then the value of
the corresponding instance variable will be set to ''. As a the corresponding instance variable will be set to ''. As a
result, the above code will still work on terminals that do not result, the above code will still work on terminals that do not
@ -30,11 +30,11 @@ class TerminalController:
Also, this means that you can test whether the terminal supports a Also, this means that you can test whether the terminal supports a
given action by simply testing the truth value of the given action by simply testing the truth value of the
corresponding instance variable: corresponding instance variable:
>>> term = TerminalController() >>> term = TerminalController()
>>> if term.CLEAR_SCREEN: >>> if term.CLEAR_SCREEN:
... print 'This terminal supports clearing the screen.' ... print 'This terminal supports clearing the screen.'
Finally, if the width and height of the terminal are known, then Finally, if the width and height of the terminal are known, then
they will be stored in the `COLS` and `LINES` attributes. they will be stored in the `COLS` and `LINES` attributes.
""" """
@ -44,35 +44,35 @@ class TerminalController:
DOWN = '' #: Move the cursor down one line DOWN = '' #: Move the cursor down one line
LEFT = '' #: Move the cursor left one char LEFT = '' #: Move the cursor left one char
RIGHT = '' #: Move the cursor right one char RIGHT = '' #: Move the cursor right one char
# Deletion: # Deletion:
CLEAR_SCREEN = '' #: Clear the screen and move to home position CLEAR_SCREEN = '' #: Clear the screen and move to home position
CLEAR_EOL = '' #: Clear to the end of the line. CLEAR_EOL = '' #: Clear to the end of the line.
CLEAR_BOL = '' #: Clear to the beginning of the line. CLEAR_BOL = '' #: Clear to the beginning of the line.
CLEAR_EOS = '' #: Clear to the end of the screen CLEAR_EOS = '' #: Clear to the end of the screen
# Output modes: # Output modes:
BOLD = '' #: Turn on bold mode BOLD = '' #: Turn on bold mode
BLINK = '' #: Turn on blink mode BLINK = '' #: Turn on blink mode
DIM = '' #: Turn on half-bright mode DIM = '' #: Turn on half-bright mode
REVERSE = '' #: Turn on reverse-video mode REVERSE = '' #: Turn on reverse-video mode
NORMAL = '' #: Turn off all modes NORMAL = '' #: Turn off all modes
# Cursor display: # Cursor display:
HIDE_CURSOR = '' #: Make the cursor invisible HIDE_CURSOR = '' #: Make the cursor invisible
SHOW_CURSOR = '' #: Make the cursor visible SHOW_CURSOR = '' #: Make the cursor visible
# Terminal size: # Terminal size:
COLS = None #: Width of the terminal (None for unknown) COLS = None #: Width of the terminal (None for unknown)
LINES = None #: Height of the terminal (None for unknown) LINES = None #: Height of the terminal (None for unknown)
# Foreground colors: # Foreground colors:
BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = '' BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = ''
# Background colors: # Background colors:
BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = '' BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = ''
BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = '' BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = ''
_STRING_CAPABILITIES = """ _STRING_CAPABILITIES = """
BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1 BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1
CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold
@ -80,7 +80,7 @@ class TerminalController:
HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split() HIDE_CURSOR=cinvis SHOW_CURSOR=cnorm""".split()
_COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split() _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split()
_ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split() _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split()
def __init__(self, term_stream=sys.stdout): def __init__(self, term_stream=sys.stdout):
""" """
Create a `TerminalController` and initialize its attributes Create a `TerminalController` and initialize its attributes
@ -92,24 +92,24 @@ class TerminalController:
# Curses isn't available on all platforms # Curses isn't available on all platforms
try: import curses try: import curses
except: return except: return
# If the stream isn't a tty, then assume it has no capabilities. # If the stream isn't a tty, then assume it has no capabilities.
if os.environ.get('CALIBRE_WORKER', None) is not None or not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return if os.environ.get('CALIBRE_WORKER', None) is not None or not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return
# Check the terminal type. If we fail, then assume that the # Check the terminal type. If we fail, then assume that the
# terminal has no capabilities. # terminal has no capabilities.
try: curses.setupterm() try: curses.setupterm()
except: return except: return
# Look up numeric capabilities. # Look up numeric capabilities.
self.COLS = curses.tigetnum('cols') self.COLS = curses.tigetnum('cols')
self.LINES = curses.tigetnum('lines') self.LINES = curses.tigetnum('lines')
# Look up string capabilities. # Look up string capabilities.
for capability in self._STRING_CAPABILITIES: for capability in self._STRING_CAPABILITIES:
(attrib, cap_name) = capability.split('=') (attrib, cap_name) = capability.split('=')
setattr(self, attrib, self._tigetstr(cap_name) or '') setattr(self, attrib, self._tigetstr(cap_name) or '')
# Colors # Colors
set_fg = self._tigetstr('setf') set_fg = self._tigetstr('setf')
if set_fg: if set_fg:
@ -127,7 +127,7 @@ class TerminalController:
if set_bg_ansi: if set_bg_ansi:
for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS): for i,color in zip(range(len(self._ANSICOLORS)), self._ANSICOLORS):
setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '') setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '')
def _tigetstr(self, cap_name): def _tigetstr(self, cap_name):
# String capabilities can include "delays" of the form "$<2>". # String capabilities can include "delays" of the form "$<2>".
# For any modern terminal, we should be able to just ignore # For any modern terminal, we should be able to just ignore
@ -135,7 +135,7 @@ class TerminalController:
import curses import curses
cap = curses.tigetstr(cap_name) or '' cap = curses.tigetstr(cap_name) or ''
return re.sub(r'\$<\d+>[/*]?', '', cap) return re.sub(r'\$<\d+>[/*]?', '', cap)
def render(self, template): def render(self, template):
""" """
Replace each $-substitutions in the given template string with Replace each $-substitutions in the given template string with
@ -143,7 +143,7 @@ class TerminalController:
'' (if it's not). '' (if it's not).
""" """
return re.sub(r'\$\$|\${\w+}', self._render_sub, template) return re.sub(r'\$\$|\${\w+}', self._render_sub, template)
def _render_sub(self, match): def _render_sub(self, match):
s = match.group() s = match.group()
if s == '$$': return s if s == '$$': return s
@ -156,20 +156,20 @@ class TerminalController:
class ProgressBar: class ProgressBar:
""" """
A 3-line progress bar, which looks like:: A 3-line progress bar, which looks like::
Header Header
20% [===========----------------------------------] 20% [===========----------------------------------]
progress message progress message
The progress bar is colored, if the terminal supports color The progress bar is colored, if the terminal supports color
output; and adjusts to the width of the terminal. output; and adjusts to the width of the terminal.
If the terminal doesn't have the required capabilities, it uses a If the terminal doesn't have the required capabilities, it uses a
simple progress bar. simple progress bar.
""" """
BAR = '%3d%% ${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}\n' BAR = '%3d%% ${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}\n'
HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n' HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n'
def __init__(self, term, header, no_progress_bar = False): def __init__(self, term, header, no_progress_bar = False):
self.term, self.no_progress_bar = term, no_progress_bar self.term, self.no_progress_bar = term, no_progress_bar
self.fancy = self.term.CLEAR_EOL and self.term.UP and self.term.BOL self.fancy = self.term.CLEAR_EOL and self.term.UP and self.term.BOL
@ -177,12 +177,14 @@ class ProgressBar:
self.width = self.term.COLS or 75 self.width = self.term.COLS or 75
self.bar = term.render(self.BAR) self.bar = term.render(self.BAR)
self.header = self.term.render(self.HEADER % header.center(self.width)) self.header = self.term.render(self.HEADER % header.center(self.width))
if isinstance(self.header, unicode):
self.header = self.header.encode('utf-8')
self.cleared = 1 #: true if we haven't drawn the bar yet. self.cleared = 1 #: true if we haven't drawn the bar yet.
def update(self, percent, message=''): def update(self, percent, message=''):
if isinstance(message, unicode): if isinstance(message, unicode):
message = message.encode('utf-8', 'replace') message = message.encode('utf-8', 'replace')
if self.no_progress_bar: if self.no_progress_bar:
if message: if message:
print message print message
@ -203,8 +205,8 @@ class ProgressBar:
else: else:
print '%d%%'%(percent*100), message print '%d%%'%(percent*100), message
sys.stdout.flush() sys.stdout.flush()
def clear(self): def clear(self):
if self.fancy and not self.cleared: if self.fancy and not self.cleared:
sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL + sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL +

View File

@ -2,5 +2,6 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
# Empty base class for recipes. It defines no behaviour of its own;
# BasicNewsRecipe (calibre.web.feeds.news) derives from it.
class Recipe(object):
    pass

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
class RecipeInput(InputFormatPlugin):
    '''
    Input plugin that produces an OPF by running a recipe.

    The input is either a readable ``.recipe`` file, which is compiled, or
    the name of a builtin recipe, which is looked up by title.
    '''

    name        = 'Recipe Input'
    author      = 'Kovid Goyal'
    description = _('Download periodical content from the internet')
    file_types  = set(['recipe'])

    recommendations = set([
        ('chapter_mark', 'none', OptionRecommendation.HIGH),
        ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ])

    options = set([
        OptionRecommendation(name='test', recommended_value=False,
            help=_('Useful for recipe development. Forces '
            'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
        OptionRecommendation(name='username', recommended_value=None,
            help=_('Username for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='password', recommended_value=None,
            help=_('Password for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='lrf', recommended_value=False,
            help='Optimize fetching for subsequent conversion to LRF.'),
        ])

    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators, progress=lambda x, y: x):
        '''
        Run the recipe and return the path of the OPF it generated.

        :param recipe_or_file: Path to a readable recipe file. If the path is
            not readable, its base name (without extension) is used as the
            title of a builtin recipe instead.
        :param opts: Conversion options; passed to the recipe constructor.
        :param file_ext: Not used by this plugin.
        :param log: Log object passed to the recipe constructor.
        :param accelerators: Not used by this plugin.
        :param progress: Progress callback passed to the recipe constructor.
        :return: Absolute path of the first ``.opf`` file found in the
            current working directory after the download, or ``None`` if no
            such file exists.
        :raises ValueError: If no recipe could be obtained from
            ``recipe_or_file``.
        '''
        from calibre.web.feeds.recipes import \
            get_builtin_recipe, compile_recipe
        if os.access(recipe_or_file, os.R_OK):
            # Close the file deterministically instead of leaking the handle
            # until garbage collection.
            with open(recipe_or_file, 'rb') as recipe_file:
                recipe = compile_recipe(recipe_file.read())
        else:
            title = os.path.basename(recipe_or_file).rpartition('.')[0]
            recipe = get_builtin_recipe(title)

        if recipe is None:
            raise ValueError('%s is not a valid recipe file or builtin recipe' %
                    recipe_or_file)

        ro = recipe(opts, log, progress)
        ro.download()

        # NOTE(review): presumably disables flow splitting for the downloaded
        # content -- confirm against the output profile's use of flow_size.
        opts.output_profile.flow_size = 0

        # The listing order of os.listdir is arbitrary; the first match wins.
        for f in os.listdir('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

View File

@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.lrf import entity_to_unicode from calibre.ebooks.lrf import entity_to_unicode
from calibre.web import Recipe
from calibre.ebooks import render_html from calibre.ebooks import render_html
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.web.fetch.simple import option_parser as web2disk_option_parser from calibre.web.fetch.simple import option_parser as web2disk_option_parser
from calibre.web.fetch.simple import RecursiveFetcher from calibre.web.fetch.simple import RecursiveFetcher
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.utils.logging import Log
from calibre.ptempfile import PersistentTemporaryFile, \ from calibre.ptempfile import PersistentTemporaryFile, \
PersistentTemporaryDirectory PersistentTemporaryDirectory
class BasicNewsRecipe(object): class BasicNewsRecipe(Recipe):
''' '''
Abstract base class that contains logic needed in all feed fetchers. Abstract base class that contains logic needed in all feed fetchers.
''' '''
@ -443,40 +443,34 @@ class BasicNewsRecipe(object):
''' '''
raise NotImplementedError raise NotImplementedError
def __init__(self, options, parser, progress_reporter): def __init__(self, options, log, progress_reporter):
''' '''
Initialize the recipe. Initialize the recipe.
:param options: Parsed commandline options :param options: Parsed commandline options
:param parser: Command line option parser. Used to intelligently merge options. :param parser: Command line option parser. Used to intelligently merge options.
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional. :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
''' '''
self.log = Log() self.log = log
if options.verbose:
self.log.filter_level = self.log.DEBUG
if not isinstance(self.title, unicode): if not isinstance(self.title, unicode):
self.title = unicode(self.title, 'utf-8', 'replace') self.title = unicode(self.title, 'utf-8', 'replace')
for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'): self.debug = options.verbose > 1
setattr(self, attr, getattr(options, attr)) self.output_dir = os.getcwd()
self.verbose = options.verbose
self.test = options.test
self.username = options.username
self.password = options.password
self.lrf = options.lrf
self.output_dir = os.path.abspath(self.output_dir) self.output_dir = os.path.abspath(self.output_dir)
if options.test: if options.test:
self.max_articles_per_feed = 2 self.max_articles_per_feed = 2
self.simultaneous_downloads = min(4, self.simultaneous_downloads) self.simultaneous_downloads = min(4, self.simultaneous_downloads)
if self.debug: if self.debug:
self.verbose = True self.verbose = True
self.report_progress = progress_reporter self.report_progress = progress_reporter
self.username = self.password = None
#: If True optimize downloading for eventual conversion to LRF
self.lrf = False
defaults = parser.get_default_values()
for opt in options.__dict__.keys():
if getattr(options, opt) != getattr(defaults, opt, None):
setattr(self, opt, getattr(options, opt))
if isinstance(self.feeds, basestring): if isinstance(self.feeds, basestring):
self.feeds = eval(self.feeds) self.feeds = eval(self.feeds)
if isinstance(self.feeds, basestring): if isinstance(self.feeds, basestring):
@ -493,7 +487,6 @@ class BasicNewsRecipe(object):
'--timeout', str(self.timeout), '--timeout', str(self.timeout),
'--max-recursions', str(self.recursions), '--max-recursions', str(self.recursions),
'--delay', str(self.delay), '--delay', str(self.delay),
'--timeout', str(self.timeout),
] ]
if self.encoding is not None: if self.encoding is not None:
web2disk_cmdline.extend(['--encoding', self.encoding]) web2disk_cmdline.extend(['--encoding', self.encoding])
@ -520,9 +513,6 @@ class BasicNewsRecipe(object):
self.simultaneous_downloads = 1 self.simultaneous_downloads = 1
self.navbar = templates.NavBarTemplate() self.navbar = templates.NavBarTemplate()
self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
if '--base-font-size' not in self.html2lrf_options:
self.html2lrf_options.extend(['--base-font-size', '12'])
self.failed_downloads = [] self.failed_downloads = []
self.partial_failures = [] self.partial_failures = []
@ -557,7 +547,7 @@ class BasicNewsRecipe(object):
return self.postprocess_html(soup, first_fetch) return self.postprocess_html(soup, first_fetch)
def download(self, for_lrf=False): def download(self):
''' '''
Download and pre-process all articles from the feeds in this recipe. Download and pre-process all articles from the feeds in this recipe.
This method should be called only once on a particular Recipe instance. This method should be called only once on a particular Recipe instance.