Sync to pluginize

This commit is contained in:
John Schember 2009-04-27 18:48:55 -04:00
commit 9a363a02cc
25 changed files with 750 additions and 722 deletions

View File

@ -286,6 +286,8 @@ from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.comic.input import ComicInput
from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.txt.output import TXTOutput
@ -294,8 +296,8 @@ from calibre.ebooks.pdb.ereader.output import EREADEROutput
from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -96,6 +96,11 @@ class InputFormatPlugin(Plugin):
#: For example: ``set(['azw', 'mobi', 'prc'])``
file_types = set([])
#: If True, this input plugin generates a collection of images,
#: one per HTML file. You can obtain access to the images via the
#: convenience method, :method:`get_images`.
is_image_collection = False
#: Options shared by all Input format plugins. Do not override
#: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`.
@ -128,6 +133,14 @@ class InputFormatPlugin(Plugin):
#: (option_name, recommended_value, recommendation_level)
recommendations = set([])
def get_images(self):
    '''
    Return a list of absolute paths to the images, if this input plugin
    represents an image collection. The list of images is in the same order
    as the spine and the TOC.

    This implementation always raises :class:`NotImplementedError`.
    '''
    raise NotImplementedError
def convert(self, stream, options, file_ext, log, accelerators):
'''
This method must be implemented in sub-classes. It must return

View File

@ -148,6 +148,8 @@ class OutputProfile(Plugin):
remove_special_chars = re.compile(u'[\u200b\u00ad]')
# ADE falls to the ground in a dead faint when it sees an <object>
remove_object_tags = True
# The image size for comics
comic_screen_size = (584, 754)
class SonyReaderOutput(OutputProfile):
@ -162,6 +164,18 @@ class SonyReaderOutput(OutputProfile):
fbase = 12
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
class SonyReaderLandscapeOutput(SonyReaderOutput):
    # Variant of the SONY reader profile for use in landscape mode

    name = 'Sony Reader Landscape'
    short_name = 'sony-landscape'
    description = _('This profile is intended for the SONY PRS line. '
                    'The 500/505/700 etc, in landscape mode. Mainly useful '
                    'for comics.')

    # Comic pages are rendered at the same size as the screen
    screen_size = comic_screen_size = (784, 1012)
class MSReaderOutput(OutputProfile):
name = 'Microsoft Reader'
@ -223,4 +237,5 @@ class KindleOutput(OutputProfile):
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
output_profiles = [OutputProfile, SonyReaderOutput, MSReaderOutput,
MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput]
MobipocketOutput, HanlinV3Output, CybookG3Output, KindleOutput,
SonyReaderLandscapeOutput]

460
src/calibre/ebooks/comic/input.py Executable file
View File

@ -0,0 +1,460 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Based on ideas from comiclrf created by FangornUK.
'''
import os, shutil, traceback, textwrap
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import extract, CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.parallel import Server, ParallelJob
def extract_comic(path_to_comic_file):
    '''
    Unpack the comic archive into a newly created persistent temporary
    directory and return that directory.
    '''
    destination = PersistentTemporaryDirectory(suffix='_comic_extract')
    extract(path_to_comic_file, destination)
    return destination
def find_pages(dir, sort_on_mtime=False, verbose=False):
    '''
    Find valid comic pages in a previously un-archived comic.

    :param dir: Directory in which extracted comic lives
    :param sort_on_mtime: If True sort pages based on their last modified time.
                          Otherwise, sort alphabetically by file name.
    :param verbose: If True print the file names of the pages that were found.
    :return: Sorted list of paths to every jpeg/jpg/gif/png file found
             anywhere below ``dir``.
    '''
    extensions = ('.jpeg', '.jpg', '.gif', '.png')
    pages = []
    for dirpath, dirnames, filenames in os.walk(dir):
        for name in filenames:
            path = os.path.join(dirpath, name)
            # endswith() accepts a tuple, so one call tests every extension
            if path.lower().endswith(extensions):
                pages.append(path)
    # A key function is evaluated once per page, whereas a cmp function
    # would stat() both files again on every single comparison
    if sort_on_mtime:
        sort_key = lambda x: os.stat(x).st_mtime
    else:
        sort_key = os.path.basename
    pages.sort(key=sort_key)
    if verbose:
        # Single argument print(...) behaves the same with and without
        # print_function
        print('Found comic pages...')
        print('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
    return pages
class PageProcessor(list):
    '''
    Contains the actual image rendering logic. See :method:`render` and
    :method:`process_pages`.

    Creating an instance renders the page straight away. Afterwards the
    instance (a list) contains the paths of the PNG files written for it.
    '''

    def __init__(self, path_to_page, dest, opts, num):
        '''
        :param path_to_page: Path of the source image.
        :param dest: Directory into which the rendered images are written.
        :param opts: Options object. The attributes ``landscape``,
                     ``right2left``, ``disable_trim``, ``dont_normalize``,
                     ``keep_aspect_ratio``, ``wide``, ``dont_sharpen``,
                     ``despeckle``, ``colors`` and ``output_profile`` are read.
        :param num: Number of this page, used to name the output files. The
                    page numbered 0 also produces ``thumbnail.png``.
        '''
        list.__init__(self)
        self.path_to_page = path_to_page
        self.opts = opts
        self.num = num
        self.dest = dest
        self.rotate = False
        self.render()

    @staticmethod
    def _fit_to_box(sizex, sizey, box_width, box_height, box_aspect):
        '''
        Scale an image of size ``sizex`` x ``sizey`` so that it fits into a
        box of ``box_width`` x ``box_height`` while keeping its aspect ratio.

        :param box_aspect: Images whose width/height ratio is at most this
                           value are fitted to the box height, all others to
                           the box width.
        :return: ``(newsizex, newsizey, deltax, deltay)`` where the deltas are
                 the border widths needed on each side to fill the box.
        '''
        aspect = float(sizex) / float(sizey)
        if aspect <= box_aspect:
            newsizey = box_height
            newsizex = int(newsizey * aspect)
            deltax = (box_width - newsizex) // 2
            deltay = 0
        else:
            newsizex = box_width
            newsizey = int(newsizex / aspect)
            deltax = 0
            deltay = (box_height - newsizey) // 2
        return newsizex, newsizey, deltax, deltay

    def render(self):
        '''
        Read the source image, write the thumbnail if this is page 0, decide
        whether a landscape image is rotated or split into two halves and
        then call :method:`process_pages`.

        :raises RuntimeError: If a wand cannot be created.
        :raises IOError: If the image cannot be read.
        '''
        import calibre.utils.PythonMagickWand as pw
        img = pw.NewMagickWand()
        if img < 0:
            raise RuntimeError('Cannot create wand.')
        if not pw.MagickReadImage(img, self.path_to_page):
            # Must be %s: a bare % makes the formatting itself fail with a
            # ValueError, hiding the real error
            raise IOError('Failed to read image from: %s'%self.path_to_page)
        width = pw.MagickGetImageWidth(img)
        height = pw.MagickGetImageHeight(img)
        if self.num == 0: # First image so create a thumbnail from it
            thumb = pw.CloneMagickWand(img)
            if thumb < 0:
                raise RuntimeError('Cannot create wand.')
            pw.MagickThumbnailImage(thumb, 60, 80)
            pw.MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png'))
            pw.DestroyMagickWand(thumb)
        self.pages = [img]
        if width > height:
            if self.opts.landscape:
                self.rotate = True
            else:
                # Split the landscape image into a left and a right half
                split1, split2 = map(pw.CloneMagickWand, (img, img))
                pw.DestroyMagickWand(img)
                if split1 < 0 or split2 < 0:
                    raise RuntimeError('Cannot create wand.')
                pw.MagickCropImage(split1, (width//2)-1, height, 0, 0)
                pw.MagickCropImage(split2, (width//2)-1, height, width//2, 0)
                self.pages = [split2, split1] if self.opts.right2left else [split1, split2]
        self.process_pages()

    def process_pages(self):
        '''
        Apply rotation, trimming, normalization, resizing, sharpening,
        grayscale conversion, despeckling and quantization (as selected by
        the options) to every wand in ``self.pages``, write each result as a
        PNG into ``self.dest`` and append its path to this list.
        '''
        import calibre.utils.PythonMagickWand as p

        for i, wand in enumerate(self.pages):
            pw = p.NewPixelWand()
            try:
                if pw < 0:
                    raise RuntimeError('Cannot create wand.')
                p.PixelSetColor(pw, 'white')

                p.MagickSetImageBorderColor(wand, pw)
                if self.rotate:
                    p.MagickRotateImage(wand, pw, -90)

                # 25 percent fuzzy trim?
                if not self.opts.disable_trim:
                    p.MagickTrimImage(wand, 25*65535//100)
                p.MagickSetImagePage(wand, 0,0,0,0) #Clear page after trim, like a "+repage"
                # Do the Photoshop "Auto Levels" equivalent
                if not self.opts.dont_normalize:
                    p.MagickNormalizeImage(wand)
                sizex = p.MagickGetImageWidth(wand)
                sizey = p.MagickGetImageHeight(wand)

                SCRWIDTH, SCRHEIGHT = self.opts.output_profile.comic_screen_size
                screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT)

                if self.opts.keep_aspect_ratio:
                    # Preserve the aspect ratio by adding border
                    newsizex, newsizey, deltax, deltay = self._fit_to_box(
                            sizex, sizey, SCRWIDTH, SCRHEIGHT, screen_aspect)
                    p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
                    p.MagickSetImageBorderColor(wand, pw)
                    p.MagickBorderImage(wand, pw, deltax, deltay)
                elif self.opts.wide:
                    # Keep aspect and Use device height as scaled image width so landscape mode is clean
                    # Get dimensions of the landscape mode screen
                    # Add 25px back to height for the battery bar.
                    wscreenx = SCRHEIGHT + 25
                    wscreeny = int(wscreenx / screen_aspect)
                    newsizex, newsizey, deltax, deltay = self._fit_to_box(
                            sizex, sizey, wscreenx, wscreeny, screen_aspect)
                    p.MagickResizeImage(wand, newsizex, newsizey, p.CatromFilter, 1.0)
                    p.MagickSetImageBorderColor(wand, pw)
                    p.MagickBorderImage(wand, pw, deltax, deltay)
                else:
                    p.MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, p.CatromFilter, 1.0)

                if not self.opts.dont_sharpen:
                    p.MagickSharpenImage(wand, 0.0, 1.0)

                p.MagickSetImageType(wand, p.GrayscaleType)

                if self.opts.despeckle:
                    p.MagickDespeckleImage(wand)

                p.MagickQuantizeImage(wand, self.opts.colors, p.RGBColorspace, 0, 1, 0)
                dest = '%d_%d.png'%(self.num, i)
                dest = os.path.join(self.dest, dest)
                # Written under a temporary name first, then renamed
                p.MagickWriteImage(wand, dest+'8')
                os.rename(dest+'8', dest)
                self.append(dest)
            finally:
                if pw > 0:
                    p.DestroyPixelWand(pw)
                p.DestroyMagickWand(wand)
def render_pages(tasks, dest, opts, notification=None):
    '''
    Entry point for the job server.

    :param tasks: Iterable of ``(num, path)`` pairs, one per page to render.
    :param dest: Directory into which the rendered pages are written.
    :param opts: Options object passed on to :class:`PageProcessor`;
                 ``opts.verbose`` adds the traceback to failure messages.
    :param notification: Optional callable, called as
                         ``notification(0.5, msg)`` after every page.
    :return: ``(pages, failures)``: the paths of the rendered pages and the
             paths of the source images that could not be rendered.
    '''
    failures, pages = [], []
    from calibre.utils.PythonMagickWand import ImageMagick
    with ImageMagick():
        for num, path in tasks:
            try:
                pages.extend(PageProcessor(path, dest, opts, num))
                msg = _('Rendered %s')%path
            except Exception:
                # Only real errors count as a failed page; KeyboardInterrupt
                # and SystemExit are allowed to propagate
                failures.append(path)
                # Format before appending the traceback: a traceback can
                # contain % characters which would break the formatting
                msg = _('Failed %s')%path
                if opts.verbose:
                    msg += '\n' + traceback.format_exc()
            if notification is not None:
                notification(0.5, msg)

    return pages, failures
class JobManager(object):
    '''
    Simple job manager responsible for keeping track of overall progress.
    '''

    def __init__(self, total, update):
        '''
        :param total: Number of status updates that make up the whole job.
        :param update: Callable, called as ``update(fraction, msg)``.
        '''
        self.total, self.update, self.done = total, update, 0
        # The remaining hooks simply hand back whatever they are given
        for hook in ('add_job', 'output', 'start_work', 'job_done'):
            setattr(self, hook, lambda j: j)

    def status_update(self, job):
        '''
        Count one more finished unit of work and report the fraction done
        together with ``job.msg``.
        '''
        self.done += 1
        fraction = float(self.done)/self.total
        self.update(fraction, job.msg)
def process_pages(pages, opts, update, tdir):
    '''
    Render all identified comic pages.

    The pages are split into tasks by the job server, each task is run as a
    ``render_pages`` job writing into ``tdir`` and the results of all jobs
    are combined.

    :return: ``(rendered_pages, failures)``
    '''
    from calibre.utils.PythonMagickWand import ImageMagick
    ImageMagick
    progress_tracker = JobManager(len(pages), update)
    server = Server()
    jobs = []
    for chunk in server.split(pages):
        job = ParallelJob('render_pages', lambda s:s,
                job_manager=progress_tracker, args=[chunk, tdir, opts])
        jobs.append(job)
        server.add_job(job)
    server.wait()
    server.killall()
    server.close()

    rendered, failed = [], []
    for job in jobs:
        if job.result is None:
            raise Exception(_('Failed to process comic: %s\n\n%s')%(job.exception, job.traceback))
        job_pages, job_failures = job.result
        rendered.extend(job_pages)
        failed.extend(job_failures)
    return rendered, failed
class ComicInput(InputFormatPlugin):
    # Input plugin that turns comic archives into a set of page images,
    # one XHTML wrapper per image, an OPF and an NCX table of contents.

    name        = 'Comic Input'
    author      = 'Kovid Goyal'
    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
    file_types  = set(['cbz', 'cbr', 'cbc'])
    is_image_collection = True

    options = set([
        OptionRecommendation(name='colors', recommended_value=64,
            help=_('Number of colors for grayscale image conversion. Default: %default')),
        OptionRecommendation(name='dont_normalize', recommended_value=False,
            help=_('Disable normalize (improve contrast) color range '
            'for pictures. Default: False')),
        OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
            help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
        OptionRecommendation(name='dont_sharpen', recommended_value=False,
            help=_('Disable sharpening.')),
        OptionRecommendation(name='disable_trim', recommended_value=False,
            help=_('Disable trimming of comic pages. For some comics, '
                     'trimming might remove content as well as borders.')),
        # The name must be spelled landscape, the page rendering code reads
        # opts.landscape
        OptionRecommendation(name='landscape', recommended_value=False,
            help=_("Don't split landscape images into two portrait images")),
        OptionRecommendation(name='wide', recommended_value=False,
            help=_("Keep aspect ratio and scale image using screen height as "
            "image width for viewing in landscape mode.")),
        OptionRecommendation(name='right2left', recommended_value=False,
              help=_('Used for right-to-left publications like manga. '
              'Causes landscape pages to be split into portrait pages '
              'from right to left.')),
        OptionRecommendation(name='despeckle', recommended_value=False,
              help=_('Enable Despeckle. Reduces speckle noise. '
              'May greatly increase processing time.')),
        OptionRecommendation(name='no_sort', recommended_value=False,
              help=_("Don't sort the files found in the comic "
              "alphabetically by name. Instead use the order they were "
              "added to the comic.")),
        OptionRecommendation(name='no_process', recommended_value=False,
              help=_("Apply no processing to the image")),
        ])

    recommendations = set([
        ('margin_left', 0, OptionRecommendation.HIGH),
        ('margin_top',  0, OptionRecommendation.HIGH),
        ('margin_right', 0, OptionRecommendation.HIGH),
        ('margin_bottom', 0, OptionRecommendation.HIGH),
        ('insert_blank_line', False, OptionRecommendation.HIGH),
        ('remove_paragraph_spacing', False, OptionRecommendation.HIGH),
        ('dont_justify', True, OptionRecommendation.HIGH),
        ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
        ('chapter', None, OptionRecommendation.HIGH),
        # NOTE(review): was spelled 'page_breaks_brefore' -- confirm the
        # pipeline option is indeed named page_breaks_before
        ('page_breaks_before', None, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ])

    def get_comics_from_collection(self, stream):
        '''
        Unzip a comic collection and read its ``comics.txt`` index.

        Every line of the index has the form ``path:title``. If the title is
        missing the file name without its extension is used.

        :return: List of ``[title, path]`` for every readable comic.
        :raises ValueError: If there is no ``comics.txt`` or no listed comic
                            is readable.
        '''
        from calibre.libunzip import extract as zipextract
        tdir = PersistentTemporaryDirectory('_comic_collection')
        zipextract(stream, tdir)
        comics = []
        with CurrentDir(tdir):
            if not os.path.exists('comics.txt'):
                raise ValueError('%s is not a valid comic collection'
                        %stream.name)
            with open('comics.txt', 'rb') as index:
                raw = index.read().decode('utf-8')
            for line in raw.splitlines():
                if not line.strip():
                    # A blank line would otherwise resolve to tdir itself,
                    # which is readable, and be listed as a comic
                    continue
                fname, sep, title = line.partition(':')
                fname = os.path.join(tdir, *fname.split('/'))
                if not title:
                    title = os.path.basename(fname).rpartition('.')[0]
                if os.access(fname, os.R_OK):
                    comics.append([title, fname])
        if not comics:
            raise ValueError('%s has no comics'%stream.name)
        return comics

    def get_pages(self, comic, tdir2):
        '''
        Extract the comic and return the list of page images in ``tdir2``.

        With the ``no_process`` option the images are only copied into
        ``tdir2``, otherwise they are rendered by ``process_pages``.

        :raises ValueError: If the comic contains no (valid) pages.
        '''
        tdir = extract_comic(comic)
        new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
                verbose=self.opts.verbose)
        if not new_pages:
            raise ValueError('Could not find any pages in the comic: %s'
                    %comic)
        if self.opts.no_process:
            copied = []
            for page in new_pages:
                target = os.path.join(tdir2, os.path.basename(page))
                shutil.copyfile(page, target)
                copied.append(target)
            new_pages = copied
        else:
            new_pages, failures = process_pages(new_pages, self.opts,
                    self.progress, tdir2)
            if not new_pages:
                raise ValueError('Could not find any valid pages in comic: %s'
                        % comic)
            if failures:
                self.log.warning('Could not process the following pages '
                    '(run with --verbose to see why):')
                for f in failures:
                    self.log.warning('\t', f)
        return new_pages

    def get_images(self):
        '''
        Return the page images collected by the last call to
        :method:`convert`, in the same order as the spine.
        '''
        return self._images

    def convert(self, stream, opts, file_ext, log, accelerators,
            progress=lambda p, m : m):
        '''
        Convert the comic (or, for ``cbc``, the collection of comics) in
        ``stream`` into page images with XHTML wrappers and write
        ``metadata.opf`` and ``toc.ncx`` into the current directory.

        :return: Absolute path to the generated ``metadata.opf``.
        :raises ValueError: If no comic pages are found.
        '''
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log, self.progress = opts, log, progress
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            # Every comic of a collection gets its own sub-directory
            cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages: continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s'%stream.name)

        mi  = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
            [_('Unknown')])
        opf = OPFCreator(os.path.abspath('.'), mi)
        entries = []

        def href(x):
            if len(comics) == 1: return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                        play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
                for i, x in enumerate(wrappers):
                    stoc.add_item(href(x), None,
                            _('Page')+' %d'%(i+1), play_order=po)
                    po += 1
        opf.set_toc(toc)
        # Close both files explicitly so that everything is on disk before
        # the path is handed back to the caller
        with open('metadata.opf', 'wb') as m:
            with open('toc.ncx', 'wb') as n:
                opf.render(m, n, 'toc.ncx')
        return os.path.abspath('metadata.opf')

    def create_wrappers(self, pages):
        '''
        Write one ``page_N.xhtml`` file next to the images for every page,
        each containing just the image.

        :return: List of paths to the wrapper files, in page order.
        '''
        from calibre.ebooks.oeb.base import XHTML_NS
        wrappers = []
        WRAPPER = textwrap.dedent('''\
        <html xmlns="%s">
            <head>
                <title>Page #%d</title>
                <style type="text/css">
                    @page { margin:0pt; padding: 0pt}
                    body { margin: 0pt; padding: 0pt}
                    div { text-align: center }
                </style>
            </head>
            <body>
                <div>
                    <img src="%s" alt="comic page #%d" />
                </div>
            </body>
        </html>
        ''')
        dir = os.path.dirname(pages[0])
        for i, page in enumerate(pages):
            wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
            page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
            with open(page, 'wb') as f:
                f.write(wrapper)
            wrappers.append(page)
        return wrappers

View File

@ -47,12 +47,12 @@ def print_help(parser, log):
def check_command_line_options(parser, args, log):
if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'):
print_help(parser)
print_help(parser, log)
log.error('\n\nYou must specify the input AND output files')
raise SystemExit(1)
input = os.path.abspath(args[1])
if not os.access(input, os.R_OK):
if not input.endswith('.recipe') and not os.access(input, os.R_OK):
log.error('Cannot read from', input)
raise SystemExit(1)
@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber):
if rec.level < rec.HIGH:
option_recommendation_to_cli_option(add_option, rec)
option_recommendation_to_cli_option(parser.add_option,
plumber.get_option_by_name('list_recipes'))
def option_parser():
return OptionParser(usage=USAGE)

View File

@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer',
OptionRecommendation(name='language',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the language.')),
OptionRecommendation(name='list_recipes',
recommended_value=False, help=_('List available recipes.')),
]
input_fmt = os.path.splitext(self.input)[1]
@ -525,6 +529,13 @@ OptionRecommendation(name='language',
self.setup_options()
if self.opts.verbose:
self.log.filter_level = self.log.DEBUG
if self.opts.list_recipes:
from calibre.web.feeds.recipes import titles
self.log('Available recipes:')
for title in sorted(titles):
self.log('\t'+title)
self.log('%d recipes available'%len(titles))
raise SystemExit(0)
# Run any preprocess plugins
from calibre.customize.ui import run_plugins_on_preprocess
@ -535,8 +546,13 @@ OptionRecommendation(name='language',
accelerators = {}
tdir = PersistentTemporaryDirectory('_plumber')
stream = self.input if self.input_fmt == 'recipe' else \
open(self.input, 'rb')
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
self.opts.lrf = True
self.oeb = self.input_plugin(stream, self.opts,
self.input_fmt, self.log,
accelerators, tdir)
if self.opts.debug_input is not None:

View File

@ -882,6 +882,9 @@ class LitContainer(object):
unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
return str(unbin)
def get_metadata(self):
    '''
    Return the result of :method:`_read_meta`.
    '''
    metadata = self._read_meta()
    return metadata
class LitReader(OEBReader):
Container = LitContainer

View File

@ -1,562 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Based on ideas from comiclrf created by FangornUK.
'''
import os, sys, shutil, traceback, textwrap, fnmatch
from uuid import uuid4
from calibre import extract, terminal_controller, __appname__, __version__
from calibre.utils.config import Config, StringConfig
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.parallel import Server, ParallelJob
from calibre.utils.terminfo import ProgressBar
from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, ImageStream, ImageBlock
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.epub.from_html import config as html2epub_config, convert as html2epub
from calibre.customize.ui import run_plugins_on_preprocess
try:
from calibre.utils.PythonMagickWand import \
NewMagickWand, NewPixelWand, \
MagickSetImageBorderColor, \
MagickReadImage, MagickRotateImage, \
MagickTrimImage, PixelSetColor,\
MagickNormalizeImage, MagickGetImageWidth, \
MagickGetImageHeight, \
MagickResizeImage, MagickSetImageType, \
GrayscaleType, CatromFilter, MagickSetImagePage, \
MagickBorderImage, MagickSharpenImage, MagickDespeckleImage, \
MagickQuantizeImage, RGBColorspace, \
MagickWriteImage, DestroyPixelWand, \
DestroyMagickWand, CloneMagickWand, \
MagickThumbnailImage, MagickCropImage, ImageMagick
_imagemagick_loaded = True
except:
_imagemagick_loaded = False
# Screen sizes that comic pages are rendered for, looked up by profile name.
PROFILES = {
            # Name : (width, height) in pixels
            'prs500':(584, 754),
            # The SONY's LRF renderer (on the PRS500) only uses the first 800x600 block of the image
            'prs500-landscape': (784, 1012)
            }
def extract_comic(path_to_comic_file):
    '''
    Create a persistent temporary directory, un-archive the comic file into
    it and return the directory.
    '''
    extraction_dir = PersistentTemporaryDirectory(suffix='_comic_extract')
    extract(path_to_comic_file, extraction_dir)
    return extraction_dir
def find_pages(dir, sort_on_mtime=False, verbose=False):
    '''
    Find valid comic pages in a previously un-archived comic.

    :param dir: Directory in which extracted comic lives
    :param sort_on_mtime: If True sort pages based on their last modified time.
                          Otherwise, sort alphabetically by file name.
    :param verbose: If True print the file names of the pages that were found.
    :return: Sorted list of paths to every jpeg/jpg/gif/png file found
             anywhere below ``dir``.
    '''
    extensions = ('.jpeg', '.jpg', '.gif', '.png')
    pages = []
    for dirpath, dirnames, filenames in os.walk(dir):
        for name in filenames:
            path = os.path.join(dirpath, name)
            # A single endswith() call checks all extensions at once
            if path.lower().endswith(extensions):
                pages.append(path)
    # Sorting with a key calls stat() once per page instead of twice per
    # comparison as a cmp function would
    if sort_on_mtime:
        pages.sort(key=lambda x: os.stat(x).st_mtime)
    else:
        pages.sort(key=os.path.basename)
    if verbose:
        # print(...) with one argument gives the same output whether or not
        # print is a statement
        print('Found comic pages...')
        print('\t'+'\n\t'.join([os.path.basename(p) for p in pages]))
    return pages
class PageProcessor(list):
    '''
    Contains the actual image rendering logic. See :method:`render` and
    :method:`process_pages`.

    Creating an instance renders the page straight away. Afterwards the
    instance (a list) contains the paths of the PNG files written for it.
    '''

    def __init__(self, path_to_page, dest, opts, num):
        '''
        :param path_to_page: Path of the source image.
        :param dest: Directory into which the rendered images are written.
        :param opts: Options object. The attributes ``landscape``,
                     ``right2left``, ``disable_trim``, ``dont_normalize``,
                     ``keep_aspect_ratio``, ``wide``, ``dont_sharpen``,
                     ``despeckle``, ``colors`` and ``profile`` are read.
        :param num: Number of this page, used to name the output files. The
                    page numbered 0 also produces ``thumbnail.png``.
        '''
        list.__init__(self)
        self.path_to_page = path_to_page
        self.opts = opts
        self.num = num
        self.dest = dest
        self.rotate = False
        self.render()

    @staticmethod
    def _fit_to_box(sizex, sizey, box_width, box_height, box_aspect):
        '''
        Scale an image of size ``sizex`` x ``sizey`` so that it fits into a
        box of ``box_width`` x ``box_height`` while keeping its aspect ratio.

        :param box_aspect: Images whose width/height ratio is at most this
                           value are fitted to the box height, all others to
                           the box width.
        :return: ``(newsizex, newsizey, deltax, deltay)`` where the deltas are
                 the border widths needed on each side to fill the box.
        '''
        aspect = float(sizex) / float(sizey)
        if aspect <= box_aspect:
            newsizey = box_height
            newsizex = int(newsizey * aspect)
            deltax = (box_width - newsizex) // 2
            deltay = 0
        else:
            newsizex = box_width
            newsizey = int(newsizex / aspect)
            deltax = 0
            deltay = (box_height - newsizey) // 2
        return newsizex, newsizey, deltax, deltay

    def render(self):
        '''
        Read the source image, write the thumbnail if this is page 0, decide
        whether a landscape image is rotated or split into two halves and
        then call :method:`process_pages`.

        :raises RuntimeError: If a wand cannot be created.
        :raises IOError: If the image cannot be read.
        '''
        img = NewMagickWand()
        if img < 0:
            raise RuntimeError('Cannot create wand.')
        if not MagickReadImage(img, self.path_to_page):
            # Must be %s: a bare % makes the formatting itself fail with a
            # ValueError, hiding the real error
            raise IOError('Failed to read image from: %s'%self.path_to_page)
        width = MagickGetImageWidth(img)
        height = MagickGetImageHeight(img)
        if self.num == 0: # First image so create a thumbnail from it
            thumb = CloneMagickWand(img)
            if thumb < 0:
                raise RuntimeError('Cannot create wand.')
            MagickThumbnailImage(thumb, 60, 80)
            MagickWriteImage(thumb, os.path.join(self.dest, 'thumbnail.png'))
            DestroyMagickWand(thumb)
        self.pages = [img]
        if width > height:
            if self.opts.landscape:
                self.rotate = True
            else:
                # Split the landscape image into a left and a right half
                split1, split2 = map(CloneMagickWand, (img, img))
                DestroyMagickWand(img)
                if split1 < 0 or split2 < 0:
                    raise RuntimeError('Cannot create wand.')
                MagickCropImage(split1, (width//2)-1, height, 0, 0)
                MagickCropImage(split2, (width//2)-1, height, width//2, 0)
                self.pages = [split2, split1] if self.opts.right2left else [split1, split2]
        self.process_pages()

    def process_pages(self):
        '''
        Apply rotation, trimming, normalization, resizing, sharpening,
        grayscale conversion, despeckling and quantization (as selected by
        the options) to every wand in ``self.pages``, write each result as a
        PNG into ``self.dest`` and append its path to this list.
        '''
        for i, wand in enumerate(self.pages):
            pw = NewPixelWand()
            try:
                if pw < 0:
                    raise RuntimeError('Cannot create wand.')
                PixelSetColor(pw, 'white')

                MagickSetImageBorderColor(wand, pw)
                if self.rotate:
                    MagickRotateImage(wand, pw, -90)

                # 25 percent fuzzy trim?
                if not self.opts.disable_trim:
                    MagickTrimImage(wand, 25*65535//100)
                MagickSetImagePage(wand, 0,0,0,0) #Clear page after trim, like a "+repage"
                # Do the Photoshop "Auto Levels" equivalent
                if not self.opts.dont_normalize:
                    MagickNormalizeImage(wand)
                sizex = MagickGetImageWidth(wand)
                sizey = MagickGetImageHeight(wand)

                SCRWIDTH, SCRHEIGHT = PROFILES[self.opts.profile]
                screen_aspect = float(SCRWIDTH) / float(SCRHEIGHT)

                if self.opts.keep_aspect_ratio:
                    # Preserve the aspect ratio by adding border
                    newsizex, newsizey, deltax, deltay = self._fit_to_box(
                            sizex, sizey, SCRWIDTH, SCRHEIGHT, screen_aspect)
                    MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0)
                    MagickSetImageBorderColor(wand, pw)
                    MagickBorderImage(wand, pw, deltax, deltay)
                elif self.opts.wide:
                    # Keep aspect and Use device height as scaled image width so landscape mode is clean
                    # Get dimensions of the landscape mode screen
                    # Add 25px back to height for the battery bar.
                    wscreenx = SCRHEIGHT + 25
                    wscreeny = int(wscreenx / screen_aspect)
                    newsizex, newsizey, deltax, deltay = self._fit_to_box(
                            sizex, sizey, wscreenx, wscreeny, screen_aspect)
                    MagickResizeImage(wand, newsizex, newsizey, CatromFilter, 1.0)
                    MagickSetImageBorderColor(wand, pw)
                    MagickBorderImage(wand, pw, deltax, deltay)
                else:
                    MagickResizeImage(wand, SCRWIDTH, SCRHEIGHT, CatromFilter, 1.0)

                if not self.opts.dont_sharpen:
                    MagickSharpenImage(wand, 0.0, 1.0)

                MagickSetImageType(wand, GrayscaleType)

                if self.opts.despeckle:
                    MagickDespeckleImage(wand)

                MagickQuantizeImage(wand, self.opts.colors, RGBColorspace, 0, 1, 0)
                dest = '%d_%d.png'%(self.num, i)
                dest = os.path.join(self.dest, dest)
                # Written under a temporary name first, then renamed
                MagickWriteImage(wand, dest+'8')
                os.rename(dest+'8', dest)
                self.append(dest)
            finally:
                if pw > 0:
                    DestroyPixelWand(pw)
                DestroyMagickWand(wand)
def render_pages(tasks, dest, opts, notification=None):
    '''
    Entry point for the job server.

    :param tasks: Iterable of ``(num, path)`` pairs, one per page to render.
    :param dest: Directory into which the rendered pages are written.
    :param opts: Options object passed on to :class:`PageProcessor`;
                 ``opts.verbose`` adds the traceback to failure messages.
    :param notification: Optional callable, called as
                         ``notification(0.5, msg)`` after every page.
    :return: ``(pages, failures)``: the paths of the rendered pages and the
             paths of the source images that could not be rendered.
    '''
    failures, pages = [], []
    with ImageMagick():
        for num, path in tasks:
            try:
                pages.extend(PageProcessor(path, dest, opts, num))
                msg = _('Rendered %s')%path
            except Exception:
                # Only real errors count as a failed page; KeyboardInterrupt
                # and SystemExit are allowed to propagate
                failures.append(path)
                # Format before appending the traceback: a traceback can
                # contain % characters which would break the formatting
                msg = _('Failed %s')%path
                if opts.verbose:
                    msg += '\n' + traceback.format_exc()
            if notification is not None:
                notification(0.5, msg)

    return pages, failures
class JobManager(object):
    '''
    Simple job manager responsible for keeping track of overall progress.
    '''

    def __init__(self, total, update):
        '''
        :param total: Number of status updates that make up the whole job.
        :param update: Callable, called as ``update(fraction, msg)``.
        '''
        self.total = total
        self.update = update
        self.done = 0
        # All other hooks just return the job they receive
        for hook_name in ('add_job', 'output', 'start_work', 'job_done'):
            setattr(self, hook_name, lambda j: j)

    def status_update(self, job):
        '''
        Count one more finished unit of work and report the fraction done
        together with ``job.msg``.
        '''
        self.done += 1
        completed = float(self.done)/self.total
        self.update(completed, job.msg)
def process_pages(pages, opts, update):
    '''
    Render all identified comic pages.

    The pages are split into tasks by the job server and every task is run
    as a ``render_pages`` job writing into a new temporary directory.

    :return: ``(rendered_pages, failures, tdir)``
    :raises RuntimeError: If ImageMagick could not be loaded.
    '''
    if not _imagemagick_loaded:
        raise RuntimeError('Failed to load ImageMagick')

    tdir = PersistentTemporaryDirectory('_comic2lrf_pp')
    progress_tracker = JobManager(len(pages), update)
    server = Server()
    jobs = []
    for chunk in server.split(pages):
        job = ParallelJob('render_pages', lambda s:s,
                job_manager=progress_tracker, args=[chunk, tdir, opts])
        jobs.append(job)
        server.add_job(job)
    server.wait()
    server.killall()
    server.close()

    rendered, failed = [], []
    for job in jobs:
        if job.result is None:
            raise Exception(_('Failed to process comic: %s\n\n%s')%(job.exception, job.traceback))
        job_pages, job_failures = job.result
        rendered.extend(job_pages)
        failed.extend(job_failures)
    return rendered, failed, tdir
def config(defaults=None,output_format='lrf'):
    '''
    Build the set of options that control comic conversion.

    :param defaults: If None the options are backed by the ``comic``
                     :class:`Config`, otherwise by a :class:`StringConfig`
                     created from ``defaults``.
    :param output_format: The ``no_process`` option is only added when this
                          is ``'pdf'``.
    :return: The configuration object with all options added.
    '''
    desc = _('Options to control the conversion of comics (CBR, CBZ) files into ebooks')
    if defaults is None:
        c = Config('comic', desc)
    else:
        c = StringConfig(defaults, desc)
    # Metadata and output location
    c.add_opt('title', ['-t', '--title'],
              help=_('Title for generated ebook. Default is to use the filename.'))
    c.add_opt('author', ['-a', '--author'],
              help=_('Set the author in the metadata of the generated ebook. Default is %default'),
              default=_('Unknown'))
    c.add_opt('output', ['-o', '--output'],
              help=_('Path to output file. By default a file is created in the current directory.'))
    # Image processing
    c.add_opt('colors', ['-c', '--colors'], type='int', default=64,
              help=_('Number of colors for grayscale image conversion. Default: %default'))
    c.add_opt('dont_normalize', ['-n', '--disable-normalize'], default=False,
              help=_('Disable normalize (improve contrast) color range for pictures. Default: False'))
    c.add_opt('keep_aspect_ratio', ['-r', '--keep-aspect-ratio'], default=False,
              help=_('Maintain picture aspect ratio. Default is to fill the screen.'))
    c.add_opt('dont_sharpen', ['-s', '--disable-sharpen'], default=False,
              help=_('Disable sharpening.'))
    c.add_opt('disable_trim', ['--disable-trim'], default=False,
              help=_('Disable trimming of comic pages. For some comics, '
                     'trimming might remove content as well as borders.'))
    c.add_opt('landscape', ['-l', '--landscape'], default=False,
              help=_("Don't split landscape images into two portrait images"))
    c.add_opt('wide', ['-w', '--wide-aspect'], default=False,
              help=_("Keep aspect ratio and scale image using screen height as image width for viewing in landscape mode."))
    c.add_opt('right2left', ['--right2left'], default=False, action='store_true',
              help=_('Used for right-to-left publications like manga. Causes landscape pages to be split into portrait pages from right to left.'))
    c.add_opt('despeckle', ['-d', '--despeckle'], default=False,
              help=_('Enable Despeckle. Reduces speckle noise. May greatly increase processing time.'))
    c.add_opt('no_sort', ['--no-sort'], default=False,
              help=_("Don't sort the files found in the comic alphabetically by name. Instead use the order they were added to the comic."))
    # Target device; the choices are the keys of PROFILES
    c.add_opt('profile', ['-p', '--profile'], default='prs500', choices=PROFILES.keys(),
              help=_('Choose a profile for the device you are generating this file for. The default is the SONY PRS-500 with a screen size of 584x754 pixels. This is suitable for any reader with the same screen size. Choices are %s')%PROFILES.keys())
    # Diagnostics
    c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
              help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
    c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False,
              help=_("Don't show progress bar."))
    if output_format == 'pdf':
        c.add_opt('no_process',['--no_process'], default=False,
                  help=_("Apply no processing to the image"))
    return c
def option_parser(output_format='lrf'):
    '''
    Return a command line option parser for the options created by
    :function:`config` for ``output_format``.
    '''
    parser_config = config(output_format=output_format)
    usage = _('''\
%prog [options] comic.cb[z|r]
Convert a comic in a CBZ or CBR file to an ebook.
''')
    return parser_config.option_parser(usage=usage)
def create_epub(pages, profile, opts, thumbnail=None):
    '''
    Create an EPUB from the rendered comic pages.

    An HTML wrapper is written next to the images for every page, followed
    by a ``metadata.opf`` listing the wrappers, which is then converted by
    ``html2epub`` to ``opts.output``.

    :param pages: Paths of the page images, all in the same directory.
    :param profile: Not used by this function.
    :param opts: Options object; ``title``, ``author`` and ``output`` are read.
    :param thumbnail: Not used by this function.
    '''
    wrappers = []
    WRAPPER = textwrap.dedent('''\
        <html>
            <head>
                <title>Page #%d</title>
                <style type="text/css">@page {margin:0pt; padding: 0pt;}</style>
            </head>
            <body style="margin: 0pt; padding: 0pt">
                <div style="text-align:center">
                    <img src="%s" alt="comic page #%d" />
                </div>
            </body>
        </html>
        ''')
    dir = os.path.dirname(pages[0])
    for i, page in enumerate(pages):
        wrapper = WRAPPER%(i+1, os.path.basename(page), i+1)
        page = os.path.join(dir, 'page_%d.html'%(i+1))
        with open(page, 'wb') as f:
            f.write(wrapper)
        wrappers.append(page)

    mi  = MetaInformation(opts.title, [opts.author])
    opf = OPFCreator(dir, mi)
    opf.create_manifest([(w, None) for w in wrappers])
    opf.create_spine(wrappers)
    metadata = os.path.join(dir, 'metadata.opf')
    # The OPF must be completely written and closed before html2epub reads it
    with open(metadata, 'wb') as f:
        opf.render(f)
    opts2 = html2epub_config('margin_left=0\nmargin_right=0\nmargin_top=0\nmargin_bottom=0').parse()
    opts2.output = opts.output
    html2epub(metadata, opts2)
def create_lrf(pages, profile, opts, thumbnail=None):
width, height = PROFILES[profile]
ps = {}
ps['topmargin'] = 0
ps['evensidemargin'] = 0
ps['oddsidemargin'] = 0
ps['textwidth'] = width
ps['textheight'] = height
book = Book(title=opts.title, author=opts.author,
bookid=uuid4().hex,
publisher='%s %s'%(__appname__, __version__), thumbnail=thumbnail,
category='Comic', pagestyledefault=ps,
booksetting=BookSetting(screenwidth=width, screenheight=height))
for page in pages:
imageStream = ImageStream(page)
_page = book.create_page()
_page.append(ImageBlock(refstream=imageStream,
blockwidth=width, blockheight=height, xsize=width,
ysize=height, x1=width, y1=height))
book.append(_page)
book.renderLrf(open(opts.output, 'wb'))
print _('Output written to'), opts.output
def create_pdf(pages, profile, opts, thumbnail=None,toc=None):
    '''
    Create a PDF with one image per page and, optionally, an outline.

    :param pages: Paths to the page images, in reading order.
    :param profile: Key into ``PROFILES``, gives the target (width, height).
    :param opts: Options object, ``output``, ``author``, ``title`` and
                 ``keep_aspect_ratio`` are read.
    :param thumbnail: Unused here, accepted so that all ``create_*``
                      functions share one signature.
    :param toc: Optional list of ``(path, first_page_index)`` tuples, one
                per source comic. An outline is only generated when it has
                more than one entry. The list is not modified.
    :raises RuntimeError: If an image wand cannot be created.
    :raises IOError: If a page image cannot be read.
    '''
    width, height = PROFILES[profile]

    from reportlab.pdfgen import canvas

    cur_page = 0
    heading = []    # Outline title last emitted at each nesting level
    toc_index = 0   # Next entry of toc that is waiting for its page
    rem = 0         # Offset of the last separator shared by all toc paths
    if toc is not None:
        if len(toc) == 1:
            toc = None
        else:
            # Only the part of each path after the directory common to all
            # comics is used for outline titles. commonprefix() compares
            # character by character and copes with an empty list and with
            # paths that are prefixes of each other.
            prefix = os.path.commonprefix([entry[0] for entry in toc])
            rem = max(prefix.rfind(os.sep), 0)
            # The sentinel never matches a page number, so toc_index always
            # stays a valid index. Work on a copy, the caller owns toc.
            toc = [entry for entry in toc] + [("Not seen", -1)]

    pdf = canvas.Canvas(filename=opts.output, pagesize=(width,height+15))
    pdf.setAuthor(opts.author)
    pdf.setTitle(opts.title)

    for page in pages:
        if opts.keep_aspect_ratio:
            # NOTE(review): the wand created here is never released in
            # this function.
            img = NewMagickWand()
            if img < 0:
                raise RuntimeError('Cannot create wand.')
            if not MagickReadImage(img, page):
                raise IOError('Failed to read image from: %s'%page)
            sizex = MagickGetImageWidth(img)
            sizey = MagickGetImageHeight(img)
            # Preserve the aspect ratio by adding border
            aspect = float(sizex) / float(sizey)
            if aspect <= (float(width) / float(height)):
                newsizey = height
                newsizex = int(newsizey * aspect)
                deltax = (width - newsizex) / 2
                deltay = 0
            else:
                newsizex = width
                newsizey = int(newsizex / aspect)
                deltax = 0
                deltay = (height - newsizey) / 2
            pdf.drawImage(page, x=deltax, y=deltay, width=newsizex,
                    height=newsizey)
        else:
            pdf.drawImage(page, x=0, y=0, width=width, height=height)

        if toc is not None and toc[toc_index][1] == cur_page:
            tmp = toc[toc_index][0]
            # Drop the common leading directories and the last four
            # characters (presumably the extension, e.g. ".cbz").
            toc_current = tmp[rem:len(tmp)-4]
            index = 0
            while True:
                key = 'page%d-%d' % (cur_page, index)
                pdf.bookmarkPage(key)
                head, sep, rest = toc_current.partition(os.sep)
                # Emit an outline entry only when the title at this level
                # differs from the one emitted previously.
                if index < len(heading):
                    is_new = heading[index] != head
                    if is_new:
                        heading[index] = head
                else:
                    heading.append(head)
                    is_new = True
                if is_new:
                    pdf.addOutlineEntry(title=head, key=key, level=index)
                index += 1
                toc_current = rest
                if sep == "":
                    break
            toc_index += 1

        cur_page += 1
        pdf.showPage()
    # Write the document to disk
    pdf.save()
def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='lrf'):
path_to_file = run_plugins_on_preprocess(path_to_file)
source = path_to_file
to_delete = []
toc = []
list = []
pages = []
if not opts.title:
opts.title = os.path.splitext(os.path.basename(source))[0]
if not opts.output:
opts.output = os.path.abspath(os.path.splitext(os.path.basename(source))[0]+'.'+output_format)
if os.path.isdir(source):
for path in all_files( source , '*.cbr|*.cbz' ):
list.append( path )
else:
list= [ os.path.abspath(source) ]
for source in list:
tdir = extract_comic(source)
new_pages = find_pages(tdir, sort_on_mtime=opts.no_sort, verbose=opts.verbose)
thumbnail = None
if not new_pages:
raise ValueError('Could not find any pages in the comic: %s'%source)
if not getattr(opts, 'no_process', False):
new_pages, failures, tdir2 = process_pages(new_pages, opts, notification)
if not new_pages:
raise ValueError('Could not find any valid pages in the comic: %s'%source)
if failures:
print 'Could not process the following pages (run with --verbose to see why):'
for f in failures:
print '\t', f
thumbnail = os.path.join(tdir2, 'thumbnail.png')
if not os.access(thumbnail, os.R_OK):
thumbnail = None
toc.append((source,len(pages)))
pages.extend(new_pages)
to_delete.append(tdir)
if output_format == 'lrf':
create_lrf(pages, opts.profile, opts, thumbnail=thumbnail)
if output_format == 'epub':
create_epub(pages, opts.profile, opts, thumbnail=thumbnail)
if output_format == 'pdf':
create_pdf(pages, opts.profile, opts, thumbnail=thumbnail,toc=toc)
for tdir in to_delete:
shutil.rmtree(tdir)
def all_files(root, patterns='*'):
    '''
    Walk the tree below `root` and yield the path of every file whose name
    matches at least one of the glob patterns.

    :param root: Directory at which the walk starts.
    :param patterns: Glob patterns separated by ``|``, for example
                     ``'*.cbr|*.cbz'``.
    :return: Generator of paths, each built by joining the directory being
             walked with the file name. Within a directory the files are
             yielded in sorted order.
    '''
    globs = patterns.split('|')
    for dirpath, dirnames, filenames in os.walk(root):
        filenames.sort()
        for fname in filenames:
            # A file is yielded once, no matter how many patterns match.
            if any(fnmatch.fnmatch(fname, glob) for glob in globs):
                yield os.path.join(dirpath, fname)
def main(args=sys.argv, notification=None, output_format='lrf'):
parser = option_parser(output_format=output_format)
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print '\nYou must specify a file to convert'
return 1
if not callable(notification):
pb = ProgressBar(terminal_controller, _('Rendering comic pages...'),
no_progress_bar=opts.no_progress_bar or getattr(opts, 'no_process', False))
notification = pb.update
source = os.path.abspath(args[1])
do_convert(source, opts, notification, output_format=output_format)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -970,7 +970,12 @@ class Canvas(LRFStream):
stream = cStringIO.StringIO(self.stream)
while stream.tell() < len(self.stream):
tag = Tag(stream)
self._contents.append(PutObj(self._document.objects, *struct.unpack("<HHI", tag.contents)))
try:
self._contents.append(
PutObj(self._document.objects,
*struct.unpack("<HHI", tag.contents)))
except struct.error:
print 'Canvas object has errors, skipping.'
def __unicode__(self):
s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)

View File

@ -4,15 +4,16 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Support for reading the metadata from a LIT file.
'''
import sys, cStringIO, os
import cStringIO, os
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.lit.reader import LitReader
def get_metadata(stream):
litfile = LitReader(stream)
src = litfile.meta.encode('utf-8')
from calibre.ebooks.lit.reader import LitContainer
litfile = LitContainer(stream)
src = litfile.get_metadata().encode('utf-8')
litfile = litfile._litfile
opf = OPF(cStringIO.StringIO(src), os.getcwd())
mi = MetaInformation(opf)
covers = []

View File

@ -313,8 +313,10 @@ class MobiReader(object):
self.read_embedded_metadata(root, metadata_elems[0], guide)
for elem in guides + metadata_elems:
elem.getparent().remove(elem)
fname = self.name.encode('ascii', 'replace')
fname = re.sub(r'[\x08\x15\0]+', '', fname)
htmlfile = os.path.join(output_dir,
sanitize_file_name(self.name)+'.html')
sanitize_file_name(fname)+'.html')
try:
for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'):
@ -396,8 +398,8 @@ class MobiReader(object):
'xx-large' : '6',
}
mobi_version = self.book_header.mobi_version
style_map = {}
for i, tag in enumerate(root.iter(etree.Element)):
tag.attrib.pop('xmlns', '')
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content'):
tag.tag = 'div' if tag.tag == 'content' else 'span'

View File

@ -1578,15 +1578,15 @@ class OEBBook(object):
return data.decode('utf-16')
except UnicodeDecodeError:
pass
try:
return data.decode('utf-8')
except UnicodeDecodeError:
pass
if self.encoding is not None:
try:
return data.decode(self.encoding)
except UnicodeDecodeError:
pass
try:
return data.decode('utf-8')
except UnicodeDecodeError:
pass
data, _ = xml_to_unicode(data)
data = data.replace('\r\n', '\n')
data = data.replace('\r', '\n')

View File

@ -128,6 +128,8 @@ class EbookIterator(object):
plumber.setup_options()
if hasattr(plumber.opts, 'dont_package'):
plumber.opts.dont_package = True
if hasattr(plumber.opts, 'no_process'):
plumber.opts.no_process = True
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
plumber.opts, plumber.input_fmt, self.log,
{}, self.base)

View File

@ -59,6 +59,7 @@ class Split(object):
self.fix_links()
def split_item(self, item):
page_breaks, page_break_ids = [], []
if self.split_on_page_breaks:
page_breaks, page_break_ids = self.find_page_breaks(item)

View File

@ -60,6 +60,9 @@ class PDFOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
self.opts, self.log = opts, log
if input_plugin.is_image_collection:
self.convert_images(input_plugin.get_images())
with TemporaryDirectory('_pdf_out') as oebdir:
OEBOutput(None).convert(oeb_book, oebdir, input_plugin, opts, log)

View File

@ -9,10 +9,9 @@ __docformat__ = 'restructuredtext en'
Write content to PDF.
'''
import os, shutil, sys
import os, shutil
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.customize.profiles import OutputProfile
from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
orientation, size
from calibre.ebooks.metadata import authors_to_string

View File

@ -3,7 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
import os, re
import re
from calibre.utils import zipfile
def update(pathtozip, patterns, filepaths, names, compression=zipfile.ZIP_DEFLATED, verbose=True):

View File

@ -54,7 +54,7 @@ PARALLEL_FUNCS = {
('calibre.ebooks.lrf.html.table_as_image', 'do_render', {}, None),
'render_pages' :
('calibre.ebooks.lrf.comic.convert_from', 'render_pages', {}, 'notification'),
('calibre.ebooks.comic.input', 'render_pages', {}, 'notification'),
'comic2lrf' :
('calibre.ebooks.lrf.comic.convert_from', 'do_convert', {}, 'notification'),

View File

@ -81,7 +81,12 @@ def sendmail(msg, from_, to, localhost=None, verbose=0, timeout=30,
for x in to:
return sendmail_direct(from_, x, msg, timeout, localhost, verbose)
import smtplib
cls = smtplib.SMTP if encryption == 'TLS' else smtplib.SMTP_SSL
class SMTP_SSL(smtplib.SMTP_SSL): # Workaround for bug in smtplib.py
def _get_socket(self, host, port, timeout):
smtplib.SMTP_SSL._get_socket(self, host, port, timeout)
return self.sock
cls = smtplib.SMTP if encryption == 'TLS' else SMTP_SSL
timeout = None # Non-blocking sockets sometimes don't work
port = int(port)
s = cls(timeout=timeout, local_hostname=localhost)
@ -93,6 +98,8 @@ def sendmail(msg, from_, to, localhost=None, verbose=0, timeout=30,
s.starttls()
s.ehlo()
if username is not None and password is not None:
if encryption == 'SSL':
s.sock = s.file.sslobj
s.login(username, password)
s.sendmail(from_, to, msg)
return s.quit()

View File

@ -177,6 +177,8 @@ class ProgressBar:
self.width = self.term.COLS or 75
self.bar = term.render(self.BAR)
self.header = self.term.render(self.HEADER % header.center(self.width))
if isinstance(self.header, unicode):
self.header = self.header.encode('utf-8')
self.cleared = 1 #: true if we haven't drawn the bar yet.
def update(self, percent, message=''):

View File

@ -2,5 +2,6 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
class Recipe(object):
pass

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
class RecipeInput(InputFormatPlugin):
    '''
    Input plugin that runs a recipe to download content and returns the
    path of the OPF file found in the current directory afterwards.
    '''

    name        = 'Recipe Input'
    author      = 'Kovid Goyal'
    description = _('Download periodical content from the internet')
    file_types  = set(['recipe'])

    recommendations = set([
        ('chapter_mark', 'none', OptionRecommendation.HIGH),
        ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ])

    options = set([
        OptionRecommendation(name='test', recommended_value=False,
            help=_('Useful for recipe development. Forces '
            'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
        OptionRecommendation(name='username', recommended_value=None,
            help=_('Username for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='password', recommended_value=None,
            help=_('Password for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='lrf', recommended_value=False,
            help='Optimize fetching for subsequent conversion to LRF.'),
        ])

    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators, progress=lambda x, y: x):
        '''
        Load a recipe, run its download and locate the resulting OPF.

        :param recipe_or_file: Path to a readable recipe file. If it is not
                               readable, its base name without the extension
                               is looked up as the title of a builtin recipe.
        :param opts: Conversion options, passed to the recipe.
                     ``opts.output_profile.flow_size`` is set to 0.
        :param file_ext: Unused here.
        :param log: Log object passed to the recipe.
        :param accelerators: Unused here.
        :param progress: Progress callback passed to the recipe.
        :return: Absolute path of the first ``.opf`` file listed in the
                 current directory, or None when there is none.
        :raises ValueError: If no recipe could be loaded.
        '''
        from calibre.web.feeds.recipes import \
                get_builtin_recipe, compile_recipe
        if os.access(recipe_or_file, os.R_OK):
            # Read via a context manager so the file is always closed.
            with open(recipe_or_file, 'rb') as stream:
                recipe = compile_recipe(stream.read())
        else:
            title = os.path.basename(recipe_or_file).rpartition('.')[0]
            recipe = get_builtin_recipe(title)

        if recipe is None:
            raise ValueError('%s is not a valid recipe file or builtin recipe' %
                    recipe_or_file)

        ro = recipe(opts, log, progress)
        ro.download()

        opts.output_profile.flow_size = 0

        for entry in os.listdir('.'):
            if entry.endswith('.opf'):
                return os.path.abspath(entry)

View File

@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.lrf import entity_to_unicode
from calibre.web import Recipe
from calibre.ebooks import render_html
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
from calibre.web.fetch.simple import RecursiveFetcher
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.utils.logging import Log
from calibre.ptempfile import PersistentTemporaryFile, \
PersistentTemporaryDirectory
class BasicNewsRecipe(object):
class BasicNewsRecipe(Recipe):
'''
Abstract base class that contains logic needed in all feed fetchers.
'''
@ -443,40 +443,34 @@ class BasicNewsRecipe(object):
'''
raise NotImplementedError
def __init__(self, options, parser, progress_reporter):
def __init__(self, options, log, progress_reporter):
'''
Initialize the recipe.
:param options: Parsed commandline options
:param parser: Command line option parser. Used to intelligently merge options.
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
'''
self.log = Log()
if options.verbose:
self.log.filter_level = self.log.DEBUG
self.log = log
if not isinstance(self.title, unicode):
self.title = unicode(self.title, 'utf-8', 'replace')
for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
setattr(self, attr, getattr(options, attr))
self.debug = options.verbose > 1
self.output_dir = os.getcwd()
self.verbose = options.verbose
self.test = options.test
self.username = options.username
self.password = options.password
self.lrf = options.lrf
self.output_dir = os.path.abspath(self.output_dir)
if options.test:
self.max_articles_per_feed = 2
self.simultaneous_downloads = min(4, self.simultaneous_downloads)
if self.debug:
self.verbose = True
self.report_progress = progress_reporter
self.username = self.password = None
#: If True optimize downloading for eventual conversion to LRF
self.lrf = False
defaults = parser.get_default_values()
for opt in options.__dict__.keys():
if getattr(options, opt) != getattr(defaults, opt, None):
setattr(self, opt, getattr(options, opt))
if isinstance(self.feeds, basestring):
self.feeds = eval(self.feeds)
if isinstance(self.feeds, basestring):
@ -493,7 +487,6 @@ class BasicNewsRecipe(object):
'--timeout', str(self.timeout),
'--max-recursions', str(self.recursions),
'--delay', str(self.delay),
'--timeout', str(self.timeout),
]
if self.encoding is not None:
web2disk_cmdline.extend(['--encoding', self.encoding])
@ -520,9 +513,6 @@ class BasicNewsRecipe(object):
self.simultaneous_downloads = 1
self.navbar = templates.NavBarTemplate()
self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
if '--base-font-size' not in self.html2lrf_options:
self.html2lrf_options.extend(['--base-font-size', '12'])
self.failed_downloads = []
self.partial_failures = []
@ -557,7 +547,7 @@ class BasicNewsRecipe(object):
return self.postprocess_html(soup, first_fetch)
def download(self, for_lrf=False):
def download(self):
'''
Download and pre-process all articles from the feeds in this recipe.
This method should be called only one on a particular Recipe instance.