Sync to trunk

This commit is contained in:
John Schember 2009-01-07 07:06:16 -05:00
commit 86efeee9e5
8 changed files with 165 additions and 67 deletions

View File

@ -10,6 +10,14 @@ Based on ideas from comiclrf created by FangornUK.
import os, sys, shutil, traceback, textwrap
from uuid import uuid4
try:
from reportlab.pdfgen import canvas
_reportlab = True
except:
_reportlab = False
from calibre import extract, terminal_controller, __appname__, __version__
from calibre.utils.config import Config, StringConfig
from calibre.ptempfile import PersistentTemporaryDirectory
@ -43,7 +51,7 @@ PROFILES = {
# Name : (width, height) in pixels
'prs500':(584, 754),
# The SONY's LRF renderer (on the PRS500) only uses the first 800x600 block of the image
#'prs500-landscape': (784, 1200-92)
'prs500-landscape': (784, 1012)
}
def extract_comic(path_to_comic_file):
@ -279,7 +287,7 @@ def process_pages(pages, opts, update):
failures += failures_
return ans, failures, tdir
def config(defaults=None):
def config(defaults=None,output_format='lrf'):
desc = _('Options to control the conversion of comics (CBR, CBZ) files into ebooks')
if defaults is None:
c = Config('comic', desc)
@ -316,10 +324,13 @@ def config(defaults=None):
help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False,
help=_("Don't show progress bar."))
if output_format == 'pdf':
c.add_opt('no_process',['--no_process'], default=False,
help=_("Apply no processing to the image"))
return c
def option_parser():
c = config()
def option_parser(output_format='lrf'):
c = config(output_format=output_format)
return c.option_parser(usage=_('''\
%prog [options] comic.cb[z|r]
@ -383,6 +394,24 @@ def create_lrf(pages, profile, opts, thumbnail=None):
print _('Output written to'), opts.output
def create_pdf(pages, profile, opts, thumbnail=None):
    """Write the comic pages to a PDF file at ``opts.output``.

    Every entry of ``pages`` is drawn as a single page-filling image whose
    size is taken from ``PROFILES[profile]``. The document author and title
    come from ``opts.author`` and ``opts.title``.

    ``thumbnail`` is accepted so the call signature matches ``create_lrf``,
    but it is not used here.

    Raises ``KeyError`` if ``profile`` is not in ``PROFILES`` and
    ``RuntimeError`` if reportlab could not be imported.
    """
    page_width, page_height = PROFILES[profile]

    if not _reportlab:
        raise RuntimeError('Failed to load reportlab')

    # The page itself is 15 units taller than the image drawn onto it.
    document = canvas.Canvas(filename=opts.output,
                             pagesize=(page_width, page_height + 15))
    document.setAuthor(opts.author)
    document.setTitle(opts.title)

    for image in pages:
        document.drawImage(image, x=0, y=0,
                           width=page_width, height=page_height)
        # Close the current page so the next image lands on a new one.
        document.showPage()

    # Write the document to disk
    document.save()
def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='lrf'):
path_to_file = run_plugins_on_preprocess(path_to_file)
source = path_to_file
@ -393,29 +422,33 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
opts.output = os.path.abspath(os.path.splitext(os.path.basename(source))[0]+'.'+output_format)
tdir = extract_comic(source)
pages = find_pages(tdir, sort_on_mtime=opts.no_sort, verbose=opts.verbose)
thumbnail = None
if not pages:
raise ValueError('Could not find any pages in the comic: %s'%source)
pages, failures, tdir2 = process_pages(pages, opts, notification)
if not pages:
raise ValueError('Could not find any valid pages in the comic: %s'%source)
if failures:
print 'Could not process the following pages (run with --verbose to see why):'
for f in failures:
print '\t', f
thumbnail = os.path.join(tdir2, 'thumbnail.png')
if not os.access(thumbnail, os.R_OK):
thumbnail = None
if not opts.no_process:
pages, failures, tdir2 = process_pages(pages, opts, notification)
if not pages:
raise ValueError('Could not find any valid pages in the comic: %s'%source)
if failures:
print 'Could not process the following pages (run with --verbose to see why):'
for f in failures:
print '\t', f
thumbnail = os.path.join(tdir2, 'thumbnail.png')
if not os.access(thumbnail, os.R_OK):
thumbnail = None
if output_format == 'lrf':
create_lrf(pages, opts.profile, opts, thumbnail=thumbnail)
else:
if output_format == 'epub':
create_epub(pages, opts.profile, opts, thumbnail=thumbnail)
if output_format == 'pdf':
create_pdf(pages, opts.profile, opts, thumbnail=thumbnail)
shutil.rmtree(tdir)
shutil.rmtree(tdir2)
if not opts.no_process:
shutil.rmtree(tdir2)
def main(args=sys.argv, notification=None, output_format='lrf'):
parser = option_parser()
parser = option_parser(output_format=output_format)
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
@ -429,7 +462,6 @@ def main(args=sys.argv, notification=None, output_format='lrf'):
source = os.path.abspath(args[1])
do_convert(source, opts, notification, output_format=output_format)
return 0
if __name__ == '__main__':

View File

@ -12,15 +12,17 @@ try:
except ImportError:
import Image as PILImage
from lxml import html, etree
from calibre import __appname__, entity_to_unicode
from calibre.ebooks import DRMError
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import ENCODING_PATS
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre import sanitize_file_name
@ -176,6 +178,8 @@ class MobiReader(object):
processed_records = self.extract_text()
self.add_anchors()
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
for pat in ENCODING_PATS:
self.processed_html = pat.sub('', self.processed_html)
self.extract_images(processed_records, output_dir)
self.replace_page_breaks()
self.cleanup_html()
@ -185,7 +189,6 @@ class MobiReader(object):
self.processed_html = \
re.compile('<head>', re.IGNORECASE).sub(
'\n<head>\n'
'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n'
'<style type="text/css">\n'
'blockquote { margin: 0em 0em 0em 1.25em; text-align: justify; }\n'
'p { margin: 0em; text-align: justify; }\n'
@ -196,23 +199,33 @@ class MobiReader(object):
if self.verbose:
print 'Parsing HTML...'
soup = BeautifulSoup(self.processed_html)
self.cleanup_soup(soup)
guide = soup.find('guide')
for elem in soup.findAll(['metadata', 'guide']):
elem.extract()
root = html.fromstring(self.processed_html)
self.upshift_markup(root)
guides = root.xpath('//guide')
guide = guides[0] if guides else None
for elem in guides + root.xpath('//metadata'):
elem.getparent().remove(elem)
htmlfile = os.path.join(output_dir,
sanitize_file_name(self.name)+'.html')
try:
for ref in guide.findAll('reference', href=True):
ref['href'] = os.path.basename(htmlfile)+ref['href']
for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'):
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
except AttributeError:
pass
if self.verbose:
print 'Serializing...'
with open(htmlfile, 'wb') as f:
f.write(unicode(soup).encode('utf8'))
raw = html.tostring(root, encoding='utf-8', method='xml',
include_meta_content_type=True, pretty_print=True)
raw = raw.replace('<head>',
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
f.write(raw)
self.htmlfile = htmlfile
if self.book_header.exth is not None:
if self.verbose:
print 'Creating OPF...'
ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
@ -231,9 +244,9 @@ class MobiReader(object):
self.processed_html = re.sub(r'(?i)<%s>'%t, r'<span class="%s">'%c, self.processed_html)
self.processed_html = re.sub(r'(?i)</%s>'%t, r'</span>', self.processed_html)
def cleanup_soup(self, soup):
def upshift_markup(self, root):
if self.verbose:
print 'Replacing height, width and align attributes'
print 'Converting style information to CSS...'
size_map = {
'xx-small' : '0.5',
'x-small' : '1',
@ -243,41 +256,36 @@ class MobiReader(object):
'x-large' : '5',
'xx-large' : '6',
}
for tag in soup.recursiveChildGenerator():
if not isinstance(tag, Tag): continue
styles = []
try:
styles.append(tag['style'])
except KeyError:
pass
try:
styles.append('margin-top: %s' % tag['height'])
del tag['height']
except KeyError:
pass
try:
styles.append('text-indent: %s' % tag['width'])
if tag['width'].startswith('-'):
styles.append('margin-left: %s'%(tag['width'][1:]))
del tag['width']
except KeyError:
pass
try:
styles.append('text-align: %s' % tag['align'])
del tag['align']
except KeyError:
pass
for tag in root.iter(etree.Element):
styles, attrib = [], tag.attrib
if attrib.has_key('style'):
style = attrib.pop('style').strip()
if style:
styles.append(style)
if attrib.has_key('height'):
height = attrib.pop('height').strip()
if height:
styles.append('margin-top: %s' % height)
if attrib.has_key('width'):
width = attrib.pop('width').strip()
if width:
styles.append('text-indent: %s' % width)
if width.startswith('-'):
styles.append('margin-left: %s'%(width[1:]))
if attrib.has_key('align'):
align = attrib.pop('align').strip()
if align:
styles.append('text-align: %s' % align)
if styles:
tag['style'] = '; '.join(styles)
attrib['style'] = '; '.join(styles)
if tag.name.lower() == 'font':
sz = tag.get('size', '')
if tag.tag.lower() == 'font':
sz = tag.get('size', '').lower()
try:
float(sz)
except ValueError:
sz = sz.lower()
if sz in size_map.keys():
tag['size'] = size_map[sz]
attrib['size'] = size_map[sz]
def create_opf(self, htmlfile, guide=None):
mi = self.book_header.exth.mi
@ -292,7 +300,7 @@ class MobiReader(object):
opf.create_manifest(manifest)
opf.create_spine([os.path.basename(htmlfile)])
toc = None
if guide:
if guide is not None:
opf.create_guide(guide)
for ref in opf.guide:
if ref.type.lower() == 'toc':
@ -303,16 +311,16 @@ class MobiReader(object):
ent_pat = re.compile(r'&(\S+?);')
if index > -1:
raw = '<html><body>'+self.processed_html[index:]
soup = BeautifulSoup(raw)
root = html.fromstring(raw)
tocobj = TOC()
for a in soup.findAll('a', href=True):
for a in root.xpath('//a[@href]'):
try:
text = u''.join(a.findAll(text=True)).strip()
text = u' '.join([t.strip() for t in a.xpath('descendant::text()')])
except:
text = ''
text = ent_pat.sub(entity_to_unicode, text)
if a['href'].startswith('#'):
tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
if a.get('href', '').startswith('#'):
tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text)
if tocobj is not None:
opf.set_toc(tocobj)

View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Used for pdf output for comic2pdf
'''

View File

@ -0,0 +1,21 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'Convert a comic in CBR/CBZ format to pdf'
import sys
from functools import partial
from calibre.ebooks.lrf.comic.convert_from import do_convert, option_parser, config, main as _main
# PDF entry points: the shared comic converter functions with the output
# format pinned to 'pdf'.
main = partial(_main, output_format='pdf')
convert = partial(do_convert, output_format='pdf')

if False:
    # Never executed. Presumably here only so the re-exported names count
    # as "used" for static checkers -- confirm before removing.
    config
    option_parser

if __name__ == '__main__':
    sys.exit(main())

Binary file not shown.

After

Width:  |  Height:  |  Size: 390 B

View File

@ -59,6 +59,7 @@ entry_points = {
'oeb2lit = calibre.ebooks.lit.writer:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
@ -228,6 +229,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit']))
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
f.write(opts_and_words('feeds2lrf', feeds2epub, feed_titles))

View File

@ -21,6 +21,7 @@ recipe_modules = ['recipe_' + r for r in (
'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
'joelonsoftware',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,25 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
joelonsoftware.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Joelonsoftware(BasicNewsRecipe):
    """News recipe for joelonsoftware.com, driven by the site's RSS feed."""

    # Descriptive metadata for the recipe.
    title = 'Joel on Software'
    __author__ = 'Darko Miletic'
    description = 'Painless Software Management'

    # Fetch behaviour flags.
    use_embedded_content = True
    no_stylesheets = True

    cover_url = 'http://www.joelonsoftware.com/RssJoelOnSoftware.jpg'

    # (section title, feed URL) pairs.
    feeds = [(u'Articles', u'http://www.joelonsoftware.com/rss.xml')]

    # Flag/value pairs; the comment text reuses ``description`` above.
    html2lrf_options = [
        '--comment', description,
        '--category', 'blog,software,news',
        '--author', 'Joel Spolsky',
    ]