mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk
This commit is contained in:
commit
86efeee9e5
@ -10,6 +10,14 @@ Based on ideas from comiclrf created by FangornUK.
|
||||
import os, sys, shutil, traceback, textwrap
|
||||
from uuid import uuid4
|
||||
|
||||
try:
|
||||
from reportlab.pdfgen import canvas
|
||||
_reportlab = True
|
||||
except:
|
||||
_reportlab = False
|
||||
|
||||
|
||||
|
||||
from calibre import extract, terminal_controller, __appname__, __version__
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
@ -43,7 +51,7 @@ PROFILES = {
|
||||
# Name : (width, height) in pixels
|
||||
'prs500':(584, 754),
|
||||
# The SONY's LRF renderer (on the PRS500) only uses the first 800x600 block of the image
|
||||
#'prs500-landscape': (784, 1200-92)
|
||||
'prs500-landscape': (784, 1012)
|
||||
}
|
||||
|
||||
def extract_comic(path_to_comic_file):
|
||||
@ -279,7 +287,7 @@ def process_pages(pages, opts, update):
|
||||
failures += failures_
|
||||
return ans, failures, tdir
|
||||
|
||||
def config(defaults=None):
|
||||
def config(defaults=None,output_format='lrf'):
|
||||
desc = _('Options to control the conversion of comics (CBR, CBZ) files into ebooks')
|
||||
if defaults is None:
|
||||
c = Config('comic', desc)
|
||||
@ -316,10 +324,13 @@ def config(defaults=None):
|
||||
help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
|
||||
c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False,
|
||||
help=_("Don't show progress bar."))
|
||||
if output_format == 'pdf':
|
||||
c.add_opt('no_process',['--no_process'], default=False,
|
||||
help=_("Apply no processing to the image"))
|
||||
return c
|
||||
|
||||
def option_parser():
|
||||
c = config()
|
||||
def option_parser(output_format='lrf'):
|
||||
c = config(output_format=output_format)
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog [options] comic.cb[z|r]
|
||||
|
||||
@ -383,6 +394,24 @@ def create_lrf(pages, profile, opts, thumbnail=None):
|
||||
print _('Output written to'), opts.output
|
||||
|
||||
|
||||
def create_pdf(pages, profile, opts, thumbnail=None):
|
||||
width, height = PROFILES[profile]
|
||||
|
||||
if not _reportlab:
|
||||
raise RuntimeError('Failed to load reportlab')
|
||||
|
||||
pdf = canvas.Canvas(filename=opts.output, pagesize=(width,height+15))
|
||||
pdf.setAuthor(opts.author)
|
||||
pdf.setTitle(opts.title)
|
||||
|
||||
|
||||
for page in pages:
|
||||
pdf.drawImage(page, x=0,y=0,width=width, height=height)
|
||||
pdf.showPage()
|
||||
# Write the document to disk
|
||||
pdf.save()
|
||||
|
||||
|
||||
def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='lrf'):
|
||||
path_to_file = run_plugins_on_preprocess(path_to_file)
|
||||
source = path_to_file
|
||||
@ -393,29 +422,33 @@ def do_convert(path_to_file, opts, notification=lambda m, p: p, output_format='l
|
||||
opts.output = os.path.abspath(os.path.splitext(os.path.basename(source))[0]+'.'+output_format)
|
||||
tdir = extract_comic(source)
|
||||
pages = find_pages(tdir, sort_on_mtime=opts.no_sort, verbose=opts.verbose)
|
||||
thumbnail = None
|
||||
if not pages:
|
||||
raise ValueError('Could not find any pages in the comic: %s'%source)
|
||||
pages, failures, tdir2 = process_pages(pages, opts, notification)
|
||||
if not pages:
|
||||
raise ValueError('Could not find any valid pages in the comic: %s'%source)
|
||||
if failures:
|
||||
print 'Could not process the following pages (run with --verbose to see why):'
|
||||
for f in failures:
|
||||
print '\t', f
|
||||
thumbnail = os.path.join(tdir2, 'thumbnail.png')
|
||||
if not os.access(thumbnail, os.R_OK):
|
||||
thumbnail = None
|
||||
|
||||
if not opts.no_process:
|
||||
pages, failures, tdir2 = process_pages(pages, opts, notification)
|
||||
if not pages:
|
||||
raise ValueError('Could not find any valid pages in the comic: %s'%source)
|
||||
if failures:
|
||||
print 'Could not process the following pages (run with --verbose to see why):'
|
||||
for f in failures:
|
||||
print '\t', f
|
||||
thumbnail = os.path.join(tdir2, 'thumbnail.png')
|
||||
if not os.access(thumbnail, os.R_OK):
|
||||
thumbnail = None
|
||||
if output_format == 'lrf':
|
||||
create_lrf(pages, opts.profile, opts, thumbnail=thumbnail)
|
||||
else:
|
||||
if output_format == 'epub':
|
||||
create_epub(pages, opts.profile, opts, thumbnail=thumbnail)
|
||||
if output_format == 'pdf':
|
||||
create_pdf(pages, opts.profile, opts, thumbnail=thumbnail)
|
||||
shutil.rmtree(tdir)
|
||||
shutil.rmtree(tdir2)
|
||||
if not opts.no_process:
|
||||
shutil.rmtree(tdir2)
|
||||
|
||||
|
||||
def main(args=sys.argv, notification=None, output_format='lrf'):
|
||||
parser = option_parser()
|
||||
parser = option_parser(output_format=output_format)
|
||||
opts, args = parser.parse_args(args)
|
||||
if len(args) < 2:
|
||||
parser.print_help()
|
||||
@ -429,7 +462,6 @@ def main(args=sys.argv, notification=None, output_format='lrf'):
|
||||
|
||||
source = os.path.abspath(args[1])
|
||||
do_convert(source, opts, notification, output_format=output_format)
|
||||
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -12,15 +12,17 @@ try:
|
||||
except ImportError:
|
||||
import Image as PILImage
|
||||
|
||||
from lxml import html, etree
|
||||
|
||||
from calibre import __appname__, entity_to_unicode
|
||||
from calibre.ebooks import DRMError
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
from calibre.ebooks.chardet import ENCODING_PATS
|
||||
from calibre.ebooks.mobi import MobiError
|
||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.opf import OPFCreator
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre import sanitize_file_name
|
||||
|
||||
@ -176,6 +178,8 @@ class MobiReader(object):
|
||||
processed_records = self.extract_text()
|
||||
self.add_anchors()
|
||||
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
|
||||
for pat in ENCODING_PATS:
|
||||
self.processed_html = pat.sub('', self.processed_html)
|
||||
self.extract_images(processed_records, output_dir)
|
||||
self.replace_page_breaks()
|
||||
self.cleanup_html()
|
||||
@ -185,7 +189,6 @@ class MobiReader(object):
|
||||
self.processed_html = \
|
||||
re.compile('<head>', re.IGNORECASE).sub(
|
||||
'\n<head>\n'
|
||||
'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n'
|
||||
'<style type="text/css">\n'
|
||||
'blockquote { margin: 0em 0em 0em 1.25em; text-align: justify; }\n'
|
||||
'p { margin: 0em; text-align: justify; }\n'
|
||||
@ -196,23 +199,33 @@ class MobiReader(object):
|
||||
|
||||
if self.verbose:
|
||||
print 'Parsing HTML...'
|
||||
soup = BeautifulSoup(self.processed_html)
|
||||
self.cleanup_soup(soup)
|
||||
guide = soup.find('guide')
|
||||
for elem in soup.findAll(['metadata', 'guide']):
|
||||
elem.extract()
|
||||
root = html.fromstring(self.processed_html)
|
||||
self.upshift_markup(root)
|
||||
guides = root.xpath('//guide')
|
||||
guide = guides[0] if guides else None
|
||||
for elem in guides + root.xpath('//metadata'):
|
||||
elem.getparent().remove(elem)
|
||||
htmlfile = os.path.join(output_dir,
|
||||
sanitize_file_name(self.name)+'.html')
|
||||
try:
|
||||
for ref in guide.findAll('reference', href=True):
|
||||
ref['href'] = os.path.basename(htmlfile)+ref['href']
|
||||
for ref in guide.xpath('descendant::reference'):
|
||||
if ref.attrib.has_key('href'):
|
||||
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
|
||||
except AttributeError:
|
||||
pass
|
||||
if self.verbose:
|
||||
print 'Serializing...'
|
||||
with open(htmlfile, 'wb') as f:
|
||||
f.write(unicode(soup).encode('utf8'))
|
||||
raw = html.tostring(root, encoding='utf-8', method='xml',
|
||||
include_meta_content_type=True, pretty_print=True)
|
||||
raw = raw.replace('<head>',
|
||||
'<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
|
||||
f.write(raw)
|
||||
self.htmlfile = htmlfile
|
||||
|
||||
if self.book_header.exth is not None:
|
||||
if self.verbose:
|
||||
print 'Creating OPF...'
|
||||
ncx = cStringIO.StringIO()
|
||||
opf = self.create_opf(htmlfile, guide)
|
||||
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
|
||||
@ -231,9 +244,9 @@ class MobiReader(object):
|
||||
self.processed_html = re.sub(r'(?i)<%s>'%t, r'<span class="%s">'%c, self.processed_html)
|
||||
self.processed_html = re.sub(r'(?i)</%s>'%t, r'</span>', self.processed_html)
|
||||
|
||||
def cleanup_soup(self, soup):
|
||||
def upshift_markup(self, root):
|
||||
if self.verbose:
|
||||
print 'Replacing height, width and align attributes'
|
||||
print 'Converting style information to CSS...'
|
||||
size_map = {
|
||||
'xx-small' : '0.5',
|
||||
'x-small' : '1',
|
||||
@ -243,41 +256,36 @@ class MobiReader(object):
|
||||
'x-large' : '5',
|
||||
'xx-large' : '6',
|
||||
}
|
||||
for tag in soup.recursiveChildGenerator():
|
||||
if not isinstance(tag, Tag): continue
|
||||
styles = []
|
||||
try:
|
||||
styles.append(tag['style'])
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
styles.append('margin-top: %s' % tag['height'])
|
||||
del tag['height']
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
styles.append('text-indent: %s' % tag['width'])
|
||||
if tag['width'].startswith('-'):
|
||||
styles.append('margin-left: %s'%(tag['width'][1:]))
|
||||
del tag['width']
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
styles.append('text-align: %s' % tag['align'])
|
||||
del tag['align']
|
||||
except KeyError:
|
||||
pass
|
||||
for tag in root.iter(etree.Element):
|
||||
styles, attrib = [], tag.attrib
|
||||
if attrib.has_key('style'):
|
||||
style = attrib.pop('style').strip()
|
||||
if style:
|
||||
styles.append(style)
|
||||
if attrib.has_key('height'):
|
||||
height = attrib.pop('height').strip()
|
||||
if height:
|
||||
styles.append('margin-top: %s' % height)
|
||||
if attrib.has_key('width'):
|
||||
width = attrib.pop('width').strip()
|
||||
if width:
|
||||
styles.append('text-indent: %s' % width)
|
||||
if width.startswith('-'):
|
||||
styles.append('margin-left: %s'%(width[1:]))
|
||||
if attrib.has_key('align'):
|
||||
align = attrib.pop('align').strip()
|
||||
if align:
|
||||
styles.append('text-align: %s' % align)
|
||||
if styles:
|
||||
tag['style'] = '; '.join(styles)
|
||||
attrib['style'] = '; '.join(styles)
|
||||
|
||||
if tag.name.lower() == 'font':
|
||||
sz = tag.get('size', '')
|
||||
if tag.tag.lower() == 'font':
|
||||
sz = tag.get('size', '').lower()
|
||||
try:
|
||||
float(sz)
|
||||
except ValueError:
|
||||
sz = sz.lower()
|
||||
if sz in size_map.keys():
|
||||
tag['size'] = size_map[sz]
|
||||
attrib['size'] = size_map[sz]
|
||||
|
||||
def create_opf(self, htmlfile, guide=None):
|
||||
mi = self.book_header.exth.mi
|
||||
@ -292,7 +300,7 @@ class MobiReader(object):
|
||||
opf.create_manifest(manifest)
|
||||
opf.create_spine([os.path.basename(htmlfile)])
|
||||
toc = None
|
||||
if guide:
|
||||
if guide is not None:
|
||||
opf.create_guide(guide)
|
||||
for ref in opf.guide:
|
||||
if ref.type.lower() == 'toc':
|
||||
@ -303,16 +311,16 @@ class MobiReader(object):
|
||||
ent_pat = re.compile(r'&(\S+?);')
|
||||
if index > -1:
|
||||
raw = '<html><body>'+self.processed_html[index:]
|
||||
soup = BeautifulSoup(raw)
|
||||
root = html.fromstring(raw)
|
||||
tocobj = TOC()
|
||||
for a in soup.findAll('a', href=True):
|
||||
for a in root.xpath('//a[@href]'):
|
||||
try:
|
||||
text = u''.join(a.findAll(text=True)).strip()
|
||||
text = u' '.join([t.strip() for t in a.xpath('descendant::text()')])
|
||||
except:
|
||||
text = ''
|
||||
text = ent_pat.sub(entity_to_unicode, text)
|
||||
if a['href'].startswith('#'):
|
||||
tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
|
||||
if a.get('href', '').startswith('#'):
|
||||
tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text)
|
||||
if tocobj is not None:
|
||||
opf.set_toc(tocobj)
|
||||
|
||||
|
9
src/calibre/ebooks/pdf/__init__.py
Normal file
9
src/calibre/ebooks/pdf/__init__.py
Normal file
@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Used for pdf output for comic2pdf
|
||||
'''
|
||||
|
21
src/calibre/ebooks/pdf/from_comic.py
Normal file
21
src/calibre/ebooks/pdf/from_comic.py
Normal file
@ -0,0 +1,21 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'Convert a comic in CBR/CBZ format to pdf'
|
||||
|
||||
import sys
|
||||
from functools import partial
|
||||
from calibre.ebooks.lrf.comic.convert_from import do_convert, option_parser, config, main as _main
|
||||
|
||||
convert = partial(do_convert, output_format='pdf')
|
||||
main = partial(_main, output_format='pdf')
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
if False:
|
||||
option_parser
|
||||
config
|
||||
|
BIN
src/calibre/gui2/images/news/joelonsoftware.png
Normal file
BIN
src/calibre/gui2/images/news/joelonsoftware.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 390 B |
@ -59,6 +59,7 @@ entry_points = {
|
||||
'oeb2lit = calibre.ebooks.lit.writer:main',
|
||||
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
|
||||
'comic2epub = calibre.ebooks.epub.from_comic:main',
|
||||
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
|
||||
'calibre-debug = calibre.debug:main',
|
||||
'calibredb = calibre.library.cli:main',
|
||||
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
||||
@ -228,6 +229,7 @@ def setup_completion(fatal_errors):
|
||||
f.write(opts_and_exts('lit2oeb', lit2oeb, ['lit']))
|
||||
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
|
||||
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
|
||||
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
|
||||
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
|
||||
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
||||
f.write(opts_and_words('feeds2lrf', feeds2epub, feed_titles))
|
||||
|
@ -21,6 +21,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
|
||||
'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
|
||||
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
|
||||
'joelonsoftware',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
25
src/calibre/web/feeds/recipes/recipe_joelonsoftware.py
Normal file
25
src/calibre/web/feeds/recipes/recipe_joelonsoftware.py
Normal file
@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
joelonsoftware.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Joelonsoftware(BasicNewsRecipe):
|
||||
|
||||
title = 'Joel on Software'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Painless Software Management'
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
|
||||
cover_url = 'http://www.joelonsoftware.com/RssJoelOnSoftware.jpg'
|
||||
|
||||
html2lrf_options = [ '--comment' , description
|
||||
, '--category' , 'blog,software,news'
|
||||
, '--author' , 'Joel Spolsky'
|
||||
]
|
||||
|
||||
feeds = [(u'Articles', u'http://www.joelonsoftware.com/rss.xml')]
|
Loading…
x
Reference in New Issue
Block a user