Sync to pluginize

This commit is contained in:
John Schember 2009-04-02 20:57:09 -04:00
commit 394b35b435
25 changed files with 407 additions and 368 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.5.4' __version__ = '0.5.5'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
''' '''
Various run time constants. Various run time constants.

View File

@ -6,8 +6,11 @@ Code for the conversion of ebook formats and the reading of metadata
from various formats. from various formats.
''' '''
import traceback, os
from calibre import CurrentDir
class ConversionError(Exception): class ConversionError(Exception):
def __init__(self, msg, only_msg=False): def __init__(self, msg, only_msg=False):
Exception.__init__(self, msg) Exception.__init__(self, msg)
self.only_msg = only_msg self.only_msg = only_msg
@ -22,3 +25,54 @@ BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'prc', 'mobi', 'azw', 'html', 'xhtml', 'pdf', 'prc', 'mobi', 'azw',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip',
'rb', 'imp', 'odt'] 'rb', 'imp', 'odt']
class HTMLRenderer(object):
    '''
    Callable that paints a loaded web page into JPEG data.

    An instance is meant to be connected to a page's ``loadFinished(bool)``
    signal. When called it renders the page's main frame into an image,
    stores the encoded JPEG bytes in :attr:`data` and then exits the
    supplied event loop.

    Attributes set on the instance:

      * ``data``      - the JPEG data as a string, ``''`` until a render succeeds
      * ``exception`` - the exception raised while rendering, or ``None``
      * ``tb``        - the formatted traceback of that exception, or ``None``
      * ``traceback`` - alias of ``tb``, kept for backward compatibility
    '''

    def __init__(self, page, loop):
        '''
        :param page: The page object to render; its ``viewportSize()`` and
                     ``mainFrame()`` are used.
        :param loop: The event loop to exit once rendering has finished,
                     whether it succeeded or not.
        '''
        self.page, self.loop = page, loop
        self.data = ''
        # ``tb`` is the attribute callers check to detect failure. Both
        # names are initialised so that neither is ever missing.
        self.exception = self.tb = self.traceback = None

    def __call__(self, ok):
        '''
        Render the page. Never raises: failures are recorded in
        ``exception``/``tb`` and the event loop is always exited.

        :param ok: The boolean delivered by ``loadFinished(bool)``; a false
                   value is treated as a rendering failure.
        '''
        try:
            # Imported inside the try block so that even an import failure
            # is recorded and still reaches the ``finally`` clause below.
            from PyQt4.Qt import QImage, QPainter, QByteArray, QBuffer
            if not ok:
                raise RuntimeError('Rendering of HTML failed.')
            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
            # 96 dots per inch, expressed in dots per meter
            # (100/2.54 inches per meter).
            image.setDotsPerMeterX(96*(100/2.54))
            image.setDotsPerMeterY(96*(100/2.54))
            painter = QPainter(image)
            self.page.mainFrame().render(painter)
            painter.end()
            ba = QByteArray()
            buf = QBuffer(ba)
            buf.open(QBuffer.WriteOnly)
            image.save(buf, 'JPEG')
            self.data = str(ba.data())
        except Exception as e:
            self.exception = e
            # Store under ``tb`` (the name initialised in __init__) as well
            # as the historical ``traceback`` name.
            self.tb = self.traceback = traceback.format_exc()
        finally:
            self.loop.exit(0)
def render_html(path_to_html, width=590, height=750):
    '''
    Load a local HTML file into a ``QWebPage`` and render it via
    :class:`HTMLRenderer`.

    The page is given a white background, a viewport of ``width`` x
    ``height`` and has both scroll bars switched off. A local event loop is
    run until the renderer (connected to ``loadFinished(bool)``) exits it.

    :param path_to_html: Path to the HTML file; it is made absolute first.
    :param width:  Viewport width passed to ``QSize``.
    :param height: Viewport height passed to ``QSize``.
    :return: The :class:`HTMLRenderer` instance that was connected to the
             page; inspect its attributes for the outcome.
    '''
    from PyQt4.QtWebKit import QWebPage
    from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
    path_to_html = os.path.abspath(path_to_html)
    # NOTE(review): CurrentDir presumably switches the working directory to
    # the file's folder for the duration of the block - confirm in calibre.
    with CurrentDir(os.path.dirname(path_to_html)):
        page = QWebPage()
        palette = page.palette()
        palette.setBrush(QPalette.Background, Qt.white)
        page.setPalette(palette)
        page.setViewportSize(QSize(width, height))
        frame = page.mainFrame()
        for orientation in (Qt.Vertical, Qt.Horizontal):
            frame.setScrollBarPolicy(orientation, Qt.ScrollBarAlwaysOff)
        event_loop = QEventLoop()
        renderer = HTMLRenderer(page, event_loop)
        page.connect(page, SIGNAL('loadFinished(bool)'), renderer)
        frame.load(QUrl.fromLocalFile(path_to_html))
        # Blocks until the renderer calls exit() on the loop.
        event_loop.exec_()
    return renderer

View File

@ -339,7 +339,7 @@ OptionRecommendation(name='language',
trimmer = ManifestTrimmer() trimmer = ManifestTrimmer()
trimmer(self.oeb, self.opts) trimmer(self.oeb, self.opts)
self.log.info('Creating %s output...'%self.output_plugin.name) self.log.info('Creating %s...'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts, self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts,
self.log) self.log)

View File

@ -8,24 +8,24 @@ Conversion of HTML/OPF files follows several stages:
* All links in the HTML files or in the OPF manifest are * All links in the HTML files or in the OPF manifest are
followed to build up a list of HTML files to be converted. followed to build up a list of HTML files to be converted.
This stage is implemented by This stage is implemented by
:function:`calibre.ebooks.html.traverse` and :function:`calibre.ebooks.html.traverse` and
:class:`calibre.ebooks.html.HTMLFile`. :class:`calibre.ebooks.html.HTMLFile`.
* The HTML is pre-processed to make it more semantic. * The HTML is pre-processed to make it more semantic.
All links in the HTML files to other resources like images, All links in the HTML files to other resources like images,
stylesheets, etc. are relativized. The resources are copied stylesheets, etc. are relativized. The resources are copied
into the `resources` sub directory. This is accomplished by into the `resources` sub directory. This is accomplished by
:class:`calibre.ebooks.html.PreProcessor` and :class:`calibre.ebooks.html.PreProcessor` and
:class:`calibre.ebooks.html.Parser`. :class:`calibre.ebooks.html.Parser`.
* The HTML is processed. Various operations are performed. * The HTML is processed. Various operations are performed.
All style declarations are extracted and consolidated into All style declarations are extracted and consolidated into
a single style sheet. Chapters are auto-detected and marked. a single style sheet. Chapters are auto-detected and marked.
Various font related manipulations are performed. See Various font related manipulations are performed. See
:class:`HTMLProcessor`. :class:`HTMLProcessor`.
* The processed HTML is saved and the * The processed HTML is saved and the
:module:`calibre.ebooks.epub.split` module is used to split up :module:`calibre.ebooks.epub.split` module is used to split up
large HTML files into smaller chunks. large HTML files into smaller chunks.
@ -64,7 +64,7 @@ def remove_bad_link(element, attribute, link, pos):
def check_links(opf_path, pretty_print): def check_links(opf_path, pretty_print):
''' '''
Find and remove all invalid links in the HTML files Find and remove all invalid links in the HTML files
''' '''
logger = logging.getLogger('html2epub') logger = logging.getLogger('html2epub')
logger.info('\tChecking files for bad links...') logger.info('\tChecking files for bad links...')
@ -78,7 +78,7 @@ def check_links(opf_path, pretty_print):
if isinstance(f, str): if isinstance(f, str):
f = f.decode('utf-8') f = f.decode('utf-8')
html_files.append(os.path.abspath(content(f))) html_files.append(os.path.abspath(content(f)))
for path in html_files: for path in html_files:
if not os.access(path, os.R_OK): if not os.access(path, os.R_OK):
continue continue
@ -113,27 +113,27 @@ def find_html_index(files):
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:] return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
class HTMLProcessor(Processor, Rationalizer): class HTMLProcessor(Processor, Rationalizer):
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets): def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
name='html2epub') name='html2epub')
if opts.verbose > 2: if opts.verbose > 2:
self.debug_tree('parsed') self.debug_tree('parsed')
self.detect_chapters() self.detect_chapters()
self.extract_css(stylesheets) self.extract_css(stylesheets)
if self.opts.base_font_size2 > 0: if self.opts.base_font_size2 > 0:
self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet], self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
self.root, self.opts) self.root, self.opts)
if opts.verbose > 2: if opts.verbose > 2:
self.debug_tree('nocss') self.debug_tree('nocss')
if hasattr(self.body, 'xpath'): if hasattr(self.body, 'xpath'):
for script in list(self.body.xpath('descendant::script')): for script in list(self.body.xpath('descendant::script')):
script.getparent().remove(script) script.getparent().remove(script)
self.fix_markup() self.fix_markup()
def convert_image(self, img): def convert_image(self, img):
rpath = img.get('src', '') rpath = img.get('src', '')
path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/')) path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
@ -150,10 +150,10 @@ class HTMLProcessor(Processor, Rationalizer):
if val == rpath: if val == rpath:
self.resource_map[key] = rpath+'_calibre_converted.jpg' self.resource_map[key] = rpath+'_calibre_converted.jpg'
img.set('src', rpath+'_calibre_converted.jpg') img.set('src', rpath+'_calibre_converted.jpg')
def fix_markup(self): def fix_markup(self):
''' '''
Perform various markup transforms to get the output to render correctly Perform various markup transforms to get the output to render correctly
in the quirky ADE. in the quirky ADE.
''' '''
# Replace <br> that are children of <body> as ADE doesn't handle them # Replace <br> that are children of <body> as ADE doesn't handle them
@ -179,8 +179,8 @@ class HTMLProcessor(Processor, Rationalizer):
if not br.tail: if not br.tail:
br.tail = '' br.tail = ''
br.tail += sibling.tail br.tail += sibling.tail
if self.opts.profile.remove_object_tags: if self.opts.profile.remove_object_tags:
for tag in self.root.xpath('//embed'): for tag in self.root.xpath('//embed'):
tag.getparent().remove(tag) tag.getparent().remove(tag)
@ -188,42 +188,46 @@ class HTMLProcessor(Processor, Rationalizer):
if tag.get('type', '').lower().strip() in ('image/svg+xml',): if tag.get('type', '').lower().strip() in ('image/svg+xml',):
continue continue
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in self.root.xpath('//title|//style'): for tag in self.root.xpath('//title|//style'):
if not tag.text: if not tag.text:
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in self.root.xpath('//script'): for tag in self.root.xpath('//script'):
if not tag.text and not tag.get('src', False): if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in self.root.xpath('//form'): for tag in self.root.xpath('//form'):
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in self.root.xpath('//center'): for tag in self.root.xpath('//center'):
tag.tag = 'div' tag.tag = 'div'
tag.set('style', 'text-align:center') tag.set('style', 'text-align:center')
if self.opts.linearize_tables: if self.opts.linearize_tables:
for tag in self.root.xpath('//table | //tr | //th | //td'): for tag in self.root.xpath('//table | //tr | //th | //td'):
tag.tag = 'div' tag.tag = 'div'
# ADE can't handle &amp; in an img url
for tag in self.root.xpath('//img[@src]'):
tag.set('src', tag.get('src', '').replace('&', ''))
def save(self): def save(self):
for meta in list(self.root.xpath('//meta')): for meta in list(self.root.xpath('//meta')):
meta.getparent().remove(meta) meta.getparent().remove(meta)
# Strip all comments since Adobe DE is petrified of them # Strip all comments since Adobe DE is petrified of them
Processor.save(self, strip_comments=True) Processor.save(self, strip_comments=True)
def remove_first_image(self): def remove_first_image(self):
images = self.root.xpath('//img') images = self.root.xpath('//img')
if images: if images:
images[0].getparent().remove(images[0]) images[0].getparent().remove(images[0])
return True return True
return False return False
def config(defaults=None): def config(defaults=None):
return common_config(defaults=defaults) return common_config(defaults=defaults)
@ -235,7 +239,7 @@ def option_parser():
Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file. Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
If you specify an OPF file instead of an HTML file, the list of links is takes from If you specify an OPF file instead of an HTML file, the list of links is takes from
the <spine> element of the OPF file. the <spine> element of the OPF file.
''')) '''))
def parse_content(filelist, opts, tdir): def parse_content(filelist, opts, tdir):
@ -246,7 +250,7 @@ def parse_content(filelist, opts, tdir):
first_image_removed = False first_image_removed = False
for htmlfile in filelist: for htmlfile in filelist:
logging.getLogger('html2epub').debug('Processing %s...'%htmlfile) logging.getLogger('html2epub').debug('Processing %s...'%htmlfile)
hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'), hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
resource_map, filelist, stylesheets) resource_map, filelist, stylesheets)
if not first_image_removed and opts.remove_first_image: if not first_image_removed and opts.remove_first_image:
first_image_removed = hp.remove_first_image() first_image_removed = hp.remove_first_image()
@ -254,7 +258,7 @@ def parse_content(filelist, opts, tdir):
hp.save() hp.save()
stylesheet_map[os.path.basename(hp.save_path())] = \ stylesheet_map[os.path.basename(hp.save_path())] = \
[s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None] [s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None]
logging.getLogger('html2epub').debug('Saving stylesheets...') logging.getLogger('html2epub').debug('Saving stylesheets...')
if opts.base_font_size2 > 0: if opts.base_font_size2 > 0:
Rationalizer.remove_font_size_information(stylesheets.values()) Rationalizer.remove_font_size_information(stylesheets.values())
@ -268,7 +272,7 @@ def parse_content(filelist, opts, tdir):
if toc.count('chapter') + toc.count('file') > opts.toc_threshold: if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
toc.purge(['link', 'unknown']) toc.purge(['link', 'unknown'])
toc.purge(['link'], max=opts.max_toc_links) toc.purge(['link'], max=opts.max_toc_links)
return resource_map, hp.htmlfile_map, toc, stylesheet_map return resource_map, hp.htmlfile_map, toc, stylesheet_map
TITLEPAGE = '''\ TITLEPAGE = '''\
@ -325,26 +329,26 @@ def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
metadata_cover = mi.cover metadata_cover = mi.cover
if metadata_cover and not os.path.exists(metadata_cover): if metadata_cover and not os.path.exists(metadata_cover):
metadata_cover = None metadata_cover = None
cpath = '/'.join(('resources', '_cover_.jpg')) cpath = '/'.join(('resources', '_cover_.jpg'))
cover_dest = os.path.join(tdir, 'content', *cpath.split('/')) cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
if metadata_cover is not None: if metadata_cover is not None:
if not create_cover_image(metadata_cover, cover_dest, if not create_cover_image(metadata_cover, cover_dest,
opts.profile.screen_size): opts.profile.screen_size):
metadata_cover = None metadata_cover = None
specified_cover = opts.cover specified_cover = opts.cover
if specified_cover and not os.path.exists(specified_cover): if specified_cover and not os.path.exists(specified_cover):
specified_cover = None specified_cover = None
if specified_cover is not None: if specified_cover is not None:
if not create_cover_image(specified_cover, cover_dest, if not create_cover_image(specified_cover, cover_dest,
opts.profile.screen_size): opts.profile.screen_size):
specified_cover = None specified_cover = None
cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
if cover is not None: if cover is not None:
titlepage = TITLEPAGE%cpath titlepage = TITLEPAGE%cpath
tp = 'calibre_title_page.html' if old_title_page is None else old_title_page tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
tppath = os.path.join(tdir, 'content', tp) tppath = os.path.join(tdir, 'content', tp)
with open(tppath, 'wb') as f: with open(tppath, 'wb') as f:
f.write(titlepage) f.write(titlepage)
@ -370,7 +374,7 @@ def condense_ncx(ncx_path):
compressed = etree.tostring(tree.getroot(), encoding='utf-8') compressed = etree.tostring(tree.getroot(), encoding='utf-8')
open(ncx_path, 'wb').write(compressed) open(ncx_path, 'wb').write(compressed)
def convert(htmlfile, opts, notification=None, create_epub=True, def convert(htmlfile, opts, notification=None, create_epub=True,
oeb_cover=False, extract_to=None): oeb_cover=False, extract_to=None):
htmlfile = os.path.abspath(htmlfile) htmlfile = os.path.abspath(htmlfile)
if opts.output is None: if opts.output is None:
@ -399,16 +403,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
else: else:
opf, filelist = get_filelist(htmlfile, opts) opf, filelist = get_filelist(htmlfile, opts)
mi = merge_metadata(htmlfile, opf, opts) mi = merge_metadata(htmlfile, opf, opts)
opts.chapter = XPath(opts.chapter, opts.chapter = XPath(opts.chapter,
namespaces={'re':'http://exslt.org/regular-expressions'}) namespaces={'re':'http://exslt.org/regular-expressions'})
for x in (1, 2, 3): for x in (1, 2, 3):
attr = 'level%d_toc'%x attr = 'level%d_toc'%x
if getattr(opts, attr): if getattr(opts, attr):
setattr(opts, attr, XPath(getattr(opts, attr), setattr(opts, attr, XPath(getattr(opts, attr),
namespaces={'re':'http://exslt.org/regular-expressions'})) namespaces={'re':'http://exslt.org/regular-expressions'}))
else: else:
setattr(opts, attr, None) setattr(opts, attr, None)
with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir: with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
if opts.keep_intermediate: if opts.keep_intermediate:
print 'Intermediate files in', tdir print 'Intermediate files in', tdir
@ -416,16 +420,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
parse_content(filelist, opts, tdir) parse_content(filelist, opts, tdir)
logger = logging.getLogger('html2epub') logger = logging.getLogger('html2epub')
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()] resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir) title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
spine = [htmlfile_map[f.path] for f in filelist] spine = [htmlfile_map[f.path] for f in filelist]
if not oeb_cover and title_page is not None: if not oeb_cover and title_page is not None:
spine = [title_page] + spine spine = [title_page] + spine
mi.cover = None mi.cover = None
mi.cover_data = (None, None) mi.cover_data = (None, None)
mi = create_metadata(tdir, mi, spine, resources) mi = create_metadata(tdir, mi, spine, resources)
buf = cStringIO.StringIO() buf = cStringIO.StringIO()
if mi.toc: if mi.toc:
@ -453,7 +457,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
logger.info('\tBuilding page map...') logger.info('\tBuilding page map...')
add_page_map(opf_path, opts) add_page_map(opf_path, opts)
check_links(opf_path, opts.pretty_print) check_links(opf_path, opts.pretty_print)
opf = OPF(opf_path, tdir) opf = OPF(opf_path, tdir)
opf.remove_guide() opf.remove_guide()
oeb_cover_file = None oeb_cover_file = None
@ -465,7 +469,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
opf.add_guide_item('cover', 'Cover', 'content/'+spine[0]) opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
if oeb_cover and oeb_cover_file: if oeb_cover and oeb_cover_file:
opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file) opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg') cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
if os.path.exists(cpath): if os.path.exists(cpath):
opf.add_path_to_manifest(cpath, 'image/jpeg') opf.add_path_to_manifest(cpath, 'image/jpeg')
@ -477,29 +481,29 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
condense_ncx(ncx_path) condense_ncx(ncx_path)
if os.stat(ncx_path).st_size > opts.profile.flow_size: if os.stat(ncx_path).st_size > opts.profile.flow_size:
logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size) logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
if create_epub: if create_epub:
epub = initialize_container(opts.output) epub = initialize_container(opts.output)
epub.add_dir(tdir) epub.add_dir(tdir)
epub.close() epub.close()
run_plugins_on_postprocess(opts.output, 'epub') run_plugins_on_postprocess(opts.output, 'epub')
logger.info(_('Output written to ')+opts.output) logger.info(_('Output written to ')+opts.output)
if opts.show_opf: if opts.show_opf:
print open(opf_path, 'rb').read() print open(opf_path, 'rb').read()
if opts.extract_to is not None: if opts.extract_to is not None:
if os.path.exists(opts.extract_to): if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to) shutil.rmtree(opts.extract_to)
shutil.copytree(tdir, opts.extract_to) shutil.copytree(tdir, opts.extract_to)
if extract_to is not None: if extract_to is not None:
if os.path.exists(extract_to): if os.path.exists(extract_to):
shutil.rmtree(extract_to) shutil.rmtree(extract_to)
shutil.copytree(tdir, extract_to) shutil.copytree(tdir, extract_to)
def main(args=sys.argv): def main(args=sys.argv):
parser = option_parser() parser = option_parser()
opts, args = parser.parse_args(args) opts, args = parser.parse_args(args)
@ -509,6 +513,6 @@ def main(args=sys.argv):
return 1 return 1
convert(args[1], opts) convert(args[1], opts)
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())

File diff suppressed because it is too large Load Diff

View File

@ -266,12 +266,14 @@ class MobiReader(object):
parse_cache[htmlfile] = root parse_cache[htmlfile] = root
self.htmlfile = htmlfile self.htmlfile = htmlfile
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root) opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf' self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
opf.render(open(self.created_opf_path, 'wb'), ncx) opf.render(open(self.created_opf_path, 'wb'), ncx,
ncx_manifest_entry=ncx_manifest_entry)
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
open(ncx_path, 'wb').write(ncx)
with open('styles.css', 'wb') as s: with open('styles.css', 'wb') as s:
s.write(self.base_css_rules+'\n\n') s.write(self.base_css_rules+'\n\n')
@ -284,8 +286,9 @@ class MobiReader(object):
if self.book_header.exth is not None or self.embedded_mi is not None: if self.book_header.exth is not None or self.embedded_mi is not None:
self.log.debug('Creating OPF...') self.log.debug('Creating OPF...')
ncx = cStringIO.StringIO() ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide, root) opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx) opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx,
ncx_manifest_entry )
ncx = ncx.getvalue() ncx = ncx.getvalue()
if ncx: if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
@ -434,7 +437,10 @@ class MobiReader(object):
for ref in opf.guide: for ref in opf.guide:
if ref.type.lower() == 'toc': if ref.type.lower() == 'toc':
toc = ref.href() toc = ref.href()
ncx_manifest_entry = None
if toc: if toc:
ncx_manifest_entry = 'toc.ncx'
elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1]) elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
tocobj = None tocobj = None
ent_pat = re.compile(r'&(\S+?);') ent_pat = re.compile(r'&(\S+?);')
@ -461,7 +467,7 @@ class MobiReader(object):
if tocobj is not None: if tocobj is not None:
opf.set_toc(tocobj) opf.set_toc(tocobj)
return opf return opf, ncx_manifest_entry
def sizeof_trailing_entries(self, data): def sizeof_trailing_entries(self, data):
@ -589,7 +595,7 @@ def get_metadata(stream):
if mr.book_header.exth is None: if mr.book_header.exth is None:
mi = MetaInformation(mr.name, [_('Unknown')]) mi = MetaInformation(mr.name, [_('Unknown')])
else: else:
mi = mr.create_opf('dummy.html') mi = mr.create_opf('dummy.html')[0]
try: try:
if hasattr(mr.book_header.exth, 'cover_offset'): if hasattr(mr.book_header.exth, 'cover_offset'):
cover_index = mr.book_header.first_image_index + \ cover_index = mr.book_header.first_image_index + \

View File

@ -44,7 +44,7 @@ class OEBOutput(OutputFormatPlugin):
else: else:
raw = etree.tostring(raw, encoding='utf-8', raw = etree.tostring(raw, encoding='utf-8',
pretty_print=opts.pretty_print) pretty_print=opts.pretty_print)
raw = raw + '<?xml version="1.0" encoding="utf-8" ?>\n' raw = '<?xml version="1.0" encoding="utf-8" ?>\n'+raw
if isinstance(raw, unicode): if isinstance(raw, unicode):
raw = raw.encode('utf-8') raw = raw.encode('utf-8')
with open(path, 'wb') as f: with open(path, 'wb') as f:

View File

@ -7,14 +7,12 @@ Defines various abstract base classes that can be subclassed to create powerful
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import logging, os, cStringIO, time, traceback, re, urlparse, sys, tempfile, functools import logging, os, cStringIO, time, traceback, re, urlparse, sys
from collections import defaultdict from collections import defaultdict
from functools import partial from functools import partial
from contextlib import nested, closing from contextlib import nested, closing
from PyQt4.Qt import QApplication, QFile, Qt, QPalette, QSize, QImage, QPainter, \ from PyQt4.Qt import QApplication, QFile, QIODevice
QBuffer, QByteArray, SIGNAL, QUrl, QEventLoop, QIODevice
from PyQt4.QtWebKit import QWebPage
from calibre import browser, __appname__, iswindows, \ from calibre import browser, __appname__, iswindows, \
@ -22,14 +20,15 @@ from calibre import browser, __appname__, iswindows, \
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.lrf import entity_to_unicode from calibre.ebooks.lrf import entity_to_unicode
from calibre.ebooks import render_html
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.web.fetch.simple import option_parser as web2disk_option_parser from calibre.web.fetch.simple import option_parser as web2disk_option_parser
from calibre.web.fetch.simple import RecursiveFetcher from calibre.web.fetch.simple import RecursiveFetcher
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile, \
from calibre.gui2 import images_rc # Needed for default cover PersistentTemporaryDirectory
class BasicNewsRecipe(object): class BasicNewsRecipe(object):
@ -787,15 +786,18 @@ class BasicNewsRecipe(object):
''' '''
Create a generic cover for recipes that dont have a cover Create a generic cover for recipes that dont have a cover
''' '''
from calibre.gui2 import images_rc # Needed for access to logo
images_rc
if QApplication.instance() is None: QApplication([]) if QApplication.instance() is None: QApplication([])
f = QFile(':/library') f = QFile(':/library')
f.open(QIODevice.ReadOnly) f.open(QIODevice.ReadOnly)
img = str(f.readAll()) img_data = str(f.readAll())
tdir = PersistentTemporaryDirectory('_default_cover')
img = os.path.join(tdir, 'logo.png')
with open(img, 'wb') as g:
g.write(img_data)
f.close() f.close()
f = tempfile.NamedTemporaryFile(suffix='library.png') img = os.path.basename(img)
f.write(img)
f.flush()
img = f.name
html= u'''\ html= u'''\
<html> <html>
<head> <head>
@ -834,38 +836,16 @@ class BasicNewsRecipe(object):
date=strftime(self.timefmt), date=strftime(self.timefmt),
app=__appname__ +' '+__version__, app=__appname__ +' '+__version__,
img=img) img=img)
f2 = tempfile.NamedTemporaryFile(suffix='cover.html') hf = os.path.join(tdir, 'cover.htm')
f2.write(html.encode('utf-8')) with open(hf, 'wb') as f:
f2.flush() f.write(html.encode('utf-8'))
page = QWebPage() renderer = render_html(hf)
pal = page.palette() if renderer.tb is not None:
pal.setBrush(QPalette.Background, Qt.white) self.logger.warning('Failed to render default cover')
page.setPalette(pal) self.logger.debug(renderer.tb)
page.setViewportSize(QSize(590, 750)) else:
page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff) cover_file.write(renderer.data)
page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff) cover_file.flush()
loop = QEventLoop()
def render_html(page, loop, ok):
try:
image = QImage(page.viewportSize(), QImage.Format_ARGB32)
image.setDotsPerMeterX(96*(100/2.54))
image.setDotsPerMeterY(96*(100/2.54))
painter = QPainter(image)
page.mainFrame().render(painter)
painter.end()
ba = QByteArray()
buf = QBuffer(ba)
buf.open(QBuffer.WriteOnly)
image.save(buf, 'JPEG')
image_data = str(ba.data())
cover_file.write(image_data)
cover_file.flush()
finally:
loop.exit(0)
page.connect(page, SIGNAL('loadFinished(bool)'), functools.partial(render_html, page, loop))
page.mainFrame().load(QUrl.fromLocalFile(f2.name))
loop.exec_()
def create_opf(self, feeds, dir=None): def create_opf(self, feeds, dir=None):

View File

@ -13,7 +13,6 @@ class Exiled(BasicNewsRecipe):
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = "Mankind's only alternative since 1997 - Formerly known as The eXile" description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
publisher = 'Exiled Online' publisher = 'Exiled Online'
language = _('English')
category = 'news, politics, international' category = 'news, politics, international'
oldest_article = 15 oldest_article = 15
max_articles_per_feed = 100 max_articles_per_feed = 100
@ -21,16 +20,18 @@ class Exiled(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
remove_javascript = True remove_javascript = True
language = _('English')
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif' cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
html2lrf_options = [ html2lrf_options = [
'--comment' , description '--comment' , description
, '--base-font-size', '10'
, '--category' , category , '--category' , category
, '--publisher' , publisher , '--publisher' , publisher
] ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'main'})] keep_only_tags = [dict(name='div', attrs={'id':'main'})]
remove_tags = [ remove_tags = [
@ -39,8 +40,8 @@ class Exiled(BasicNewsRecipe):
,dict(name='div', attrs={'id':['comments','navig']}) ,dict(name='div', attrs={'id':['comments','navig']})
] ]
feeds = [(u'Articles', u'http://exiledonline.com/feed/' )] feeds = [(u'Articles', u'http://exiledonline.com/feed/')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
@ -48,4 +49,9 @@ class Exiled(BasicNewsRecipe):
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n' mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
soup.head.insert(0,mtag) soup.head.insert(0,mtag)
return soup return soup
def get_article_url(self, article):
    """Return the URL to download for a feed entry.

    Reads the entry's ``link`` value and appends ``all/1/`` to it.
    NOTE(review): the suffix presumably selects the site's single-page
    view of a multi-page article -- confirm against the site.

    :param article: mapping-like feed entry; only ``article.get('link')``
                    is consulted.
    :return: the link with ``all/1/`` appended, or ``None`` when the
             entry has no usable link.
    """
    raw = article.get('link', None)
    # A feed entry without a link used to raise TypeError on the
    # concatenation below; report "no URL" instead.
    if not raw:
        return None
    return raw + 'all/1/'

View File

@ -6,7 +6,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
laprensa.com.ni laprensa.com.ni
''' '''
import locale import datetime
import time import time
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -23,23 +23,9 @@ class LaPrensa_ni(BasicNewsRecipe):
encoding = 'cp1252' encoding = 'cp1252'
remove_javascript = True remove_javascript = True
language = _('Spanish') language = _('Spanish')
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
#Locale setting to get appropriate date/month values in Spanish current_month = months_es[datetime.date.today().month - 1]
try: current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/" + current_month + "/%d/noticias/")
#Windows seting for locale
locale.setlocale(locale.LC_TIME,'Spanish_Nicaragua')
except locale.Error:
#Linux setting for locale -- choose one appropriate for your distribution
try:
locale.setlocale(locale.LC_TIME,'es_NI')
except locale.Error:
try:
locale.setlocale(locale.LC_TIME,'es_ES')
except:
pass
current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/%B/%d/noticias/")
html2lrf_options = [ html2lrf_options = [
'--comment', description '--comment', description
@ -91,6 +77,3 @@ class LaPrensa_ni(BasicNewsRecipe):
totalfeeds.append((feedtitle, articles)) totalfeeds.append((feedtitle, articles))
return totalfeeds return totalfeeds
def cleanup(self):
    """Reset the ``LC_TIME`` locale category back to the default locale."""
    # An empty locale name asks setlocale() for the default setting.
    time_category = locale.LC_TIME
    locale.setlocale(time_category, '')

View File

@ -2,10 +2,10 @@ from django.db import models
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from django.db.models import permalink from django.db.models import permalink
from django.contrib.auth.models import User from django.contrib.auth.models import User
from calibre.www.apps.tagging.fields import TagField from tagging.fields import TagField
from calibre.www.apps.blog.managers import PublicManager from calibre.www.apps.blog.managers import PublicManager
import calibre.www.apps.tagging as tagging import tagging
class Category(models.Model): class Category(models.Model):
"""Category model.""" """Category model."""

View File

@ -40,10 +40,10 @@ INSTALLED_APPS = (
'django.contrib.sites', 'django.contrib.sites',
'django.contrib.admin', 'django.contrib.admin',
'django.contrib.comments', 'django.contrib.comments',
'django.contrib.markup',
'calibre.www.apps.inlines', 'calibre.www.apps.inlines',
'calibre.www.apps.tagging', 'tagging',
'calibre.www.apps.blog', 'calibre.www.apps.blog',
) )

View File

@ -2,14 +2,16 @@ from django.conf.urls.defaults import patterns, include, handler404, handler500
from django.conf import settings from django.conf import settings
# Uncomment the next two lines to enable the admin: # Uncomment the next two lines to enable the admin:
#from django.contrib import admin from django.contrib import admin
#admin.autodiscover() admin.autodiscover()
urlpatterns = patterns('', urlpatterns = patterns('',
# (r'^admin/(.*)', admin.site.root), (r'^admin/(.*)', admin.site.root),
(r'^comments/', include('django.contrib.comments.urls')), (r'^comments/', include('django.contrib.comments.urls')),
(r'', include('calibre.www.apps.blog.urls')),
) )

View File

@ -1,6 +1,6 @@
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _
from calibre.www.apps.tagging.managers import ModelTaggedItemManager, TagDescriptor from tagging.managers import ModelTaggedItemManager, TagDescriptor
VERSION = (0, 3, 'pre') VERSION = (0, 3, 'pre')

View File

@ -1,5 +1,5 @@
from django.contrib import admin from django.contrib import admin
from calibre.www.apps.tagging.models import Tag, TaggedItem from tagging.models import Tag, TaggedItem
admin.site.register(TaggedItem) admin.site.register(TaggedItem)
admin.site.register(Tag) admin.site.register(Tag)

View File

@ -5,9 +5,9 @@ from django.db.models import signals
from django.db.models.fields import CharField from django.db.models.fields import CharField
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from calibre.www.apps.tagging import settings from tagging import settings
from calibre.www.apps.tagging.models import Tag from tagging.models import Tag
from calibre.www.apps.tagging.utils import edit_string_for_tags from tagging.utils import edit_string_for_tags
class TagField(CharField): class TagField(CharField):
""" """
@ -101,7 +101,7 @@ class TagField(CharField):
return 'CharField' return 'CharField'
def formfield(self, **kwargs): def formfield(self, **kwargs):
from calibre.www.apps.tagging import forms from tagging import forms
defaults = {'form_class': forms.TagField} defaults = {'form_class': forms.TagField}
defaults.update(kwargs) defaults.update(kwargs)
return super(TagField, self).formfield(**defaults) return super(TagField, self).formfield(**defaults)

View File

@ -4,9 +4,9 @@ Tagging components for Django's form library.
from django import forms from django import forms
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _
from calibre.www.apps.tagging import settings from tagging import settings
from calibre.www.apps.tagging.models import Tag from tagging.models import Tag
from calibre.www.apps.tagging.utils import parse_tag_input from tagging.utils import parse_tag_input
class AdminTagForm(forms.ModelForm): class AdminTagForm(forms.ModelForm):
class Meta: class Meta:

View File

@ -5,7 +5,7 @@ application.
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.db import models from django.db import models
from calibre.www.apps.tagging.models import Tag, TaggedItem from tagging.models import Tag, TaggedItem
class ModelTagManager(models.Manager): class ModelTagManager(models.Manager):
""" """

View File

@ -13,9 +13,9 @@ from django.db import connection, models
from django.db.models.query import QuerySet from django.db.models.query import QuerySet
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from calibre.www.apps.tagging import settings from tagging import settings
from calibre.www.apps.tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input from tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input
from calibre.www.apps.tagging.utils import LOGARITHMIC from tagging.utils import LOGARITHMIC
qn = connection.ops.quote_name qn = connection.ops.quote_name

View File

@ -159,7 +159,7 @@ def get_tag_list(tags):
* A ``Tag`` ``QuerySet``. * A ``Tag`` ``QuerySet``.
""" """
from calibre.www.apps.tagging.models import Tag from tagging.models import Tag
if isinstance(tags, Tag): if isinstance(tags, Tag):
return [tags] return [tags]
elif isinstance(tags, QuerySet) and tags.model is Tag: elif isinstance(tags, QuerySet) and tags.model is Tag:
@ -201,7 +201,7 @@ def get_tag(tag):
If no matching tag can be found, ``None`` will be returned. If no matching tag can be found, ``None`` will be returned.
""" """
from calibre.www.apps.tagging.models import Tag from tagging.models import Tag
if isinstance(tag, Tag): if isinstance(tag, Tag):
return tag return tag

View File

@ -5,8 +5,8 @@ from django.http import Http404
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _
from django.views.generic.list_detail import object_list from django.views.generic.list_detail import object_list
from calibre.www.apps.tagging.models import Tag, TaggedItem from tagging.models import Tag, TaggedItem
from calibre.www.apps.tagging.utils import get_tag, get_queryset_and_model from tagging.utils import get_tag, get_queryset_and_model
def tagged_object_list(request, queryset_or_model=None, tag=None, def tagged_object_list(request, queryset_or_model=None, tag=None,
related_tags=False, related_tag_counts=True, **kwargs): related_tags=False, related_tag_counts=True, **kwargs):