merge from trunk

This commit is contained in:
Lee 2011-04-04 11:36:06 +08:00
commit 1be45db9d7
29 changed files with 1430 additions and 558 deletions

View File

@ -19,6 +19,77 @@
# new recipes:
# - title:
- version: 0.7.53
date: 2011-04-01
new features:
- title: "Email delivery: You can now specify a subject that calibre will use when sending emails per email account, configured in Preferences->Sending by email. The subject is a template of the same kind used in Save to Disk, etc. So you can specift the title/authors/series/whatever in the template."
tickets: [743535]
- title: "Apple driver: When an iDevice is detected, inform the user about the Connect to iTunes method instead of trying to connect directly to the device, as the latter can be buggy. See http://www.mobileread.com/forums/showthread.php?t=127883 for details"
- title: "SONY driver: Search for books on the device in all directories not just database/media/books. This can be turned off by customizing the SONY plugin in Preferences->Plugins"
- title: "EPUB Output: Remove any margins specified via an Adobe page template in the input document. This means that the margins specified in calibre are more likely to be the actual margins used."
- title: "When reading metadata from filenames, allow publisher and published date to be read from the filename"
tickets: [744020]
- title: "Remove the option to show a second tool bar from Preferences->Look & Feel. Instead go to Preferences->Toolbars and add items to the second toolbar to control exactly what is visible there."
tickets: [742686]
- title: "Add a tweak that can be used to have the calibre content server listen for IPv6 connections."
tickets: [743486]
- title: "When clicking Next or Previous in the edit metadata dialog, then active book in the main book list is also changed"
tickets: [743533]
- title: "Remember the previously used setting for Match all/Match any under the Tag Browser when calibre restarts"
tickets: [743645]
- title: "FB2 Output: Option to set the FB2 genre explicitly."
tickets: [743178]
- title: "Plugin developers: calibre now has a new plugin API, see http://calibre-ebook.com/user_manual/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
bug fixes:
- title: "Fix text color in the search bar set to black instead of the system font color"
tickets: [746846]
- title: "Workaround for Word bug where Word uses gb2312 as the encoding when exporting CHinese docs to HTML istead of gbk"
tickets: [745428]
- title: "Make sorting on the device view faster and more robust."
tickets: [742626]
- title: "E-book viewer: Fix viewer losing place in very long single file documents when window resized."
tickets: [745001]
- title: "MOBI Output: Workaround for Amazon's MOBI renderer not rendering top margins on ul and ol tags."
tickets: [744365]
- title: "EPUB Input: Workaround for invalid EPUBs produced by someone named 'ibooks, Inc.'."
tickets: [744122]
- title: "RTF Input: Handle RTF files with too many levels of list nesting."
tickets: [743243]
improved recipes:
- Irish Times
- LifeHacker
- Estadao
- Folha de Sao Paulo
new recipes:
- title: Financieele Dagblad
author: marvin_2
- title: "Prost Amerika, WV Hooligan and SB Nation"
author: rylsfan
- title: "Cracked.com"
author: Nudgenudge
- version: 0.7.52
date: 2011-03-25

21
recipes/developpez.recipe Normal file
View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1301849956(BasicNewsRecipe):
title = u'Developpez.com'
description = u'Toutes les news du site Developpez.com'
publisher = u'Developpez.com'
timefmt = ' [%a, %d %b, %Y]'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'ISO-8859-1'
language = 'fr'
__author__ = 'louhike'
remove_javascript = True
keep_only_tags = [dict(name='div', attrs={'class':'content'})]
feeds = [(u'Tous les articles', u'http://www.developpez.com/index/rss')]
def get_cover_url(self):
return 'http://javascript.developpez.com/template/images/logo.gif'

View File

@ -35,8 +35,8 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
(u'Auto', u'http://www.theglobeandmail.com/sports/?service=rss'),
(u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
(u'Drive', u'http://www.theglobeandmail.com/auto/?service=rss')
]
preprocess_regexps = [

View File

@ -36,6 +36,7 @@ class Guardian(BasicNewsRecipe):
remove_tags = [
dict(name='div', attrs={'class':["video-content","videos-third-column"]}),
dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
dict(name='ul', attrs={'class':["pagination"]}),
dict(name='ul', attrs={'id':["content-actions"]}),
#dict(name='img'),

View File

@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'West Hawaii Today'
__author__ = 'Tony Stegall'
__author__ = 'Tony Stegall, fixed by HK'
language = 'en'
description = 'Westhawaiitoday.com'
publisher = 'West Hawaii '
@ -15,7 +15,14 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif'
feeds = [ 'http://www.westhawaiitoday.com/rss.xml']
feeds = [
('http://www.westhawaiitoday.com/taxonomy/term/2/feed'), #Local News
('http://www.westhawaiitoday.com/taxonomy/term/15/feed'), #Local Sports
('http://www.westhawaiitoday.com/taxonomy/term/4/feed'), #Local Features
('http://www.westhawaiitoday.com/taxonomy/term/12/feed'), #Obituaries
('http://www.westhawaiitoday.com/taxonomy/term/18/feed'), #Letters
('http://www.westhawaiitoday.com/taxonomy/term/19/feed'), #Editorial
('http://www.westhawaiitoday.com/taxonomy/term/20/feed'), #columns
('http://www.westhawaiitoday.com/taxonomy/term/13/feed') #Volcano Update (Sundays)
]

View File

@ -34,7 +34,7 @@ class iHeuteRecipe(BasicNewsRecipe):
dict(name='table', attrs={'class':['video-16ku9']})]
remove_tags_after = [dict(name='div',attrs={'id':['related','related2']})]
keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day']})
keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day','art-full']})
,dict(name='table',attrs={'class':['kemel-box']})]
def print_version(self, url):

View File

@ -15,10 +15,10 @@ class InternationalHeraldTribune(BasicNewsRecipe):
language = 'en'
oldest_article = 1
max_articles_per_feed = 10
max_articles_per_feed = 30
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':'footer'}),
remove_tags = [dict(name='div', attrs={'class':['footer','header']}),
dict(name=['form'])]
preprocess_regexps = [
(re.compile(r'<!-- webtrends.*', re.DOTALL),
@ -26,6 +26,8 @@ class InternationalHeraldTribune(BasicNewsRecipe):
]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
remove_empty_feeds = True
feeds = [
(u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
(u'Business', u'http://www.iht.com/rss/business.xml'),
@ -46,13 +48,15 @@ class InternationalHeraldTribune(BasicNewsRecipe):
]
temp_files = []
articles_are_obfuscated = True
def get_obfuscated_article(self, url, logger):
masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
def get_obfuscated_article(self, url):
br = self.get_browser()
br.open(url)
br.select_form(name='printFriendly')
res = br.submit()
html = res.read()
response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
html = response1.read()
self.temp_files.append(PersistentTemporaryFile('_iht.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()

View File

@ -7,6 +7,7 @@ class SmithsonianMagazine(BasicNewsRecipe):
__author__ = 'Krittika Goyal'
oldest_article = 31#days
max_articles_per_feed = 50
use_embedded_content = False
#encoding = 'latin1'
recursions = 1
match_regexps = ['&page=[2-9]$']

View File

@ -45,7 +45,6 @@ class Stage3(Command):
sub_commands = ['upload_user_manual', 'upload_demo', 'sdist',
'upload_to_sourceforge', 'upload_to_google_code',
'tag_release', 'upload_to_server',
'upload_to_mobileread',
]
class Stage4(Command):

View File

@ -356,7 +356,7 @@ class UploadUserManual(Command): # {{{
zf.write(os.path.join(x, y))
bname = self.b(path) + '_plugin.zip'
dest = '%s/%s'%(DOWNLOADS, bname)
subprocess.check_call(['scp', f.name, dest])
subprocess.check_call(['scp', f.name, 'divok:'+dest])
def run(self, opts):
path = self.j(self.SRC, 'calibre', 'manual', 'plugin_examples')

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.7.52'
__version__ = '0.7.53'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re, importlib

View File

@ -231,6 +231,17 @@ class HTMLMetadataReader(MetadataReaderPlugin):
from calibre.ebooks.metadata.html import get_metadata
return get_metadata(stream)
class HTMLZMetadataReader(MetadataReaderPlugin):
name = 'Read HTMLZ metadata'
file_types = set(['htmlz'])
description = _('Read metadata from %s files') % 'HTMLZ'
author = 'John Schember'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.extz import get_metadata
return get_metadata(stream)
class IMPMetadataReader(MetadataReaderPlugin):
name = 'Read IMP metadata'
@ -407,7 +418,7 @@ class TXTZMetadataReader(MetadataReaderPlugin):
author = 'John Schember'
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.txtz import get_metadata
from calibre.ebooks.metadata.extz import get_metadata
return get_metadata(stream)
class ZipMetadataReader(MetadataReaderPlugin):
@ -433,6 +444,17 @@ class EPUBMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.metadata.epub import set_metadata
set_metadata(stream, mi, apply_null=self.apply_null)
class HTMLZMetadataWriter(MetadataWriterPlugin):
name = 'Set HTMLZ metadata'
file_types = set(['htmlz'])
description = _('Set metadata from %s files') % 'HTMLZ'
author = 'John Schember'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.extz import set_metadata
set_metadata(stream, mi)
class LRFMetadataWriter(MetadataWriterPlugin):
name = 'Set LRF metadata'
@ -505,7 +527,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
author = 'John Schember'
def set_metadata(self, stream, mi, type):
from calibre.ebooks.metadata.txtz import set_metadata
from calibre.ebooks.metadata.extz import set_metadata
set_metadata(stream, mi)
# }}}
@ -514,6 +536,7 @@ from calibre.ebooks.comic.input import ComicInput
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.htmlz.input import HTMLZInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.odt.input import ODTInput
@ -544,6 +567,7 @@ from calibre.ebooks.tcr.output import TCROutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.txt.output import TXTZOutput
from calibre.ebooks.html.output import HTMLOutput
from calibre.ebooks.htmlz.output import HTMLZOutput
from calibre.ebooks.snb.output import SNBOutput
from calibre.customize.profiles import input_profiles, output_profiles
@ -599,6 +623,7 @@ plugins += [
EPUBInput,
FB2Input,
HTMLInput,
HTMLZInput,
LITInput,
MOBIInput,
ODTInput,
@ -630,6 +655,7 @@ plugins += [
TXTOutput,
TXTZOutput,
HTMLOutput,
HTMLZOutput,
SNBOutput,
]
# Order here matters. The first matched device is the one used.

View File

@ -470,8 +470,8 @@ class KoboReaderOutput(OutputProfile):
description = _('This profile is intended for the Kobo Reader.')
screen_size = (540, 718)
comic_screen_size = (540, 718)
screen_size = (536, 710)
comic_screen_size = (536, 710)
dpi = 168.451
fbase = 12
fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]

View File

@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
try:
if encoding.lower().strip() == 'macintosh':
encoding = 'mac-roman'
if encoding.lower().replace('_', '-').strip() in (
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
# Microsoft Word exports to HTML with encoding incorrectly set to
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
encoding = 'gbk'
raw = raw.decode(encoding, 'replace')
except LookupError:
encoding = 'utf-8'
@ -110,11 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
if resolve_entities:
raw = substitute_entites(raw)
if encoding and encoding.lower().replace('_', '-').strip() in (
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
# Microsoft Word exports to HTML with encoding incorrectly set to
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
encoding = 'gbk'
return raw, encoding

View File

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre import walk
from calibre.customize.conversion import InputFormatPlugin
from calibre.utils.zipfile import ZipFile
class HTMLZInput(InputFormatPlugin):
name = 'HTLZ Input'
author = 'John Schember'
description = 'Convert HTML files to HTML'
file_types = set(['htmlz'])
def convert(self, stream, options, file_ext, log,
accelerators):
self.log = log
html = u''
# Extract content from zip archive.
zf = ZipFile(stream)
zf.extractall('.')
for x in walk('.'):
if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
with open(x, 'rb') as tf:
html = tf.read()
break
# Run the HTML through the html processing plugin.
from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html')
for opt in html_input.options:
setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8'
base = os.getcwdu()
fname = os.path.join(base, 'index.html')
c = 0
while os.path.exists(fname):
c += 1
fname = 'index%d.html'%c
htmlfile = open(fname, 'wb')
with htmlfile:
htmlfile.write(html.encode('utf-8'))
odi = options.debug_pipeline
options.debug_pipeline = None
# Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{})
options.debug_pipeline = odi
os.remove(htmlfile.name)
# Set metadata from file.
from calibre.customize.ui import get_file_type_metadata
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
mi = get_file_type_metadata(stream, file_ext)
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
return oeb

View File

@ -0,0 +1,372 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into a single (more or less) HTML file.
'''
import os
from urlparse import urlparse
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.utils.logging import default_log
class OEB2HTML(object):
'''
Base class. All subclasses should implement dump_text to actually transform
content. Also, callers should use oeb2html to get the transformed html.
links and images can be retrieved after calling oeb2html to get the mapping
of OEB links and images to the new names used in the html returned by oeb2html.
Images will always be referenced as if they are in an images directory.
Use get_css to get the CSS classes for the OEB document as a string.
'''
def __init__(self, log=None):
self.log = default_log if log is None else log
self.links = {}
self.images = {}
def oeb2html(self, oeb_book, opts):
self.log.info('Converting OEB book to HTML...')
self.opts = opts
self.links = {}
self.images = {}
return self.mlize_spine(oeb_book)
def mlize_spine(self, oeb_book):
output = [u'<html><body><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head>']
for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href)
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
output.append('\n\n')
output.append('</body></html>')
return ''.join(output)
def dump_text(self, elem, stylizer, page):
raise NotImplementedError
def get_link_id(self, href, aid):
aid = '%s#%s' % (href, aid)
if aid not in self.links:
self.links[aid] = 'calibre_link-%s' % len(self.links.keys())
return self.links[aid]
def rewrite_links(self, tag, attribs, page):
# Rewrite ids.
if 'id' in attribs:
attribs['id'] = self.get_link_id(page.href, attribs['id'])
# Rewrite links.
if tag == 'a':
href = attribs['href']
href = page.abshref(href)
if self.url_is_relative(href):
if '#' not in href:
href += '#'
if href not in self.links:
self.links[href] = 'calibre_link-%s' % len(self.links.keys())
href = '#%s' % self.links[href]
attribs['href'] = href
return attribs
def rewrite_images(self, tag, attribs, page):
if tag == 'img':
src = attribs.get('src', None)
if src:
src = page.abshref(src)
if src not in self.images:
ext = os.path.splitext(src)[1]
fname = '%s%s' % (len(self.images), ext)
fname = fname.zfill(10)
self.images[src] = fname
attribs['src'] = 'images/%s' % self.images[src]
return attribs
def url_is_relative(self, url):
o = urlparse(url)
return False if o.scheme else True
def get_css(self, oeb_book):
css = u''
for item in oeb_book.manifest:
if item.media_type == 'text/css':
css = item.data.cssText
break
return css
class OEB2HTMLNoCSSizer(OEB2HTML):
'''
This will remap a small number of CSS styles to equivalent HTML tags.
'''
def dump_text(self, elem, stylizer, page):
'''
@elem: The element in the etree that we are working on.
@stylizer: The style information attached to the element.
'''
# We can only processes tags. If there isn't a tag return any text.
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
p = elem.getparent()
if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
and elem.tail:
return [elem.tail]
return ['']
# Setup our variables.
text = ['']
style = stylizer.style(elem)
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
if tag == 'body':
tag = 'div'
attribs['id'] = self.get_link_id(page.href, '')
tags.append(tag)
# Ignore anything that is set to not be displayed.
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
return ['']
# Remove attributes we won't want.
if 'class' in attribs:
del attribs['class']
if 'style' in attribs:
del attribs['style']
attribs = self.rewrite_links(tag, attribs, page)
attribs = self.rewrite_images(tag, attribs, page)
# Turn the rest of the attributes into a string we can write with the tag.
at = ''
for k, v in attribs.items():
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
# Write the tag.
text.append('<%s%s>' % (tag, at))
# Turn styles into tags.
if style['font-weight'] in ('bold', 'bolder'):
text.append('<b>')
tags.append('b')
if style['font-style'] == 'italic':
text.append('<i>')
tags.append('i')
if style['text-decoration'] == 'underline':
text.append('<u>')
tags.append('u')
if style['text-decoration'] == 'line-through':
text.append('<s>')
tags.append('s')
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text:
text.append(elem.text)
# Recurse down into tags within the tag we are in.
for item in elem:
text += self.dump_text(item, stylizer, page)
# Close all open tags.
tags.reverse()
for t in tags:
text.append('</%s>' % t)
# Add the text that is outside of the tag.
if hasattr(elem, 'tail') and elem.tail:
text.append(elem.tail)
return text
class OEB2HTMLInlineCSSizer(OEB2HTML):
'''
Turns external CSS classes into inline style attributes.
'''
def dump_text(self, elem, stylizer, page):
'''
@elem: The element in the etree that we are working on.
@stylizer: The style information attached to the element.
'''
# We can only processes tags. If there isn't a tag return any text.
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
p = elem.getparent()
if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
and elem.tail:
return [elem.tail]
return ['']
# Setup our variables.
text = ['']
style = stylizer.style(elem)
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
style_a = '%s' % style
if tag == 'body':
tag = 'div'
attribs['id'] = self.get_link_id(page.href, '')
if not style['page-break-before'] == 'always':
style_a = 'page-break-before: always;' + ' ' if style_a else '' + style_a
tags.append(tag)
# Remove attributes we won't want.
if 'class' in attribs:
del attribs['class']
if 'style' in attribs:
del attribs['style']
attribs = self.rewrite_links(tag, attribs, page)
attribs = self.rewrite_images(tag, attribs, page)
# Turn the rest of the attributes into a string we can write with the tag.
at = ''
for k, v in attribs.items():
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
# Turn style into strings for putting in the tag.
style_t = ''
if style_a:
style_t = ' style="%s"' % style_a
# Write the tag.
text.append('<%s%s%s>' % (tag, at, style_t))
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text:
text.append(elem.text)
# Recurse down into tags within the tag we are in.
for item in elem:
text += self.dump_text(item, stylizer, page)
# Close all open tags.
tags.reverse()
for t in tags:
text.append('</%s>' % t)
# Add the text that is outside of the tag.
if hasattr(elem, 'tail') and elem.tail:
text.append(elem.tail)
return text
class OEB2HTMLClassCSSizer(OEB2HTML):
'''
Use CSS classes. css_style option can specify whether to use
inline classes (style tag in the head) or reference an external
CSS file called style.css.
'''
def mlize_spine(self, oeb_book):
output = []
for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href)
stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
output.append('\n\n')
if self.opts.htmlz_class_style == 'external':
css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
else:
css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + [css] + [u'</head><body>'] + output + [u'</body></html>']
return ''.join(output)
def dump_text(self, elem, stylizer, page):
'''
@elem: The element in the etree that we are working on.
@stylizer: The style information attached to the element.
'''
# We can only processes tags. If there isn't a tag return any text.
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
p = elem.getparent()
if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
and elem.tail:
return [elem.tail]
return ['']
# Setup our variables.
text = ['']
#style = stylizer.style(elem)
tags = []
tag = barename(elem.tag)
attribs = elem.attrib
if tag == 'body':
tag = 'div'
attribs['id'] = self.get_link_id(page.href, '')
tags.append(tag)
# Remove attributes we won't want.
if 'style' in attribs:
del attribs['style']
attribs = self.rewrite_links(tag, attribs, page)
attribs = self.rewrite_images(tag, attribs, page)
# Turn the rest of the attributes into a string we can write with the tag.
at = ''
for k, v in attribs.items():
at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
# Write the tag.
text.append('<%s%s>' % (tag, at))
# Process tags that contain text.
if hasattr(elem, 'text') and elem.text:
text.append(elem.text)
# Recurse down into tags within the tag we are in.
for item in elem:
text += self.dump_text(item, stylizer, page)
# Close all open tags.
tags.reverse()
for t in tags:
text.append('</%s>' % t)
# Add the text that is outside of the tag.
if hasattr(elem, 'tail') and elem.tail:
text.append(elem.tail)
return text
def oeb2html_no_css(oeb_book, log, opts):
izer = OEB2HTMLNoCSSizer(log)
html = izer.oeb2html(oeb_book, opts)
images = izer.images
return (html, images)
def oeb2html_inline_css(oeb_book, log, opts):
izer = OEB2HTMLInlineCSSizer(log)
html = izer.oeb2html(oeb_book, opts)
images = izer.images
return (html, images)
def oeb2html_class_css(oeb_book, log, opts):
izer = OEB2HTMLClassCSSizer(log)
setattr(opts, 'class_style', 'inline')
html = izer.oeb2html(oeb_book, opts)
images = izer.images
return (html, images)

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
class HTMLZOutput(OutputFormatPlugin):
name = 'HTMLZ Output'
author = 'John Schember'
file_type = 'htmlz'
options = set([
OptionRecommendation(name='htmlz_css_type', recommended_value='class',
level=OptionRecommendation.LOW,
choices=['class', 'inline', 'tag'],
help=_('Specify the handling of CSS. Default is class.\n'
'class: Use CSS classes and have elements reference them.\n'
'inline: Write the CSS as an inline style attribute.\n'
'tag: Turn as many CSS styles as possible into HTML tags.'
)),
OptionRecommendation(name='htmlz_class_style', recommended_value='external',
level=OptionRecommendation.LOW,
choices=['external', 'inline'],
help=_('How to handle the CSS when using css-type = \'class\'.\n'
'Default is external.\n'
'external: Use an external CSS file that is linked in the document.\n'
'inline: Place the CSS in the head section of the document.'
)),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
# HTML
if opts.htmlz_css_type == 'inline':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
OEB2HTMLizer = OEB2HTMLInlineCSSizer
elif opts.htmlz_css_type == 'tag':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
OEB2HTMLizer = OEB2HTMLNoCSSizer
else:
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
with TemporaryDirectory('_htmlz_output') as tdir:
htmlizer = OEB2HTMLizer(log)
html = htmlizer.oeb2html(oeb_book, opts)
with open(os.path.join(tdir, 'index.html'), 'wb') as tf:
tf.write(html)
# CSS
if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
tf.write(htmlizer.get_css(oeb_book))
# Images
images = htmlizer.images
if images:
if not os.path.exists(os.path.join(tdir, 'images')):
os.makedirs(os.path.join(tdir, 'images'))
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images:
fname = os.path.join(tdir, 'images', images[item.href])
with open(fname, 'wb') as img:
img.write(item.data)
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
htmlz = ZipFile(output_path, 'w')
htmlz.add_dir(tdir)

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
'''
Read meta information from TXT files
Read meta information from extZ (TXTZ, HTMLZ...) files.
'''
import os

View File

@ -20,7 +20,8 @@ class RemoveAdobeMargins(object):
self.oeb, self.opts, self.log = oeb, opts, log
for item in self.oeb.manifest:
if item.media_type == 'application/vnd.adobe-page-template+xml':
if item.media_type in ('application/vnd.adobe-page-template+xml',
'application/vnd.adobe.page-template+xml'):
self.log('Removing page margins specified in the'
' Adobe page template')
for elem in item.data.xpath(
@ -35,7 +36,7 @@ class RemoveFakeMargins(object):
'''
Remove left and right margins from paragraph/divs if the same margin is specified
on almost all the elements of at that level.
on almost all the elements at that level.
Must be called only after CSS flattening
'''

View File

@ -721,7 +721,7 @@ Reflow::render(int first_page, int last_page) {
globalParams->setTextEncoding(encoding);
int doc_pages = doc->getNumPages();
if (last_page < 1 or last_page > doc_pages) last_page = doc_pages;
if (last_page < 1 || last_page > doc_pages) last_page = doc_pages;
if (first_page < 1) first_page = 1;
if (first_page > last_page) first_page = last_page;

View File

@ -270,6 +270,8 @@ class BookInfo(QWebView):
<style type="text/css">
body, td {background-color: transparent; font-size: %dpx; color: %s }
a { text-decoration: none; color: blue }
div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
table { margin-bottom: 0; padding-bottom: 0; }
</style>
</head>
<body>
@ -278,9 +280,10 @@ class BookInfo(QWebView):
<html>
'''%(f, c)
if self.vertical:
extra = ''
if comments:
rows += u'<tr><td colspan="2">%s</td></tr>'%comments
self.setHtml(templ%(u'<table>%s</table>'%rows))
extra = u'<div class="description">%s</div>'%comments
self.setHtml(templ%(u'<table>%s</table>%s'%(rows, extra)))
else:
left_pane = u'<table>%s</table>'%rows
right_pane = u'<div>%s</div>'%comments

View File

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.convert.htmlz_output_ui import Ui_Form
from calibre.gui2.convert import Widget
format_model = None
class PluginWidget(Widget, Ui_Form):
TITLE = _('HTMLZ Output')
HELP = _('Options specific to')+' HTMLZ '+_('output')
COMMIT_NAME = 'htmlz_output'
ICON = I('mimetypes/html.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, ['htmlz_css_type', 'htmlz_class_style'])
self.db, self.book_id = db, book_id
for x in get_option('htmlz_css_type').option.choices:
self.opt_htmlz_css_type.addItem(x)
for x in get_option('htmlz_class_style').option.choices:
self.opt_htmlz_class_style.addItem(x)
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -0,0 +1,61 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Form</class>
<widget class="QWidget" name="Form">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>438</width>
<height>300</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="2" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>246</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>How to handle CSS</string>
</property>
<property name="buddy">
<cstring>opt_htmlz_css_type</cstring>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="opt_htmlz_css_type">
<property name="minimumContentsLength">
<number>20</number>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>How to handle class based CSS</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="opt_htmlz_class_style"/>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>

View File

@ -136,17 +136,17 @@ class PostInstall:
self.icon_resources = []
self.menu_resources = []
self.mime_resources = []
if islinux:
if islinux or isfreebsd:
self.setup_completion()
self.install_man_pages()
if islinux:
if islinux or isfreebsd:
self.setup_desktop_integration()
self.create_uninstaller()
from calibre.utils.config import config_dir
if os.path.exists(config_dir):
os.chdir(config_dir)
if islinux:
if islinux or isfreebsd:
for f in os.listdir('.'):
if os.stat(f).st_uid == 0:
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)

View File

@ -40,3 +40,84 @@ Sections
glossary
The main |app| user interface
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
gui
Adding your favorite news website to |app|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
news
The |app| e-book viewer
^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
viewer
Customizing |app|'s e-book conversion
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
viewer
Editing e-book metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
viewer
Frequently Asked Questions
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
faq
Tutorials
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
tutorials
Customizing |app|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
customize
The Command Line Interface
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
cli/cli-index
Setting up a |app| development environment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. toctree::
:maxdepth: 2
develop

View File

@ -263,20 +263,18 @@ Tips for developing new recipes
The best way to develop new recipes is to use the command line interface. Create the recipe using your favorite python editor and save it to a file say :file:`myrecipe.recipe`. The `.recipe` extension is required. You can download content using this recipe with the command::
ebook-convert myrecipe.recipe output_dir --test -vv
ebook-convert myrecipe.recipe .epub --test -vv --debug-pipeline debug
The :command:`ebook-convert` will download all the webpages and save them to the directory :file:`output_dir`, creating it if necessary. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds.
The command :command:`ebook-convert` will download all the webpages and save them to the EPUB file :file:`myrecipe.epub`. The :option:`-vv` makes ebook-convert spit out a lot of information about what it is doing. The :option:`--test` makes it download only a couple of articles from at most two feeds. In addition, ebook-convert will put the downloaded HTML into the ``debug/input`` directory, where ``debug`` is the directory you specified in the :option:`--debug-pipeline` option.
Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`index.html` in a browser. Once you're satisfied that the download and preprocessing is happening correctly, you can generate ebooks in different formats as shown below::
Once the download is complete, you can look at the downloaded :term:`HTML` by opening the file :file:`debug/input/index.html` in a browser. Once you're satisfied that the download and preprocessing is happening correctly, you can generate ebooks in different formats as shown below::
ebook-convert myrecipe.recipe myrecipe.epub
ebook-convert myrecipe.recipe myrecipe.mobi
...
If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, add a comment to the ticket http://bugs.calibre-ebook.com/ticket/405
Alternatively, you could just post your recipe in the calibre forum at http://www.mobileread.com/forums/forumdisplay.php?f=166 to share it with other calibre users.
If you're satisfied with your recipe, and you feel there is enough demand to justify its inclusion into the set of built-in recipes, post your recipe in the `calibre recipes forum <http://www.mobileread.com/forums/forumdisplay.php?f=228>`_ to share it with other calibre users.
.. seealso::

File diff suppressed because it is too large Load Diff

View File

@ -1386,12 +1386,19 @@ ol, ul { padding-left: 2em; }
self.purgedata()
def s_text_s(self, tag, attrs):
""" Generate a number of spaces. ODF has an element; HTML uses &nbsp;
We use &#160; so we can send the output through an XML parser if we desire to
# Changed by Kovid to fix non breaking spaces being prepended to
# element instead of being part of the text flow.
# We don't use an entity for the nbsp as the contents of self.data will
# be escaped on writeout.
""" Generate a number of spaces. We use the non breaking space for
the text:s ODF element.
"""
c = attrs.get( (TEXTNS,'c'),"1")
for x in xrange(int(c)):
self.writeout('&#160;')
try:
c = int(attrs.get((TEXTNS, 'c'), 1))
except:
c = 0
if c > 0:
self.data.append(u'\u00a0'*c)
def s_text_span(self, tag, attrs):
""" The <text:span> element matches the <span> element in HTML. It is