Sync to pluginize

This commit is contained in:
John Schember 2009-04-21 19:20:06 -04:00
commit 6f7d0f7696
72 changed files with 20767 additions and 16531 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.5.7'
__version__ = '0.5.8'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -282,6 +282,9 @@ from calibre.ebooks.pdb.input import PDBInput
from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
@ -289,7 +292,8 @@ from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
FB2Input, ODTInput, RTFInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -59,7 +59,10 @@ class HTMLRenderer(object):
def render_html(path_to_html, width=590, height=750):
from PyQt4.QtWebKit import QWebPage
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize, \
QApplication
if QApplication.instance() is None:
QApplication([])
path_to_html = os.path.abspath(path_to_html)
with CurrentDir(os.path.dirname(path_to_html)):
page = QWebPage()

View File

@ -116,6 +116,25 @@ def add_pipeline_options(parser, plumber):
'font_size_mapping',
'line_height',
'linearize_tables',
'extra_css',
]
),
'STRUCTURE DETECTION' : (
_('Control auto-detection of document structure.'),
[
'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
]
),
'TABLE OF CONTENTS' : (
_('Control the automatic generation of a Table of Contents. By '
'default, if the source file has a Table of Contents, it will '
'be used in preference to the automatically generated one.'),
[
'level1_toc', 'level2_toc', 'level3_toc',
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
'use_auto_toc', 'toc_filter',
]
),
@ -130,7 +149,8 @@ def add_pipeline_options(parser, plumber):
}
group_order = ['', 'LOOK AND FEEL', 'METADATA', 'DEBUG']
group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
'TABLE OF CONTENTS', 'METADATA', 'DEBUG']
for group in group_order:
desc, options = groups[group]
@ -163,6 +183,10 @@ def main(args=sys.argv):
add_pipeline_options(parser, plumber)
opts = parser.parse_args(args)[0]
y = lambda q : os.path.abspath(os.path.expanduser(q))
for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None:
setattr(opts, x, y(getattr(opts, x)))
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
for n in parser.options_iter()

View File

@ -3,13 +3,21 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
import os, re
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre import extract, walk
def supported_input_formats():
    '''
    Return the collection of input formats accepted by the conversion
    pipeline: whatever ``available_input_formats()`` reports, with the
    archive extensions ``zip``, ``rar`` and ``oebzip`` added to it.
    '''
    from calibre.customize.ui import available_input_formats
    archive_exts = ('zip', 'rar', 'oebzip')
    formats = available_input_formats()
    for ext in archive_exts:
        formats.add(ext)
    return formats
class OptionValues(object):
pass
@ -121,6 +129,105 @@ OptionRecommendation(name='dont_split_on_page_breaks',
)
),
OptionRecommendation(name='level1_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that '
'should be added to the Table of Contents at level one. If '
'this is specified, it takes precedence over other forms '
'of auto-detection.'
)
),
OptionRecommendation(name='level2_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level two. Each entry is added '
'under the previous level one entry.'
)
),
OptionRecommendation(name='level3_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level three. Each entry '
'is added under the previous level two entry.'
)
),
OptionRecommendation(name='use_auto_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Normally, if the source file already has a Table of '
'Contents, it is used in preference to the auto-generated one. '
'With this option, the auto-generated one is always used.'
)
),
OptionRecommendation(name='no_chapters_in_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_("Don't add auto-detected chapters to the Table of "
'Contents.'
)
),
OptionRecommendation(name='toc_threshold',
recommended_value=6, level=OptionRecommendation.LOW,
help=_(
'If fewer than this number of chapters is detected, then links '
'are added to the Table of Contents. Default: %default')
),
OptionRecommendation(name='max_toc_links',
recommended_value=50, level=OptionRecommendation.LOW,
help=_('Maximum number of links to insert into the TOC. Set to 0 '
'to disable. Default is: %default. Links are only added to the '
'TOC if less than the threshold number of chapters were detected.'
)
),
OptionRecommendation(name='toc_filter',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Remove entries from the Table of Contents whose titles '
'match the specified regular expression. Matching entries and all '
'their children are removed.'
)
),
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
"re:test(., 'chapter|book|section|part', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '
'"chapter","book","section" or "part" as chapter titles as '
'well as any tags that have class="chapter". The expression '
'used must evaluate to a list of elements. To disable chapter '
'detection, use the expression "/". See the XPath Tutorial '
'in the calibre User Manual for further help on using this '
'feature.'
)
),
OptionRecommendation(name='chapter_mark',
recommended_value='pagebreak', level=OptionRecommendation.LOW,
choices=['pagebreak', 'rule', 'both', 'none'],
help=_('Specify how to mark detected chapters. A value of '
'"pagebreak" will insert page breaks before chapters. '
'A value of "rule" will insert a line before chapters. '
'A value of "none" will disable chapter marking and a '
'value of "both" will use both page breaks and lines '
'to mark chapters.')
),
OptionRecommendation(name='extra_css',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Either the path to a CSS stylesheet or raw CSS. '
'This CSS will be appended to the style rules from '
'the source file, so it can be used to override those '
'rules.')
),
OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW,
@ -130,6 +237,7 @@ OptionRecommendation(name='read_metadata_from_opf',
'file.')
),
OptionRecommendation(name='title',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the title.')),
@ -187,11 +295,14 @@ OptionRecommendation(name='language',
help=_('Set the language.')),
]
input_fmt = os.path.splitext(self.input)[1]
if not input_fmt:
raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower()
if input_fmt in ('zip', 'rar', 'oebzip'):
self.log('Processing archive...')
tdir = PersistentTemporaryDirectory('_plumber')
self.input, input_fmt = self.unarchive(self.input, tdir)
if os.path.exists(self.output) and os.path.isdir(self.output):
output_fmt = 'oeb'
@ -201,7 +312,7 @@ OptionRecommendation(name='language',
output_fmt = '.oeb'
output_fmt = output_fmt[1:].lower()
self.input_plugin = plugin_for_input_format(input_fmt)
self.input_plugin = plugin_for_input_format(input_fmt)
self.output_plugin = plugin_for_output_format(output_fmt)
if self.input_plugin is None:
@ -224,6 +335,43 @@ OptionRecommendation(name='language',
# plugins.
self.merge_plugin_recommendations()
@classmethod
def unarchive(self, path, tdir):
    '''
    Extract the archive at `path` into `tdir` and locate the ebook in it.

    Every available input format other than the HTML variants is tried
    first. ``txt`` and ``rtf`` files smaller than 2048 bytes are passed
    over. If no such file is found, the choice is delegated to
    :meth:`find_html_index`.

    :param path: Path to the archive.
    :param tdir: Directory to extract into.
    :return: ``(path_to_file, extension)``
    :raises ValueError: (from :meth:`find_html_index`) if the archive
        contains no HTML file either.
    '''
    extract(path, tdir)
    files = list(walk(tdir))
    from calibre.customize.ui import available_input_formats
    fmts = available_input_formats()
    for x in ('htm', 'html', 'xhtm', 'xhtml'):
        # discard() instead of remove(): remove() raises KeyError when one
        # of the HTML variants is not a registered input format.
        fmts.discard(x)

    for ext in fmts:
        suffix = '.' + ext
        for f in files:
            if f.lower().endswith(suffix):
                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
                    # Skip tiny text files and keep looking.
                    continue
                return f, ext
    # HTML is handled last.
    return self.find_html_index(files)
@classmethod
def find_html_index(self, files):
    '''
    Given a list of files, find the most likely root HTML file in the
    list.

    A file whose base name (without extension, case-insensitively) is
    ``toc`` is preferred, then one named ``index``. Among several such
    files the smallest wins. Otherwise the largest HTML file is returned.

    :param files: Paths to existing files.
    :return: ``(path, extension)`` with the extension lower-cased and
        without the leading dot.
    :raises ValueError: If none of `files` has an (x)htm(l) extension.
    '''
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))
    # Ascending by size. A key function replaces the old cmp-based
    # decorate/sort/undecorate; list.sort is stable either way, so ties
    # keep their input order.
    html_files.sort(key=lambda f: os.stat(f).st_size)
    for q in ('toc', 'index'):
        for f in html_files:
            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    largest = html_files[-1]
    return largest, os.path.splitext(largest)[1].lower()[1:]
def get_option_by_name(self, name):
for group in (self.input_options, self.pipeline_options,
self.output_options):
@ -237,6 +385,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level:
rec.recommended_value = val
rec.level = level
def merge_ui_recommendations(self, recommendations):
'''
@ -248,6 +397,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level and rec.level < rec.HIGH:
rec.recommended_value = val
rec.level = level
def read_user_metadata(self):
'''
@ -332,6 +482,9 @@ OptionRecommendation(name='language',
self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile
from calibre.ebooks.oeb.transforms.structure import DetectStructure
DetectStructure()(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size
if fbase == 0:
@ -342,6 +495,9 @@ OptionRecommendation(name='language',
else:
fkey = map(float, fkey.split(','))
if self.opts.extra_css and os.path.exists(self.opts.extra_css):
self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
flattener = CSSFlattener(fbase=fbase, fkey=fkey,
lineh=self.opts.line_height,
untable=self.opts.linearize_tables)
@ -364,6 +520,8 @@ OptionRecommendation(name='language',
trimmer = ManifestTrimmer()
trimmer(self.oeb, self.opts)
self.oeb.toc.rationalize_play_orders()
self.log.info('Creating %s...'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log)
@ -384,4 +542,3 @@ def create_oebbook(log, path_or_stream, opts, reader=None):
reader()(oeb, path_or_stream)
return oeb

View File

@ -15,130 +15,17 @@ from calibre.ebooks import DRMError
from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.utils.zipfile import ZipFile
from calibre.customize.ui import run_plugins_on_preprocess
def lit2opf(path, tdir, opts):
from calibre.ebooks.lit.reader import LitReader
print 'Exploding LIT file:', path
reader = LitReader(path)
reader.extract_content(tdir, False)
opf = None
for opf in walk(tdir):
if opf.lower().endswith('.opf'):
break
if not opf.endswith('.opf'):
opf = None
if opf is not None: # Check for url-quoted filenames
_opf = OPF(opf, os.path.dirname(opf))
replacements = []
for item in _opf.itermanifest():
href = item.get('href', '')
path = os.path.join(os.path.dirname(opf), *(href.split('/')))
if not os.path.exists(path) and os.path.exists(path.replace('&', '%26')):
npath = path
path = path.replace('&', '%26')
replacements.append((path, npath))
if replacements:
print 'Fixing quoted filenames...'
for path, npath in replacements:
if os.path.exists(path):
os.rename(path, npath)
for f in walk(tdir):
with open(f, 'r+b') as f:
raw = f.read()
for path, npath in replacements:
raw = raw.replace(os.path.basename(path), os.path.basename(npath))
f.seek(0)
f.truncate()
f.write(raw)
return opf
def mobi2opf(path, tdir, opts):
from calibre.ebooks.mobi.reader import MobiReader
print 'Exploding MOBI file:', path.encode('utf-8') if isinstance(path, unicode) else path
reader = MobiReader(path)
reader.extract_content(tdir)
files = list(walk(tdir))
opts.encoding = 'utf-8'
for f in files:
if f.lower().endswith('.opf'):
return f
html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
opf = OPFCreator(tdir, mi)
opf.create_manifest([(hf[0], None)])
opf.create_spine([hf[0]])
ans = os.path.join(tdir, 'metadata.opf')
opf.render(open(ans, 'wb'))
return ans
def fb22opf(path, tdir, opts):
from calibre.ebooks.lrf.fb2.convert_from import to_html
print 'Converting FB2 to HTML...'
return to_html(path, tdir)
def rtf2opf(path, tdir, opts):
from calibre.ebooks.lrf.rtf.convert_from import generate_html
generate_html(path, tdir)
return os.path.join(tdir, 'metadata.opf')
def txt2opf(path, tdir, opts):
from calibre.ebooks.lrf.txt.convert_from import generate_html
generate_html(path, opts.encoding, tdir)
return os.path.join(tdir, 'metadata.opf')
def pdf2opf(path, tdir, opts):
from calibre.ebooks.lrf.pdf.convert_from import generate_html
generate_html(path, tdir)
opts.dont_split_on_page_breaks = True
return os.path.join(tdir, 'metadata.opf')
def epub2opf(path, tdir, opts):
zf = ZipFile(path)
zf.extractall(tdir)
opts.chapter_mark = 'none'
encfile = os.path.join(tdir, 'META-INF', 'encryption.xml')
opf = None
for f in walk(tdir):
if f.lower().endswith('.opf'):
opf = f
break
if opf and os.path.exists(encfile):
if not process_encryption(encfile, opf):
raise DRMError(os.path.basename(path))
if opf is None:
raise ValueError('%s is not a valid EPUB file'%path)
return opf
def odt2epub(path, tdir, opts):
from calibre.ebooks.odt.to_oeb import Extract
opts.encoding = 'utf-8'
return Extract()(path, tdir)
MAP = {
'lit' : lit2opf,
'mobi' : mobi2opf,
'prc' : mobi2opf,
'azw' : mobi2opf,
'fb2' : fb22opf,
'rtf' : rtf2opf,
'txt' : txt2opf,
'pdf' : pdf2opf,
'epub' : epub2opf,
'odt' : odt2epub,
}
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
def unarchive(path, tdir):
extract(path, tdir)
files = list(walk(tdir))
for ext in ['opf'] + list(MAP.keys()):
for f in files:
if f.lower().endswith('.'+ext):
@ -147,32 +34,32 @@ def unarchive(path, tdir):
return f, ext
return find_html_index(files)
def any2epub(opts, path, notification=None, create_epub=True,
def any2epub(opts, path, notification=None, create_epub=True,
oeb_cover=False, extract_to=None):
path = run_plugins_on_preprocess(path)
ext = os.path.splitext(path)[1]
if not ext:
raise ValueError('Unknown file type: '+path)
ext = ext.lower()[1:]
if opts.output is None:
opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
if ext in ['rar', 'zip', 'oebzip']:
path, ext = unarchive(path, tdir1)
print 'Found %s file in archive'%(ext.upper())
if ext in MAP.keys():
path = MAP[ext](path, tdir2, opts)
ext = 'opf'
if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
raise ValueError('Conversion from %s is not supported'%ext.upper())
print 'Creating EPUB file...'
html2epub(path, opts, notification=notification,
html2epub(path, opts, notification=notification,
create_epub=create_epub, oeb_cover=oeb_cover,
extract_to=extract_to)

View File

@ -11,12 +11,12 @@ from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
class EPUBInput(InputFormatPlugin):
name = 'EPUB Input'
author = 'Kovid Goyal'
description = 'Convert EPUB files (.epub) to HTML'
file_types = set(['epub'])
@classmethod
def decrypt_font(cls, key, path):
raw = open(path, 'rb').read()
@ -26,7 +26,7 @@ class EPUBInput(InputFormatPlugin):
with open(path, 'wb') as f:
f.write(decrypt)
f.write(raw[1024:])
@classmethod
def process_ecryption(cls, encfile, opf, log):
key = None
@ -51,25 +51,75 @@ class EPUBInput(InputFormatPlugin):
traceback.print_exc()
return False
@classmethod
def rationalize_cover(self, opf):
    '''
    Replace an HTML cover at the start of the spine with a rendered image.

    Nothing is done unless the OPF guide has a ``cover`` reference with a
    non-empty href, the spine and manifest are both non-empty, and the
    first manifest item's id equals the first spine item's idref.

    When those conditions hold:

      * the first spine item is removed,
      * the guide's cover reference is pointed at
        ``calibre_raster_cover.jpg``,
      * existing ``titlepage`` guide references are removed and a new one
        pointing at the original cover href is added,
      * the original cover is rendered with ``render_html`` and written to
        ``calibre_raster_cover.jpg`` in the current directory.

    :param opf: The parsed OPF; it is modified in place.
    '''
    guide_cover, guide_elem = None, None
    for guide_elem in opf.iterguide():
        if guide_elem.get('type', '').lower() == 'cover':
            guide_cover = guide_elem.get('href', '')
            break
    if not guide_cover:
        return
    spine = list(opf.iterspine())
    if not spine:
        return
    idref = spine[0].get('idref', '')
    manifest = list(opf.itermanifest())
    if not manifest:
        return
    if manifest[0].get('id', False) != idref:
        return
    spine[0].getparent().remove(spine[0])
    guide_elem.set('href', 'calibre_raster_cover.jpg')
    # Iterate over a copy because elements are removed while looping.
    for elem in list(opf.iterguide()):
        if elem.get('type', '').lower() == 'titlepage':
            elem.getparent().remove(elem)
    from calibre.ebooks.oeb.base import OPF
    t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
    t.set('type', 'titlepage')
    t.set('href', guide_cover)
    t.set('title', 'Title Page')
    from calibre.ebooks import render_html
    # Use a context manager so the image file is flushed and closed
    # deterministically (the handle used to be left to the garbage
    # collector).
    with open('calibre_raster_cover.jpg', 'wb') as f:
        f.write(render_html(guide_cover).data)
def convert(self, stream, options, file_ext, log, accelerators):
from calibre.utils.zipfile import ZipFile
from calibre import walk
from calibre.ebooks import DRMError
from calibre.ebooks.metadata.opf2 import OPF
zf = ZipFile(stream)
zf.extractall(os.getcwd())
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
opf = None
for f in walk('.'):
for f in walk(u'.'):
if f.lower().endswith('.opf'):
opf = f
opf = os.path.abspath(f)
break
path = getattr(stream, 'name', 'stream')
if opf is None:
raise ValueError('%s is not a valid EPUB file'%path)
if os.path.exists(encfile):
if not self.process_encryption(encfile, opf, log):
raise DRMError(os.path.basename(path))
return os.path.join(os.getcwd(), opf)
opf = os.path.relpath(opf, os.getcwdu())
parts = os.path.split(opf)
opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
if len(parts) > 1:
delta = '/'.join(parts[:-1])+'/'
for elem in opf.itermanifest():
elem.set('href', delta+elem.get('href'))
for elem in opf.iterguide():
elem.set('href', delta+elem.get('href'))
self.rationalize_cover(opf)
with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())
return os.path.abspath('content.opf')

View File

@ -0,0 +1,74 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to HTML
"""
import os
from base64 import b64decode
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
class FB2Input(InputFormatPlugin):
    '''
    Input plugin for FictionBook 2 (.fb2) files.

    The document is transformed to ``index.xhtml`` with the bundled FB2
    XSL stylesheet and a ``metadata.opf`` describing the result is written
    next to it.
    '''

    name        = 'FB2 Input'
    author      = 'Anatoly Shipitsin'
    description = 'Convert FB2 files to HTML'
    file_types  = set(['fb2'])

    # Build the Table of Contents from the h1/h2/h3 headings.
    recommendations = set([
        ('level1_toc', '//h:h1', OptionRecommendation.MED),
        ('level2_toc', '//h:h2', OptionRecommendation.MED),
        ('level3_toc', '//h:h3', OptionRecommendation.MED),
        ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the FB2 document readable from `stream`.

        All output (embedded binaries, ``index.xhtml``, ``metadata.opf``)
        is written to the current working directory.

        :param stream: Seekable file-like object containing the FB2 XML.
        :param log: Logger used for progress messages.
        :return: Absolute path to the generated ``metadata.opf``.

        `options`, `file_ext` and `accelerators` are not used here.
        '''
        from calibre.resources import fb2_xsl
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS
        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}

        log.debug('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(stream, parser)
        self.extract_embedded_content(doc)
        log.debug('Converting XML to HTML...')
        styledoc = etree.fromstring(fb2_xsl)
        transform = etree.XSLT(styledoc)
        result = transform(doc)
        with open('index.xhtml', 'wb') as f:
            f.write(transform.tostring(result))

        # Parsing consumed the stream; rewind before reading metadata.
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]

        opf = OPFCreator(os.getcwdu(), mi)
        # The manifest lists everything written so far (index.xhtml and
        # the extracted binaries); metadata.opf does not exist yet.
        entries = [(name, guess_type(name)[0]) for name in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])

        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
            if href is not None:
                if href.startswith('#'):
                    href = href[1:]
                opf.guide.set_cover(os.path.abspath(href))

        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(os.getcwd(), 'metadata.opf')

    def extract_embedded_content(self, doc):
        '''
        Write every base64 encoded ``binary`` child of the document root
        that has an ``id`` attribute to a file named after that id, in the
        current working directory.
        '''
        for elem in doc.xpath('./*'):
            if 'binary' in elem.tag and 'id' in elem.attrib:
                # NOTE(review): the id comes straight from the document
                # and is used as a file name without sanitising; a crafted
                # id containing path separators could write outside the
                # working directory.
                fname = elem.attrib['id']
                # elem.text is None for an empty element; treat that as
                # zero bytes of data instead of raising AttributeError.
                data = b64decode((elem.text or '').strip())
                with open(fname, 'wb') as f:
                    f.write(data)

View File

@ -1,125 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to .lrf
"""
import os, sys, shutil, logging
from base64 import b64decode
from lxml import etree
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers
from calibre.resources import fb2_xsl
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata import MetaInformation
def option_parser():
    '''
    Return the LRF option parser extended with the two FB2 specific
    switches ``--debug-html-generation`` and ``--keep-intermediate-files``.
    '''
    usage = _('''%prog [options] mybook.fb2
%prog converts mybook.fb2 to mybook.lrf''')
    parser = lrf_option_parser(usage)
    parser.add_option(
        '--debug-html-generation', action='store_true', default=False,
        dest='debug_html_generation',
        help=_('Print generated HTML to stdout and quit.'))
    parser.add_option(
        '--keep-intermediate-files', action='store_true', default=False,
        help=_('Keep generated HTML files after completing conversion to LRF.'))
    return parser
def extract_embedded_content(doc):
    '''
    Write every base64 encoded ``binary`` child of the document root that
    has an ``id`` attribute to a file named after that id, in the current
    working directory.

    :param doc: Parsed FB2 document; must provide ``xpath()``.
    '''
    for elem in doc.xpath('./*'):
        if 'binary' in elem.tag and 'id' in elem.attrib:
            # NOTE(review): the id comes straight from the document and is
            # used as a file name without sanitising; a crafted id with
            # path separators could write outside the working directory.
            fname = elem.attrib['id']
            # elem.text is None for an empty element; treat that as zero
            # bytes of data instead of raising AttributeError.
            data = b64decode((elem.text or '').strip())
            with open(fname, 'wb') as f:
                f.write(data)
def to_html(fb2file, tdir):
    '''
    Convert the FB2 file `fb2file` to HTML inside the directory `tdir`.

    Embedded binaries, ``index.html`` and ``metadata.opf`` are all written
    into `tdir`. The working directory is changed to `tdir` for the
    duration of the call and always restored.

    :param fb2file: Path to the FB2 file (made absolute before chdir).
    :param tdir: Existing output directory.
    :return: Path to ``metadata.opf`` inside `tdir`.
    '''
    fb2file = os.path.abspath(fb2file)
    cwd = os.getcwd()
    try:
        os.chdir(tdir)
        print('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(fb2file, parser)
        extract_embedded_content(doc)
        print('Converting XML to HTML...')
        styledoc = etree.fromstring(fb2_xsl)
        transform = etree.XSLT(styledoc)
        result = transform(doc)
        with open('index.html', 'wb') as f:
            f.write(transform.tostring(result))
        # Metadata is best effort: fall back to empty metadata on failure.
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            with open(fb2file, 'rb') as f:
                mi = get_metadata(f, 'fb2')
        except Exception:
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(fb2file))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(tdir, 'metadata.opf')
    finally:
        os.chdir(cwd)
def generate_html(fb2file, encoding, logger):
    '''
    Convert `fb2file` to HTML in a fresh persistent temporary directory
    and return the path of the resulting ``index.html``.

    `encoding` and `logger` are accepted but not used.
    '''
    workdir = PersistentTemporaryDirectory('_fb22lrf')
    to_html(fb2file, workdir)
    index = os.path.join(workdir, 'index.html')
    return index
def process_file(path, options, logger=None):
# Convert the FB2 file at `path` to LRF (or LRS when options.lrs is set).
#
#   path    -- FB2 file; expanded with expanduser() and made absolute.
#   options -- parsed command line options. output, title, author,
#              category and freetext are filled in below when they are
#              unset (or, for the metadata fields, equal to _('Unknown')).
#   logger  -- optional logger; when None a 'fb22lrf' logger is created
#              at DEBUG or INFO level depending on options.verbose.
#
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the keep_intermediate_files / rmtree cleanup at the end runs
# inside the `finally` clause or only after a successful conversion --
# confirm against the original file before restructuring.
if logger is None:
level = logging.DEBUG if options.verbose else logging.INFO
logger = logging.getLogger('fb22lrf')
setup_cli_handlers(logger, level)
fb2 = os.path.abspath(os.path.expanduser(path))
# NOTE(review): f is not closed if get_metadata() raises.
f = open(fb2, 'rb')
mi = get_metadata(f, 'fb2')
f.close()
# The HTML is generated in a temporary directory; tdir is that directory.
htmlfile = generate_html(fb2, options.encoding, logger)
tdir = os.path.dirname(htmlfile)
cwd = os.getcwdu()
try:
# Default output name: input base name plus .lrs/.lrf, in the current directory.
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
# Fall back to the file name when the FB2 metadata has no title.
if not mi.title:
mi.title = os.path.splitext(os.path.basename(fb2))[0]
if (not options.title or options.title == _('Unknown')):
options.title = mi.title
if (not options.author or options.author == _('Unknown')) and mi.authors:
# pop() takes the last listed author and removes it from mi.authors.
options.author = mi.authors.pop()
if (not options.category or options.category == _('Unknown')) and mi.category:
options.category = mi.category
if (not options.freetext or options.freetext == _('Unknown')) and mi.comments:
options.freetext = mi.comments
# Run the HTML -> LRF conversion from inside the temporary directory.
os.chdir(tdir)
html_process_file(htmlfile, options, logger)
finally:
os.chdir(cwd)
# Remove the temporary directory unless the caller asked to keep it.
if getattr(options, 'keep_intermediate_files', False):
logger.debug('Intermediate files in '+ tdir)
else:
shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
    '''
    Command line entry point: convert the single FB2 file named on the
    command line. Returns 0 on success, or 1 (after printing the help
    text) when exactly one file was not supplied.
    '''
    opt_parser = option_parser()
    options, args = opt_parser.parse_args(args)
    if len(args) == 2:
        process_file(args[1], options, logger)
        return 0
    opt_parser.print_help()
    print('')
    print('No fb2 file specified')
    return 1
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,190 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys, shutil, logging, glob
from lxml import etree
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers
from calibre.libwand import convert, WandException
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.lrf.rtf.xsl import xhtml
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
def option_parser():
    '''
    Return the LRF option parser extended with the RTF specific
    ``--keep-intermediate-files`` switch.
    '''
    usage = _('''%prog [options] mybook.rtf
%prog converts mybook.rtf to mybook.lrf''')
    parser = lrf_option_parser(usage)
    parser.add_option('--keep-intermediate-files', default=False,
                      action='store_true')
    return parser
def convert_images(html, logger):
wmfs = glob.glob('*.wmf') + glob.glob('*.WMF')
for wmf in wmfs:
target = os.path.join(os.path.dirname(wmf), os.path.splitext(os.path.basename(wmf))[0]+'.jpg')
try:
convert(wmf, target)
html = html.replace(os.path.basename(wmf), os.path.basename(target))
except WandException, err:
logger.warning(u'Unable to convert image %s with error: %s'%(wmf, unicode(err)))
continue
return html
def process_file(path, options, logger=None):
# Convert the RTF file at `path` to LRF (or LRS when options.lrs is set).
#
#   path    -- RTF file; expanded with expanduser() and made absolute.
#   options -- parsed command line options. output, title, author,
#              category and freetext are filled in below when they are
#              unset (or, for the metadata fields, equal to 'Unknown').
#   logger  -- optional logger; when None an 'rtf2lrf' logger is created
#              at DEBUG or INFO level depending on options.verbose.
#
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the keep_intermediate_files / rmtree cleanup at the end runs
# inside the `finally` clause or only after a successful conversion --
# confirm against the original file before restructuring.
if logger is None:
level = logging.DEBUG if options.verbose else logging.INFO
logger = logging.getLogger('rtf2lrf')
setup_cli_handlers(logger, level)
rtf = os.path.abspath(os.path.expanduser(path))
# NOTE(review): f is not closed if get_metadata() raises.
f = open(rtf, 'rb')
mi = get_metadata(f, 'rtf')
f.close()
# The intermediate HTML is generated in a persistent temporary directory.
tdir = PersistentTemporaryDirectory('_rtf2lrf')
html = generate_html(rtf, tdir)
cwd = os.getcwdu()
try:
# Default output name: input base name plus .lrs/.lrf, in the current directory.
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
# Fall back to the file name when the RTF metadata has no title.
if not mi.title:
mi.title = os.path.splitext(os.path.basename(rtf))[0]
# NOTE(review): these comparisons use the untranslated literal 'Unknown'.
if (not options.title or options.title == 'Unknown'):
options.title = mi.title
# NOTE(review): reads mi.author here, while generate_html() in this file
# uses mi.authors -- confirm which attribute the metadata object provides.
if (not options.author or options.author == 'Unknown') and mi.author:
options.author = mi.author
if (not options.category or options.category == 'Unknown') and mi.category:
options.category = mi.category
if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
options.freetext = mi.comments
# Run the HTML -> LRF conversion from inside the temporary directory.
os.chdir(tdir)
html_process_file(html, options, logger)
finally:
os.chdir(cwd)
# Remove the temporary directory unless the caller asked to keep it.
if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:
logger.debug('Intermediate files in '+ tdir)
else:
shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
    '''
    Command line entry point: convert the single RTF file named on the
    command line. Returns 0 on success, or 1 (after printing the help
    text) when exactly one file was not supplied.
    '''
    opt_parser = option_parser()
    options, args = opt_parser.parse_args(args)
    if len(args) == 2:
        process_file(args[1], options, logger)
        return 0
    opt_parser.print_help()
    print('')
    print('No rtf file specified')
    return 1
def generate_xml(rtfpath, tdir):
    '''
    Convert the RTF file `rtfpath` to XML with rtf2xml, writing the result
    to ``index.xml`` inside `tdir`.

    The working directory is changed to `tdir` while the parser runs and
    is always restored afterwards.

    :param rtfpath: Path to the RTF file.
    :param tdir: Existing output directory.
    :return: Absolute path to the generated ``index.xml``.
    '''
    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
    # Resolve both paths BEFORE changing directory. Previously abspath()
    # was applied to rtfpath after chdir(tdir), so a relative path was
    # resolved against tdir instead of the caller's directory, and a
    # relative tdir made the output path point to the wrong place.
    rtfpath = os.path.abspath(rtfpath)
    tdir = os.path.abspath(tdir)
    ofile = os.path.join(tdir, 'index.xml')
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        parser = ParseRtf(
            in_file = rtfpath,
            out_file = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        parser.parse_rtf()
    finally:
        os.chdir(cwd)
    return ofile
def generate_html(rtfpath, tdir):
    '''
    Convert the RTF file `rtfpath` to HTML.

    The RTF is first converted to XML (see generate_xml), then transformed
    with the ``xhtml`` XSL stylesheet. ``index.html`` and ``metadata.opf``
    are written next to the intermediate XML file. The working directory
    is changed while converting and always restored.

    :param rtfpath: Path to the RTF file.
    :param tdir: Existing output directory.
    :return: Path to the generated ``index.html``.
    :raises Exception: If the RTF uses a feature rtf2xml rejects with
        RtfInvalidCodeException.
    '''
    print('Converting RTF to XML...')
    rtfpath = os.path.abspath(rtfpath)
    try:
        xml = generate_xml(rtfpath, tdir)
    except RtfInvalidCodeException:
        raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
    tdir = os.path.dirname(xml)
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        print('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        # Parse errors propagate to the caller. The BeautifulStoneSoup
        # clean-up fallback that used to follow was unreachable (it sat
        # behind an unconditional re-raise) and has been dropped.
        doc = etree.parse(xml, parser)
        print('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)
        transform = etree.XSLT(styledoc)
        result = transform(doc)
        html = os.path.join(tdir, 'index.html')
        res = transform.tostring(result)
        # Rewrite the first 'xmlns:html' declaration, if it occurs within
        # the first 100 characters, to a plain 'xmlns'.
        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
        f = open(html, 'wb')
        try:
            f.write(res)
        finally:
            f.close()
        # Metadata is best effort: fall back to empty metadata on failure.
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        try:
            f = open(rtfpath, 'rb')
            try:
                mi = get_metadata(f, 'rtf')
            finally:
                f.close()
        except Exception:
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        f = open('metadata.opf', 'wb')
        try:
            opf.render(f)
        finally:
            f.close()
    finally:
        os.chdir(cwd)
    return html
if __name__ == '__main__':
sys.exit(main())

View File

@ -4,13 +4,15 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch cover from LibraryThing.com based on ISBN number.
'''
import sys, socket, os, re, mechanize
import sys, socket, os, re
from calibre import browser as _browser
from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import BeautifulSoup
browser = None
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
class LibraryThingError(Exception):
pass
@ -30,15 +32,21 @@ def login(username, password, force=True):
browser['formusername'] = username
browser['formpassword'] = password
browser.submit()
def cover_from_isbn(isbn, timeout=5.):
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
global browser
if browser is None:
browser = _browser()
_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(timeout)
src = None
src = None
try:
return browser.open(OPENLIBRARY%isbn).read(), 'jpg'
except:
pass # Cover not found
if username and password:
login(username, password, force=False)
try:
src = browser.open('http://www.librarything.com/isbn/'+isbn).read().decode('utf-8', 'replace')
except Exception, err:
@ -55,7 +63,7 @@ def cover_from_isbn(isbn, timeout=5.):
url = url.find('img')
if url is None:
raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_SX\d+', '', url['src'])
url = re.sub(r'_S[XY]\d+', '', url['src'])
cover_data = browser.open(url).read()
return cover_data, url.rpartition('.')[-1]
finally:
@ -68,9 +76,9 @@ _('''
Fetch a cover image for the book identified by ISBN from LibraryThing.com
'''))
parser.add_option('-u', '--username', default=None,
parser.add_option('-u', '--username', default=None,
help='Username for LibraryThing.com')
parser.add_option('-p', '--password', default=None,
parser.add_option('-p', '--password', default=None,
help='Password for LibraryThing.com')
return parser
@ -81,13 +89,8 @@ def main(args=sys.argv):
parser.print_help()
return 1
isbn = args[1]
if opts.username and opts.password:
try:
login(opts.username, opts.password)
except mechanize.FormNotFoundError:
raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
cover_data, ext = cover_from_isbn(isbn)
cover_data, ext = cover_from_isbn(isbn, username=opts.username,
password=opts.password)
if not ext:
ext = 'jpg'
oname = os.path.abspath(isbn+'.'+ext)
@ -96,4 +99,4 @@ def main(args=sys.argv):
return 0
if __name__ == '__main__':
sys.exit(main())
sys.exit(main())

View File

@ -0,0 +1,67 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert an ODT file into an Open Ebook
'''
import os
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
from calibre.customize.conversion import InputFormatPlugin
class Extract(ODF2XHTML):
def extract_pictures(self, zf):
if not os.path.exists('Pictures'):
os.makedirs('Pictures')
for name in zf.namelist():
if name.startswith('Pictures'):
data = zf.read(name)
with open(name, 'wb') as f:
f.write(data)
def __call__(self, stream, odir):
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
if not os.path.exists(odir):
os.makedirs(odir)
with CurrentDir(odir):
print 'Extracting ODT file...'
html = self.odf2xhtml(stream)
with open('index.xhtml', 'wb') as f:
f.write(html.encode('utf-8'))
zf = ZipFile(stream, 'r')
self.extract_pictures(zf)
stream.seek(0)
mi = get_metadata(stream, 'odt')
if not mi.title:
mi.title = _('Unknown')
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwd())])
opf.create_spine([os.path.abspath('index.xhtml')])
with open('metadata.opf', 'wb') as f:
opf.render(f)
return os.path.abspath('metadata.opf')
class ODTInput(InputFormatPlugin):
    '''
    Input plugin that hands ODT files to :class:`Extract`.
    '''

    name = 'ODT Input'
    author = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types = set(['odt'])

    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Unpack `stream` into the current directory and return what
        :class:`Extract` returns (the path to the generated OPF file).
        '''
        extractor = Extract()
        return extractor(stream, '.')

View File

@ -1,72 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert an ODT file into an Open Ebook
'''
import os, sys
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
from calibre.utils.zipfile import ZipFile
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.odt import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
class Extract(ODF2XHTML):
def extract_pictures(self, zf):
if not os.path.exists('Pictures'):
os.makedirs('Pictures')
for name in zf.namelist():
if name.startswith('Pictures'):
data = zf.read(name)
with open(name, 'wb') as f:
f.write(data)
def __call__(self, path, odir):
if not os.path.exists(odir):
os.makedirs(odir)
path = os.path.abspath(path)
with CurrentDir(odir):
print 'Extracting ODT file...'
html = self.odf2xhtml(path)
with open('index.html', 'wb') as f:
f.write(html.encode('utf-8'))
with open(path, 'rb') as f:
zf = ZipFile(f, 'r')
self.extract_pictures(zf)
f.seek(0)
mi = get_metadata(f)
if not mi.title:
mi.title = os.path.splitext(os.path.basename(path))
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwd())])
opf.create_spine([os.path.abspath('index.html')])
with open('metadata.opf', 'wb') as f:
opf.render(f)
return os.path.abspath('metadata.opf')
def option_parser():
    '''
    Build the command line parser for the ODT extraction tool.
    '''
    cli = OptionParser('%prog [options] file.odt')
    cli.add_option(
        '-o', '--output-dir',
        default='.',
        help=_('The output directory. Defaults to the current directory.'))
    return cli
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print 'No ODT file specified'
return 1
Extract()(args[1], os.path.abspath(opts.output_dir))
print 'Extracted to', os.path.abspath(opts.output_dir)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -41,10 +41,12 @@ NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
CALIBRE_NS = 'http://calibre.kovidgoyal.net/2009/metadata'
RE_NS = 'http://exslt.org/regular-expressions'
XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
'svg': SVG_NS, 'xl' : XLINK_NS}
'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS}
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
'xsi': XSI_NS, 'calibre': CALIBRE_NS}
@ -1024,7 +1026,7 @@ class Manifest(object):
media_type = XHTML_MIME
elif media_type in OEB_STYLES:
media_type = CSS_MIME
attrib = {'id': item.id, 'href': item.href,
attrib = {'id': item.id, 'href': urlunquote(item.href),
'media-type': media_type}
if item.fallback:
attrib['fallback'] = item.fallback
@ -1236,7 +1238,7 @@ class Guide(object):
def to_opf2(self, parent=None):
elem = element(parent, OPF('guide'))
for ref in self.refs.values():
attrib = {'type': ref.type, 'href': ref.href}
attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
if ref.title:
attrib['title'] = ref.title
element(elem, OPF('reference'), attrib=attrib)
@ -1256,19 +1258,34 @@ class TOC(object):
:attr:`klass`: Optional semantic class referenced by this node.
:attr:`id`: Optional unique identifier for this node.
"""
def __init__(self, title=None, href=None, klass=None, id=None):
def __init__(self, title=None, href=None, klass=None, id=None,
play_order=None):
self.title = title
self.href = urlnormalize(href) if href else href
self.klass = klass
self.id = id
self.nodes = []
self.play_order = 0
if play_order is None:
play_order = self.next_play_order()
self.play_order = play_order
def add(self, title, href, klass=None, id=None):
def add(self, title, href, klass=None, id=None, play_order=0):
"""Create and return a new sub-node of this node."""
node = TOC(title, href, klass, id)
node = TOC(title, href, klass, id, play_order)
self.nodes.append(node)
return node
def remove(self, node):
    """Detach `node` from the tree below this node.

    Returns True if the node was found (as a child or deeper descendant)
    and removed, False otherwise."""
    for kid in self.nodes:
        if kid is node:
            self.nodes.remove(kid)
            return True
        # Not a direct child: let the child search its own subtree.
        if kid.remove(node):
            return True
    return False
def iter(self):
"""Iterate over this node and all descendants in depth-first order."""
yield self
@ -1276,6 +1293,18 @@ class TOC(object):
for node in child.iter():
yield node
def count(self):
    """Return the number of nodes yielded by :meth:`iter`, not counting
    this node itself."""
    total = 0
    for node in self.iter():
        total += 1
    return total - 1
def next_play_order(self):
    """Return one more than the largest play_order among the nodes
    yielded by :meth:`iter`."""
    highest = max(node.play_order for node in self.iter())
    return highest + 1
def has_href(self, href):
    """Return True if any node yielded by :meth:`iter` has exactly this
    href, False otherwise."""
    return any(node.href == href for node in self.iter())
def iterdescendants(self):
"""Iterate over all descendant nodes in depth-first order."""
for child in self.nodes:
@ -1309,6 +1338,10 @@ class TOC(object):
except ValueError:
return 1
def __str__(self):
    """Short human readable form: the node's title and the href it
    points to."""
    parts = (self.title, self.href)
    return 'TOC: %s --> %s' % parts
def to_opf1(self, tour):
for node in self.nodes:
element(tour, 'site', attrib={
@ -1319,7 +1352,7 @@ class TOC(object):
def to_ncx(self, parent):
for node in self.nodes:
id = node.id or unicode(uuid.uuid4())
attrib = {'id': id, 'playOrder': '0'}
attrib = {'id': id, 'playOrder': str(node.play_order)}
if node.klass:
attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib)
@ -1329,6 +1362,34 @@ class TOC(object):
node.to_ncx(point)
return parent
def rationalize_play_orders(self):
    '''
    Ensure that all nodes with the same play_order have the same href and
    with different play_orders have different hrefs.
    '''
    def earlier_match(target, attr):
        # First node that comes before `target` in iteration order and has
        # the same value for `attr`; None when no such node exists.
        for other in self.iter():
            if other is target:
                return None
            if getattr(other, attr) == getattr(target, attr):
                return other
        return None

    for node in self.iter():
        clash = earlier_match(node, 'play_order')
        if clash is not None and node.href != clash.href:
            # Same play_order but a different target: reuse the play_order
            # of an earlier node with this href, else take a fresh one.
            same_target = earlier_match(node, 'href')
            fresh = self.next_play_order()
            node.play_order = getattr(same_target, 'play_order', fresh)
        twin = earlier_match(node, 'href')
        if twin is not None:
            node.play_order = twin.play_order
class PageList(object):
"""Collection of named "pages" to mapped positions within an OEB data model

View File

@ -118,6 +118,7 @@ class EbookIterator(object):
print 'Loaded embedded font:', repr(family)
def __enter__(self):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__()
from calibre.ebooks.conversion.plumber import Plumber
@ -137,9 +138,11 @@ class EbookIterator(object):
cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover:
cfile = os.path.join(os.path.dirname(self.spine[0]), 'calibre_ei_cover.html')
cfile = os.path.join(os.path.dirname(self.spine[0]),
'calibre_iterator_cover.html')
open(cfile, 'wb').write(TITLEPAGE%cover)
self.spine[0:0] = [SpineItem(cfile)]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine:
@ -221,3 +224,6 @@ class EbookIterator(object):
def __exit__(self, *args):
self._tdir.__exit__(*args)
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)

View File

@ -343,7 +343,8 @@ class OEBReader(object):
continue
id = child.get('id')
klass = child.get('class')
node = toc.add(title, href, id=id, klass=klass)
po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
node = toc.add(title, href, id=id, klass=klass, play_order=po)
self._toc_from_navpoint(item, node, child)
def _toc_from_ncx(self, item):

View File

@ -88,7 +88,7 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
def __init__(self, css, namespaces=XPNSMAP):
css = self.MIN_SPACE_RE.sub(r'\1', css)
path = css_to_xpath(css)
@ -103,10 +103,10 @@ class CSSSelector(etree.XPath):
self.css)
class Stylizer(object):
class Stylizer(object):
STYLESHEETS = WeakKeyDictionary()
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], extra_css=''):
self.oeb = oeb
self.profile = profile
self.logger = oeb.logger
@ -135,6 +135,11 @@ class Stylizer(object):
(path, item.href))
continue
stylesheets.append(sitem.data)
if extra_css:
text = XHTML_CSS_NAMESPACE + extra_css
stylesheet = parser.parseString(text, href=cssname)
stylesheet.namespaces['h'] = XHTML_NS
stylesheets.append(stylesheet)
rules = []
index = 0
self.stylesheets = set()
@ -159,7 +164,7 @@ class Stylizer(object):
self.style(elem)._update_cssdict(cssdict)
for elem in xpath(tree, '//h:*[@style]'):
self.style(elem)._apply_style_attr()
def _fetch_css_file(self, path):
hrefs = self.oeb.manifest.hrefs
if path not in hrefs:
@ -171,7 +176,7 @@ class Stylizer(object):
return (None, None)
data = item.data.cssText
return ('utf-8', data)
def flatten_rule(self, rule, href, index):
results = []
if isinstance(rule, CSSStyleRule):
@ -185,7 +190,7 @@ class Stylizer(object):
style = self.flatten_style(rule.style)
self.page_rule.update(style)
return results
def flatten_style(self, cssstyle):
style = {}
for prop in cssstyle:
@ -202,7 +207,7 @@ class Stylizer(object):
if size in FONT_SIZE_NAMES:
style['font-size'] = "%dpt" % self.profile.fnames[size]
return style
def _normalize_edge(self, cssvalue, name):
style = {}
if isinstance(cssvalue, CSSValueList):
@ -224,7 +229,7 @@ class Stylizer(object):
for edge, value in itertools.izip(edges, values):
style["%s-%s" % (name, edge)] = value
return style
def _normalize_font(self, cssvalue):
composition = ('font-style', 'font-variant', 'font-weight',
'font-size', 'line-height', 'font-family')
@ -271,7 +276,7 @@ class Stylizer(object):
class Style(object):
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)$')
def __init__(self, element, stylizer):
self._element = element
self._profile = stylizer.profile
@ -285,7 +290,7 @@ class Style(object):
def _update_cssdict(self, cssdict):
self._style.update(cssdict)
def _apply_style_attr(self):
attrib = self._element.attrib
if 'style' not in attrib:
@ -297,7 +302,7 @@ class Style(object):
except CSSSyntaxError:
return
self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self):
return (self._element.getparent() is not None)
@ -346,7 +351,7 @@ class Style(object):
elif unit == 'in':
result = value * 72.0
elif unit == 'pt':
result = value
result = value
elif unit == 'em':
font = font or self.fontSize
result = value * font
@ -421,7 +426,7 @@ class Style(object):
result = self._unit_convert(width, base=base)
self._width = result
return self._width
@property
def height(self):
if self._height is None:
@ -463,27 +468,27 @@ class Style(object):
result = 1.2 * self.fontSize
self._lineHeight = result
return self._lineHeight
@property
def marginTop(self):
return self._unit_convert(
self._get('margin-top'), base=self.height)
@property
def marginBottom(self):
return self._unit_convert(
self._get('margin-bottom'), base=self.height)
@property
def paddingTop(self):
return self._unit_convert(
self._get('padding-top'), base=self.height)
@property
def paddingBottom(self):
return self._unit_convert(
self._get('padding-bottom'), base=self.height)
def __str__(self):
items = self._style.items()
items.sort()

View File

@ -116,7 +116,8 @@ class CSSFlattener(object):
profile = self.context.source
for item in self.oeb.spine:
html = item.data
stylizer = Stylizer(html, item.href, self.oeb, profile)
stylizer = Stylizer(html, item.href, self.oeb, profile,
extra_css=self.context.extra_css)
self.stylizers[item] = stylizer
def baseline_node(self, node, stylizer, sizes, csize):

View File

@ -15,12 +15,10 @@ from lxml.etree import XPath as _XPath
from lxml import etree
from lxml.cssselect import CSSSelector
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \
rewrite_links
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
urldefrag, rewrite_links
from calibre.ebooks.epub import tostring, rules
NAMESPACES = dict(XPNSMAP)
NAMESPACES['re'] = 'http://exslt.org/regular-expressions'
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
@ -104,7 +102,10 @@ class Split(object):
page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()):
elem.set('pb_order', str(i))
try:
elem.set('pb_order', str(i))
except TypeError: # Cant set attributes on comment nodes etc.
continue
page_breaks = list(page_breaks)
page_breaks.sort(cmp=
@ -118,7 +119,7 @@ class Split(object):
page_break_ids.append(id)
for elem in item.data.iter():
elem.attrib.pop('pb_order')
elem.attrib.pop('pb_order', False)
if elem.get('pb_before', False):
elem.attrib.pop('pb_before')

View File

@ -0,0 +1,159 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from lxml import etree
from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC
XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
class DetectStructure(object):
    '''
    Transform that detects chapters in the spine, optionally inserts
    chapter marks before them and builds an automatic Table of Contents.
    '''

    def __call__(self, oeb, opts):
        '''
        Run the detection on the book `oeb` using the settings in `opts`.

        Reads ``opts.chapter``, ``chapter_mark``, ``use_auto_toc``,
        ``no_chapters_in_toc``, ``toc_threshold``, ``toc_filter`` and
        ``level1_toc`` .. ``level3_toc``; may replace ``oeb.toc``.
        '''
        self.log = oeb.log
        self.oeb = oeb
        self.opts = opts
        self.log('Detecting structure...')

        self.detect_chapters()
        if self.oeb.auto_generated_toc or opts.use_auto_toc:
            orig_toc = self.oeb.toc
            self.oeb.toc = TOC()
            self.create_level_based_toc()
            if self.oeb.toc.count() < 1:
                if not opts.no_chapters_in_toc and self.detected_chapters:
                    self.create_toc_from_chapters()
                if self.oeb.toc.count() < opts.toc_threshold:
                    self.create_toc_from_links()
            if self.oeb.toc.count() < 2 and orig_toc.count() > 2:
                # The generated TOC is worse than the one we started with.
                self.oeb.toc = orig_toc
            else:
                self.oeb.auto_generated_toc = True
                self.log('Auto generated TOC with %d entries.' %
                        self.oeb.toc.count())

        if opts.toc_filter is not None:
            regexp = re.compile(opts.toc_filter)
            # Walk a snapshot: remove() edits the very node lists iter() is
            # traversing, which would otherwise make it skip siblings.
            for node in list(self.oeb.toc.iter()):
                if not node.title or regexp.search(node.title) is not None:
                    self.oeb.toc.remove(node)

    def detect_chapters(self):
        '''
        Collect ``(spine item, element)`` pairs matching the ``opts.chapter``
        XPath expression into ``self.detected_chapters`` and insert the mark
        selected by ``opts.chapter_mark`` before each detected element.
        '''
        self.detected_chapters = []
        if self.opts.chapter:
            chapter_xpath = XPath(self.opts.chapter)
            for item in self.oeb.spine:
                for x in chapter_xpath(item.data):
                    self.detected_chapters.append((item, x))

            chapter_mark = self.opts.chapter_mark
            page_break_before = 'display: block; page-break-before: always'
            page_break_after = 'display: block; page-break-after: always'
            for item, elem in self.detected_chapters:
                text = u' '.join([t.strip() for t in
                                  elem.xpath('descendant::text()')])
                self.log('\tDetected chapter:', text[:50])
                if chapter_mark == 'none':
                    continue
                elif chapter_mark == 'rule':
                    mark = etree.Element('hr')
                elif chapter_mark == 'pagebreak':
                    mark = etree.Element('div', style=page_break_after)
                else: # chapter_mark == 'both':
                    mark = etree.Element('hr', style=page_break_before)
                elem.addprevious(mark)

    def create_level_based_toc(self):
        '''
        Build TOC entries from the ``level1_toc``/``level2_toc``/
        ``level3_toc`` XPath options; does nothing without ``level1_toc``.
        '''
        if self.opts.level1_toc is None:
            return
        for item in self.oeb.spine:
            self.add_leveled_toc_items(item)

    def create_toc_from_chapters(self):
        '''
        Add one top level TOC entry per detected chapter.
        '''
        counter = self.oeb.toc.next_play_order()
        for item, elem in self.detected_chapters:
            text, href = self.elem_to_link(item, elem, counter)
            self.oeb.toc.add(text, href, play_order=counter)
            counter += 1

    def create_toc_from_links(self):
        '''
        Add a TOC entry for every local ``<a href>`` in the spine whose
        target and text are not already present in the TOC.
        '''
        # The 'h' prefix has to be bound to a namespace; a bare
        # element.xpath('//h:a[@href]') fails with an undefined prefix error.
        links = XPath('//h:a[@href]')
        for item in self.oeb.spine:
            for a in links(item.data):
                href = a.get('href')
                purl = urlparse(href)
                # Only links without a scheme or with file: are local.
                if not purl[0] or purl[0] == 'file':
                    href, frag = purl.path, purl.fragment
                    href = item.abshref(href)
                    if frag:
                        href = '#'.join((href, frag))
                    if not self.oeb.toc.has_href(href):
                        text = u' '.join([t.strip() for t in
                                          a.xpath('descendant::text()')])
                        text = text[:100].strip()
                        # NOTE(review): has_text() is not among the TOC
                        # methods visible alongside has_href() -- confirm
                        # that TOC defines it.
                        if not self.oeb.toc.has_text(text):
                            self.oeb.toc.add(text, href,
                                play_order=self.oeb.toc.next_play_order())

    def elem_to_link(self, item, elem, counter):
        '''
        Return ``(text, href)`` for `elem` inside the spine item `item`.

        The text is the element's text content truncated to 100 characters.
        The element is given the id ``calibre_toc_<counter>`` if it has no
        id yet, and the href is ``item.href`` plus that id as fragment.
        '''
        text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
        text = text[:100].strip()
        id = elem.get('id', 'calibre_toc_%d'%counter)
        elem.set('id', id)
        href = '#'.join((item.href, id))
        return text, href

    def add_leveled_toc_items(self, item):
        '''
        Add up to three levels of TOC entries for the spine item `item`.

        Elements matching ``level1_toc`` become top level entries. An
        element matching ``level2_toc`` is attached to the last level 1
        element that precedes it in document order, and likewise for
        ``level3_toc`` below level 2. Elements without text get no entry.
        '''
        level1 = XPath(self.opts.level1_toc)(item.data)
        if not level1:
            return

        counter = 1
        added = {}
        for elem in level1:
            text, _href = self.elem_to_link(item, elem, counter)
            counter += 1
            if text:
                added[elem] = self.oeb.toc.add(text, _href,
                        play_order=self.oeb.toc.next_play_order())

        if self.opts.level2_toc is None:
            return
        added2 = {}
        for elem in list(XPath(self.opts.level2_toc)(item.data)):
            parent = None
            # The loop variable must not be called `item`: that would
            # clobber the spine item needed for item.data and item.href.
            for candidate in item.data.iterdescendants():
                if candidate in added:
                    parent = added[candidate]
                elif candidate == elem and parent is not None:
                    text, _href = self.elem_to_link(item, elem, counter)
                    counter += 1
                    if text:
                        added2[elem] = parent.add(text, _href,
                            play_order=self.oeb.toc.next_play_order())

        if self.opts.level3_toc is None:
            return
        for elem in list(XPath(self.opts.level3_toc)(item.data)):
            parent = None
            for candidate in item.data.iterdescendants():
                if candidate in added2:
                    parent = added2[candidate]
                elif candidate == elem and parent is not None:
                    text, _href = self.elem_to_link(item, elem, counter)
                    counter += 1
                    if text:
                        parent.add(text, _href,
                            play_order=self.oeb.toc.next_play_order())

View File

@ -0,0 +1,101 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
class RTFInput(InputFormatPlugin):
    '''
    Input plugin that converts RTF to XHTML by way of rtf2xml and an XSL
    transform, writing ``index.xhtml`` and ``metadata.opf`` to the current
    directory.
    '''

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        '''
        Run rtf2xml over `stream` and return the XML it produced.

        The XML is written to the temporary file ``out.xml`` in the current
        directory, which is removed again before returning.
        '''
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'out.xml'
        parser = ParseRtf(
            in_file = stream,
            out_file = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        parser.parse_rtf()
        try:
            with open(ofile) as f:
                ans = f.read()
        finally:
            # Do not leave the temporary file behind, even if reading failed.
            if os.path.exists(ofile):
                os.remove(ofile)
        return ans

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the RTF document in `stream`.

        :return: Absolute path to the generated ``metadata.opf``.
        :raises ValueError: If rtf2xml reports an RTF feature it cannot
            handle.
        '''
        from calibre.ebooks.rtf.xsl import xhtml
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream)
        except RtfInvalidCodeException:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.'))
        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # Turn the 'xmlns:html' declaration near the start of the output
            # into a default namespace declaration.
            res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            f.write(res)
        # Rewind before handing the same stream to the metadata reader.
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')

View File

@ -149,9 +149,10 @@ class ParseRtf:
self.__group_borders = group_borders
self.__empty_paragraphs = empty_paragraphs
self.__no_dtd = no_dtd
def __check_file(self, the_file, type):
"""Check to see if files exist"""
if hasattr(the_file, 'read'): return
if the_file == None:
if type == "file_to_parse":
message = "You must provide a file for the script to work"
@ -545,13 +546,12 @@ class ParseRtf:
def __make_temp_file(self,file):
"""Make a temporary file to parse"""
write_file="rtf_write_file"
read_obj = open(file,'r')
read_obj = file if hasattr(file, 'read') else open(file,'r')
write_obj = open(write_file, 'w')
line = "dummy"
while line:
line = read_obj.read(1000)
write_obj.write(line )
read_obj.close()
write_obj.close()
return write_file
"""

View File

@ -58,10 +58,12 @@ class Pict:
return line[18:]
def __make_dir(self):
""" Make a dirctory to put the image data in"""
base_name = os.path.basename(self.__orig_file)
base_name = os.path.basename(getattr(self.__orig_file, 'name',
self.__orig_file))
base_name = os.path.splitext(base_name)[0]
if self.__out_file:
dir_name = os.path.dirname(self.__out_file)
dir_name = os.path.dirname(getattr(self.__out_file, 'name',
self.__out_file))
else:
dir_name = os.path.dirname(self.__orig_file)
# self.__output_to_file_func()

View File

@ -16,16 +16,14 @@ from calibre.gui2 import qstring_to_unicode, error_dialog, file_icon_provider, \
from calibre.gui2.dialogs.metadata_single_ui import Ui_MetadataSingleDialog
from calibre.gui2.dialogs.fetch_metadata import FetchMetadata
from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.gui2.dialogs.password import PasswordDialog
from calibre.gui2.widgets import ProgressIndicator
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, authors_to_string
from calibre.ebooks.metadata.library_thing import login, cover_from_isbn
from calibre.ebooks.metadata.library_thing import cover_from_isbn
from calibre import islinux
from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.config import prefs
from calibre.customize.ui import run_plugins_on_import
from calibre.gui2 import config as gui_conf
class CoverFetcher(QThread):
@ -60,9 +58,8 @@ class CoverFetcher(QThread):
return
self.isbn = results[0]
if self.username and self.password:
login(self.username, self.password, force=False)
self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0]
self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout,
username=self.username, password=self.password)[0]
except Exception, e:
self.exception = e
self.traceback = traceback.format_exc()
@ -290,7 +287,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.series_index.setValue(self.db.series_index(row))
QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.enable_series_index)
QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.enable_series_index)
QObject.connect(self.password_button, SIGNAL('clicked()'), self.change_password)
self.show()
height_of_rest = self.frameGeometry().height() - self.cover.height()
@ -363,30 +359,12 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
tag_string = ', '.join(d.tags)
self.tags.setText(tag_string)
def lt_password_dialog(self):
return PasswordDialog(self, 'LibraryThing account',
_('<p>Enter your username and password for '
'<b>LibraryThing.com</b>. This is <b>optional</b>. It will '
'make fetching of covers faster and more reliable.<br/>If '
'you do not have an account, you can '
'<a href=\'http://www.librarything.com\'>register</a> for '
'free.</p>'))
def change_password(self):
d = self.lt_password_dialog()
d.exec_()
def fetch_cover(self):
isbn = unicode(self.isbn.text()).strip()
d = self.lt_password_dialog()
if not gui_conf['asked_library_thing_password'] and \
(not d.username() or not d.password()):
d.exec_()
gui_conf['asked_library_thing_password'] = True
self.fetch_cover_button.setEnabled(False)
self.setCursor(Qt.WaitCursor)
title, author = map(unicode, (self.title.text(), self.authors.text()))
self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn,
self.cover_fetcher = CoverFetcher(None, None, isbn,
self.timeout, title, author)
self.cover_fetcher.start()
self._hangcheck = QTimer(self)

View File

@ -589,17 +589,7 @@
<item>
<widget class="QPushButton" name="fetch_cover_button">
<property name="text">
<string>Fetch &amp;cover image from server</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="password_button">
<property name="toolTip">
<string>Change the username and/or password for your account at LibraryThing.com</string>
</property>
<property name="text">
<string>Change &amp;password</string>
<string>Download &amp;cover</string>
</property>
</widget>
</item>
@ -655,7 +645,6 @@
<tabstop>comments</tabstop>
<tabstop>fetch_metadata_button</tabstop>
<tabstop>fetch_cover_button</tabstop>
<tabstop>password_button</tabstop>
<tabstop>formats</tabstop>
<tabstop>add_format_button</tabstop>
<tabstop>remove_format_button</tabstop>

View File

@ -27,10 +27,6 @@ entry_points = {
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
@ -151,8 +147,6 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
from calibre.web.feeds.main import option_parser as feeds2disk
from calibre.web.feeds.recipes import titles as feed_titles
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.ebooks.metadata.fetch import option_parser as fem_op
from calibre.gui2.main import option_parser as guiop
from calibre.utils.smtp import option_parser as smtp_op
@ -181,10 +175,6 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
f.write(opts_and_words('calibre-smtp', smtp_op, []))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -266,7 +266,7 @@ class BasicNewsRecipe(object):
def get_feeds(self):
'''
Return a list of :term:RSS feeds to fetch for this profile. Each element of the list
Return a list of :term:`RSS` feeds to fetch for this profile. Each element of the list
must be a 2-element tuple of the form (title, url). If title is None or an
empty string, the title from the feed is used. This method is useful if your recipe
needs to do some processing to figure out the list of feeds to download. If

View File

@ -182,11 +182,6 @@ def cnv_points(attribute, arg, element):
raise ValueError, "Points must be string or [(0,0),(1,1)] - not %s" % arg
return strarg
def cnv_position(attribute, arg, element):
    """Convert the value of a ``position`` attribute.

    The value is handed to ``cnv_length`` when ``element`` equals
    ``(STYLENS, u'tab-stop')`` and to ``cnv_string`` for any other
    element. All three arguments are forwarded unchanged and the chosen
    converter's result is returned.
    """
    if element != (STYLENS, u'tab-stop'):
        return cnv_string(attribute, arg, element)
    return cnv_length(attribute, arg, element)
def cnv_positiveInteger(attribute, arg, element):
    """Return ``arg`` rendered with ``str()``.

    ``attribute`` and ``element`` are accepted for signature
    compatibility with the other converters but are not used, and no
    range check is performed on ``arg``.
    """
    text = str(arg)
    return text
@ -253,7 +248,7 @@ attrconverters = {
((CHARTNS,u'gap-width'), None): cnv_string,
((CHARTNS,u'interpolation'), None): cnv_string,
((CHARTNS,u'interval-major'), None): cnv_string,
((CHARTNS,u'interval-minor'), None): cnv_string,
((CHARTNS,u'interval-minor-divisor'), None): cnv_string,
((CHARTNS,u'japanese-candle-stick'), None): cnv_boolean,
((CHARTNS,u'label-arrangement'), None): cnv_string,
((CHARTNS,u'label-cell-address'), None): cnv_string,
@ -352,6 +347,7 @@ attrconverters = {
((DRAWNS,u'caption-line-length'), None): cnv_length,
((DRAWNS,u'caption-point-x'), None): cnv_string,
((DRAWNS,u'caption-point-y'), None): cnv_string,
((DRAWNS,u'caption-id'), None): cnv_IDREF,
((DRAWNS,u'caption-type'), None): cnv_string,
((DRAWNS,u'chain-next-name'), None): cnv_string,
((DRAWNS,u'class-id'), None): cnv_string,
@ -476,6 +472,7 @@ attrconverters = {
((DRAWNS,u'modifiers'), None): cnv_string,
((DRAWNS,u'name'), None): cnv_NCName,
# ((DRAWNS,u'name'), None): cnv_string,
((DRAWNS,u'nav-order'), None): cnv_IDREF,
((DRAWNS,u'nohref'), None): cnv_string,
((DRAWNS,u'notify-on-update-of-ranges'), None): cnv_string,
((DRAWNS,u'object'), None): cnv_string,
@ -683,7 +680,7 @@ attrconverters = {
((MANIFESTNS,'initialisation-vector'), None): cnv_string,
((MANIFESTNS,'iteration-count'), None): cnv_nonNegativeInteger,
((MANIFESTNS,'key-derivation-name'), None): cnv_string,
((MANIFESTNS,'manifest:media-type'), None): cnv_string,
((MANIFESTNS,'media-type'), None): cnv_string,
((MANIFESTNS,'salt'), None): cnv_string,
((MANIFESTNS,'size'), None): cnv_nonNegativeInteger,
((METANS,u'cell-count'), None): cnv_nonNegativeInteger,
@ -746,6 +743,7 @@ attrconverters = {
((OFFICENS,u'target-frame'), None): cnv_string,
((OFFICENS,u'target-frame-name'), None): cnv_string,
((OFFICENS,u'time-value'), None): cnv_duration,
((OFFICENS,u'title'), None): cnv_string,
((OFFICENS,u'value'), None): cnv_double,
((OFFICENS,u'value-type'), None): cnv_string,
((OFFICENS,u'version'), None): cnv_string,
@ -783,6 +781,7 @@ attrconverters = {
((PRESENTATIONNS,u'preset-id'), None): cnv_string,
((PRESENTATIONNS,u'preset-sub-type'), None): cnv_string,
((PRESENTATIONNS,u'show'), None): cnv_string,
((PRESENTATIONNS,u'show-end-of-presentation-slide'), None): cnv_boolean,
((PRESENTATIONNS,u'show-logo'), None): cnv_boolean,
((PRESENTATIONNS,u'source'), None): cnv_string,
((PRESENTATIONNS,u'speed'), None): cnv_string,
@ -873,6 +872,8 @@ attrconverters = {
((STYLENS,u'flow-with-text'), None): cnv_boolean,
((STYLENS,u'font-adornments'), None): cnv_string,
((STYLENS,u'font-charset'), None): cnv_string,
((STYLENS,u'font-charset-asian'), None): cnv_string,
((STYLENS,u'font-charset-complex'), None): cnv_string,
((STYLENS,u'font-family-asian'), None): cnv_string,
((STYLENS,u'font-family-complex'), None): cnv_string,
((STYLENS,u'font-family-generic-asian'), None): cnv_string,
@ -949,7 +950,8 @@ attrconverters = {
((STYLENS,u'page-usage'), None): cnv_string,
((STYLENS,u'paper-tray-name'), None): cnv_string,
((STYLENS,u'parent-style-name'), None): cnv_StyleNameRef,
((STYLENS,u'position'), None): cnv_position,
((STYLENS,u'position'), (STYLENS,u'tab-stop')): cnv_length,
((STYLENS,u'position'), None): cnv_string,
((STYLENS,u'print'), None): cnv_string,
((STYLENS,u'print-content'), None): cnv_boolean,
((STYLENS,u'print-orientation'), None): cnv_string,
@ -1015,7 +1017,7 @@ attrconverters = {
((STYLENS,u'wrap'), None): cnv_string,
((STYLENS,u'wrap-contour'), None): cnv_boolean,
((STYLENS,u'wrap-contour-mode'), None): cnv_string,
((STYLENS,u'wrap-dynamic-treshold'), None): cnv_string,
((STYLENS,u'wrap-dynamic-threshold'), None): cnv_length,
((STYLENS,u'writing-mode-automatic'), None): cnv_boolean,
((STYLENS,u'writing-mode'), None): cnv_string,
((SVGNS,u'accent-height'), None): cnv_integer,
@ -1122,7 +1124,7 @@ attrconverters = {
((TABLENS,u'database-table-name'), None): cnv_string,
((TABLENS,u'date-end'), None): cnv_string,
((TABLENS,u'date-start'), None): cnv_string,
((TABLENS,u'date-value-type'), None): cnv_date,
((TABLENS,u'date-value'), None): cnv_date,
((TABLENS,u'default-cell-style-name'), None): cnv_StyleNameRef,
((TABLENS,u'direction'), None): cnv_string,
((TABLENS,u'display-border'), None): cnv_boolean,
@ -1304,7 +1306,7 @@ attrconverters = {
((TEXTNS,u'database-name'), None): cnv_string,
((TEXTNS,u'date-adjust'), None): cnv_duration,
((TEXTNS,u'date-value'), None): cnv_date,
((TEXTNS,u'date-value'), None): cnv_dateTime,
# ((TEXTNS,u'date-value'), None): cnv_dateTime,
((TEXTNS,u'default-style-name'), None): cnv_StyleNameRef,
((TEXTNS,u'description'), None): cnv_string,
((TEXTNS,u'display'), None): cnv_string,
@ -1366,6 +1368,7 @@ attrconverters = {
((TEXTNS,u'outline-level'), None): cnv_string,
((TEXTNS,u'page-adjust'), None): cnv_integer,
((TEXTNS,u'pages'), None): cnv_string,
((TEXTNS,u'paragraph-style-name'), None): cnv_StyleNameRef,
((TEXTNS,u'placeholder-type'), None): cnv_string,
((TEXTNS,u'prefix'), None): cnv_string,
((TEXTNS,u'protected'), None): cnv_boolean,
@ -1417,6 +1420,7 @@ attrconverters = {
((TEXTNS,u'use-objects'), None): cnv_boolean,
((TEXTNS,u'use-other-objects'), None): cnv_boolean,
((TEXTNS,u'use-outline-level'), None): cnv_boolean,
((TEXTNS,u'use-soft-page-breaks'), None): cnv_boolean,
((TEXTNS,u'use-spreadsheet-objects'), None): cnv_boolean,
((TEXTNS,u'use-tables'), None): cnv_boolean,
((TEXTNS,u'value'), None): cnv_nonNegativeInteger,

View File

@ -258,10 +258,7 @@ class Element(Node):
self.ownerDocument = None
self.childNodes=[]
self.allowed_children = grammar.allowed_children.get(self.qname)
namespace = self.qname[0]
prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
prefix = self.get_nsprefix(self.qname[0])
self.tagName = prefix + ":" + self.qname[1]
if text is not None:
self.addText(text)
@ -296,6 +293,13 @@ class Element(Node):
if self.getAttrNS(r[0],r[1]) is None:
raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName)
def get_nsprefix(self, namespace):
    """Return the prefix for ``namespace``, registering it on first use.

    A ``namespace`` of ``None`` is treated as the empty string. The
    prefix is obtained from ``_nsassign``; if ``self.namespaces`` has no
    entry for the namespace yet, the namespace -> prefix pair is stored
    there before the prefix is returned.
    """
    if namespace is None:
        namespace = ""
    prefix = _nsassign(namespace)
    # Membership test via ``in`` instead of the deprecated has_key(),
    # which no longer exists on Python 3 dictionaries.
    if namespace not in self.namespaces:
        self.namespaces[namespace] = prefix
    return prefix
def allowed_attributes(self):
return grammar.allowed_attributes.get(self.qname)
@ -378,24 +382,18 @@ class Element(Node):
Must overwrite, If attribute already exists.
"""
allowed_attrs = self.allowed_attributes()
prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
prefix = self.get_nsprefix(namespace)
# if allowed_attrs and (namespace, localpart) not in allowed_attrs:
# raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
c = AttrConverters()
self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname)
def getAttrNS(self, namespace, localpart):
prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
prefix = self.get_nsprefix(namespace)
return self.attributes.get(prefix + ":" + localpart)
def removeAttrNS(self, namespace, localpart):
prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
prefix = self.get_nsprefix(namespace)
del self.attributes[prefix + ":" + localpart]
def getAttribute(self, attr):

View File

@ -109,11 +109,6 @@ inline_elements = (
)
struct_elements = (
(CONFIGNS,'config-item-set'),
(TABLENS,u'table-cell'),
)
# It is almost impossible to determine what elements are block elements.
# There are so many that don't fit the form
block_elements = (

File diff suppressed because it is too large Load Diff

View File

@ -29,12 +29,7 @@ from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
from element import Element
from namespaces import OFFICENS
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from cStringIO import StringIO
#
# Parse the XML files
@ -70,7 +65,7 @@ class LoadParser(handler.ContentHandler):
# Add any accumulated text content
content = ''.join(self.data).strip()
if len(content) > 0:
self.parent.addText(content)
self.parent.addText(content, check_grammar=False)
self.data = []
# Create the element
attrdict = {}
@ -109,7 +104,7 @@ class LoadParser(handler.ContentHandler):
self.level = self.level - 1
str = ''.join(self.data)
if len(str.strip()) > 0:
self.curr.addText(str)
self.curr.addText(str, check_grammar=False)
self.data = []
self.curr = self.curr.parentNode
self.parent = self.curr

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -17,7 +17,7 @@
#
# Contributor(s):
#
TOOLSVERSION = u"ODFPY/0.8.2dev"
TOOLSVERSION = u"ODFPY/0.9.1dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@ -71,7 +71,7 @@ nsdict = {
OFFICENS: u'office',
OOONS: u'ooo',
OOOWNS: u'ooow',
OOOCNS: u'ooc',
OOOCNS: u'oooc',
PRESENTATIONNS: u'presentation',
RDFANS: u'rdfa',
SCRIPTNS: u'script',

View File

@ -25,11 +25,7 @@ import xml.sax
from xml.sax import handler, expatreader
from xml.sax.xmlreader import InputSource
from xml.sax.saxutils import escape, quoteattr
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from cStringIO import StringIO
from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \

View File

@ -24,12 +24,7 @@ import zipfile
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from cStringIO import StringIO
MANIFESTNS="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"

View File

@ -44,19 +44,19 @@ def Chart(**args):
def DdeSource(**args):
    """Build an ``Element`` whose qname is ``(OFFICENS, 'dde-source')``.

    All keyword arguments are forwarded to ``Element`` unchanged.
    """
    qname = (OFFICENS, 'dde-source')
    return Element(qname=qname, **args)
def Document(version="1.0", **args):
def Document(version="1.1", **args):
return Element(qname = (OFFICENS,'document'), version=version, **args)
def DocumentContent(version="1.0", **args):
def DocumentContent(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-content'), version=version, **args)
def DocumentMeta(version="1.0", **args):
def DocumentMeta(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-meta'), version=version, **args)
def DocumentSettings(version="1.0", **args):
def DocumentSettings(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-settings'), version=version, **args)
def DocumentStyles(version="1.0", **args):
def DocumentStyles(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-styles'), version=version, **args)
def Drawing(**args):

View File

@ -1,71 +0,0 @@
from style import Style, ParagraphProperties, TextProperties
def addOOoStandardStyles(styles):
    """Append a fixed set of paragraph and text styles to ``styles``.

    Each style is created with ``Style``; where the style carries
    formatting, ``ParagraphProperties`` and/or ``TextProperties`` children
    are attached to it first. Every finished style is then handed to
    ``styles.addElement``. Nothing is returned.
    """
    style = Style(name="Standard", family="paragraph", attributes={'class':"text"})
    styles.addElement(style)

    style = Style(name="Text_20_body", displayname="Text body", family="paragraph", parentstylename="Standard", attributes={'class':"text"})
    p = ParagraphProperties(margintop="0cm", marginbottom="0.212cm")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Text_20_body_20_indent", displayname="Text body indent", family="paragraph", parentstylename="Text_20_body", attributes={'class':"text"})
    p = ParagraphProperties(marginleft="0.499cm", marginright="0cm", textindent="0cm", autotextindent="false")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Salutation", family="paragraph", parentstylename="Standard", attributes={'class':"text"})
    p = ParagraphProperties(numberlines="false", linenumber=0)
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Signature", family="paragraph", parentstylename="Standard", attributes={'class':"text"})
    p = ParagraphProperties(numberlines="false", linenumber=0)
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Heading", family="paragraph", parentstylename="Standard", nextstylename="Text_20_body", attributes={'class':"text"})
    p = ParagraphProperties(margintop="0.423cm", marginbottom="0.212cm", keepwithnext="always")
    style.addElement(p)
    p = TextProperties(fontname="Nimbus Sans L", fontsize="14pt", fontnameasian="DejaVu LGC Sans", fontsizeasian="14pt", fontnamecomplex="DejaVu LGC Sans", fontsizecomplex="14pt")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Heading_20_1", displayname="Heading 1", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=1)
    p = TextProperties(fontsize="115%", fontweight="bold", fontsizeasian="115%", fontweightasian="bold", fontsizecomplex="115%", fontweightcomplex="bold")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Heading_20_2", displayname="Heading 2", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=2)
    p = TextProperties(fontsize="14pt", fontstyle="italic", fontweight="bold", fontsizeasian="14pt", fontstyleasian="italic", fontweightasian="bold", fontsizecomplex="14pt", fontstylecomplex="italic", fontweightcomplex="bold")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Heading_20_3", displayname="Heading 3", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=3)
    p = TextProperties(fontsize="14pt", fontweight="bold", fontsizeasian="14pt", fontweightasian="bold", fontsizecomplex="14pt", fontweightcomplex="bold")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="List", family="paragraph", parentstylename="Text_20_body", attributes={'class':"list"})
    styles.addElement(style)

    style = Style(name="Caption", family="paragraph", parentstylename="Standard", attributes={'class':"extra"})
    p = ParagraphProperties(margintop="0.212cm", marginbottom="0.212cm", numberlines="false", linenumber="0")
    style.addElement(p)
    p = TextProperties(fontsize="12pt", fontstyle="italic", fontsizeasian="12pt", fontstyleasian="italic", fontsizecomplex="12pt", fontstylecomplex="italic")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Index", family="paragraph", parentstylename="Standard", attributes={'class':"index"})
    p = ParagraphProperties(numberlines="false", linenumber=0)
    # Attach the paragraph properties; without this call the object built
    # on the previous line is discarded and the Index style carries no
    # properties, unlike the Salutation/Signature styles it mirrors.
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Source_20_Text", displayname="Source Text", family="text")
    p = TextProperties(fontname="Courier", fontnameasian="Courier", fontnamecomplex="Courier")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Variable", family="text")
    p = TextProperties(fontstyle="italic", fontstyleasian="italic", fontstylecomplex="italic")
    style.addElement(p)
    styles.addElement(style)

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -300,6 +300,37 @@ class OpenDocument:
self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
return manifestfn
def addPictureFromFile(self, filename, mediatype=None):
    """ Add a picture
        It uses the same convention as OOo, in that it saves the picture in
        the zipfile in the subdirectory 'Pictures'.

        filename  -- name of the picture file; it is recorded as given in
                     self.Pictures together with the IS_FILENAME marker.
        mediatype -- media type of the picture. If not given, it is
                     guessed from the filename extension; if it cannot be
                     guessed, an empty media type is recorded.

        Returns the manifest name ("Pictures/<timestamp><extension>")
        under which the picture was registered.
    """
    if mediatype is None:
        mediatype = mimetypes.guess_type(filename)[0]
    ext = None
    if mediatype is None:
        mediatype = ''
    else:
        ext = mimetypes.guess_extension(mediatype)
    if ext is None:
        # guess_extension() returns None for media types it does not
        # know. Fall back to the filename's own extension so the manifest
        # name never ends in the literal text "None".
        try:
            ext = filename[filename.rindex('.'):]
        except ValueError:
            ext = ''
    manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
    self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
    return manifestfn
def addPictureFromString(self, content, mediatype):
    """ Add a picture
        It uses the same convention as OOo, in that it saves the picture in
        the zipfile in the subdirectory 'Pictures'. The content variable
        is a string that contains the binary image data. The mediatype
        indicates the image format.

        Returns the manifest name ("Pictures/<timestamp><extension>")
        under which the picture was registered in self.Pictures.
    """
    ext = mimetypes.guess_extension(mediatype)
    if ext is None:
        # Unknown media type: use no extension rather than letting the
        # format operator render None as the text "None".
        ext = ''
    manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
    self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
    return manifestfn
def addThumbnail(self, filecontent=None):
""" Add a fixed thumbnail
The thumbnail in the library is big, so this is pretty useless.
@ -394,6 +425,7 @@ class OpenDocument:
# Write any extra files
for op in self._extra:
if op.filename == "META-INF/documentsignatures.xml": continue # Don't save signatures
self.manifest.addElement(manifest.FileEntry(fullpath=op.filename, mediatype=op.mediatype))
zi = zipfile.ZipInfo(op.filename.encode('utf-8'), self._now)
zi.compress_type = zipfile.ZIP_DEFLATED

View File

@ -31,11 +31,7 @@ import xml.sax.saxutils
from odf.namespaces import OFFICENS, TEXTNS
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from cStringIO import StringIO
OUTENCODING = "utf-8"

View File

@ -27,3 +27,8 @@ from element import Element
def Model(**args):
    """Build an ``Element`` whose qname is ``(XFORMSNS, 'model')``.

    All keyword arguments are forwarded to ``Element`` unchanged.
    """
    qname = (XFORMSNS, 'model')
    return Element(qname=qname, **args)
def Instance(**args):
    """Build an ``Element`` whose qname is ``(XFORMSNS, 'instance')``.

    All keyword arguments are forwarded to ``Element`` unchanged.
    """
    qname = (XFORMSNS, 'instance')
    return Element(qname=qname, **args)
def Bind(**args):
    """Build an ``Element`` whose qname is ``(XFORMSNS, 'bind')``.

    All keyword arguments are forwarded to ``Element`` unchanged.
    """
    qname = (XFORMSNS, 'bind')
    return Element(qname=qname, **args)

View File

@ -139,7 +139,7 @@ class resources(OptionlessCommand):
RESOURCES = dict(
opf_template = 'ebooks/metadata/opf.xml',
ncx_template = 'ebooks/metadata/ncx.xml',
fb2_xsl = 'ebooks/lrf/fb2/fb2.xsl',
fb2_xsl = 'ebooks/fb2/fb2.xsl',
metadata_sqlite = 'library/metadata_sqlite.sql',
jquery = 'gui2/viewer/jquery.js',
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',