Sync to pluginize

This commit is contained in:
John Schember 2009-04-21 19:20:06 -04:00
commit 6f7d0f7696
72 changed files with 20767 additions and 16531 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.5.7' __version__ = '0.5.8'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
''' '''
Various run time constants. Various run time constants.

View File

@ -282,6 +282,9 @@ from calibre.ebooks.pdb.input import PDBInput
from calibre.ebooks.pdf.input import PDFInput from calibre.ebooks.pdf.input import PDFInput
from calibre.ebooks.txt.input import TXTInput from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lit.input import LITInput from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
@ -289,7 +292,8 @@ from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput] TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
FB2Input, ODTInput, RTFInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')] x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -59,7 +59,10 @@ class HTMLRenderer(object):
def render_html(path_to_html, width=590, height=750): def render_html(path_to_html, width=590, height=750):
from PyQt4.QtWebKit import QWebPage from PyQt4.QtWebKit import QWebPage
from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize from PyQt4.Qt import QEventLoop, QPalette, Qt, SIGNAL, QUrl, QSize, \
QApplication
if QApplication.instance() is None:
QApplication([])
path_to_html = os.path.abspath(path_to_html) path_to_html = os.path.abspath(path_to_html)
with CurrentDir(os.path.dirname(path_to_html)): with CurrentDir(os.path.dirname(path_to_html)):
page = QWebPage() page = QWebPage()

View File

@ -116,6 +116,25 @@ def add_pipeline_options(parser, plumber):
'font_size_mapping', 'font_size_mapping',
'line_height', 'line_height',
'linearize_tables', 'linearize_tables',
'extra_css',
]
),
'STRUCTURE DETECTION' : (
_('Control auto-detection of document structure.'),
[
'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
]
),
'TABLE OF CONTENTS' : (
_('Control the automatic generation of a Table of Contents. By '
'default, if the source file has a Table of Contents, it will '
'be used in preference to the automatically generated one.'),
[
'level1_toc', 'level2_toc', 'level3_toc',
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
'use_auto_toc', 'toc_filter',
] ]
), ),
@ -130,7 +149,8 @@ def add_pipeline_options(parser, plumber):
} }
group_order = ['', 'LOOK AND FEEL', 'METADATA', 'DEBUG'] group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
'TABLE OF CONTENTS', 'METADATA', 'DEBUG']
for group in group_order: for group in group_order:
desc, options = groups[group] desc, options = groups[group]
@ -163,6 +183,10 @@ def main(args=sys.argv):
add_pipeline_options(parser, plumber) add_pipeline_options(parser, plumber)
opts = parser.parse_args(args)[0] opts = parser.parse_args(args)[0]
y = lambda q : os.path.abspath(os.path.expanduser(q))
for x in ('read_metadata_from_opf', 'cover'):
if getattr(opts, x, None) is not None:
setattr(opts, x, y(getattr(opts, x)))
recommendations = [(n.dest, getattr(opts, n.dest), recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \ OptionRecommendation.HIGH) \
for n in parser.options_iter() for n in parser.options_iter()

View File

@ -3,13 +3,21 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, re
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles, output_profiles, \ from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format plugin_for_input_format, plugin_for_output_format
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre import extract, walk
def supported_input_formats():
    '''
    Return the collection of input formats accepted for conversion.

    This is whatever available_input_formats() reports, with the archive
    extensions 'zip', 'rar' and 'oebzip' added to it. The object returned by
    available_input_formats() is modified in place and handed back.
    '''
    from calibre.customize.ui import available_input_formats
    formats = available_input_formats()
    for archive_ext in ('zip', 'rar', 'oebzip'):
        formats.add(archive_ext)
    return formats
class OptionValues(object): class OptionValues(object):
pass pass
@ -121,6 +129,105 @@ OptionRecommendation(name='dont_split_on_page_breaks',
) )
), ),
OptionRecommendation(name='level1_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that '
'should be added to the Table of Contents at level one. If '
'this is specified, it takes precedence over other forms '
'of auto-detection.'
)
),
OptionRecommendation(name='level2_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level two. Each entry is added '
'under the previous level one entry.'
)
),
OptionRecommendation(name='level3_toc',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('XPath expression that specifies all tags that should be '
'added to the Table of Contents at level three. Each entry '
'is added under the previous level two entry.'
)
),
OptionRecommendation(name='use_auto_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Normally, if the source file already has a Table of '
'Contents, it is used in preference to the auto-generated one. '
'With this option, the auto-generated one is always used.'
)
),
OptionRecommendation(name='no_chapters_in_toc',
recommended_value=False, level=OptionRecommendation.LOW,
help=_("Don't add auto-detected chapters to the Table of "
'Contents.'
)
),
OptionRecommendation(name='toc_threshold',
recommended_value=6, level=OptionRecommendation.LOW,
help=_(
'If fewer than this number of chapters is detected, then links '
'are added to the Table of Contents. Default: %default')
),
OptionRecommendation(name='max_toc_links',
recommended_value=50, level=OptionRecommendation.LOW,
help=_('Maximum number of links to insert into the TOC. Set to 0 '
'to disable. Default is: %default. Links are only added to the '
'TOC if less than the threshold number of chapters were detected.'
)
),
OptionRecommendation(name='toc_filter',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Remove entries from the Table of Contents whose titles '
'match the specified regular expression. Matching entries and all '
'their children are removed.'
)
),
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
"re:test(., 'chapter|book|section|part', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '
'"chapter","book","section" or "part" as chapter titles as '
'well as any tags that have class="chapter". The expression '
'used must evaluate to a list of elements. To disable chapter '
'detection, use the expression "/". See the XPath Tutorial '
'in the calibre User Manual for further help on using this '
'feature.'
)
),
OptionRecommendation(name='chapter_mark',
recommended_value='pagebreak', level=OptionRecommendation.LOW,
choices=['pagebreak', 'rule', 'both', 'none'],
help=_('Specify how to mark detected chapters. A value of '
'"pagebreak" will insert page breaks before chapters. '
'A value of "rule" will insert a line before chapters. '
'A value of "none" will disable chapter marking and a '
'value of "both" will use both page breaks and lines '
'to mark chapters.')
),
OptionRecommendation(name='extra_css',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Either the path to a CSS stylesheet or raw CSS. '
'This CSS will be appended to the style rules from '
'the source file, so it can be used to override those '
'rules.')
),
OptionRecommendation(name='read_metadata_from_opf', OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW, recommended_value=None, level=OptionRecommendation.LOW,
@ -130,6 +237,7 @@ OptionRecommendation(name='read_metadata_from_opf',
'file.') 'file.')
), ),
OptionRecommendation(name='title', OptionRecommendation(name='title',
recommended_value=None, level=OptionRecommendation.LOW, recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the title.')), help=_('Set the title.')),
@ -187,11 +295,14 @@ OptionRecommendation(name='language',
help=_('Set the language.')), help=_('Set the language.')),
] ]
input_fmt = os.path.splitext(self.input)[1] input_fmt = os.path.splitext(self.input)[1]
if not input_fmt: if not input_fmt:
raise ValueError('Input file must have an extension') raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower() input_fmt = input_fmt[1:].lower()
if input_fmt in ('zip', 'rar', 'oebzip'):
self.log('Processing archive...')
tdir = PersistentTemporaryDirectory('_plumber')
self.input, input_fmt = self.unarchive(self.input, tdir)
if os.path.exists(self.output) and os.path.isdir(self.output): if os.path.exists(self.output) and os.path.isdir(self.output):
output_fmt = 'oeb' output_fmt = 'oeb'
@ -201,7 +312,7 @@ OptionRecommendation(name='language',
output_fmt = '.oeb' output_fmt = '.oeb'
output_fmt = output_fmt[1:].lower() output_fmt = output_fmt[1:].lower()
self.input_plugin = plugin_for_input_format(input_fmt) self.input_plugin = plugin_for_input_format(input_fmt)
self.output_plugin = plugin_for_output_format(output_fmt) self.output_plugin = plugin_for_output_format(output_fmt)
if self.input_plugin is None: if self.input_plugin is None:
@ -224,6 +335,43 @@ OptionRecommendation(name='language',
# plugins. # plugins.
self.merge_plugin_recommendations() self.merge_plugin_recommendations()
@classmethod
def unarchive(self, path, tdir):
    '''
    Extract the archive at `path` into `tdir` and pick the ebook inside it.

    Every available input format except the HTML ones is tried in turn and
    the first extracted file carrying that extension is returned. When no
    such file exists the choice is delegated to find_html_index().

    :param path: Path to the archive.
    :param tdir: Directory the archive is extracted into.
    :return: A tuple ``(path_to_file, extension)``.
    '''
    extract(path, tdir)
    files = list(walk(tdir))
    from calibre.customize.ui import available_input_formats
    # Build a private set instead of calling remove() on the returned
    # object: remove() raises KeyError when one of the HTML extensions is
    # not registered, and it would also alter the caller's collection.
    fmts = set(available_input_formats()) - set(['htm', 'html', 'xhtm', 'xhtml'])
    for ext in fmts:
        suffix = '.' + ext
        for f in files:
            if not f.lower().endswith(suffix):
                continue
            if ext in ('txt', 'rtf') and os.stat(f).st_size < 2048:
                # Text files under 2 KiB are passed over -- presumably
                # README-style notes rather than the book itself.
                continue
            return f, ext
    return self.find_html_index(files)
@classmethod
def find_html_index(self, files):
    '''
    Given a list of files, find the most likely root HTML file in the
    list.

    Only names ending in .htm, .html, .xhtm or .xhtml (any case) are
    considered. A file whose base name is "toc", then one whose base name
    is "index", is preferred (smallest first when several match);
    otherwise the largest HTML file is chosen.

    :param files: Paths of existing files.
    :return: A tuple ``(path, extension)``; the extension is lower-cased
             and has no leading dot.
    :raises ValueError: If `files` contains no HTML file.
    '''
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))
    # Ascending by size. A key function gives the same stable ordering as
    # the former cmp-based sort and stats each file exactly once.
    html_files.sort(key=lambda f: os.stat(f).st_size)
    for q in ('toc', 'index'):
        for f in html_files:
            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    largest = html_files[-1]
    return largest, os.path.splitext(largest)[1].lower()[1:]
def get_option_by_name(self, name): def get_option_by_name(self, name):
for group in (self.input_options, self.pipeline_options, for group in (self.input_options, self.pipeline_options,
self.output_options): self.output_options):
@ -237,6 +385,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name) rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level: if rec is not None and rec.level <= level:
rec.recommended_value = val rec.recommended_value = val
rec.level = level
def merge_ui_recommendations(self, recommendations): def merge_ui_recommendations(self, recommendations):
''' '''
@ -248,6 +397,7 @@ OptionRecommendation(name='language',
rec = self.get_option_by_name(name) rec = self.get_option_by_name(name)
if rec is not None and rec.level <= level and rec.level < rec.HIGH: if rec is not None and rec.level <= level and rec.level < rec.HIGH:
rec.recommended_value = val rec.recommended_value = val
rec.level = level
def read_user_metadata(self): def read_user_metadata(self):
''' '''
@ -332,6 +482,9 @@ OptionRecommendation(name='language',
self.opts.source = self.opts.input_profile self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile self.opts.dest = self.opts.output_profile
from calibre.ebooks.oeb.transforms.structure import DetectStructure
DetectStructure()(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size fbase = self.opts.base_font_size
if fbase == 0: if fbase == 0:
@ -342,6 +495,9 @@ OptionRecommendation(name='language',
else: else:
fkey = map(float, fkey.split(',')) fkey = map(float, fkey.split(','))
if self.opts.extra_css and os.path.exists(self.opts.extra_css):
self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
flattener = CSSFlattener(fbase=fbase, fkey=fkey, flattener = CSSFlattener(fbase=fbase, fkey=fkey,
lineh=self.opts.line_height, lineh=self.opts.line_height,
untable=self.opts.linearize_tables) untable=self.opts.linearize_tables)
@ -364,6 +520,8 @@ OptionRecommendation(name='language',
trimmer = ManifestTrimmer() trimmer = ManifestTrimmer()
trimmer(self.oeb, self.opts) trimmer(self.oeb, self.opts)
self.oeb.toc.rationalize_play_orders()
self.log.info('Creating %s...'%self.output_plugin.name) self.log.info('Creating %s...'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log) self.opts, self.log)
@ -384,4 +542,3 @@ def create_oebbook(log, path_or_stream, opts, reader=None):
reader()(oeb, path_or_stream) reader()(oeb, path_or_stream)
return oeb return oeb

View File

@ -15,130 +15,17 @@ from calibre.ebooks import DRMError
from calibre.ebooks.epub import config as common_config from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from calibre.customize.ui import run_plugins_on_preprocess from calibre.customize.ui import run_plugins_on_preprocess
def lit2opf(path, tdir, opts):
    '''
    Explode the LIT file at `path` into `tdir` and locate its OPF.

    After extraction, manifest entries whose target is missing on disk but
    exists under a name with '&' written as '%26' are repaired: the file is
    renamed to the manifest's spelling and the quoted base name is replaced
    by the unquoted one in every file below `tdir`.

    :param path: Path to the LIT file.
    :param tdir: Directory to extract into.
    :param opts: Unused here.
    :return: Path of the first file ending in .opf (any case) found below
             `tdir`, or None when there is none.
    '''
    from calibre.ebooks.lit.reader import LitReader
    print('Exploding LIT file: %s' % (path,))
    reader = LitReader(path)
    reader.extract_content(tdir, False)

    # Track the match explicitly. Re-testing the loop variable afterwards
    # failed on an empty directory (None has no endswith) and discarded
    # matches such as "BOOK.OPF" because the re-test was case-sensitive.
    opf = None
    for candidate in walk(tdir):
        if candidate.lower().endswith('.opf'):
            opf = candidate
            break
    if opf is None:
        return None

    # Check for url-quoted filenames
    opf_dir = os.path.dirname(opf)
    replacements = []
    for item in OPF(opf, opf_dir).itermanifest():
        href = item.get('href', '')
        wanted = os.path.join(opf_dir, *(href.split('/')))
        quoted = wanted.replace('&', '%26')
        if not os.path.exists(wanted) and os.path.exists(quoted):
            replacements.append((quoted, wanted))

    if replacements:
        print('Fixing quoted filenames...')
        for quoted, wanted in replacements:
            if os.path.exists(quoted):
                os.rename(quoted, wanted)
        for name in walk(tdir):
            with open(name, 'r+b') as stream:
                raw = stream.read()
                for quoted, wanted in replacements:
                    raw = raw.replace(os.path.basename(quoted),
                                      os.path.basename(wanted))
                # Rewrite in place: rewind, drop the old content, write.
                stream.seek(0)
                stream.truncate()
                stream.write(raw)
    return opf
def mobi2opf(path, tdir, opts): SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
from calibre.ebooks.mobi.reader import MobiReader
print 'Exploding MOBI file:', path.encode('utf-8') if isinstance(path, unicode) else path
reader = MobiReader(path)
reader.extract_content(tdir)
files = list(walk(tdir))
opts.encoding = 'utf-8'
for f in files:
if f.lower().endswith('.opf'):
return f
html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE)
hf = [f for f in files if html_pat.match(os.path.splitext(f)[1]) is not None]
mi = MetaInformation(os.path.splitext(os.path.basename(path))[0], [_('Unknown')])
opf = OPFCreator(tdir, mi)
opf.create_manifest([(hf[0], None)])
opf.create_spine([hf[0]])
ans = os.path.join(tdir, 'metadata.opf')
opf.render(open(ans, 'wb'))
return ans
def fb22opf(path, tdir, opts):
    '''
    Hand the FB2 file at `path` to the FB2 ``to_html`` converter, with
    `tdir` as its second argument, and return that converter's result
    unchanged. `opts` is not used.
    '''
    from calibre.ebooks.lrf.fb2.convert_from import to_html as fb2_to_html
    print('Converting FB2 to HTML...')
    result = fb2_to_html(path, tdir)
    return result
def rtf2opf(path, tdir, opts):
    '''
    Run the RTF ``generate_html`` converter on `path` with `tdir` as its
    second argument and return the path ``<tdir>/metadata.opf``. `opts` is
    not used.
    '''
    from calibre.ebooks.lrf.rtf.convert_from import generate_html as rtf_to_html
    rtf_to_html(path, tdir)
    opf_path = os.path.join(tdir, 'metadata.opf')
    return opf_path
def txt2opf(path, tdir, opts):
    '''
    Run the TXT ``generate_html`` converter on `path`, passing
    ``opts.encoding`` and `tdir`, and return the path
    ``<tdir>/metadata.opf``.
    '''
    from calibre.ebooks.lrf.txt.convert_from import generate_html as txt_to_html
    txt_to_html(path, opts.encoding, tdir)
    opf_path = os.path.join(tdir, 'metadata.opf')
    return opf_path
def pdf2opf(path, tdir, opts):
    '''
    Run the PDF ``generate_html`` converter on `path` with `tdir` as its
    second argument, then set ``opts.dont_split_on_page_breaks`` to True
    and return the path ``<tdir>/metadata.opf``.
    '''
    from calibre.ebooks.lrf.pdf.convert_from import generate_html as pdf_to_html
    pdf_to_html(path, tdir)
    opts.dont_split_on_page_breaks = True
    opf_path = os.path.join(tdir, 'metadata.opf')
    return opf_path
def epub2opf(path, tdir, opts):
    '''
    Unzip the EPUB at `path` into `tdir` and return the first file below
    `tdir` whose name ends in .opf (any case).

    ``opts.chapter_mark`` is set to 'none' as a side effect.

    :raises ValueError: If the archive contains no OPF file.
    :raises DRMError: If META-INF/encryption.xml exists and
                      process_encryption() returns a false value for it.
    '''
    archive = ZipFile(path)
    archive.extractall(tdir)
    opts.chapter_mark = 'none'
    encfile = os.path.join(tdir, 'META-INF', 'encryption.xml')

    opf = None
    for candidate in walk(tdir):
        if candidate.lower().endswith('.opf'):
            opf = candidate
            break

    if opf is None:
        raise ValueError('%s is not a valid EPUB file'%path)
    if os.path.exists(encfile) and not process_encryption(encfile, opf):
        raise DRMError(os.path.basename(path))
    return opf
def odt2epub(path, tdir, opts):
    '''
    Set ``opts.encoding`` to 'utf-8', then call an ``Extract`` instance
    with `path` and `tdir` and return its result unchanged.
    '''
    from calibre.ebooks.odt.to_oeb import Extract
    opts.encoding = 'utf-8'
    extractor = Extract()
    return extractor(path, tdir)
# Dispatch table keyed by lower-case file extension (without the dot). Each
# value is called as func(path, tdir, opts) by any2epub, which afterwards
# treats the returned path as an OPF file. 'mobi', 'prc' and 'azw' share one
# handler.
MAP = {
'lit' : lit2opf,
'mobi' : mobi2opf,
'prc' : mobi2opf,
'azw' : mobi2opf,
'fb2' : fb22opf,
'rtf' : rtf2opf,
'txt' : txt2opf,
'pdf' : pdf2opf,
'epub' : epub2opf,
'odt' : odt2epub,
}
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub'] 'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
def unarchive(path, tdir): def unarchive(path, tdir):
extract(path, tdir) extract(path, tdir)
files = list(walk(tdir)) files = list(walk(tdir))
for ext in ['opf'] + list(MAP.keys()): for ext in ['opf'] + list(MAP.keys()):
for f in files: for f in files:
if f.lower().endswith('.'+ext): if f.lower().endswith('.'+ext):
@ -147,32 +34,32 @@ def unarchive(path, tdir):
return f, ext return f, ext
return find_html_index(files) return find_html_index(files)
def any2epub(opts, path, notification=None, create_epub=True, def any2epub(opts, path, notification=None, create_epub=True,
oeb_cover=False, extract_to=None): oeb_cover=False, extract_to=None):
path = run_plugins_on_preprocess(path) path = run_plugins_on_preprocess(path)
ext = os.path.splitext(path)[1] ext = os.path.splitext(path)[1]
if not ext: if not ext:
raise ValueError('Unknown file type: '+path) raise ValueError('Unknown file type: '+path)
ext = ext.lower()[1:] ext = ext.lower()[1:]
if opts.output is None: if opts.output is None:
opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub' opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2): with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
if ext in ['rar', 'zip', 'oebzip']: if ext in ['rar', 'zip', 'oebzip']:
path, ext = unarchive(path, tdir1) path, ext = unarchive(path, tdir1)
print 'Found %s file in archive'%(ext.upper()) print 'Found %s file in archive'%(ext.upper())
if ext in MAP.keys(): if ext in MAP.keys():
path = MAP[ext](path, tdir2, opts) path = MAP[ext](path, tdir2, opts)
ext = 'opf' ext = 'opf'
if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None: if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
raise ValueError('Conversion from %s is not supported'%ext.upper()) raise ValueError('Conversion from %s is not supported'%ext.upper())
print 'Creating EPUB file...' print 'Creating EPUB file...'
html2epub(path, opts, notification=notification, html2epub(path, opts, notification=notification,
create_epub=create_epub, oeb_cover=oeb_cover, create_epub=create_epub, oeb_cover=oeb_cover,
extract_to=extract_to) extract_to=extract_to)

View File

@ -11,12 +11,12 @@ from lxml import etree
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
class EPUBInput(InputFormatPlugin): class EPUBInput(InputFormatPlugin):
name = 'EPUB Input' name = 'EPUB Input'
author = 'Kovid Goyal' author = 'Kovid Goyal'
description = 'Convert EPUB files (.epub) to HTML' description = 'Convert EPUB files (.epub) to HTML'
file_types = set(['epub']) file_types = set(['epub'])
@classmethod @classmethod
def decrypt_font(cls, key, path): def decrypt_font(cls, key, path):
raw = open(path, 'rb').read() raw = open(path, 'rb').read()
@ -26,7 +26,7 @@ class EPUBInput(InputFormatPlugin):
with open(path, 'wb') as f: with open(path, 'wb') as f:
f.write(decrypt) f.write(decrypt)
f.write(raw[1024:]) f.write(raw[1024:])
@classmethod @classmethod
def process_ecryption(cls, encfile, opf, log): def process_ecryption(cls, encfile, opf, log):
key = None key = None
@ -51,25 +51,75 @@ class EPUBInput(InputFormatPlugin):
traceback.print_exc() traceback.print_exc()
return False return False
@classmethod
def rationalize_cover(self, opf):
    '''
    Replace an HTML cover page at the start of the book with a rendered
    image of it.

    Nothing happens unless the guide has a reference of type "cover" with a
    non-empty href, the spine and manifest are non-empty, and the first
    spine item's idref equals the first manifest item's id. In that case:

      * the first spine item is removed,
      * the cover guide reference is pointed at 'calibre_raster_cover.jpg',
      * existing "titlepage" guide references are removed and a new one
        pointing at the old cover href is added,
      * the old cover document is rendered with render_html() and the
        result written to 'calibre_raster_cover.jpg' in the current
        directory.

    :param opf: Parsed OPF object; it is modified in place.
    '''
    guide_cover, guide_elem = None, None
    for guide_elem in opf.iterguide():
        if guide_elem.get('type', '').lower() == 'cover':
            guide_cover = guide_elem.get('href', '')
            break
    if not guide_cover:
        return

    spine = list(opf.iterspine())
    if not spine:
        return
    idref = spine[0].get('idref', '')
    manifest = list(opf.itermanifest())
    if not manifest:
        return
    # NOTE(review): only the ids are compared; nothing here checks that the
    # first manifest item's href is the guide cover -- confirm intent.
    if manifest[0].get('id', False) != idref:
        return

    spine[0].getparent().remove(spine[0])
    guide_elem.set('href', 'calibre_raster_cover.jpg')
    for elem in list(opf.iterguide()):
        if elem.get('type', '').lower() == 'titlepage':
            elem.getparent().remove(elem)

    from calibre.ebooks.oeb.base import OPF
    t = etree.SubElement(guide_elem.getparent(), OPF('reference'))
    t.set('type', 'titlepage')
    t.set('href', guide_cover)
    t.set('title', 'Title Page')

    from calibre.ebooks import render_html
    # Render first, then write through a context manager: a failed render
    # no longer leaves an empty image file behind and the handle is always
    # closed.
    rendered = render_html(guide_cover).data
    with open('calibre_raster_cover.jpg', 'wb') as f:
        f.write(rendered)
def convert(self, stream, options, file_ext, log, accelerators): def convert(self, stream, options, file_ext, log, accelerators):
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from calibre import walk from calibre import walk
from calibre.ebooks import DRMError from calibre.ebooks import DRMError
from calibre.ebooks.metadata.opf2 import OPF
zf = ZipFile(stream) zf = ZipFile(stream)
zf.extractall(os.getcwd()) zf.extractall(os.getcwd())
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml')) encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
opf = None opf = None
for f in walk('.'): for f in walk(u'.'):
if f.lower().endswith('.opf'): if f.lower().endswith('.opf'):
opf = f opf = os.path.abspath(f)
break break
path = getattr(stream, 'name', 'stream') path = getattr(stream, 'name', 'stream')
if opf is None: if opf is None:
raise ValueError('%s is not a valid EPUB file'%path) raise ValueError('%s is not a valid EPUB file'%path)
if os.path.exists(encfile): if os.path.exists(encfile):
if not self.process_encryption(encfile, opf, log): if not self.process_encryption(encfile, opf, log):
raise DRMError(os.path.basename(path)) raise DRMError(os.path.basename(path))
return os.path.join(os.getcwd(), opf) opf = os.path.relpath(opf, os.getcwdu())
parts = os.path.split(opf)
opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
if len(parts) > 1:
delta = '/'.join(parts[:-1])+'/'
for elem in opf.itermanifest():
elem.set('href', delta+elem.get('href'))
for elem in opf.iterguide():
elem.set('href', delta+elem.get('href'))
self.rationalize_cover(opf)
with open('content.opf', 'wb') as nopf:
nopf.write(opf.render())
return os.path.abspath('content.opf')

View File

@ -0,0 +1,74 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to HTML
"""
import os
from base64 import b64decode
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
class FB2Input(InputFormatPlugin):
    '''
    Input plugin that turns an FB2 (FictionBook 2) file into an XHTML file
    plus an OPF in the current working directory.
    '''

    name        = 'FB2 Input'
    author      = 'Anatoly Shipitsin'
    description = 'Convert FB2 files to HTML'
    file_types  = set(['fb2'])

    # Table of Contents levels are taken from the h1/h2/h3 headings.
    recommendations = set([
        ('level1_toc', '//h:h1', OptionRecommendation.MED),
        ('level2_toc', '//h:h2', OptionRecommendation.MED),
        ('level3_toc', '//h:h3', OptionRecommendation.MED),
        ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the FB2 document in `stream`.

        Embedded binaries are written to the current directory, the
        document is transformed with the fb2_xsl stylesheet into
        'index.xhtml', and 'metadata.opf' is created listing every file
        present in the current directory at that point.

        :param stream: Seekable file-like object containing the FB2 data.
        :param log: Logger; only ``debug`` is used here.
        :return: Absolute path of the generated 'metadata.opf'.
        '''
        from calibre.resources import fb2_xsl
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS
        NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}

        log.debug('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(stream, parser)
        self.extract_embedded_content(doc)

        log.debug('Converting XML to HTML...')
        styledoc = etree.fromstring(fb2_xsl)
        transform = etree.XSLT(styledoc)
        result = transform(doc)
        # Context managers make sure the data is flushed and the handles
        # closed before the files are listed and handed on.
        with open('index.xhtml', 'wb') as f:
            f.write(transform.tostring(result))

        # The stream was consumed by the parser; rewind for the metadata
        # reader.
        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]

        opf = OPFCreator(os.getcwdu(), mi)
        entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])

        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
            if href is not None:
                # '#id' refers to an embedded binary, which was written to
                # a file named after that id.
                if href.startswith('#'):
                    href = href[1:]
                opf.guide.set_cover(os.path.abspath(href))

        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(os.getcwd(), 'metadata.opf')

    def extract_embedded_content(self, doc):
        '''
        Write the base64 payload of every top-level element whose tag
        contains "binary" and which has an ``id`` attribute to a file named
        after that id, in the current directory. Elements without any text
        are skipped.
        '''
        for elem in doc.xpath('./*'):
            if 'binary' not in elem.tag or 'id' not in elem.attrib:
                continue
            # elem.text is None for an empty element.
            payload = (elem.text or '').strip()
            if not payload:
                continue
            # NOTE(review): the id comes from the book itself and is used
            # as a file name unchecked; an id containing path separators
            # could point outside the working directory -- consider
            # validating it.
            fname = elem.attrib['id']
            data = b64decode(payload)
            with open(fname, 'wb') as f:
                f.write(data)

View File

@ -1,125 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
"""
Convert .fb2 files to .lrf
"""
import os, sys, shutil, logging
from base64 import b64decode
from lxml import etree
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers
from calibre.resources import fb2_xsl
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata import MetaInformation
# Build the command-line option parser for the FB2 converter: the parser
# returned by lrf_option_parser() with FB2-specific usage text, plus the two
# flags added below.
def option_parser():
parser = lrf_option_parser(
_('''%prog [options] mybook.fb2
%prog converts mybook.fb2 to mybook.lrf'''))
# Boolean flag, off by default, stored as options.debug_html_generation.
parser.add_option('--debug-html-generation', action='store_true', default=False,
dest='debug_html_generation', help=_('Print generated HTML to stdout and quit.'))
# Boolean flag, off by default; process_file reads it via
# getattr(options, 'keep_intermediate_files', False).
parser.add_option('--keep-intermediate-files', action='store_true', default=False,
help=_('Keep generated HTML files after completing conversion to LRF.'))
return parser
def extract_embedded_content(doc):
    '''
    Write the base64 payload of every top-level element of `doc` whose tag
    contains "binary" and which has an ``id`` attribute to a file named
    after that id, in the current directory.

    Elements without any text are skipped instead of raising.

    :param doc: Parsed document offering ``xpath()``.
    '''
    for elem in doc.xpath('./*'):
        if 'binary' not in elem.tag or 'id' not in elem.attrib:
            continue
        # elem.text is None for an empty element.
        payload = (elem.text or '').strip()
        if not payload:
            continue
        fname = elem.attrib['id']
        # Decode before opening so bad data does not leave an empty file.
        data = b64decode(payload)
        with open(fname, 'wb') as f:
            f.write(data)
def to_html(fb2file, tdir):
    '''
    Convert the FB2 file `fb2file` to 'index.html' plus 'metadata.opf'
    inside `tdir`.

    The working directory is changed to `tdir` for the duration of the
    call and restored afterwards, even on error. If the metadata cannot be
    read, the title falls back to the file's base name and the author to
    the translated string "Unknown".

    :param fb2file: Path to the FB2 file.
    :param tdir: Existing directory that receives the output.
    :return: Path of 'metadata.opf' inside `tdir`.
    '''
    fb2file = os.path.abspath(fb2file)
    cwd = os.getcwd()
    try:
        os.chdir(tdir)
        print('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.parse(fb2file, parser)
        extract_embedded_content(doc)
        print('Converting XML to HTML...')
        styledoc = etree.fromstring(fb2_xsl)
        transform = etree.XSLT(styledoc)
        result = transform(doc)
        with open('index.html', 'wb') as f:
            f.write(transform.tostring(result))
        # Metadata is best effort, but a bare "except:" would also swallow
        # KeyboardInterrupt and SystemExit.
        try:
            with open(fb2file, 'rb') as f:
                mi = get_metadata(f, 'fb2')
        except Exception:
            mi = MetaInformation(None, None)
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(fb2file))[0]
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(tdir, mi)
        opf.create_manifest([('index.html', None)])
        opf.create_spine(['index.html'])
        # Close (and thereby flush) the OPF before its path is returned.
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.join(tdir, 'metadata.opf')
    finally:
        os.chdir(cwd)
def generate_html(fb2file, encoding, logger):
    '''
    Convert `fb2file` with to_html() inside a new persistent temporary
    directory (suffix '_fb22lrf') and return the path 'index.html' within
    that directory. `encoding` and `logger` are not used.
    '''
    workdir = PersistentTemporaryDirectory('_fb22lrf')
    to_html(fb2file, workdir)
    html_path = os.path.join(workdir, 'index.html')
    return html_path
def process_file(path, options, logger=None):
    '''
    Convert the FB2 file at `path` by first generating HTML and then
    running the HTML converter on it.

    Missing or "Unknown" title, author, category and freetext options are
    filled in from the file's metadata. ``options.output`` defaults to the
    input's base name with a '.lrs' or '.lrf' extension (depending on
    ``options.lrs``) in the current directory and is made absolute.

    :param path: Path to the FB2 file.
    :param options: Parsed command-line options; modified in place.
    :param logger: Logger to use; when None a 'fb22lrf' logger is set up,
                   at DEBUG level if ``options.verbose`` else INFO.
    '''
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('fb22lrf')
        setup_cli_handlers(logger, level)
    fb2 = os.path.abspath(os.path.expanduser(path))
    # The context manager closes the file even when get_metadata() raises.
    with open(fb2, 'rb') as f:
        mi = get_metadata(f, 'fb2')
    htmlfile = generate_html(fb2, options.encoding, logger)
    tdir = os.path.dirname(htmlfile)
    cwd = os.getcwdu()
    try:
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(fb2))[0]
        if (not options.title or options.title == _('Unknown')):
            options.title = mi.title
        if (not options.author or options.author == _('Unknown')) and mi.authors:
            # Note: takes the last listed author.
            options.author = mi.authors.pop()
        if (not options.category or options.category == _('Unknown')) and mi.category:
            options.category = mi.category
        if (not options.freetext or options.freetext == _('Unknown')) and mi.comments:
            options.freetext = mi.comments
        # The HTML converter is run from inside the directory holding the
        # generated files.
        os.chdir(tdir)
        html_process_file(htmlfile, options, logger)
    finally:
        os.chdir(cwd)
        if getattr(options, 'keep_intermediate_files', False):
            logger.debug('Intermediate files in '+ tdir)
        else:
            shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
    '''
    Command-line entry point.

    Expects exactly one positional argument after the program name: the
    FB2 file to convert. Returns 0 after conversion, or 1 after printing
    the help text when the argument count is wrong.
    '''
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) == 2:
        process_file(args[1], options, logger)
        return 0
    parser.print_help()
    # Blank line, then the message.
    print('\nNo fb2 file specified')
    return 1
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,190 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys, shutil, logging, glob
from lxml import etree
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers
from calibre.libwand import convert, WandException
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.lrf.rtf.xsl import xhtml
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
def option_parser():
    '''
    Build the command line parser for the RTF converter.

    Starts from the generic LRF option parser and adds the
    --keep-intermediate-files switch, which is off by default.
    '''
    usage = _('''%prog [options] mybook.rtf
%prog converts mybook.rtf to mybook.lrf''')
    rtf_parser = lrf_option_parser(usage)
    rtf_parser.add_option('--keep-intermediate-files', default=False,
                          action='store_true')
    return rtf_parser
def convert_images(html, logger):
wmfs = glob.glob('*.wmf') + glob.glob('*.WMF')
for wmf in wmfs:
target = os.path.join(os.path.dirname(wmf), os.path.splitext(os.path.basename(wmf))[0]+'.jpg')
try:
convert(wmf, target)
html = html.replace(os.path.basename(wmf), os.path.basename(target))
except WandException, err:
logger.warning(u'Unable to convert image %s with error: %s'%(wmf, unicode(err)))
continue
return html
def process_file(path, options, logger=None):
    '''
    Convert the RTF file at `path` using the HTML-to-LRF pipeline.

    The file's metadata is read first, then the RTF is turned into HTML in a
    temporary directory and passed to html_process_file(). Title, author,
    category and free text that are unset (or 'Unknown') in `options` are
    filled in from the metadata. When `options.output` is empty it defaults
    to the input base name with a .lrs or .lrf extension in the current
    directory.

    :param path: Path to the RTF file; ``~`` is expanded.
    :param options: Parsed command line options; modified in place.
    :param logger: Logger to use. When None, an 'rtf2lrf' logger is set up,
                   at DEBUG level if `options.verbose` is set.

    The working directory is always restored, and the temporary directory is
    removed unless `options.keep_intermediate_files` is set. This also holds
    when the RTF to HTML step itself fails.
    '''
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('rtf2lrf')
        setup_cli_handlers(logger, level)
    rtf = os.path.abspath(os.path.expanduser(path))
    f = open(rtf, 'rb')
    try:
        mi = get_metadata(f, 'rtf')
    finally:
        # Close the handle even when metadata extraction raises.
        f.close()
    tdir = PersistentTemporaryDirectory('_rtf2lrf')
    cwd = os.getcwdu()
    try:
        # Inside the try block so that a failed conversion still cleans up
        # the temporary directory.
        html = generate_html(rtf, tdir)
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(rtf))[0]
        if (not options.title or options.title == 'Unknown'):
            options.title = mi.title
        # NOTE(review): generate_html() reads mi.authors while this reads
        # mi.author -- confirm the metadata object exposes both.
        if (not options.author or options.author == 'Unknown') and mi.author:
            options.author = mi.author
        if (not options.category or options.category == 'Unknown') and mi.category:
            options.category = mi.category
        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
            options.freetext = mi.comments
        # The HTML converter is run from inside the temporary directory.
        os.chdir(tdir)
        html_process_file(html, options, logger)
    finally:
        os.chdir(cwd)
        if getattr(options, 'keep_intermediate_files', False):
            logger.debug('Intermediate files in '+ tdir)
        else:
            shutil.rmtree(tdir)
def main(args=sys.argv, logger=None):
parser = option_parser()
options, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
print 'No rtf file specified'
return 1
process_file(args[1], options, logger)
return 0
def generate_xml(rtfpath, tdir):
    '''
    Run the rtf2xml parser over `rtfpath`, writing the result to
    ``index.xml`` inside `tdir`.

    The parser is run with `tdir` as the working directory; the previous
    working directory is restored afterwards, even on failure.

    :param rtfpath: Path to the RTF file. A relative path is interpreted
                    relative to the caller's working directory.
    :param tdir: Directory that receives ``index.xml``.
    :return: Path of the generated XML file.
    '''
    from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
    ofile = os.path.join(tdir, 'index.xml')
    # Resolve the input path *before* changing directory; resolving it
    # afterwards would make a relative path point inside tdir.
    rtfpath = os.path.abspath(rtfpath)
    cwd = os.getcwdu()
    os.chdir(tdir)
    try:
        parser = ParseRtf(
            in_file = rtfpath,
            out_file = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        parser.parse_rtf()
    finally:
        os.chdir(cwd)
    return ofile
def generate_html(rtfpath, tdir):
print 'Converting RTF to XML...'
rtfpath = os.path.abspath(rtfpath)
try:
xml = generate_xml(rtfpath, tdir)
except RtfInvalidCodeException:
raise Exception(_('This RTF file has a feature calibre does not support. Convert it to HTML and then convert it.'))
tdir = os.path.dirname(xml)
cwd = os.getcwdu()
os.chdir(tdir)
try:
print 'Parsing XML...'
parser = etree.XMLParser(recover=True, no_network=True)
try:
doc = etree.parse(xml, parser)
except:
raise
print 'Parsing failed. Trying to clean up XML...'
soup = BeautifulStoneSoup(open(xml, 'rb').read())
doc = etree.fromstring(str(soup))
print 'Converting XML to HTML...'
styledoc = etree.fromstring(xhtml)
transform = etree.XSLT(styledoc)
result = transform(doc)
tdir = os.path.dirname(xml)
html = os.path.join(tdir, 'index.html')
f = open(html, 'wb')
res = transform.tostring(result)
res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
f.write(res)
f.close()
try:
mi = get_metadata(open(rtfpath, 'rb'), 'rtf')
except:
mi = MetaInformation(None, None)
if not mi.title:
mi.title = os.path.splitext(os.path.basename(rtfpath))[0]
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(tdir, mi)
opf.create_manifest([('index.html', None)])
opf.create_spine(['index.html'])
opf.render(open('metadata.opf', 'wb'))
finally:
os.chdir(cwd)
return html
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
sys.exit(main())

View File

@ -4,13 +4,15 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch cover from LibraryThing.com based on ISBN number. Fetch cover from LibraryThing.com based on ISBN number.
''' '''
import sys, socket, os, re, mechanize import sys, socket, os, re
from calibre import browser as _browser from calibre import browser as _browser
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
browser = None browser = None
OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
class LibraryThingError(Exception): class LibraryThingError(Exception):
pass pass
@ -30,15 +32,21 @@ def login(username, password, force=True):
browser['formusername'] = username browser['formusername'] = username
browser['formpassword'] = password browser['formpassword'] = password
browser.submit() browser.submit()
def cover_from_isbn(isbn, timeout=5.):
def cover_from_isbn(isbn, timeout=5., username=None, password=None):
global browser global browser
if browser is None: if browser is None:
browser = _browser() browser = _browser()
_timeout = socket.getdefaulttimeout() _timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(timeout) socket.setdefaulttimeout(timeout)
src = None src = None
try:
return browser.open(OPENLIBRARY%isbn).read(), 'jpg'
except:
pass # Cover not found
if username and password:
login(username, password, force=False)
try: try:
src = browser.open('http://www.librarything.com/isbn/'+isbn).read().decode('utf-8', 'replace') src = browser.open('http://www.librarything.com/isbn/'+isbn).read().decode('utf-8', 'replace')
except Exception, err: except Exception, err:
@ -55,7 +63,7 @@ def cover_from_isbn(isbn, timeout=5.):
url = url.find('img') url = url.find('img')
if url is None: if url is None:
raise LibraryThingError(_('LibraryThing.com server error. Try again later.')) raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
url = re.sub(r'_SX\d+', '', url['src']) url = re.sub(r'_S[XY]\d+', '', url['src'])
cover_data = browser.open(url).read() cover_data = browser.open(url).read()
return cover_data, url.rpartition('.')[-1] return cover_data, url.rpartition('.')[-1]
finally: finally:
@ -68,9 +76,9 @@ _('''
Fetch a cover image for the book identified by ISBN from LibraryThing.com Fetch a cover image for the book identified by ISBN from LibraryThing.com
''')) '''))
parser.add_option('-u', '--username', default=None, parser.add_option('-u', '--username', default=None,
help='Username for LibraryThing.com') help='Username for LibraryThing.com')
parser.add_option('-p', '--password', default=None, parser.add_option('-p', '--password', default=None,
help='Password for LibraryThing.com') help='Password for LibraryThing.com')
return parser return parser
@ -81,13 +89,8 @@ def main(args=sys.argv):
parser.print_help() parser.print_help()
return 1 return 1
isbn = args[1] isbn = args[1]
if opts.username and opts.password: cover_data, ext = cover_from_isbn(isbn, username=opts.username,
try: password=opts.password)
login(opts.username, opts.password)
except mechanize.FormNotFoundError:
raise LibraryThingError(_('LibraryThing.com server error. Try again later.'))
cover_data, ext = cover_from_isbn(isbn)
if not ext: if not ext:
ext = 'jpg' ext = 'jpg'
oname = os.path.abspath(isbn+'.'+ext) oname = os.path.abspath(isbn+'.'+ext)
@ -96,4 +99,4 @@ def main(args=sys.argv):
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())

View File

@ -0,0 +1,67 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert an ODT file into a Open Ebook
'''
import os
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
from calibre.customize.conversion import InputFormatPlugin
class Extract(ODF2XHTML):
def extract_pictures(self, zf):
if not os.path.exists('Pictures'):
os.makedirs('Pictures')
for name in zf.namelist():
if name.startswith('Pictures'):
data = zf.read(name)
with open(name, 'wb') as f:
f.write(data)
def __call__(self, stream, odir):
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
if not os.path.exists(odir):
os.makedirs(odir)
with CurrentDir(odir):
print 'Extracting ODT file...'
html = self.odf2xhtml(stream)
with open('index.xhtml', 'wb') as f:
f.write(html.encode('utf-8'))
zf = ZipFile(stream, 'r')
self.extract_pictures(zf)
stream.seek(0)
mi = get_metadata(stream, 'odt')
if not mi.title:
mi.title = _('Unknown')
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwd())])
opf.create_spine([os.path.abspath('index.xhtml')])
with open('metadata.opf', 'wb') as f:
opf.render(f)
return os.path.abspath('metadata.opf')
# Conversion input plugin for ODT files: the work is delegated to Extract,
# which unpacks the document into the current directory.
class ODTInput(InputFormatPlugin):

    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types  = set(['odt'])

    def convert(self, stream, options, file_ext, log, accelerators):
        # Only `stream` is used; the value returned by the extractor is
        # passed straight back to the caller.
        extractor = Extract()
        return extractor(stream, '.')

View File

@ -1,72 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert an ODT file into a Open Ebook
'''
import os, sys
from odf.odf2xhtml import ODF2XHTML
from calibre import CurrentDir, walk
from calibre.utils.zipfile import ZipFile
from calibre.utils.config import OptionParser
from calibre.ebooks.metadata.odt import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
class Extract(ODF2XHTML):
def extract_pictures(self, zf):
if not os.path.exists('Pictures'):
os.makedirs('Pictures')
for name in zf.namelist():
if name.startswith('Pictures'):
data = zf.read(name)
with open(name, 'wb') as f:
f.write(data)
def __call__(self, path, odir):
if not os.path.exists(odir):
os.makedirs(odir)
path = os.path.abspath(path)
with CurrentDir(odir):
print 'Extracting ODT file...'
html = self.odf2xhtml(path)
with open('index.html', 'wb') as f:
f.write(html.encode('utf-8'))
with open(path, 'rb') as f:
zf = ZipFile(f, 'r')
self.extract_pictures(zf)
f.seek(0)
mi = get_metadata(f)
if not mi.title:
mi.title = os.path.splitext(os.path.basename(path))
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwd())])
opf.create_spine([os.path.abspath('index.html')])
with open('metadata.opf', 'wb') as f:
opf.render(f)
return os.path.abspath('metadata.opf')
def option_parser():
    '''
    Build the command line parser for the ODT extractor.

    The only option is -o/--output-dir, which defaults to the current
    directory.
    '''
    odt_parser = OptionParser('%prog [options] file.odt')
    odt_parser.add_option(
        '-o', '--output-dir', default='.',
        help=_('The output directory. Defaults to the current directory.'))
    return odt_parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print 'No ODT file specified'
return 1
Extract()(args[1], os.path.abspath(opts.output_dir))
print 'Extracted to', os.path.abspath(opts.output_dir)
return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
sys.exit(main())

View File

@ -41,10 +41,12 @@ NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
SVG_NS = 'http://www.w3.org/2000/svg' SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink' XLINK_NS = 'http://www.w3.org/1999/xlink'
CALIBRE_NS = 'http://calibre.kovidgoyal.net/2009/metadata' CALIBRE_NS = 'http://calibre.kovidgoyal.net/2009/metadata'
RE_NS = 'http://exslt.org/regular-expressions'
XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS, XPNSMAP = {'h' : XHTML_NS, 'o1' : OPF1_NS, 'o2' : OPF2_NS,
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS, 'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS, 'xsi': XSI_NS, 'dt' : DCTERMS_NS, 'ncx': NCX_NS,
'svg': SVG_NS, 'xl' : XLINK_NS} 'svg': SVG_NS, 'xl' : XLINK_NS, 're': RE_NS}
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS} OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS, OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
'xsi': XSI_NS, 'calibre': CALIBRE_NS} 'xsi': XSI_NS, 'calibre': CALIBRE_NS}
@ -1024,7 +1026,7 @@ class Manifest(object):
media_type = XHTML_MIME media_type = XHTML_MIME
elif media_type in OEB_STYLES: elif media_type in OEB_STYLES:
media_type = CSS_MIME media_type = CSS_MIME
attrib = {'id': item.id, 'href': item.href, attrib = {'id': item.id, 'href': urlunquote(item.href),
'media-type': media_type} 'media-type': media_type}
if item.fallback: if item.fallback:
attrib['fallback'] = item.fallback attrib['fallback'] = item.fallback
@ -1236,7 +1238,7 @@ class Guide(object):
def to_opf2(self, parent=None): def to_opf2(self, parent=None):
elem = element(parent, OPF('guide')) elem = element(parent, OPF('guide'))
for ref in self.refs.values(): for ref in self.refs.values():
attrib = {'type': ref.type, 'href': ref.href} attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
if ref.title: if ref.title:
attrib['title'] = ref.title attrib['title'] = ref.title
element(elem, OPF('reference'), attrib=attrib) element(elem, OPF('reference'), attrib=attrib)
@ -1256,19 +1258,34 @@ class TOC(object):
:attr:`klass`: Optional semantic class referenced by this node. :attr:`klass`: Optional semantic class referenced by this node.
:attr:`id`: Option unique identifier for this node. :attr:`id`: Option unique identifier for this node.
""" """
def __init__(self, title=None, href=None, klass=None, id=None): def __init__(self, title=None, href=None, klass=None, id=None,
play_order=None):
self.title = title self.title = title
self.href = urlnormalize(href) if href else href self.href = urlnormalize(href) if href else href
self.klass = klass self.klass = klass
self.id = id self.id = id
self.nodes = [] self.nodes = []
self.play_order = 0
if play_order is None:
play_order = self.next_play_order()
self.play_order = play_order
def add(self, title, href, klass=None, id=None): def add(self, title, href, klass=None, id=None, play_order=0):
"""Create and return a new sub-node of this node.""" """Create and return a new sub-node of this node."""
node = TOC(title, href, klass, id) node = TOC(title, href, klass, id, play_order)
self.nodes.append(node) self.nodes.append(node)
return node return node
def remove(self, node):
    '''
    Detach `node` from the tree below this node.

    Children are searched in order, each child's own subtree being searched
    before the next child is looked at. Returns True as soon as the node has
    been removed, or False if it was not found.
    '''
    for child in self.nodes:
        if child is node:
            self.nodes.remove(child)
            return True
        # Not this child: look further down its subtree.
        if child.remove(node):
            return True
    return False
def iter(self): def iter(self):
"""Iterate over this node and all descendants in depth-first order.""" """Iterate over this node and all descendants in depth-first order."""
yield self yield self
@ -1276,6 +1293,18 @@ class TOC(object):
for node in child.iter(): for node in child.iter():
yield node yield node
def count(self):
    '''Return how many nodes iter() yields, not counting this node itself.'''
    total = 0
    for _node in self.iter():
        total += 1
    return total - 1
def next_play_order(self):
    '''Return one more than the largest play_order among the nodes yielded by iter().'''
    highest = max(node.play_order for node in self.iter())
    return highest + 1
def has_href(self, href):
    '''Return True if any node yielded by iter() has exactly this href.'''
    return any(node.href == href for node in self.iter())
def iterdescendants(self): def iterdescendants(self):
"""Iterate over all descendant nodes in depth-first order.""" """Iterate over all descendant nodes in depth-first order."""
for child in self.nodes: for child in self.nodes:
@ -1309,6 +1338,10 @@ class TOC(object):
except ValueError: except ValueError:
return 1 return 1
def __str__(self):
    '''Return a short description of this node: its title and href.'''
    fields = (self.title, self.href)
    return 'TOC: %s --> %s' % fields
def to_opf1(self, tour): def to_opf1(self, tour):
for node in self.nodes: for node in self.nodes:
element(tour, 'site', attrib={ element(tour, 'site', attrib={
@ -1319,7 +1352,7 @@ class TOC(object):
def to_ncx(self, parent): def to_ncx(self, parent):
for node in self.nodes: for node in self.nodes:
id = node.id or unicode(uuid.uuid4()) id = node.id or unicode(uuid.uuid4())
attrib = {'id': id, 'playOrder': '0'} attrib = {'id': id, 'playOrder': str(node.play_order)}
if node.klass: if node.klass:
attrib['class'] = node.klass attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib) point = element(parent, NCX('navPoint'), attrib=attrib)
@ -1329,6 +1362,34 @@ class TOC(object):
node.to_ncx(point) node.to_ncx(point)
return parent return parent
def rationalize_play_orders(self):
    '''
    Ensure that all nodes with the same play_order have the same href and
    with different play_orders have different hrefs.
    '''
    def earlier_match(target, attr):
        # First node that comes before `target` in iteration order and has
        # an equal value for `attr`; None when there is no such node.
        wanted = getattr(target, attr)
        for candidate in self.iter():
            if candidate is target:
                return None
            if getattr(candidate, attr) == wanted:
                return candidate
        return None

    for node in self.iter():
        clash = earlier_match(node, 'play_order')
        if clash is not None and node.href != clash.href:
            # Same play_order but a different href: reuse the play_order of
            # an earlier node with this href, else take a fresh one.
            fallback = self.next_play_order()
            node.play_order = getattr(earlier_match(node, 'href'),
                                      'play_order', fallback)
        twin = earlier_match(node, 'href')
        if twin is not None:
            node.play_order = twin.play_order
class PageList(object): class PageList(object):
"""Collection of named "pages" to mapped positions within an OEB data model """Collection of named "pages" to mapped positions within an OEB data model

View File

@ -118,6 +118,7 @@ class EbookIterator(object):
print 'Loaded embedded font:', repr(family) print 'Loaded embedded font:', repr(family)
def __enter__(self): def __enter__(self):
self.delete_on_exit = []
self._tdir = TemporaryDirectory('_ebook_iter') self._tdir = TemporaryDirectory('_ebook_iter')
self.base = self._tdir.__enter__() self.base = self._tdir.__enter__()
from calibre.ebooks.conversion.plumber import Plumber from calibre.ebooks.conversion.plumber import Plumber
@ -137,9 +138,11 @@ class EbookIterator(object):
cover = self.opf.cover cover = self.opf.cover
if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover: if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover:
cfile = os.path.join(os.path.dirname(self.spine[0]), 'calibre_ei_cover.html') cfile = os.path.join(os.path.dirname(self.spine[0]),
'calibre_iterator_cover.html')
open(cfile, 'wb').write(TITLEPAGE%cover) open(cfile, 'wb').write(TITLEPAGE%cover)
self.spine[0:0] = [SpineItem(cfile)] self.spine[0:0] = [SpineItem(cfile)]
self.delete_on_exit.append(cfile)
if self.opf.path_to_html_toc is not None and \ if self.opf.path_to_html_toc is not None and \
self.opf.path_to_html_toc not in self.spine: self.opf.path_to_html_toc not in self.spine:
@ -221,3 +224,6 @@ class EbookIterator(object):
def __exit__(self, *args): def __exit__(self, *args):
self._tdir.__exit__(*args) self._tdir.__exit__(*args)
for x in self.delete_on_exit:
if os.path.exists(x):
os.remove(x)

View File

@ -343,7 +343,8 @@ class OEBReader(object):
continue continue
id = child.get('id') id = child.get('id')
klass = child.get('class') klass = child.get('class')
node = toc.add(title, href, id=id, klass=klass) po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
node = toc.add(title, href, id=id, klass=klass, play_order=po)
self._toc_from_navpoint(item, node, child) self._toc_from_navpoint(item, node, child)
def _toc_from_ncx(self, item): def _toc_from_ncx(self, item):

View File

@ -88,7 +88,7 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
class CSSSelector(etree.XPath): class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *') MIN_SPACE_RE = re.compile(r' *([>~+]) *')
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:") LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
def __init__(self, css, namespaces=XPNSMAP): def __init__(self, css, namespaces=XPNSMAP):
css = self.MIN_SPACE_RE.sub(r'\1', css) css = self.MIN_SPACE_RE.sub(r'\1', css)
path = css_to_xpath(css) path = css_to_xpath(css)
@ -103,10 +103,10 @@ class CSSSelector(etree.XPath):
self.css) self.css)
class Stylizer(object): class Stylizer(object):
STYLESHEETS = WeakKeyDictionary() STYLESHEETS = WeakKeyDictionary()
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']): def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], extra_css=''):
self.oeb = oeb self.oeb = oeb
self.profile = profile self.profile = profile
self.logger = oeb.logger self.logger = oeb.logger
@ -135,6 +135,11 @@ class Stylizer(object):
(path, item.href)) (path, item.href))
continue continue
stylesheets.append(sitem.data) stylesheets.append(sitem.data)
if extra_css:
text = XHTML_CSS_NAMESPACE + extra_css
stylesheet = parser.parseString(text, href=cssname)
stylesheet.namespaces['h'] = XHTML_NS
stylesheets.append(stylesheet)
rules = [] rules = []
index = 0 index = 0
self.stylesheets = set() self.stylesheets = set()
@ -159,7 +164,7 @@ class Stylizer(object):
self.style(elem)._update_cssdict(cssdict) self.style(elem)._update_cssdict(cssdict)
for elem in xpath(tree, '//h:*[@style]'): for elem in xpath(tree, '//h:*[@style]'):
self.style(elem)._apply_style_attr() self.style(elem)._apply_style_attr()
def _fetch_css_file(self, path): def _fetch_css_file(self, path):
hrefs = self.oeb.manifest.hrefs hrefs = self.oeb.manifest.hrefs
if path not in hrefs: if path not in hrefs:
@ -171,7 +176,7 @@ class Stylizer(object):
return (None, None) return (None, None)
data = item.data.cssText data = item.data.cssText
return ('utf-8', data) return ('utf-8', data)
def flatten_rule(self, rule, href, index): def flatten_rule(self, rule, href, index):
results = [] results = []
if isinstance(rule, CSSStyleRule): if isinstance(rule, CSSStyleRule):
@ -185,7 +190,7 @@ class Stylizer(object):
style = self.flatten_style(rule.style) style = self.flatten_style(rule.style)
self.page_rule.update(style) self.page_rule.update(style)
return results return results
def flatten_style(self, cssstyle): def flatten_style(self, cssstyle):
style = {} style = {}
for prop in cssstyle: for prop in cssstyle:
@ -202,7 +207,7 @@ class Stylizer(object):
if size in FONT_SIZE_NAMES: if size in FONT_SIZE_NAMES:
style['font-size'] = "%dpt" % self.profile.fnames[size] style['font-size'] = "%dpt" % self.profile.fnames[size]
return style return style
def _normalize_edge(self, cssvalue, name): def _normalize_edge(self, cssvalue, name):
style = {} style = {}
if isinstance(cssvalue, CSSValueList): if isinstance(cssvalue, CSSValueList):
@ -224,7 +229,7 @@ class Stylizer(object):
for edge, value in itertools.izip(edges, values): for edge, value in itertools.izip(edges, values):
style["%s-%s" % (name, edge)] = value style["%s-%s" % (name, edge)] = value
return style return style
def _normalize_font(self, cssvalue): def _normalize_font(self, cssvalue):
composition = ('font-style', 'font-variant', 'font-weight', composition = ('font-style', 'font-variant', 'font-weight',
'font-size', 'line-height', 'font-family') 'font-size', 'line-height', 'font-family')
@ -271,7 +276,7 @@ class Stylizer(object):
class Style(object): class Style(object):
UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)$') UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)$')
def __init__(self, element, stylizer): def __init__(self, element, stylizer):
self._element = element self._element = element
self._profile = stylizer.profile self._profile = stylizer.profile
@ -285,7 +290,7 @@ class Style(object):
def _update_cssdict(self, cssdict): def _update_cssdict(self, cssdict):
self._style.update(cssdict) self._style.update(cssdict)
def _apply_style_attr(self): def _apply_style_attr(self):
attrib = self._element.attrib attrib = self._element.attrib
if 'style' not in attrib: if 'style' not in attrib:
@ -297,7 +302,7 @@ class Style(object):
except CSSSyntaxError: except CSSSyntaxError:
return return
self._style.update(self._stylizer.flatten_style(style)) self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self): def _has_parent(self):
return (self._element.getparent() is not None) return (self._element.getparent() is not None)
@ -346,7 +351,7 @@ class Style(object):
elif unit == 'in': elif unit == 'in':
result = value * 72.0 result = value * 72.0
elif unit == 'pt': elif unit == 'pt':
result = value result = value
elif unit == 'em': elif unit == 'em':
font = font or self.fontSize font = font or self.fontSize
result = value * font result = value * font
@ -421,7 +426,7 @@ class Style(object):
result = self._unit_convert(width, base=base) result = self._unit_convert(width, base=base)
self._width = result self._width = result
return self._width return self._width
@property @property
def height(self): def height(self):
if self._height is None: if self._height is None:
@ -463,27 +468,27 @@ class Style(object):
result = 1.2 * self.fontSize result = 1.2 * self.fontSize
self._lineHeight = result self._lineHeight = result
return self._lineHeight return self._lineHeight
@property @property
def marginTop(self): def marginTop(self):
return self._unit_convert( return self._unit_convert(
self._get('margin-top'), base=self.height) self._get('margin-top'), base=self.height)
@property @property
def marginBottom(self): def marginBottom(self):
return self._unit_convert( return self._unit_convert(
self._get('margin-bottom'), base=self.height) self._get('margin-bottom'), base=self.height)
@property @property
def paddingTop(self): def paddingTop(self):
return self._unit_convert( return self._unit_convert(
self._get('padding-top'), base=self.height) self._get('padding-top'), base=self.height)
@property @property
def paddingBottom(self): def paddingBottom(self):
return self._unit_convert( return self._unit_convert(
self._get('padding-bottom'), base=self.height) self._get('padding-bottom'), base=self.height)
def __str__(self): def __str__(self):
items = self._style.items() items = self._style.items()
items.sort() items.sort()

View File

@ -116,7 +116,8 @@ class CSSFlattener(object):
profile = self.context.source profile = self.context.source
for item in self.oeb.spine: for item in self.oeb.spine:
html = item.data html = item.data
stylizer = Stylizer(html, item.href, self.oeb, profile) stylizer = Stylizer(html, item.href, self.oeb, profile,
extra_css=self.context.extra_css)
self.stylizers[item] = stylizer self.stylizers[item] = stylizer
def baseline_node(self, node, stylizer, sizes, csize): def baseline_node(self, node, stylizer, sizes, csize):

View File

@ -15,12 +15,10 @@ from lxml.etree import XPath as _XPath
from lxml import etree from lxml import etree
from lxml.cssselect import CSSSelector from lxml.cssselect import CSSSelector
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP, urldefrag, \ from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
rewrite_links urldefrag, rewrite_links
from calibre.ebooks.epub import tostring, rules from calibre.ebooks.epub import tostring, rules
NAMESPACES = dict(XPNSMAP)
NAMESPACES['re'] = 'http://exslt.org/regular-expressions'
XPath = functools.partial(_XPath, namespaces=NAMESPACES) XPath = functools.partial(_XPath, namespaces=NAMESPACES)
@ -104,7 +102,10 @@ class Split(object):
page_breaks.add(elem) page_breaks.add(elem)
for i, elem in enumerate(item.data.iter()): for i, elem in enumerate(item.data.iter()):
elem.set('pb_order', str(i)) try:
elem.set('pb_order', str(i))
except TypeError: # Cant set attributes on comment nodes etc.
continue
page_breaks = list(page_breaks) page_breaks = list(page_breaks)
page_breaks.sort(cmp= page_breaks.sort(cmp=
@ -118,7 +119,7 @@ class Split(object):
page_break_ids.append(id) page_break_ids.append(id)
for elem in item.data.iter(): for elem in item.data.iter():
elem.attrib.pop('pb_order') elem.attrib.pop('pb_order', False)
if elem.get('pb_before', False): if elem.get('pb_before', False):
elem.attrib.pop('pb_before') elem.attrib.pop('pb_before')

View File

@ -0,0 +1,159 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from lxml import etree
from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC
def XPath(x):
    '''Compile the XPath expression `x` with the XPNSMAP namespace prefixes.'''
    return etree.XPath(x, namespaces=XPNSMAP)
class DetectStructure(object):
    '''
    Detect chapters in an OEB book and, when requested or when the existing
    Table of Contents was auto generated, build a new Table of Contents.
    '''

    def __call__(self, oeb, opts):
        '''
        Run structure detection on `oeb` using the settings in `opts`.

        Chapters are detected (and marked) first. If the book's TOC was auto
        generated or `opts.use_auto_toc` is set, a new TOC is built from the
        level XPath expressions, falling back to detected chapters and then
        to links. The original TOC is restored if the new one has fewer than
        two entries while the original had more than two. Finally, entries
        without a title, or whose title matches `opts.toc_filter`, are
        removed.
        '''
        self.log = oeb.log
        self.oeb = oeb
        self.opts = opts
        self.log('Detecting structure...')

        self.detect_chapters()
        if self.oeb.auto_generated_toc or opts.use_auto_toc:
            orig_toc = self.oeb.toc
            self.oeb.toc = TOC()
            self.create_level_based_toc()
            if self.oeb.toc.count() < 1:
                if not opts.no_chapters_in_toc and self.detected_chapters:
                    self.create_toc_from_chapters()
                if self.oeb.toc.count() < opts.toc_threshold:
                    self.create_toc_from_links()
            if self.oeb.toc.count() < 2 and orig_toc.count() > 2:
                self.oeb.toc = orig_toc
            else:
                self.oeb.auto_generated_toc = True
                self.log('Auto generated TOC with %d entries.' %
                        self.oeb.toc.count())

        if opts.toc_filter is not None:
            regexp = re.compile(opts.toc_filter)
            # Iterate over a snapshot: removing nodes while walking the live
            # tree makes the walk skip the sibling after each removed node.
            for node in list(self.oeb.toc.iter()):
                if not node.title or regexp.search(node.title) is not None:
                    self.oeb.toc.remove(node)

    def detect_chapters(self):
        '''
        Collect the elements matched by the `opts.chapter` XPath expression
        into self.detected_chapters as (spine item, element) pairs and insert
        the marker selected by `opts.chapter_mark` before each of them.
        '''
        self.detected_chapters = []
        if self.opts.chapter:
            chapter_xpath = XPath(self.opts.chapter)
            for item in self.oeb.spine:
                for x in chapter_xpath(item.data):
                    self.detected_chapters.append((item, x))

            chapter_mark = self.opts.chapter_mark
            page_break_before = 'display: block; page-break-before: always'
            page_break_after = 'display: block; page-break-after: always'
            for item, elem in self.detected_chapters:
                text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
                self.log('\tDetected chapter:', text[:50])
                if chapter_mark == 'none':
                    continue
                elif chapter_mark == 'rule':
                    mark = etree.Element('hr')
                elif chapter_mark == 'pagebreak':
                    mark = etree.Element('div', style=page_break_after)
                else: # chapter_mark == 'both':
                    mark = etree.Element('hr', style=page_break_before)
                elem.addprevious(mark)

    def create_level_based_toc(self):
        '''Build the TOC from the level XPath options; no-op without level1_toc.'''
        if self.opts.level1_toc is None:
            return
        for item in self.oeb.spine:
            self.add_leveled_toc_items(item)

    def create_toc_from_chapters(self):
        '''Add one top level TOC entry per detected chapter.'''
        counter = self.oeb.toc.next_play_order()
        for item, elem in self.detected_chapters:
            text, href = self.elem_to_link(item, elem, counter)
            self.oeb.toc.add(text, href, play_order=counter)
            counter += 1

    def create_toc_from_links(self):
        '''
        Add a TOC entry for every link in the spine that has no scheme or
        the ``file`` scheme, skipping targets and texts already in the TOC.
        '''
        # The expression uses the h: prefix, so it has to be compiled with
        # the namespace map; a plain element.xpath() call does not know it.
        links = XPath('//h:a[@href]')
        for item in self.oeb.spine:
            for a in links(item.data):
                href = a.get('href')
                purl = urlparse(href)
                if not purl[0] or purl[0] == 'file':
                    href, frag = purl.path, purl.fragment
                    href = item.abshref(href)
                    if frag:
                        href = '#'.join((href, frag))
                    if not self.oeb.toc.has_href(href):
                        text = u' '.join([t.strip() for t in \
                            a.xpath('descendant::text()')])
                        text = text[:100].strip()
                        # NOTE(review): TOC.has_text is not visible from
                        # here -- confirm the TOC class provides it.
                        if not self.oeb.toc.has_text(text):
                            self.oeb.toc.add(text, href,
                                play_order=self.oeb.toc.next_play_order())

    def elem_to_link(self, item, elem, counter):
        '''
        Return (text, href) for a TOC entry pointing at `elem` in `item`.

        The text is the element's descendant text, joined with spaces and
        limited to 100 characters. The element keeps its id if it has one,
        otherwise it is given ``calibre_toc_<counter>``.
        '''
        text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
        text = text[:100].strip()
        anchor = elem.get('id', 'calibre_toc_%d'%counter)
        elem.set('id', anchor)
        href = '#'.join((item.href, anchor))
        return text, href

    def add_leveled_toc_items(self, item):
        '''
        Add up to three levels of TOC entries for the spine item `item`.

        Level 1 matches become top level entries. Each level 2 match is
        attached to the most recent level 1 entry that precedes it in
        document order, and each level 3 match to the most recent preceding
        level 2 entry. Matches with no preceding parent entry, or with empty
        text, are not added.
        '''
        level1 = XPath(self.opts.level1_toc)(item.data)
        if not level1:
            return

        counter = 1
        added = {}
        for elem in level1:
            text, _href = self.elem_to_link(item, elem, counter)
            counter += 1
            if text:
                added[elem] = self.oeb.toc.add(text, _href,
                        play_order=self.oeb.toc.next_play_order())

        if self.opts.level2_toc is None:
            return
        added2 = {}
        for elem in list(XPath(self.opts.level2_toc)(item.data)):
            parent = None
            # The loop variable must not be called `item`: rebinding the
            # spine item would break both the next iteration and the
            # elem_to_link() call, which needs the spine item's href.
            for candidate in item.data.iterdescendants():
                if candidate in added:
                    parent = added[candidate]
                elif candidate == elem:
                    if parent is not None:
                        text, _href = self.elem_to_link(item, elem, counter)
                        counter += 1
                        if text:
                            added2[elem] = parent.add(text, _href,
                                play_order=self.oeb.toc.next_play_order())
                    # The element has been reached; nothing after it matters.
                    break

        if self.opts.level3_toc is None:
            return
        for elem in list(XPath(self.opts.level3_toc)(item.data)):
            parent = None
            for candidate in item.data.iterdescendants():
                if candidate in added2:
                    parent = added2[candidate]
                elif candidate == elem:
                    if parent is not None:
                        text, _href = self.elem_to_link(item, elem, counter)
                        counter += 1
                        if text:
                            parent.add(text, _href,
                                play_order=self.oeb.toc.next_play_order())
                    break

View File

@ -0,0 +1,101 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from lxml import etree
from calibre.customize.conversion import InputFormatPlugin
class RTFInput(InputFormatPlugin):
    '''
    Input plugin that converts an RTF file into a single XHTML file plus an
    OPF file, both written to the current working directory.
    '''

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        '''
        Run rtf2xml over `stream` and return the generated XML as a string.

        The XML is written to the temporary file ``out.xml`` in the current
        working directory, which is removed again before returning, even
        when parsing or reading fails.
        '''
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'out.xml'
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = 1,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 0,
        )
        try:
            parser.parse_rtf()
            # Close the handle before removing the file
            with open(ofile) as f:
                ans = f.read()
        finally:
            # Do not leave the temporary file behind on failure
            if os.path.exists(ofile):
                os.remove(ofile)
        return ans

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the RTF in `stream` to ``index.xhtml`` and ``metadata.opf``
        in the current working directory.

        :return: Absolute path to the generated ``metadata.opf``.
        :raises ValueError: If rtf2xml reports an invalid RTF code.
        '''
        from calibre.ebooks.rtf.xsl import xhtml
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream)
        except RtfInvalidCodeException:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.'))
        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(xhtml)

        transform = etree.XSLT(styledoc)
        result = transform(doc)
        html = 'index.xhtml'
        res = transform.tostring(result)
        # Only the first 100 characters are searched for the prefixed
        # namespace declaration, which is rewritten to a default one.
        res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
        with open(html, 'wb') as f:
            f.write(res)
        # Rewind so the metadata reader sees the stream from the start
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Use a context manager so the OPF is flushed and closed before its
        # path is handed back to the caller.
        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath('metadata.opf')

View File

@ -149,9 +149,10 @@ class ParseRtf:
self.__group_borders = group_borders self.__group_borders = group_borders
self.__empty_paragraphs = empty_paragraphs self.__empty_paragraphs = empty_paragraphs
self.__no_dtd = no_dtd self.__no_dtd = no_dtd
def __check_file(self, the_file, type): def __check_file(self, the_file, type):
"""Check to see if files exist""" """Check to see if files exist"""
if hasattr(the_file, 'read'): return
if the_file == None: if the_file == None:
if type == "file_to_parse": if type == "file_to_parse":
message = "You must provide a file for the script to work" message = "You must provide a file for the script to work"
@ -545,13 +546,12 @@ class ParseRtf:
def __make_temp_file(self,file): def __make_temp_file(self,file):
"""Make a temporary file to parse""" """Make a temporary file to parse"""
write_file="rtf_write_file" write_file="rtf_write_file"
read_obj = open(file,'r') read_obj = file if hasattr(file, 'read') else open(file,'r')
write_obj = open(write_file, 'w') write_obj = open(write_file, 'w')
line = "dummy" line = "dummy"
while line: while line:
line = read_obj.read(1000) line = read_obj.read(1000)
write_obj.write(line ) write_obj.write(line )
read_obj.close()
write_obj.close() write_obj.close()
return write_file return write_file
""" """

View File

@ -58,10 +58,12 @@ class Pict:
return line[18:] return line[18:]
def __make_dir(self): def __make_dir(self):
""" Make a dirctory to put the image data in""" """ Make a dirctory to put the image data in"""
base_name = os.path.basename(self.__orig_file) base_name = os.path.basename(getattr(self.__orig_file, 'name',
self.__orig_file))
base_name = os.path.splitext(base_name)[0] base_name = os.path.splitext(base_name)[0]
if self.__out_file: if self.__out_file:
dir_name = os.path.dirname(self.__out_file) dir_name = os.path.dirname(getattr(self.__out_file, 'name',
self.__out_file))
else: else:
dir_name = os.path.dirname(self.__orig_file) dir_name = os.path.dirname(self.__orig_file)
# self.__output_to_file_func() # self.__output_to_file_func()

View File

@ -16,16 +16,14 @@ from calibre.gui2 import qstring_to_unicode, error_dialog, file_icon_provider, \
from calibre.gui2.dialogs.metadata_single_ui import Ui_MetadataSingleDialog from calibre.gui2.dialogs.metadata_single_ui import Ui_MetadataSingleDialog
from calibre.gui2.dialogs.fetch_metadata import FetchMetadata from calibre.gui2.dialogs.fetch_metadata import FetchMetadata
from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.gui2.dialogs.password import PasswordDialog
from calibre.gui2.widgets import ProgressIndicator from calibre.gui2.widgets import ProgressIndicator
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, authors_to_string from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, authors_to_string
from calibre.ebooks.metadata.library_thing import login, cover_from_isbn from calibre.ebooks.metadata.library_thing import cover_from_isbn
from calibre import islinux from calibre import islinux
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.customize.ui import run_plugins_on_import from calibre.customize.ui import run_plugins_on_import
from calibre.gui2 import config as gui_conf
class CoverFetcher(QThread): class CoverFetcher(QThread):
@ -60,9 +58,8 @@ class CoverFetcher(QThread):
return return
self.isbn = results[0] self.isbn = results[0]
if self.username and self.password: self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout,
login(self.username, self.password, force=False) username=self.username, password=self.password)[0]
self.cover_data = cover_from_isbn(self.isbn, timeout=self.timeout)[0]
except Exception, e: except Exception, e:
self.exception = e self.exception = e
self.traceback = traceback.format_exc() self.traceback = traceback.format_exc()
@ -290,7 +287,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.series_index.setValue(self.db.series_index(row)) self.series_index.setValue(self.db.series_index(row))
QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.enable_series_index) QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.enable_series_index)
QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.enable_series_index) QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.enable_series_index)
QObject.connect(self.password_button, SIGNAL('clicked()'), self.change_password)
self.show() self.show()
height_of_rest = self.frameGeometry().height() - self.cover.height() height_of_rest = self.frameGeometry().height() - self.cover.height()
@ -363,30 +359,12 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
tag_string = ', '.join(d.tags) tag_string = ', '.join(d.tags)
self.tags.setText(tag_string) self.tags.setText(tag_string)
def lt_password_dialog(self):
return PasswordDialog(self, 'LibraryThing account',
_('<p>Enter your username and password for '
'<b>LibraryThing.com</b>. This is <b>optional</b>. It will '
'make fetching of covers faster and more reliable.<br/>If '
'you do not have an account, you can '
'<a href=\'http://www.librarything.com\'>register</a> for '
'free.</p>'))
def change_password(self):
d = self.lt_password_dialog()
d.exec_()
def fetch_cover(self): def fetch_cover(self):
isbn = unicode(self.isbn.text()).strip() isbn = unicode(self.isbn.text()).strip()
d = self.lt_password_dialog()
if not gui_conf['asked_library_thing_password'] and \
(not d.username() or not d.password()):
d.exec_()
gui_conf['asked_library_thing_password'] = True
self.fetch_cover_button.setEnabled(False) self.fetch_cover_button.setEnabled(False)
self.setCursor(Qt.WaitCursor) self.setCursor(Qt.WaitCursor)
title, author = map(unicode, (self.title.text(), self.authors.text())) title, author = map(unicode, (self.title.text(), self.authors.text()))
self.cover_fetcher = CoverFetcher(d.username(), d.password(), isbn, self.cover_fetcher = CoverFetcher(None, None, isbn,
self.timeout, title, author) self.timeout, title, author)
self.cover_fetcher.start() self.cover_fetcher.start()
self._hangcheck = QTimer(self) self._hangcheck = QTimer(self)

View File

@ -589,17 +589,7 @@
<item> <item>
<widget class="QPushButton" name="fetch_cover_button"> <widget class="QPushButton" name="fetch_cover_button">
<property name="text"> <property name="text">
<string>Fetch &amp;cover image from server</string> <string>Download &amp;cover</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="password_button">
<property name="toolTip">
<string>Change the username and/or password for your account at LibraryThing.com</string>
</property>
<property name="text">
<string>Change &amp;password</string>
</property> </property>
</widget> </widget>
</item> </item>
@ -655,7 +645,6 @@
<tabstop>comments</tabstop> <tabstop>comments</tabstop>
<tabstop>fetch_metadata_button</tabstop> <tabstop>fetch_metadata_button</tabstop>
<tabstop>fetch_cover_button</tabstop> <tabstop>fetch_cover_button</tabstop>
<tabstop>password_button</tabstop>
<tabstop>formats</tabstop> <tabstop>formats</tabstop>
<tabstop>add_format_button</tabstop> <tabstop>add_format_button</tabstop>
<tabstop>remove_format_button</tabstop> <tabstop>remove_format_button</tabstop>

View File

@ -27,10 +27,6 @@ entry_points = {
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main', 'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'isbndb = calibre.ebooks.metadata.isbndb:main', 'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main', 'librarything = calibre.ebooks.metadata.library_thing:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'comic2mobi = calibre.ebooks.mobi.from_comic:main',
'comic2pdf = calibre.ebooks.pdf.from_comic:main',
'calibre-debug = calibre.debug:main', 'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main', 'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main', 'calibre-fontconfig = calibre.utils.fontconfig:main',
@ -151,8 +147,6 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
from calibre.web.feeds.main import option_parser as feeds2disk from calibre.web.feeds.main import option_parser as feeds2disk
from calibre.web.feeds.recipes import titles as feed_titles from calibre.web.feeds.recipes import titles as feed_titles
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.ebooks.metadata.fetch import option_parser as fem_op from calibre.ebooks.metadata.fetch import option_parser as fem_op
from calibre.gui2.main import option_parser as guiop from calibre.gui2.main import option_parser as guiop
from calibre.utils.smtp import option_parser as smtp_op from calibre.utils.smtp import option_parser as smtp_op
@ -181,10 +175,6 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes()))) f.write(opts_and_exts('ebook-meta', metaop, list(meta_filetypes())))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf'])) f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf'])) f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2epub', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles)) f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('fetch-ebook-metadata', fem_op, [])) f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
f.write(opts_and_words('calibre-smtp', smtp_op, [])) f.write(opts_and_words('calibre-smtp', smtp_op, []))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -266,7 +266,7 @@ class BasicNewsRecipe(object):
def get_feeds(self): def get_feeds(self):
''' '''
Return a list of :term:RSS feeds to fetch for this profile. Each element of the list Return a list of :term:`RSS` feeds to fetch for this profile. Each element of the list
must be a 2-element tuple of the form (title, url). If title is None or an must be a 2-element tuple of the form (title, url). If title is None or an
empty string, the title from the feed is used. This method is useful if your recipe empty string, the title from the feed is used. This method is useful if your recipe
needs to do some processing to figure out the list of feeds to download. If needs to do some processing to figure out the list of feeds to download. If

View File

@ -182,11 +182,6 @@ def cnv_points(attribute, arg, element):
raise ValueError, "Points must be string or [(0,0),(1,1)] - not %s" % arg raise ValueError, "Points must be string or [(0,0),(1,1)] - not %s" % arg
return strarg return strarg
def cnv_position(attribute, arg, element):
if element == (STYLENS,u'tab-stop'):
return cnv_length(attribute, arg, element)
return cnv_string(attribute, arg, element)
def cnv_positiveInteger(attribute, arg, element): def cnv_positiveInteger(attribute, arg, element):
return str(arg) return str(arg)
@ -253,7 +248,7 @@ attrconverters = {
((CHARTNS,u'gap-width'), None): cnv_string, ((CHARTNS,u'gap-width'), None): cnv_string,
((CHARTNS,u'interpolation'), None): cnv_string, ((CHARTNS,u'interpolation'), None): cnv_string,
((CHARTNS,u'interval-major'), None): cnv_string, ((CHARTNS,u'interval-major'), None): cnv_string,
((CHARTNS,u'interval-minor'), None): cnv_string, ((CHARTNS,u'interval-minor-divisor'), None): cnv_string,
((CHARTNS,u'japanese-candle-stick'), None): cnv_boolean, ((CHARTNS,u'japanese-candle-stick'), None): cnv_boolean,
((CHARTNS,u'label-arrangement'), None): cnv_string, ((CHARTNS,u'label-arrangement'), None): cnv_string,
((CHARTNS,u'label-cell-address'), None): cnv_string, ((CHARTNS,u'label-cell-address'), None): cnv_string,
@ -352,6 +347,7 @@ attrconverters = {
((DRAWNS,u'caption-line-length'), None): cnv_length, ((DRAWNS,u'caption-line-length'), None): cnv_length,
((DRAWNS,u'caption-point-x'), None): cnv_string, ((DRAWNS,u'caption-point-x'), None): cnv_string,
((DRAWNS,u'caption-point-y'), None): cnv_string, ((DRAWNS,u'caption-point-y'), None): cnv_string,
((DRAWNS,u'caption-id'), None): cnv_IDREF,
((DRAWNS,u'caption-type'), None): cnv_string, ((DRAWNS,u'caption-type'), None): cnv_string,
((DRAWNS,u'chain-next-name'), None): cnv_string, ((DRAWNS,u'chain-next-name'), None): cnv_string,
((DRAWNS,u'class-id'), None): cnv_string, ((DRAWNS,u'class-id'), None): cnv_string,
@ -476,6 +472,7 @@ attrconverters = {
((DRAWNS,u'modifiers'), None): cnv_string, ((DRAWNS,u'modifiers'), None): cnv_string,
((DRAWNS,u'name'), None): cnv_NCName, ((DRAWNS,u'name'), None): cnv_NCName,
# ((DRAWNS,u'name'), None): cnv_string, # ((DRAWNS,u'name'), None): cnv_string,
((DRAWNS,u'nav-order'), None): cnv_IDREF,
((DRAWNS,u'nohref'), None): cnv_string, ((DRAWNS,u'nohref'), None): cnv_string,
((DRAWNS,u'notify-on-update-of-ranges'), None): cnv_string, ((DRAWNS,u'notify-on-update-of-ranges'), None): cnv_string,
((DRAWNS,u'object'), None): cnv_string, ((DRAWNS,u'object'), None): cnv_string,
@ -683,7 +680,7 @@ attrconverters = {
((MANIFESTNS,'initialisation-vector'), None): cnv_string, ((MANIFESTNS,'initialisation-vector'), None): cnv_string,
((MANIFESTNS,'iteration-count'), None): cnv_nonNegativeInteger, ((MANIFESTNS,'iteration-count'), None): cnv_nonNegativeInteger,
((MANIFESTNS,'key-derivation-name'), None): cnv_string, ((MANIFESTNS,'key-derivation-name'), None): cnv_string,
((MANIFESTNS,'manifest:media-type'), None): cnv_string, ((MANIFESTNS,'media-type'), None): cnv_string,
((MANIFESTNS,'salt'), None): cnv_string, ((MANIFESTNS,'salt'), None): cnv_string,
((MANIFESTNS,'size'), None): cnv_nonNegativeInteger, ((MANIFESTNS,'size'), None): cnv_nonNegativeInteger,
((METANS,u'cell-count'), None): cnv_nonNegativeInteger, ((METANS,u'cell-count'), None): cnv_nonNegativeInteger,
@ -746,6 +743,7 @@ attrconverters = {
((OFFICENS,u'target-frame'), None): cnv_string, ((OFFICENS,u'target-frame'), None): cnv_string,
((OFFICENS,u'target-frame-name'), None): cnv_string, ((OFFICENS,u'target-frame-name'), None): cnv_string,
((OFFICENS,u'time-value'), None): cnv_duration, ((OFFICENS,u'time-value'), None): cnv_duration,
((OFFICENS,u'title'), None): cnv_string,
((OFFICENS,u'value'), None): cnv_double, ((OFFICENS,u'value'), None): cnv_double,
((OFFICENS,u'value-type'), None): cnv_string, ((OFFICENS,u'value-type'), None): cnv_string,
((OFFICENS,u'version'), None): cnv_string, ((OFFICENS,u'version'), None): cnv_string,
@ -783,6 +781,7 @@ attrconverters = {
((PRESENTATIONNS,u'preset-id'), None): cnv_string, ((PRESENTATIONNS,u'preset-id'), None): cnv_string,
((PRESENTATIONNS,u'preset-sub-type'), None): cnv_string, ((PRESENTATIONNS,u'preset-sub-type'), None): cnv_string,
((PRESENTATIONNS,u'show'), None): cnv_string, ((PRESENTATIONNS,u'show'), None): cnv_string,
((PRESENTATIONNS,u'show-end-of-presentation-slide'), None): cnv_boolean,
((PRESENTATIONNS,u'show-logo'), None): cnv_boolean, ((PRESENTATIONNS,u'show-logo'), None): cnv_boolean,
((PRESENTATIONNS,u'source'), None): cnv_string, ((PRESENTATIONNS,u'source'), None): cnv_string,
((PRESENTATIONNS,u'speed'), None): cnv_string, ((PRESENTATIONNS,u'speed'), None): cnv_string,
@ -873,6 +872,8 @@ attrconverters = {
((STYLENS,u'flow-with-text'), None): cnv_boolean, ((STYLENS,u'flow-with-text'), None): cnv_boolean,
((STYLENS,u'font-adornments'), None): cnv_string, ((STYLENS,u'font-adornments'), None): cnv_string,
((STYLENS,u'font-charset'), None): cnv_string, ((STYLENS,u'font-charset'), None): cnv_string,
((STYLENS,u'font-charset-asian'), None): cnv_string,
((STYLENS,u'font-charset-complex'), None): cnv_string,
((STYLENS,u'font-family-asian'), None): cnv_string, ((STYLENS,u'font-family-asian'), None): cnv_string,
((STYLENS,u'font-family-complex'), None): cnv_string, ((STYLENS,u'font-family-complex'), None): cnv_string,
((STYLENS,u'font-family-generic-asian'), None): cnv_string, ((STYLENS,u'font-family-generic-asian'), None): cnv_string,
@ -949,7 +950,8 @@ attrconverters = {
((STYLENS,u'page-usage'), None): cnv_string, ((STYLENS,u'page-usage'), None): cnv_string,
((STYLENS,u'paper-tray-name'), None): cnv_string, ((STYLENS,u'paper-tray-name'), None): cnv_string,
((STYLENS,u'parent-style-name'), None): cnv_StyleNameRef, ((STYLENS,u'parent-style-name'), None): cnv_StyleNameRef,
((STYLENS,u'position'), None): cnv_position, ((STYLENS,u'position'), (STYLENS,u'tab-stop')): cnv_length,
((STYLENS,u'position'), None): cnv_string,
((STYLENS,u'print'), None): cnv_string, ((STYLENS,u'print'), None): cnv_string,
((STYLENS,u'print-content'), None): cnv_boolean, ((STYLENS,u'print-content'), None): cnv_boolean,
((STYLENS,u'print-orientation'), None): cnv_string, ((STYLENS,u'print-orientation'), None): cnv_string,
@ -1015,7 +1017,7 @@ attrconverters = {
((STYLENS,u'wrap'), None): cnv_string, ((STYLENS,u'wrap'), None): cnv_string,
((STYLENS,u'wrap-contour'), None): cnv_boolean, ((STYLENS,u'wrap-contour'), None): cnv_boolean,
((STYLENS,u'wrap-contour-mode'), None): cnv_string, ((STYLENS,u'wrap-contour-mode'), None): cnv_string,
((STYLENS,u'wrap-dynamic-treshold'), None): cnv_string, ((STYLENS,u'wrap-dynamic-threshold'), None): cnv_length,
((STYLENS,u'writing-mode-automatic'), None): cnv_boolean, ((STYLENS,u'writing-mode-automatic'), None): cnv_boolean,
((STYLENS,u'writing-mode'), None): cnv_string, ((STYLENS,u'writing-mode'), None): cnv_string,
((SVGNS,u'accent-height'), None): cnv_integer, ((SVGNS,u'accent-height'), None): cnv_integer,
@ -1122,7 +1124,7 @@ attrconverters = {
((TABLENS,u'database-table-name'), None): cnv_string, ((TABLENS,u'database-table-name'), None): cnv_string,
((TABLENS,u'date-end'), None): cnv_string, ((TABLENS,u'date-end'), None): cnv_string,
((TABLENS,u'date-start'), None): cnv_string, ((TABLENS,u'date-start'), None): cnv_string,
((TABLENS,u'date-value-type'), None): cnv_date, ((TABLENS,u'date-value'), None): cnv_date,
((TABLENS,u'default-cell-style-name'), None): cnv_StyleNameRef, ((TABLENS,u'default-cell-style-name'), None): cnv_StyleNameRef,
((TABLENS,u'direction'), None): cnv_string, ((TABLENS,u'direction'), None): cnv_string,
((TABLENS,u'display-border'), None): cnv_boolean, ((TABLENS,u'display-border'), None): cnv_boolean,
@ -1304,7 +1306,7 @@ attrconverters = {
((TEXTNS,u'database-name'), None): cnv_string, ((TEXTNS,u'database-name'), None): cnv_string,
((TEXTNS,u'date-adjust'), None): cnv_duration, ((TEXTNS,u'date-adjust'), None): cnv_duration,
((TEXTNS,u'date-value'), None): cnv_date, ((TEXTNS,u'date-value'), None): cnv_date,
((TEXTNS,u'date-value'), None): cnv_dateTime, # ((TEXTNS,u'date-value'), None): cnv_dateTime,
((TEXTNS,u'default-style-name'), None): cnv_StyleNameRef, ((TEXTNS,u'default-style-name'), None): cnv_StyleNameRef,
((TEXTNS,u'description'), None): cnv_string, ((TEXTNS,u'description'), None): cnv_string,
((TEXTNS,u'display'), None): cnv_string, ((TEXTNS,u'display'), None): cnv_string,
@ -1366,6 +1368,7 @@ attrconverters = {
((TEXTNS,u'outline-level'), None): cnv_string, ((TEXTNS,u'outline-level'), None): cnv_string,
((TEXTNS,u'page-adjust'), None): cnv_integer, ((TEXTNS,u'page-adjust'), None): cnv_integer,
((TEXTNS,u'pages'), None): cnv_string, ((TEXTNS,u'pages'), None): cnv_string,
((TEXTNS,u'paragraph-style-name'), None): cnv_StyleNameRef,
((TEXTNS,u'placeholder-type'), None): cnv_string, ((TEXTNS,u'placeholder-type'), None): cnv_string,
((TEXTNS,u'prefix'), None): cnv_string, ((TEXTNS,u'prefix'), None): cnv_string,
((TEXTNS,u'protected'), None): cnv_boolean, ((TEXTNS,u'protected'), None): cnv_boolean,
@ -1417,6 +1420,7 @@ attrconverters = {
((TEXTNS,u'use-objects'), None): cnv_boolean, ((TEXTNS,u'use-objects'), None): cnv_boolean,
((TEXTNS,u'use-other-objects'), None): cnv_boolean, ((TEXTNS,u'use-other-objects'), None): cnv_boolean,
((TEXTNS,u'use-outline-level'), None): cnv_boolean, ((TEXTNS,u'use-outline-level'), None): cnv_boolean,
((TEXTNS,u'use-soft-page-breaks'), None): cnv_boolean,
((TEXTNS,u'use-spreadsheet-objects'), None): cnv_boolean, ((TEXTNS,u'use-spreadsheet-objects'), None): cnv_boolean,
((TEXTNS,u'use-tables'), None): cnv_boolean, ((TEXTNS,u'use-tables'), None): cnv_boolean,
((TEXTNS,u'value'), None): cnv_nonNegativeInteger, ((TEXTNS,u'value'), None): cnv_nonNegativeInteger,

View File

@ -258,10 +258,7 @@ class Element(Node):
self.ownerDocument = None self.ownerDocument = None
self.childNodes=[] self.childNodes=[]
self.allowed_children = grammar.allowed_children.get(self.qname) self.allowed_children = grammar.allowed_children.get(self.qname)
namespace = self.qname[0] prefix = self.get_nsprefix(self.qname[0])
prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
self.tagName = prefix + ":" + self.qname[1] self.tagName = prefix + ":" + self.qname[1]
if text is not None: if text is not None:
self.addText(text) self.addText(text)
@ -296,6 +293,13 @@ class Element(Node):
if self.getAttrNS(r[0],r[1]) is None: if self.getAttrNS(r[0],r[1]) is None:
raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName) raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName)
def get_nsprefix(self, namespace):
    """ Return the prefix that _nsassign() gives for namespace, recording it
        in self.namespaces if the namespace is not registered there yet.
        A namespace of None is treated as the empty string.
    """
    if namespace is None:
        namespace = ""
    prefix = _nsassign(namespace)
    if namespace not in self.namespaces:
        self.namespaces[namespace] = prefix
    return prefix
def allowed_attributes(self): def allowed_attributes(self):
return grammar.allowed_attributes.get(self.qname) return grammar.allowed_attributes.get(self.qname)
@ -378,24 +382,18 @@ class Element(Node):
Must overwrite, If attribute already exists. Must overwrite, If attribute already exists.
""" """
allowed_attrs = self.allowed_attributes() allowed_attrs = self.allowed_attributes()
prefix = _nsassign(namespace) prefix = self.get_nsprefix(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
# if allowed_attrs and (namespace, localpart) not in allowed_attrs: # if allowed_attrs and (namespace, localpart) not in allowed_attrs:
# raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName) # raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
c = AttrConverters() c = AttrConverters()
self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname) self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname)
def getAttrNS(self, namespace, localpart): def getAttrNS(self, namespace, localpart):
prefix = _nsassign(namespace) prefix = self.get_nsprefix(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
return self.attributes.get(prefix + ":" + localpart) return self.attributes.get(prefix + ":" + localpart)
def removeAttrNS(self, namespace, localpart): def removeAttrNS(self, namespace, localpart):
prefix = _nsassign(namespace) prefix = self.get_nsprefix(namespace)
if not self.namespaces.has_key(namespace):
self.namespaces[namespace] = prefix
del self.attributes[prefix + ":" + localpart] del self.attributes[prefix + ":" + localpart]
def getAttribute(self, attr): def getAttribute(self, attr):

View File

@ -109,11 +109,6 @@ inline_elements = (
) )
struct_elements = (
(CONFIGNS,'config-item-set'),
(TABLENS,u'table-cell'),
)
# It is almost impossible to determine what elements are block elements. # It is almost impossible to determine what elements are block elements.
# There are so many that don't fit the form # There are so many that don't fit the form
block_elements = ( block_elements = (

File diff suppressed because it is too large Load Diff

View File

@ -29,12 +29,7 @@ from xml.sax.xmlreader import InputSource
import xml.sax.saxutils import xml.sax.saxutils
from element import Element from element import Element
from namespaces import OFFICENS from namespaces import OFFICENS
from cStringIO import StringIO
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
# #
# Parse the XML files # Parse the XML files
@ -70,7 +65,7 @@ class LoadParser(handler.ContentHandler):
# Add any accumulated text content # Add any accumulated text content
content = ''.join(self.data).strip() content = ''.join(self.data).strip()
if len(content) > 0: if len(content) > 0:
self.parent.addText(content) self.parent.addText(content, check_grammar=False)
self.data = [] self.data = []
# Create the element # Create the element
attrdict = {} attrdict = {}
@ -109,7 +104,7 @@ class LoadParser(handler.ContentHandler):
self.level = self.level - 1 self.level = self.level - 1
str = ''.join(self.data) str = ''.join(self.data)
if len(str.strip()) > 0: if len(str.strip()) > 0:
self.curr.addText(str) self.curr.addText(str, check_grammar=False)
self.data = [] self.data = []
self.curr = self.curr.parentNode self.curr = self.curr.parentNode
self.parent = self.curr self.parent = self.curr

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency # Copyright (C) 2006-2009 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@ -17,7 +17,7 @@
# #
# Contributor(s): # Contributor(s):
# #
TOOLSVERSION = u"ODFPY/0.8.2dev" TOOLSVERSION = u"ODFPY/0.9.1dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0" ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0" DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@ -71,7 +71,7 @@ nsdict = {
OFFICENS: u'office', OFFICENS: u'office',
OOONS: u'ooo', OOONS: u'ooo',
OOOWNS: u'ooow', OOOWNS: u'ooow',
OOOCNS: u'ooc', OOOCNS: u'oooc',
PRESENTATIONNS: u'presentation', PRESENTATIONNS: u'presentation',
RDFANS: u'rdfa', RDFANS: u'rdfa',
SCRIPTNS: u'script', SCRIPTNS: u'script',

View File

@ -25,11 +25,7 @@ import xml.sax
from xml.sax import handler, expatreader from xml.sax import handler, expatreader
from xml.sax.xmlreader import InputSource from xml.sax.xmlreader import InputSource
from xml.sax.saxutils import escape, quoteattr from xml.sax.saxutils import escape, quoteattr
from cStringIO import StringIO
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \ from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \ FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \

View File

@ -24,12 +24,7 @@ import zipfile
from xml.sax import make_parser,handler from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource from xml.sax.xmlreader import InputSource
import xml.sax.saxutils import xml.sax.saxutils
from cStringIO import StringIO
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
MANIFESTNS="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" MANIFESTNS="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"

View File

@ -44,19 +44,19 @@ def Chart(**args):
def DdeSource(**args): def DdeSource(**args):
return Element(qname = (OFFICENS,'dde-source'), **args) return Element(qname = (OFFICENS,'dde-source'), **args)
def Document(version="1.0", **args): def Document(version="1.1", **args):
return Element(qname = (OFFICENS,'document'), version=version, **args) return Element(qname = (OFFICENS,'document'), version=version, **args)
def DocumentContent(version="1.0", **args): def DocumentContent(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-content'), version=version, **args) return Element(qname = (OFFICENS, 'document-content'), version=version, **args)
def DocumentMeta(version="1.0", **args): def DocumentMeta(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-meta'), version=version, **args) return Element(qname = (OFFICENS, 'document-meta'), version=version, **args)
def DocumentSettings(version="1.0", **args): def DocumentSettings(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-settings'), version=version, **args) return Element(qname = (OFFICENS, 'document-settings'), version=version, **args)
def DocumentStyles(version="1.0", **args): def DocumentStyles(version="1.1", **args):
return Element(qname = (OFFICENS, 'document-styles'), version=version, **args) return Element(qname = (OFFICENS, 'document-styles'), version=version, **args)
def Drawing(**args): def Drawing(**args):

View File

@ -1,71 +0,0 @@
from style import Style, ParagraphProperties, TextProperties
def addOOoStandardStyles(styles):
    """Add a fixed set of paragraph and text styles to ``styles``.

    Twelve paragraph-family styles (Standard, Text body, Text body indent,
    Salutation, Signature, Heading, Heading 1-3, List, Caption, Index) and
    two text-family styles (Source Text, Variable) are created and appended
    in that order through ``styles.addElement``.

    ``styles`` only needs to provide ``addElement``; nothing is returned.
    """
    # --- Base paragraph styles ------------------------------------------
    style = Style(name="Standard", family="paragraph",
                  attributes={'class': "text"})
    styles.addElement(style)

    style = Style(name="Text_20_body", displayname="Text body",
                  family="paragraph", parentstylename="Standard",
                  attributes={'class': "text"})
    p = ParagraphProperties(margintop="0cm", marginbottom="0.212cm")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Text_20_body_20_indent",
                  displayname="Text body indent", family="paragraph",
                  parentstylename="Text_20_body",
                  attributes={'class': "text"})
    p = ParagraphProperties(marginleft="0.499cm", marginright="0cm",
                            textindent="0cm", autotextindent="false")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Salutation", family="paragraph",
                  parentstylename="Standard", attributes={'class': "text"})
    p = ParagraphProperties(numberlines="false", linenumber=0)
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Signature", family="paragraph",
                  parentstylename="Standard", attributes={'class': "text"})
    p = ParagraphProperties(numberlines="false", linenumber=0)
    style.addElement(p)
    styles.addElement(style)

    # --- Headings ---------------------------------------------------------
    style = Style(name="Heading", family="paragraph",
                  parentstylename="Standard", nextstylename="Text_20_body",
                  attributes={'class': "text"})
    p = ParagraphProperties(margintop="0.423cm", marginbottom="0.212cm",
                            keepwithnext="always")
    style.addElement(p)
    p = TextProperties(fontname="Nimbus Sans L", fontsize="14pt",
                       fontnameasian="DejaVu LGC Sans", fontsizeasian="14pt",
                       fontnamecomplex="DejaVu LGC Sans",
                       fontsizecomplex="14pt")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Heading_20_1", displayname="Heading 1",
                  family="paragraph", parentstylename="Heading",
                  nextstylename="Text_20_body", attributes={'class': "text"},
                  defaultoutlinelevel=1)
    p = TextProperties(fontsize="115%", fontweight="bold",
                       fontsizeasian="115%", fontweightasian="bold",
                       fontsizecomplex="115%", fontweightcomplex="bold")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Heading_20_2", displayname="Heading 2",
                  family="paragraph", parentstylename="Heading",
                  nextstylename="Text_20_body", attributes={'class': "text"},
                  defaultoutlinelevel=2)
    p = TextProperties(fontsize="14pt", fontstyle="italic", fontweight="bold",
                       fontsizeasian="14pt", fontstyleasian="italic",
                       fontweightasian="bold", fontsizecomplex="14pt",
                       fontstylecomplex="italic", fontweightcomplex="bold")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Heading_20_3", displayname="Heading 3",
                  family="paragraph", parentstylename="Heading",
                  nextstylename="Text_20_body", attributes={'class': "text"},
                  defaultoutlinelevel=3)
    p = TextProperties(fontsize="14pt", fontweight="bold",
                       fontsizeasian="14pt", fontweightasian="bold",
                       fontsizecomplex="14pt", fontweightcomplex="bold")
    style.addElement(p)
    styles.addElement(style)

    # --- Lists, captions and indexes ----------------------------------
    style = Style(name="List", family="paragraph",
                  parentstylename="Text_20_body",
                  attributes={'class': "list"})
    styles.addElement(style)

    style = Style(name="Caption", family="paragraph",
                  parentstylename="Standard", attributes={'class': "extra"})
    p = ParagraphProperties(margintop="0.212cm", marginbottom="0.212cm",
                            numberlines="false", linenumber="0")
    style.addElement(p)
    p = TextProperties(fontsize="12pt", fontstyle="italic",
                       fontsizeasian="12pt", fontstyleasian="italic",
                       fontsizecomplex="12pt", fontstylecomplex="italic")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Index", family="paragraph",
                  parentstylename="Standard", attributes={'class': "index"})
    p = ParagraphProperties(numberlines="false", linenumber=0)
    # Attach the paragraph properties; previously they were built and then
    # discarded, so the Index style never carried its line-numbering settings.
    style.addElement(p)
    styles.addElement(style)

    # --- Character (text-family) styles -----------------------------------
    style = Style(name="Source_20_Text", displayname="Source Text",
                  family="text")
    p = TextProperties(fontname="Courier", fontnameasian="Courier",
                       fontnamecomplex="Courier")
    style.addElement(p)
    styles.addElement(style)

    style = Style(name="Variable", family="text")
    p = TextProperties(fontstyle="italic", fontstyleasian="italic",
                       fontstylecomplex="italic")
    style.addElement(p)
    styles.addElement(style)

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2006-2008 Søren Roug, European Environment Agency # Copyright (C) 2006-2009 Søren Roug, European Environment Agency
# #
# This library is free software; you can redistribute it and/or # This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public # modify it under the terms of the GNU Lesser General Public
@ -300,6 +300,37 @@ class OpenDocument:
self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype) self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
return manifestfn return manifestfn
def addPictureFromFile(self, filename, mediatype=None):
    """ Add a picture that lives in a file and return its manifest name.

        It uses the same convention as OOo, in that it saves the picture in
        the zipfile in the subdirectory 'Pictures'.
        If mediatype is not given, it will be guessed from the filename
        extension.

        filename  -- path of the picture; recorded as-is in self.Pictures
                     under the IS_FILENAME marker.
        mediatype -- MIME type of the picture, or None to guess it from
                     the filename. If guessing fails, the empty string is
                     recorded instead.

        Returns the generated 'Pictures/<timestamp><ext>' manifest name,
        which is also the key used in self.Pictures.
    """
    if mediatype is None:
        mediatype, encoding = mimetypes.guess_type(filename)

    ext = None
    if mediatype is None:
        mediatype = ''
    else:
        ext = mimetypes.guess_extension(mediatype)

    if ext is None:
        # Either the media type is unknown, or mimetypes has no extension
        # registered for it (guess_extension() returns None in that case).
        # Reuse the filename's own suffix rather than letting '%s' format
        # None as the literal text "None".
        try:
            ext = filename[filename.rindex('.'):]
        except ValueError:
            ext = ''

    # NOTE(review): the name is derived only from the clock, so two calls
    # within the same timer tick would map to the same key -- confirm
    # whether callers can hit that.
    manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
    self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
    return manifestfn
def addPictureFromString(self, content, mediatype):
    """ Add a picture held in memory and return its manifest name.

        It uses the same convention as OOo, in that it saves the picture in
        the zipfile in the subdirectory 'Pictures'. The content variable
        is a string that contains the binary image data. The mediatype
        indicates the image format.

        Returns the generated 'Pictures/<timestamp><ext>' manifest name,
        which is also the key used in self.Pictures (stored with the
        IS_IMAGE marker).
    """
    # guess_extension() returns None for media types it does not know;
    # fall back to no suffix so the name never ends in the text "None".
    ext = mimetypes.guess_extension(mediatype)
    if ext is None:
        ext = ''
    manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
    self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
    return manifestfn
def addThumbnail(self, filecontent=None): def addThumbnail(self, filecontent=None):
""" Add a fixed thumbnail """ Add a fixed thumbnail
The thumbnail in the library is big, so this is pretty useless. The thumbnail in the library is big, so this is pretty useless.
@ -394,6 +425,7 @@ class OpenDocument:
# Write any extra files # Write any extra files
for op in self._extra: for op in self._extra:
if op.filename == "META-INF/documentsignatures.xml": continue # Don't save signatures
self.manifest.addElement(manifest.FileEntry(fullpath=op.filename, mediatype=op.mediatype)) self.manifest.addElement(manifest.FileEntry(fullpath=op.filename, mediatype=op.mediatype))
zi = zipfile.ZipInfo(op.filename.encode('utf-8'), self._now) zi = zipfile.ZipInfo(op.filename.encode('utf-8'), self._now)
zi.compress_type = zipfile.ZIP_DEFLATED zi.compress_type = zipfile.ZIP_DEFLATED

View File

@ -31,11 +31,7 @@ import xml.sax.saxutils
from odf.namespaces import OFFICENS, TEXTNS from odf.namespaces import OFFICENS, TEXTNS
try: from cStringIO import StringIO
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
OUTENCODING = "utf-8" OUTENCODING = "utf-8"

View File

@ -27,3 +27,8 @@ from element import Element
def Model(**args): def Model(**args):
return Element(qname = (XFORMSNS,'model'), **args) return Element(qname = (XFORMSNS,'model'), **args)
def Instance(**args):
    """Return an Element with qname (XFORMSNS, 'instance').

    All keyword arguments are forwarded unchanged to Element.
    """
    qualified_name = (XFORMSNS, 'instance')
    return Element(qname=qualified_name, **args)
def Bind(**args):
    """Return an Element with qname (XFORMSNS, 'bind').

    All keyword arguments are forwarded unchanged to Element.
    """
    qualified_name = (XFORMSNS, 'bind')
    return Element(qname=qualified_name, **args)

View File

@ -139,7 +139,7 @@ class resources(OptionlessCommand):
RESOURCES = dict( RESOURCES = dict(
opf_template = 'ebooks/metadata/opf.xml', opf_template = 'ebooks/metadata/opf.xml',
ncx_template = 'ebooks/metadata/ncx.xml', ncx_template = 'ebooks/metadata/ncx.xml',
fb2_xsl = 'ebooks/lrf/fb2/fb2.xsl', fb2_xsl = 'ebooks/fb2/fb2.xsl',
metadata_sqlite = 'library/metadata_sqlite.sql', metadata_sqlite = 'library/metadata_sqlite.sql',
jquery = 'gui2/viewer/jquery.js', jquery = 'gui2/viewer/jquery.js',
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js', jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',