Merged upstream changes

This commit is contained in:
Marshall T. Vandegrift 2008-07-01 17:15:43 -04:00
commit 09212273c3
87 changed files with 24048 additions and 5648 deletions

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
'''
Create linux binary.
'''
import glob, sys, subprocess, tarfile, os, re
import glob, sys, subprocess, tarfile, os, re, py_compile
HOME = '/home/kovid'
PYINSTALLER = os.path.expanduser('~/build/pyinstaller')
CALIBREPREFIX = '___'
@ -64,17 +64,19 @@ temp = ['keyword', 'codeop']
recipes = ['calibre', 'web', 'feeds', 'recipes']
prefix = '.'.join(recipes)+'.'
recipes_toc = []
extra_toc = [
('keyword', '/usr/lib/python2.5/keyword.pyo', 'PYSOURCE'),
('codeop', '/usr/lib/python2.5/codeop.pyo', 'PYSOURCE')
]
for f in glob.glob(os.path.join(CALIBRESRC, *(recipes+['*.py']))):
temp.append(prefix + os.path.basename(f).partition('.')[0])
hook = os.path.expanduser('~/temp/hook-calibre.py')
f = open(hook, 'wb')
hook_script = 'hiddenimports = %s'%repr(temp)
f.write(hook_script)
py_compile.compile(f, doraise=True)
recipes_toc.append((prefix + os.path.basename(f).partition('.')[0], f+'o', 'PYSOURCE'))
sys.path.insert(0, CALIBRESRC)
from calibre.linux import entry_points
executables, scripts = ['calibre_postinstall', 'parallel'], \
executables, scripts = ['calibre_postinstall', 'calibre-parallel'], \
[os.path.join(CALIBRESRC, 'calibre', 'linux.py'), os.path.join(CALIBRESRC, 'calibre', 'parallel.py')]
for entry in entry_points['console_scripts'] + entry_points['gui_scripts']:
@ -82,18 +84,19 @@ for entry in entry_points['console_scripts'] + entry_points['gui_scripts']:
executables.append(fields[0].strip())
scripts.append(os.path.join(CALIBRESRC, *map(lambda x: x.strip(), fields[1].split(':')[0].split('.')))+'.py')
recipes = Analysis(glob.glob(os.path.join(CALIBRESRC, 'calibre', 'web', 'feeds', 'recipes', '*.py')),
pathex=[CALIBRESRC], hookspath=[os.path.dirname(hook)], excludes=excludes)
analyses = [Analysis([os.path.join(HOMEPATH,'support/_mountzlib.py'), os.path.join(HOMEPATH,'support/useUnicode.py'), loader, script],
pathex=[PYINSTALLER, CALIBRESRC, CALIBREPLUGINS], excludes=excludes) for script in scripts]
pyz = TOC()
binaries = TOC()
pyz += extra_toc
pyz += recipes_toc
for a in analyses:
pyz = a.pure + pyz
binaries = a.binaries + binaries
pyz = PYZ(pyz + recipes.pure, name='library.pyz')
pyz = PYZ(pyz, name='library.pyz')
built_executables = []
for script, exe, a in zip(scripts, executables, analyses):

View File

@ -39,6 +39,7 @@ print >>loader, '%(function)s()'
loader.close()
os.chmod(loader_path, 0700)
os.environ['PYTHONHOME'] = resources_dir
os.environ['FC_CONFIG_DIR'] = os.path.join(resources_dir, 'fonts')
os.execv(loader_path, sys.argv)
'''
CHECK_SYMLINKS_PRESCRIPT = \
@ -240,13 +241,18 @@ _check_symlinks_prescript()
print 'Adding plugins'
module_dir = os.path.join(resource_dir, 'lib', 'python2.5', 'lib-dynload')
print 'Adding fontconfig'
for f in glob.glob(os.path.expanduser('~/fontconfig/*')):
for f in glob.glob(os.path.expanduser('~/fontconfig2/*')):
os.link(f, os.path.join(frameworks_dir, os.path.basename(f)))
for src, dest in plugin_files:
if 'dylib' in dest:
os.link(src, os.path.join(frameworks_dir, dest))
else:
os.link(src, os.path.join(module_dir, dest))
dst = os.path.join(resource_dir, 'fonts')
if os.path.exists(dst):
shutil.rmtree(dst)
shutil.copytree('/usr/local/etc/fonts', dst, symlinks=False)
print
print 'Adding IPython'
dst = os.path.join(resource_dir, 'lib', 'python2.5', 'IPython')
@ -271,15 +277,15 @@ sys.frameworks_dir = os.path.join(os.path.dirname(os.environ['RESOURCEPATH']), '
f.write(src)
f.close()
print
print 'Adding GUI main.py'
print 'Adding main scripts to site-packages'
f = zipfile.ZipFile(os.path.join(self.dist_dir, APPNAME+'.app', 'Contents', 'Resources', 'lib', 'python2.5', 'site-packages.zip'), 'a', zipfile.ZIP_DEFLATED)
f.write('src/calibre/gui2/main.py', 'calibre/gui2/main.py')
for script in scripts['gui']+scripts['console']:
f.write(script, script.partition('/')[-1])
f.close()
print
print 'Building disk image'
BuildAPP.makedmg(os.path.join(self.dist_dir, APPNAME+'.app'), APPNAME+'-'+VERSION)
def main():
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
sys.argv[1:2] = ['py2app']
@ -295,10 +301,11 @@ def main():
'iconfile' : 'icons/library.icns',
'frameworks': ['libusb.dylib', 'libunrar.dylib'],
'includes' : ['sip', 'pkg_resources', 'PyQt4.QtXml',
'PyQt4.QtSvg',
'PyQt4.QtSvg', 'PyQt4.QtWebKit',
'mechanize', 'ClientForm', 'usbobserver',
'genshi', 'calibre.web.feeds.recipes.*',
'keyword', 'codeop', 'pydoc'],
'calibre.ebooks.lrf.any.*', 'calibre.ebooks.lrf.feeds.*',
'keyword', 'codeop', 'pydoc', 'readline'],
'packages' : ['PIL', 'Authorization', 'rtf2xml', 'lxml'],
'excludes' : ['IPython'],
'plist' : { 'CFBundleGetInfoString' : '''calibre, an E-book management application.'''
@ -308,7 +315,10 @@ def main():
'CFBundleVersion':APPNAME + ' ' + VERSION,
'LSMinimumSystemVersion':'10.4.3',
'LSMultipleInstancesProhibited':'true',
'NSHumanReadableCopyright':'Copyright 2006, Kovid Goyal',
'NSHumanReadableCopyright':'Copyright 2008, Kovid Goyal',
'LSEnvironment':{
'FC_CONFIG_DIR':'@executable_path/../Resources/fonts',
}
},
},
},

View File

@ -13,9 +13,6 @@ print 'Setup', APPNAME, 'version:', VERSION
epsrc = re.compile(r'entry_points = (\{.*?\})', re.DOTALL).search(open('src/%s/linux.py'%APPNAME, 'rb').read()).group(1)
entry_points = eval(epsrc, {'__appname__': APPNAME})
if 'win32' in sys.platform.lower() or 'win64' in sys.platform.lower():
entry_points['console_scripts'].append('parallel = %s.parallel:main'%APPNAME)
def _ep_to_script(ep, base='src'):
return (base+os.path.sep+re.search(r'.*=\s*(.*?):', ep).group(1).replace('.', '/')+'.py').strip()

View File

@ -1,7 +1,7 @@
''' E-book management software'''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
__version__ = '0.4.72'
__version__ = '0.4.76'
__docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid at kovidgoyal.net>"
__appname__ = 'calibre'
@ -38,6 +38,40 @@ try:
except:
preferred_encoding = 'utf-8'
_abspath = os.path.abspath
def my_abspath(path, encoding=sys.getfilesystemencoding()):
    '''
    Work around for buggy os.path.abspath.

    Byte strings are passed straight to the original os.path.abspath. Unicode
    strings are first converted to byte strings using `encoding`, made
    absolute, and then decoded back to unicode with the same encoding.

    `path`: byte string or unicode path
    `encoding`: codec used for the unicode round trip. If it is None (the
                filesystem encoding could not be determined), utf-8 is used.
    '''
    if not isinstance(path, unicode):
        return _abspath(path)
    # sys.getfilesystemencoding() may be None, and encode(None) raises
    # TypeError, so fall back to a codec that can represent any path.
    codec = encoding or 'utf-8'
    return _abspath(path.encode(codec)).decode(codec)
os.path.abspath = my_abspath
_join = os.path.join
def my_join(a, *p):
    '''
    Replacement for os.path.join that accepts a mix of byte and unicode
    components.

    Unicode components are encoded with the filesystem encoding before being
    handed to the original os.path.join. If at least one component was
    unicode, the joined result is decoded back to unicode; otherwise a byte
    string is returned.
    '''
    # sys.getfilesystemencoding() may be None; encode(None) would raise
    # TypeError, so fall back to utf-8.
    encoding = sys.getfilesystemencoding() or 'utf-8'
    parts = (a,) + p
    saw_unicode = any(isinstance(part, unicode) for part in parts)
    encoded = [part.encode(encoding) if isinstance(part, unicode) else part
               for part in parts]
    res = _join(*encoded)
    if saw_unicode:
        res = res.decode(encoding)
    return res
os.path.join = my_join
def osx_version():
if isosx:
@ -75,6 +109,8 @@ class ColoredFormatter(Formatter):
def setup_cli_handlers(logger, level):
if os.environ.get('CALIBRE_WORKER', None) is not None and logger.handlers:
return
logger.setLevel(level)
if level == logging.WARNING:
handler = logging.StreamHandler(sys.stdout)
@ -88,6 +124,7 @@ def setup_cli_handlers(logger, level):
handler = logging.StreamHandler(sys.stderr)
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter('[%(levelname)s] %(filename)s:%(lineno)s: %(message)s'))
logger.addHandler(handler)
class CustomHelpFormatter(IndentedHelpFormatter):
@ -361,9 +398,10 @@ def relpath(target, base=os.curdir):
Base can be a directory specified either as absolute or relative to current dir.
"""
if not os.path.exists(target):
raise OSError, 'Target does not exist: '+target
#if not os.path.exists(target):
# raise OSError, 'Target does not exist: '+target
if target == base:
raise ValueError('target and base are both: %s'%target)
if not os.path.isdir(base):
raise OSError, 'Base is not a directory or does not exist: '+base
@ -371,13 +409,13 @@ def relpath(target, base=os.curdir):
target_list = (os.path.abspath(target)).split(os.sep)
# On the windows platform the target may be on a completely different drive from the base.
if iswindows and base_list[0] <> target_list[0]:
raise OSError, 'Target is on a different drive to base. Target: '+target_list[0].upper()+', base: '+base_list[0].upper()
if iswindows and base_list[0].upper() != target_list[0].upper():
raise OSError, 'Target is on a different drive to base. Target: '+repr(target)+', base: '+repr(base)
# Starting from the filepath root, work out how much of the filepath is
# shared by base and target.
for i in range(min(len(base_list), len(target_list))):
if base_list[i] <> target_list[i]: break
if base_list[i] != target_list[i]: break
else:
# If we broke out of the loop, i is pointing to the first differing path elements.
# If we didn't break out of the loop, i is pointing to identical path elements.

View File

@ -351,11 +351,20 @@ class PRS505(Device):
os.utime(path, None)
def upload_books(self, files, names, on_card=False, end_session=True):
if on_card and not self._card_prefix:
raise ValueError(_('The reader has no storage card connected.'))
path = os.path.join(self._card_prefix, self.CARD_PATH_PREFIX) if on_card \
else os.path.join(self._main_prefix, 'database', 'media', 'books')
infiles = [file if hasattr(file, 'read') else open(file, 'rb') for file in files]
for f in infiles: f.seek(0, 2)
sizes = [f.tell() for f in infiles]
def get_size(obj):
if hasattr(obj, 'seek'):
obj.seek(0, 2)
size = obj.tell()
obj.seek(0)
return size
return os.path.getsize(obj)
sizes = map(get_size, files)
size = sum(sizes)
space = self.free_space()
mspace = space[0]
@ -370,13 +379,18 @@ class PRS505(Device):
paths, ctimes = [], []
names = iter(names)
for infile in infiles:
for infile in files:
close = False
if not hasattr(infile, 'read'):
infile, close = open(infile, 'rb'), True
infile.seek(0)
name = names.next()
paths.append(os.path.join(path, name))
if not os.path.exists(os.path.dirname(paths[-1])):
os.makedirs(os.path.dirname(paths[-1]))
self.put_file(infile, paths[-1], replace_file=True)
if close:
infile.close()
ctimes.append(os.path.getctime(paths[-1]))
return zip(paths, sizes, ctimes, cycle([on_card]))

View File

@ -15,6 +15,6 @@ class ConversionError(Exception):
class UnknownFormatError(Exception):
pass
BOOK_EXTENSIONS = ['lrf', 'lrx', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'epub', 'pdf', 'prc', 'mobi', 'azw',
'epub', 'fb2', 'djvu']
'epub', 'fb2', 'djvu', 'lrx']

View File

@ -9,8 +9,6 @@ from optparse import OptionValueError
from htmlentitydefs import name2codepoint
from uuid import uuid4
from fontTools.ttLib import TTLibError
from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, PutObj, \
Paragraph, TextStyle, BlockStyle
@ -121,7 +119,6 @@ def option_parser(usage, gui_mode=False):
laf.add_option('--ignore-colors', action='store_true', default=False, dest='ignore_colors',
help=_('Render all content as black on white instead of the colors specified by the HTML or CSS.'))
page = parser.add_option_group('PAGE OPTIONS')
profiles = profile_map.keys()
page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
@ -139,6 +136,11 @@ def option_parser(usage, gui_mode=False):
help=_('''Top margin of page. Default is %default px.'''))
page.add_option('--bottom-margin', default=0, dest='bottom_margin', type='int',
help=_('''Bottom margin of page. Default is %default px.'''))
page.add_option('--render-tables-as-images', default=False, action='store_true',
help=_('Render tables in the HTML as images (useful if the document has large or complex tables)'))
page.add_option('--text-size-multiplier-for-rendered-tables', type='float', default=1.0,
help=_('Multiply the size of text in rendered tables by this factor. Default is %default'))
link = parser.add_option_group('LINK PROCESSING OPTIONS')
link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
dest='link_levels',
@ -154,12 +156,13 @@ def option_parser(usage, gui_mode=False):
chapter = parser.add_option_group('CHAPTER OPTIONS')
chapter.add_option('--disable-chapter-detection', action='store_true',
default=False, dest='disable_chapter_detection',
help=_('''Prevent the automatic insertion of page breaks'''
''' before detected chapters.'''))
help=_('''Prevent the automatic detection chapters.'''))
chapter.add_option('--chapter-regex', dest='chapter_regex',
default='chapter|book|appendix',
help=_('''The regular expression used to detect chapter titles.'''
''' It is searched for in heading tags (h1-h6). Defaults to %default'''))
chapter.add_option('--chapter-attr', default='$,,$',
help=_('Detect a chapter beginning at an element having the specified attribute. The format for this option is tagname regexp,attribute name,attribute value regexp. For example to match all heading tags that have the attribute class="chapter" you would use "h\d,class,chapter". Default is %default'''))
chapter.add_option('--page-break-before-tag', dest='page_break', default='h[12]',
help=_('''If html2lrf does not find any page breaks in the '''
'''html file and cannot detect chapter headings, it will '''

View File

@ -66,7 +66,7 @@ def traverse_subdirs(tdir):
return tdir
def handle_archive(path):
tdir = tempfile.mkdtemp(prefix=__appname__+'_')
tdir = tempfile.mkdtemp(prefix=__appname__+'_'+'archive_')
extract(path, tdir)
files = []
cdir = traverse_subdirs(tdir)
@ -75,9 +75,10 @@ def handle_archive(path):
pat = os.path.join(cdir, '*.'+ext)
files.extend(glob.glob(pat))
file = largest_file(files)
if file:
return tdir, file
file = find_htmlfile(cdir)
if not file:
file = find_htmlfile(cdir)
if isinstance(file, str):
file = file.decode(sys.getfilesystemencoding())
return tdir, file
def process_file(path, options, logger=None):
@ -109,7 +110,7 @@ def process_file(path, options, logger=None):
if not newpath:
raise UnknownFormatError('Could not find ebook in archive')
path = newpath
logger.info('Found ebook in archive: %s', path)
logger.info('Found ebook in archive: %s', repr(path))
try:
ext = os.path.splitext(path)[1][1:].lower()
convertor = None
@ -158,7 +159,10 @@ def main(args=sys.argv, logger=None, gui_mode=False):
print _('No file to convert specified.')
return 1
return process_file(args[1], options, logger)
src = args[1]
if not isinstance(src, unicode):
src = src.decode(sys.getfilesystemencoding())
return process_file(src, options, logger)
if __name__ == '__main__':
sys.exit(main())

View File

@ -42,7 +42,7 @@ def process_file(path, options, logger=None):
tdir = generate_html(epub, logger)
try:
ocf = OCFDirReader(tdir)
htmlfile = ocf.opf.spine.items().next().href
htmlfile = ocf.opf.spine[0].path
options.opf = os.path.join(tdir, ocf.container[OPF.MIMETYPE])
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'

View File

@ -30,7 +30,7 @@ from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks import ConversionError
from calibre.ebooks.lrf.html.table import Table
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
fit_image, LoggingInterface
fit_image, LoggingInterface, preferred_encoding
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.metadata.opf import OPFReader
from calibre.devices.interface import Device
@ -242,6 +242,7 @@ class HTMLConverter(object, LoggingInterface):
self.override_css = {}
self.override_pcss = {}
self.table_render_job_server = None
if self._override_css is not None:
if os.access(self._override_css, os.R_OK):
@ -260,38 +261,43 @@ class HTMLConverter(object, LoggingInterface):
paths = [os.path.abspath(path) for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode) else path for path in paths]
while len(paths) > 0 and self.link_level <= self.link_levels:
for path in paths:
if path in self.processed_files:
continue
try:
self.add_file(path)
except KeyboardInterrupt:
raise
except:
if self.link_level == 0: # Die on errors in the first level
try:
while len(paths) > 0 and self.link_level <= self.link_levels:
for path in paths:
if path in self.processed_files:
continue
try:
self.add_file(path)
except KeyboardInterrupt:
raise
for link in self.links:
if link['path'] == path:
self.links.remove(link)
break
self.log_warn('Could not process '+path)
if self.verbose:
self.log_exception(' ')
self.links = self.process_links()
self.link_level += 1
paths = [link['path'] for link in self.links]
except:
if self.link_level == 0: # Die on errors in the first level
raise
for link in self.links:
if link['path'] == path:
self.links.remove(link)
break
self.log_warn('Could not process '+path)
if self.verbose:
self.log_exception(' ')
self.links = self.process_links()
self.link_level += 1
paths = [link['path'] for link in self.links]
if self.current_page is not None and self.current_page.has_text():
self.book.append(self.current_page)
if self.current_page is not None and self.current_page.has_text():
self.book.append(self.current_page)
for text, tb in self.extra_toc_entries:
self.book.addTocEntry(text, tb)
for text, tb in self.extra_toc_entries:
self.book.addTocEntry(text, tb)
if self.base_font_size > 0:
self.log_info('\tRationalizing font sizes...')
self.book.rationalize_font_sizes(self.base_font_size)
if self.base_font_size > 0:
self.log_info('\tRationalizing font sizes...')
self.book.rationalize_font_sizes(self.base_font_size)
finally:
if self.table_render_job_server is not None:
self.table_render_job_server.killall()
def is_baen(self, soup):
return bool(soup.find('meta', attrs={'name':'Publisher',
@ -362,9 +368,10 @@ class HTMLConverter(object, LoggingInterface):
else:
self.css[selector] = self.override_css[selector]
self.file_name = os.path.basename(path)
self.log_info(_('Processing %s'), path if self.verbose else self.file_name)
upath = path.encode('utf-8') if isinstance(path, unicode) else path
upath = path.encode(sys.getfilesystemencoding()) if isinstance(path, unicode) else path
self.file_name = os.path.basename(upath.decode(sys.getfilesystemencoding()))
self.log_info(_('Processing %s'), repr(upath) if self.verbose else repr(self.file_name))
if not os.path.exists(upath):
upath = upath.replace('&', '%26') #convertlit replaces & with %26 in file names
f = open(upath, 'rb')
@ -380,12 +387,15 @@ class HTMLConverter(object, LoggingInterface):
self.log_info(_('\tConverting to BBeB...'))
self.current_style = {}
self.page_break_found = False
if not isinstance(path, unicode):
path = path.decode(sys.getfilesystemencoding())
self.target_prefix = path
self.previous_text = '\n'
self.tops[path] = self.parse_file(soup)
self.processed_files.append(path)
def parse_css(self, style):
"""
Parse the contents of a <style> tag or .css file.
@ -494,7 +504,9 @@ class HTMLConverter(object, LoggingInterface):
top = self.current_block
self.current_block.must_append = True
self.soup = soup
self.process_children(soup, {}, {})
self.soup = None
if self.current_para and self.current_block:
self.current_para.append_to(self.current_block)
@ -625,6 +637,8 @@ class HTMLConverter(object, LoggingInterface):
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
ascii_text = text
if not isinstance(path, unicode):
path = path.decode(sys.getfilesystemencoding())
if path in self.processed_files:
if path+fragment in self.targets.keys():
tb = get_target_block(path+fragment, self.targets)
@ -857,11 +871,12 @@ class HTMLConverter(object, LoggingInterface):
append_text(src)
else:
srcs = src.split('\n')
for src in srcs:
if src:
append_text(src)
if len(srcs) > 1:
self.line_break()
for src in srcs[:-1]:
append_text(src)
self.line_break()
last = srcs[-1]
if len(last):
append_text(last)
def line_break(self):
self.current_para.append(CR())
@ -1424,6 +1439,18 @@ class HTMLConverter(object, LoggingInterface):
return
except KeyError:
pass
if not self.disable_chapter_detection and \
(self.chapter_attr[0].match(tagname) and \
tag.has_key(self.chapter_attr[1]) and \
self.chapter_attr[2].match(tag[self.chapter_attr[1]])):
self.log_debug('Detected chapter %s', tagname)
self.end_page()
self.page_break_found = True
if self.options.add_chapters_to_toc:
self.extra_toc_entries.append((self.get_text(tag,
limit=1000), self.current_block))
end_page = self.process_page_breaks(tag, tagname, tag_css)
try:
if tagname in ["title", "script", "meta", 'del', 'frameset']:
@ -1680,18 +1707,48 @@ class HTMLConverter(object, LoggingInterface):
self.previous_text = ' '
self.process_children(tag, tag_css, tag_pseudo_css)
elif tagname == 'table' and not self.ignore_tables and not self.in_table:
tag_css = self.tag_css(tag)[0] # Table should not inherit CSS
try:
self.process_table(tag, tag_css)
except Exception, err:
self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err))
self.log_debug('', exc_info=True)
self.log_debug(_('Bad table:\n%s'), str(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
if self.minimize_memory_usage:
tag.extract()
if self.render_tables_as_images:
if self.table_render_job_server is None:
from calibre.parallel import Server
self.table_render_job_server = Server(number_of_workers=1)
print 'Rendering table...'
from calibre.ebooks.lrf.html.table_as_image import render_table
pheight = int(self.current_page.pageStyle.attrs['textheight'])
pwidth = int(self.current_page.pageStyle.attrs['textwidth'])
images = render_table(self.table_render_job_server,
self.soup, tag, tag_css,
os.path.dirname(self.target_prefix),
pwidth, pheight, self.profile.dpi,
self.text_size_multiplier_for_rendered_tables)
for path, width, height in images:
stream = ImageStream(path, encoding='PNG')
im = Image(stream, x0=0, y0=0, x1=width, y1=height,\
xsize=width, ysize=height)
pb = self.current_block
self.end_current_para()
self.process_alignment(tag_css)
self.current_para.append(Plot(im, xsize=width*720./self.profile.dpi,
ysize=height*720./self.profile.dpi))
self.current_block.append(self.current_para)
self.current_page.append(self.current_block)
self.current_block = self.book.create_text_block(
textStyle=pb.textStyle,
blockStyle=pb.blockStyle)
self.current_para = Paragraph()
else:
tag_css = self.tag_css(tag)[0] # Table should not inherit CSS
try:
self.process_table(tag, tag_css)
except Exception, err:
self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err))
self.log_debug('', exc_info=True)
self.log_debug(_('Bad table:\n%s'), str(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
if self.minimize_memory_usage:
tag.extract()
else:
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
@ -1743,6 +1800,9 @@ def process_file(path, options, logger=None):
level = logging.DEBUG if options.verbose else logging.INFO
logger = logging.getLogger('html2lrf')
setup_cli_handlers(logger, level)
if not isinstance(path, unicode):
path = path.decode(sys.getfilesystemencoding())
path = os.path.abspath(path)
default_title = filename_to_utf8(os.path.splitext(os.path.basename(path))[0])
dirpath = os.path.dirname(path)
@ -1821,9 +1881,14 @@ def process_file(path, options, logger=None):
re.compile('$')
fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
re.compile('$')
cq = options.chapter_attr.split(',')
options.chapter_attr = [re.compile(cq[0], re.IGNORECASE), cq[1],
re.compile(cq[2], re.IGNORECASE)]
options.force_page_break = fpb
options.link_exclude = le
options.page_break = pb
if not isinstance(options.chapter_regex, unicode):
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
fpba = options.force_page_break_attr.split(',')
if len(fpba) != 3:
@ -1906,7 +1971,7 @@ def try_opf(path, options, logger):
continue
if not getattr(options, 'cover', None) and orig_cover is not None:
options.cover = orig_cover
options.spine = [i.href for i in opf.spine.items()]
options.spine = [i.path for i in opf.spine if i.path]
if not getattr(options, 'toc', None):
options.toc = opf.toc
except Exception:
@ -1940,7 +2005,8 @@ def main(args=sys.argv):
except Exception, err:
print >> sys.stderr, err
return 1
if not isinstance(src, unicode):
src = src.decode(sys.getfilesystemencoding())
process_file(src, options)
return 0

View File

@ -0,0 +1,104 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Render HTML tables as images.
'''
import os, tempfile, atexit, shutil, time
from PyQt4.Qt import QWebPage, QUrl, QApplication, QSize, \
SIGNAL, QPainter, QImage, QObject, Qt
__app = None
class HTMLTableRenderer(QObject):
    '''
    Load a piece of HTML into an off-screen QWebPage and, once loading has
    finished, paint it into one or more PNG files.

    The rendered slices are collected in `self.images` as
    (path, width, height) tuples and are written into the temporary
    directory `self.tdir`.
    '''

    def __init__(self, html, base_dir, width, height, dpi, factor):
        '''
        `html`: the markup to render
        `base_dir`: The directory in which the HTML file that contains the table resides
        `width, height`: page width and height in pixels
        `dpi`: resolution stored in the generated images
        `factor`: text size multiplier applied to the main frame
        '''
        QObject.__init__(self)

        self.app = None
        self.width, self.height, self.dpi = width, height, dpi
        self.base_dir = base_dir
        # Results must exist before the page starts loading: render_html()
        # uses them and may be invoked as soon as setHtml() is called.
        self.images = []
        self.tdir = tempfile.mkdtemp(prefix='calibre_render_table')
        self.page = QWebPage()
        self.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
        self.page.mainFrame().setTextSizeMultiplier(factor)
        self.page.mainFrame().setHtml(html,
                QUrl('file:'+os.path.abspath(self.base_dir)))

    def render_html(self, ok):
        '''
        Slot for loadFinished(bool). Paints the loaded page into an image,
        cuts it into slices no taller than one page and saves each slice as a
        PNG in `self.tdir`. Always quits the QApplication when done so that
        the caller's event loop returns.

        `ok`: False if loading failed, in which case nothing is rendered
        '''
        try:
            if not ok:
                return
            cwidth, cheight = self.page.mainFrame().contentsSize().width(), self.page.mainFrame().contentsSize().height()
            self.page.setViewportSize(QSize(cwidth, cheight))
            # Content wider than the page is scaled down later, so a slice
            # may be correspondingly taller before scaling.
            factor = float(self.width)/cwidth if cwidth > self.width else 1
            cutoff_height = int(self.height/factor)-3
            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
            # Convert dots per inch to dots per meter, for both axes.
            image.setDotsPerMeterX(self.dpi*(100/2.54))
            image.setDotsPerMeterY(self.dpi*(100/2.54))
            painter = QPainter(image)
            self.page.mainFrame().render(painter)
            painter.end()
            cheight = image.height()
            cwidth = image.width()
            # Consecutive slices overlap by 20 pixels. Guarantee forward
            # progress even for very small page heights.
            step = max(cutoff_height-20, 1)
            pos = 0
            while pos < cheight:
                img = image.copy(0, pos, cwidth, min(cheight-pos, cutoff_height))
                pos += step
                if cwidth > self.width:
                    img = img.scaledToWidth(self.width, Qt.SmoothTransform)
                f = os.path.join(self.tdir, '%d.png'%pos)
                img.save(f)
                self.images.append((f, img.width(), img.height()))
        finally:
            QApplication.quit()
def render_table(server, soup, table, css, base_dir, width, height, dpi, factor=1.0):
head = ''
for e in soup.findAll(['link', 'style']):
head += unicode(e)+'\n\n'
style = ''
for key, val in css.items():
style += key + ':%s;'%val
html = u'''\
<html>
<head>
%s
</head>
<body style="width: %dpx; background: white">
<style type="text/css">
table {%s}
</style>
%s
</body>
</html>
'''%(head, width-10, style, unicode(table))
server.run_job(1, 'render_table',
args=[html, base_dir, width, height, dpi, factor])
res = None
while res is None:
time.sleep(2)
res = server.result(1)
result, exception, traceback = res
if exception:
print 'Failed to render table'
print exception
print traceback
images, tdir = result
atexit.register(shutil.rmtree, tdir)
return images
def do_render(html, base_dir, width, height, dpi, factor):
    '''
    Render `html` with an HTMLTableRenderer and return (images, tdir).

    Reuses the existing QApplication if there is one, otherwise creates it,
    then runs the event loop. The loop ends when the renderer calls
    QApplication.quit() after handling the loaded page.
    '''
    application = QApplication.instance()
    if application is None:
        application = QApplication([])
    renderer = HTMLTableRenderer(html, base_dir, width, height, dpi, factor)
    application.exec_()
    return renderer.images, renderer.tdir

View File

@ -57,7 +57,7 @@ def process_file(path, options, logger=None):
if opf:
path = opf[0]
opf = OPFReader(path)
htmlfile = opf.spine.items().next().href.replace('&', '%26') #convertlit replaces & with %26
htmlfile = opf.spine[0].path.replace('&', '%26') #convertlit replaces & with %26
options.opf = path
else:
l = glob.glob(os.path.join(tdir, '*toc*.htm*'))

View File

@ -375,7 +375,8 @@ class LRFMetaFile(object):
delta = insert_into_file(self._file, stream, self.info_start, \
self.info_start + orig_size - 4)
self.toc_object_offset += delta
if self.toc_object_offset > 0:
self.toc_object_offset += delta
self.object_index_offset += delta
self.update_object_offsets(delta)
@ -685,3 +686,4 @@ def main(args=sys.argv):
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,14 +1,17 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
"""
Provides metadata editing support for PDF and RTF files. For LRF metadata, use
the L{lrf.meta} module.
Provides abstraction for metadata reading/writing from a variety of ebook formats.
"""
__docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import os, mimetypes, sys
from urllib import unquote, quote
from urlparse import urlparse
from calibre import __version__ as VERSION
from calibre import __version__ as VERSION, relpath
from calibre import OptionParser
def get_parser(extension):
@ -24,6 +27,136 @@ def get_parser(extension):
help=_('Set the comment'))
return parser
class Resource(object):
    '''
    Represents a resource (usually a file on the filesystem or a URL pointing
    to the web). Such resources are commonly referred to in OPF files.

    They have the interface:

    :member:`path`
    :member:`mime_type`
    :method:`href`
    '''

    def __init__(self, href_or_path, basedir=None, is_path=True):
        '''
        `href_or_path`: a filesystem path if `is_path` is True, otherwise a URL
        `basedir`: directory against which relative paths and relative URLs
                   are resolved. If None, the current working directory at
                   the time of the call is used.
        `is_path`: whether `href_or_path` is a filesystem path
        '''
        if basedir is None:
            # Resolved per call; a default evaluated in the signature would
            # be frozen at import time.
            basedir = os.getcwd()
        self._href = None
        self._basedir = None
        self.path = None
        self.fragment = ''
        try:
            self.mime_type = mimetypes.guess_type(href_or_path)[0]
        except Exception:
            self.mime_type = None
        if self.mime_type is None:
            self.mime_type = 'application/octet-stream'
        if is_path:
            path = href_or_path
            if not os.path.isabs(path):
                # basedir comes first: the relative path is resolved under it.
                path = os.path.abspath(os.path.join(basedir, path))
            if isinstance(path, str):
                path = path.decode(sys.getfilesystemencoding())
            self.path = path
        else:
            url = urlparse(href_or_path)
            if url[0] not in ('', 'file'):
                # Not a local file: keep the href as given.
                self._href = href_or_path
            else:
                pc = url[2]
                if isinstance(pc, unicode):
                    # unquote() must operate on bytes so that percent escapes
                    # decode to utf-8 sequences.
                    pc = pc.encode('utf-8')
                pc = unquote(pc).decode('utf-8')
                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
                self.fragment = unquote(url[-1])

    def href(self, basedir=None):
        '''
        Return a URL pointing to this resource. If it is a file on the filesystem
        the URL is relative to `basedir`.

        `basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`).
        If this resource has no basedir, then the current working directory is used as the basedir.
        '''
        if basedir is None:
            if self._basedir:
                basedir = self._basedir
            else:
                basedir = os.getcwd()
        if self.path is None:
            return self._href
        f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode) else self.fragment
        frag = '#'+quote(f) if self.fragment else ''
        if self.path == basedir:
            return ''+frag
        try:
            rpath = relpath(self.path, basedir)
        except OSError: # On windows path and basedir could be on different drives
            rpath = self.path
        if isinstance(rpath, unicode):
            rpath = rpath.encode('utf-8')
        return quote(rpath.replace(os.sep, '/'))+frag

    def set_basedir(self, path):
        '''Set the directory that :method:`href` uses by default.'''
        self._basedir = path

    def basedir(self):
        '''Return the directory set with :method:`set_basedir`, or None.'''
        return self._basedir

    def __repr__(self):
        return 'Resource(%s, %s)'%(repr(self.path), repr(self.href()))
class ResourceCollection(object):
    '''
    An ordered, list-like container that only accepts Resource objects.
    '''

    def __init__(self):
        self._resources = []

    def __iter__(self):
        for r in self._resources:
            yield r

    def __len__(self):
        return len(self._resources)

    def __getitem__(self, index):
        return self._resources[index]

    def __bool__(self):
        return len(self._resources) > 0

    # Python 2 consults __nonzero__, not __bool__, for truth testing.
    __nonzero__ = __bool__

    def __str__(self):
        resources = map(repr, self)
        return '[%s]'%', '.join(resources)

    def __repr__(self):
        return str(self)

    def append(self, resource):
        '''Add `resource`; raises ValueError if it is not a Resource.'''
        if not isinstance(resource, Resource):
            raise ValueError('Can only append objects of type Resource')
        self._resources.append(resource)

    def remove(self, resource):
        '''Remove `resource`; raises ValueError if it is not present.'''
        self._resources.remove(resource)

    @staticmethod
    def from_directory_contents(top, topdown=True):
        '''
        Build a collection with one Resource per file found under `top`.
        Each resource has `top` as its basedir.

        `top`: directory to walk
        `topdown`: passed through to os.walk
        '''
        collection = ResourceCollection()
        for dirpath, dirnames, filenames in os.walk(top, topdown=topdown):
            # NOTE(review): only files are collected; confirm that directory
            # entries are not wanted as resources as well.
            for name in filenames:
                path = os.path.abspath(os.path.join(dirpath, name))
                res = Resource(path, is_path=True)
                res.set_basedir(top)
                collection.append(res)
        return collection

    def set_basedir(self, path):
        '''Set `path` as the basedir of every resource in the collection.'''
        for res in self:
            res.set_basedir(path)
class MetaInformation(object):
'''Convenient encapsulation of book metadata'''
@ -32,13 +165,13 @@ class MetaInformation(object):
ans = MetaInformation(mi.title, mi.authors)
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'tags', 'cover_data', 'application_id',
'manifest', 'spine', 'toc', 'cover'):
'isbn', 'tags', 'cover_data', 'application_id', 'guide',
'manifest', 'spine', 'toc', 'cover', 'language'):
if hasattr(mi, attr):
setattr(ans, attr, getattr(mi, attr))
def __init__(self, title, authors=['Unknown']):
def __init__(self, title, authors=[_('Unknown')]):
'''
@param title: title or "Unknown" or a MetaInformation object
@param authors: List of strings or []
@ -63,12 +196,14 @@ class MetaInformation(object):
self.rating = None if not mi else mi.rating
self.isbn = None if not mi else mi.isbn
self.tags = [] if not mi else mi.tags
self.language = None if not mi else mi.language # Typically a string describing the language
#: mi.cover_data = (ext, data)
self.cover_data = mi.cover_data if (mi and hasattr(mi, 'cover_data')) else (None, None)
self.application_id = mi.application_id if (mi and hasattr(mi, 'application_id')) else None
self.manifest = getattr(mi, 'manifest', None)
self.toc = getattr(mi, 'toc', None)
self.spine = getattr(mi, 'spine', None)
self.guide = getattr(mi, 'guide', None)
self.cover = getattr(mi, 'cover', None)
def smart_update(self, mi):
@ -84,7 +219,8 @@ class MetaInformation(object):
for attr in ('author_sort', 'title_sort', 'comments', 'category',
'publisher', 'series', 'series_index', 'rating',
'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover'):
'isbn', 'application_id', 'manifest', 'spine', 'toc',
'cover', 'language', 'guide'):
if hasattr(mi, attr):
val = getattr(mi, attr)
if val is not None:
@ -114,7 +250,9 @@ class MetaInformation(object):
if self.tags:
ans += u'Tags : ' +unicode(self.tags) + '\n'
if self.series:
ans += u'Series : '+unicode(self.series) + '(%d)'%self.series_index
ans += u'Series : '+unicode(self.series) + ' #%d\n'%self.series_index
if self.language:
ans += u'Language : ' + unicode(self.language) + u'\n'
return ans.strip()
def __nonzero__(self):

View File

@ -5,7 +5,9 @@ Support for reading the metadata from a lit file.
'''
import sys, struct, cStringIO, os
from itertools import repeat
from calibre import relpath
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFReader
@ -188,7 +190,7 @@ class UnBinary(object):
def write_spaces(self, depth):
self.buf.write(u' '.join(u'' for i in range(depth)))
self.buf.write(u''.join(repeat(' ', depth)))
def item_path(self, internal_id):
for i in self.manifest:
@ -692,6 +694,7 @@ class LitFile(object):
try:
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
xml = \
'''\
<?xml version="1.0" encoding="UTF-8" ?>
@ -721,9 +724,10 @@ def get_metadata(stream):
try:
litfile = LitFile(stream)
src = litfile.meta.encode('utf-8')
mi = OPFReader(cStringIO.StringIO(src))
mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd())
cover_url, cover_item = mi.cover, None
if cover_url:
cover_url = relpath(cover_url, os.getcwd())
for item in litfile.manifest:
if item.path == cover_url:
cover_item = item.internal

View File

@ -1,92 +1,204 @@
import cStringIO
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import uuid
'''Read/Write metadata from Open Packaging Format (.opf) files.'''
import sys, re, os, mimetypes
from urllib import unquote
from urlparse import urlparse
import xml.dom.minidom as dom
from itertools import repeat
import sys, re, os, glob
from calibre import __appname__
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
from calibre.ebooks.lrf import entity_to_unicode
from calibre.ebooks.metadata import get_parser
from calibre.ebooks.metadata import get_parser, Resource, ResourceCollection
from calibre.ebooks.metadata.toc import TOC
class ManifestItem(object):
def __init__(self, item, cwd):
self.id = item['id'] if item.has_key('id') else ''
self.href = urlparse(unquote(item['href']))[2] if item.has_key('href') else ''
if not os.path.isabs(self.href):
self.href = os.path.join(cwd, self.href)
self.href = os.path.normpath(self.href)
if not os.path.exists(self.href): # Bug in Baen OPF files
nhref = os.path.join(os.path.dirname(self.href), os.path.basename(self.href).replace('__p_.htm', '__c_.htm'))
if os.path.exists(nhref):
self.href = nhref
self.media_type = item['media-type'] if item.has_key('media-type') else ''
class OPFSoup(BeautifulStoneSoup):
    '''
    A :class:`BeautifulStoneSoup` configured for OPF documents: entities are
    converted using the HTML entity table and ``<item>``, ``<itemref>`` and
    ``<reference>`` are treated as self closing tags.
    '''

    def __init__(self, raw):
        options = dict(
            convertEntities=BeautifulSoup.HTML_ENTITIES,
            selfClosingTags=['item', 'itemref', 'reference'],
        )
        BeautifulStoneSoup.__init__(self, raw, **options)
class ManifestItem(Resource):
@staticmethod
def from_opf_manifest_item(item, basedir):
if item.has_key('href'):
res = ManifestItem(item['href'], basedir=basedir, is_path=False)
mt = item.get('media-type', '').strip()
if mt:
res.mime_type = mt
return res
@apply
def media_type():
def fget(self):
return self.mime_type
def fset(self, val):
self.mime_type = val
return property(fget=fget, fset=fset)
def __unicode__(self):
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href, self.media_type)
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
def __str__(self):
return unicode(self).encode('utf-8')
def __repr__(self):
return unicode(self)
def __getitem__(self, index):
if index == 0:
return self.href
return self.href()
if index == 1:
return self.media_type
raise IndexError('%d out of bounds.'%index)
class Manifest(list):
class Manifest(ResourceCollection):
@staticmethod
def from_opf_manifest_element(manifest, dir):
m = Manifest()
for item in manifest.findAll('item'):
try:
m.append(ManifestItem.from_opf_manifest_item(item, dir))
id = item.get('id', '')
if not id:
id = 'id%d'%m.next_id
m[-1].id = id
m.next_id += 1
except ValueError:
continue
return m
@staticmethod
def from_paths(entries):
'''
`entries`: List of (path, mime-type) If mime-type is None it is autodetected
'''
m = Manifest()
for path, mt in entries:
mi = ManifestItem(path, is_path=True)
if mt:
mi.mime_type = mt
mi.id = 'id%d'%m.next_id
m.next_id += 1
m.append(mi)
return m
def __init__(self):
ResourceCollection.__init__(self)
self.next_id = 1
def __init__(self, soup, dir):
manifest = soup.find('manifest')
if manifest is not None:
for item in manifest.findAll('item'):
self.append(ManifestItem(item, dir))
def item(self, id):
for i in self:
if i.id == id:
return i
class Spine(object):
def id_for_path(self, path):
path = os.path.normpath(os.path.abspath(path))
for i in self:
if i.path and os.path.normpath(i.path) == path:
return i.id
def __init__(self, soup, manifest):
def path_for_id(self, id):
for i in self:
if i.id == id:
return i.path
class Spine(ResourceCollection):
class Item(Resource):
def __init__(self, idfunc, *args, **kwargs):
Resource.__init__(self, *args, **kwargs)
self.is_linear = True
self.id = idfunc(self.path)
@staticmethod
def from_opf_spine_element(spine, manifest):
s = Spine(manifest)
for itemref in spine.findAll('itemref'):
if itemref.has_key('idref'):
r = Spine.Item(s.manifest.id_for_path,
s.manifest.path_for_id(itemref['idref']), is_path=True)
r.is_linear = itemref.get('linear', 'yes') == 'yes'
s.append(r)
return s
@staticmethod
def from_paths(paths, manifest):
s = Spine(manifest)
for path in paths:
try:
s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True))
except:
continue
return s
def __init__(self, manifest):
ResourceCollection.__init__(self)
self.manifest = manifest
self.linear_ids, self.nonlinear_ids = [], []
spine = soup.find('spine')
if spine is not None:
for itemref in spine.findAll('itemref'):
if itemref.has_key('idref'):
if itemref.get('linear', 'yes') == 'yes':
self.linear_ids.append(itemref['idref'])
else:
self.nonlinear_ids.append(itemref['idref'])
def linear_items(self):
for id in self.linear_ids:
yield self.manifest.item(id)
for r in self:
if r.is_linear:
yield r.path
def nonlinear_items(self):
for id in self.nonlinear_ids:
yield self.manifest.item(id)
for r in self:
if not r.is_linear:
yield r.path
def items(self):
for i in self.linear_ids + self.nonlinear_ids:
mi = self.manifest.item(i)
if getattr(mi, 'href', None):
yield mi
for i in self:
yield i.path
def __iter__(self):
for i in self.linear_ids + self.nonlinear_ids:
yield i
class Guide(ResourceCollection):
    '''
    The ``<guide>`` section of an OPF package: a collection of
    :class:`Guide.Reference` objects, each carrying a `type` and a `title`.
    '''

    class Reference(Resource):
        '''A single ``<reference>`` element of the guide.'''

        @staticmethod
        def from_opf_resource_item(ref, basedir):
            '''
            Create a reference from a parsed ``<reference>`` tag.

            `ref`: The tag. It must have ``href`` and ``type`` attributes
                   (a KeyError is raised otherwise); ``title`` is optional.
            `basedir`: Directory against which the href is resolved.
            '''
            title, href, ref_type = ref.get('title', ''), ref['href'], ref['type']
            res = Guide.Reference(href, basedir, is_path=False)
            res.title = title
            res.type = ref_type
            return res

        def __repr__(self):
            ans = '<reference type="%s" href="%s" '%(self.type, self.href())
            if self.title:
                ans += 'title="%s" '%self.title
            return ans + '/>'

    @staticmethod
    def from_opf_guide(guide_elem, base_dir=None):
        '''
        Build a guide from a parsed ``<guide>`` element.

        References that cannot be converted (for example because they lack
        an ``href`` or ``type`` attribute) are skipped.

        `guide_elem`: The parsed ``<guide>`` tag.
        `base_dir`: Directory against which hrefs are resolved. If None, the
                    current working directory at the time of the call is used.
        '''
        if base_dir is None:
            # Resolved per call; a default of os.getcwdu() in the signature
            # would be frozen to whatever the cwd was at import time.
            base_dir = os.getcwdu()
        coll = Guide()
        for elem in guide_elem.findAll('reference'):
            try:
                ref = Guide.Reference.from_opf_resource_item(elem, base_dir)
            except Exception:
                # Best effort: ignore malformed references, but let
                # KeyboardInterrupt and SystemExit propagate.
                continue
            coll.append(ref)
        return coll

    def set_cover(self, path):
        '''
        Make `path` the cover of the book.

        Every existing reference whose type contains "cover" is removed and
        replaced by one reference to `path` for each of the cover types.
        '''
        # Collect first, then remove, so the collection is not modified
        # while it is being iterated.
        stale = [i for i in self if 'cover' in i.type.lower()]
        for ref in stale:
            self.remove(ref)
        for cover_type in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
            ref = Guide.Reference(path, is_path=True)
            ref.type = cover_type
            ref.title = ''
            self.append(ref)
class standard_field(object):
@ -97,8 +209,6 @@ class standard_field(object):
def __get__(self, obj, typ=None):
return getattr(obj, 'get_'+self.name)()
def __set__(self, obj, val):
getattr(obj, 'set_'+self.name)(val)
class OPF(MetaInformation):
@ -109,6 +219,7 @@ class OPF(MetaInformation):
application_id = standard_field('application_id')
title = standard_field('title')
authors = standard_field('authors')
language = standard_field('language')
title_sort = standard_field('title_sort')
author_sort = standard_field('author_sort')
comments = standard_field('comments')
@ -121,93 +232,15 @@ class OPF(MetaInformation):
rating = standard_field('rating')
tags = standard_field('tags')
HEADER = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.2 Package//EN"
"http://openebook.org/dtds/oeb-1.2/oebpkg12.dtd">
'''
def __init__(self):
raise NotImplementedError('Abstract base class')
def _initialize(self):
if not hasattr(self, 'soup'):
self.soup = BeautifulStoneSoup(u'''\
%s
<package unique-identifier="%s_id">
<metadata>
<dc-metadata
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/" />
</metadata>
</package>
'''%(__appname__, self.HEADER))
def _commit(self, doc):
self.soup = BeautifulStoneSoup(doc.toxml('utf-8'), fromEncoding='utf-8')
def _find_element(self, package, name, attrs=[]):
tags = package.getElementsByTagName(name)
for tag in tags:
match = True
for attr, vattr in attrs:
if tag.getAttribute(attr) != vattr:
match = False
break
if match:
return tag
return None
def _set_metadata_element(self, name, value, attrs=[],
type='dc-metadata', replace=False):
self._initialize()
if isinstance(value, basestring):
value = [value]
attrs = [attrs]
doc = dom.parseString(self.soup.__str__('UTF-8').strip())
package = doc.documentElement
metadata = package.getElementsByTagName('metadata')[0]
dcms = metadata.getElementsByTagName(type)
if dcms:
dcm = dcms[0]
else:
dcm = doc.createElement(type)
metadata.appendChild(dcm)
metadata.appendChild(doc.createTextNode('\n'))
tags = dcm.getElementsByTagName(name)
if tags and not replace:
for tag in tags:
tag.parentNode.removeChild(tag)
tag.unlink()
for val, vattrs in zip(value, attrs):
if replace:
el = self._find_element(package, name, vattrs)
if el:
el.parentNode.removeChild(el)
el.unlink()
el = doc.createElement(name)
el.appendChild(doc.createTextNode(val))
for attr, vattr in vattrs:
el.setAttribute(attr, vattr)
dcm.appendChild(el)
dcm.appendChild(doc.createTextNode('\n'))
self._commit(doc)
def get_title(self):
title = self.soup.package.metadata.find('dc:title')
if title and title.string:
return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
return self.default_title.strip()
def set_title(self, title):
if not title:
title = 'Unknown'
self._set_metadata_element('dc:title', title)
def get_authors(self):
creators = self.soup.package.metadata.findAll('dc:creator')
for elem in creators:
@ -225,12 +258,6 @@ class OPF(MetaInformation):
return [a.strip() for a in ans]
return []
def set_authors(self, authors):
if not authors:
authors = ['Unknown']
attrs = list(repeat([('role', 'aut')], len(authors)))
self._set_metadata_element('dc:creator', authors, attrs)
def get_author_sort(self):
creators = self.soup.package.metadata.findAll('dc:creator')
for elem in creators:
@ -242,18 +269,6 @@ class OPF(MetaInformation):
return self.ENTITY_PATTERN.sub(entity_to_unicode, fa).strip() if fa else None
return None
def set_author_sort(self, aus):
if not aus:
aus = ''
self._initialize()
if not self.authors:
self.set_authors([])
doc = dom.parseString(self.soup.__str__('UTF-8'))
package = doc.documentElement
aut = package.getElementsByTagName('dc:creator')[0]
aut.setAttribute('file-as', aus)
self._commit(doc)
def get_title_sort(self):
title = self.soup.package.find('dc:title')
if title:
@ -261,61 +276,29 @@ class OPF(MetaInformation):
return title['file-as'].strip()
return None
def set_title_sort(self, title_sort):
if not title_sort:
title_sort = ''
self._initialize()
if not self.title:
self.title = None
doc = dom.parseString(self.soup.__str__('UTF-8'))
package = doc.documentElement
tit = package.getElementsByTagName('dc:title')[0]
tit.setAttribute('file-as', title_sort)
self._commit(doc)
def get_comments(self):
comments = self.soup.find('dc:description')
if comments:
return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string).strip()
return None
def set_comments(self, comments):
if not comments:
comments = ''
self._set_metadata_element('dc:description', comments)
def get_uid(self):
package = self.soup.find('package')
if package.has_key('unique-identifier'):
return package['unique-identifier']
def set_uid(self, uid):
package = self.soup.find('package')
package['unique-identifier'] = str(uid)
def get_category(self):
category = self.soup.find('dc:type')
if category:
return self.ENTITY_PATTERN.sub(entity_to_unicode, category.string).strip()
return None
def set_category(self, category):
if not category:
category = ''
self._set_metadata_element('dc:type', category)
def get_publisher(self):
publisher = self.soup.find('dc:publisher')
if publisher:
return self.ENTITY_PATTERN.sub(entity_to_unicode, publisher.string).strip()
return None
def set_publisher(self, category):
if not category:
category = 'Unknown'
self._set_metadata_element('dc:publisher', category)
def get_isbn(self):
for item in self.soup.package.metadata.findAll('dc:identifier'):
scheme = item.get('scheme')
@ -325,10 +308,11 @@ class OPF(MetaInformation):
return str(item.string).strip()
return None
def set_isbn(self, isbn):
if isbn:
self._set_metadata_element('dc:identifier', isbn, [('scheme', 'ISBN')],
replace=True)
def get_language(self):
item = self.soup.package.metadata.find('dc:language')
if not item:
return _('Unknown')
return ''.join(item.findAll(text=True)).strip()
def get_application_id(self):
for item in self.soup.package.metadata.findAll('dc:identifier'):
@ -336,40 +320,15 @@ class OPF(MetaInformation):
return str(item.string).strip()
return None
def set_application_id(self, val):
if val:
self._set_metadata_element('dc:identifier', str(val), [('scheme', __appname__), ('id', __appname__+'_id')],
replace=True)
def get_cover(self):
guide = self.soup.package.find('guide')
if guide:
references = guide.findAll('reference')
for reference in references:
type = reference.get('type')
if not type:
continue
if type.lower() in ['cover', 'other.ms-coverimage-standard', "other.ms-coverimage"]:
return reference.get('href')
return None
def set_cover(self, path):
self._initialize()
doc = dom.parseString(self.soup.__str__('UTF-8'))
package = doc.documentElement
guide = package.getElementsByTagName('guide')
if guide:
guide = guide[0]
else:
guide = doc.createElement('guide')
package.appendChild(guide)
el = self._find_element(guide, 'reference', [('type', 'cover')])
if not el:
el = doc.createElement('reference')
guide.appendChild(el)
el.setAttribute('type', 'cover')
el.setAttribute('href', path)
self._commit(doc)
guide = getattr(self, 'guide', [])
if not guide:
guide = []
references = [ref for ref in guide if 'cover' in ref.type.lower()]
for candidate in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
matches = [r for r in references if r.type.lower() == candidate and r.path]
if matches:
return matches[0].path
def possible_cover_prefixes(self):
isbn, ans = [], []
@ -388,11 +347,6 @@ class OPF(MetaInformation):
return str(s.string).strip()
return None
def set_series(self, val):
if not val:
val = ''
self._set_metadata_element('series', val, type='x-metadata')
def get_series_index(self):
s = self.soup.package.metadata.find('series-index')
if s:
@ -402,11 +356,6 @@ class OPF(MetaInformation):
return None
return None
def set_series_index(self, val):
if not val:
val = 1
self._set_metadata_element('series-index', str(val), type='x-metadata')
def get_rating(self):
xm = self.soup.package.metadata.find('x-metadata')
if not xm:
@ -419,11 +368,6 @@ class OPF(MetaInformation):
return None
return None
def set_rating(self, val):
if not val:
val = 0
self._set_metadata_element('rating', str(val), type='x-metadata')
def get_tags(self):
ans = []
subs = self.soup.findAll('dc:subject')
@ -433,41 +377,6 @@ class OPF(MetaInformation):
ans.append(val)
return [unicode(a).strip() for a in ans]
def set_tags(self, tags):
self._set_metadata_element('dc:subject', tags)
def write(self, stream):
from lxml import etree
root = etree.fromstring(unicode(self.soup))
root.text = '\n%4s'%' '
for child in root:
child.text = '\n%8s'%' '
child.tail = '\n%4s'%' ' if child is not root[-1] else '\n'
for grandchild in child:
grandchild.tail = '\n%8s'%' ' if grandchild is not child[-1] else '\n%4s'%' '
metadata = root.find('metadata')
if metadata is not None:
for parent in ['dc-metadata', 'x-metadata']:
parent = metadata.find(parent)
if parent is None:
continue
parent.text = '\n%12s'%' '
for child in parent:
child.tail = '\n%8s'%' ' if child is parent[-1] else '\n%12s'%' '
def fix_self_closing_tags(el):
''' Makes tags that have only whitespace content self closing '''
if len(el) == 0 and (el.text is None or el.text.strip() == ''):
el.text = None
for child in el:
fix_self_closing_tags(child)
fix_self_closing_tags(root)
raw = self.HEADER + etree.tostring(root, encoding='UTF-8')
stream.write(raw+'\n')
class OPFReader(OPF):
@ -480,15 +389,27 @@ class OPFReader(OPF):
self.default_title = stream.name if hasattr(stream, 'name') else 'Unknown'
if hasattr(stream, 'seek'):
stream.seek(0)
self.soup = BeautifulStoneSoup(stream.read())
self.soup = OPFSoup(stream.read())
if manage:
stream.close()
self.manifest = Manifest(self.soup, dir)
self.spine = Spine(self.soup, self.manifest)
self.manifest = Manifest()
m = self.soup.find('manifest')
if m is not None:
self.manifest = Manifest.from_opf_manifest_element(m, dir)
self.spine = None
spine = self.soup.find('spine')
if spine is not None:
self.spine = Spine.from_opf_spine_element(spine, self.manifest)
self.toc = TOC(base_path=dir)
self.toc.read_from_opf(self)
guide = self.soup.find('guide')
if guide is not None:
self.guide = Guide.from_opf_guide(guide, dir)
self.base_dir = dir
self.cover_data = (None, None)
class OPFCreator(MetaInformation):
def __init__(self, base_path, *args, **kwargs):
@ -502,45 +423,34 @@ class OPFCreator(MetaInformation):
self.base_path = os.path.abspath(base_path)
if self.application_id is None:
self.application_id = str(uuid.uuid4())
self.toc = None
if isinstance(self.manifest, Manifest):
manifest = []
for path, mt in self.manifest:
if not path.startswith(self.base_path):
raise ValueError('Inavlid manifest item %s for base path %s'%(path, self.base_path))
path = path[len(self.base_path)+1:]
manifest.append((path, mt))
self.manifest = manifest
if not isinstance(self.toc, TOC):
self.toc = None
if not self.authors:
self.authors = [_('Unknown')]
if self.guide is None:
self.guide = Guide()
if self.cover:
self.guide.set_cover(self.cover)
def create_manifest(self, entries):
'''
Create <manifest>
@param entries: List of (path, mime-type)
@param base_path: It is used to convert each path into a path relative to itself
@type entries: list of 2-tuples
'''
rentries = []
base_path = self.base_path
mimetypes.init()
for href, mt in entries:
href = os.path.abspath(href)
if not href.startswith(base_path):
raise ValueError('OPF should only refer to files below it. %s is above %s'%(href, base_path))
href = href[len(base_path)+1:].replace(os.sep, '/')
if not mt:
mt = mimetypes.guess_type(href)[0]
if not mt:
mt = ''
rentries.append((href, mt))
self.manifest = rentries
`entries`: List of (path, mime-type) If mime-type is None it is autodetected
'''
entries = map(lambda x: x if os.path.isabs(x[0]) else
(os.path.abspath(os.path.join(self.base_path, x[0])), x[1]),
entries)
self.manifest = Manifest.from_paths(entries)
self.manifest.set_basedir(self.base_path)
def create_manifest_from_files_in(self, files_and_dirs):
#self.base_path = os.path.commonprefix(files_and_dirs)
entries = []
def dodir(dir):
for root, dirs, files in os.walk(dir):
for spec in os.walk(dir):
root, files = spec[0], spec[-1]
for name in files:
path = os.path.join(root, name)
if os.path.isfile(path):
@ -556,47 +466,49 @@ class OPFCreator(MetaInformation):
def create_spine(self, entries):
'''
Create the <spine> element. Must first call L{create_manifest}.
@param: List of paths
@type param: list of strings
Create the <spine> element. Must first call :method:`create_manifest`.
`entries`: List of paths
'''
self.spine = []
for path in entries:
if not os.path.isabs(path):
path = os.path.join(self.base_path, path)
if not path.startswith(self.base_path):
raise ValueError('Invalid entry %s for base path %s'%(path, self.base_path))
href = path[len(self.base_path)+1:]
in_manifest = False
for i, m in enumerate(self.manifest):
if m[0] == href:
in_manifest = True
break
if not in_manifest:
raise ValueError('%s is not in the manifest. (%s)'%(href, path))
self.spine.append(i)
entries = map(lambda x: x if os.path.isabs(x) else
os.path.abspath(os.path.join(self.base_path, x)), entries)
self.spine = Spine.from_paths(entries, self.manifest)
def set_toc(self, toc):
'''
Set the toc. You must call L{create_spine} before calling this
Set the toc. You must call :method:`create_spine` before calling this
method.
@param toc: A Table of Contents
@type toc: L{TOC}
`toc`: A :class:`TOC` object
'''
self.toc = toc
def create_guide(self, guide_element):
self.guide = Guide.from_opf_guide(guide_element, self.base_path)
self.guide.set_basedir(self.base_path)
def render(self, opf_stream, ncx_stream=None):
from calibre.resources import opf_template
from genshi.template import MarkupTemplate
from calibre.utils.genshi.template import MarkupTemplate
template = MarkupTemplate(opf_template)
if self.manifest:
self.manifest.set_basedir(self.base_path)
if not self.guide:
self.guide = Guide()
if self.cover:
cover = self.cover
if not os.path.isabs(cover):
cover = os.path.abspath(os.path.join(self.base_path, cover))
self.guide.set_cover(cover)
self.guide.set_basedir(self.base_path)
opf = template.generate(__appname__=__appname__, mi=self).render('xml')
opf_stream.write(opf)
opf_stream.flush()
toc = getattr(self, 'toc', None)
if toc is not None and ncx_stream is not None:
toc.render(ncx_stream, self.application_id)
ncx_stream.flush()
def option_parser():
    '''Return the option parser obtained from ``get_parser('opf')``.'''
    parser = get_parser('opf')
    return parser
@ -607,18 +519,35 @@ def main(args=sys.argv):
if len(args) != 2:
parser.print_help()
return 1
mi = MetaInformation(OPFReader(open(args[1], 'rb')))
mi = MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
write = False
if opts.title is not None:
mi.title = opts.title.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if opts.authors is not None:
aus = [i.strip().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') for i in opts.authors.split(',')]
mi.authors = aus
write = True
if opts.category is not None:
mi.category = opts.category.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
write = True
if opts.comment is not None:
mi.comments = opts.comment.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
mo = OPFCreator(os.getcwd(), mi)
mo.render(open(args[1], 'wb'))
write = True
if write:
mo = OPFCreator(os.path.dirname(args[1]), mi)
ncx = cStringIO.StringIO()
mo.render(open(args[1], 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
f = glob.glob(os.path.join(os.path.dirname(args[1]), '*.ncx'))
if f:
f = open(f[0], 'wb')
else:
f = open(os.path.splitext(args[1])[0]+'.ncx', 'wb')
f.write(ncx)
f.close()
print MetaInformation(OPFReader(open(args[1], 'rb'), os.path.abspath(os.path.dirname(args[1]))))
return 0
if __name__ == '__main__':

View File

@ -10,6 +10,7 @@
<dc:creator opf:role="aut" py:for="i, author in enumerate(mi.authors)" py:with="attrs={'file-as':mi.author_sort if i==0 else None}" py:attrs="attrs">${author}</dc:creator>
<dc:identifier scheme="${__appname__}" id="${__appname__}_id">${mi.application_id}</dc:identifier>
<dc:language>${mi.language if mi.language else 'Unknown'}</dc:language>
<dc:type py:if="mi.category">${mi.category}</dc:type>
<dc:description py:if="mi.comments">${mi.comments}</dc:description>
<dc:publisher py:if="mi.publisher">${mi.publisher}</dc:publisher>
@ -17,23 +18,28 @@
<series py:if="mi.series">${mi.series}</series>
<series-index py:if="mi.series_index is not None">${mi.series_index}</series-index>
<rating py:if="mi.rating is not None">${mi.rating}</rating>
<dc:subject py:if="mi.tags is not None" py:for="tag in mi.tags">${tag}</dc:subject>
<py:for each="tag in mi.tags">
<dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
</py:for>
</metadata>
<guide>
<reference py:if="mi.cover" type="cover" href="${mi.cover}" />
<guide py:if="getattr(mi, 'guide', None)">
<py:for each="ref in mi.guide">
<reference type="${ref.type}" href="${ref.href()}" py:with="attrs={'title': ref.title if ref.title else None}" py:attrs="attrs" />
</py:for>
</guide>
<manifest py:if="getattr(mi, 'manifest', None)">
<py:for each="i, m in enumerate(mi.manifest)">
<item id="${str(i)}" href="${m[0]}" media-type="${m[1]}" />
</py:for>
</manifest>
<spine py:if="getattr(mi, 'spine', None)"
py:with="attrs={'toc':'ncx' if mi.toc else None}" py:attrs="attrs">
<py:for each="idref in mi.spine">
<itemref idref="${str(idref)}" />
<py:for each="resource in mi.spine">
<itemref idref="${resource.id}" />
</py:for>
</spine>
<manifest py:if="getattr(mi, 'manifest', None)">
<py:for each="ref in mi.manifest">
<item id="${ref.id}" href="${ref.href()}" media-type="${ref.mime_type}" />
</py:for>
</manifest>
</package>

View File

@ -65,8 +65,8 @@ class TOC(list):
toc = opfreader.soup.find('guide').find('reference', attrs={'type':'toc'})['href']
except:
for item in opfreader.manifest:
if 'toc' in item.href.lower():
toc = item.href
if 'toc' in item.href().lower():
toc = item.href()
break
if toc is not None:
@ -120,6 +120,9 @@ class TOC(list):
process_navpoint(c, nd)
nm = soup.find('navmap')
if nm is None:
raise ValueError('NCX files must have a <navmap> element.')
for elem in nm:
if getattr(elem, 'name', None) == 'navpoint':
process_navpoint(elem, self)
@ -138,7 +141,7 @@ class TOC(list):
def render(self, stream, uid):
from calibre.resources import ncx_template
from genshi.template import MarkupTemplate
from calibre.utils.genshi.template import MarkupTemplate
doctype = ('ncx', "-//NISO//DTD ncx 2005-1//EN", "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd")
template = MarkupTemplate(ncx_template)
raw = template.generate(uid=uid, toc=self, __appname__=__appname__)

View File

@ -0,0 +1,159 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
# Maps an integer language code to an upper-case language name.
# NOTE(review): the numbers look like Windows primary language identifiers
# (the low byte of a LANGID) -- confirm against the MOBI header reader.
#
# NOTE(review): key 26 appears twice below ("CROATIAN" and "SERBIAN"). In a
# dict literal the later entry silently replaces the earlier one, so
# main_language[26] evaluates to "SERBIAN" and "CROATIAN" is never returned.
main_language = {
0 : "NEUTRAL",
54 : "AFRIKAANS",
28 : "ALBANIAN",
1 : "ARABIC",
43 : "ARMENIAN",
77 : "ASSAMESE",
44 : "AZERI",
45 : "BASQUE",
35 : "BELARUSIAN",
69 : "BENGALI",
2 : "BULGARIAN",
3 : "CATALAN",
4 : "CHINESE",
# Overwritten by the later 26 : "SERBIAN" entry
26 : "CROATIAN",
5 : "CZECH",
6 : "DANISH",
19 : "DUTCH",
9 : "ENGLISH",
37 : "ESTONIAN",
56 : "FAEROESE",
41 : "FARSI",
11 : "FINNISH",
12 : "FRENCH",
55 : "GEORGIAN",
7 : "GERMAN",
8 : "GREEK",
71 : "GUJARATI",
13 : "HEBREW",
57 : "HINDI",
14 : "HUNGARIAN",
15 : "ICELANDIC",
33 : "INDONESIAN",
16 : "ITALIAN",
17 : "JAPANESE",
75 : "KANNADA",
63 : "KAZAK",
87 : "KONKANI",
18 : "KOREAN",
38 : "LATVIAN",
39 : "LITHUANIAN",
47 : "MACEDONIAN",
62 : "MALAY",
76 : "MALAYALAM",
58 : "MALTESE",
78 : "MARATHI",
97 : "NEPALI",
20 : "NORWEGIAN",
72 : "ORIYA",
21 : "POLISH",
22 : "PORTUGUESE",
70 : "PUNJABI",
23 : "RHAETOROMANIC",
24 : "ROMANIAN",
25 : "RUSSIAN",
59 : "SAMI",
79 : "SANSKRIT",
# Same key as "CROATIAN" above; this is the value that takes effect
26 : "SERBIAN",
27 : "SLOVAK",
36 : "SLOVENIAN",
46 : "SORBIAN",
10 : "SPANISH",
48 : "SUTU",
65 : "SWAHILI",
29 : "SWEDISH",
73 : "TAMIL",
68 : "TATAR",
74 : "TELUGU",
30 : "THAI",
49 : "TSONGA",
50 : "TSWANA",
31 : "TURKISH",
34 : "UKRAINIAN",
32 : "URDU",
67 : "UZBEK",
42 : "VIETNAMESE",
52 : "XHOSA",
53 : "ZULU",
}
# Maps an integer sub-language code to an upper-case sub-language name.
#
# NOTE(review): the keys restart at 1 for every language family listed
# below. In a dict literal a repeated key silently replaces the earlier
# value, so only the LAST entry for each key survives. The mapping that is
# actually built is:
#     0     -> "NEUTRAL"
#     1     -> "UZBEK_LATIN"
#     2     -> "UZBEK_CYRILLIC"
#     3     -> "SERBIAN_CYRILLIC"
#     4..20 -> the "SPANISH_*" entries
# Every other name in this table is unreachable. A correct lookup presumably
# needs to be keyed on (main language, sub language) -- TODO confirm and
# restructure together with the code that reads this table.
sub_language = {
0 : "NEUTRAL",
1 : "ARABIC_SAUDI_ARABIA",
2 : "ARABIC_IRAQ",
3 : "ARABIC_EGYPT",
4 : "ARABIC_LIBYA",
5 : "ARABIC_ALGERIA",
6 : "ARABIC_MOROCCO",
7 : "ARABIC_TUNISIA",
8 : "ARABIC_OMAN",
9 : "ARABIC_YEMEN",
10 : "ARABIC_SYRIA",
11 : "ARABIC_JORDAN",
12 : "ARABIC_LEBANON",
13 : "ARABIC_KUWAIT",
14 : "ARABIC_UAE",
15 : "ARABIC_BAHRAIN",
16 : "ARABIC_QATAR",
1 : "AZERI_LATIN",
2 : "AZERI_CYRILLIC",
1 : "CHINESE_TRADITIONAL",
2 : "CHINESE_SIMPLIFIED",
3 : "CHINESE_HONGKONG",
4 : "CHINESE_SINGAPORE",
1 : "DUTCH",
2 : "DUTCH_BELGIAN",
1 : "FRENCH",
2 : "FRENCH_BELGIAN",
3 : "FRENCH_CANADIAN",
4 : "FRENCH_SWISS",
5 : "FRENCH_LUXEMBOURG",
6 : "FRENCH_MONACO",
1 : "GERMAN",
2 : "GERMAN_SWISS",
3 : "GERMAN_AUSTRIAN",
4 : "GERMAN_LUXEMBOURG",
5 : "GERMAN_LIECHTENSTEIN",
1 : "ITALIAN",
2 : "ITALIAN_SWISS",
1 : "KOREAN",
1 : "LITHUANIAN",
1 : "MALAY_MALAYSIA",
2 : "MALAY_BRUNEI_DARUSSALAM",
1 : "NORWEGIAN_BOKMAL",
2 : "NORWEGIAN_NYNORSK",
2 : "PORTUGUESE",
1 : "PORTUGUESE_BRAZILIAN",
2 : "SERBIAN_LATIN",
3 : "SERBIAN_CYRILLIC",
1 : "SPANISH",
2 : "SPANISH_MEXICAN",
4 : "SPANISH_GUATEMALA",
5 : "SPANISH_COSTA_RICA",
6 : "SPANISH_PANAMA",
7 : "SPANISH_DOMINICAN_REPUBLIC",
8 : "SPANISH_VENEZUELA",
9 : "SPANISH_COLOMBIA",
10 : "SPANISH_PERU",
11 : "SPANISH_ARGENTINA",
12 : "SPANISH_ECUADOR",
13 : "SPANISH_CHILE",
14 : "SPANISH_URUGUAY",
15 : "SPANISH_PARAGUAY",
16 : "SPANISH_BOLIVIA",
17 : "SPANISH_EL_SALVADOR",
18 : "SPANISH_HONDURAS",
19 : "SPANISH_NICARAGUA",
20 : "SPANISH_PUERTO_RICO",
1 : "SWEDISH",
2 : "SWEDISH_FINLAND",
1 : "UZBEK_LATIN",
2 : "UZBEK_CYRILLIC",
}

View File

@ -17,9 +17,10 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata.toc import TOC
class EXTHHeader(object):
@ -44,27 +45,24 @@ class EXTHHeader(object):
self.cover_offset, = struct.unpack('>L', content)
elif id == 202:
self.thumbnail_offset, = struct.unpack('>L', content)
pos += 3
stop = raw[pos:].find('\x00')
if stop > -1:
self.mi.title = raw[pos:pos+stop].decode(codec, 'ignore')
title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
if title:
self.mi.title = title.group(1).decode(codec, 'ignore')
def process_metadata(self, id, content, codec):
if id == 100:
aus = content.split(',')
authors = []
for a in aus:
authors.extend(a.split('&'))
self.mi.authors = [i.decode(codec, 'ignore') for i in authors]
self.mi.authors = [content.decode(codec, 'ignore').strip()]
elif id == 101:
self.mi.publisher = content.decode(codec, 'ignore')
self.mi.publisher = content.decode(codec, 'ignore').strip()
elif id == 103:
self.mi.comments = content.decode(codec, 'ignore')
self.mi.comments = content.decode(codec, 'ignore')
elif id == 104:
self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
elif id == 105:
self.mi.category = content.decode(codec, 'ignore')
if not self.mi.tags:
self.mi.tags = []
self.mi.tags.append(content.decode(codec, 'ignore'))
@ -74,6 +72,7 @@ class BookHeader(object):
self.compression_type = raw[:2]
self.records, self.records_size = struct.unpack('>HH', raw[8:12])
self.encryption_type, = struct.unpack('>H', raw[12:14])
self.doctype = raw[16:20]
self.length, self.type, self.codepage, self.unique_id, self.version = \
struct.unpack('>LLLLL', raw[20:40])
@ -98,11 +97,18 @@ class BookHeader(object):
if self.compression_type == 'DH':
self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
langcode = struct.unpack('!L', raw[0x5C:0x60])[0]
langid = langcode & 0xFF
sublangid = (langcode >> 10) & 0xFF
self.language = main_language.get(langid, 'ENGLISH')
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None
if self.exth_flag & 0x40:
self.exth = EXTHHeader(raw[16+self.length:], self.codec)
self.exth.mi.uid = self.unique_id
self.exth.mi.language = self.language
class MobiReader(object):
@ -156,7 +162,7 @@ class MobiReader(object):
processed_records = self.extract_text()
self.add_anchors()
self.processed_html = self.processed_html.decode(self.book_header.codec)
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
self.extract_images(processed_records, output_dir)
self.replace_page_breaks()
self.cleanup()
@ -166,27 +172,29 @@ class MobiReader(object):
self.processed_html)
soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<'))
guide = soup.find('guide')
for elem in soup.findAll(['metadata', 'guide']):
elem.extract()
htmlfile = os.path.join(output_dir, self.name+'.html')
for ref in guide.findAll('reference', href=True):
ref['href'] = os.path.basename(htmlfile)+ref['href']
open(htmlfile, 'wb').write(unicode(soup).encode('utf8'))
self.htmlfile = htmlfile
if self.book_header.exth is not None:
opf = self.create_opf(htmlfile)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'))
ncx = cStringIO.StringIO()
opf = self.create_opf(htmlfile, guide)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
ncx = ncx.getvalue()
if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
def cleanup(self):
self.processed_html = re.sub(r'<div height="0(em|%)"></div>', '',
self.processed_html)
self.processed_html = re.sub(r'<([^>]*) height="([^"]*)"',
r'<\1 style="margin-top: \2"',
self.processed_html)
self.processed_html = re.sub(r'<([^>]*) width="([^"]*)"',
r'<\1 style="text-indent: \2"',
self.processed_html)
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
self.processed_html = re.sub(r'<([^>]*) height="([^"]*)"', r'<\1 style="margin-top: \2"', self.processed_html)
self.processed_html = re.sub(r'<([^>]*) width="([^"]*)"', r'<\1 style="text-indent: \2"', self.processed_html)
def create_opf(self, htmlfile):
def create_opf(self, htmlfile, guide=None):
mi = self.book_header.exth.mi
opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'):
@ -198,6 +206,28 @@ class MobiReader(object):
opf.create_manifest(manifest)
opf.create_spine([os.path.basename(htmlfile)])
toc = None
if guide:
opf.create_guide(guide)
for ref in opf.guide:
if ref.type.lower() == 'toc':
toc = ref.href()
if toc:
index = self.processed_html.find('<a name="%s"'%toc.partition('#')[-1])
tocobj = None
if index > -1:
raw = '<html><body>'+self.processed_html[index:]
soup = BeautifulSoup(raw)
tocobj = TOC()
for a in soup.findAll('a', href=True):
try:
text = ''.join(a.findAll(text=True)).strip()
except:
text = ''
tocobj.add_item(toc.partition('#')[0], a['href'][1:], text)
if tocobj is not None:
opf.set_toc(tocobj)
return opf
@ -222,7 +252,6 @@ class MobiReader(object):
elif self.book_header.compression_type == '\x00\x01':
self.mobi_html = ''.join(text_sections)
else:
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
@ -235,7 +264,7 @@ class MobiReader(object):
def add_anchors(self):
positions = set([])
link_pattern = re.compile(r'<a\s+filepos=(\d+)', re.IGNORECASE)
link_pattern = re.compile(r'<[^<>]+filepos=[\'"]{0,1}(\d+)[^<>]*>', re.IGNORECASE)
for match in link_pattern.finditer(self.mobi_html):
positions.add(int(match.group(1)))
positions = list(positions)
@ -252,7 +281,10 @@ class MobiReader(object):
pos = end
self.processed_html += self.mobi_html[pos:]
self.processed_html = link_pattern.sub(lambda match: '<a href="#filepos%d"'%int(match.group(1)),
fpat = re.compile(r'filepos=[\'"]{0,1}(\d+)[\'"]{0,1}', re.IGNORECASE)
def fpos_to_href(match):
return fpat.sub('href="#filepos%d"'%int(match.group(1)), match.group())
self.processed_html = link_pattern.sub(fpos_to_href,
self.processed_html)
def extract_images(self, processed_records, output_dir):

View File

@ -3,12 +3,13 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from PyQt4.QtGui import QDialog, QMessageBox, QListWidgetItem, QIcon
from PyQt4.QtCore import SIGNAL, QTimer, Qt, QSize
from PyQt4.QtCore import SIGNAL, QTimer, Qt, QSize, QVariant
from calibre import islinux, Settings
from calibre.gui2.dialogs.config_ui import Ui_Dialog
from calibre.gui2 import qstring_to_unicode, choose_dir, error_dialog
from calibre.gui2.widgets import FilenamePattern
from calibre.ebooks import BOOK_EXTENSIONS
@ -59,6 +60,11 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
self.show_toolbar_text.setChecked(settings.get('show text in toolbar', True))
for ext in BOOK_EXTENSIONS:
self.single_format.addItem(ext.upper(), QVariant(ext))
single_format = settings.get('save to disk single format', 'lrf')
self.single_format.setCurrentIndex(BOOK_EXTENSIONS.index(single_format))
def compact(self, toggled):
d = Vacuum(self, self.db)
@ -89,6 +95,8 @@ class ConfigDialog(QDialog, Ui_Dialog):
settings.set('show text in toolbar', bool(self.show_toolbar_text.isChecked()))
pattern = self.filename_pattern.commit()
settings.set('filename pattern', pattern)
settings.set('save to disk single format', BOOK_EXTENSIONS[self.single_format.currentIndex()])
if not path or not os.path.exists(path) or not os.path.isdir(path):
d = error_dialog(self, _('Invalid database location'), _('Invalid database location ')+path+_('<br>Must be a directory.'))

View File

@ -1,4 +1,5 @@
<ui version="4.0" >
<author>Kovid Goyal</author>
<class>Dialog</class>
<widget class="QDialog" name="Dialog" >
<property name="geometry" >
@ -76,18 +77,10 @@
<property name="currentIndex" >
<number>0</number>
</property>
<widget class="QWidget" name="page" >
<property name="geometry" >
<rect>
<x>0</x>
<y>0</y>
<width>583</width>
<height>625</height>
</rect>
</property>
<layout class="QGridLayout" >
<item row="0" column="0" >
<layout class="QVBoxLayout" >
<widget class="QWidget" name="page_3" >
<layout class="QVBoxLayout" name="verticalLayout" >
<item>
<layout class="QVBoxLayout" name="_2" >
<item>
<widget class="QLabel" name="label" >
<property name="text" >
@ -99,7 +92,7 @@
</widget>
</item>
<item>
<layout class="QHBoxLayout" >
<layout class="QHBoxLayout" name="_3" >
<item>
<widget class="QLineEdit" name="location" />
</item>
@ -121,7 +114,7 @@
</item>
</layout>
</item>
<item row="1" column="0" >
<item>
<widget class="QCheckBox" name="roman_numerals" >
<property name="text" >
<string>Use &amp;Roman numerals for series number</string>
@ -131,9 +124,35 @@
</property>
</widget>
</item>
<item row="2" column="0" >
<layout class="QHBoxLayout" >
<item>
<item>
<layout class="QGridLayout" name="gridLayout_2" >
<item row="0" column="0" >
<widget class="QLabel" name="label_5" >
<property name="text" >
<string>Format for &amp;single file save:</string>
</property>
<property name="buddy" >
<cstring>single_format</cstring>
</property>
</widget>
</item>
<item row="0" column="1" >
<widget class="QComboBox" name="single_format" />
</item>
<item row="1" column="0" >
<widget class="QLabel" name="label_3" >
<property name="text" >
<string>&amp;Priority for conversion jobs:</string>
</property>
<property name="buddy" >
<cstring>priority</cstring>
</property>
</widget>
</item>
<item row="1" column="1" >
<widget class="QComboBox" name="priority" />
</item>
<item row="2" column="0" >
<widget class="QLabel" name="label_2" >
<property name="text" >
<string>Default network &amp;timeout:</string>
@ -143,7 +162,7 @@
</property>
</widget>
</item>
<item>
<item row="2" column="1" >
<widget class="QSpinBox" name="timeout" >
<property name="toolTip" >
<string>Set the default timeout for network fetches (i.e. anytime we go out to the internet to get information)</string>
@ -164,31 +183,78 @@
</item>
</layout>
</item>
<item row="3" column="0" >
<layout class="QHBoxLayout" >
<item>
<widget class="QLabel" name="label_3" >
<property name="text" >
<string>&amp;Priority for conversion jobs:</string>
</property>
<property name="buddy" >
<cstring>priority</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="priority" />
</item>
</layout>
<item>
<widget class="QGroupBox" name="groupBox_2" >
<property name="title" >
<string>Toolbar</string>
</property>
<layout class="QGridLayout" name="gridLayout" >
<item row="0" column="1" >
<widget class="QComboBox" name="toolbar_button_size" >
<item>
<property name="text" >
<string>Large</string>
</property>
</item>
<item>
<property name="text" >
<string>Medium</string>
</property>
</item>
<item>
<property name="text" >
<string>Small</string>
</property>
</item>
</widget>
</item>
<item row="0" column="0" >
<widget class="QLabel" name="label_4" >
<property name="text" >
<string>&amp;Button size in toolbar</string>
</property>
<property name="buddy" >
<cstring>toolbar_button_size</cstring>
</property>
</widget>
</item>
<item row="1" column="0" >
<widget class="QCheckBox" name="show_toolbar_text" >
<property name="text" >
<string>Show &amp;text in toolbar buttons</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item row="6" column="0" >
<item>
<widget class="QGroupBox" name="groupBox" >
<property name="title" >
<string>Select visible &amp;columns in library view</string>
</property>
<layout class="QGridLayout" name="_4" >
<item row="0" column="0" >
<widget class="QListWidget" name="columns" >
<property name="selectionMode" >
<enum>QAbstractItemView::NoSelection</enum>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="dirs_box" >
<property name="title" >
<string>Frequently used directories</string>
</property>
<layout class="QGridLayout" >
<layout class="QGridLayout" name="_5" >
<item row="0" column="0" >
<layout class="QHBoxLayout" >
<layout class="QHBoxLayout" name="_6" >
<item>
<widget class="QListWidget" name="directory_list" >
<property name="alternatingRowColors" >
@ -203,7 +269,7 @@
</widget>
</item>
<item>
<layout class="QVBoxLayout" >
<layout class="QVBoxLayout" name="_7" >
<item>
<spacer>
<property name="orientation" >
@ -278,83 +344,6 @@
</layout>
</widget>
</item>
<item row="7" column="0" >
<spacer>
<property name="orientation" >
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0" >
<size>
<width>392</width>
<height>16</height>
</size>
</property>
</spacer>
</item>
<item row="5" column="0" >
<widget class="QGroupBox" name="groupBox" >
<property name="title" >
<string>Select visible &amp;columns in library view</string>
</property>
<layout class="QGridLayout" >
<item row="0" column="0" >
<widget class="QListWidget" name="columns" >
<property name="selectionMode" >
<enum>QAbstractItemView::NoSelection</enum>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item row="4" column="0" >
<widget class="QGroupBox" name="groupBox_2" >
<property name="title" >
<string>Toolbar</string>
</property>
<layout class="QGridLayout" name="gridLayout" >
<item row="0" column="1" >
<widget class="QComboBox" name="toolbar_button_size" >
<item>
<property name="text" >
<string>Large</string>
</property>
</item>
<item>
<property name="text" >
<string>Medium</string>
</property>
</item>
<item>
<property name="text" >
<string>Small</string>
</property>
</item>
</widget>
</item>
<item row="0" column="0" >
<widget class="QLabel" name="label_4" >
<property name="text" >
<string>&amp;Button size in toolbar</string>
</property>
<property name="buddy" >
<cstring>toolbar_button_size</cstring>
</property>
</widget>
</item>
<item row="1" column="0" >
<widget class="QCheckBox" name="show_toolbar_text" >
<property name="text" >
<string>Show &amp;text in toolbar buttons</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="page_2" >
@ -362,8 +351,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>583</width>
<height>625</height>
<width>595</width>
<height>638</height>
</rect>
</property>
<layout class="QVBoxLayout" >

View File

@ -9,14 +9,15 @@
<x>0</x>
<y>0</y>
<width>830</width>
<height>700</height>
<height>642</height>
</rect>
</property>
<property name="windowTitle" >
<string>Fetch metadata</string>
</property>
<property name="windowIcon" >
<iconset resource="../images.qrc" >:/images/metadata.svg</iconset>
<iconset resource="../images.qrc" >
<normaloff>:/images/metadata.svg</normaloff>:/images/metadata.svg</iconset>
</property>
<layout class="QVBoxLayout" >
<item>
@ -107,7 +108,7 @@
<item>
<widget class="QDialogButtonBox" name="buttonBox" >
<property name="standardButtons" >
<set>QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok</set>
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>

View File

@ -13,13 +13,17 @@
<string>Details of job</string>
</property>
<property name="windowIcon" >
<iconset resource="../images.qrc" >:/images/view.svg</iconset>
<iconset resource="../images.qrc" >
<normaloff>:/images/view.svg</normaloff>:/images/view.svg</iconset>
</property>
<layout class="QGridLayout" >
<item row="0" column="0" >
<widget class="QTextBrowser" name="log" >
<property name="lineWrapMode" >
<enum>QTextEdit::NoWrap</enum>
<widget class="QTextEdit" name="log" >
<property name="undoRedoEnabled" >
<bool>false</bool>
</property>
<property name="readOnly" >
<bool>true</bool>
</property>
</widget>
</item>

View File

@ -35,7 +35,7 @@ class JobsDialog(QDialog, Ui_JobsDialog):
self.jobs_view.setModel(model)
self.model = model
self.setWindowModality(Qt.NonModal)
self.setWindowTitle(__appname__ + ' - Active Jobs')
self.setWindowTitle(__appname__ + _(' - Jobs'))
QObject.connect(self.jobs_view.model(), SIGNAL('modelReset()'),
self.jobs_view.resizeColumnsToContents)
QObject.connect(self.kill_button, SIGNAL('clicked()'),

View File

@ -1,8 +1,8 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, cPickle, codecs
import os, codecs
from PyQt4.QtCore import QObject, SIGNAL, Qt, QVariant, QByteArray
from PyQt4.QtCore import QObject, SIGNAL, Qt
from PyQt4.QtGui import QAbstractSpinBox, QLineEdit, QCheckBox, QDialog, \
QPixmap, QTextEdit, QListWidgetItem, QIcon
@ -48,10 +48,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
self.gui_mono_family.setModel(self.font_family_model)
self.load_saved_global_defaults()
def __init__(self, window, db, row):
QDialog.__init__(self, window)
Ui_LRFSingleDialog.__init__(self)
self.setupUi(self)
def populate_list(self):
self.__w = []
self.__w.append(QIcon(':/images/dialog_information.svg'))
self.item1 = QListWidgetItem(self.__w[-1], _("Metadata"), self.categoryList)
@ -61,11 +58,17 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
self.item3 = QListWidgetItem(self.__w[-1], _('Page Setup'), self.categoryList)
self.__w.append(QIcon(':/images/chapters.svg'))
self.item4 = QListWidgetItem(self.__w[-1], _('Chapter Detection'), self.categoryList)
def __init__(self, window, db, row):
QDialog.__init__(self, window)
Ui_LRFSingleDialog.__init__(self)
self.setupUi(self)
self.populate_list()
self.categoryList.setCurrentRow(0)
QObject.connect(self.categoryList, SIGNAL('itemEntered(QListWidgetItem *)'),
self.show_category_help)
QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), self.select_cover)
self.categoryList.leaveEvent = self.reset_help
#self.categoryList.leaveEvent = self.reset_help
self.reset_help()
self.selected_format = None
self.initialize_common()
@ -277,9 +280,9 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
obj.setWhatsThis(help)
self.option_map[guiname] = opt
obj.__class__.enterEvent = show_item_help
obj.leaveEvent = self.reset_help
#obj.leaveEvent = self.reset_help
self.preprocess.__class__.enterEvent = show_item_help
self.preprocess.leaveEvent = self.reset_help
#self.preprocess.leaveEvent = self.reset_help
def show_category_help(self, item):
@ -293,7 +296,8 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
self.set_help(help[text])
def set_help(self, msg):
self.help_view.setHtml('<html><body>%s</body></html>'%(msg,))
if msg and getattr(msg, 'strip', lambda:True)():
self.help_view.setHtml('<html><body>%s</body></html>'%(msg,))
def reset_help(self, *args):
self.set_help(_('<font color="gray">No help available</font>'))
@ -390,6 +394,7 @@ class LRFBulkDialog(LRFSingleDialog):
QDialog.__init__(self, window)
Ui_LRFSingleDialog.__init__(self)
self.setupUi(self)
self.populate_list()
self.categoryList.takeItem(0)
self.stack.removeWidget(self.stack.widget(0))
@ -399,7 +404,14 @@ class LRFBulkDialog(LRFSingleDialog):
self.setWindowTitle(_('Bulk convert ebooks to LRF'))
def accept(self):
self.cmdline = self.cmdline = [unicode(i) for i in self.build_commandline()]
self.cmdline = [unicode(i) for i in self.build_commandline()]
for meta in ('--title', '--author', '--publisher', '--comment'):
try:
index = self.cmdline.index(meta)
self.cmdline[index:index+2] = []
except ValueError:
continue
self.cover_file = None
QDialog.accept(self)

View File

@ -115,7 +115,7 @@
<item row="0" column="0" >
<widget class="QStackedWidget" name="stack" >
<property name="currentIndex" >
<number>0</number>
<number>3</number>
</property>
<widget class="QWidget" name="metadata_page" >
<property name="geometry" >
@ -818,6 +818,39 @@
</property>
</widget>
</item>
<item row="5" column="0" >
<widget class="QCheckBox" name="gui_render_tables_as_images" >
<property name="text" >
<string>&amp;Convert tables to images (good for large/complex tables)</string>
</property>
</widget>
</item>
<item row="6" column="0" >
<widget class="QLabel" name="label_27" >
<property name="text" >
<string>&amp;Multiplier for text size in rendered tables:</string>
</property>
<property name="buddy" >
<cstring>gui_text_size_multiplier_for_rendered_tables</cstring>
</property>
</widget>
</item>
<item row="6" column="1" >
<widget class="QDoubleSpinBox" name="gui_text_size_multiplier_for_rendered_tables" >
<property name="enabled" >
<bool>false</bool>
</property>
<property name="decimals" >
<number>2</number>
</property>
<property name="minimum" >
<double>0.100000000000000</double>
</property>
<property name="value" >
<double>1.000000000000000</double>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="chapterdetection_page" >
@ -918,6 +951,19 @@
<item row="2" column="1" >
<widget class="QLineEdit" name="gui_force_page_break_before_attr" />
</item>
<item row="3" column="0" >
<widget class="QLabel" name="label_28" >
<property name="text" >
<string>Detect chapter &amp;at tag:</string>
</property>
<property name="buddy" >
<cstring>gui_chapter_attr</cstring>
</property>
</widget>
</item>
<item row="3" column="1" >
<widget class="QLineEdit" name="gui_chapter_attr" />
</item>
</layout>
</widget>
</item>
@ -1048,8 +1094,8 @@ p, li { white-space: pre-wrap; }
<slot>setCurrentIndex(int)</slot>
<hints>
<hint type="sourcelabel" >
<x>191</x>
<y>236</y>
<x>184</x>
<y>279</y>
</hint>
<hint type="destinationlabel" >
<x>368</x>
@ -1064,8 +1110,8 @@ p, li { white-space: pre-wrap; }
<slot>setDisabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>428</x>
<y>89</y>
<x>650</x>
<y>122</y>
</hint>
<hint type="destinationlabel" >
<x>788</x>
@ -1073,22 +1119,6 @@ p, li { white-space: pre-wrap; }
</hint>
</hints>
</connection>
<connection>
<sender>gui_header</sender>
<signal>toggled(bool)</signal>
<receiver>gui_headerformat</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>348</x>
<y>340</y>
</hint>
<hint type="destinationlabel" >
<x>823</x>
<y>372</y>
</hint>
</hints>
</connection>
<connection>
<sender>gui_disable_chapter_detection</sender>
<signal>toggled(bool)</signal>
@ -1096,12 +1126,60 @@ p, li { white-space: pre-wrap; }
<slot>setDisabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>321</x>
<y>78</y>
<x>543</x>
<y>122</y>
</hint>
<hint type="destinationlabel" >
<x>322</x>
<y>172</y>
<x>544</x>
<y>211</y>
</hint>
</hints>
</connection>
<connection>
<sender>gui_render_tables_as_images</sender>
<signal>toggled(bool)</signal>
<receiver>gui_text_size_multiplier_for_rendered_tables</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>298</x>
<y>398</y>
</hint>
<hint type="destinationlabel" >
<x>660</x>
<y>435</y>
</hint>
</hints>
</connection>
<connection>
<sender>gui_header</sender>
<signal>toggled(bool)</signal>
<receiver>gui_headerformat</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>330</x>
<y>367</y>
</hint>
<hint type="destinationlabel" >
<x>823</x>
<y>372</y>
</hint>
</hints>
</connection>
<connection>
<sender>gui_disable_chapter_detection</sender>
<signal>toggled(bool)</signal>
<receiver>gui_chapter_attr</receiver>
<slot>setDisabled(bool)</slot>
<hints>
<hint type="sourcelabel" >
<x>344</x>
<y>107</y>
</hint>
<hint type="destinationlabel" >
<x>489</x>
<y>465</y>
</hint>
</hints>
</connection>

View File

@ -8,6 +8,7 @@ from PyQt4.QtGui import QDialog
from calibre.gui2 import qstring_to_unicode
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor
class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def __init__(self, window, rows, db):
@ -20,8 +21,6 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.write_rating = False
self.changed = False
QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync)
QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.series_changed)
QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.series_changed)
QObject.connect(self.rating, SIGNAL('valueChanged(int)'), self.rating_changed)
all_series = self.db.all_series()
@ -31,9 +30,17 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.series.addItem(name)
self.series.lineEdit().setText('')
QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.series_changed)
QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.series_changed)
QObject.connect(self.tag_editor_button, SIGNAL('clicked()'), self.tag_editor)
self.exec_()
def tag_editor(self):
d = TagEditor(self, self.db, None)
d.exec_()
if d.result() == QDialog.Accepted:
tag_string = ', '.join(d.tags)
self.tags.setText(tag_string)
def sync(self):
for id in self.ids:
@ -51,7 +58,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.db.set_publisher(id, pub)
tags = qstring_to_unicode(self.tags.text()).strip()
if tags:
tags = tags.split(',')
tags = map(lambda x: x.strip(), tags.split(','))
self.db.set_tags(id, tags, append=True)
remove_tags = qstring_to_unicode(self.remove_tags.text()).strip()
if remove_tags:

View File

@ -13,9 +13,10 @@
<string>Edit Meta information</string>
</property>
<property name="windowIcon" >
<iconset resource="../images.qrc" >:/images/edit_input.svg</iconset>
<iconset resource="../images.qrc" >
<normaloff>:/images/edit_input.svg</normaloff>:/images/edit_input.svg</iconset>
</property>
<layout class="QGridLayout" >
<layout class="QGridLayout" name="gridLayout_2" >
<item row="0" column="0" >
<widget class="QSplitter" name="splitter" >
<property name="orientation" >
@ -26,16 +27,7 @@
<property name="spacing" >
<number>6</number>
</property>
<property name="leftMargin" >
<number>0</number>
</property>
<property name="topMargin" >
<number>0</number>
</property>
<property name="rightMargin" >
<number>0</number>
</property>
<property name="bottomMargin" >
<property name="margin" >
<number>0</number>
</property>
<item>
@ -43,7 +35,7 @@
<property name="title" >
<string>Meta information</string>
</property>
<layout class="QGridLayout" >
<layout class="QGridLayout" name="gridLayout" >
<item row="0" column="0" >
<widget class="QLabel" name="label_2" >
<property name="text" >
@ -57,7 +49,7 @@
</property>
</widget>
</item>
<item row="0" column="1" >
<item row="0" column="1" colspan="2" >
<widget class="QLineEdit" name="authors" >
<property name="toolTip" >
<string>Change the author(s) of this book. Multiple authors should be separated by a comma</string>
@ -77,7 +69,7 @@
</property>
</widget>
</item>
<item row="1" column="1" >
<item row="1" column="1" colspan="2" >
<widget class="QLineEdit" name="author_sort" >
<property name="toolTip" >
<string>Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles.</string>
@ -94,7 +86,7 @@
</property>
</widget>
</item>
<item row="2" column="1" >
<item row="2" column="1" colspan="2" >
<widget class="QSpinBox" name="rating" >
<property name="toolTip" >
<string>Rating of this book. 0-5 stars</string>
@ -126,7 +118,7 @@
</property>
</widget>
</item>
<item row="3" column="1" >
<item row="3" column="1" colspan="2" >
<widget class="QLineEdit" name="publisher" >
<property name="toolTip" >
<string>Change the publisher of this book</string>
@ -153,6 +145,20 @@
</property>
</widget>
</item>
<item row="4" column="2" >
<widget class="QToolButton" name="tag_editor_button" >
<property name="toolTip" >
<string>Open Tag Editor</string>
</property>
<property name="text" >
<string>Open Tag Editor</string>
</property>
<property name="icon" >
<iconset resource="../images.qrc" >
<normaloff>:/images/chapters.svg</normaloff>:/images/chapters.svg</iconset>
</property>
</widget>
</item>
<item row="5" column="0" >
<widget class="QLabel" name="label" >
<property name="text" >
@ -163,7 +169,7 @@
</property>
</widget>
</item>
<item row="5" column="1" >
<item row="5" column="1" colspan="2" >
<widget class="QLineEdit" name="remove_tags" >
<property name="toolTip" >
<string>Comma separated list of tags to remove from the books. </string>
@ -218,7 +224,7 @@
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons" >
<set>QDialogButtonBox::Cancel|QDialogButtonBox::NoButton|QDialogButtonBox::Ok</set>
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>

View File

@ -9,14 +9,17 @@ from calibre.gui2 import question_dialog, error_dialog
class TagEditor(QDialog, Ui_TagEditor):
def __init__(self, window, db, index):
def __init__(self, window, db, index=None):
QDialog.__init__(self, window)
Ui_TagEditor.__init__(self)
self.setupUi(self)
self.db = db
self.index = index
tags = self.db.tags(self.index)
if self.index is not None:
tags = self.db.tags(self.index)
else:
tags = []
if tags:
tags = [tag.lower().strip() for tag in tags.split(',') if tag.strip()]
tags.sort()

View File

@ -2946,13 +2946,6 @@
id="defs155" />
<clipPath
id="XMLID_216_">
<use
xlink:href="#XMLID_36_"
id="use159"
x="0"
y="0"
width="121"
height="120" />
</clipPath>
<g
clip-path="url(#XMLID_216_)"

Before

Width:  |  Height:  |  Size: 116 KiB

After

Width:  |  Height:  |  Size: 116 KiB

View File

@ -1345,15 +1345,6 @@
y1="60.2349"
x2="78.0723"
y2="59.9854" />
<linearGradient
inkscape:collect="always"
xlink:href="#XMLID_36_"
id="linearGradient34419"
gradientUnits="userSpaceOnUse"
x1="87.5879"
y1="62.2827"
x2="87.5879"
y2="62.0488" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient5227"

Before

Width:  |  Height:  |  Size: 484 KiB

After

Width:  |  Height:  |  Size: 484 KiB

View File

@ -919,15 +919,6 @@
y1="230.0503"
x2="22.5229"
y2="230.0503" />
<linearGradient
inkscape:collect="always"
xlink:href="#XMLID_36_"
id="linearGradient4707"
gradientUnits="userSpaceOnUse"
x1="61.4678"
y1="235.5952"
x2="119.1562"
y2="235.5952" />
<linearGradient
inkscape:collect="always"
xlink:href="#XMLID_38_"

Before

Width:  |  Height:  |  Size: 47 KiB

After

Width:  |  Height:  |  Size: 46 KiB

View File

@ -86,16 +86,33 @@ class DeviceJob(Job):
class ConversionJob(Job):
''' Jobs that involve conversion of content.'''
def run(self):
last_traceback, exception = None, None
try:
self.result, exception, last_traceback, self.log = \
self.server.run(self.id, self.func, self.args, self.kwargs)
except Exception, err:
last_traceback = traceback.format_exc()
exception = (exception.__class__.__name__, unicode(str(err), 'utf8', 'replace'))
def __init__(self, *args, **kwdargs):
Job.__init__(self, *args, **kwdargs)
self.log = ''
self.last_traceback, self.exception = last_traceback, exception
def run(self):
result = None
self.server.run_job(self.id, self.func, progress=self.progress,
args=self.args, kwdargs=self.kwargs,
output=self.output)
res = None
while res is None:
time.sleep(2)
res = self.server.result(self.id)
if res is None:
exception, tb = 'UnknownError: This should not have happened', ''
else:
result, exception, tb = res
self.result, self.last_traceback, self.exception = result, tb, exception
def output(self, msg):
if self.log is None:
self.log = ''
self.log += msg
self.emit(SIGNAL('output_received()'))
def formatted_log(self):
return '<h2>Log:</h2><pre>%s</pre>'%self.log
def notify(self):
self.emit(SIGNAL('jobdone(PyQt_PyObject, PyQt_PyObject, PyQt_PyObject, PyQt_PyObject, PyQt_PyObject, PyQt_PyObject)'),
@ -109,10 +126,13 @@ class ConversionJob(Job):
def formatted_error(self):
if self.exception is None:
return ''
ans = u'<p><b>%s</b>: %s</p>'%self.exception
ans = u'<p><b>%s</b>:'%self.exception
ans += '<h2>Traceback:</h2><pre>%s</pre>'%self.last_traceback
return ans
def progress(self, percent, msg):
self.emit(SIGNAL('update_progress(int, PyQt_PyObject)'), self.id, percent)
class JobManager(QAbstractTableModel):
PRIORITY = {'Idle' : QThread.IdlePriority,
@ -149,9 +169,9 @@ class JobManager(QAbstractTableModel):
try:
if isinstance(job, DeviceJob):
job.terminate()
self.process_server.kill(job.id)
except:
continue
self.process_server.killall()
def timerEvent(self, event):
if event.timerId() == self.timer_id:
@ -241,7 +261,10 @@ class JobManager(QAbstractTableModel):
id = self.next_id
job = job_class(id, description, slot, priority, *args, **kwargs)
job.server = self.process_server
QObject.connect(job, SIGNAL('status_update(int, int)'), self.status_update, Qt.QueuedConnection)
QObject.connect(job, SIGNAL('status_update(int, int)'), self.status_update,
Qt.QueuedConnection)
self.connect(job, SIGNAL('update_progress(int, PyQt_PyObject)'),
self.update_progress, Qt.QueuedConnection)
self.update_lock.lock()
self.add_queue.append(job)
self.update_lock.unlock()
@ -358,10 +381,19 @@ class JobManager(QAbstractTableModel):
_('Cannot kill already completed jobs.')).exec_()
return
if status == 1:
error_dialog(gui_parent, _('Cannot kill job'),
_('Cannot kill waiting jobs.')).exec_()
return
self.process_server.kill(job.id)
self.update_lock.lock()
try:
self.waiting_jobs.remove(job)
self.finished_jobs.append(job)
self.emit(SIGNAL('job_done(int)'), job.id)
job.result = self.process_server.KILL_RESULT
finally:
self.update_lock.unlock()
else:
self.process_server.kill(job.id)
self.reset()
if len(self.running_jobs) + len(self.waiting_jobs) == 0:
self.emit(SIGNAL('no_more_jobs()'))
class DetailView(QDialog, Ui_Dialog):
@ -370,11 +402,14 @@ class DetailView(QDialog, Ui_Dialog):
self.setupUi(self)
self.setWindowTitle(job.description)
self.job = job
txt = self.job.formatted_error() + self.job.formatted_log()
self.connect(self.job, SIGNAL('output_received()'), self.update)
self.update()
def update(self):
    '''
    Refresh the details view: show the job's formatted error followed by its
    formatted log (or a placeholder when both are empty), then scroll the
    view to the bottom.
    '''
    details = self.job.formatted_error() + self.job.formatted_log()
    self.log.setHtml(details or 'No details available')
    scrollbar = self.log.verticalScrollBar()
    scrollbar.setValue(scrollbar.maximum())

View File

@ -150,9 +150,13 @@ class BooksModel(QAbstractTableModel):
''' Return list indices of all cells in index.row()'''
return [ self.index(index.row(), c) for c in range(self.columnCount(None))]
def save_to_disk(self, rows, path, single_dir=False, single_format=None):
    '''
    Export the books at ``rows`` to the directory ``path``.

    @param rows: Objects exposing a ``row()`` method (e.g. model indexes)
    @param path: Destination directory
    @param single_dir: Passed through to ``db.export_to_dir``
    @param single_format: If not None, only this format is exported, via
                          ``db.export_single_format_to_dir``
    @return: Whatever the database export call returns
    '''
    rows = [row.row() for row in rows]
    # Exactly one export is performed. The superseded unconditional
    # export_to_dir call would have exported everything a second time.
    if single_format is None:
        return self.db.export_to_dir(path, rows, self.sorted_on[0] == 1, single_dir=single_dir)
    return self.db.export_single_format_to_dir(path, rows, single_format)
def delete_books(self, indices):
ids = [ self.id(i) for i in indices ]
@ -312,7 +316,7 @@ class BooksModel(QAbstractTableModel):
metadata.append(mi)
return metadata
def get_preferred_formats(self, rows, formats):
def get_preferred_formats(self, rows, formats, paths=False):
ans = []
for row in (row.row() for row in rows):
format = None
@ -323,14 +327,15 @@ class BooksModel(QAbstractTableModel):
if format:
pt = PersistentTemporaryFile(suffix='.'+format)
pt.write(self.db.format(row, format))
pt.seek(0)
pt.flush()
pt.close() if paths else pt.seek(0)
ans.append(pt)
else:
ans.append(None)
return ans
def id(self, row):
    '''
    Return the database id for ``row``.

    ``row`` may be an object with a ``row()`` method (such as a model index)
    or a plain row number; an earlier unconditional ``row.row()`` return made
    the plain-number case unreachable.
    '''
    row_number = row.row() if hasattr(row, 'row') else row
    return self.db.id(row_number)
def title(self, row_number):
    '''
    Return the title the database reports for ``row_number``.
    '''
    database = self.db
    return database.title(row_number)

View File

@ -1,13 +1,12 @@
from calibre.gui2.library import SearchBox
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, logging, os, traceback, time, cPickle
import sys, logging, os, traceback, time
from PyQt4.QtGui import QKeySequence, QPainter, QDialog, QSpinBox, QSlider
from PyQt4.QtCore import Qt, QObject, SIGNAL, QCoreApplication, QThread, \
QVariant
from PyQt4.QtGui import QKeySequence, QPainter, QDialog, QSpinBox, QSlider, QIcon
from PyQt4.QtCore import Qt, QObject, SIGNAL, QCoreApplication, QThread
from calibre import __appname__, __version__, __author__, setup_cli_handlers, islinux, Settings
from calibre import __appname__, setup_cli_handlers, islinux, Settings
from calibre.ebooks.lrf.lrfparser import LRFDocument
from calibre.gui2 import ORG_NAME, APP_UID, error_dialog, choose_files, Application
@ -57,7 +56,7 @@ class Config(QDialog, Ui_ViewerConfig):
class Main(MainWindow, Ui_MainWindow):
def __init__(self, logger, opts, parent=None):
MainWindow.__init__(self, parent)
MainWindow.__init__(self, opts, parent)
Ui_MainWindow.__init__(self)
self.setupUi(self)
self.setAttribute(Qt.WA_DeleteOnClose)
@ -263,9 +262,12 @@ def file_renderer(stream, opts, parent=None, logger=None):
def option_parser():
from optparse import OptionParser
parser = OptionParser(usage='%prog book.lrf', version=__appname__+' '+__version__,
epilog='Created by ' + __author__)
from calibre.gui2.main_window import option_parser
parser = option_parser('''\
%prog [options] book.lrf
Read the LRF ebook book.lrf
''')
parser.add_option('--verbose', default=False, action='store_true', dest='verbose',
help='Print more information about the rendering process')
parser.add_option('--visual-debug', help='Turn on visual aids to debugging the rendering engine',
@ -283,7 +285,7 @@ def normalize_settings(parser, opts):
for opt in parser.option_list:
if not opt.dest:
continue
if getattr(opts, opt.dest) == opt.default:
if getattr(opts, opt.dest) == opt.default and hasattr(saved_opts, opt.dest):
continue
setattr(saved_opts, opt.dest, getattr(opts, opt.dest))
return saved_opts
@ -298,6 +300,7 @@ def main(args=sys.argv, logger=None):
pid = os.fork() if islinux else -1
if pid <= 0:
app = Application(args)
app.setWindowIcon(QIcon(':/images/viewer.svg'))
QCoreApplication.setOrganizationName(ORG_NAME)
QCoreApplication.setApplicationName(APP_UID)
opts = normalize_settings(parser, opts)

View File

@ -10,7 +10,8 @@ from PyQt4.QtGui import QPixmap, QColor, QPainter, QMenu, QIcon, QMessageBox, \
from PyQt4.QtSvg import QSvgRenderer
from calibre import __version__, __appname__, islinux, sanitize_file_name, \
Settings, pictureflowerror, iswindows, isosx
Settings, pictureflowerror, iswindows, isosx,\
preferred_encoding
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.metadata.meta import get_metadata, get_filename_pat, set_filename_pat
from calibre.devices.errors import FreeSpaceError
@ -23,7 +24,7 @@ from calibre.gui2 import APP_UID, warning_dialog, choose_files, error_dialog, \
from calibre.gui2.cover_flow import CoverFlow, DatabaseImages
from calibre.library.database import LibraryDatabase
from calibre.gui2.update import CheckForUpdates
from calibre.gui2.main_window import MainWindow
from calibre.gui2.main_window import MainWindow, option_parser
from calibre.gui2.main_ui import Ui_MainWindow
from calibre.gui2.device import DeviceDetector, DeviceManager
from calibre.gui2.status import StatusBar
@ -58,8 +59,8 @@ class Main(MainWindow, Ui_MainWindow):
p.end()
self.default_thumbnail = (pixmap.width(), pixmap.height(), pixmap_to_data(pixmap))
def __init__(self, single_instance, parent=None):
MainWindow.__init__(self, parent)
def __init__(self, single_instance, opts, parent=None):
MainWindow.__init__(self, opts, parent)
self.single_instance = single_instance
if self.single_instance is not None:
self.connect(self.single_instance, SIGNAL('message_received(PyQt_PyObject)'),
@ -77,7 +78,6 @@ class Main(MainWindow, Ui_MainWindow):
self.conversion_jobs = {}
self.persistent_files = []
self.metadata_dialogs = []
self.viewer_job_id = 1
self.default_thumbnail = None
self.device_error_dialog = ConversionErrorDialog(self, _('Error communicating with device'), ' ')
self.device_error_dialog.setModal(Qt.NonModal)
@ -113,6 +113,19 @@ class Main(MainWindow, Ui_MainWindow):
sm = QMenu()
sm.addAction(QIcon(':/images/reader.svg'), _('Send to main memory'))
sm.addAction(QIcon(':/images/sd.svg'), _('Send to storage card'))
sm.addAction(QIcon(':/images/reader.svg'), _('Send to main memory')+' '+_('and delete from library'))
sm.addAction(QIcon(':/images/sd.svg'), _('Send to storage card')+' '+_('and delete from library'))
sm.addSeparator()
sm.addAction(_('Send to storage card by default'))
sm.actions()[-1].setCheckable(True)
def default_sync(checked):
Settings().set('send to device by default', bool(checked))
QObject.disconnect(self.action_sync, SIGNAL("triggered(bool)"), self.sync_to_main_memory)
QObject.disconnect(self.action_sync, SIGNAL("triggered(bool)"), self.sync_to_card)
QObject.connect(self.action_sync, SIGNAL("triggered(bool)"), self.sync_to_card if checked else self.sync_to_main_memory)
QObject.connect(sm.actions()[-1], SIGNAL('toggled(bool)'), default_sync)
sm.actions()[-1].setChecked(Settings().get('send to device by default', False))
default_sync(sm.actions()[-1].isChecked())
self.sync_menu = sm # Needed
md = QMenu()
md.addAction(_('Edit metadata individually'))
@ -129,14 +142,16 @@ class Main(MainWindow, Ui_MainWindow):
QObject.connect(self.add_menu.actions()[2], SIGNAL("triggered(bool)"), self.add_recursive_multiple)
QObject.connect(self.action_del, SIGNAL("triggered(bool)"), self.delete_books)
QObject.connect(self.action_edit, SIGNAL("triggered(bool)"), self.edit_metadata)
QObject.connect(md.actions()[0], SIGNAL('triggered(bool)'), self.edit_metadata)
QObject.connect(md.actions()[0], SIGNAL('triggered(bool)'), partial(self.edit_metadata, bulk=False))
QObject.connect(md.actions()[1], SIGNAL('triggered(bool)'), self.edit_bulk_metadata)
QObject.connect(self.action_sync, SIGNAL("triggered(bool)"), self.sync_to_main_memory)
QObject.connect(sm.actions()[0], SIGNAL('triggered(bool)'), self.sync_to_main_memory)
QObject.connect(sm.actions()[1], SIGNAL('triggered(bool)'), self.sync_to_card)
QObject.connect(sm.actions()[2], SIGNAL('triggered(bool)'), partial(self.sync_to_main_memory, delete_from_library=True))
QObject.connect(sm.actions()[3], SIGNAL('triggered(bool)'), partial(self.sync_to_card, delete_from_library=True))
self.save_menu = QMenu()
self.save_menu.addAction(_('Save to disk'))
self.save_menu.addAction(_('Save to disk in a single directory'))
self.save_menu.addAction(_('Save only %s format to disk')%Settings().get('save to disk single format', 'lrf').upper())
self.view_menu = QMenu()
self.view_menu.addAction(_('View'))
@ -145,6 +160,7 @@ class Main(MainWindow, Ui_MainWindow):
QObject.connect(self.action_save, SIGNAL("triggered(bool)"), self.save_to_disk)
QObject.connect(self.save_menu.actions()[0], SIGNAL("triggered(bool)"), self.save_to_disk)
QObject.connect(self.save_menu.actions()[1], SIGNAL("triggered(bool)"), self.save_to_single_dir)
QObject.connect(self.save_menu.actions()[2], SIGNAL("triggered(bool)"), self.save_single_format_to_disk)
QObject.connect(self.action_view, SIGNAL("triggered(bool)"), self.view_book)
QObject.connect(self.view_menu.actions()[0], SIGNAL("triggered(bool)"), self.view_book)
QObject.connect(self.view_menu.actions()[1], SIGNAL("triggered(bool)"), self.view_specific_format)
@ -241,8 +257,6 @@ class Main(MainWindow, Ui_MainWindow):
self.setMaximumHeight(available_height())
def sync_cf_to_listview(self, index, *args):
if not hasattr(index, 'row') and self.library_view.currentIndex().row() != index:
index = self.library_view.model().index(index, 0)
@ -264,14 +278,6 @@ class Main(MainWindow, Ui_MainWindow):
elif msg.startswith('refreshdb:'):
self.library_view.model().resort()
self.library_view.model().research()
elif msg.startswith('progress:'):
try:
fields = msg.split(':')
job_id, percent = fields[1:3]
job_id, percent = int(job_id), float(percent)
self.job_manager.update_progress(job_id, percent)
except:
pass
else:
print msg
@ -475,7 +481,7 @@ class Main(MainWindow, Ui_MainWindow):
else:
self.upload_books(paths, names, infos, on_card=on_card)
def upload_books(self, files, names, metadata, on_card=False):
def upload_books(self, files, names, metadata, on_card=False, memory=None):
'''
Upload books to device.
@param files: List of either paths to files or file like objects
@ -486,13 +492,14 @@ class Main(MainWindow, Ui_MainWindow):
files, names, on_card=on_card,
job_extra_description=titles
)
self.upload_memory[id] = (metadata, on_card)
self.upload_memory[id] = (metadata, on_card, memory)
def books_uploaded(self, id, description, result, exception, formatted_traceback):
'''
Called once books have been uploaded.
'''
metadata, on_card = self.upload_memory.pop(id)
metadata, on_card, memory = self.upload_memory.pop(id)
if exception:
if isinstance(exception, FreeSpaceError):
where = 'in main memory.' if 'memory' in str(exception) else 'on the storage card.'
@ -512,6 +519,9 @@ class Main(MainWindow, Ui_MainWindow):
view = self.card_view if on_card else self.memory_view
view.model().resort(reset=False)
view.model().research()
if memory[1]:
rows = map(self.library_view.model().db.index, memory[1])
self.library_view.model().delete_books(rows)
############################################################################
@ -559,17 +569,19 @@ class Main(MainWindow, Ui_MainWindow):
############################################################################
############################### Edit metadata ##############################
def edit_metadata(self, checked):
def edit_metadata(self, checked, bulk=None):
'''
Edit metadata of selected books in library individually.
Edit metadata of selected books in library.
'''
rows = self.library_view.selectionModel().selectedRows()
if len(rows) > 1:
return self.edit_bulk_metadata(checked)
if not rows or len(rows) == 0:
d = error_dialog(self, _('Cannot edit metadata'), _('No books selected'))
d.exec_()
return
if bulk or (bulk is None and len(rows) > 1):
return self.edit_bulk_metadata(checked)
for row in rows:
d = MetadataSingleDialog(self, row.row(),
self.library_view.model().db)
@ -594,11 +606,11 @@ class Main(MainWindow, Ui_MainWindow):
############################################################################
############################# Syncing to device#############################
def sync_to_main_memory(self, checked, delete_from_library=False):
    '''
    Send the selected books to the device's main memory.

    @param checked: Value delivered by the ``triggered(bool)`` signal; unused
    @param delete_from_library: Forwarded to ``sync_to_device``
    '''
    # Single definition: the older one-argument variant was shadowed by this
    # one and has been dropped.
    self.sync_to_device(False, delete_from_library)
def sync_to_card(self, checked, delete_from_library=False):
    '''
    Send the selected books to the device's storage card.

    @param checked: Value delivered by the ``triggered(bool)`` signal; unused
    @param delete_from_library: Forwarded to ``sync_to_device``
    '''
    # Single definition: the older one-argument variant was shadowed by this
    # one and has been dropped.
    self.sync_to_device(True, delete_from_library)
def cover_to_thumbnail(self, data):
p = QPixmap()
@ -609,7 +621,7 @@ class Main(MainWindow, Ui_MainWindow):
p = p.scaledToHeight(ht, Qt.SmoothTransformation)
return (p.width(), p.height(), pixmap_to_data(p))
def sync_to_device(self, on_card):
def sync_to_device(self, on_card, delete_from_library):
rows = self.library_view.selectionModel().selectedRows()
if not self.device_manager or not rows or len(rows) == 0:
return
@ -620,8 +632,9 @@ class Main(MainWindow, Ui_MainWindow):
if cdata:
mi['cover'] = self.cover_to_thumbnail(cdata)
metadata = iter(metadata)
files = self.library_view.model().get_preferred_formats(rows,
self.device_manager.device_class.FORMATS)
_files = self.library_view.model().get_preferred_formats(rows,
self.device_manager.device_class.FORMATS, paths=True)
files = [f.name for f in _files]
bad, good, gf, names = [], [], [], []
for f in files:
mi = metadata.next()
@ -636,7 +649,9 @@ class Main(MainWindow, Ui_MainWindow):
try:
smi = MetaInformation(mi['title'], aus2)
smi.comments = mi.get('comments', None)
set_metadata(f, smi, f.name.rpartition('.')[2])
_f = open(f, 'r+b')
set_metadata(_f, smi, f.rpartition('.')[2])
_f.close()
except:
print 'Error setting metadata in book:', mi['title']
traceback.print_exc()
@ -653,8 +668,9 @@ class Main(MainWindow, Ui_MainWindow):
prefix = prefix.encode('ascii', 'ignore')
else:
prefix = prefix.decode('ascii', 'ignore').encode('ascii', 'ignore')
names.append('%s_%d%s'%(prefix, id, os.path.splitext(f.name)[1]))
self.upload_books(gf, names, good, on_card)
names.append('%s_%d%s'%(prefix, id, os.path.splitext(f)[1]))
remove = [self.library_view.model().id(r) for r in rows] if delete_from_library else []
self.upload_books(gf, names, good, on_card, memory=(_files, remove))
self.status_bar.showMessage(_('Sending books to device.'), 5000)
if bad:
bad = '\n'.join('<li>%s</li>'%(i,) for i in bad)
@ -666,20 +682,32 @@ class Main(MainWindow, Ui_MainWindow):
############################################################################
############################## Save to disk ################################
def save_single_format_to_disk(self, checked):
    '''
    Save the selected books to disk in one format only. The format is read
    from the 'save to disk single format' setting (default 'lrf') and
    ``save_to_disk`` is invoked with ``single_dir`` set to True.
    '''
    fmt = Settings().get('save to disk single format', 'lrf')
    self.save_to_disk(checked, True, fmt)
def save_to_single_dir(self, checked):
    '''
    Save the selected books to disk with ``single_dir`` set to True.
    '''
    into_single_directory = True
    self.save_to_disk(checked, into_single_directory)
def save_to_disk(self, checked, single_dir=False):
def save_to_disk(self, checked, single_dir=False, single_format=None):
rows = self.current_view().selectionModel().selectedRows()
if not rows or len(rows) == 0:
d = error_dialog(self, _('Cannot save to disk'), _('No books selected'))
d.exec_()
return
dir = choose_dir(self, 'save to disk dialog', ('Choose destination directory'))
if not dir:
return
if self.current_view() == self.library_view:
self.current_view().model().save_to_disk(rows, dir, single_dir=single_dir)
failures = self.current_view().model().save_to_disk(rows, dir,
single_dir=single_dir, single_format=single_format)
if failures and single_format is not None:
msg = _('<p>Could not save the following books to disk, because the %s format is not available for them:<ul>')%single_format.upper()
for f in failures:
msg += '<li>%s</li>'%f[1]
msg += '</ul>'
warning_dialog(self, _('Could not save some ebooks'), msg).exec_()
QDesktopServices.openUrl(QUrl('file:'+dir))
else:
paths = self.current_view().model().paths(rows)
self.job_manager.run_device_job(self.books_saved,
@ -746,6 +774,15 @@ class Main(MainWindow, Ui_MainWindow):
for i, row in enumerate([r.row() for r in rows]):
cmdline = list(d.cmdline)
mi = self.library_view.model().db.get_metadata(row)
if mi.title:
cmdline.extend(['--title', mi.title])
if mi.authors:
cmdline.extend(['--author', ','.join(mi.authors)])
if mi.publisher:
cmdline.extend(['--publisher', mi.publisher])
if mi.comments:
cmdline.extend(['--comment', mi.comments])
data = None
for fmt in LRF_PREFERRED_SOURCE_FORMATS:
try:
@ -771,7 +808,7 @@ class Main(MainWindow, Ui_MainWindow):
cmdline.append(pt.name)
id = self.job_manager.run_conversion_job(self.book_converted,
'any2lrf', args=[cmdline],
job_description='Convert book %d of %d'%(i, len(rows)))
job_description='Convert book %d of %d'%(i+1, len(rows)))
self.conversion_jobs[id] = (d.cover_file, pt, of, d.output_format,
@ -851,15 +888,16 @@ class Main(MainWindow, Ui_MainWindow):
self._view_file(result)
def _view_file(self, name):
if name.upper().endswith('.LRF'):
args = ['lrfviewer', name]
self.job_manager.process_server.run('viewer%d'%self.viewer_job_id,
'lrfviewer', kwdargs=dict(args=args),
monitor=False)
self.viewer_job_id += 1
else:
QDesktopServices.openUrl(QUrl('file:'+name))#launch(name)
time.sleep(2) # User feedback
self.setCursor(Qt.BusyCursor)
try:
if name.upper().endswith('.LRF'):
args = ['lrfviewer', name]
self.job_manager.process_server.run_free_job('lrfviewer', kwdargs=dict(args=args))
else:
QDesktopServices.openUrl(QUrl('file:'+name))#launch(name)
time.sleep(5) # User feedback
finally:
self.unsetCursor()
def view_specific_format(self, triggered):
rows = self.library_view.selectionModel().selectedRows()
@ -1064,24 +1102,48 @@ class Main(MainWindow, Ui_MainWindow):
self.device_error_dialog.show()
def conversion_job_exception(self, id, description, exception, formatted_traceback, log):
    '''
    Report a failed conversion job.

    The job description, log, exception and traceback are echoed to stderr
    (best effort) and then shown to the user in a dialog. If the exception
    object has a true ``only_msg`` attribute, only the exception text is shown
    in a simple error dialog.

    @param id: Job id (unused here)
    @param description: Job description
    @param exception: The exception object (or text) reported for the job
    @param formatted_traceback: Traceback text
    @param log: Job log text, may be None
    '''

    def safe_print(*msgs, **kwargs):
        # Print the messages space-separated to ``file`` (stderr by default),
        # encoding unicode with the preferred encoding. Messages are taken
        # as positional arguments so that safe_print('a', b) works; the old
        # (msgs, file) signature silently bound the second message to
        # ``file``.
        file = kwargs.get('file', sys.stderr)
        parts = []
        for msg in msgs:
            if not msg:
                msg = ''
            if isinstance(msg, unicode):
                msg = msg.encode(preferred_encoding, 'replace')
            parts.append(msg)
        print >>file, ' '.join(parts)

    def safe_unicode(arg):
        # Coerce ``arg`` to unicode without ever raising. This is a plain
        # nested function: it must not take ``self`` because it is applied
        # with map() to one value at a time.
        if not arg:
            arg = unicode(repr(arg))
        if isinstance(arg, str):
            arg = arg.decode(preferred_encoding, 'replace')
        if not isinstance(arg, unicode):
            try:
                arg = unicode(repr(arg))
            except Exception:
                arg = u'Could not convert to unicode'
        return arg

    # Must be read before the exception object is replaced by its text.
    only_msg = getattr(exception, 'only_msg', False)
    description, exception, formatted_traceback, log = map(safe_unicode,
                        (description, exception, formatted_traceback, log))
    try:
        safe_print('Error in job:', description)
        if log:
            safe_print(log)
        safe_print(exception)
        safe_print(formatted_traceback)
    except Exception:
        # Console output is best effort only; the dialog below must still
        # be shown (stderr may be missing or redirected).
        pass
    if only_msg:
        error_dialog(self, _('Conversion Error'), exception).exec_()
        return
    msg = u'<p><b>%s</b>:'%exception
    msg += u'<p>Failed to perform <b>job</b>: '+description
    msg += u'<p>Detailed <b>traceback</b>:<pre>'
    msg += formatted_traceback + u'</pre>'
    msg += u'<p><b>Log:</b></p><pre>'
    msg += log
    ConversionErrorDialog(self, 'Conversion Error', msg, show=True)
@ -1162,6 +1224,13 @@ def main(args=sys.argv):
pid = os.fork() if islinux else -1
if pid <= 0:
parser = option_parser('''\
%prog [opts] [path_to_ebook]
Launch the main calibre Graphical User Interface and optionally add the ebook at
path_to_ebook to the database.
''')
opts, args = parser.parse_args(args)
app = Application(args)
app.setWindowIcon(QIcon(':/library'))
QCoreApplication.setOrganizationName(ORG_NAME)
@ -1169,7 +1238,7 @@ def main(args=sys.argv):
single_instance = None if SingleApplication is None else SingleApplication('calibre GUI')
if not singleinstance('calibre GUI'):
if single_instance is not None and single_instance.is_running() and \
single_instance.send_message('launched:'+repr(sys.argv)):
single_instance.send_message('launched:'+repr(args)):
return 0
QMessageBox.critical(None, 'Cannot Start '+__appname__,
@ -1177,14 +1246,14 @@ def main(args=sys.argv):
return 1
initialize_file_icon_provider()
try:
main = Main(single_instance)
main = Main(single_instance, opts)
except DatabaseLocked, err:
QMessageBox.critical(None, 'Cannot Start '+__appname__,
'<p>Another program is using the database. <br/>Perhaps %s is already running?<br/>If not try deleting the file %s'%(__appname__, err.lock_file_path))
return 1
sys.excepthook = main.unhandled_exception
if len(sys.argv) > 1:
main.add_filesystem_book(sys.argv[1])
if len(args) > 1:
main.add_filesystem_book(args[1])
return app.exec_()
return 0
@ -1195,7 +1264,7 @@ if __name__ == '__main__':
except:
if not iswindows: raise
from PyQt4.QtGui import QErrorMessage
logfile = os.path.expanduser('~/calibre.log')
logfile = os.path.join(os.path.expanduser('~'), 'calibre.log')
if os.path.exists(logfile):
log = open(logfile).read()
if log.strip():

View File

@ -3,13 +3,43 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import StringIO, traceback, sys
from PyQt4.QtGui import QMainWindow
from PyQt4.Qt import QMainWindow, QString, Qt, QFont
from calibre.gui2.dialogs.conversion_error import ConversionErrorDialog
from calibre import OptionParser
def option_parser(usage='''\
Usage: %prog [options]
Launch the Graphical User Interface
'''):
    '''
    Build the option parser shared by the GUI programs.

    @param usage: Usage text handed to ``OptionParser``
    @return: A parser that already knows ``--redirect-console-output``
             (stored as ``opts.redirect``, default False)
    '''
    gui_parser = OptionParser(usage)
    redirect_help = _('Redirect console output to a dialog window (both stdout and stderr). Useful on windows where GUI apps do not have a output streams.')
    gui_parser.add_option('--redirect-console-output', default=False,
                          action='store_true', dest='redirect',
                          help=redirect_help)
    return gui_parser
class DebugWindow(ConversionErrorDialog):
    '''
    Non-modal 'Console output' dialog that offers ``write``/``flush`` methods,
    so text written to it is appended to the dialog's text widget.
    '''

    def __init__(self, parent):
        ConversionErrorDialog.__init__(self, parent, 'Console output', '')
        self.setModal(Qt.NonModal)
        # Ask for a typewriter-style font for the text widget.
        typewriter = QFont()
        typewriter.setStyleHint(QFont.TypeWriter)
        self.text.setFont(typewriter)

    def write(self, msg):
        '''
        Append ``msg`` to the text currently shown.
        '''
        shown = self.text.toPlainText()
        self.text.setPlainText(shown+QString(msg))

    def flush(self):
        '''
        Do nothing; there is nothing to flush.
        '''
        pass
class MainWindow(QMainWindow):
def __init__(self, opts, parent=None):
    '''
    Initialise the main window.

    @param opts: Parsed command line options. If ``opts.redirect`` is true,
                 ``sys.stdout`` and ``sys.stderr`` are replaced by a
                 ``DebugWindow`` which is then shown.
    @param parent: Parent widget passed to ``QMainWindow``
    '''
    # Single signature: the stale body-less ``def __init__(self, parent=None)``
    # line has been dropped.
    QMainWindow.__init__(self, parent)
    if getattr(opts, 'redirect', False):
        self.__console_redirect = DebugWindow(self)
        sys.stdout = sys.stderr = self.__console_redirect
        self.__console_redirect.show()
def unhandled_exception(self, type, value, tb):
try:
@ -19,7 +49,7 @@ class MainWindow(QMainWindow):
print >>sys.stderr, fe
msg = '<p><b>' + unicode(str(value), 'utf8', 'replace') + '</b></p>'
msg += '<p>Detailed <b>traceback</b>:<pre>'+fe+'</pre>'
d = ConversionErrorDialog(self, 'ERROR: Unhandled exception', msg)
d = ConversionErrorDialog(self, _('ERROR: Unhandled exception'), msg)
d.exec_()
except:
pass

View File

@ -43,6 +43,7 @@ else:
makefile.extra_lib_dirs = ['..\\..\\.build\\release', '../../.build', '.']
makefile.extra_libs = ['pictureflow0' if 'win' in sys.platform and 'darwin' not in sys.platform else "pictureflow"]
makefile.extra_cflags = ['-arch i386', '-arch ppc'] if 'darwin' in sys.platform else []
makefile.extra_lflags = ['-arch i386', '-arch ppc'] if 'darwin' in sys.platform else []
makefile.extra_cxxflags = makefile.extra_cflags
# Generate the Makefile itself.

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, re
import re
from PyQt4.QtGui import QStatusBar, QMovie, QLabel, QFrame, QHBoxLayout, QPixmap, \
QVBoxLayout, QSizePolicy, QToolButton, QIcon

View File

@ -231,7 +231,7 @@ class JobsView(TableView):
def __init__(self, parent):
TableView.__init__(self, parent)
self.connect(self, SIGNAL('activated(QModelIndex)'), self.show_details)
self.connect(self, SIGNAL('doubleClicked(QModelIndex)'), self.show_details)
def show_details(self, index):
row = index.row()

View File

@ -11,7 +11,10 @@ import sys, os
from textwrap import TextWrapper
from calibre import OptionParser, Settings, terminal_controller, preferred_encoding
from calibre.gui2 import SingleApplication
try:
from calibre.utils.single_qt_application import send_message
except:
send_message = None
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf import OPFCreator, OPFReader
from calibre.library.database import LibraryDatabase, text_to_tokens
@ -97,14 +100,14 @@ List the books available in the calibre database.
if not set(fields).issubset(FIELDS):
parser.print_help()
print
print _('Invalid fields. Available fields:'), ','.join(FIELDS)
print >>sys.stderr, _('Invalid fields. Available fields:'), ','.join(FIELDS)
return 1
db = get_db(dbpath, opts)
if not opts.sort_by in FIELDS:
parser.print_help()
print
print _('Invalid sort field. Available fields:'), ','.join(FIELDS)
print >>sys.stderr, _('Invalid sort field. Available fields:'), ','.join(FIELDS)
return 1
do_list(db, fields, opts.sort_by, opts.ascending, opts.search)
@ -118,6 +121,7 @@ class DevNull(object):
NULL = DevNull()
def do_add(db, paths, one_book_per_directory, recurse, add_duplicates):
orig = sys.stdout
sys.stdout = NULL
try:
files, dirs = [], []
@ -166,27 +170,26 @@ def do_add(db, paths, one_book_per_directory, recurse, add_duplicates):
for mi, formats in dir_dups:
db.import_book(mi, formats)
else:
print _('The following books were not added as they already exist in the database (see --duplicates option):')
print >>sys.stderr, _('The following books were not added as they already exist in the database (see --duplicates option):')
for mi, formats in dir_dups:
title = mi.title
if isinstance(title, unicode):
title = title.encode(preferred_encoding)
print '\t', title + ':'
print >>sys.stderr, '\t', title + ':'
for path in formats:
print '\t\t ', path
print >>sys.stderr, '\t\t ', path
if file_duplicates:
for path, mi in zip(file_duplicates[0], file_duplicates[2]):
title = mi.title
if isinstance(title, unicode):
title = title.encode(preferred_encoding)
print '\t', title+':'
print '\t\t ', path
print >>sys.stderr, '\t', title+':'
print >>sys.stderr, '\t\t ', path
if SingleApplication is not None:
sa = SingleApplication('calibre GUI')
sa.send_message('refreshdb:')
if send_message is not None:
send_message('refreshdb:', 'calibre GUI')
finally:
sys.stdout = sys.__stdout__
sys.stdout = orig
@ -209,7 +212,7 @@ the directory related options below.
if len(args) < 2:
parser.print_help()
print
print _('You must specify at least one file to add')
print >>sys.stderr, _('You must specify at least one file to add')
return 1
do_add(get_db(dbpath, opts), args[1:], opts.one_book_per_directory, opts.recurse, opts.duplicates)
return 0
@ -222,9 +225,9 @@ def do_remove(db, ids):
for y in x:
db.delete_book(y)
if SingleApplication is not None:
sa = SingleApplication('calibre GUI')
sa.send_message('refreshdb:')
if send_message is not None:
send_message('refreshdb:', 'calibre GUI')
def command_remove(args, dbpath):
parser = get_parser(_(
@ -239,7 +242,7 @@ list of id numbers (you can get id numbers by using the list command). For examp
if len(args) < 2:
parser.print_help()
print
print _('You must specify at least one book to remove')
print >>sys.stderr, _('You must specify at least one book to remove')
return 1
ids = []
@ -270,7 +273,7 @@ by id. You can get id by using the list command. If the format already exists, i
if len(args) < 3:
parser.print_help()
print
print _('You must specify an id and an ebook file')
print >>sys.stderr, _('You must specify an id and an ebook file')
return 1
id, file, fmt = int(args[1]), open(args[2], 'rb'), os.path.splitext(args[2])[-1]
@ -296,7 +299,7 @@ do nothing.
if len(args) < 3:
parser.print_help()
print
print _('You must specify an id and a format')
print >>sys.stderr, _('You must specify an id and a format')
return 1
id, fmt = int(args[1]), args[2].upper()
@ -327,7 +330,7 @@ id is an id number from the list command.
if len(args) < 2:
parser.print_help()
print
print _('You must specify an id')
print >>sys.stderr, _('You must specify an id')
return 1
id = int(args[1])
do_show_metadata(get_db(dbpath, opts), id, opts.as_opf)
@ -337,9 +340,8 @@ def do_set_metadata(db, id, stream):
mi = OPFReader(stream)
db.set_metadata(id, mi)
do_show_metadata(db, id, False)
if SingleApplication is not None:
sa = SingleApplication('calibre GUI')
sa.send_message('refreshdb:')
if send_message is not None:
send_message('refreshdb:', 'calibre GUI')
def command_set_metadata(args, dbpath):
parser = get_parser(_(
@ -355,15 +357,47 @@ show_metadata command.
if len(args) < 3:
parser.print_help()
print
print _('You must specify an id and a metadata file')
print >>sys.stderr, _('You must specify an id and a metadata file')
return 1
id, opf = int(args[1]), open(args[2], 'rb')
do_set_metadata(get_db(dbpath, opts), id, opf)
return 0
def do_export(db, ids, dir, single_dir, by_author):
    '''
    Export books from ``db`` into the directory ``dir``.

    When ``ids`` is None every id returned by ``db.all_ids()`` is exported.
    ``single_dir`` and ``by_author`` are forwarded to ``db.export_to_dir``,
    which is always called with ``index_is_id=True``.
    '''
    selected = db.all_ids() if ids is None else ids
    db.export_to_dir(dir, selected, byauthor=by_author,
                     single_dir=single_dir, index_is_id=True)
def command_export(args, dbpath):
    '''
    Implement the ``export`` sub-command.

    Parses the options, checks that either some ids or ``--all`` were given
    and exports the selected books via ``do_export``.

    @return: 0 on success, 1 when neither ids nor ``--all`` were specified
    '''
    parser = get_parser(_('''\
%prog export [options] ids
Export the books specified by ids (a comma separated list) to the filesystem.
The export operation saves all formats of the book, its cover and metadata (in
an opf file). You can get id numbers from the list command.
'''))
    parser.add_option('--all', default=False, action='store_true',
                      help=_('Export all books in database, ignoring the list of ids.'))
    parser.add_option('--to-dir', default='.',
                      help=(_('Export books to the specified directory. Default is')+' %default'))
    parser.add_option('--single-dir', default=False, action='store_true',
                      help=_('Export all books into a single directory'))
    parser.add_option('--by-author', default=False, action='store_true',
                      help=_('Create file names as author - title instead of title - author'))
    opts, args = parser.parse_args(sys.argv[1:]+args)

    # Without --all an id list is mandatory.
    if not opts.all and len(args) < 2:
        parser.print_help()
        print
        print >>sys.stderr, _('You must specify some ids or the %s option')%'--all'
        return 1

    if opts.all:
        ids = None
    else:
        ids = [int(book_id) for book_id in args[1].split(',')]
    target = os.path.expanduser(opts.to_dir)
    dir = os.path.abspath(target)
    do_export(get_db(dbpath, opts), ids, dir, opts.single_dir, opts.by_author)
    return 0
def main(args=sys.argv):
commands = ('list', 'add', 'remove', 'add_format', 'remove_format',
'show_metadata', 'set_metadata')
'show_metadata', 'set_metadata', 'export')
parser = OptionParser(_(
'''\
%%prog command [options] [arguments]

View File

@ -1384,6 +1384,9 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
self.conn.execute('VACUUM;')
self.conn.commit()
def all_ids(self):
    '''
    Return a list holding the ``id`` column of every row in the ``books``
    table.
    '''
    rows = self.conn.execute('SELECT id FROM books').fetchall()
    return [row[0] for row in rows]
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
index_is_id=False):
if not os.path.exists(dir):
@ -1418,7 +1421,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
name = au + ' - ' + title if byauthor else title + ' - ' + au
name += '_'+id
base = dir if single_dir else tpath
mi = OPFCreator(base, self.get_metadata(idx, index_is_id=index_is_id))
cover = self.cover(idx, index_is_id=index_is_id)
if cover is not None:
@ -1427,6 +1429,8 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
open(cpath, 'wb').write(cover)
mi.cover = cname
f = open(os.path.join(base, sanitize_file_name(name)+'.opf'), 'wb')
if not mi.authors:
mi.authors = [_('Unknown')]
mi.render(f)
f.close()
@ -1544,6 +1548,25 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
duplicates.extend(res)
return duplicates
def export_single_format_to_dir(self, dir, indices, format, index_is_id=False):
    '''
    Write the `format` data of each requested book to a file in `dir`.

    `dir`: Directory the files are written into.
    `indices`: The books to export. Used as ids when `index_is_id` is True,
    otherwise each entry is first converted with :meth:`id`.
    `format`: Format name; its lower-cased form is used as the file extension.

    Files are named ``title - author.format`` (after sanitizing).

    Returns a list of ``(id, title)`` tuples, one for every book whose
    format data could not be read. Nothing is written for those books.
    '''
    if not index_is_id:
        indices = map(self.id, indices)
    failures = []
    for id in indices:
        try:
            data = self.format(id, format, index_is_id=True)
        except Exception:
            failures.append((id, self.title(id, index_is_id=True)))
            # Without this the loop would go on and write either an
            # undefined name or the data of the previous book.
            continue
        title = self.title(id, index_is_id=True)
        au = self.authors(id, index_is_id=True)
        if not au:
            au = _('Unknown')
        fname = '%s - %s.%s'%(title, au, format.lower())
        fname = sanitize_file_name(fname)
        f = open(os.path.join(dir, fname), 'wb')
        try:
            f.write(data)
        finally:
            # Close explicitly instead of relying on garbage collection
            f.close()
    return failures
class SearchToken(object):

View File

@ -46,9 +46,10 @@ entry_points = {
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-debug = calibre.debug:main',
'calibredb = calibre.library.cli:main',
'calibre-fontconfig = calibre.utils.fontconfig:main',
'calibre-parallel = calibre.parallel:main',
],
'gui_scripts' : [
__appname__+' = calibre.gui2.main:main',
@ -353,7 +354,7 @@ def install_man_pages(fatal_errors):
prog = src[:src.index('=')].strip()
if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta',
'markdown-calibre', 'calibre-debug', 'fb2-meta',
'calibre-fontconfig'):
'calibre-fontconfig', 'calibre-parallel'):
continue
help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,
'--section', '1', '--no-info', '--include',

View File

@ -8,6 +8,8 @@ Download and install the linux binary.
'''
import sys, os, shutil, tarfile, subprocess, tempfile, urllib2, re, stat
MOBILEREAD='https://dev.mobileread.com/dist/kovid/calibre/'
class TerminalController:
"""
A class that can be used to portably generate formatted output to
@ -239,7 +241,7 @@ def do_postinstall(destdir):
def download_tarball():
pb = ProgressBar(TerminalController(sys.stdout), 'Downloading calibre...')
src = urllib2.urlopen('http://calibre.kovidgoyal.net/downloads/latest-linux-binary.tar.bz2')
src = urllib2.urlopen(MOBILEREAD+'calibre-%version-i686.tar.bz2')
size = int(src.info()['content-length'])
f = tempfile.NamedTemporaryFile()
while f.tell() < size:

View File

@ -10,7 +10,7 @@ from sphinx.ext.autodoc import get_module_charset, prepare_docstring
from docutils.statemachine import ViewList
from docutils import nodes
from genshi.template import TextTemplate
from genshi.template import OldTextTemplate as TextTemplate
sys.path.append(os.path.abspath('../../../'))
from calibre.linux import entry_points

View File

@ -1,178 +1,719 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Used to run jobs in parallel in separate processes.
Used to run jobs in parallel in separate processes. Features output streaming,
support for progress notification as well as job killing. The worker processes
are controlled via a simple protocol run over TCP/IP sockets. The control happens
mainly in two classes, :class:`Server` and :class:`Overseer`. The worker is
encapsulated in the function :function:`worker`. Every worker process
has the environment variable :envvar:`CALIBRE_WORKER` defined.
The worker control protocol has two modes of operation. In the first mode, the
worker process listens for commands from the controller process. The controller
process can either hand off a job to the worker or tell the worker to die.
Once a job is handed off to the worker, the protocol enters the second mode, where
the controller listens for messages from the worker. The worker can send progress updates
as well as console output (i.e. text that would normally have been written to stdout
or stderr by the job). Once the job completes (or raises an exception) the worker
returns the result (or exception) to the controller and the protocol reverts to the first mode.
In the second mode, the controller can also send the worker STOP messages, in which case
the worker interrupts the job and dies. The sending of progress and console output messages
is buffered and asynchronous to prevent the job from being IO bound.
'''
import re, sys, tempfile, os, cPickle, traceback, atexit, binascii, time, subprocess
import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \
subprocess, socket, collections, binascii, re, tempfile, thread
from select import select
from functools import partial
from threading import RLock, Thread, Event
from calibre.ptempfile import PersistentTemporaryFile
from calibre import iswindows, detect_ncpus, isosx
from calibre.ebooks.lrf.any.convert_from import main as any2lrf
from calibre.ebooks.lrf.web.convert_from import main as web2lrf
from calibre.ebooks.lrf.feeds.convert_from import main as feeds2lrf
from calibre.gui2.lrf_renderer.main import main as lrfviewer
from calibre import iswindows, __appname__, islinux
try:
from calibre.utils.single_qt_application import SingleApplication
except:
SingleApplication = None
sa = None
job_id = None
def report_progress(percent, msg=''):
if sa is not None and job_id is not None:
msg = 'progress:%s:%f:%s'%(job_id, percent, msg)
sa.send_message(msg)
#: A mapping from job names to functions that perform the jobs
PARALLEL_FUNCS = {
'any2lrf' : partial(any2lrf, gui_mode=True),
'web2lrf' : web2lrf,
'lrfviewer' : lrfviewer,
'feeds2lrf' : partial(feeds2lrf, notification=report_progress),
}
'any2lrf' :
('calibre.ebooks.lrf.any.convert_from', 'main', dict(gui_mode=True), None),
python = sys.executable
popen = subprocess.Popen
'lrfviewer' :
('calibre.gui2.lrf_renderer.main', 'main', {}, None),
if iswindows:
if hasattr(sys, 'frozen'):
python = os.path.join(os.path.dirname(python), 'parallel.exe')
else:
python = os.path.join(os.path.dirname(python), 'Scripts\\parallel.exe')
popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up
'feeds2lrf' :
('calibre.ebooks.lrf.feeds.convert_from', 'main', {}, 'notification'),
if islinux and hasattr(sys, 'frozen_path'):
python = os.path.join(getattr(sys, 'frozen_path'), 'parallel')
popen = partial(subprocess.Popen, cwd=getattr(sys, 'frozen_path'))
'render_table' :
('calibre.ebooks.lrf.html.table_as_image', 'do_render', {}, None),
}
def cleanup(tdir):
try:
import shutil
shutil.rmtree(tdir, True)
except:
pass
class Server(object):
isfrozen = hasattr(sys, 'frozen')
#: Interval in seconds at which child processes are polled for status information
INTERVAL = 0.1
KILL_RESULT = 'Server: job killed by user|||#@#$%&*)*(*$#$%#$@&'
win32event = __import__('win32event') if iswindows else None
win32process = __import__('win32process') if iswindows else None
msvcrt = __import__('msvcrt') if iswindows else None
class WorkerStatus(object):
    '''
    A platform independent class to control child processes. Provides the
    methods:

    .. method:: WorkerStatus.is_alive()

        Return True if the child process is alive (i.e. it hasn't exited and returned a return code).

    .. method:: WorkerStatus.returncode()

        Wait for the child process to exit and return its return code (blocks until child returns).

    .. method:: WorkerStatus.kill()

        Forcibly terminates child process using operating system specific semantics.

    The three public names are bound in :meth:`__init__` to the ``*_windows``
    or ``*_unix`` implementation, depending on `iswindows`.
    '''

    def __init__(self, obj):
        '''
        `obj`: On windows a process handle, on unix a subprocess.Popen object.
        '''
        self.obj = obj
        # Module references are kept on the instance because kill() may be
        # called while the interpreter is shutting down.
        self.win32process = win32process
        self.os = os
        self.signal = signal
        ext = 'windows' if iswindows else 'unix'
        for func in ('is_alive', 'returncode', 'kill'):
            setattr(self, func, getattr(self, func+'_'+ext))

    def is_alive_unix(self):
        'True while `poll()` on the Popen object reports no return code.'
        return self.obj.poll() is None

    def returncode_unix(self):
        'Block until the child exits and return its return code.'
        return self.obj.wait()

    def kill_unix(self):
        'Send SIGKILL to the child.'
        # Use the saved module references, like kill_windows does, so this
        # still works during interpreter shutdown.
        self.os.kill(self.obj.pid, self.signal.SIGKILL)

    def is_alive_windows(self):
        'True while the process handle is not signaled.'
        return win32event.WaitForSingleObject(self.obj, 0) != win32event.WAIT_OBJECT_0

    def returncode_windows(self):
        'Block until the child exits and return its exit code.'
        # GetExitCodeProcess does not wait by itself; wait first so that the
        # documented blocking behaviour holds and a real exit code is returned.
        win32event.WaitForSingleObject(self.obj, win32event.INFINITE)
        return win32process.GetExitCodeProcess(self.obj)

    def kill_windows(self, returncode=-1):
        'Terminate the child, making it exit with `returncode`.'
        self.win32process.TerminateProcess(self.obj, returncode)
class WorkerMother(object):
'''
Platform independent object for launching child processes. All processes
have the environment variable :envvar:`CALIBRE_WORKER` set.
..method:: WorkerMother.spawn_free_spirit(arg)
Launch a non monitored process with argument `arg`.
..method:: WorkerMother.spawn_worker(arg)
Launch a monitored and controllable process with argument `arg`.
'''
def __init__(self):
self.tdir = tempfile.mkdtemp('', '%s_IPC_'%__appname__)
atexit.register(cleanup, self.tdir)
self.kill_jobs = []
ext = 'windows' if iswindows else 'osx' if isosx else 'linux'
self.os = os # Needed incase cleanup called when interpreter is shutting down
if iswindows:
self.executable = os.path.join(os.path.dirname(sys.executable),
'calibre-parallel.exe' if isfrozen else 'Scripts\\calibre-parallel.exe')
elif isosx:
self.executable = sys.executable
self.prefix = ''
if isfrozen:
fd = getattr(sys, 'frameworks_dir')
contents = os.path.dirname(fd)
resources = os.path.join(contents, 'Resources')
sp = os.path.join(resources, 'lib', 'python'+sys.version[:3], 'site-packages.zip')
def kill(self, job_id):
'''
Kill the job identified by job_id.
'''
self.kill_jobs.append(str(job_id))
self.prefix += 'import sys; sys.frameworks_dir = "%s"; sys.frozen = "macosx_app"; '%fd
self.prefix += 'sys.path.insert(0, %s); '%repr(sp)
self.env = {}
if fd not in os.environ['PATH']:
self.env['PATH'] = os.environ['PATH']+':'+fd
self.env['PYTHONHOME'] = resources
else:
self.executable = os.path.join(getattr(sys, 'frozen_path'), 'calibre-parallel') \
if isfrozen else 'calibre-parallel'
def _terminate(self, process):
'''
Kill process.
'''
self.spawn_worker_windows = lambda arg : self.spawn_free_spirit_windows(arg, type='worker')
self.spawn_worker_linux = lambda arg : self.spawn_free_spirit_linux(arg, type='worker')
self.spawn_worker_osx = lambda arg : self.spawn_free_spirit_osx(arg, type='worker')
for func in ('spawn_free_spirit', 'spawn_worker'):
setattr(self, func, getattr(self, func+'_'+ext))
def cleanup_child_windows(self, child, name=None, fd=None):
try:
child.kill()
except:
pass
try:
if fd is not None:
self.os.close(fd)
except:
pass
try:
if name is not None and os.path.exists(name):
self.os.unlink(name)
except:
pass
def cleanup_child_linux(self, child):
try:
child.kill()
except:
pass
def get_env(self):
env = dict(os.environ)
env['CALIBRE_WORKER'] = '1'
if hasattr(self, 'env'):
env.update(self.env)
return env
def spawn_free_spirit_osx(self, arg, type='free_spirit'):
script = 'from calibre.parallel import main; main(args=["calibre-parallel", %s]);'%repr(arg)
cmdline = [self.executable, '-c', self.prefix+script]
child = WorkerStatus(subprocess.Popen(cmdline, env=self.get_env()))
atexit.register(self.cleanup_child_linux, child)
return child
def spawn_free_spirit_linux(self, arg, type='free_spirit'):
cmdline = [self.executable, arg]
child = WorkerStatus(subprocess.Popen(cmdline, env=self.get_env()))
atexit.register(self.cleanup_child_linux, child)
return child
def spawn_free_spirit_windows(self, arg, type='free_spirit'):
fd, name = tempfile.mkstemp('.log', 'calibre_'+type+'_')
handle = msvcrt.get_osfhandle(fd)
si = win32process.STARTUPINFO()
si.hStdOutput = handle
si.hStdError = handle
cmdline = self.executable + ' ' + str(arg)
hProcess = \
win32process.CreateProcess(
None, # Application Name
cmdline, # Command line
None, # processAttributes
None, # threadAttributes
1, # bInheritHandles
win32process.CREATE_NO_WINDOW, # Dont want ugly console popping up
self.get_env(), # New environment
None, # Current directory
si
)[0]
child = WorkerStatus(hProcess)
atexit.register(self.cleanup_child_windows, child, name, fd)
return child
mother = WorkerMother()
def write(socket, msg, timeout=5):
    '''
    Send `msg` over `socket`. Unicode messages are encoded as utf-8 first.

    The message is sent in chunks of at most 4096 bytes. The first chunk
    starts with the total message length, left justified in a 12 character
    field; :function:`read` uses it to put the message back together.
    An empty message sends nothing.

    Raises a `RuntimeError` if the socket does not become writable within
    `timeout` seconds (checked before every chunk) or if sending a chunk fails.
    '''
    if isinstance(msg, unicode):
        msg = msg.encode('utf-8')
    header_sent = False
    while len(msg) > 0:
        if header_sent:
            chunk, msg = msg[:4096], msg[4096:]
        else:
            header_sent = True
            chunk = ('%-12d'%len(msg)) + msg[:4096-12]
            msg = msg[4096-12:]
        writable = select([], [socket], [], timeout)[1]
        if not writable:
            raise RuntimeError('Write to socket timed out')
        if socket.sendall(chunk) is not None:
            raise RuntimeError('Failed to write chunk to socket')
def read(socket, timeout=5):
'''
Read a message from `socket`. The message must have been sent with the :function:`write`
function. Raises a `RuntimeError` if the message is corrpted. Can return an
empty string.
'''
buf = cStringIO.StringIO()
length = None
while select([socket],[],[],timeout)[0]:
msg = socket.recv(4096)
if not msg:
break
if length is None:
length, msg = int(msg[:12]), msg[12:]
buf.write(msg)
if buf.tell() >= length:
break
if not length:
return ''
msg = buf.getvalue()[:length]
if len(msg) < length:
raise RuntimeError('Corrupted packet received')
return msg
class RepeatingTimer(Thread):
    '''
    Daemon thread (the interpreter can exit while it is still running) that
    keeps calling a function, waiting `interval` seconds before every call.
    The loop ends once :attr:`event` is set.
    Call :meth:`start()` to start it.
    '''

    def __init__(self, interval, func, name):
        '''
        `interval`: Seconds to wait before each call.
        `func`: Callable invoked without arguments.
        `name`: Name given to the thread.
        '''
        Thread.__init__(self, target=self.repeat, name=name)
        self.setDaemon(True)
        self.event = Event()
        self.interval = interval
        self.action = func

    def repeat(self):
        'Thread body: wait, stop if the event was set, otherwise call the action.'
        while True:
            self.event.wait(self.interval)
            if self.event.isSet():
                return
            self.action()
class ControlError(Exception):
    '''
    Error in the control protocol between the controller and a worker
    process. Raised by :meth:`Overseer.initialize_job` when the worker does
    not acknowledge a job, and placed in the :class:`Result` returned by
    :meth:`Overseer.control` when the worker sends an invalid message or dies.
    '''
    pass
class Overseer(object):
'''
Responsible for controlling worker processes. The main interface is the
methods, :meth:`initialize_job`, :meth:`control`.
'''
KILL_RESULT = 'Server: job killed by user|||#@#$%&*)*(*$#$%#$@&'
INTERVAL = 0.1
def __init__(self, server, port, timeout=5):
    '''
    Spawn a worker process and perform the initial handshake with it.

    `server`: Listening socket on which the connection from the worker is accepted.
    `port`: Port passed to the worker (as ``127.0.0.1:port``) to connect back to.
    `timeout`: Default timeout in seconds for :meth:`read` and :meth:`write`.

    The worker must introduce itself with ``CALIBRE_WORKER:<pid>``, is
    answered with ``OK`` and must then report ``WAITING``. A `RuntimeError`
    is raised if any of these steps fails.
    '''
    self.worker_status = mother.spawn_worker('127.0.0.1:%d'%port)
    self.socket = server.accept()[0]
    # Needed if terminate is called when the interpreter is shutting down
    self.os, self.signal = os, signal
    self.working = False
    self.timeout = timeout
    self.last_job_time = time.time()
    self.job_id = None
    self._stop = False
    readable = select([self.socket], [], [], 120)[0]
    if not readable:
        raise RuntimeError(_('Could not launch worker process.'))
    greeting = self.read().split(':')
    if greeting[0] != 'CALIBRE_WORKER':
        raise RuntimeError('Impostor')
    self.worker_pid = int(greeting[1])
    self.write('OK')
    state = self.read()
    if state != 'WAITING':
        raise RuntimeError('Worker sulking')
def terminate(self):
'Kill worker process.'
try:
if self.socket:
self.write('STOP:')
time.sleep(1)
self.socket.shutdown(socket.SHUT_RDWR)
except:
pass
if iswindows:
win32api = __import__('win32api')
try:
win32api.TerminateProcess(int(process.pid), -1)
handle = win32api.OpenProcess(1, False, self.worker_pid)
win32api.TerminateProcess(handle, -1)
except:
pass
else:
import signal
os.kill(process.pid, signal.SIGKILL)
time.sleep(0.05)
try:
self.os.kill(self.worker_pid, self.signal.SIGKILL)
time.sleep(0.05)
except:
pass
def write(self, msg, timeout=None):
write(self.socket, msg, timeout=self.timeout if timeout is None else timeout)
def run(self, job_id, func, args=[], kwdargs={}, monitor=True):
def read(self, timeout=None):
return read(self.socket, timeout=self.timeout if timeout is None else timeout)
def __eq__(self, other):
return hasattr(other, 'process') and hasattr(other, 'worker_pid') and self.worker_pid == other.worker_pid
def __bool__(self):
return self.worker_status.is_alive()
def select(self, timeout=0):
return select([self.socket], [self.socket], [self.socket], timeout)
def initialize_job(self, job):
'''
Run a job in a separate process.
@param job_id: A unique (per server) identifier
@param func: One of C{PARALLEL_FUNCS.keys()}
@param args: A list of arguments to pass of C{func}
@param kwdargs: A dictionary of keyword arguments to pass to C{func}
@param monitor: If False launch the child process and return. Do not monitor/communicate with it.
@return: (result, exception, formatted_traceback, log) where log is the combined
stdout + stderr of the child process; or None if monitor is True. If a job is killed
by a call to L{kill()} then result will be L{KILL_RESULT}
Sends `job` to worker process. Can raise `ControlError` if worker process
does not respond appropriately. In this case, this Overseer is useless
and should be discarded.
`job`: An instance of :class:`Job`.
'''
job_id = str(job_id)
job_dir = os.path.join(self.tdir, job_id)
if os.path.exists(job_dir):
raise ValueError('Cannot run job. The job_id %s has already been used.'%job_id)
os.mkdir(job_dir)
self.job_id = job.job_id
self.working = True
self.write('JOB:'+cPickle.dumps((job.func, job.args, job.kwdargs), -1))
msg = self.read()
if msg != 'OK':
raise ControlError('Failed to initialize job on worker %d:%s'%(self.worker_pid, msg))
self.output = job.output if callable(job.output) else sys.stdout.write
self.progress = job.progress if callable(job.progress) else None
self.job = job
job_data = os.path.join(job_dir, 'job_data.pickle')
cPickle.dump((job_id, func, args, kwdargs), open(job_data, 'wb'), -1)
prefix = ''
if hasattr(sys, 'frameworks_dir'):
fd = getattr(sys, 'frameworks_dir')
prefix = 'import sys; sys.frameworks_dir = "%s"; sys.frozen = "macosx_app"; '%fd
if fd not in os.environ['PATH']:
os.environ['PATH'] += ':'+fd
cmd = prefix + 'from calibre.parallel import run_job; run_job(\'%s\')'%binascii.hexlify(job_data)
def control(self):
'''
Listens for messages from the worker process and dispatches them
appropriately. If the worker process dies unexpectedly, returns a result
of None with a ControlError indicating the worker died.
if not monitor:
popen([python, '-c', cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE,
stderr=subprocess.PIPE)
Returns a :class:`Result` instance or None, if the worker is still working.
'''
if select([self.socket],[],[],0)[0]:
msg = self.read()
word, msg = msg.partition(':')[0], msg.partition(':')[-1]
if word == 'RESULT':
self.write('OK')
return Result(cPickle.loads(msg), None, None)
elif word == 'OUTPUT':
self.write('OK')
try:
self.output(''.join(cPickle.loads(msg)))
except:
self.output('Bad output message: '+ repr(msg))
elif word == 'PROGRESS':
self.write('OK')
percent = None
try:
percent, msg = cPickle.loads(msg)[-1]
except:
print 'Bad progress update:', repr(msg)
if self.progress and percent is not None:
self.progress(percent, msg)
elif word == 'ERROR':
self.write('OK')
return Result(None, *cPickle.loads(msg))
else:
self.terminate()
return Result(None, ControlError('Worker sent invalid msg: %s', repr(msg)), '')
if not self.worker_status.is_alive():
return Result(None, ControlError('Worker process died unexpectedly with returncode: %d'%self.process.returncode), '')
class Job(object):
    '''
    Plain record describing one unit of work: its id, the name of the
    function to run with its positional and keyword arguments, and the
    `output`, `progress` and `done` callbacks.
    '''

    def __init__(self, job_id, func, args, kwdargs, output, progress, done):
        self.job_id, self.func = job_id, func
        self.args, self.kwdargs = args, kwdargs
        self.output, self.progress, self.done = output, progress, done
class Result(object):
    '''
    Outcome of a job. Behaves like the 3-tuple
    ``(result, exception, traceback)``: it has length 3 and can be indexed,
    iterated over and unpacked.
    '''

    def __init__(self, result, exception, traceback):
        self.result = result
        self.exception = exception
        self.traceback = traceback

    def __len__(self):
        return 3

    def __getitem__(self, i):
        # __getitem__ is the method Python calls for `res[i]`
        return (self.result, self.exception, self.traceback)[i]

    # Old name of the indexing method, kept for callers that use it directly
    __item__ = __getitem__

    def __iter__(self):
        return iter((self.result, self.exception, self.traceback))
class Server(Thread):
KILL_RESULT = Overseer.KILL_RESULT
START_PORT = 10013
def __init__(self, number_of_workers=detect_ncpus()):
Thread.__init__(self)
self.setDaemon(True)
self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.port = self.START_PORT
while True:
try:
self.server_socket.bind(('localhost', self.port))
break
except:
self.port += 1
self.server_socket.listen(5)
self.number_of_workers = number_of_workers
self.pool, self.jobs, self.working, self.results = [], collections.deque(), [], {}
atexit.register(self.killall)
atexit.register(self.close)
self.job_lock = RLock()
self.overseer_lock = RLock()
self.working_lock = RLock()
self.result_lock = RLock()
self.pool_lock = RLock()
self.start()
def close(self):
try:
self.server_socket.shutdown(socket.SHUT_RDWR)
except:
pass
def add_job(self, job):
with self.job_lock:
self.jobs.append(job)
def store_result(self, result, id=None):
if id:
with self.job_lock:
self.results[id] = result
def result(self, id):
with self.result_lock:
return self.results.pop(id, None)
def run(self):
while True:
job = None
with self.job_lock:
if len(self.jobs) > 0 and len(self.working) < self.number_of_workers:
job = self.jobs.popleft()
with self.pool_lock:
o = None
while self.pool:
o = self.pool.pop()
try:
o.initialize_job(job)
break
except:
o.terminate()
if o is None:
o = Overseer(self.server_socket, self.port)
try:
o.initialize_job(job)
except Exception, err:
o.terminate()
res = Result(None, unicode(err), traceback.format_exc())
job.done(res)
o = None
if o:
with self.working_lock:
self.working.append(o)
with self.working_lock:
done = []
for o in self.working:
try:
res = o.control()
except Exception, err:
res = Result(None, unicode(err), traceback.format_exc())
o.terminate()
if isinstance(res, Result):
o.job.done(res)
done.append(o)
for o in done:
self.working.remove(o)
if o:
with self.pool_lock:
self.pool.append(o)
try:
time.sleep(1)
except:
return
def killall(self):
with self.pool_lock:
map(lambda x: x.terminate(), self.pool)
self.pool = []
def kill(self, job_id):
with self.working_lock:
pop = None
for o in self.working:
if o.job_id == job_id:
o.terminate()
o.job.done(Result(self.KILL_RESULT, None, ''))
pop = o
break
if pop is not None:
self.working.remove(pop)
def run_job(self, job_id, func, args=[], kwdargs={},
output=None, progress=None, done=None):
'''
Run a job in a separate process. Supports job control, output redirection
and progress reporting.
'''
if done is None:
done = partial(self.store_result, id=job_id)
job = Job(job_id, func, args, kwdargs, output, progress, done)
with self.job_lock:
self.jobs.append(job)
def run_free_job(self, func, args=[], kwdargs={}):
pt = PersistentTemporaryFile('.pickle', '_IPC_')
pt.write(cPickle.dumps((func, args, kwdargs)))
pt.close()
mother.spawn_free_spirit(binascii.hexlify(pt.name))
##########################################################################################
##################################### CLIENT CODE #####################################
##########################################################################################
class BufferedSender(object):
def __init__(self, socket):
self.socket = socket
self.wbuf, self.pbuf = [], []
self.wlock, self.plock = RLock(), RLock()
self.timer = RepeatingTimer(0.5, self.send, 'BufferedSender')
self.timer.start()
def write(self, msg):
if not isinstance(msg, basestring):
msg = unicode(msg)
with self.wlock:
self.wbuf.append(msg)
def send(self):
if select([self.socket], [], [], 0)[0]:
msg = read(self.socket)
if msg == 'PING:':
write(self.socket, 'OK')
elif msg:
self.socket.shutdown(socket.SHUT_RDWR)
thread.interrupt_main()
time.sleep(1)
raise SystemExit
if not select([], [self.socket], [], 30)[1]:
print >>sys.__stderr__, 'Cannot pipe to overseer'
return
output = open(os.path.join(job_dir, 'output.txt'), 'wb')
p = popen([python, '-c', cmd], stdout=output, stderr=output,
stdin=subprocess.PIPE)
p.stdin.close()
while p.returncode is None:
if job_id in self.kill_jobs:
self._terminate(p)
return self.KILL_RESULT, None, None, _('Job killed by user')
time.sleep(0.1)
p.poll()
with self.wlock:
if self.wbuf:
msg = cPickle.dumps(self.wbuf, -1)
self.wbuf = []
write(self.socket, 'OUTPUT:'+msg)
read(self.socket, 10)
with self.plock:
if self.pbuf:
msg = cPickle.dumps(self.pbuf, -1)
self.pbuf = []
write(self.socket, 'PROGRESS:'+msg)
read(self.socket, 10)
def notify(self, percent, msg=''):
with self.plock:
self.pbuf.append((percent, msg))
def flush(self):
pass
def get_func(name):
    '''
    Look up `name` in `PARALLEL_FUNCS` and import the function it refers to.

    Returns ``(func, kwdargs, notification)``: the function object, the
    keyword arguments registered for it and the name of its notification
    keyword argument (or None), the latter two as stored in `PARALLEL_FUNCS`.
    '''
    module_name, func_name, kwdargs, notification = PARALLEL_FUNCS[name]
    func = getattr(__import__(module_name, fromlist=[1]), func_name)
    return func, kwdargs, notification
def work(client_socket, func, args, kwdargs):
    '''
    Run the job function registered under the name `func` and return its result.

    `client_socket`: Not used by this function.
    `func`: A key of `PARALLEL_FUNCS`.
    `args`, `kwdargs`: Arguments for the job function. `kwdargs` override
    the keyword arguments registered for it.

    If the function has a notification keyword argument and `sys.stdout`
    has a `notify` attribute, that is passed as the notification callback.
    After the function returns, `sys.stdout.send()` is called if present.
    '''
    func, default_kwargs, notification = get_func(func)
    # get_func returns the dict stored in PARALLEL_FUNCS. Work on a copy so
    # that the arguments of this job do not leak into later jobs run by the
    # same worker process.
    kargs = dict(default_kwargs)
    if notification is not None and hasattr(sys.stdout, 'notify'):
        kargs[notification] = sys.stdout.notify
    kargs.update(kwdargs)
    res = func(*args, **kargs)
    if hasattr(sys.stdout, 'send'):
        sys.stdout.send()
    return res
output.close()
job_result = os.path.join(job_dir, 'job_result.pickle')
if not os.path.exists(job_result):
result, exception, traceback = None, ('ParallelRuntimeError',
'The worker process died unexpectedly.'), ''
def worker(host, port):
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client_socket.connect((host, port))
write(client_socket, 'CALIBRE_WORKER:%d'%os.getpid())
msg = read(client_socket, timeout=10)
if msg != 'OK':
return 1
write(client_socket, 'WAITING')
sys.stdout = BufferedSender(client_socket)
sys.stderr = sys.stdout
while True:
if not select([client_socket], [], [], 60)[0]:
time.sleep(1)
continue
msg = read(client_socket, timeout=60)
if msg.startswith('JOB:'):
func, args, kwdargs = cPickle.loads(msg[4:])
write(client_socket, 'OK')
try:
result = work(client_socket, func, args, kwdargs)
write(client_socket, 'RESULT:'+ cPickle.dumps(result))
except (Exception, SystemExit), err:
exception = (err.__class__.__name__, unicode(str(err), 'utf-8', 'replace'))
tb = traceback.format_exc()
write(client_socket, 'ERROR:'+cPickle.dumps((exception, tb),-1))
if read(client_socket, 10) != 'OK':
break
gc.collect()
elif msg == 'PING:':
write(client_socket, 'OK')
elif msg == 'STOP:':
client_socket.shutdown(socket.SHUT_RDWR)
return 0
elif not msg:
time.sleep(1)
else:
result, exception, traceback = cPickle.load(open(job_result, 'rb'))
log = open(output.name, 'rb').read()
print >>sys.__stderr__, 'Invalid protocols message', msg
return 1
return result, exception, traceback, log
def run_job(job_data):
global sa, job_id
if SingleApplication is not None:
sa = SingleApplication('calibre GUI')
job_data = binascii.unhexlify(job_data)
base = os.path.dirname(job_data)
job_result = os.path.join(base, 'job_result.pickle')
job_id, func, args, kwdargs = cPickle.load(open(job_data, 'rb'))
func = PARALLEL_FUNCS[func]
exception, tb = None, None
def free_spirit(path):
func, args, kwdargs = cPickle.load(open(path, 'rb'))
try:
result = func(*args, **kwdargs)
except (Exception, SystemExit), err:
result = None
exception = (err.__class__.__name__, unicode(str(err), 'utf-8', 'replace'))
tb = traceback.format_exc()
if os.path.exists(os.path.dirname(job_result)):
cPickle.dump((result, exception, tb), open(job_result, 'wb'))
def main():
src = sys.argv[2]
job_data = re.search(r'run_job\(\'([a-f0-9A-F]+)\'\)', src).group(1)
run_job(job_data)
os.unlink(path)
except:
pass
func, kargs = get_func(func)[:2]
kargs.update(kwdargs)
func(*args, **kargs)
def main(args=sys.argv):
    '''
    Entry point of the worker executable. ``args[1]`` is either
    ``host:port``, in which case a :function:`worker` connecting to that
    address is run, or the hex encoded path of a pickle file describing a
    job for :function:`free_spirit`. Always returns 0.
    '''
    parts = args[1].split(':')
    if len(parts) != 1:
        host = parts[0].replace("'", '')
        worker(host, int(parts[1]))
        return 0
    hex_path = re.sub(r'[^a-f0-9A-F]', '', parts[0])
    free_spirit(binascii.unhexlify(hex_path))
    return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, re
import sys, re, os
""" Get information about the terminal we are running in """
@ -94,7 +94,7 @@ class TerminalController:
except: return
# If the stream isn't a tty, then assume it has no capabilities.
if not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return
if os.environ.get('CALIBRE_WORKER', None) is not None or not hasattr(term_stream, 'isatty') or not term_stream.isatty(): return
# Check the terminal type. If we fail, then assume that the
# terminal has no capabilities.

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import re, glob
import re
from pkg_resources import resource_filename
from trac.core import Component, implements
@ -12,7 +12,7 @@ from trac.util import Markup
__appname__ = 'calibre'
DOWNLOAD_DIR = '/var/www/calibre.kovidgoyal.net/htdocs/downloads'
LINUX_INSTALLER = '/var/www/calibre.kovidgoyal.net/calibre/src/calibre/linux_installer.py'
MOBILEREAD = 'https://dev.mobileread.com/dist/kovid/calibre/'
class OS(dict):
"""Dictionary with a default value for unknown keys."""
@ -37,7 +37,6 @@ class Distribution(object):
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
('convertlit', '1.8', 'convertlit', None, None),
('lxml', '1.3.3', 'lxml', 'python-lxml', 'python-lxml'),
('genshi', '0.4.4', 'genshi', 'python-genshi', 'python-genshi'),
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
]
@ -81,9 +80,8 @@ class Distribution(object):
self.command = cmd.strip()
easy_install = 'easy_install'
if os == 'debian':
self.command += '\n'+prefix + 'cp -R /usr/share/pycentral/fonttools/site-packages/FontTools* /usr/lib/python2.5/site-packages/'
easy_install = 'easy_install-2.5'
self.command += '\n'+prefix+easy_install+' -U TTFQuery calibre \n'+prefix+'calibre_postinstall'
self.command += '\n'+prefix+easy_install+' -U calibre \n'+prefix+'calibre_postinstall'
try:
self.manual = Markup(self.MANUAL_MAP[os])
except KeyError:
@ -119,7 +117,7 @@ class Download(Component):
if req.path_info == '/download':
return self.top_level(req)
elif req.path_info == '/download_linux_binary_installer':
req.send(open(LINUX_INSTALLER).read(), 'text/x-python')
req.send(open(LINUX_INSTALLER).read().replace('%version', self.version_from_filename()), 'text/x-python')
else:
match = re.match(r'\/download_(\S+)', req.path_info)
if match:
@ -153,8 +151,7 @@ class Download(Component):
def version_from_filename(self):
try:
file = glob.glob(DOWNLOAD_DIR+'/*.exe')[0]
return re.search(r'\S+-(\d+\.\d+\.\d+)\.', file).group(1)
return open(DOWNLOAD_DIR+'/latest_version', 'rb').read().strip()
except:
return '0.0.0'
@ -165,7 +162,7 @@ class Download(Component):
installer_name='Windows installer',
title='Download %s for windows'%(__appname__),
compatibility='%s works on Windows XP and Windows Vista.'%(__appname__,),
path='/downloads/'+file, app=__appname__,
path=MOBILEREAD+file, app=__appname__,
note=Markup(\
'''
<p>If you are using the <b>SONY PRS-500</b> and %(appname)s does not detect your reader, read on:</p>
@ -203,7 +200,7 @@ You can uninstall a driver by right clicking on it and selecting uninstall.
installer_name='OS X universal dmg',
title='Download %s for OS X'%(__appname__),
compatibility='%s works on OS X Tiger and above.'%(__appname__,),
path='/downloads/'+file, app=__appname__,
path=MOBILEREAD+file, app=__appname__,
note=Markup(\
'''
<ol>
@ -232,7 +229,7 @@ If not, head over to <a href="http://calibre.kovidgoyal.net/wiki/Development#Tra
def linux(self, req):
operating_systems = [
OS({'name' : 'binary', 'title': 'All distros'}),
OS({'name' : 'binary', 'title': 'Distro neutral'}),
OS({'name' : 'gentoo', 'title': 'Gentoo'}),
OS({'name' : 'ubuntu', 'title': 'Ubuntu'}),
OS({'name' : 'fedora', 'title': 'Fedora'}),

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,7 @@ match to a given font specification. The main functions in this module are:
'''
import sys, os, locale, codecs
from ctypes import cdll, c_void_p, Structure, c_int, POINTER, c_ubyte, c_char, \
from ctypes import cdll, c_void_p, Structure, c_int, POINTER, c_ubyte, c_char, util, \
pointer, byref, create_string_buffer, Union, c_char_p, c_double
try:
@ -37,17 +37,19 @@ isosx = 'darwin' in sys.platform
def load_library():
if isosx:
lib = 'libfontconfig.1.dylib'
if hasattr(sys, 'frameworks_dir'):
lib = os.path.join(getattr(sys, 'frameworks_dir'), lib)
lib = os.path.join(getattr(sys, 'frameworks_dir'), 'libfontconfig.1.dylib') \
if hasattr(sys, 'frameworks_dir') else util.find_library('fontconfig')
return cdll.LoadLibrary(lib)
elif iswindows:
return cdll.LoadLibrary('libfontconfig-1')
else:
try:
return cdll.LoadLibrary('libfontconfig.so')
return cdll.LoadLibrary(util.find_library('fontconfig'))
except:
return cdll.LoadLibrary('libfontconfig.so.1')
try:
return cdll.LoadLibrary('libfontconfig.so')
except:
return cdll.LoadLibrary('libfontconfig.so.1')
class FcPattern(Structure):
_fields_ = [
@ -118,9 +120,29 @@ lib.FcFontSort.argtypes = [c_void_p, POINTER(FcPattern), c_int, c_void_p, POINTE
lib.FcFontSort.restype = POINTER(FcFontSet)
lib.FcFontRenderPrepare.argtypes = [c_void_p, POINTER(FcPattern), POINTER(FcPattern)]
lib.FcFontRenderPrepare.restype = POINTER(FcPattern)
lib.FcConfigCreate.restype = c_void_p
lib.FcConfigSetCurrent.argtypes = [c_void_p]
lib.FcConfigSetCurrent.restype = c_int
lib.FcConfigParseAndLoad.argtypes = [c_void_p, POINTER(c_char), c_int]
lib.FcConfigParseAndLoad.restype = c_int
lib.FcConfigBuildFonts.argtypes = [c_void_p]
lib.FcConfigBuildFonts.restype = c_int
if not lib.FcInit():
# Initialize the fontconfig library. This has to be done manually
# for the OS X bundle as it has its own private fontconfig.
if hasattr(sys, 'frameworks_dir'):
config_dir = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')), 'Resources', 'fonts')
if isinstance(config_dir, unicode):
config_dir = config_dir.encode(sys.getfilesystemencoding())
config = lib.FcConfigCreate()
if not lib.FcConfigParseAndLoad(config, os.path.join(config_dir, 'fonts.conf'), 1):
raise RuntimeError('Could not parse the fontconfig configuration')
if not lib.FcConfigBuildFonts(config):
raise RuntimeError('Could not build fonts')
if not lib.FcConfigSetCurrent(config):
raise RuntimeError('Could not set font config')
elif not lib.FcInit():
raise RuntimeError(_('Could not initialize the fontconfig library'))
def find_font_families(allowed_extensions=['ttf']):
@ -151,12 +173,11 @@ def find_font_families(allowed_extensions=['ttf']):
ext = os.path.splitext(path)[1]
if ext:
ext = ext[1:].lower()
if allowed_extensions and ext in allowed_extensions:
if (not allowed_extensions) or (allowed_extensions and ext in allowed_extensions):
if lib.FcPatternGetString(pat, 'family', 0, byref(family)) != FcResultMatch.value:
raise RuntimeError('Error processing pattern')
font_families.append(str(family.contents.value))
lib.FcObjectSetDestroy(oset)
lib.FcPatternDestroy(empty_pattern)
lib.FcFontSetDestroy(fs)

View File

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""This package provides various means for generating and processing web markup
(XML or HTML).
The design is centered around the concept of streams of markup events (similar
in concept to SAX parsing events) which can be processed in a uniform manner
independently of where or how they are produced.
"""
__docformat__ = 'restructuredtext en'
try:
    # Expose the version of an installed Genshi distribution, if any.
    __version__ = __import__('pkg_resources').get_distribution('Genshi').version
except Exception:
    # pkg_resources may be unavailable, or Genshi may not be installed as a
    # distribution; ``__version__`` is then simply left undefined.
    # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
    # and SystemExit propagate.
    pass
from calibre.utils.genshi.core import *
from calibre.utils.genshi.input import ParseError, XML, HTML

View File

@ -0,0 +1,362 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Support for programmatically generating markup streams from Python code using
a very simple syntax. The main entry point to this module is the `tag` object
(which is actually an instance of the ``ElementFactory`` class). You should
rarely (if ever) need to directly import and use any of the other classes in
this module.
Elements can be created using the `tag` object using attribute access. For
example:
>>> doc = tag.p('Some text and ', tag.a('a link', href='http://example.org/'), '.')
>>> doc
<Element "p">
This produces an `Element` instance which can be further modified to add child
nodes and attributes. This is done by "calling" the element: positional
arguments are added as child nodes (alternatively, the `Element.append` method
can be used for that purpose), whereas keyword arguments are added as
attributes:
>>> doc(tag.br)
<Element "p">
>>> print doc
<p>Some text and <a href="http://example.org/">a link</a>.<br/></p>
If an attribute name collides with a Python keyword, simply append an underscore
to the name:
>>> doc(class_='intro')
<Element "p">
>>> print doc
<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
As shown above, an `Element` can easily be directly rendered to XML text by
printing it or using the Python ``str()`` function. This is basically a
shortcut for converting the `Element` to a stream and serializing that
stream:
>>> stream = doc.generate()
>>> stream #doctest: +ELLIPSIS
<genshi.core.Stream object at ...>
>>> print stream
<p class="intro">Some text and <a href="http://example.org/">a link</a>.<br/></p>
The `tag` object also allows creating "fragments", which are basically lists
of nodes (elements or text) that don't have a parent element. This can be useful
for creating snippets of markup that are attached to a parent element later (for
example in a template). Fragments are created by calling the `tag` object, which
returns an object of type `Fragment`:
>>> fragment = tag('Hello, ', tag.em('world'), '!')
>>> fragment
<Fragment>
>>> print fragment
Hello, <em>world</em>!
"""
try:
set
except NameError:
from sets import Set as set
from calibre.utils.genshi.core import Attrs, Markup, Namespace, QName, Stream, \
START, END, TEXT
__all__ = ['Fragment', 'Element', 'ElementFactory', 'tag']
__docformat__ = 'restructuredtext en'
class Fragment(object):
    """A markup fragment: a flat list of child nodes (elements, streams or
    text) that has no parent element of its own.
    """
    __slots__ = ['children']

    def __init__(self):
        """Create a new, empty fragment."""
        self.children = []

    def __add__(self, other):
        # Addition never mutates the operands: both end up in a new fragment.
        combined = Fragment()
        return combined(self, other)

    def __call__(self, *args):
        """Append every positional argument as a child node.

        :return: the fragment itself
        :see: `append`
        """
        for node in args:
            self.append(node)
        return self

    def __iter__(self):
        return self._generate()

    def __repr__(self):
        return '<%s>' % self.__class__.__name__

    def __str__(self):
        return str(self.generate())

    def __unicode__(self):
        return unicode(self.generate())

    def __html__(self):
        return Markup(self.generate())

    def append(self, node):
        """Append an element or string as child node.

        :param node: the node to append; can be an `Element`, `Fragment`, or a
                     `Stream`, or a Python string or number
        """
        # Known/primitive types are stored directly, without probing whether
        # they are iterable. `Element` must be tested before `Fragment`
        # because it is a `Fragment` subclass.
        if isinstance(node, (Stream, Element, basestring, int, float, long)):
            self.children.append(node)
            return
        # A plain fragment is flattened into this one.
        if isinstance(node, Fragment):
            self.children.extend(node.children)
            return
        # ``None`` is silently ignored.
        if node is None:
            return
        # Anything else is treated as an iterable of nodes if possible, and
        # stored as a single node otherwise.
        try:
            for item in iter(node):
                self.append(item)
        except TypeError:
            self.children.append(node)

    def _generate(self):
        # Yield the markup events of all children, in order.
        for child in self.children:
            if isinstance(child, Fragment):
                events = child._generate()
            elif isinstance(child, Stream):
                events = child
            else:
                # Everything else is emitted as a single text event.
                if not isinstance(child, basestring):
                    child = unicode(child)
                events = [(TEXT, child, (None, -1, -1))]
            for event in events:
                yield event

    def generate(self):
        """Return a markup event stream for the fragment.

        :rtype: `Stream`
        """
        return Stream(self._generate())
def _kwargs_to_attrs(kwargs):
    """Convert a dictionary of keyword arguments into an `Attrs` instance.

    Trailing underscores are stripped from each name and the remaining
    underscores are turned into hyphens. Entries whose value is `None` are
    dropped, and only the first entry encountered for a given converted name
    is kept.

    :param kwargs: mapping of attribute names to values
    :return: the resulting attributes, with values converted to `unicode`
    :rtype: `Attrs`
    """
    seen = set()
    pairs = []
    for key, value in kwargs.items():
        attr_name = key.rstrip('_').replace('_', '-')
        if value is None or attr_name in seen:
            continue
        pairs.append((QName(attr_name), unicode(value)))
        seen.add(attr_name)
    return Attrs(pairs)
class Element(Fragment):
    """Simple XML output generator based on the builder pattern.

    Construct XML elements by passing the tag name to the constructor:

    >>> print Element('strong')
    <strong/>

    Attributes can be specified using keyword arguments. The values of the
    arguments will be converted to strings and any special XML characters
    escaped:

    >>> print Element('textarea', rows=10, cols=60)
    <textarea rows="10" cols="60"/>
    >>> print Element('span', title='1 < 2')
    <span title="1 &lt; 2"/>
    >>> print Element('span', title='"baz"')
    <span title="&#34;baz&#34;"/>

    The " character is escaped using a numerical entity.
    The order in which attributes are rendered is undefined.

    If an attribute value evaluates to `None`, that attribute is not included
    in the output:

    >>> print Element('a', name=None)
    <a/>

    Attribute names that conflict with Python keywords can be specified by
    appending an underscore:

    >>> print Element('div', class_='warning')
    <div class="warning"/>

    Nested elements are added by calling the element. The same call also
    accepts keyword arguments to add attributes, as one would do in the
    constructor.

    >>> print Element('ul')(Element('li'), Element('li'))
    <ul><li/><li/></ul>
    >>> print Element('a')('Label')
    <a>Label</a>
    >>> print Element('a')('Label', href="target")
    <a href="target">Label</a>

    Text nodes can be nested in an element by adding strings instead of
    elements. Any special characters in the strings are escaped automatically:

    >>> print Element('em')('Hello world')
    <em>Hello world</em>
    >>> print Element('em')(42)
    <em>42</em>
    >>> print Element('em')('1 < 2')
    <em>1 &lt; 2</em>

    This technique also allows mixed content:

    >>> print Element('p')('Hello ', Element('b')('world'))
    <p>Hello <b>world</b></p>

    Quotes are not escaped inside text nodes:

    >>> print Element('p')('"Hello"')
    <p>"Hello"</p>

    Elements can also be combined with other elements or strings using the
    addition operator, which results in a `Fragment` object that contains the
    operands:

    >>> print Element('br') + 'some text' + Element('br')
    <br/>some text<br/>

    Elements with a namespace can be generated using the `Namespace` and/or
    `QName` classes:

    >>> from genshi.core import Namespace
    >>> xhtml = Namespace('http://www.w3.org/1999/xhtml')
    >>> print Element(xhtml.html, lang='en')
    <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>
    """
    __slots__ = ['tag', 'attrib']

    def __init__(self, tag_, **attrib):
        """Create the element.

        :param tag_: the tag name; it is converted to a `QName`
        :param attrib: the initial attributes, given as keyword arguments
        """
        Fragment.__init__(self)
        self.tag = QName(tag_)
        self.attrib = _kwargs_to_attrs(attrib)

    def __call__(self, *args, **kwargs):
        """Append any positional arguments as child nodes, and keyword arguments
        as attributes.

        :return: the element itself so that calls can be chained
        :rtype: `Element`
        :see: `Fragment.append`
        """
        # Attributes are merged first, then the children are appended.
        extra = _kwargs_to_attrs(kwargs)
        self.attrib = self.attrib | extra
        Fragment.__call__(self, *args)
        return self

    def __repr__(self):
        return '<%s "%s">' % (self.__class__.__name__, self.tag)

    def _generate(self):
        # Wrap the events of the children in this element's START/END pair.
        yield START, (self.tag, self.attrib), (None, -1, -1)
        for event_kind, event_data, event_pos in Fragment._generate(self):
            yield event_kind, event_data, event_pos
        yield END, self.tag, (None, -1, -1)

    def generate(self):
        """Return a markup event stream for the element.

        :rtype: `Stream`
        """
        return Stream(self._generate())
class ElementFactory(object):
    """Factory for `Element` objects.

    A new element is created simply by accessing a correspondingly named
    attribute of the factory object:

    >>> factory = ElementFactory()
    >>> print factory.foo
    <foo/>
    >>> print factory.foo(id=2)
    <foo id="2"/>

    Markup fragments (lists of nodes without a parent element) can be created
    by calling the factory:

    >>> print factory('Hello, ', factory.em('world'), '!')
    Hello, <em>world</em>!

    A factory can also be bound to a specific namespace:

    >>> factory = ElementFactory('http://www.w3.org/1999/xhtml')
    >>> print factory.html(lang="en")
    <html xmlns="http://www.w3.org/1999/xhtml" lang="en"/>

    The namespace for a specific element can be altered on an existing factory
    by specifying the new namespace using item access:

    >>> factory = ElementFactory()
    >>> print factory.html(factory['http://www.w3.org/2000/svg'].g(id=3))
    <html><g xmlns="http://www.w3.org/2000/svg" id="3"/></html>

    Usually, the `ElementFactory` class is not used directly. Rather, the
    `tag` instance should be used to create elements.
    """

    def __init__(self, namespace=None):
        """Create the factory, optionally bound to the given namespace.

        :param namespace: the namespace URI for any created elements, or `None`
                          for no namespace
        """
        # A truthy value that is not yet a `Namespace` gets wrapped in one;
        # anything else (including `None`) is stored unchanged.
        if not namespace or isinstance(namespace, Namespace):
            self.namespace = namespace
        else:
            self.namespace = Namespace(namespace)

    def __call__(self, *args):
        """Create a fragment that has the given positional arguments as child
        nodes.

        :return: the created `Fragment`
        :rtype: `Fragment`
        """
        fragment = Fragment()
        return fragment(*args)

    def __getitem__(self, namespace):
        """Return a new factory that is bound to the specified namespace.

        :param namespace: the namespace URI or `Namespace` object
        :return: an `ElementFactory` that produces elements bound to the given
                 namespace
        :rtype: `ElementFactory`
        """
        return ElementFactory(namespace)

    def __getattr__(self, name):
        """Create an `Element` with the given name.

        :param name: the tag name of the element to create
        :return: an `Element` with the specified name
        :rtype: `Element`
        """
        # Qualify the name with the bound namespace, if there is one; fall
        # back to the bare name otherwise.
        if self.namespace:
            qualified = self.namespace[name]
            if qualified:
                return Element(qualified)
        return Element(name)
tag = ElementFactory()
"""Global `ElementFactory` bound to the default namespace.
:type: `ElementFactory`
"""

View File

@ -0,0 +1,705 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Core classes for markup processing."""
from itertools import chain
import operator
from calibre.utils.genshi.util import plaintext, stripentities, striptags
__all__ = ['Stream', 'Markup', 'escape', 'unescape', 'Attrs', 'Namespace',
'QName']
__docformat__ = 'restructuredtext en'
class StreamEventKind(str):
    """A kind of event on a markup stream.

    Instances are cached per value, so creating the same kind twice yields
    the very same object.
    """
    __slots__ = []
    _instances = {}  # cache of the instances created so far, keyed by value

    def __new__(cls, val):
        registry = cls._instances
        if val not in registry:
            registry[val] = str.__new__(cls, val)
        return registry[val]
class Stream(object):
    """Represents a stream of markup events.

    This class is basically an iterator over the events.

    Stream events are tuples of the form::

      (kind, data, position)

    where ``kind`` is the event kind (such as `START`, `END`, `TEXT`, etc),
    ``data`` depends on the kind of event, and ``position`` is a
    ``(filename, line, offset)`` tuple that contains the location of the
    original element or text in the input. If the original location is unknown,
    ``position`` is ``(None, -1, -1)``.

    Also provided are ways to serialize the stream to text. The `serialize()`
    method will return an iterator over generated strings, while `render()`
    returns the complete generated text at once. Both accept various parameters
    that impact the way the stream is serialized.
    """
    __slots__ = ['events', 'serializer']

    START = StreamEventKind('START') #: a start tag
    END = StreamEventKind('END') #: an end tag
    TEXT = StreamEventKind('TEXT') #: literal text
    XML_DECL = StreamEventKind('XML_DECL') #: XML declaration
    DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration
    START_NS = StreamEventKind('START_NS') #: start namespace mapping
    END_NS = StreamEventKind('END_NS') #: end namespace mapping
    START_CDATA = StreamEventKind('START_CDATA') #: start CDATA section
    END_CDATA = StreamEventKind('END_CDATA') #: end CDATA section
    PI = StreamEventKind('PI') #: processing instruction
    COMMENT = StreamEventKind('COMMENT') #: comment

    def __init__(self, events, serializer=None):
        """Initialize the stream with a sequence of markup events.

        :param events: a sequence or iterable providing the events
        :param serializer: the default serialization method to use for this
                           stream

        :note: Changed in 0.5: added the `serializer` argument
        """
        self.events = events #: The underlying iterable producing the events
        self.serializer = serializer #: The default serialization method

    def __iter__(self):
        return iter(self.events)

    def __or__(self, function):
        """Override the "bitwise or" operator to apply filters or serializers
        to the stream, providing a syntax similar to pipes on Unix shells.

        Assume the following stream produced by the `HTML` function:

        >>> from genshi.input import HTML
        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
        >>> print html
        <p onclick="alert('Whoa')">Hello, world!</p>

        A filter such as the HTML sanitizer can be applied to that stream using
        the pipe notation as follows:

        >>> from genshi.filters import HTMLSanitizer
        >>> sanitizer = HTMLSanitizer()
        >>> print html | sanitizer
        <p>Hello, world!</p>

        Filters can be any function that accepts and produces a stream (where
        a stream is anything that iterates over events):

        >>> def uppercase(stream):
        ...     for kind, data, pos in stream:
        ...         if kind is TEXT:
        ...             data = data.upper()
        ...         yield kind, data, pos
        >>> print html | sanitizer | uppercase
        <p>HELLO, WORLD!</p>

        Serializers can also be used with this notation:

        >>> from genshi.output import TextSerializer
        >>> output = TextSerializer()
        >>> print html | sanitizer | uppercase | output
        HELLO, WORLD!

        Commonly, serializers should be used at the end of the "pipeline";
        using them somewhere in the middle may produce unexpected results.

        :param function: the callable object that should be applied as a filter
        :return: the filtered stream
        :rtype: `Stream`
        """
        # The result of the filter is normalized into real markup events and
        # the new stream inherits this stream's default serializer.
        return Stream(_ensure(function(self)), serializer=self.serializer)

    def filter(self, *filters):
        """Apply filters to the stream.

        This method returns a new stream with the given filters applied. The
        filters must be callables that accept the stream object as parameter,
        and return the filtered stream.

        The call::

            stream.filter(filter1, filter2)

        is equivalent to::

            stream | filter1 | filter2

        :param filters: one or more callable objects that should be applied as
                        filters
        :return: the filtered stream
        :rtype: `Stream`
        """
        return reduce(operator.or_, (self,) + filters)

    def render(self, method=None, encoding='utf-8', out=None, **kwargs):
        """Return a string representation of the stream.

        Any additional keyword arguments are passed to the serializer, and thus
        depend on the `method` parameter value.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :param encoding: how the output string should be encoded; if set to
                         `None`, this method returns a `unicode` object
        :param out: a file-like object that the output should be written to
                    instead of being returned as one big string; note that if
                    this is a file or socket (or similar), the `encoding` must
                    not be `None` (that is, the output must be encoded)
        :return: a `str` or `unicode` object (depending on the `encoding`
                 parameter), or `None` if the `out` parameter is provided
        :rtype: `basestring`

        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        :note: Changed in 0.5: added the `out` parameter
        """
        from calibre.utils.genshi.output import encode
        if method is None:
            method = self.serializer or 'xml'
        generator = self.serialize(method=method, **kwargs)
        return encode(generator, method=method, encoding=encoding, out=out)

    def select(self, path, namespaces=None, variables=None):
        """Return a new stream that contains the events matching the given
        XPath expression.

        >>> from genshi import HTML
        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
        >>> print stream.select('elem')
        <elem>foo</elem><elem>bar</elem>
        >>> print stream.select('elem/text()')
        foobar

        Note that the outermost element of the stream becomes the *context
        node* for the XPath test. That means that the expression "doc" would
        not match anything in the example above, because it only tests against
        child elements of the outermost element:

        >>> print stream.select('doc')
        <BLANKLINE>

        You can use the "." expression to match the context node itself
        (although that usually makes little sense):

        >>> print stream.select('.')
        <doc><elem>foo</elem><elem>bar</elem></doc>

        :param path: a string containing the XPath expression
        :param namespaces: mapping of namespace prefixes used in the path
        :param variables: mapping of variable names to values
        :return: the selected substream
        :rtype: `Stream`
        :raises PathSyntaxError: if the given path expression is invalid or not
                                 supported
        """
        # Prefer the path module bundled alongside this one, like the other
        # lazy imports in this class; fall back to a stand-alone Genshi
        # installation only if the bundled module cannot be imported.
        try:
            from calibre.utils.genshi.path import Path
        except ImportError:
            from genshi.path import Path
        return Path(path).select(self, namespaces, variables)

    def serialize(self, method='xml', **kwargs):
        """Generate strings corresponding to a specific serialization of the
        stream.

        Unlike the `render()` method, this method is a generator that returns
        the serialized output incrementally, as opposed to returning a single
        string.

        Any additional keyword arguments are passed to the serializer, and thus
        depend on the `method` parameter value.

        :param method: determines how the stream is serialized; can be either
                       "xml", "xhtml", "html", "text", or a custom serializer
                       class; if `None`, the default serialization method of
                       the stream is used
        :return: an iterator over the serialization results (`Markup` or
                 `unicode` objects, depending on the serialization method)
        :rtype: ``iterator``
        :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer
        """
        from calibre.utils.genshi.output import get_serializer
        if method is None:
            method = self.serializer or 'xml'
        return get_serializer(method, **kwargs)(_ensure(self))

    def __str__(self):
        return self.render()

    def __unicode__(self):
        return self.render(encoding=None)

    def __html__(self):
        return self
START = Stream.START
END = Stream.END
TEXT = Stream.TEXT
XML_DECL = Stream.XML_DECL
DOCTYPE = Stream.DOCTYPE
START_NS = Stream.START_NS
END_NS = Stream.END_NS
START_CDATA = Stream.START_CDATA
END_CDATA = Stream.END_CDATA
PI = Stream.PI
COMMENT = Stream.COMMENT
def _ensure(stream):
    """Ensure that every item on the stream is actually a markup event.

    The first item decides how the whole iterable is treated: if it is a
    3-tuple, all items are passed through unchanged; otherwise every item is
    converted, either through its ``totuple()`` method or into a `TEXT` event
    holding its `unicode` representation.

    :param stream: any iterable
    :return: a generator yielding markup events
    """
    stream = iter(stream)
    try:
        event = stream.next()
    except StopIteration:
        # An empty stream yields nothing. Handling this explicitly avoids
        # relying on StopIteration escaping from the generator body.
        return

    # Check whether the iterable is a real markup event stream by examining the
    # first item it yields; if it's not we'll need to do some conversion
    if type(event) is not tuple or len(event) != 3:
        for event in chain([event], stream):
            if hasattr(event, 'totuple'):
                event = event.totuple()
            else:
                event = TEXT, unicode(event), (None, -1, -1)
            yield event
        return

    # This looks like a markup event stream, so we'll just pass it through
    # unchanged
    yield event
    for event in stream:
        yield event
class Attrs(tuple):
    """Immutable sequence type that stores the attributes of an element.

    Ordering of the attributes is preserved, while access by name is also
    supported.

    >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])

    >>> 'href' in attrs
    True
    >>> 'tabindex' in attrs
    False
    >>> attrs.get('title')
    'Foo'

    Instances may not be manipulated directly. Instead, the operators ``|`` and
    ``-`` can be used to produce new instances that have specific attributes
    added, replaced or removed.

    To remove an attribute, use the ``-`` operator. The right hand side can be
    either a string or a set/sequence of strings, identifying the name(s) of
    the attribute(s) to remove:

    >>> attrs - 'title'
    Attrs([('href', '#')])
    >>> attrs - ('title', 'href')
    Attrs()

    The original instance is not modified, but the operator can of course be
    used with an assignment:

    >>> attrs
    Attrs([('href', '#'), ('title', 'Foo')])
    >>> attrs -= 'title'
    >>> attrs
    Attrs([('href', '#')])

    To add a new attribute, use the ``|`` operator, where the right hand value
    is a sequence of ``(name, value)`` tuples (which includes `Attrs`
    instances):

    >>> attrs | [('title', 'Bar')]
    Attrs([('href', '#'), ('title', 'Bar')])

    If the attributes already contain an attribute with a given name, the value
    of that attribute is replaced:

    >>> attrs | [('href', 'http://example.org/')]
    Attrs([('href', 'http://example.org/')])
    """
    __slots__ = []

    def __contains__(self, name):
        """Return whether the list includes an attribute with the specified
        name.

        :return: `True` if the list includes the attribute, `False` otherwise
        :rtype: `bool`
        """
        for attr, _ in self:
            if attr == name:
                return True
        # Return a real boolean, as documented, instead of falling off the
        # end of the method and returning ``None``.
        return False

    def __getslice__(self, i, j):
        """Return a slice of the attributes list.

        >>> attrs = Attrs([('href', '#'), ('title', 'Foo')])
        >>> attrs[1:]
        Attrs([('title', 'Foo')])
        """
        return Attrs(tuple.__getslice__(self, i, j))

    def __or__(self, attrs):
        """Return a new instance that contains the attributes in `attrs` in
        addition to any already existing attributes.

        :param attrs: a sequence of ``(name, value)`` tuples
        :return: a new instance with the merged attributes
        :rtype: `Attrs`
        """
        # Values for names that already exist replace the old value in place,
        # keeping the original position; new names are appended at the end.
        repl = dict([(an, av) for an, av in attrs if an in self])
        return Attrs([(sn, repl.get(sn, sv)) for sn, sv in self] +
                     [(an, av) for an, av in attrs if an not in self])

    def __repr__(self):
        if not self:
            return 'Attrs()'
        return 'Attrs([%s])' % ', '.join([repr(item) for item in self])

    def __sub__(self, names):
        """Return a new instance with all attributes with a name in `names`
        removed.

        :param names: the names of the attributes to remove
        :return: a new instance with the attribute removed
        :rtype: `Attrs`
        """
        # A single name given as a string is treated as a one-element tuple.
        if isinstance(names, basestring):
            names = (names,)
        return Attrs([(name, val) for name, val in self if name not in names])

    def get(self, name, default=None):
        """Return the value of the attribute with the specified name, or the
        value of the `default` parameter if no such attribute is found.

        :param name: the name of the attribute
        :param default: the value to return when the attribute does not exist
        :return: the attribute value, or the `default` value if that attribute
                 does not exist
        :rtype: `object`
        """
        for attr, value in self:
            if attr == name:
                return value
        return default

    def totuple(self):
        """Return the attributes as a markup event.

        The returned event is a `TEXT` event, the data is the value of all
        attributes joined together.

        >>> Attrs([('href', '#'), ('title', 'Foo')]).totuple()
        ('TEXT', u'#Foo', (None, -1, -1))

        :return: a `TEXT` event
        :rtype: `tuple`
        """
        return TEXT, u''.join([x[1] for x in self]), (None, -1, -1)
class Markup(unicode):
    """A string that is considered safe for inclusion in HTML/XML output and
    therefore does not need to be escaped again.
    """
    __slots__ = []

    def __add__(self, other):
        # The right-hand operand is escaped before being appended.
        left = unicode(self)
        right = unicode(escape(other))
        return Markup(left + right)

    def __radd__(self, other):
        # The left-hand operand is escaped before being prepended.
        left = unicode(escape(other))
        right = unicode(self)
        return Markup(left + right)

    def __mod__(self, args):
        # Every substituted value is escaped before interpolation.
        if isinstance(args, dict):
            args = dict([(key, escape(val)) for key, val in args.items()])
        elif isinstance(args, (list, tuple)):
            args = tuple([escape(arg) for arg in args])
        else:
            args = escape(args)
        return Markup(unicode.__mod__(self, args))

    def __mul__(self, num):
        repeated = unicode(self) * num
        return Markup(repeated)

    def __rmul__(self, num):
        repeated = num * unicode(self)
        return Markup(repeated)

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, unicode(self))

    def join(self, seq, escape_quotes=True):
        """Return a `Markup` object which is the concatenation of the strings
        in the given sequence, where this `Markup` object is the separator
        between the joined elements.

        Any element in the sequence that is not a `Markup` instance is
        automatically escaped.

        :param seq: the sequence of strings to join
        :param escape_quotes: whether double quote characters in the elements
                              should be escaped
        :return: the joined `Markup` object
        :rtype: `Markup`
        :see: `escape`
        """
        separator = unicode(self)
        pieces = [escape(item, quotes=escape_quotes) for item in seq]
        return Markup(separator.join(pieces))

    @classmethod
    def escape(cls, text, quotes=True):
        """Create a Markup instance from a string and escape special characters
        it may contain (<, >, & and \").

        >>> escape('"1 < 2"')
        <Markup u'&#34;1 &lt; 2&#34;'>

        If the `quotes` parameter is set to `False`, the \" character is left
        as is. Escaping quotes is generally only required for strings that are
        to be used in attribute values.

        >>> escape('"1 < 2"', quotes=False)
        <Markup u'"1 &lt; 2"'>

        :param text: the text to escape
        :param quotes: if ``True``, double quote characters are escaped in
                       addition to the other special characters
        :return: the escaped `Markup` string
        :rtype: `Markup`
        """
        # Any falsy value results in an empty instance.
        if not text:
            return cls()
        # Text that is already of this exact type is returned untouched.
        if type(text) is cls:
            return text
        # Objects providing ``__html__`` supply their own markup.
        if hasattr(text, '__html__'):
            return Markup(text.__html__())
        # The ampersand must be replaced first so that the entities produced
        # by the later replacements are not escaped a second time.
        escaped = unicode(text)
        for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
            escaped = escaped.replace(raw, entity)
        if quotes:
            escaped = escaped.replace('"', '&#34;')
        return cls(escaped)

    def unescape(self):
        """Reverse-escapes &, <, >, and \" and returns a `unicode` object.

        >>> Markup('1 &lt; 2').unescape()
        u'1 < 2'

        :return: the unescaped string
        :rtype: `unicode`
        :see: `genshi.core.unescape`
        """
        if not self:
            return u''
        # The ampersand entity is replaced last, mirroring `escape`.
        plain = unicode(self)
        for entity, raw in (('&#34;', '"'), ('&gt;', '>'),
                            ('&lt;', '<'), ('&amp;', '&')):
            plain = plain.replace(entity, raw)
        return plain

    def stripentities(self, keepxmlentities=False):
        """Return a copy of the text with any character or numeric entities
        replaced by the equivalent UTF-8 characters.

        If the `keepxmlentities` parameter is provided and evaluates to `True`,
        the core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
        ``&quot;``) are not stripped.

        :return: a `Markup` instance with entities removed
        :rtype: `Markup`
        :see: `genshi.util.stripentities`
        """
        stripped = stripentities(self, keepxmlentities=keepxmlentities)
        return Markup(stripped)

    def striptags(self):
        """Return a copy of the text with all XML/HTML tags removed.

        :return: a `Markup` instance with all tags removed
        :rtype: `Markup`
        :see: `genshi.util.striptags`
        """
        stripped = striptags(self)
        return Markup(stripped)
try:
from calibre.utils.genshi._speedups import Markup
except ImportError:
pass # just use the Python implementation
escape = Markup.escape
def unescape(text):
    """Reverse-escapes &, <, >, and \" and returns a `unicode` object.

    >>> unescape(Markup('1 &lt; 2'))
    u'1 < 2'

    If the provided `text` object is not a `Markup` instance, it is returned
    unchanged.

    >>> unescape('1 &lt; 2')
    '1 &lt; 2'

    :param text: the text to unescape
    :return: the unescaped string
    :rtype: `unicode`
    """
    if isinstance(text, Markup):
        return text.unescape()
    return text
class Namespace(object):
    """Utility class creating and testing elements with a namespace.

    Internally, namespace URIs are encoded in the `QName` of any element or
    attribute, the namespace URI being enclosed in curly braces. This class
    helps create and test these strings.

    A `Namespace` object is instantiated with the namespace URI.

    >>> html = Namespace('http://www.w3.org/1999/xhtml')
    >>> html
    <Namespace "http://www.w3.org/1999/xhtml">
    >>> html.uri
    u'http://www.w3.org/1999/xhtml'

    The `Namespace` object can then be used to generate `QName` objects with
    that namespace:

    >>> html.body
    QName(u'http://www.w3.org/1999/xhtml}body')
    >>> html.body.localname
    u'body'
    >>> html.body.namespace
    u'http://www.w3.org/1999/xhtml'

    The same works using item access notation, which is useful for element or
    attribute names that are not valid Python identifiers:

    >>> html['body']
    QName(u'http://www.w3.org/1999/xhtml}body')

    A `Namespace` object can also be used to test whether a specific `QName`
    belongs to that namespace using the ``in`` operator:

    >>> qname = html.body
    >>> qname in html
    True
    >>> qname in Namespace('http://www.w3.org/2002/06/xhtml2')
    False
    """

    def __new__(cls, uri):
        # Passing an existing instance of exactly this class returns it as-is
        # instead of allocating a new object.
        already_namespace = type(uri) is cls
        if not already_namespace:
            return object.__new__(cls)
        return uri

    def __getnewargs__(self):
        return (self.uri,)

    def __getstate__(self):
        return self.uri

    def __setstate__(self, uri):
        self.uri = uri

    def __init__(self, uri):
        self.uri = unicode(uri)

    def __contains__(self, qname):
        return qname.namespace == self.uri

    def __ne__(self, other):
        return not (self == other)

    def __eq__(self, other):
        # Another `Namespace` is compared by URI; anything else is compared
        # with the URI directly.
        if not isinstance(other, Namespace):
            return self.uri == other
        return self.uri == other.uri

    def __getitem__(self, name):
        qualified = self.uri + u'}' + name
        return QName(qualified)

    # Attribute access is an alias for item access.
    __getattr__ = __getitem__

    def __repr__(self):
        return '<Namespace "%s">' % self.uri

    def __str__(self):
        return self.uri.encode('utf-8')

    def __unicode__(self):
        return self.uri
# The namespace used by attributes such as xml:lang and xml:space; item access
# on it (for example ``XML_NAMESPACE['lang']``) yields the qualified name.
XML_NAMESPACE = Namespace('http://www.w3.org/XML/1998/namespace')
class QName(unicode):
    """A qualified element or attribute name.

    The unicode value of instances of this class contains the qualified name of
    the element or attribute, in the form ``{namespace-uri}local-name``. The
    namespace URI can be obtained through the additional `namespace` attribute,
    while the local name can be accessed through the `localname` attribute.

    >>> qname = QName('foo')
    >>> qname
    QName(u'foo')
    >>> qname.localname
    u'foo'
    >>> qname.namespace

    >>> qname = QName('http://www.w3.org/1999/xhtml}body')
    >>> qname
    QName(u'http://www.w3.org/1999/xhtml}body')
    >>> qname.localname
    u'body'
    >>> qname.namespace
    u'http://www.w3.org/1999/xhtml'

    The leading curly brace may be given or left out; both spellings produce
    the same value:

    >>> QName('{http://www.w3.org/1999/xhtml}body') == qname
    True
    """
    __slots__ = ['namespace', 'localname']

    def __new__(cls, qname):
        """Create the `QName` instance.

        :param qname: the qualified name as a string of the form
                      ``{namespace-uri}local-name``, where the leading curly
                      brace is optional
        :return: `qname` itself when it already is a `QName`, otherwise a new
                 instance
        """
        if type(qname) is cls:
            return qname

        # Strip the optional leading brace *before* building the value, so
        # that ``{uri}name`` and ``uri}name`` both end up with exactly one
        # brace instead of the former becoming ``{{uri}name``.
        qname = qname.lstrip(u'{')
        parts = qname.split(u'}', 1)
        if len(parts) > 1:
            self = unicode.__new__(cls, u'{%s' % qname)
            self.namespace, self.localname = map(unicode, parts)
        else:
            # No namespace part: the whole string is the local name.
            self = unicode.__new__(cls, qname)
            self.namespace, self.localname = None, unicode(qname)
        return self

    def __getnewargs__(self):
        # Pickle/copy rebuild the object through __new__ with the brace-less
        # form, which __new__ accepts.
        return (self.lstrip('{'),)

    def __repr__(self):
        return 'QName(%s)' % unicode.__repr__(self.lstrip('{'))

View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Implementation of a number of stream filters."""
from calibre.utils.genshi.filters.html import HTMLFormFiller, HTMLSanitizer
from calibre.utils.genshi.filters.i18n import Translator
from calibre.utils.genshi.filters.transform import Transformer
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,397 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Implementation of a number of stream filters."""
try:
set
except NameError:
from sets import ImmutableSet as frozenset
from sets import Set as set
import re
from calibre.utils.genshi.core import Attrs, QName, stripentities
from calibre.utils.genshi.core import END, START, TEXT, COMMENT
__all__ = ['HTMLFormFiller', 'HTMLSanitizer']
__docformat__ = 'restructuredtext en'
class HTMLFormFiller(object):
    """A stream filter that can populate HTML forms from a dictionary of values.

    >>> from genshi.input import HTML
    >>> html = HTML('''<form>
    ... <p><input type="text" name="foo" /></p>
    ... </form>''')
    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
    >>> print html | filler
    <form>
    <p><input type="text" name="foo" value="bar"/></p>
    </form>
    """
    # TODO: only select the first radio button, and the first select option
    # (if not in a multiple-select)
    # TODO: only apply to elements in the XHTML namespace (or no namespace)?

    def __init__(self, name=None, id=None, data=None):
        """Create the filter.

        :param name: The name of the form that should be populated. If this
                     parameter is given, only forms where the ``name`` attribute
                     value matches the parameter are processed.
        :param id: The ID of the form that should be populated. If this
                   parameter is given, only forms where the ``id`` attribute
                   value matches the parameter are processed.
        :param data: The dictionary of form values, where the keys are the names
                     of the form fields, and the values are the values to fill
                     in.
        """
        self.name = name
        self.id = id
        # A fresh dict per instance when no data is supplied.
        if data is None:
            data = {}
        self.data = data

    def __call__(self, stream):
        """Apply the filter to the given stream.

        Events outside a matching ``<form>`` are passed through unchanged.
        Inside one, ``input``, ``select``/``option`` and ``textarea`` elements
        whose ``name`` is a key of `self.data` are adjusted to carry the
        corresponding value.

        :param stream: the markup event stream to filter
        :return: a generator over the (possibly modified) events
        """
        # Flags telling which kind of element the stream is currently inside.
        in_form = in_select = in_option = in_textarea = False
        select_value = option_value = textarea_value = None
        # The START event of the current <option>; it is held back until the
        # matching END so that the "selected" attribute can be decided after
        # the option's text has been seen.
        option_start = None
        # TEXT events buffered while inside a held-back <option>.
        option_text = []
        # True when the current <option> has no "value" attribute, in which
        # case its text content is accumulated as the value.
        no_option_value = False

        for kind, data, pos in stream:

            if kind is START:
                tag, attrs = data
                tagname = tag.localname

                # A form matches by name, by id, or unconditionally when the
                # filter was configured with neither.
                if tagname == 'form' and (
                        self.name and attrs.get('name') == self.name or
                        self.id and attrs.get('id') == self.id or
                        not (self.id or self.name)):
                    in_form = True

                elif in_form:
                    if tagname == 'input':
                        type = attrs.get('type')
                        if type in ('checkbox', 'radio'):
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self.data[name]
                                declval = attrs.get('value')
                                checked = False
                                if isinstance(value, (list, tuple)):
                                    # Multiple values: checked when the
                                    # declared value is among them, or (with
                                    # no declared value) when any is truthy.
                                    if declval:
                                        checked = declval in [unicode(v) for v
                                                              in value]
                                    else:
                                        checked = bool(filter(None, value))
                                else:
                                    if declval:
                                        checked = declval == unicode(value)
                                    elif type == 'checkbox':
                                        checked = bool(value)

                                if checked:
                                    attrs |= [(QName('checked'), 'checked')]
                                elif 'checked' in attrs:
                                    attrs -= 'checked'
                        elif type in (None, 'hidden', 'text'):
                            name = attrs.get('name')
                            if name and name in self.data:
                                value = self.data[name]
                                # Only the first of several values is used.
                                if isinstance(value, (list, tuple)):
                                    value = value[0]
                                if value is not None:
                                    attrs |= [(QName('value'), unicode(value))]
                    elif tagname == 'select':
                        name = attrs.get('name')
                        if name in self.data:
                            select_value = self.data[name]
                            in_select = True
                    elif tagname == 'textarea':
                        name = attrs.get('name')
                        if name in self.data:
                            textarea_value = self.data.get(name)
                            if isinstance(textarea_value, (list, tuple)):
                                textarea_value = textarea_value[0]
                            in_textarea = True
                    elif in_select and tagname == 'option':
                        option_start = kind, data, pos
                        option_value = attrs.get('value')
                        if option_value is None:
                            no_option_value = True
                            option_value = ''
                        in_option = True
                        # Do not emit the START yet; see the END branch below.
                        continue
                yield kind, (tag, attrs), pos

            elif in_form and kind is TEXT:
                if in_select and in_option:
                    if no_option_value:
                        option_value += data
                    option_text.append((kind, data, pos))
                    continue
                elif in_textarea:
                    # Existing textarea content is dropped; the replacement
                    # text is emitted when the element ends.
                    continue
                yield kind, data, pos

            elif in_form and kind is END:
                tagname = data.localname
                if tagname == 'form':
                    in_form = False
                elif tagname == 'select':
                    in_select = False
                    select_value = None
                elif in_select and tagname == 'option':
                    if isinstance(select_value, (tuple, list)):
                        selected = option_value in [unicode(v) for v
                                                    in select_value]
                    else:
                        selected = option_value == unicode(select_value)
                    okind, (tag, attrs), opos = option_start
                    if selected:
                        attrs |= [(QName('selected'), 'selected')]
                    elif 'selected' in attrs:
                        attrs -= 'selected'
                    # Now release the held-back START and the buffered text.
                    yield okind, (tag, attrs), opos
                    if option_text:
                        for event in option_text:
                            yield event
                    in_option = False
                    no_option_value = False
                    option_start = option_value = None
                    option_text = []
                elif tagname == 'textarea':
                    # Nothing is emitted for a falsy replacement value.
                    if textarea_value:
                        yield TEXT, unicode(textarea_value), pos
                    in_textarea = False
                yield kind, data, pos

            else:
                yield kind, data, pos
class HTMLSanitizer(object):
    """A filter that removes potentially dangerous HTML tags and attributes
    from the stream.

    >>> from genshi import HTML
    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
    >>> print html | HTMLSanitizer()
    <div/>

    The default set of safe tags and attributes can be modified when the filter
    is instantiated. For example, to allow inline ``style`` attributes, the
    following instantiation would work:

    >>> html = HTML('<div style="background: #000"></div>')
    >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    >>> print html | sanitizer
    <div style="background: #000"/>

    Note that even in this case, the filter *does* attempt to remove dangerous
    constructs from style attributes:

    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
    >>> print html | sanitizer
    <div style="color: #000"/>

    This handles HTML entities, unicode escapes in CSS and Javascript text, as
    well as a lot of other things. However, the style tag is still excluded by
    default because it is very hard for such sanitizing to be completely safe,
    especially considering how much error recovery current web browsers perform.

    :warn: Note that this special processing of CSS is currently only applied to
           style attributes, **not** style elements.
    """

    SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b',
        'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
        'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
        'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
        'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
        'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
        'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
        'ul', 'var'])

    SAFE_ATTRS = frozenset(['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'axis', 'bgcolor', 'border', 'cellpadding',
        'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
        'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
        'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
        'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
        'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
        'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
        'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
        'type', 'usemap', 'valign', 'value', 'vspace', 'width'])

    SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None])

    URI_ATTRS = frozenset(['action', 'background', 'dynsrc', 'href', 'lowsrc',
        'src'])

    def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS,
                 safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS):
        """Create the sanitizer.

        The exact set of allowed elements and attributes can be configured.

        :param safe_tags: a set of tag names that are considered safe
        :param safe_attrs: a set of attribute names that are considered safe
        :param safe_schemes: a set of URI schemes that are considered safe
        :param uri_attrs: a set of names of attributes that contain URIs
        """
        # The set of tag names that are considered safe.
        self.safe_tags = safe_tags
        # The set of attribute names that are considered safe.
        self.safe_attrs = safe_attrs
        # The set of names of attributes that may contain URIs.
        self.uri_attrs = uri_attrs
        # The set of URI schemes that are considered safe.
        self.safe_schemes = safe_schemes

    def __call__(self, stream):
        """Apply the filter to the given stream.

        :param stream: the markup event stream to filter
        :return: a generator over the events that are considered safe
        """
        # While set, holds the tag of the unsafe element being skipped; all
        # events up to its matching END are dropped.
        waiting_for = None

        for kind, data, pos in stream:
            if kind is START:
                if waiting_for:
                    continue
                tag, attrs = data
                if tag not in self.safe_tags:
                    waiting_for = tag
                    continue

                new_attrs = []
                for attr, value in attrs:
                    # Entities are resolved first so that they cannot be used
                    # to disguise a dangerous value.
                    value = stripentities(value)
                    if attr not in self.safe_attrs:
                        continue
                    elif attr in self.uri_attrs:
                        # Don't allow URI schemes such as "javascript:"
                        if not self.is_safe_uri(value):
                            continue
                    elif attr == 'style':
                        # Remove dangerous CSS declarations from inline styles
                        decls = self.sanitize_css(value)
                        if not decls:
                            continue
                        value = '; '.join(decls)
                    new_attrs.append((attr, value))

                yield kind, (tag, Attrs(new_attrs)), pos

            elif kind is END:
                tag = data
                if waiting_for:
                    if waiting_for == tag:
                        waiting_for = None
                else:
                    yield kind, data, pos

            elif kind is not COMMENT:
                # Comments are always dropped; everything else passes unless
                # it sits inside a skipped element.
                if not waiting_for:
                    yield kind, data, pos

    def is_safe_uri(self, uri):
        """Determine whether the given URI is to be considered safe for
        inclusion in the output.

        The default implementation checks whether the scheme of the URI is in
        the set of allowed URIs (`safe_schemes`). The fragment identifier is
        ignored for this check, so a colon inside it does not make a relative
        URI look like it had a scheme.

        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.is_safe_uri('http://example.org/')
        True
        >>> sanitizer.is_safe_uri('javascript:alert(document.cookie)')
        False
        >>> sanitizer.is_safe_uri('page.html#section:1')
        True

        :param uri: the URI to check
        :return: `True` if the URI can be considered safe, `False` otherwise
        :rtype: `bool`
        :since: version 0.4.3
        """
        if '#' in uri:
            uri = uri.split('#', 1)[0] # Strip out the fragment identifier
        if ':' not in uri:
            return True # This is a relative URI
        # Non-alphanumeric characters are removed from the scheme so that it
        # cannot be obfuscated with embedded whitespace or control characters.
        chars = [char for char in uri.split(':', 1)[0] if char.isalnum()]
        return ''.join(chars).lower() in self.safe_schemes

    def sanitize_css(self, text):
        """Remove potentially dangerous property declarations from CSS code.

        In particular, properties using the CSS ``url()`` function with a scheme
        that is not considered safe are removed:

        >>> sanitizer = HTMLSanitizer()
        >>> sanitizer.sanitize_css(u'''
        ...   background: url(javascript:alert("foo"));
        ...   color: #000;
        ... ''')
        [u'color: #000']

        Also, the proprietary Internet Explorer function ``expression()`` is
        always stripped, however it is capitalized:

        >>> sanitizer.sanitize_css(u'''
        ...   background: #fff;
        ...   color: #000;
        ...   width: e/**/xpression(alert("foo"));
        ... ''')
        [u'background: #fff', u'color: #000']

        :param text: the CSS text; this is expected to be `unicode` and to not
                     contain any character or numeric references
        :return: a list of declarations that are considered safe
        :rtype: `list`
        :since: version 0.4.3
        """
        decls = []
        text = self._strip_css_comments(self._replace_unicode_escapes(text))
        for decl in text.split(';'):
            decl = decl.strip()
            if not decl:
                continue
            # CSS function names are case-insensitive, so compare lowercased.
            is_evil = 'expression' in decl.lower()
            if not is_evil:
                for match in self._URL_FINDITER(decl):
                    if not self.is_safe_uri(match.group(1)):
                        is_evil = True
                        break
            if not is_evil:
                decls.append(decl)
        return decls

    # Matches the argument of a CSS ``url(...)`` function in any letter case.
    _URL_FINDITER = re.compile(r'url\s*\(([^)]+)', re.IGNORECASE).finditer

    _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub
    _UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub

    def _replace_unicode_escapes(self, text):
        """Replace CSS hexadecimal escapes (``\\65`` etc.) in `text` with the
        characters they stand for, after normalizing ``\\r\\n`` newlines."""
        def _repl(match):
            return unichr(int(match.group(1), 16))
        return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text))

    # DOTALL is required: CSS comments may span lines, and a comment that is
    # left in place could otherwise split a keyword such as ``expression``.
    _CSS_COMMENTS = re.compile(r'/\*.*?\*/', re.DOTALL).sub

    def _strip_css_comments(self, text):
        """Return `text` with all ``/* ... */`` comments removed."""
        return self._CSS_COMMENTS('', text)

View File

@ -0,0 +1,528 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Utilities for internationalization and localization of templates.
:since: version 0.4
"""
from compiler import ast
try:
frozenset
except NameError:
from sets import ImmutableSet as frozenset
from gettext import gettext
import re
from calibre.utils.genshi.core import Attrs, Namespace, QName, START, END, TEXT, START_NS, \
END_NS, XML_NAMESPACE, _ensure
from calibre.utils.genshi.template.base import Template, EXPR, SUB
from calibre.utils.genshi.template.markup import MarkupTemplate, EXEC
__all__ = ['Translator', 'extract']
__docformat__ = 'restructuredtext en'
# Namespace of the i18n markup; ``I18N_NAMESPACE['msg']`` is the attribute the
# `Translator` looks for to start buffering a mixed-content message.
I18N_NAMESPACE = Namespace('http://genshi.edgewall.org/i18n')
class Translator(object):
    """Can extract and translate localizable strings from markup streams and
    templates.

    For example, assume the following template:

    >>> from genshi.template import MarkupTemplate
    >>>
    >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
    ... <head>
    ... <title>Example</title>
    ... </head>
    ... <body>
    ... <h1>Example</h1>
    ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
    ... </body>
    ... </html>''', filename='example.html')

    For demonstration, we define a dummy ``gettext``-style function with a
    hard-coded translation table, and pass that to the `Translator` initializer:

    >>> def pseudo_gettext(string):
    ...     return {
    ...         'Example': 'Beispiel',
    ...         'Hello, %(name)s': 'Hallo, %(name)s'
    ...     }[string]
    >>>
    >>> translator = Translator(pseudo_gettext)

    Next, the translator needs to be prepended to any already defined filters
    on the template:

    >>> tmpl.filters.insert(0, translator)

    When generating the template output, our hard-coded translations should be
    applied as expected:

    >>> print tmpl.generate(username='Hans', _=pseudo_gettext)
    <html>
    <head>
    <title>Beispiel</title>
    </head>
    <body>
    <h1>Beispiel</h1>
    <p>Hallo, Hans</p>
    </body>
    </html>

    Note that elements defining ``xml:lang`` attributes that do not contain
    variable expressions are ignored by this filter. That can be used to
    exclude specific parts of a template from being extracted and translated.
    """

    # Elements whose content is never translated or extracted.
    IGNORE_TAGS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])
    # Attributes whose plain-string values are translated or extracted.
    INCLUDE_ATTRS = frozenset(['abbr', 'alt', 'label', 'prompt', 'standby',
                               'summary', 'title'])

    def __init__(self, translate=gettext, ignore_tags=IGNORE_TAGS,
                 include_attrs=INCLUDE_ATTRS, extract_text=True):
        """Initialize the translator.

        :param translate: the translation function, for example ``gettext`` or
                          ``ugettext``.
        :param ignore_tags: a set of tag names that should not be localized
        :param include_attrs: a set of attribute names that should be localized
        :param extract_text: whether the content of text nodes should be
                             extracted, or only text in explicit ``gettext``
                             function calls
        """
        self.translate = translate
        self.ignore_tags = ignore_tags
        self.include_attrs = include_attrs
        self.extract_text = extract_text

    def __call__(self, stream, ctxt=None, search_text=True, msgbuf=None):
        """Translate any localizable strings in the given stream.

        This function shouldn't be called directly. Instead, an instance of
        the `Translator` class should be registered as a filter with the
        `Template` or the `TemplateLoader`, or applied as a regular stream
        filter. If used as a template filter, it should be inserted in front of
        all the default filters.

        :param stream: the markup event stream
        :param ctxt: the template context (not used)
        :param search_text: whether text nodes should be translated (used
                            internally)
        :param msgbuf: a `MessageBuffer` object or `None` (used internally)
        :return: the localized stream
        """
        ignore_tags = self.ignore_tags
        include_attrs = self.include_attrs
        translate = self.translate
        if not self.extract_text:
            search_text = False
        # Nesting depth inside an element that must not be localized; zero
        # means "not skipping".
        skip = 0
        i18n_msg = I18N_NAMESPACE['msg']
        # Prefixes currently bound to the i18n namespace; their START_NS and
        # END_NS events are consumed below and not passed on.
        ns_prefixes = []
        xml_lang = XML_NAMESPACE['lang']

        for kind, data, pos in stream:

            # skip chunks that should not be localized
            if skip:
                if kind is START:
                    skip += 1
                elif kind is END:
                    skip -= 1
                yield kind, data, pos
                continue

            # handle different events that can be localized
            if kind is START:
                tag, attrs = data
                # Ignored tags and elements carrying a literal xml:lang start
                # a skipped region that is passed through untouched.
                if tag in self.ignore_tags or \
                        isinstance(attrs.get(xml_lang), basestring):
                    skip += 1
                    yield kind, data, pos
                    continue

                new_attrs = []
                changed = False
                for name, value in attrs:
                    newval = value
                    if search_text and isinstance(value, basestring):
                        if name in include_attrs:
                            newval = self.translate(value)
                    else:
                        # Other values are run through this filter
                        # recursively with text translation switched off.
                        newval = list(self(_ensure(value), ctxt,
                            search_text=False, msgbuf=msgbuf)
                        )
                    if newval != value:
                        value = newval
                        changed = True
                    new_attrs.append((name, value))
                if changed:
                    attrs = Attrs(new_attrs)

                if msgbuf:
                    # Inside an i18n:msg element everything is buffered until
                    # the message is complete.
                    msgbuf.append(kind, data, pos)
                    continue
                elif i18n_msg in attrs:
                    msgbuf = MessageBuffer()
                    attrs -= i18n_msg

                yield kind, (tag, attrs), pos

            elif search_text and kind is TEXT:
                if not msgbuf:
                    text = data.strip()
                    if text:
                        # Only the stripped text is translated; surrounding
                        # whitespace of the node is kept.
                        data = data.replace(text, translate(text))
                    yield kind, data, pos
                else:
                    msgbuf.append(kind, data, pos)

            elif not skip and msgbuf and kind is END:
                msgbuf.append(kind, data, pos)
                if not msgbuf.depth:
                    # The message element is closed: emit the translated
                    # content, then the closing tag itself.
                    for event in msgbuf.translate(translate(msgbuf.format())):
                        yield event
                    msgbuf = None
                    yield kind, data, pos

            elif kind is SUB:
                subkind, substream = data
                new_substream = list(self(substream, ctxt, msgbuf=msgbuf))
                yield kind, (subkind, new_substream), pos

            elif kind is START_NS and data[1] == I18N_NAMESPACE:
                ns_prefixes.append(data[0])

            elif kind is END_NS and data in ns_prefixes:
                ns_prefixes.remove(data)

            else:
                yield kind, data, pos

    GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext', 'dgettext', 'dngettext',
                         'ugettext', 'ungettext')

    def extract(self, stream, gettext_functions=GETTEXT_FUNCTIONS,
                search_text=True, msgbuf=None):
        """Extract localizable strings from the given template stream.

        For every string found, this function yields a ``(lineno, function,
        message)`` tuple, where:

        * ``lineno`` is the number of the line on which the string was found,
        * ``function`` is the name of the ``gettext`` function used (if the
          string was extracted from embedded Python code), and
        * ``message`` is the string itself (a ``unicode`` object, or a tuple
          of ``unicode`` objects for functions with multiple string arguments).

        >>> from genshi.template import MarkupTemplate
        >>>
        >>> tmpl = MarkupTemplate('''<html xmlns:py="http://genshi.edgewall.org/">
        ... <head>
        ... <title>Example</title>
        ... </head>
        ... <body>
        ... <h1>Example</h1>
        ... <p>${_("Hello, %(name)s") % dict(name=username)}</p>
        ... <p>${ngettext("You have %d item", "You have %d items", num)}</p>
        ... </body>
        ... </html>''', filename='example.html')
        >>>
        >>> for lineno, funcname, message in Translator().extract(tmpl.stream):
        ...    print "%d, %r, %r" % (lineno, funcname, message)
        3, None, u'Example'
        6, None, u'Example'
        7, '_', u'Hello, %(name)s'
        8, 'ngettext', (u'You have %d item', u'You have %d items', None)

        :param stream: the event stream to extract strings from; can be a
                       regular stream or a template stream
        :param gettext_functions: a sequence of function names that should be
                                  treated as gettext-style localization
                                  functions
        :param search_text: whether the content of text nodes should be
                            extracted (used internally)
        :param msgbuf: a `MessageBuffer` object or `None` (used internally)

        :note: Changed in 0.4.1: For a function with multiple string arguments
               (such as ``ngettext``), a single item with a tuple of strings is
               yielded, instead an item for each string argument.
        """
        if not self.extract_text:
            search_text = False
        # Nesting depth inside an element whose content is not extracted.
        skip = 0
        i18n_msg = I18N_NAMESPACE['msg']
        xml_lang = XML_NAMESPACE['lang']

        for kind, data, pos in stream:

            if skip:
                if kind is START:
                    skip += 1
                if kind is END:
                    skip -= 1

            if kind is START and not skip:
                tag, attrs = data

                if tag in self.ignore_tags or \
                        isinstance(attrs.get(xml_lang), basestring):
                    skip += 1
                    continue

                for name, value in attrs:
                    if search_text and isinstance(value, basestring):
                        if name in self.include_attrs:
                            text = value.strip()
                            if text:
                                yield pos[1], None, text
                    else:
                        # Other values are searched recursively, with plain
                        # text extraction switched off.
                        for lineno, funcname, text in self.extract(
                                _ensure(value), gettext_functions,
                                search_text=False):
                            yield lineno, funcname, text

                if msgbuf:
                    msgbuf.append(kind, data, pos)
                elif i18n_msg in attrs:
                    # Remember the line on which the message element starts.
                    msgbuf = MessageBuffer(pos[1])

            elif not skip and search_text and kind is TEXT:
                if not msgbuf:
                    text = data.strip()
                    # Text without any alphabetic character is not reported.
                    if text and filter(None, [ch.isalpha() for ch in text]):
                        yield pos[1], None, text
                else:
                    msgbuf.append(kind, data, pos)

            elif not skip and msgbuf and kind is END:
                msgbuf.append(kind, data, pos)
                if not msgbuf.depth:
                    yield msgbuf.lineno, None, msgbuf.format()
                    msgbuf = None

            elif kind is EXPR or kind is EXEC:
                for funcname, strings in extract_from_code(data,
                                                           gettext_functions):
                    yield pos[1], funcname, strings

            elif kind is SUB:
                subkind, substream = data
                messages = self.extract(substream, gettext_functions,
                                        search_text=search_text and not skip,
                                        msgbuf=msgbuf)
                for lineno, funcname, text in messages:
                    yield lineno, funcname, text
class MessageBuffer(object):
    """Collects the events of one internationalized mixed-content message.

    :since: version 0.5
    """

    def __init__(self, lineno=-1):
        """Set up an empty buffer.

        :param lineno: the line number on which the first stream event
                       belonging to the message was found
        """
        self.lineno = lineno
        # Pieces of the message identifier, joined by `format()`.
        self.string = []
        # Maps an order number to the events recorded for it; a `None` entry
        # marks the place where text occurred.
        self.events = {}
        self.depth = 1
        self.order = 1
        # Order numbers of the currently open elements; 0 is the top level.
        self.stack = [0]

    def append(self, kind, data, pos):
        """Record one stream event in the buffer.

        :param kind: the stream event kind
        :param data: the event data
        :param pos: the position of the event in the source
        """
        if kind is TEXT:
            self.string.append(data)
            self.events.setdefault(self.stack[-1], []).append(None)
            return

        if kind is START:
            number = self.order
            self.string.append(u'[%d:' % number)
            self.events.setdefault(number, []).append((kind, data, pos))
            self.stack.append(number)
            self.depth += 1
            self.order += 1
            return

        if kind is END:
            self.depth -= 1
            # The END that brings the depth to zero closes the message
            # element itself and is not recorded.
            if self.depth:
                self.events[self.stack[-1]].append((kind, data, pos))
                self.string.append(u']')
                self.stack.pop()

    def format(self):
        """Return the message identifier for the buffered content."""
        return u''.join(self.string).strip()

    def translate(self, string):
        """Merge a translated message with the buffered events and yield the
        resulting stream.

        :param string: the translated message string
        """
        for order, text in parse_msg(string):
            pending = self.events[order]
            while pending:
                event = pending.pop(0)
                if event:
                    yield event
                    continue
                # A `None` placeholder: this is where the text goes.
                if not text:
                    break
                yield TEXT, text, (None, -1, -1)
                if not pending or not pending[0]:
                    break
def parse_msg(string, regex=re.compile(r'(?:\[(\d+)\:)|\]')):
    """Parse a translated message using Genshi mixed content message
    formatting.

    >>> parse_msg("See [1:Help].")
    [(0, 'See '), (1, 'Help'), (0, '.')]

    >>> parse_msg("See [1:our [2:Help] page] for details.")
    [(0, 'See '), (1, 'our '), (2, 'Help'), (1, ' page'), (0, ' for details.')]

    >>> parse_msg("[2:Details] finden Sie in [1:Hilfe].")
    [(2, 'Details'), (0, ' finden Sie in '), (1, 'Hilfe'), (0, '.')]

    >>> parse_msg("[1:] Bilder pro Seite anzeigen.")
    [(1, ''), (0, ' Bilder pro Seite anzeigen.')]

    A closing bracket without a matching opening marker ends the parsing; the
    text following it is attributed to the top level (order 0) rather than
    causing an error:

    >>> parse_msg("Help] page")
    [(0, 'Help'), (0, ' page')]

    :param string: the translated message string
    :param regex: the compiled pattern recognizing ``[N:`` and ``]`` markers
    :return: a list of ``(order, string)`` tuples
    :rtype: `list`
    """
    parts = []
    # Order numbers of the currently open ``[N:`` groups; 0 is the top level.
    stack = [0]

    while True:
        mo = regex.search(string)
        if not mo:
            break

        # Text before the marker belongs to the innermost open group. Empty
        # text is kept only inside a group, never at the top level.
        if mo.start() or stack[-1]:
            parts.append((stack[-1], string[:mo.start()]))
        string = string[mo.end():]

        orderno = mo.group(1)
        if orderno is not None:
            stack.append(int(orderno))
        else:
            stack.pop()
        if not stack:
            # Unbalanced "]": nothing is open any more, stop scanning.
            break

    if string:
        if stack:
            order = stack[-1]
        else:
            # The stack was emptied by an unbalanced "]"; fall back to the
            # top level instead of failing on an empty stack.
            order = 0
        parts.append((order, string))

    return parts
def extract_from_code(code, gettext_functions):
    """Extract strings from the syntax tree of embedded Python code.

    >>> from genshi.template.eval import Expression

    >>> expr = Expression('_("Hello")')
    >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
    [('_', u'Hello')]

    >>> expr = Expression('ngettext("You have %(num)s item", '
    ...                            '"You have %(num)s items", num)')
    >>> list(extract_from_code(expr, Translator.GETTEXT_FUNCTIONS))
    [('ngettext', (u'You have %(num)s item', u'You have %(num)s items', None))]

    :param code: the `Code` object; its ``ast`` attribute is walked
    :type code: `genshi.template.eval.Code`
    :param gettext_functions: a sequence of function names
    :return: an iterator over ``(funcname, strings)`` tuples, where
             ``strings`` is a single string for one-argument calls and a
             tuple otherwise (non-string arguments are reported as `None`)
    :since: version 0.5
    """
    def _walk(node):
        if isinstance(node, ast.CallFunc) and isinstance(node.node, ast.Name) \
                and node.node.name in gettext_functions:
            strings = []

            def _add(arg):
                if isinstance(arg, ast.Const) \
                        and isinstance(arg.value, basestring):
                    value = arg.value
                    # Only byte strings need decoding; passing an encoding
                    # for a value that already is unicode raises TypeError.
                    if not isinstance(value, unicode):
                        value = unicode(value, 'utf-8')
                    strings.append(value)
                elif arg and not isinstance(arg, ast.Keyword):
                    # Positional argument that is not a string constant.
                    strings.append(None)

            for arg in node.args:
                _add(arg)
            _add(node.star_args)
            _add(node.dstar_args)
            if len(strings) == 1:
                strings = strings[0]
            else:
                strings = tuple(strings)
            yield node.node.name, strings
        else:
            # Not a gettext call: look for calls further down the tree.
            for child in node.getChildNodes():
                for funcname, strings in _walk(child):
                    yield funcname, strings
    return _walk(code.ast)
def extract(fileobj, keywords, comment_tags, options):
    """Babel extraction method for Genshi templates.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    def _qnames(names):
        # A whitespace-separated string is accepted in place of a sequence.
        if isinstance(names, basestring):
            names = names.split()
        return [QName(item) for item in names]

    # The template class may be given as a "module:ClassName" string.
    template_class = options.get('template_class', MarkupTemplate)
    if isinstance(template_class, basestring):
        module, clsname = template_class.split(':', 1)
        template_class = getattr(__import__(module, {}, {}, [clsname]), clsname)
    encoding = options.get('encoding', None)

    extract_text = options.get('extract_text', True)
    if isinstance(extract_text, basestring):
        extract_text = extract_text.lower() in ('1', 'on', 'yes', 'true')

    ignore_tags = _qnames(options.get('ignore_tags', Translator.IGNORE_TAGS))
    include_attrs = _qnames(options.get('include_attrs',
                                        Translator.INCLUDE_ATTRS))

    source_name = getattr(fileobj, 'name', None)
    tmpl = template_class(fileobj, filename=source_name, encoding=encoding)

    # No translation function is needed, the translator is only used to
    # extract messages here.
    translator = Translator(None, ignore_tags, include_attrs, extract_text)
    found = translator.extract(tmpl.stream, gettext_functions=keywords)
    for lineno, func, message in found:
        yield lineno, func, message, []

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,449 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Support for constructing markup streams from files, strings, or other
sources.
"""
from itertools import chain
from xml.parsers import expat
try:
frozenset
except NameError:
from sets import ImmutableSet as frozenset
import HTMLParser as html
import htmlentitydefs
from StringIO import StringIO
from calibre.utils.genshi.core import Attrs, QName, Stream, stripentities
from calibre.utils.genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, END_NS, \
START_CDATA, END_CDATA, PI, COMMENT
__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
__docformat__ = 'restructuredtext en'
def ET(element):
    """Turn an ElementTree element (and its subtree) into a markup stream.

    :param element: an ElementTree element
    :return: a markup stream
    """
    # None of the generated events has a known source position.
    nowhere = (None, -1, -1)

    qualified_tag = QName(element.tag.lstrip('{'))
    pairs = []
    for attr_name, attr_value in element.items():
        pairs.append((QName(attr_name.lstrip('{')), attr_value))
    attributes = Attrs(pairs)

    yield START, (qualified_tag, attributes), nowhere
    if element.text:
        yield TEXT, element.text, nowhere
    for child in element.getchildren():
        for event in ET(child):
            yield event
    yield END, qualified_tag, nowhere
    # The tail is the text that follows the element's end tag.
    if element.tail:
        yield TEXT, element.tail, nowhere
class ParseError(Exception):
    """Raised for fatal syntax errors in the input that is being parsed."""

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception.

        :param message: the error message from the parser
        :param filename: the path to the file that was parsed
        :param lineno: the number of the line on which the error was encountered
        :param offset: the column number where the error was encountered
        """
        # `msg` keeps the bare parser message; the exception text additionally
        # names the file when one is known.
        self.msg = message
        described = message
        if filename:
            described = message + ', in ' + filename
        Exception.__init__(self, described)
        self.filename = filename or '<string>'
        self.lineno = lineno
        self.offset = offset
class XMLParser(object):
"""Generator-based XML parser based on roughly equivalent code in
Kid/ElementTree.
The parsing is initiated by iterating over the parser object:
>>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>'))
>>> for kind, data, pos in parser:
... print kind, data
START (QName(u'root'), Attrs([(QName(u'id'), u'2')]))
START (QName(u'child'), Attrs())
TEXT Foo
END child
END root
"""
_entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
htmlentitydefs.name2codepoint.items()]
_external_dtd = '\n'.join(_entitydefs)
def __init__(self, source, filename=None, encoding=None):
"""Initialize the parser for the given XML input.
:param source: the XML text as a file-like object
:param filename: the name of the file, if appropriate
:param encoding: the encoding of the file; if not specified, the
encoding is assumed to be ASCII, UTF-8, or UTF-16, or
whatever the encoding specified in the XML declaration
(if any)
"""
self.source = source
self.filename = filename
# Setup the Expat parser
parser = expat.ParserCreate(encoding, '}')
parser.buffer_text = True
parser.returns_unicode = True
parser.ordered_attributes = True
parser.StartElementHandler = self._handle_start
parser.EndElementHandler = self._handle_end
parser.CharacterDataHandler = self._handle_data
parser.StartDoctypeDeclHandler = self._handle_doctype
parser.StartNamespaceDeclHandler = self._handle_start_ns
parser.EndNamespaceDeclHandler = self._handle_end_ns
parser.StartCdataSectionHandler = self._handle_start_cdata
parser.EndCdataSectionHandler = self._handle_end_cdata
parser.ProcessingInstructionHandler = self._handle_pi
parser.XmlDeclHandler = self._handle_xml_decl
parser.CommentHandler = self._handle_comment
# Tell Expat that we'll handle non-XML entities ourselves
# (in _handle_other)
parser.DefaultHandler = self._handle_other
parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
parser.UseForeignDTD()
parser.ExternalEntityRefHandler = self._build_foreign
# Location reporting is only support in Python >= 2.4
if not hasattr(parser, 'CurrentLineNumber'):
self._getpos = self._getpos_unknown
self.expat = parser
self._queue = []
def parse(self):
"""Generator that parses the XML source, yielding markup events.
:return: a markup event stream
:raises ParseError: if the XML text is not well formed
"""
def _generate():
try:
bufsize = 4 * 1024 # 4K
done = False
while 1:
while not done and len(self._queue) == 0:
data = self.source.read(bufsize)
if data == '': # end of data
if hasattr(self, 'expat'):
self.expat.Parse('', True)
del self.expat # get rid of circular references
done = True
else:
if isinstance(data, unicode):
data = data.encode('utf-8')
self.expat.Parse(data, False)
for event in self._queue:
yield event
self._queue = []
if done:
break
except expat.ExpatError, e:
msg = str(e)
raise ParseError(msg, self.filename, e.lineno, e.offset)
return Stream(_generate()).filter(_coalesce)
def __iter__(self):
return iter(self.parse())
def _build_foreign(self, context, base, sysid, pubid):
parser = self.expat.ExternalEntityParserCreate(context)
parser.ParseFile(StringIO(self._external_dtd))
return 1
def _enqueue(self, kind, data=None, pos=None):
if pos is None:
pos = self._getpos()
if kind is TEXT:
# Expat reports the *end* of the text event as current position. We
# try to fix that up here as much as possible. Unfortunately, the
# offset is only valid for single-line text. For multi-line text,
# it is apparently not possible to determine at what offset it
# started
if '\n' in data:
lines = data.splitlines()
lineno = pos[1] - len(lines) + 1
offset = -1
else:
lineno = pos[1]
offset = pos[2] - len(data)
pos = (pos[0], lineno, offset)
self._queue.append((kind, data, pos))
def _getpos_unknown(self):
return (self.filename, -1, -1)
def _getpos(self):
return (self.filename, self.expat.CurrentLineNumber,
self.expat.CurrentColumnNumber)
def _handle_start(self, tag, attrib):
attrs = Attrs([(QName(name), value) for name, value in
zip(*[iter(attrib)] * 2)])
self._enqueue(START, (QName(tag), attrs))
def _handle_end(self, tag):
self._enqueue(END, QName(tag))
def _handle_data(self, text):
self._enqueue(TEXT, text)
def _handle_xml_decl(self, version, encoding, standalone):
self._enqueue(XML_DECL, (version, encoding, standalone))
def _handle_doctype(self, name, sysid, pubid, has_internal_subset):
self._enqueue(DOCTYPE, (name, pubid, sysid))
def _handle_start_ns(self, prefix, uri):
self._enqueue(START_NS, (prefix or '', uri))
def _handle_end_ns(self, prefix):
self._enqueue(END_NS, prefix or '')
def _handle_start_cdata(self):
self._enqueue(START_CDATA)
def _handle_end_cdata(self):
self._enqueue(END_CDATA)
def _handle_pi(self, target, data):
self._enqueue(PI, (target, data))
def _handle_comment(self, text):
self._enqueue(COMMENT, text)
def _handle_other(self, text):
if text.startswith('&'):
# deal with undefined entities
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
self._enqueue(TEXT, text)
except KeyError:
filename, lineno, offset = self._getpos()
error = expat.error('undefined entity "%s": line %d, column %d'
% (text, lineno, offset))
error.code = expat.errors.XML_ERROR_UNDEFINED_ENTITY
error.lineno = lineno
error.offset = offset
raise error
def XML(text):
    """Parse the given XML source and return a markup stream.

    All events are collected into a list up front, so, unlike with
    `XMLParser`, the returned stream is reusable and can be iterated over
    multiple times:

    >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
    >>> print xml
    <doc><elem>Foo</elem><elem>Bar</elem></doc>
    >>> print xml.select('elem')
    <elem>Foo</elem><elem>Bar</elem>
    >>> print xml.select('elem/text()')
    FooBar

    :param text: the XML source
    :return: the parsed XML event stream
    :raises ParseError: if the XML text is not well-formed
    """
    parser = XMLParser(StringIO(text))
    events = list(parser)
    return Stream(events)
class HTMLParser(html.HTMLParser, object):
"""Parser for HTML input based on the Python `HTMLParser` module.
This class provides the same interface for generating stream events as
`XMLParser`, and attempts to automatically balance tags.
The parsing is initiated by iterating over the parser object:
>>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
>>> for kind, data, pos in parser:
... print kind, data
START (QName(u'ul'), Attrs([(QName(u'compact'), u'compact')]))
START (QName(u'li'), Attrs())
TEXT Foo
END li
END ul
"""
_EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
'hr', 'img', 'input', 'isindex', 'link', 'meta',
'param'])
def __init__(self, source, filename=None, encoding='utf-8'):
"""Initialize the parser for the given HTML input.
:param source: the HTML text as a file-like object
:param filename: the name of the file, if known
:param filename: encoding of the file; ignored if the input is unicode
"""
html.HTMLParser.__init__(self)
self.source = source
self.filename = filename
self.encoding = encoding
self._queue = []
self._open_tags = []
def parse(self):
"""Generator that parses the HTML source, yielding markup events.
:return: a markup event stream
:raises ParseError: if the HTML text is not well formed
"""
def _generate():
try:
bufsize = 4 * 1024 # 4K
done = False
while 1:
while not done and len(self._queue) == 0:
data = self.source.read(bufsize)
if data == '': # end of data
self.close()
done = True
else:
self.feed(data)
for kind, data, pos in self._queue:
yield kind, data, pos
self._queue = []
if done:
open_tags = self._open_tags
open_tags.reverse()
for tag in open_tags:
yield END, QName(tag), pos
break
except html.HTMLParseError, e:
msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
raise ParseError(msg, self.filename, e.lineno, e.offset)
return Stream(_generate()).filter(_coalesce)
def __iter__(self):
return iter(self.parse())
def _enqueue(self, kind, data, pos=None):
if pos is None:
pos = self._getpos()
self._queue.append((kind, data, pos))
def _getpos(self):
lineno, column = self.getpos()
return (self.filename, lineno, column)
def handle_starttag(self, tag, attrib):
fixed_attrib = []
for name, value in attrib: # Fixup minimized attributes
if value is None:
value = unicode(name)
elif not isinstance(value, unicode):
value = value.decode(self.encoding, 'replace')
fixed_attrib.append((QName(name), stripentities(value)))
self._enqueue(START, (QName(tag), Attrs(fixed_attrib)))
if tag in self._EMPTY_ELEMS:
self._enqueue(END, QName(tag))
else:
self._open_tags.append(tag)
def handle_endtag(self, tag):
if tag not in self._EMPTY_ELEMS:
while self._open_tags:
open_tag = self._open_tags.pop()
self._enqueue(END, QName(open_tag))
if open_tag.lower() == tag.lower():
break
def handle_data(self, text):
if not isinstance(text, unicode):
text = text.decode(self.encoding, 'replace')
self._enqueue(TEXT, text)
def handle_charref(self, name):
if name.lower().startswith('x'):
text = unichr(int(name[1:], 16))
else:
text = unichr(int(name))
self._enqueue(TEXT, text)
def handle_entityref(self, name):
try:
text = unichr(htmlentitydefs.name2codepoint[name])
except KeyError:
text = '&%s;' % name
self._enqueue(TEXT, text)
def handle_pi(self, data):
target, data = data.split(None, 1)
if data.endswith('?'):
data = data[:-1]
self._enqueue(PI, (target.strip(), data.strip()))
def handle_comment(self, text):
self._enqueue(COMMENT, text)
def HTML(text, encoding='utf-8'):
    """Parse the given HTML source and return a markup stream.

    All events are collected into a list up front, so, unlike with
    `HTMLParser`, the returned stream is reusable and can be iterated over
    multiple times:

    >>> html = HTML('<body><h1>Foo</h1></body>')
    >>> print html
    <body><h1>Foo</h1></body>
    >>> print html.select('h1')
    <h1>Foo</h1>
    >>> print html.select('h1/text()')
    Foo

    :param text: the HTML source
    :param encoding: the encoding handed on to `HTMLParser`
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error recovery
                        fails
    """
    parser = HTMLParser(StringIO(text), encoding=encoding)
    events = list(parser)
    return Stream(events)
def _coalesce(stream):
    """Merge each run of adjacent TEXT events into a single TEXT event.

    The merged event carries the position of the first event of the run.
    All other events are passed through unchanged.
    """
    pieces = []
    first_pos = None
    # The trailing (None, None, None) sentinel flushes text that is still
    # buffered when the stream ends; it is never yielded itself.
    for kind, data, pos in chain(stream, [(None, None, None)]):
        if kind is TEXT:
            if first_pos is None:
                first_pos = pos
            pieces.append(data)
            continue
        if pieces:
            yield TEXT, u''.join(pieces), first_pos
            pieces = []
            first_pos = None
        if kind:
            yield kind, data, pos

View File

@ -0,0 +1,765 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""This module provides different kinds of serialization methods for XML event
streams.
"""
from itertools import chain
try:
frozenset
except NameError:
from sets import ImmutableSet as frozenset
import re
from calibre.utils.genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
from calibre.utils.genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
__all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer',
'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer']
__docformat__ = 'restructuredtext en'
def encode(iterator, method='xml', encoding='utf-8', out=None):
    """Encode serializer output into a string.

    :param iterator: the iterator returned from serializing a stream (basically
                     any iterator that yields unicode objects)
    :param method: the serialization method; determines how characters not
                   representable in the specified encoding are treated
                   (``'replace'`` for text output, ``'xmlcharrefreplace'``
                   otherwise)
    :param encoding: how the output string should be encoded; if set to `None`,
                     this method returns a `unicode` object
    :param out: a file-like object that the output should be written to
                instead of being returned as one big string; note that if
                this is a file or socket (or similar), the `encoding` must
                not be `None` (that is, the output must be encoded)
    :return: a `str` or `unicode` object (depending on the `encoding`
             parameter), or `None` if the `out` parameter is provided
    :since: version 0.4.1
    :note: Changed in 0.5: added the `out` parameter
    """
    if encoding is None:
        def _encode(string):
            return string
    else:
        if method == 'text' or isinstance(method, TextSerializer):
            errors = 'replace'
        else:
            errors = 'xmlcharrefreplace'

        def _encode(string):
            return string.encode(encoding, errors)

    if out is not None:
        # Stream the output chunk by chunk instead of building one string
        for chunk in iterator:
            out.write(_encode(chunk))
        return None
    return _encode(u''.join(iterator))
def get_serializer(method='xml', **kwargs):
    """Return a serializer object for the given method.

    :param method: the serialization method; can be either "xml", "xhtml",
                   "html", "text" (matched case-insensitively), or a custom
                   serializer class

    Any additional keyword arguments are passed to the serializer, and thus
    depend on the `method` parameter value.

    :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer`
    :since: version 0.4.1
    """
    serializer_class = method
    if isinstance(method, basestring):
        by_name = {'xml': XMLSerializer,
                   'xhtml': XHTMLSerializer,
                   'html': HTMLSerializer,
                   'text': TextSerializer}
        # An unknown name raises KeyError
        serializer_class = by_name[method.lower()]
    return serializer_class(**kwargs)
class DocType(object):
    """Defines a number of commonly used DOCTYPE declarations as constants.

    Every constant is a ``(name, pubid, sysid)`` tuple.
    """

    HTML_STRICT = (
        'html', '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    )
    HTML_TRANSITIONAL = (
        'html', '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    )
    HTML_FRAMESET = (
        'html', '-//W3C//DTD HTML 4.01 Frameset//EN',
        'http://www.w3.org/TR/html4/frameset.dtd'
    )
    HTML = HTML_STRICT

    HTML5 = ('html', None, None)

    XHTML_STRICT = (
        'html', '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    )
    XHTML_TRANSITIONAL = (
        'html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    )
    XHTML_FRAMESET = (
        'html', '-//W3C//DTD XHTML 1.0 Frameset//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd'
    )
    XHTML = XHTML_STRICT

    XHTML11 = (
        'html', '-//W3C//DTD XHTML 1.1//EN',
        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
    )

    SVG_FULL = (
        'svg', '-//W3C//DTD SVG 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd'
    )
    SVG_BASIC = (
        'svg', '-//W3C//DTD SVG Basic 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd'
    )
    SVG_TINY = (
        'svg', '-//W3C//DTD SVG Tiny 1.1//EN',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd'
    )
    SVG = SVG_FULL

    def get(cls, name):
        """Return the ``(name, pubid, sysid)`` tuple of the ``DOCTYPE``
        declaration for the specified name.

        The following names are recognized in this version (matching is
        case-insensitive):
         * "html" or "html-strict" for the HTML 4.01 strict DTD
         * "html-transitional" for the HTML 4.01 transitional DTD
         * "html-frameset" for the HTML 4.01 frameset DTD
         * "html5" for the ``DOCTYPE`` proposed for HTML5
         * "xhtml" or "xhtml-strict" for the XHTML 1.0 strict DTD
         * "xhtml-transitional" for the XHTML 1.0 transitional DTD
         * "xhtml-frameset" for the XHTML 1.0 frameset DTD
         * "xhtml11" for the XHTML 1.1 DTD
         * "svg" or "svg-full" for the SVG 1.1 DTD
         * "svg-basic" for the SVG Basic 1.1 DTD
         * "svg-tiny" for the SVG Tiny 1.1 DTD

        :param name: the name of the ``DOCTYPE``
        :return: the ``(name, pubid, sysid)`` tuple for the requested
                 ``DOCTYPE``, or ``None`` if the name is not recognized
        :since: version 0.4.1
        """
        # Every entry is looked up through `cls` so that constants overridden
        # in a subclass are honoured uniformly.
        return {
            'html': cls.HTML, 'html-strict': cls.HTML_STRICT,
            'html-transitional': cls.HTML_TRANSITIONAL,
            'html-frameset': cls.HTML_FRAMESET,
            'html5': cls.HTML5,
            'xhtml': cls.XHTML, 'xhtml-strict': cls.XHTML_STRICT,
            'xhtml-transitional': cls.XHTML_TRANSITIONAL,
            'xhtml-frameset': cls.XHTML_FRAMESET,
            'xhtml11': cls.XHTML11,
            'svg': cls.SVG, 'svg-full': cls.SVG_FULL,
            'svg-basic': cls.SVG_BASIC,
            'svg-tiny': cls.SVG_TINY
        }.get(name.lower())
    get = classmethod(get)
class XMLSerializer(object):
    """Produces XML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XMLSerializer()(elem.generate()))
    <div><a href="foo"/><br/><hr noshade="True"/></div>
    """

    # Elements inside which whitespace must be left alone (none for XML)
    _PRESERVE_SPACE = frozenset()

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None):
        """Initialize the XML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output, or the name of a DOCTYPE as
                        defined in `DocType.get`
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: handed to `NamespaceFlattener` as its
                                   ``prefixes`` argument
        :note: Changed in 0.4.2: The `doctype` parameter can now be a string.
        """
        filters = [EmptyTagFilter()]
        if strip_whitespace:
            filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
        if doctype:
            filters.append(DocTypeInserter(doctype))
        self.filters = filters

    def __call__(self, stream):
        """Serialize the stream, yielding chunks of XML text.

        Only the first XML declaration and the first DOCTYPE event are
        written out; later ones are dropped.
        """
        seen_decl = False
        seen_doctype = False
        in_cdata = False

        for filter_ in self.filters:
            stream = filter_(stream)

        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                parts = ['<', tag]
                for attr, value in attrib:
                    parts.extend([' ', attr, '="', escape(value), '"'])
                if kind is EMPTY:
                    parts.append('/>')
                else:
                    parts.append('>')
                yield Markup(u''.join(parts))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                # Text inside a CDATA section is passed through unescaped
                if in_cdata:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is XML_DECL and not seen_decl:
                version, encoding, standalone = data
                parts = ['<?xml version="%s"' % version]
                if encoding:
                    parts.append(' encoding="%s"' % encoding)
                if standalone != -1:
                    if standalone:
                        standalone = 'yes'
                    else:
                        standalone = 'no'
                    parts.append(' standalone="%s"' % standalone)
                parts.append('?>\n')
                yield Markup(u''.join(parts))
                seen_decl = True

            elif kind is DOCTYPE and not seen_doctype:
                name, pubid, sysid = data
                # Build a format string with one "%s" per component that is
                # set; filter(None, data) below supplies exactly those.
                parts = ['<!DOCTYPE %s']
                if pubid:
                    parts.append(' PUBLIC "%s"')
                elif sysid:
                    parts.append(' SYSTEM')
                if sysid:
                    parts.append(' "%s"')
                parts.append('>\n')
                yield Markup(u''.join(parts)) % filter(None, data)
                seen_doctype = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
class XHTMLSerializer(XMLSerializer):
    """Produces XHTML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(XHTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br /><hr noshade="noshade" /></div>
    """

    # Elements written as "<tag />" when empty; any other empty element is
    # written as an explicit "<tag></tag>" pair.
    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
                              'param'])
    # Attributes whose value is always written as the attribute name itself
    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
                                'defer', 'disabled', 'ismap', 'multiple',
                                'nohref', 'noresize', 'noshade', 'nowrap'])
    # Elements inside which whitespace is not stripped
    _PRESERVE_SPACE = frozenset([
        QName('pre'), QName('http://www.w3.org/1999/xhtml}pre'),
        QName('textarea'), QName('http://www.w3.org/1999/xhtml}textarea')
    ])

    def __init__(self, doctype=None, strip_whitespace=True,
                 namespace_prefixes=None, drop_xml_decl=True):
        """Initialize the XHTML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple or DOCTYPE name; when
                        set, a `DocTypeInserter` is added to the filters
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        :param namespace_prefixes: optional mapping of namespace URIs to
                                   prefixes; the XHTML namespace is always
                                   mapped to the empty prefix. The mapping
                                   passed in is copied, not modified.
        :param drop_xml_decl: whether XML declarations in the stream should be
                              left out of the output
        """
        super(XHTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
        # Work on a copy so that the caller's mapping does not silently gain
        # the XHTML entry added below.
        namespace_prefixes = dict(namespace_prefixes or {})
        namespace_prefixes['http://www.w3.org/1999/xhtml'] = ''
        self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))
        self.drop_xml_decl = drop_xml_decl

    def __call__(self, stream):
        """Serialize the stream, yielding chunks of XHTML text.

        Only the first DOCTYPE event and (unless dropped altogether) the first
        XML declaration are written out.
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        drop_xml_decl = self.drop_xml_decl
        have_decl = have_doctype = False
        in_cdata = False

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        value = attr
                    elif attr == u'xml:lang' and u'lang' not in attrib:
                        # Mirror xml:lang into a plain lang attribute; the
                        # xml:lang attribute itself is still written below.
                        buf += [' lang="', escape(value), '"']
                    elif attr == u'xml:space':
                        continue
                    buf += [' ', attr, '="', escape(value), '"']
                if kind is EMPTY:
                    if tag in empty_elems:
                        buf.append(' />')
                    else:
                        buf.append('></%s>' % tag)
                else:
                    buf.append('>')
                yield Markup(u''.join(buf))

            elif kind is END:
                yield Markup('</%s>' % data)

            elif kind is TEXT:
                # Text inside a CDATA section is passed through unescaped
                if in_cdata:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                # One "%s" per component that is set; filter(None, data)
                # supplies exactly those values.
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(u''.join(buf)) % filter(None, data)
                have_doctype = True

            elif kind is XML_DECL and not have_decl and not drop_xml_decl:
                version, encoding, standalone = data
                buf = ['<?xml version="%s"' % version]
                if encoding:
                    buf.append(' encoding="%s"' % encoding)
                if standalone != -1:
                    standalone = standalone and 'yes' or 'no'
                    buf.append(' standalone="%s"' % standalone)
                buf.append('?>\n')
                yield Markup(u''.join(buf))
                have_decl = True

            elif kind is START_CDATA:
                yield Markup('<![CDATA[')
                in_cdata = True

            elif kind is END_CDATA:
                yield Markup(']]>')
                in_cdata = False

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
class HTMLSerializer(XHTMLSerializer):
    """Produces HTML text from an event stream.

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a(href='foo'), tag.br, tag.hr(noshade=True))
    >>> print ''.join(HTMLSerializer()(elem.generate()))
    <div><a href="foo"></a><br><hr noshade></div>
    """

    # Elements whose text content is written out without escaping
    _NOESCAPE_ELEMS = frozenset([
        QName('script'), QName('http://www.w3.org/1999/xhtml}script'),
        QName('style'), QName('http://www.w3.org/1999/xhtml}style')
    ])

    def __init__(self, doctype=None, strip_whitespace=True):
        """Initialize the HTML serializer.

        :param doctype: a ``(name, pubid, sysid)`` tuple that represents the
                        DOCTYPE declaration that should be included at the top
                        of the generated output
        :param strip_whitespace: whether extraneous whitespace should be
                                 stripped from the output
        """
        super(HTMLSerializer, self).__init__(doctype, False)
        self.filters = [EmptyTagFilter()]
        if strip_whitespace:
            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
                                                 self._NOESCAPE_ELEMS))
        self.filters.append(NamespaceFlattener(prefixes={
            'http://www.w3.org/1999/xhtml': ''
        }))
        if doctype:
            self.filters.append(DocTypeInserter(doctype))

    def __call__(self, stream):
        """Serialize the stream, yielding chunks of HTML text.

        Only the first DOCTYPE event is written out. XML declarations and
        CDATA markers produce no output.
        """
        boolean_attrs = self._BOOLEAN_ATTRS
        empty_elems = self._EMPTY_ELEMS
        noescape_elems = self._NOESCAPE_ELEMS
        have_doctype = False
        noescape = False

        for filter_ in self.filters:
            stream = filter_(stream)
        for kind, data, pos in stream:

            if kind is START or kind is EMPTY:
                tag, attrib = data
                buf = ['<', tag]
                for attr, value in attrib:
                    if attr in boolean_attrs:
                        # Boolean attributes are minimized, and left out
                        # entirely when their value is false
                        if value:
                            buf += [' ', attr]
                    elif ':' in attr:
                        # Prefixed attributes are dropped; xml:lang is turned
                        # into lang unless a lang attribute already exists
                        if attr == 'xml:lang' and u'lang' not in attrib:
                            buf += [' lang="', escape(value), '"']
                    elif attr != 'xmlns':
                        buf += [' ', attr, '="', escape(value), '"']
                buf.append('>')
                if kind is EMPTY:
                    if tag not in empty_elems:
                        buf.append('</%s>' % tag)
                yield Markup(u''.join(buf))
                # Only a START event opens content that must stay unescaped.
                # An EMPTY <script>/<style> is already closed, so switching
                # the flag on for it would leave the text that follows the
                # element unescaped until the next END event.
                if kind is START and tag in noescape_elems:
                    noescape = True

            elif kind is END:
                yield Markup('</%s>' % data)
                noescape = False

            elif kind is TEXT:
                if noescape:
                    yield data
                else:
                    yield escape(data, quotes=False)

            elif kind is COMMENT:
                yield Markup('<!--%s-->' % data)

            elif kind is DOCTYPE and not have_doctype:
                name, pubid, sysid = data
                # One "%s" per component that is set; filter(None, data)
                # supplies exactly those values.
                buf = ['<!DOCTYPE %s']
                if pubid:
                    buf.append(' PUBLIC "%s"')
                elif sysid:
                    buf.append(' SYSTEM')
                if sysid:
                    buf.append(' "%s"')
                buf.append('>\n')
                yield Markup(u''.join(buf)) % filter(None, data)
                have_doctype = True

            elif kind is PI:
                yield Markup('<?%s %s?>' % data)
class TextSerializer(object):
    """Produces plain text from an event stream.

    Only text events are included in the output. Unlike the other serializer,
    special XML characters are not escaped:

    >>> from genshi.builder import tag
    >>> elem = tag.div(tag.a('<Hello!>', href='foo'), tag.br)
    >>> print elem
    <div><a href="foo">&lt;Hello!&gt;</a><br/></div>
    >>> print ''.join(TextSerializer()(elem.generate()))
    <Hello!>

    If text events contain literal markup (instances of the `Markup` class),
    that markup is by default passed through unchanged:

    >>> elem = tag.div(Markup('<a href="foo">Hello &amp; Bye!</a><br/>'))
    >>> print elem.generate().render(TextSerializer)
    <a href="foo">Hello &amp; Bye!</a><br/>

    You can use the ``strip_markup`` to change this behavior, so that tags and
    entities are stripped from the output (or in the case of entities,
    replaced with the equivalent character):

    >>> print elem.generate().render(TextSerializer, strip_markup=True)
    Hello & Bye!
    """

    def __init__(self, strip_markup=False):
        """Create the serializer.

        :param strip_markup: whether markup (tags and encoded characters) found
                             in the text should be removed
        """
        self.strip_markup = strip_markup

    def __call__(self, stream):
        """Yield the content of every TEXT event as a `unicode` object."""
        strip_markup = self.strip_markup
        for event in stream:
            if event[0] is not TEXT:
                continue
            text = event[1]
            # Only exact Markup instances are stripped (checked by type, not
            # with isinstance).
            if strip_markup and type(text) is Markup:
                text = text.striptags().stripentities()
            yield unicode(text)
class EmptyTagFilter(object):
    """Combines `START` and `END` events into `EMPTY` events for elements that
    have no contents.

    A `START` event is held back until the next event is seen: if that event
    is an `END`, the pair is replaced by one `EMPTY` event carrying the data
    and position of the `START` event.

    NOTE: a `START` event that is the very last event of the stream is never
    emitted, because it is only written out when a later event arrives.
    """

    EMPTY = StreamEventKind('EMPTY')

    def __call__(self, stream):
        held = (None, None, None)
        for event in stream:
            if held[0] is START:
                if event[0] is END:
                    # START directly followed by END: collapse the pair
                    held = (EMPTY, held[1], held[2])
                    yield held
                    continue
                yield held
            if event[0] is not START:
                yield event
            held = event


# Module-level alias, also used inside EmptyTagFilter.__call__
EMPTY = EmptyTagFilter.EMPTY
class NamespaceFlattener(object):
r"""Output stream filter that removes namespace information from the stream,
instead adding namespace attributes and prefixes as needed.
START_NS and END_NS events are consumed by the filter. Tag and attribute
names in START, EMPTY and END events are replaced by plain (optionally
prefixed) names, and the required ``xmlns`` attributes are added to the next
START or EMPTY event that is emitted.
:param prefixes: optional mapping of namespace URIs to prefixes
>>> from genshi.input import XML
>>> xml = XML('''<doc xmlns="NS1" xmlns:two="NS2">
... <two:item/>
... </doc>''')
>>> for kind, data, pos in NamespaceFlattener()(xml):
... print kind, repr(data)
START (u'doc', Attrs([(u'xmlns', u'NS1'), (u'xmlns:two', u'NS2')]))
TEXT u'\n '
START (u'two:item', Attrs())
END u'two:item'
TEXT u'\n'
END u'doc'
"""
def __init__(self, prefixes=None):
# URI -> prefix mapping; the XML namespace always maps to "xml" unless
# overridden by the caller's mapping.
self.prefixes = {XML_NAMESPACE.uri: 'xml'}
if prefixes is not None:
self.prefixes.update(prefixes)
def __call__(self, stream):
# prefixes: prefix -> list (stack) of URIs, seeded by inverting the
# configured URI -> prefix mapping.
prefixes = dict([(v, [k]) for k, v in self.prefixes.items()])
# namespaces: URI -> list (stack) of prefixes currently bound to it.
namespaces = {XML_NAMESPACE.uri: ['xml']}
def _push_ns(prefix, uri):
namespaces.setdefault(uri, []).append(prefix)
prefixes.setdefault(prefix, []).append(uri)
# xmlns attributes waiting to be attached to the next START/EMPTY event
ns_attrs = []
_push_ns_attr = ns_attrs.append
def _make_ns_attr(prefix, uri):
# Returns an (attribute name, uri) tuple: "xmlns" for the empty prefix,
# "xmlns:<prefix>" otherwise.
return u'xmlns%s' % (prefix and ':%s' % prefix or ''), uri
def _gen_prefix():
# Endless supply of generated prefixes: ns1, ns2, ...
val = 0
while 1:
val += 1
yield 'ns%d' % val
_gen_prefix = _gen_prefix().next
for kind, data, pos in stream:
if kind is START or kind is EMPTY:
tag, attrs = data
tagname = tag.localname
tagns = tag.namespace
if tagns:
if tagns in namespaces:
# Use the prefix most recently bound to this namespace
prefix = namespaces[tagns][-1]
if prefix:
tagname = u'%s:%s' % (prefix, tagname)
else:
# Namespace not declared yet: make it the default namespace
_push_ns_attr((u'xmlns', tagns))
_push_ns('', tagns)
new_attrs = []
for attr, value in attrs:
attrname = attr.localname
attrns = attr.namespace
if attrns:
if attrns not in namespaces:
# Undeclared attribute namespace: bind it to a generated prefix
prefix = _gen_prefix()
_push_ns(prefix, attrns)
_push_ns_attr(('xmlns:%s' % prefix, attrns))
else:
prefix = namespaces[attrns][-1]
if prefix:
attrname = u'%s:%s' % (prefix, attrname)
new_attrs.append((attrname, value))
# Pending xmlns attributes come first; the buffer is then emptied
yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos
del ns_attrs[:]
elif kind is END:
tagname = data.localname
tagns = data.namespace
if tagns:
prefix = namespaces[tagns][-1]
if prefix:
tagname = u'%s:%s' % (prefix, tagname)
yield kind, tagname, pos
elif kind is START_NS:
prefix, uri = data
if uri not in namespaces:
# NOTE(review): `prefixes` is keyed by prefix (see _push_ns and its
# initialisation above), yet it is queried with a URI here -- confirm
# whether a lookup in the URI -> prefix mapping was intended.
prefix = prefixes.get(uri, [prefix])[-1]
_push_ns_attr(_make_ns_attr(prefix, uri))
_push_ns(prefix, uri)
elif kind is END_NS:
if data in prefixes:
# Undo the most recent binding of this prefix
uris = prefixes.get(data)
uri = uris.pop()
if not uris:
del prefixes[data]
if uri not in uris or uri != uris[-1]:
uri_prefixes = namespaces[uri]
uri_prefixes.pop()
if not uri_prefixes:
del namespaces[uri]
if ns_attrs:
# Drop a matching xmlns attribute that is still pending
attr = _make_ns_attr(data, uri)
if attr in ns_attrs:
ns_attrs.remove(attr)
else:
# All other events pass through unchanged
yield kind, data, pos
class WhitespaceFilter(object):
    """A filter that removes extraneous ignorable white space from the
    stream.

    Adjacent TEXT events are merged; unless whitespace is being preserved,
    trailing spaces and tabs are removed from lines and runs of newlines are
    collapsed into a single newline.
    """

    def __init__(self, preserve=None, noescape=None):
        """Initialize the filter.

        :param preserve: a set or sequence of tag names for which white-space
                         should be preserved
        :param noescape: a set or sequence of tag names for which text content
                         should not be escaped

        The `noescape` set is expected to refer to elements that cannot contain
        further child elements (such as ``<style>`` or ``<script>`` in HTML
        documents).
        """
        if preserve is None:
            preserve = []
        if noescape is None:
            noescape = []
        self.preserve = frozenset(preserve)
        self.noescape = frozenset(noescape)

    def __call__(self, stream, ctxt=None, space=XML_NAMESPACE['space'],
                 trim_trailing_space=re.compile('[ \t]+(?=\n)').sub,
                 collapse_lines=re.compile('\n{2,}').sub):
        join_markup = Markup('').join
        keep_space_tags = self.preserve
        raw_text_tags = self.noescape
        preserve_depth = 0   # > 0 while inside a whitespace-preserving element
        raw_text = False     # True while text must not be escaped
        pending = []         # TEXT data collected since the last other event

        # The trailing (None, None, None) sentinel flushes text that is still
        # buffered when the stream ends; it is never yielded itself.
        for kind, data, pos in chain(stream, [(None, None, None)]):

            if kind is TEXT:
                if raw_text:
                    data = Markup(data)
                pending.append(data)
                continue

            if pending:
                if len(pending) > 1:
                    text = join_markup(pending, escape_quotes=False)
                else:
                    text = escape(pending[0], quotes=False)
                del pending[:]
                if not preserve_depth:
                    text = collapse_lines('\n', trim_trailing_space('', text))
                yield TEXT, Markup(text), pos

            if kind is START:
                tag, attrs = data
                # Once inside a preserving element, every nested element
                # increases the depth so that END events balance out
                if (preserve_depth or tag in keep_space_tags
                        or attrs.get(space) == 'preserve'):
                    preserve_depth += 1
                if not raw_text and tag in raw_text_tags:
                    raw_text = True

            elif kind is END:
                raw_text = False
                if preserve_depth:
                    preserve_depth -= 1

            elif kind is START_CDATA:
                raw_text = True

            elif kind is END_CDATA:
                raw_text = False

            if kind:
                yield kind, data, pos
class DocTypeInserter(object):
    """A filter that inserts the DOCTYPE declaration in the correct location,
    after the XML declaration.

    If the stream does not start with an XML declaration, the DOCTYPE event
    is emitted first; for an empty stream it is the only event emitted.
    """

    def __init__(self, doctype):
        """Initialize the filter.

        :param doctype: DOCTYPE as a string or DocType object.
        """
        if isinstance(doctype, basestring):
            # A name is resolved through DocType.get
            doctype = DocType.get(doctype)
        self.doctype_event = (DOCTYPE, doctype, (None, -1, -1))

    def __call__(self, stream):
        doctype_pending = True
        for kind, data, pos in stream:
            event = (kind, data, pos)
            if not doctype_pending:
                yield event
                continue
            # First event of the stream: decide on which side of it the
            # DOCTYPE goes
            doctype_pending = False
            if kind is XML_DECL:
                yield event
                yield self.doctype_event
            else:
                yield self.doctype_event
                yield event

        if doctype_pending:
            yield self.doctype_event

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Implementation of the template engine."""
from calibre.utils.genshi.template.base import Context, Template, TemplateError, \
TemplateRuntimeError, TemplateSyntaxError, \
BadDirectiveError
from calibre.utils.genshi.template.loader import TemplateLoader, TemplateNotFound
from calibre.utils.genshi.template.markup import MarkupTemplate
from calibre.utils.genshi.template.text import TextTemplate, OldTextTemplate, NewTextTemplate
__docformat__ = 'restructuredtext en'

View File

@ -0,0 +1,598 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Basic templating functionality."""
try:
    from collections import deque
except ImportError:
    # Minimal list-based stand-in for interpreters without
    # collections.deque; only the two left-end operations are provided.
    class deque(list):

        def appendleft(self, x):
            self.insert(0, x)

        def popleft(self):
            return self.pop(0)
import os
from StringIO import StringIO
import sys
from calibre.utils.genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
from calibre.utils.genshi.input import ParseError
__all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError',
'TemplateSyntaxError', 'BadDirectiveError']
__docformat__ = 'restructuredtext en'
# `_ctxt2dict` turns a `Context` into the namespace object that
# `_exec_suite` hands to ``suite.execute()``.
if sys.version_info >= (2, 4):
    # The context object itself is passed through unchanged
    _ctxt2dict = lambda ctxt: ctxt
else:
    # On older interpreters only the top-most frame is exposed.
    # NOTE(review): presumably because a plain dict is required as the
    # execution namespace there -- confirm
    _ctxt2dict = lambda ctxt: ctxt.frames[0]
class TemplateError(Exception):
    """Root of the exception hierarchy for template processing errors."""

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception.

        The location is appended to the exception text as
        ``"<message> (<filename>, line <lineno>)"`` unless neither a file
        name nor a non-negative line number is known.

        :param message: the error message
        :param filename: the filename of the template; ``'<string>'`` is
                         used when it is omitted
        :param lineno: the number of the line in the template at which the
                       error occurred
        :param offset: the column number at which the error occurred
        """
        if filename is None:
            filename = '<string>'
        self.msg = message #: the error message string, without location
        has_location = filename != '<string>' or lineno >= 0
        if has_location:
            message = '%s (%s, line %d)' % (self.msg, filename, lineno)
        Exception.__init__(self, message)
        self.filename = filename #: the name of the template file
        self.lineno = lineno #: the number of the line containing the error
        self.offset = offset #: the offset on the line
class TemplateSyntaxError(TemplateError):
    """Exception raised when an expression in a template causes a Python syntax
    error, or the template is not well-formed.
    """

    def __init__(self, message, filename=None, lineno=-1, offset=-1):
        """Create the exception.

        :param message: the error message; a `SyntaxError` instance is also
                        accepted, in which case its text is used
        :param filename: the filename of the template
        :param lineno: the number of the line in the template at which the
                       error occurred
        :param offset: the column number at which the error occurred
        """
        if isinstance(message, SyntaxError) and message.lineno is not None:
            # Drop the " (line N)" suffix carried by the SyntaxError text;
            # the base class appends the template location instead.
            message = str(message).replace(' (line %d)' % message.lineno, '')
        # Forward `offset` as well: it used to be dropped here, which left
        # the ``offset`` attribute at -1 no matter what the caller passed.
        TemplateError.__init__(self, message, filename, lineno, offset)
class BadDirectiveError(TemplateSyntaxError):
    """Exception raised when parsing a template hits an unknown directive.

    A directive is unknown when an attribute uses the directives namespace
    but its local name doesn't match any registered directive.
    """

    def __init__(self, name, filename=None, lineno=-1):
        """Create the exception.

        :param name: the name of the directive
        :param filename: the filename of the template
        :param lineno: the number of the line in the template at which the
                       error occurred
        """
        message = 'bad directive "%s"' % name
        TemplateSyntaxError.__init__(self, message, filename, lineno)
class TemplateRuntimeError(TemplateError):
    """Exception raised when the evaluation of a Python expression in a
    template causes an error.
    """
class Context(object):
    """Stack of variable scopes holding the data a template is rendered with.

    Every scope is a dictionary. Template directives such as loops push a
    scope with data that should only be visible while they run, and pop it
    off again when they are done. Lookups walk the scopes starting at the
    most recently pushed one.

    >>> ctxt = Context(one='foo', other=1)
    >>> ctxt.get('one')
    'foo'
    >>> ctxt.get('other')
    1
    >>> ctxt.push(dict(one='frost'))
    >>> ctxt.get('one')
    'frost'
    >>> ctxt.get('other')
    1
    >>> ctxt.pop()
    {'one': 'frost'}
    >>> ctxt.get('one')
    'foo'
    """

    def __init__(self, **data):
        """Set up the context with the keyword arguments as initial scope.

        The helper functions ``defined`` and ``value_of`` are added to that
        scope unless data of the same name was passed in.
        """
        frames = deque([data])
        self.frames = frames
        # Bind the stack operations straight to the deque. These instance
        # attributes take precedence over the documentation-only ``push``
        # and ``pop`` methods at the end of the class.
        self.push = frames.appendleft
        self.pop = frames.popleft
        self._match_templates = []
        self._choice_stack = []

        # Helper functions for use in expressions
        def defined(name):
            """Return whether a variable with the specified name exists in the
            expression scope."""
            return name in self

        def value_of(name, default=None):
            """If a variable of the specified name is defined, return its value.
            Otherwise, return the provided default value, or ``None``."""
            return self.get(name, default)

        for helper in (defined, value_of):
            data.setdefault(helper.__name__, helper)

    def __repr__(self):
        scopes = list(self.frames)
        return repr(scopes)

    def __contains__(self, key):
        """Tell whether a variable exists in any of the scopes.

        :param key: the name of the variable
        """
        value, scope = self._find(key)
        return scope is not None
    has_key = __contains__

    def __delitem__(self, key):
        """Remove a variable from every scope that defines it.

        :param key: the name of the variable
        """
        for scope in self.frames:
            if key in scope:
                del scope[key]

    def __getitem__(self, key):
        """Look up a variable, starting at the current scope and going
        upward.

        :param key: the name of the variable
        :return: the variable value
        :raises KeyError: if the requested variable wasn't found in any scope
        """
        found = self._find(key)
        if found[1] is None:
            raise KeyError(key)
        return found[0]

    def __len__(self):
        """Count the distinctly named variables in the context.

        :return: the number of variables in the context
        """
        return len(self.items())

    def __setitem__(self, key, value):
        """Set a variable in the current scope.

        :param key: the name of the variable
        :param value: the variable value
        """
        current = self.frames[0]
        current[key] = value

    def _find(self, key, default=None):
        """Look up a variable and the scope that defines it.

        Intended primarily for internal use by directives.

        :param key: the name of the variable
        :param default: the value to return when the variable is not found
        :return: a ``(value, frame)`` tuple; ``frame`` is ``None`` when the
                 variable was not found
        """
        for scope in self.frames:
            if key in scope:
                return scope[key], scope
        return default, None

    def get(self, key, default=None):
        """Look up a variable, starting at the current scope and going
        upward.

        :param key: the name of the variable
        :param default: the value to return when the variable is not found
        """
        for scope in self.frames:
            if key in scope:
                return scope[key]
        return default

    def keys(self):
        """List the names of all variables in the context.

        :return: a list of variable names, each name appearing once
        """
        names = []
        for scope in self.frames:
            for name in scope:
                if name not in names:
                    names.append(name)
        return names

    def items(self):
        """List all variables in the context as ``(name, value)`` tuples.

        :return: a list of variables
        """
        pairs = []
        for name in self.keys():
            pairs.append((name, self.get(name)))
        return pairs

    def update(self, mapping):
        """Update the current scope from the mapping provided."""
        current = self.frames[0]
        current.update(mapping)

    def push(self, data):
        """Push a new scope on the stack.

        :param data: the data dictionary to push on the context stack.
        """

    def pop(self):
        """Pop the top-most scope from the stack."""
def _apply_directives(stream, directives, ctxt, **vars):
    """Run the stream through the given directives.

    Only the first directive is invoked here; it receives the remaining
    directives and is responsible for applying them.

    :param stream: the stream the directives should be applied to
    :param directives: the list of directives to apply
    :param ctxt: the `Context`
    :param vars: additional variables that should be available when Python
                 code is executed
    :return: the stream with the given directives applied; the unmodified
             `stream` if there are no directives
    """
    if not directives:
        return stream
    first, remaining = directives[0], directives[1:]
    return first(iter(stream), remaining, ctxt, **vars)
def _eval_expr(expr, ctxt, **vars):
    """Evaluate the given `Expression` object.

    When extra variables are given they are pushed onto the context as a
    temporary scope for the duration of the evaluation. That scope is
    removed again even if the evaluation raises, so that a failing
    expression cannot leave a stray scope behind on the context stack.

    :param expr: the expression to evaluate
    :param ctxt: the `Context`
    :param vars: additional variables that should be available to the
                 expression
    :return: the result of the evaluation
    """
    if not vars:
        return expr.evaluate(ctxt)
    ctxt.push(vars)
    try:
        return expr.evaluate(ctxt)
    finally:
        ctxt.pop()
def _exec_suite(suite, ctxt, **vars):
    """Execute the given `Suite` object.

    When extra variables are given, the suite runs with two temporary scopes
    on top of the context: the variables themselves and an empty scope.
    Afterwards both are removed and the content of the top-most of them is
    merged into the scope that was current before the call. The temporary
    scopes are removed even if the suite raises, so that a failing code
    block cannot leave stray scopes behind on the context stack.

    :param suite: the code suite to execute
    :param ctxt: the `Context`
    :param vars: additional variables that should be available to the
                 code
    """
    if not vars:
        suite.execute(_ctxt2dict(ctxt))
        return
    ctxt.push(vars)
    ctxt.push({})
    try:
        suite.execute(_ctxt2dict(ctxt))
    finally:
        top = ctxt.pop()
        ctxt.pop()
        ctxt.frames[0].update(top)
class TemplateMeta(type):
    """Meta class for templates.

    If the class body declares ``directives`` as a sequence of
    ``(name, directive)`` pairs, two lookup helpers are derived from it:
    ``_dir_by_name`` (a dictionary built from the pairs) and ``_dir_order``
    (the second items of the pairs, in declaration order).
    """

    def __new__(cls, name, bases, d):
        if 'directives' in d:
            declared = d['directives']
            d['_dir_by_name'] = dict(declared)
            ordered = []
            for pair in declared:
                ordered.append(pair[1])
            d['_dir_order'] = ordered
        return type.__new__(cls, name, bases, d)
class Template(object):
"""Abstract template base class.
This class implements most of the template processing model, but does not
specify the syntax of templates.
"""
__metaclass__ = TemplateMeta
EXEC = StreamEventKind('EXEC')
"""Stream event kind representing a Python code suite to execute."""
EXPR = StreamEventKind('EXPR')
"""Stream event kind representing a Python expression."""
INCLUDE = StreamEventKind('INCLUDE')
"""Stream event kind representing the inclusion of another template."""
SUB = StreamEventKind('SUB')
"""Stream event kind representing a nested stream to which one or more
directives should be applied.
"""
serializer = None
_number_conv = unicode # function used to convert numbers to event data
def __init__(self, source, filepath=None, filename=None, loader=None,
encoding=None, lookup='strict', allow_exec=True):
"""Initialize a template from either a string, a file-like object, or
an already parsed markup stream.
:param source: a string, file-like object, or markup stream to read the
template from
:param filepath: the absolute path to the template file
:param filename: the path to the template file relative to the search
path
:param loader: the `TemplateLoader` to use for loading included
templates
:param encoding: the encoding of the `source`
:param lookup: the variable lookup mechanism; either "strict" (the
default), "lenient", or a custom lookup class
:param allow_exec: whether Python code blocks in templates should be
allowed
:note: Changed in 0.5: Added the `allow_exec` argument
"""
self.filepath = filepath or filename
self.filename = filename
self.loader = loader
self.lookup = lookup
self.allow_exec = allow_exec
self._init_filters()
if isinstance(source, basestring):
source = StringIO(source)
else:
source = source
try:
self.stream = list(self._prepare(self._parse(source, encoding)))
except ParseError, e:
raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset)
def __getstate__(self):
state = self.__dict__.copy()
state['filters'] = []
return state
def __setstate__(self, state):
self.__dict__ = state
self._init_filters()
def __repr__(self):
return '<%s "%s">' % (self.__class__.__name__, self.filename)
def _init_filters(self):
self.filters = [self._flatten, self._eval, self._exec]
if self.loader:
self.filters.append(self._include)
def _parse(self, source, encoding):
"""Parse the template.
The parsing stage parses the template and constructs a list of
directives that will be executed in the render stage. The input is
split up into literal output (text that does not depend on the context
data) and directives or expressions.
:param source: a file-like object containing the XML source of the
template, or an XML event stream
:param encoding: the encoding of the `source`
"""
raise NotImplementedError
def _prepare(self, stream):
"""Call the `attach` method of every directive found in the template.
:param stream: the event stream of the template
"""
from calibre.utils.genshi.template.loader import TemplateNotFound
for kind, data, pos in stream:
if kind is SUB:
directives = []
substream = data[1]
for cls, value, namespaces, pos in data[0]:
directive, substream = cls.attach(self, substream, value,
namespaces, pos)
if directive:
directives.append(directive)
substream = self._prepare(substream)
if directives:
yield kind, (directives, list(substream)), pos
else:
for event in substream:
yield event
else:
if kind is INCLUDE:
href, cls, fallback = data
if isinstance(href, basestring) and \
not getattr(self.loader, 'auto_reload', True):
# If the path to the included template is static, and
# auto-reloading is disabled on the template loader,
# the template is inlined into the stream
try:
tmpl = self.loader.load(href, relative_to=pos[0],
cls=cls or self.__class__)
for event in tmpl.stream:
yield event
except TemplateNotFound:
if fallback is None:
raise
for event in self._prepare(fallback):
yield event
continue
elif fallback:
# Otherwise the include is performed at run time
data = href, cls, list(self._prepare(fallback))
yield kind, data, pos
def generate(self, *args, **kwargs):
"""Apply the template to the given context data.
Any keyword arguments are made available to the template as context
data.
Only one positional argument is accepted: if it is provided, it must be
an instance of the `Context` class, and keyword arguments are ignored.
This calling style is used for internal processing.
:return: a markup event stream representing the result of applying
the template to the context data.
"""
vars = {}
if args:
assert len(args) == 1
ctxt = args[0]
if ctxt is None:
ctxt = Context(**kwargs)
else:
vars = kwargs
assert isinstance(ctxt, Context)
else:
ctxt = Context(**kwargs)
stream = self.stream
for filter_ in self.filters:
stream = filter_(iter(stream), ctxt, **vars)
return Stream(stream, self.serializer)
def _eval(self, stream, ctxt, **vars):
"""Internal stream filter that evaluates any expressions in `START` and
`TEXT` events.
"""
filters = (self._flatten, self._eval)
number_conv = self._number_conv
for kind, data, pos in stream:
if kind is START and data[1]:
# Attributes may still contain expressions in start tags at
# this point, so do some evaluation
tag, attrs = data
new_attrs = []
for name, substream in attrs:
if isinstance(substream, basestring):
value = substream
else:
values = []
for subkind, subdata, subpos in self._eval(substream,
ctxt,
**vars):
if subkind is TEXT:
values.append(subdata)
value = [x for x in values if x is not None]
if not value:
continue
new_attrs.append((name, u''.join(value)))
yield kind, (tag, Attrs(new_attrs)), pos
elif kind is EXPR:
result = _eval_expr(data, ctxt, **vars)
if result is not None:
# First check for a string, otherwise the iterable test
# below succeeds, and the string will be chopped up into
# individual characters
if isinstance(result, basestring):
yield TEXT, result, pos
elif isinstance(result, (int, float, long)):
yield TEXT, number_conv(result), pos
elif hasattr(result, '__iter__'):
substream = _ensure(result)
for filter_ in filters:
substream = filter_(substream, ctxt, **vars)
for event in substream:
yield event
else:
yield TEXT, unicode(result), pos
else:
yield kind, data, pos
def _exec(self, stream, ctxt, **vars):
"""Internal stream filter that executes Python code blocks."""
for event in stream:
if event[0] is EXEC:
_exec_suite(event[1], ctxt, **vars)
else:
yield event
def _flatten(self, stream, ctxt, **vars):
"""Internal stream filter that expands `SUB` events in the stream."""
for event in stream:
if event[0] is SUB:
# This event is a list of directives and a list of nested
# events to which those directives should be applied
directives, substream = event[1]
substream = _apply_directives(substream, directives, ctxt,
**vars)
for event in self._flatten(substream, ctxt, **vars):
yield event
else:
yield event
def _include(self, stream, ctxt, **vars):
"""Internal stream filter that performs inclusion of external
template files.
"""
from calibre.utils.genshi.template.loader import TemplateNotFound
for event in stream:
if event[0] is INCLUDE:
href, cls, fallback = event[1]
if not isinstance(href, basestring):
parts = []
for subkind, subdata, subpos in self._eval(href, ctxt,
**vars):
if subkind is TEXT:
parts.append(subdata)
href = u''.join([x for x in parts if x is not None])
try:
tmpl = self.loader.load(href, relative_to=event[2][0],
cls=cls or self.__class__)
for event in tmpl.generate(ctxt, **vars):
yield event
except TemplateNotFound:
if fallback is None:
raise
for filter_ in self.filters:
fallback = filter_(iter(fallback), ctxt, **vars)
for event in fallback:
yield event
else:
yield event
# Module-level shortcuts for the event kinds declared on `Template`
EXEC, EXPR = Template.EXEC, Template.EXPR
INCLUDE, SUB = Template.INCLUDE, Template.SUB

View File

@ -0,0 +1,745 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Implementation of the various template directives."""
import compiler
try:
frozenset
except NameError:
from sets import ImmutableSet as frozenset
from calibre.utils.genshi.core import QName, Stream
from calibre.utils.genshi.path import Path
from calibre.utils.genshi.template.base import TemplateRuntimeError, TemplateSyntaxError, \
EXPR, _apply_directives, _eval_expr, \
_exec_suite
from calibre.utils.genshi.template.eval import Expression, ExpressionASTTransformer, _parse
__all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective',
'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective',
'OtherwiseDirective', 'ReplaceDirective', 'StripDirective',
'WhenDirective', 'WithDirective']
__docformat__ = 'restructuredtext en'
class DirectiveMeta(type):
    """Meta class for template directives.

    Gives every directive class a ``tagname`` attribute: the lower-cased
    class name with every occurrence of "directive" removed.
    """

    def __new__(cls, name, bases, d):
        tagname = name.lower().replace('directive', '')
        d['tagname'] = tagname
        return type.__new__(cls, name, bases, d)
class Directive(object):
"""Abstract base class for template directives.
A directive is basically a callable that takes three positional arguments:
``ctxt`` is the template data context, ``stream`` is an iterable over the
events that the directive applies to, and ``directives`` is is a list of
other directives on the same stream that need to be applied.
Directives can be "anonymous" or "registered". Registered directives can be
applied by the template author using an XML attribute with the
corresponding name in the template. Such directives should be subclasses of
this base class that can be instantiated with the value of the directive
attribute as parameter.
Anonymous directives are simply functions conforming to the protocol
described above, and can only be applied programmatically (for example by
template filters).
"""
__metaclass__ = DirectiveMeta
__slots__ = ['expr']
def __init__(self, value, template=None, namespaces=None, lineno=-1,
offset=-1):
self.expr = self._parse_expr(value, template, lineno, offset)
def attach(cls, template, stream, value, namespaces, pos):
"""Called after the template stream has been completely parsed.
:param template: the `Template` object
:param stream: the event stream associated with the directive
:param value: the argument value for the directive; if the directive was
specified as an element, this will be an `Attrs` instance
with all specified attributes, otherwise it will be a
`unicode` object with just the attribute value
:param namespaces: a mapping of namespace URIs to prefixes
:param pos: a ``(filename, lineno, offset)`` tuple describing the
location where the directive was found in the source
This class method should return a ``(directive, stream)`` tuple. If
``directive`` is not ``None``, it should be an instance of the `Directive`
class, and gets added to the list of directives applied to the substream
at runtime. `stream` is an event stream that replaces the original
stream associated with the directive.
"""
return cls(value, template, namespaces, *pos[1:]), stream
attach = classmethod(attach)
def __call__(self, stream, directives, ctxt, **vars):
"""Apply the directive to the given stream.
:param stream: the event stream
:param directives: a list of the remaining directives that should
process the stream
:param ctxt: the context data
:param vars: additional variables that should be made available when
Python code is executed
"""
raise NotImplementedError
def __repr__(self):
expr = ''
if getattr(self, 'expr', None) is not None:
expr = ' "%s"' % self.expr.source
return '<%s%s>' % (self.__class__.__name__, expr)
def _parse_expr(cls, expr, template, lineno=-1, offset=-1):
"""Parses the given expression, raising a useful error message when a
syntax error is encountered.
"""
try:
return expr and Expression(expr, template.filepath, lineno,
lookup=template.lookup) or None
except SyntaxError, err:
err.msg += ' in expression "%s" of "%s" directive' % (expr,
cls.tagname)
raise TemplateSyntaxError(err, template.filepath, lineno,
offset + (err.offset or 0))
_parse_expr = classmethod(_parse_expr)
def _assignment(ast):
    """Takes the AST representation of an assignment, and returns a function
    that applies the assignment of a given value to a dictionary.

    Tuple targets may be nested; the value is then indexed position by
    position to match the structure of the targets.
    """
    def _names(node):
        # Reduce the AST to a name, or to a (possibly nested) tuple of names
        if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):
            return node.name
        if isinstance(node, (compiler.ast.AssTuple, compiler.ast.Tuple)):
            return tuple([_names(child) for child in node.nodes])

    targets = _names(ast)

    def _assign(data, value, names=targets):
        if type(names) is not tuple:
            data[names] = value
            return
        for idx, name in enumerate(names):
            _assign(data, value[idx], name)

    return _assign
class AttrsDirective(Directive):
    """Implementation of the ``py:attrs`` template directive.

    The value of the ``py:attrs`` attribute should be a dictionary or a list
    of ``(name, value)`` tuples. The items in that dictionary or list are
    added as attributes to the element:

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
    ...   <li py:attrs="foo">Bar</li>
    ... </ul>''')
    >>> print tmpl.generate(foo={'class': 'collapse'})
    <ul>
      <li class="collapse">Bar</li>
    </ul>
    >>> print tmpl.generate(foo=[('class', 'collapse')])
    <ul>
      <li class="collapse">Bar</li>
    </ul>

    If the value evaluates to ``None`` (or any other non-truth value), no
    attributes are added:

    >>> print tmpl.generate(foo=None)
    <ul>
      <li>Bar</li>
    </ul>
    """
    __slots__ = []

    def __call__(self, stream, directives, ctxt, **vars):
        def _generate():
            # The first event is the start tag whose attributes get updated
            kind, (tag, attrib), pos = stream.next()
            attrs = _eval_expr(self.expr, ctxt, **vars)
            if attrs:
                if isinstance(attrs, Stream):
                    try:
                        attrs = iter(attrs).next()
                    except StopIteration:
                        attrs = []
                elif not isinstance(attrs, list): # assume it's a dict
                    attrs = attrs.items()
                # A value of ``None`` removes the attribute, any other value
                # sets it
                dropped = [name for name, val in attrs if val is None]
                changed = [(QName(name), unicode(val).strip())
                           for name, val in attrs if val is not None]
                attrib -= dropped
                attrib |= changed
            yield kind, (tag, attrib), pos
            for event in stream:
                yield event

        return _apply_directives(_generate(), directives, ctxt, **vars)
class ContentDirective(Directive):
    """Implementation of the ``py:content`` template directive.

    This directive replaces the content of the element with the result of
    evaluating the value of the ``py:content`` attribute:

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
    ...   <li py:content="bar">Hello</li>
    ... </ul>''')
    >>> print tmpl.generate(bar='Bye')
    <ul>
      <li>Bye</li>
    </ul>
    """
    __slots__ = []

    def attach(cls, template, stream, value, namespaces, pos):
        """Replace the element content by an expression event.

        No directive object is returned. The new stream consists of the
        first and last event of the original stream with a single `EXPR`
        event in between.

        :raises TemplateSyntaxError: if the directive is used as an element
        """
        location = pos[1:]
        if type(value) is dict:
            message = 'The content directive can not be used as an element'
            raise TemplateSyntaxError(message, template.filepath, *location)
        expr = cls._parse_expr(value, template, *location)
        first, last = stream[0], stream[-1]
        return None, [first, (EXPR, expr, pos), last]
    attach = classmethod(attach)
class DefDirective(Directive):
    """Implementation of the ``py:def`` template directive.

    This directive can be used to create "Named Template Functions", which
    are template snippets that are not actually output during normal
    processing, but rather can be expanded from expressions in other places
    in the template.

    A named template function can be used just like a normal Python function
    from template expressions:

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <p py:def="echo(greeting, name='world')" class="message">
    ...     ${greeting}, ${name}!
    ...   </p>
    ...   ${echo('Hi', name='you')}
    ... </div>''')
    >>> print tmpl.generate(bar='Bye')
    <div>
      <p class="message">
        Hi, you!
      </p>
    </div>

    If a function does not require parameters, the parenthesis can be omitted
    in the definition:

    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <p py:def="helloworld" class="message">
    ...     Hello, world!
    ...   </p>
    ...   ${helloworld()}
    ... </div>''')
    >>> print tmpl.generate(bar='Bye')
    <div>
      <p class="message">
        Hello, world!
      </p>
    </div>
    """
    __slots__ = ['name', 'args', 'star_args', 'dstar_args', 'defaults']

    def __init__(self, args, template, namespaces=None, lineno=-1, offset=-1):
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        self.args = []
        self.star_args = None
        self.dstar_args = None
        self.defaults = {}

        ast = _parse(args).node
        if not isinstance(ast, compiler.ast.CallFunc):
            # Definition without parentheses: just the function name
            self.name = ast.name
            return

        self.name = ast.node.name
        for arg in ast.args:
            self.args.append(arg.name)
            if isinstance(arg, compiler.ast.Keyword):
                # The default value is kept as an expression that is only
                # evaluated when the function is called without that argument
                self.defaults[arg.name] = Expression(arg.expr,
                                                     template.filepath,
                                                     lineno,
                                                     lookup=template.lookup)
        if ast.star_args:
            self.star_args = ast.star_args.name
        if ast.dstar_args:
            self.dstar_args = ast.dstar_args.name

    def attach(cls, template, stream, value, namespaces, pos):
        # When used as an element, the signature is taken from the
        # ``function`` attribute
        if type(value) is dict:
            value = value.get('function')
        return super(DefDirective, cls).attach(template, stream, value,
                                               namespaces, pos)
    attach = classmethod(attach)

    def __call__(self, stream, directives, ctxt, **vars):
        body = list(stream)

        def function(*args, **kwargs):
            # This is a generator function: the arguments are bound when the
            # result is first iterated over, not when the function is called
            scope = {}
            positional = list(args) # make mutable
            for name in self.args:
                if positional:
                    val = positional.pop(0)
                elif name in kwargs:
                    val = kwargs.pop(name)
                else:
                    # NOTE(review): for a parameter without a default this
                    # passes ``None`` as the expression -- confirm how
                    # `_eval_expr` is expected to deal with that
                    val = _eval_expr(self.defaults.get(name), ctxt, **vars)
                scope[name] = val
            if self.star_args is not None:
                scope[self.star_args] = positional
            if self.dstar_args is not None:
                scope[self.dstar_args] = kwargs
            ctxt.push(scope)
            for event in _apply_directives(body, directives, ctxt, **vars):
                yield event
            ctxt.pop()

        try:
            function.__name__ = self.name
        except TypeError:
            # Function name can't be set in Python 2.3
            pass

        # Store the function reference in the bottom context frame so that it
        # doesn't get popped off before processing the template has finished
        # FIXME: this makes context data mutable as a side-effect
        ctxt.frames[-1][self.name] = function

        # The definition itself produces no output
        return []

    def __repr__(self):
        return '<%s "%s">' % (self.__class__.__name__, self.name)
class ForDirective(Directive):
    """Implementation of the ``py:for`` template directive for repeating an
    element based on an iterable in the context data.

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
    ...   <li py:for="item in items">${item}</li>
    ... </ul>''')
    >>> print tmpl.generate(items=[1, 2, 3])
    <ul>
      <li>1</li><li>2</li><li>3</li>
    </ul>
    """
    __slots__ = ['assign', 'filename']

    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
        if ' in ' not in value:
            raise TemplateSyntaxError('"in" keyword missing in "for" directive',
                                      template.filepath, lineno, offset)
        # Split "<targets> in <iterable>" at the first " in "
        targets, iterable = value.split(' in ', 1)
        ast = _parse(targets, 'exec')
        self.assign = _assignment(ast.node.nodes[0].expr)
        self.filename = template.filepath
        Directive.__init__(self, 'iter(%s)' % iterable.strip(), template,
                           namespaces, lineno, offset)

    def attach(cls, template, stream, value, namespaces, pos):
        # When used as an element, the loop is taken from the ``each``
        # attribute
        if type(value) is dict:
            value = value.get('each')
        return super(ForDirective, cls).attach(template, stream, value,
                                               namespaces, pos)
    attach = classmethod(attach)

    def __call__(self, stream, directives, ctxt, **vars):
        items = _eval_expr(self.expr, ctxt, **vars)
        if items is None:
            return

        assign = self.assign
        # One scope dictionary is reused for all iterations
        scope = {}
        body = list(stream)
        for item in items:
            assign(scope, item)
            ctxt.push(scope)
            for event in _apply_directives(body, directives, ctxt, **vars):
                yield event
            ctxt.pop()

    def __repr__(self):
        return '<%s>' % self.__class__.__name__
class IfDirective(Directive):
    """Implementation of the ``py:if`` template directive for conditionally
    excluding elements from being output.

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <b py:if="foo">${bar}</b>
    ... </div>''')
    >>> print tmpl.generate(foo=True, bar='Hello')
    <div>
      <b>Hello</b>
    </div>
    """
    __slots__ = []

    def attach(cls, template, stream, value, namespaces, pos):
        # When used as an element, the condition is taken from the ``test``
        # attribute
        if type(value) is dict:
            value = value.get('test')
        return super(IfDirective, cls).attach(template, stream, value,
                                              namespaces, pos)
    attach = classmethod(attach)

    def __call__(self, stream, directives, ctxt, **vars):
        if not _eval_expr(self.expr, ctxt, **vars):
            # Condition not met: the element produces no output
            return []
        return _apply_directives(stream, directives, ctxt, **vars)
class MatchDirective(Directive):
    """Implementation of the ``py:match`` template directive.

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <span py:match="greeting">
    ...     Hello ${select('@name')}
    ...   </span>
    ...   <greeting name="Dude" />
    ... </div>''')
    >>> print tmpl.generate()
    <div>
      <span>
        Hello Dude
      </span>
    </div>
    """
    __slots__ = ['path', 'namespaces', 'hints']

    def __init__(self, value, template, hints=None, namespaces=None,
                 lineno=-1, offset=-1):
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        self.path = Path(value, template.filepath, lineno)
        self.namespaces = namespaces or {}
        self.hints = hints or ()

    def attach(cls, template, stream, value, namespaces, pos):
        hints = []
        if type(value) is dict:
            # When used as an element, processing hints can be given as
            # attributes: (attribute, triggering value, resulting hint)
            options = (('buffer', 'false', 'not_buffered'),
                       ('once', 'true', 'match_once'),
                       ('recursive', 'false', 'not_recursive'))
            for attr, trigger, hint in options:
                if value.get(attr, '').lower() == trigger:
                    hints.append(hint)
            value = value.get('path')
        directive = cls(value, template, frozenset(hints), namespaces,
                        *pos[1:])
        return directive, stream
    attach = classmethod(attach)

    def __call__(self, stream, directives, ctxt, **vars):
        # Register the match template on the context; the directive itself
        # produces no output
        entry = (self.path.test(ignore_context=True), self.path,
                 list(stream), self.hints, self.namespaces, directives)
        ctxt._match_templates.append(entry)
        return []

    def __repr__(self):
        return '<%s "%s">' % (self.__class__.__name__, self.path.source)
class ReplaceDirective(Directive):
    """Implementation of the ``py:replace`` template directive.

    This directive replaces the element with the result of evaluating the
    value of the ``py:replace`` attribute:

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <span py:replace="bar">Hello</span>
    ... </div>''')
    >>> print tmpl.generate(bar='Bye')
    <div>
      Bye
    </div>

    This directive is equivalent to ``py:content`` combined with ``py:strip``,
    providing a less verbose way to achieve the same effect:

    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <span py:content="bar" py:strip="">Hello</span>
    ... </div>''')
    >>> print tmpl.generate(bar='Bye')
    <div>
      Bye
    </div>
    """
    __slots__ = []

    def attach(cls, template, stream, value, namespaces, pos):
        """Replace the whole element by an expression event.

        No directive object is returned; the new stream consists of a single
        `EXPR` event.

        :raises TemplateSyntaxError: if no value is given
        """
        # When used as an element, the expression is taken from the
        # ``value`` attribute
        if type(value) is dict:
            value = value.get('value')
        location = pos[1:]
        if not value:
            raise TemplateSyntaxError('missing value for "replace" directive',
                                      template.filepath, *location)
        expr = cls._parse_expr(value, template, *location)
        return None, [(EXPR, expr, pos)]
    attach = classmethod(attach)
class StripDirective(Directive):
    """Implementation of the ``py:strip`` template directive.

    When the value of the ``py:strip`` attribute evaluates to ``True``, the
    element is stripped from the output:

    >>> from calibre.utils.genshi.template.markup import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <div py:strip="True"><b>foo</b></div>
    ... </div>''')
    >>> print tmpl.generate()
    <div>
      <b>foo</b>
    </div>

    Leaving the attribute value empty is equivalent to a truth value.

    This directive is particularly interesting for named template functions or
    match templates that do not generate a top-level element:

    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
    ...   <div py:def="echo(what)" py:strip="">
    ...     <b>${what}</b>
    ...   </div>
    ...   ${echo('foo')}
    ... </div>''')
    >>> print tmpl.generate()
    <div>
        <b>foo</b>
    </div>
    """
    __slots__ = []

    def __call__(self, stream, directives, ctxt, **vars):
        def _generate():
            if not _eval_expr(self.expr, ctxt, **vars):
                # Nothing to strip: pass all events through
                for event in stream:
                    yield event
                return
            stream.next() # skip start tag
            # Emit every event one step late, so that the last event of the
            # stream is never output
            held = stream.next()
            for event in stream:
                yield held
                held = event

        return _apply_directives(_generate(), directives, ctxt, **vars)

    def attach(cls, template, stream, value, namespaces, pos):
        if not value:
            # An empty value always strips: drop the first and last event
            # right away, no directive object is needed at render time
            return None, stream[1:-1]
        return super(StripDirective, cls).attach(template, stream, value,
                                                 namespaces, pos)
    attach = classmethod(attach)
class ChooseDirective(Directive):
    """Implementation of the ``py:choose`` directive, which selects one of
    several nested bodies for output.

    With an empty ``py:choose`` expression, every nested ``py:when``
    expression is tested for truth and the body of the first true one is
    output. If none matches, the ``py:otherwise`` body is used as fallback.

    >>> from genshi.template import MarkupTemplate
    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
    ...   py:choose="">
    ...   <span py:when="0 == 1">0</span>
    ...   <span py:when="1 == 1">1</span>
    ...   <span py:otherwise="">2</span>
    ... </div>''')
    >>> print tmpl.generate()
    <div>
      <span>1</span>
    </div>

    With a non-empty ``py:choose`` expression, the nested ``py:when``
    expressions are compared for equality against its value:

    >>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/"
    ...   py:choose="2">
    ...   <span py:when="1">1</span>
    ...   <span py:when="2">2</span>
    ... </div>''')
    >>> print tmpl.generate()
    <div>
      <span>2</span>
    </div>

    Behavior is undefined if a ``py:choose`` block has content outside of a
    ``py:when`` or ``py:otherwise`` block, or if a ``py:otherwise`` occurs
    before the ``py:when`` blocks.
    """
    __slots__ = ['matched', 'value']

    def attach(cls, template, stream, value, namespaces, pos):
        """Attach the directive, reading the expression from the ``test``
        key when ``value`` is a dict.
        """
        if type(value) is dict:
            value = value.get('test')
        return super(ChooseDirective, cls).attach(template, stream, value,
                                                  namespaces, pos)
    attach = classmethod(attach)

    def __call__(self, stream, directives, ctxt, **vars):
        """Render the body with a choice record on ``ctxt._choice_stack``.

        The record is a list ``[matched, has_test, test_value]`` that nested
        ``when``/``otherwise`` directives read and update.
        """
        has_test = bool(self.expr)
        test_value = None
        if has_test:
            test_value = _eval_expr(self.expr, ctxt, **vars)
        ctxt._choice_stack.append([False, has_test, test_value])
        for event in _apply_directives(stream, directives, ctxt, **vars):
            yield event
        ctxt._choice_stack.pop()
class WhenDirective(Directive):
    """Implementation of the ``py:when`` directive, which is nested inside an
    element carrying the ``py:choose`` directive.

    See the documentation of the `ChooseDirective` for usage.
    """
    __slots__ = ['filename']

    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
        """Initialize the directive and remember the template file path for
        use in error messages.
        """
        Directive.__init__(self, value, template, namespaces, lineno, offset)
        self.filename = template.filepath

    def attach(cls, template, stream, value, namespaces, pos):
        """Attach the directive, reading the expression from the ``test``
        key when ``value`` is a dict.
        """
        if type(value) is dict:
            value = value.get('test')
        return super(WhenDirective, cls).attach(template, stream, value,
                                                namespaces, pos)
    attach = classmethod(attach)

    def __call__(self, stream, directives, ctxt, **vars):
        """Output the body if this is the first matching branch.

        :raise TemplateRuntimeError: when used outside of a ``choose``
                                     directive, or when neither this
                                     directive nor the enclosing ``choose``
                                     has a test expression
        """
        stack = ctxt._choice_stack
        choice = stack and stack[-1]
        if not choice:
            raise TemplateRuntimeError('"when" directives can only be used '
                                       'inside a "choose" directive',
                                       self.filename, *stream.next()[2][1:])
        if choice[0]:
            # An earlier branch already matched.
            return []
        if not (self.expr or choice[1]):
            raise TemplateRuntimeError('either "choose" or "when" directive '
                                       'must have a test expression',
                                       self.filename, *stream.next()[2][1:])
        if not choice[1]:
            # No value on the "choose": test our own expression for truth.
            matched = bool(_eval_expr(self.expr, ctxt, **vars))
        elif self.expr:
            # Compare the "choose" value against our expression.
            matched = choice[2] == _eval_expr(self.expr, ctxt, **vars)
        else:
            matched = bool(choice[2])
        choice[0] = matched
        if matched:
            return _apply_directives(stream, directives, ctxt, **vars)
        return []
class OtherwiseDirective(Directive):
    """Implementation of the ``py:otherwise`` directive, which is nested
    inside an element carrying the ``py:choose`` directive.

    See the documentation of `ChooseDirective` for usage.
    """
    __slots__ = ['filename']

    def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
        """Initialize the directive; the attribute value is ignored, so no
        expression is passed on to the base class.
        """
        Directive.__init__(self, None, template, namespaces, lineno, offset)
        self.filename = template.filepath

    def __call__(self, stream, directives, ctxt, **vars):
        """Output the body unless an earlier branch has already matched.

        :raise TemplateRuntimeError: when used outside of a ``choose``
                                     directive
        """
        stack = ctxt._choice_stack
        choice = stack and stack[-1]
        if not choice:
            raise TemplateRuntimeError('an "otherwise" directive can only be '
                                       'used inside a "choose" directive',
                                       self.filename, *stream.next()[2][1:])
        if not choice[0]:
            # Mark the choice as settled, then render the fallback body.
            choice[0] = True
            return _apply_directives(stream, directives, ctxt, **vars)
        return []
class WithDirective(Directive):
"""Implementation of the ``py:with`` template directive, which allows
shorthand access to variables and expressions.
>>> from genshi.template import MarkupTemplate
>>> tmpl = MarkupTemplate('''<div xmlns:py="http://genshi.edgewall.org/">
... <span py:with="y=7; z=x+10">$x $y $z</span>
... </div>''')
>>> print tmpl.generate(x=42)
<div>
<span>42 7 52</span>
</div>
"""
__slots__ = ['vars']
def __init__(self, value, template, namespaces=None, lineno=-1, offset=-1):
Directive.__init__(self, None, template, namespaces, lineno, offset)
self.vars = []
value = value.strip()
try:
ast = _parse(value, 'exec').node
for node in ast.nodes:
if isinstance(node, compiler.ast.Discard):
continue
elif not isinstance(node, compiler.ast.Assign):
raise TemplateSyntaxError('only assignment allowed in '
'value of the "with" directive',
template.filepath, lineno, offset)
self.vars.append(([_assignment(n) for n in node.nodes],
Expression(node.expr, template.filepath,
lineno, lookup=template.lookup)))
except SyntaxError, err:
err.msg += ' in expression "%s" of "%s" directive' % (value,
self.tagname)
raise TemplateSyntaxError(err, template.filepath, lineno,
offset + (err.offset or 0))
def attach(cls, template, stream, value, namespaces, pos):
if type(value) is dict:
value = value.get('vars')
return super(WithDirective, cls).attach(template, stream, value,
namespaces, pos)
attach = classmethod(attach)
def __call__(self, stream, directives, ctxt, **vars):
frame = {}
ctxt.push(frame)
for targets, expr in self.vars:
value = _eval_expr(expr, ctxt, **vars)
for assign in targets:
assign(frame, value)
for event in _apply_directives(stream, directives, ctxt, **vars):
yield event
ctxt.pop()
def __repr__(self):
return '<%s>' % (self.__class__.__name__)

View File

@ -0,0 +1,823 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Support for "safe" evaluation of Python expressions."""
import __builtin__
from compiler import ast, parse
from compiler.pycodegen import ExpressionCodeGenerator, ModuleCodeGenerator
import new
try:
set
except NameError:
from sets import ImmutableSet as frozenset
from sets import Set as set
from textwrap import dedent
from calibre.utils.genshi.core import Markup
from calibre.utils.genshi.template.base import TemplateRuntimeError
from calibre.utils.genshi.util import flatten
__all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup',
'Undefined', 'UndefinedError']
__docformat__ = 'restructuredtext en'
# Check for a Python 2.4 bug in the eval loop
# The probe executes a star import with a non-dict object as the locals
# mapping; if that raises SystemError the flag is set, and `visitFrom` in
# the AST transformer then rewrites star imports to call
# `_star_import_patch` instead.
has_star_import_bug = False
try:
    class _FakeMapping(object):
        # Accepts any item access or assignment and ignores it.
        __getitem__ = __setitem__ = lambda *a: None
    exec 'from sys import *' in {}, _FakeMapping()
except SystemError:
    has_star_import_bug = True
except TypeError:
    pass # Python 2.3
# The helper class is only needed for the probe above.
del _FakeMapping
def _star_import_patch(mapping, modname):
"""This function is used as helper if a Python version with a broken
star-import opcode is in use.
"""
module = __import__(modname, None, None, ['__all__'])
if hasattr(module, '__all__'):
members = module.__all__
else:
members = [x for x in module.__dict__ if not x.startswith('_')]
mapping.update([(name, getattr(module, name)) for name in members])
class Code(object):
    """Abstract base class for the `Expression` and `Suite` classes.

    Subclasses provide a ``mode`` attribute (``'eval'`` or ``'exec'``) that
    selects how the source is parsed and compiled.
    """
    __slots__ = ['source', 'code', 'ast', '_globals']

    def __init__(self, source, filename=None, lineno=-1, lookup='strict',
                 xform=None):
        """Create the code object, either from a string, or from an AST node.

        :param source: either a string containing the source code, or an AST
                       node
        :param filename: the (preferably absolute) name of the file containing
                         the code
        :param lineno: the number of the line on which the code was found
        :param lookup: the lookup class that defines how variables are looked
                       up in the context; can be either "strict" (the
                       default), "lenient", or a custom lookup class;
                       `None` selects the lenient lookup
        :param xform: the AST transformer that should be applied to the code;
                      if `None`, the appropriate transformation is chosen
                      depending on the mode
        """
        mode = self.mode
        if not isinstance(source, basestring):
            assert isinstance(source, ast.Node), \
                'Expected string or AST node, but got %r' % source
            # No source text is available for a pre-built AST.
            self.source = '?'
            if mode == 'eval':
                node = ast.Expression(source)
            else:
                node = ast.Module(None, source)
        else:
            self.source = source
            node = _parse(source, mode=mode)

        self.ast = node
        self.code = _compile(node, self.source, mode=mode,
                             filename=filename, lineno=lineno, xform=xform)

        if lookup is None:
            lookup = LenientLookup
        elif isinstance(lookup, basestring):
            by_name = {'lenient': LenientLookup, 'strict': StrictLookup}
            lookup = by_name[lookup]
        self._globals = lookup.globals

    def __getstate__(self):
        """Return the pickle state.

        The code object is stored as a tuple of its fields, and the lookup
        class is recovered from the bound ``globals`` classmethod.
        """
        c = self.code
        fields = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code,
                  c.co_consts, c.co_names, c.co_varnames, c.co_filename,
                  c.co_name, c.co_firstlineno, c.co_lnotab, (), ())
        return {
            'source': self.source,
            'ast': self.ast,
            'lookup': self._globals.im_self,
            'code': fields,
        }

    def __setstate__(self, state):
        """Restore the object from the state built by `__getstate__`."""
        self.source = state['source']
        self.ast = state['ast']
        # The leading 0 is the argument count of the rebuilt code object.
        self.code = new.code(0, *state['code'])
        self._globals = state['lookup'].globals

    def __eq__(self, other):
        if type(other) == type(self):
            return self.code == other.code
        return False

    def __hash__(self):
        return hash(self.code)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.source)
class Expression(Code):
    """Evaluates Python expressions used in templates.

    >>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
    >>> Expression('test').evaluate(data)
    'Foo'

    >>> Expression('items[0]').evaluate(data)
    1
    >>> Expression('items[-1]').evaluate(data)
    3
    >>> Expression('dict["some"]').evaluate(data)
    'thing'

    As in e.g. Javascript, dot notation can be used to access the items of
    a mapping:

    >>> Expression('dict.some').evaluate(data)
    'thing'

    The reverse works as well: item access can be used to read any object
    attribute:

    >>> class MyClass(object):
    ...     myattr = 'Bar'
    >>> data = dict(mine=MyClass(), key='myattr')
    >>> Expression('mine.myattr').evaluate(data)
    'Bar'
    >>> Expression('mine["myattr"]').evaluate(data)
    'Bar'
    >>> Expression('mine[key]').evaluate(data)
    'Bar'

    All of the standard Python operators, as well as built-in functions such
    as ``len()``, are available in template expressions:

    >>> data = dict(items=[1, 2, 3])
    >>> Expression('len(items)').evaluate(data)
    3
    """
    __slots__ = []
    mode = 'eval'

    def evaluate(self, data):
        """Evaluate the expression against the given data dictionary.

        :param data: a mapping containing the data to evaluate against
        :return: the result of the evaluation
        """
        __traceback_hide__ = 'before_and_this'
        namespace = self._globals(data)
        local_vars = {'__data__': data}
        return eval(self.code, namespace, local_vars)
class Suite(Code):
"""Executes Python statements used in templates.
>>> data = dict(test='Foo', items=[1, 2, 3], dict={'some': 'thing'})
>>> Suite("foo = dict['some']").execute(data)
>>> data['foo']
'thing'
"""
__slots__ = []
mode = 'exec'
def execute(self, data):
"""Execute the suite in the given data dictionary.
:param data: a mapping containing the data to execute in
"""
__traceback_hide__ = 'before_and_this'
_globals = self._globals(data)
exec self.code in _globals, data
# Sentinel for "no value", distinct from every real value including `None`.
UNDEFINED = object()


class UndefinedError(TemplateRuntimeError):
    """Exception raised when a template expression tries to access a
    variable that is not defined in the context.

    :see: `LenientLookup`, `StrictLookup`
    """
    def __init__(self, name, owner=UNDEFINED):
        """Build the error message.

        :param name: the name that could not be resolved
        :param owner: the object on which the name was looked up, if it was
                      accessed as a member
        """
        if owner is UNDEFINED:
            message = '"%s" not defined' % name
        else:
            message = '%s has no member named "%s"' % (repr(owner), name)
        TemplateRuntimeError.__init__(self, message)
class Undefined(object):
    """Represents a reference to an undefined variable.

    Unlike the Python runtime, template expressions can refer to an undefined
    variable without a `NameError` being raised. The result is an instance of
    the `Undefined` class, which is false in conditions, iterates as an empty
    sequence and prints as ``undefined``:

    >>> foo = Undefined('foo')
    >>> bool(foo)
    False
    >>> list(foo)
    []
    >>> print foo
    undefined

    Calling an undefined variable, or accessing an attribute or item of it,
    raises an exception that includes the name used to reference it:

    >>> foo('bar')
    Traceback (most recent call last):
        ...
    UndefinedError: "foo" not defined

    >>> foo.bar
    Traceback (most recent call last):
        ...
    UndefinedError: "foo" not defined

    :see: `LenientLookup`
    """
    __slots__ = ['_name', '_owner']

    def __init__(self, name, owner=UNDEFINED):
        """Initialize the object.

        :param name: the name of the reference
        :param owner: the owning object, if the variable is accessed as a member
        """
        self._owner = owner
        self._name = name

    def __iter__(self):
        return iter([])

    def __nonzero__(self):
        return False

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self._name)

    def __str__(self):
        return 'undefined'

    def _die(self, *args, **kwargs):
        """Raise an `UndefinedError`."""
        __traceback_hide__ = True
        raise UndefinedError(self._name, self._owner)

    # Calls, attribute access and item access all fail in the same way.
    __call__ = _die
    __getattr__ = _die
    __getitem__ = _die
class LookupBase(object):
    """Abstract base class for variable lookup implementations."""

    def globals(cls, data):
        """Construct the globals dictionary to use as the execution context for
        the expression or suite.
        """
        return dict(
            __data__=data,
            _lookup_name=cls.lookup_name,
            _lookup_attr=cls.lookup_attr,
            _lookup_item=cls.lookup_item,
            _star_import_patch=_star_import_patch,
            UndefinedError=UndefinedError,
        )
    globals = classmethod(globals)

    def lookup_name(cls, data, name):
        """Resolve ``name`` in ``data``, then in the builtins, and finally
        through `undefined`.
        """
        __traceback_hide__ = True
        found = data.get(name, UNDEFINED)
        if found is not UNDEFINED:
            return found
        found = BUILTINS.get(name, found)
        if found is not UNDEFINED:
            return found
        return cls.undefined(name)
    lookup_name = classmethod(lookup_name)

    def lookup_attr(cls, obj, key):
        """Resolve ``obj.key``, falling back to ``obj[key]`` and finally to
        `undefined`.

        An `AttributeError` is re-raised as is when the class of ``obj``
        does have the attribute.
        """
        __traceback_hide__ = True
        try:
            return getattr(obj, key)
        except AttributeError:
            if hasattr(obj.__class__, key):
                raise
        try:
            return obj[key]
        except (KeyError, TypeError):
            return cls.undefined(key, owner=obj)
    lookup_attr = classmethod(lookup_attr)

    def lookup_item(cls, obj, key):
        """Resolve ``obj[key]``, falling back to attribute access for string
        keys and finally to `undefined`.

        :param key: a sequence of subscripts; a sequence of length one is
                    unwrapped to its only element
        """
        __traceback_hide__ = True
        if len(key) == 1:
            key = key[0]
        try:
            return obj[key]
        except (AttributeError, KeyError, IndexError, TypeError):
            if not isinstance(key, basestring):
                raise
            found = getattr(obj, key, UNDEFINED)
            if found is UNDEFINED:
                found = cls.undefined(key, owner=obj)
            return found
    lookup_item = classmethod(lookup_item)

    def undefined(cls, key, owner=UNDEFINED):
        """Can be overridden by subclasses to specify behavior when undefined
        variables are accessed.

        :param key: the name of the variable
        :param owner: the owning object, if the variable is accessed as a member
        """
        raise NotImplementedError
    undefined = classmethod(undefined)
class LenientLookup(LookupBase):
    """Variable lookup mechanism that tolerates undefined names.

    A reference to an undefined variable evaluates to an instance of the
    `Undefined` class:

    >>> expr = Expression('nothing', lookup='lenient')
    >>> undef = expr.evaluate({})
    >>> undef
    <Undefined 'nothing'>

    The same is true for a non-existing attribute or item of an existing
    object:

    >>> expr = Expression('something.nil', lookup='lenient')
    >>> expr.evaluate({'something': dict()})
    <Undefined 'nil'>

    See the documentation of the `Undefined` class for details on the behavior
    of such objects.

    :see: `StrictLookup`
    """
    def undefined(cls, key, owner=UNDEFINED):
        """Return an ``Undefined`` object."""
        __traceback_hide__ = True
        placeholder = Undefined(key, owner=owner)
        return placeholder
    undefined = classmethod(undefined)
class StrictLookup(LookupBase):
    """Variable lookup mechanism that rejects undefined names.

    A reference to an undefined variable immediately raises an
    ``UndefinedError``:

    >>> expr = Expression('nothing', lookup='strict')
    >>> expr.evaluate({})
    Traceback (most recent call last):
        ...
    UndefinedError: "nothing" not defined

    The same is true for a non-existing attribute or item of an existing
    object:

    >>> expr = Expression('something.nil', lookup='strict')
    >>> expr.evaluate({'something': dict()})
    Traceback (most recent call last):
        ...
    UndefinedError: {} has no member named "nil"
    """
    def undefined(cls, key, owner=UNDEFINED):
        """Raise an ``UndefinedError`` immediately."""
        __traceback_hide__ = True
        error = UndefinedError(key, owner=owner)
        raise error
    undefined = classmethod(undefined)
def _parse(source, mode='eval'):
    """Parse template source code into a `compiler` AST.

    In ``exec`` mode the source is normalized first: tabs are expanded, the
    lines after the first are dedented as a group, and if the first line
    ends with a colon while the remainder starts unindented, the remainder
    is indented so that it forms the body of that first line.

    :param source: the source code as a `str` or `unicode` string
    :param mode: the parse mode, ``'eval'`` or ``'exec'``
    :return: the AST produced by `compiler.parse`
    :raise SyntaxError: if the source is not valid Python
    """
    source = source.strip()
    if mode == 'exec':
        lines = [line.expandtabs() for line in source.splitlines()]
        if lines:
            first = lines[0]
            rest = dedent('\n'.join(lines[1:])).rstrip()
            # `rest` is empty for single-line sources such as "if x:"; check
            # it before indexing so that the parser reports a SyntaxError
            # instead of this function failing with an IndexError.
            if rest and first.rstrip().endswith(':') \
                    and not rest[0].isspace():
                rest = '\n'.join([' %s' % line for line in rest.splitlines()])
            source = '\n'.join([first, rest])
    if isinstance(source, unicode):
        # Encode as UTF-8 with a leading byte order mark.
        source = '\xef\xbb\xbf' + source.encode('utf-8')
    return parse(source, mode)
def _compile(node, source=None, mode='eval', filename=None, lineno=-1,
             xform=None):
    """Transform an AST and compile it into a code object.

    :param node: the AST to compile
    :param source: the source text, used only to build the name of the code
                   object; may be `None`
    :param mode: ``'eval'`` for expressions, anything else for suites
    :param filename: the file name to record in the code object; defaults to
                     ``'<string>'``
    :param lineno: the first line number to record; values below 1 become 1
    :param xform: the AST transformer class to apply; if `None` it is chosen
                  based on ``mode``
    :return: the compiled code object
    """
    if xform is None:
        xform = {'eval': ExpressionASTTransformer}.get(mode,
                                                       TemplateASTTransformer)
    tree = xform().visit(node)
    if isinstance(filename, unicode):
        # unicode file names not allowed for code objects
        filename = filename.encode('utf-8', 'replace')
    elif not filename:
        filename = '<string>'
    tree.filename = filename
    if lineno <= 0:
        lineno = 1

    if mode == 'eval':
        gen = ExpressionCodeGenerator(tree)
        name = '<Expression %r>' % (source or '?')
    else:
        gen = ModuleCodeGenerator(tree)
        # Tolerate the default `source=None`, as the eval branch does.
        lines = (source or '').splitlines()
        if not lines:
            extract = ''
        else:
            extract = lines[0]
        if len(lines) > 1:
            extract += ' ...'
        name = '<Suite %r>' % (extract)
    gen.optimized = True
    code = gen.getCode()

    # We'd like to just set co_firstlineno, but it's readonly. So we need to
    # clone the code object while adjusting the line number
    return new.code(0, code.co_nlocals, code.co_stacksize,
                    code.co_flags | 0x0040, code.co_code, code.co_consts,
                    code.co_names, code.co_varnames, filename, name, lineno,
                    code.co_lnotab, (), ())
# Names available to every template expression: a copy of the Python
# builtins, extended with `Markup` and `Undefined`. Consulted by
# `LookupBase.lookup_name` when a name is not found in the template data.
BUILTINS = __builtin__.__dict__.copy()
BUILTINS.update({'Markup': Markup, 'Undefined': Undefined})
# Names that form the outermost entry of `TemplateASTTransformer.locals`, so
# they are never rewritten into context lookups.
CONSTANTS = frozenset(['False', 'True', 'None', 'NotImplemented', 'Ellipsis'])
class ASTTransformer(object):
    """General purpose base class for AST transformations.

    Every visitor method can be overridden to return an AST node that has been
    altered or replaced in some way.

    The default visitors rebuild each node from its visited children by
    calling the node class with the same positional arguments. Node types
    without a ``visit<ClassName>`` method are returned unchanged.
    """

    def visit(self, node):
        """Dispatch ``node`` to the ``visit<ClassName>`` method for its class.

        `None` is passed through, and a tuple is visited element by element.
        """
        if node is None:
            return None
        if type(node) is tuple:
            return tuple([self.visit(n) for n in node])
        visitor = getattr(self, 'visit%s' % node.__class__.__name__,
                          self._visitDefault)
        return visitor(node)

    def _clone(self, node, *args):
        """Create a new node of the same class as ``node`` from ``args``,
        carrying over the line number of the original if it has one.
        """
        lineno = getattr(node, 'lineno', None)
        node = node.__class__(*args)
        if lineno is not None:
            node.lineno = lineno
        if isinstance(node, (ast.Class, ast.Function, ast.Lambda)) or \
                hasattr(ast, 'GenExpr') and isinstance(node, ast.GenExpr):
            node.filename = '<string>' # workaround for bug in pycodegen
        return node

    def _visitDefault(self, node):
        """Fallback visitor: return the node unchanged."""
        return node

    def visitExpression(self, node):
        return self._clone(node, self.visit(node.node))

    def visitModule(self, node):
        return self._clone(node, node.doc, self.visit(node.node))

    def visitStmt(self, node):
        return self._clone(node, [self.visit(x) for x in node.nodes])

    # Classes, Functions & Accessors

    def visitCallFunc(self, node):
        return self._clone(node, self.visit(node.node),
            [self.visit(x) for x in node.args],
            node.star_args and self.visit(node.star_args) or None,
            node.dstar_args and self.visit(node.dstar_args) or None
        )

    def visitClass(self, node):
        return self._clone(node, node.name, [self.visit(x) for x in node.bases],
            node.doc, self.visit(node.code)
        )

    def visitFrom(self, node):
        """Replace ``from module import *`` with a call to
        `_star_import_patch`, but only on interpreters that have the
        star-import bug.
        """
        if not has_star_import_bug or node.names != [('*', None)]:
            # This is a Python 2.4 bug. Only if we have a broken Python
            # version we have to apply the hack
            return node
        new_node = ast.Discard(ast.CallFunc(
            ast.Name('_star_import_patch'),
            [ast.Name('__data__'), ast.Const(node.modname)], None, None
        ))
        if hasattr(node, 'lineno'): # No lineno in Python 2.3
            new_node.lineno = node.lineno
        return new_node

    def visitFunction(self, node):
        args = []
        # The decorators argument is only passed when the node has one.
        if hasattr(node, 'decorators'):
            args.append(self.visit(node.decorators))
        return self._clone(node, *args + [
            node.name,
            node.argnames,
            [self.visit(x) for x in node.defaults],
            node.flags,
            node.doc,
            self.visit(node.code)
        ])

    def visitGetattr(self, node):
        return self._clone(node, self.visit(node.expr), node.attrname)

    def visitLambda(self, node):
        node = self._clone(node, node.argnames,
            [self.visit(x) for x in node.defaults], node.flags,
            self.visit(node.code)
        )
        return node

    def visitSubscript(self, node):
        return self._clone(node, self.visit(node.expr), node.flags,
            [self.visit(x) for x in node.subs]
        )

    # Statements

    def visitAssert(self, node):
        return self._clone(node, self.visit(node.test), self.visit(node.fail))

    def visitAssign(self, node):
        return self._clone(node, [self.visit(x) for x in node.nodes],
            self.visit(node.expr)
        )

    def visitAssAttr(self, node):
        return self._clone(node, self.visit(node.expr), node.attrname,
            node.flags
        )

    def visitAugAssign(self, node):
        return self._clone(node, self.visit(node.node), node.op,
            self.visit(node.expr)
        )

    def visitDecorators(self, node):
        return self._clone(node, [self.visit(x) for x in node.nodes])

    def visitExec(self, node):
        return self._clone(node, self.visit(node.expr), self.visit(node.locals),
            self.visit(node.globals)
        )

    def visitFor(self, node):
        return self._clone(node, self.visit(node.assign), self.visit(node.list),
            self.visit(node.body), self.visit(node.else_)
        )

    def visitIf(self, node):
        return self._clone(node, [self.visit(x) for x in node.tests],
            self.visit(node.else_)
        )

    def _visitPrint(self, node):
        return self._clone(node, [self.visit(x) for x in node.nodes],
            self.visit(node.dest)
        )
    visitPrint = visitPrintnl = _visitPrint

    def visitRaise(self, node):
        return self._clone(node, self.visit(node.expr1), self.visit(node.expr2),
            self.visit(node.expr3)
        )

    def visitReturn(self, node):
        return self._clone(node, self.visit(node.value))

    def visitTryExcept(self, node):
        return self._clone(node, self.visit(node.body), self.visit(node.handlers),
            self.visit(node.else_)
        )

    def visitTryFinally(self, node):
        return self._clone(node, self.visit(node.body), self.visit(node.final))

    def visitWhile(self, node):
        return self._clone(node, self.visit(node.test), self.visit(node.body),
            self.visit(node.else_)
        )

    def visitWith(self, node):
        # NOTE(review): this iterates over `node.vars` as if it were a list.
        # In the stdlib `compiler.ast.With` node, `vars` appears to be a
        # single node or None, not a list -- confirm before relying on
        # `with` statements in template code blocks.
        return self._clone(node, self.visit(node.expr),
            [self.visit(x) for x in node.vars], self.visit(node.body)
        )

    def visitYield(self, node):
        return self._clone(node, self.visit(node.value))

    # Operators

    def _visitBoolOp(self, node):
        """Shared visitor for nodes built from a single list of child nodes."""
        return self._clone(node, [self.visit(x) for x in node.nodes])
    visitAnd = visitOr = visitBitand = visitBitor = visitBitxor = _visitBoolOp
    visitAssTuple = visitAssList = _visitBoolOp

    def _visitBinOp(self, node):
        """Shared visitor for nodes built from a ``(left, right)`` pair."""
        return self._clone(node,
            (self.visit(node.left), self.visit(node.right))
        )
    visitAdd = visitSub = _visitBinOp
    visitDiv = visitFloorDiv = visitMod = visitMul = visitPower = _visitBinOp
    visitLeftShift = visitRightShift = _visitBinOp

    def visitCompare(self, node):
        return self._clone(node, self.visit(node.expr),
            [(op, self.visit(n)) for op, n in node.ops]
        )

    def _visitUnaryOp(self, node):
        """Shared visitor for nodes built from a single ``expr`` child."""
        return self._clone(node, self.visit(node.expr))
    visitUnaryAdd = visitUnarySub = visitNot = visitInvert = _visitUnaryOp
    visitBackquote = visitDiscard = _visitUnaryOp

    def visitIfExp(self, node):
        return self._clone(node, self.visit(node.test), self.visit(node.then),
            self.visit(node.else_)
        )

    # Identifiers, Literals and Comprehensions

    def visitDict(self, node):
        return self._clone(node,
            [(self.visit(k), self.visit(v)) for k, v in node.items]
        )

    def visitGenExpr(self, node):
        return self._clone(node, self.visit(node.code))

    def visitGenExprFor(self, node):
        return self._clone(node, self.visit(node.assign), self.visit(node.iter),
            [self.visit(x) for x in node.ifs]
        )

    def visitGenExprIf(self, node):
        return self._clone(node, self.visit(node.test))

    def visitGenExprInner(self, node):
        quals = [self.visit(x) for x in node.quals]
        return self._clone(node, self.visit(node.expr), quals)

    def visitKeyword(self, node):
        return self._clone(node, node.name, self.visit(node.expr))

    def visitList(self, node):
        return self._clone(node, [self.visit(n) for n in node.nodes])

    def visitListComp(self, node):
        quals = [self.visit(x) for x in node.quals]
        return self._clone(node, self.visit(node.expr), quals)

    def visitListCompFor(self, node):
        return self._clone(node, self.visit(node.assign), self.visit(node.list),
            [self.visit(x) for x in node.ifs]
        )

    def visitListCompIf(self, node):
        return self._clone(node, self.visit(node.test))

    def visitSlice(self, node):
        return self._clone(node, self.visit(node.expr), node.flags,
            node.lower and self.visit(node.lower) or None,
            node.upper and self.visit(node.upper) or None
        )

    def visitSliceobj(self, node):
        return self._clone(node, [self.visit(x) for x in node.nodes])

    def visitTuple(self, node):
        return self._clone(node, [self.visit(n) for n in node.nodes])
class TemplateASTTransformer(ASTTransformer):
    """Concrete AST transformer that implements the AST transformations needed
    for code embedded in templates.

    References to names that are not known to be local are rewritten into
    ``_lookup_name(__data__, name)`` calls. ``self.locals`` is a stack of
    name sets, one per enclosing scope, with `CONSTANTS` at the bottom.
    """

    def __init__(self):
        self.locals = [CONSTANTS]

    def _visit_in_scope(self, names, visitor, node):
        """Run the base class ``visitor`` on ``node`` with ``names`` pushed as
        a new innermost scope, which is popped again afterwards.
        """
        self.locals.append(names)
        try:
            return visitor(self, node)
        finally:
            self.locals.pop()

    def _declare(self, name):
        """Record ``name`` in the innermost scope, unless only the bottom
        entry of the stack is present.
        """
        if len(self.locals) > 1:
            self.locals[-1].add(name)

    def visitConst(self, node):
        text = node.value
        if not isinstance(text, str):
            return node
        try:  # If the string is ASCII, return a `str` object
            text.decode('ascii')
        except ValueError:  # Otherwise return a `unicode` object
            return ast.Const(text.decode('utf-8'))
        return node

    def visitAssName(self, node):
        self._declare(node.name)
        return node

    def visitAugAssign(self, node):
        """Rewrite an augmented assignment to a non-local name so that it
        operates on ``__data__[name]``, raising `UndefinedError` at runtime
        if the name is not in ``__data__``.
        """
        target = node.node
        if not isinstance(target, ast.Name) \
                or target.name in flatten(self.locals):
            return ASTTransformer.visitAugAssign(self, node)
        name = target.name
        node.node = ast.Subscript(ast.Name('__data__'), 'OP_APPLY',
                                  [ast.Const(name)])
        node.expr = self.visit(node.expr)
        is_defined = ast.Compare(ast.Const(name),
                                 [('in', ast.Name('__data__'))])
        complain = ast.Raise(ast.CallFunc(ast.Name('UndefinedError'),
                                          [ast.Const(name)]),
                             None, None)
        return ast.If([(is_defined, ast.Stmt([node]))],
                      ast.Stmt([complain]))

    def visitClass(self, node):
        self._declare(node.name)
        return self._visit_in_scope(set(), ASTTransformer.visitClass, node)

    def visitFor(self, node):
        return self._visit_in_scope(set(), ASTTransformer.visitFor, node)

    def visitFunction(self, node):
        self._declare(node.name)
        return self._visit_in_scope(set(node.argnames),
                                    ASTTransformer.visitFunction, node)

    def visitGenExpr(self, node):
        return self._visit_in_scope(set(), ASTTransformer.visitGenExpr, node)

    def visitLambda(self, node):
        return self._visit_in_scope(set(flatten(node.argnames)),
                                    ASTTransformer.visitLambda, node)

    def visitListComp(self, node):
        return self._visit_in_scope(set(), ASTTransformer.visitListComp, node)

    def visitName(self, node):
        # If the name refers to a local inside a lambda, list comprehension, or
        # generator expression, leave it alone
        if node.name in flatten(self.locals):
            return node
        # Otherwise, translate the name ref into a context lookup
        lookup_args = [ast.Name('__data__'), ast.Const(node.name)]
        return ast.CallFunc(ast.Name('_lookup_name'), lookup_args)
class ExpressionASTTransformer(TemplateASTTransformer):
    """Concrete AST transformer for template expressions.

    In addition to the transformations of the base class, attribute access
    is rewritten into a ``_lookup_attr`` call and subscript access into a
    ``_lookup_item`` call.
    """

    def visitGetattr(self, node):
        owner = self.visit(node.expr)
        attr_name = ast.Const(node.attrname)
        return ast.CallFunc(ast.Name('_lookup_attr'), [owner, attr_name])

    def visitSubscript(self, node):
        owner = self.visit(node.expr)
        # All subscripts are passed along as a single tuple.
        subscripts = ast.Tuple([self.visit(sub) for sub in node.subs])
        return ast.CallFunc(ast.Name('_lookup_item'), [owner, subscripts])

View File

@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""String interpolation routines, i.e. splitting a given text into parts
that are literal strings and parts that are Python expressions.
"""
from itertools import chain
import os
import re
from tokenize import PseudoToken
from calibre.utils.genshi.core import TEXT
from calibre.utils.genshi.template.base import TemplateSyntaxError, EXPR
from calibre.utils.genshi.template.eval import Expression
__all__ = ['interpolate']
__docformat__ = 'restructuredtext en'
# Characters that may start a `$name` reference (checked by `lex`).
NAMESTART = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
# Characters that may continue a `$name` reference; includes the dot, so
# dotted names are consumed as a single expression.
NAMECHARS = NAMESTART + '.0123456789'
# Character that introduces an expression in template text.
PREFIX = '$'

# Matches either a (optionally u/r prefixed) triple-quoted string or one
# `tokenize.PseudoToken`. `lex` uses it to step through a `${...}` expression
# token by token, reading group 3 of each match to count braces.
token_re = re.compile('%s|%s(?s)' % (
    r'[uU]?[rR]?("""|\'\'\')((?<!\\)\\\1|.)*?\1',
    PseudoToken
))
def interpolate(text, filepath=None, lineno=-1, offset=0, lookup='strict'):
    """Parse the given string and extract expressions.

    This function is a generator that yields `TEXT` events for literal strings,
    and `EXPR` events for expressions, depending on the results of parsing the
    string.

    >>> for kind, data, pos in interpolate("hey ${foo}bar"):
    ...     print kind, `data`
    TEXT u'hey '
    EXPR Expression('foo')
    TEXT u'bar'

    :param text: the text to parse
    :param filepath: absolute path to the file in which the text was found
                     (optional)
    :param lineno: the line number at which the text was found (optional)
    :param offset: the column number at which the text starts in the source
                   (optional)
    :param lookup: the variable lookup mechanism; either "strict" (the
                   default), "lenient", or a custom lookup class
    :return: a generator of ``(kind, data, pos)`` events, where ``kind`` is
             `TEXT` or `EXPR`
    :raise TemplateSyntaxError: when a syntax error in an expression is
                                encountered
    """
    # Running position, advanced after every chunk. The same list is handed
    # to `lex`, which reads it when reporting errors.
    pos = [filepath, lineno, offset]

    # Consecutive text chunks are buffered and emitted as one TEXT event.
    textbuf = []
    textpos = None
    # The trailing `(True, '')` is an empty expression chunk that only
    # serves to flush text still buffered at the end.
    for is_expr, chunk in chain(lex(text, pos, filepath), [(True, '')]):
        if is_expr:
            if textbuf:
                yield TEXT, u''.join(textbuf), textpos
                del textbuf[:]
                textpos = None
            if chunk:
                try:
                    expr = Expression(chunk.strip(), pos[0], pos[1],
                                      lookup=lookup)
                    yield EXPR, expr, tuple(pos)
                except SyntaxError, err:
                    raise TemplateSyntaxError(err, filepath, pos[1],
                                              pos[2] + (err.offset or 0))
        else:
            textbuf.append(chunk)
            if textpos is None:
                # Remember where the buffered run of text started.
                textpos = tuple(pos)

        # Advance the line and column counters past this chunk.
        if '\n' in chunk:
            lines = chunk.splitlines()
            pos[1] += len(lines) - 1
            pos[2] += len(lines[-1])
        else:
            pos[2] += len(chunk)
def lex(text, textpos, filepath):
    """Split ``text`` into literal and expression chunks.

    This is a generator of ``(is_expr, chunk)`` pairs. ``is_expr`` is `True`
    for the source of a ``${...}`` or ``$name`` expression (without the
    delimiters) and `False` for literal text. A doubled prefix (``$$``) is
    treated as an escape and contributes a single literal ``$``.

    :param text: the text to split
    :param textpos: a ``[filepath, lineno, offset]`` sequence; its last two
                    items are used as the location of syntax errors
    :param filepath: the file path to report in syntax errors
    :raise TemplateSyntaxError: if no token can be matched while scanning a
                                ``${...}`` expression
    """
    # `offset` is the index of the prefix character currently examined;
    # `pos` is the start of the text that has not been yielded yet.
    offset = pos = 0
    end = len(text)
    escaped = False

    while 1:
        if escaped:
            # Continue the search after the escaped `$$` pair.
            offset = text.find(PREFIX, offset + 2)
            escaped = False
        else:
            offset = text.find(PREFIX, pos)
        # Stop when there is no further prefix, or when the prefix is the
        # last character of the text.
        if offset < 0 or offset == end - 1:
            break
        next = text[offset + 1]

        if next == '{':
            # `${...}`: emit pending text, then scan tokens until the
            # matching closing brace.
            if offset > pos:
                yield False, text[pos:offset]
            pos = offset + 2
            level = 1
            while level:
                match = token_re.match(text, pos)
                if match is None:
                    raise TemplateSyntaxError('invalid syntax', filepath,
                                              *textpos[1:])
                pos = match.end()
                tstart, tend = match.regs[3]
                token = text[tstart:tend]
                if token == '{':
                    level += 1
                elif token == '}':
                    level -= 1
            # Exclude the `${` and the closing brace from the chunk.
            yield True, text[offset + 2:pos - 1]

        elif next in NAMESTART:
            # `$name`: emit pending text, then consume name characters.
            if offset > pos:
                yield False, text[pos:offset]
                pos = offset
            pos += 1
            while pos < end:
                char = text[pos]
                if char not in NAMECHARS:
                    break
                pos += 1
            yield True, text[offset + 1:pos].strip()

        elif not escaped and next == PREFIX:
            # `$$`: emit pending text and leave `pos` on the second `$`, so
            # that it is yielded as part of the following literal text.
            if offset > pos:
                yield False, text[pos:offset]
            escaped = True
            pos = offset + 1

        else:
            # A prefix that does not start an expression is literal text.
            yield False, text[pos:offset + 1]
            pos = offset + 1

    if pos < end:
        yield False, text[pos:]

View File

@ -0,0 +1,328 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Template loading and caching."""
import os
try:
import threading
except ImportError:
import dummy_threading as threading
from calibre.utils.genshi.template.base import TemplateError
from calibre.utils.genshi.util import LRUCache
__all__ = ['TemplateLoader', 'TemplateNotFound']
__docformat__ = 'restructuredtext en'
class TemplateNotFound(TemplateError):
    """Error signalling that a requested template file could not be located."""

    def __init__(self, name, search_path):
        """Build the exception for the missing template.

        :param name: the filename of the template that was requested
        :param search_path: the search path that was consulted; kept on the
                            instance as ``search_path``
        """
        message = 'Template "%s" not found' % name
        TemplateError.__init__(self, message)
        self.search_path = search_path
class TemplateLoader(object):
    """Responsible for loading templates from files on the specified search
    path.

    >>> import tempfile
    >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template')
    >>> os.write(fd, '<p>$var</p>')
    11
    >>> os.close(fd)

    The template loader accepts a list of directory paths that are then used
    when searching for template files, in the given order:

    >>> loader = TemplateLoader([os.path.dirname(path)])

    The `load()` method first checks the template cache whether the requested
    template has already been loaded. If not, it attempts to locate the
    template file, and returns the corresponding `Template` object:

    >>> from genshi.template import MarkupTemplate
    >>> template = loader.load(os.path.basename(path))
    >>> isinstance(template, MarkupTemplate)
    True

    Template instances are cached: requesting a template with the same name
    results in the same instance being returned:

    >>> loader.load(os.path.basename(path)) is template
    True

    The `auto_reload` option can be used to control whether a template should
    be automatically reloaded when the file it was loaded from has been
    changed. Disable this automatic reloading to improve performance.

    >>> os.remove(path)
    """

    def __init__(self, search_path=None, auto_reload=False,
                 default_encoding=None, max_cache_size=25, default_class=None,
                 variable_lookup='strict', allow_exec=True, callback=None):
        """Create the template loader.

        :param search_path: a list of absolute path names that should be
                            searched for template files, or a string containing
                            a single absolute path; alternatively, any item on
                            the list may be a ''load function'' that is passed
                            a filename and returns a file-like object and some
                            metadata
        :param auto_reload: whether to check the last modification time of
                            template files, and reload them if they have changed
        :param default_encoding: the default encoding to assume when loading
                                 templates; defaults to UTF-8
        :param max_cache_size: the maximum number of templates to keep in the
                               cache
        :param default_class: the default `Template` subclass to use when
                              instantiating templates
        :param variable_lookup: the variable lookup mechanism; either "strict"
                                (the default), "lenient", or a custom lookup
                                class
        :param allow_exec: whether to allow Python code blocks in templates
        :param callback: (optional) a callback function that is invoked after a
                         template was initialized by this loader; the function
                         is passed the template object as only argument. This
                         callback can be used for example to add any desired
                         filters to the template
        :raise TypeError: if `callback` is given but is not callable
        :see: `LenientLookup`, `StrictLookup`
        :note: Changed in 0.5: Added the `allow_exec` argument
        """
        # Imported here rather than at module level, as in the original.
        from calibre.utils.genshi.template.markup import MarkupTemplate

        # Normalise the search path to a list-like object: nothing
        # configured -> empty list, single item -> one-element list.
        self.search_path = search_path
        if self.search_path is None:
            self.search_path = []
        elif not isinstance(self.search_path, (list, tuple)):
            self.search_path = [self.search_path]

        # Whether templates should be reloaded when the underlying file is
        # changed
        self.auto_reload = auto_reload

        self.default_encoding = default_encoding
        self.default_class = default_class or MarkupTemplate
        self.variable_lookup = variable_lookup
        self.allow_exec = allow_exec
        if callback is not None and not callable(callback):
            raise TypeError('The "callback" parameter needs to be callable')
        self.callback = callback
        self._cache = LRUCache(max_cache_size)
        # Maps cache keys to the "is the file unchanged?" callables returned
        # by the load functions (or None when a loader provides none).
        self._uptodate = {}
        self._lock = threading.RLock()

    def load(self, filename, relative_to=None, cls=None, encoding=None):
        """Load the template with the given name.

        If the `filename` parameter is relative, this method searches the
        search path trying to locate a template matching the given name. If the
        file name is an absolute path, the search path is ignored.

        If the requested template is not found, a `TemplateNotFound` exception
        is raised. Otherwise, a `Template` object is returned that represents
        the parsed template.

        Template instances are cached to avoid having to parse the same
        template file more than once. Thus, subsequent calls of this method
        with the same template file name will return the same `Template`
        object (unless the ``auto_reload`` option is enabled and the file was
        changed since the last parse.)

        If the `relative_to` parameter is provided, the `filename` is
        interpreted as being relative to that path.

        :param filename: the relative path of the template file to load
        :param relative_to: the filename of the template from which the new
                            template is being loaded, or ``None`` if the
                            template is being loaded directly
        :param cls: the class of the template object to instantiate
        :param encoding: the encoding of the template to load; defaults to the
                         ``default_encoding`` of the loader instance
        :return: the loaded `Template` instance
        :raises TemplateNotFound: if a template with the given name could not
                                  be found
        :raises TemplateError: if the name is relative and no search path is
                               configured
        """
        if cls is None:
            cls = self.default_class
        if relative_to and not os.path.isabs(relative_to):
            filename = os.path.join(os.path.dirname(relative_to), filename)
        filename = os.path.normpath(filename)
        cachekey = filename

        self._lock.acquire()
        try:
            # First check the cache to avoid reparsing the same file
            try:
                tmpl = self._cache[cachekey]
                if not self.auto_reload:
                    return tmpl
                uptodate = self._uptodate[cachekey]
                if uptodate is not None and uptodate():
                    return tmpl
            except (KeyError, OSError):
                # Not cached yet, or the freshness check itself failed:
                # fall through and (re)load from the search path.
                pass

            search_path = self.search_path
            isabs = False

            if os.path.isabs(filename):
                # Bypass the search path if the requested filename is absolute
                search_path = [os.path.dirname(filename)]
                isabs = True

            elif relative_to and os.path.isabs(relative_to):
                # Make sure that the directory containing the including
                # template is on the search path
                dirname = os.path.dirname(relative_to)
                if dirname not in search_path:
                    search_path = list(search_path) + [dirname]
                isabs = True

            elif not search_path:
                # Uh oh, don't know where to look for the template
                raise TemplateError('Search path for templates not configured')

            for loadfunc in search_path:
                if isinstance(loadfunc, basestring):
                    loadfunc = directory(loadfunc)
                try:
                    filepath, filename, fileobj, uptodate = loadfunc(filename)
                except IOError:
                    # This entry cannot provide the file; try the next one.
                    continue
                else:
                    try:
                        if isabs:
                            # If the filename of either the included or the
                            # including template is absolute, make sure the
                            # included template gets an absolute path, too,
                            # so that nested includes work properly without a
                            # search path
                            filename = filepath
                        tmpl = self._instantiate(cls, fileobj, filepath,
                                                 filename, encoding=encoding)
                        if self.callback:
                            self.callback(tmpl)
                        self._cache[cachekey] = tmpl
                        self._uptodate[cachekey] = uptodate
                    finally:
                        if hasattr(fileobj, 'close'):
                            fileobj.close()
                    return tmpl

            raise TemplateNotFound(filename, search_path)

        finally:
            self._lock.release()

    def _instantiate(self, cls, fileobj, filepath, filename, encoding=None):
        """Instantiate and return the `Template` object based on the given
        class and parameters.

        This function is intended for subclasses to override if they need to
        implement special template instantiation logic. Code that just uses
        the `TemplateLoader` should use the `load` method instead.

        :param cls: the class of the template object to instantiate
        :param fileobj: a readable file-like object containing the template
                        source
        :param filepath: the absolute path to the template file
        :param filename: the path to the template file relative to the search
                         path
        :param encoding: the encoding of the template to load; defaults to the
                         ``default_encoding`` of the loader instance
        :return: the loaded `Template` instance
        :rtype: `Template`
        """
        if encoding is None:
            encoding = self.default_encoding
        return cls(fileobj, filepath=filepath, filename=filename, loader=self,
                   encoding=encoding, lookup=self.variable_lookup,
                   allow_exec=self.allow_exec)

    def directory(path):
        """Loader factory for loading templates from a local directory.

        :param path: the path to the local directory containing the templates
        :return: the loader function to load templates from the given directory
        :rtype: ``function``
        """
        def _load_from_directory(filename):
            filepath = os.path.join(path, filename)
            # open() must stay first: a missing file has to surface as the
            # IOError that `TemplateLoader.load` catches to try the next entry.
            fileobj = open(filepath, 'U')
            try:
                mtime = os.path.getmtime(filepath)
            except OSError:
                # The file vanished or became unreadable between open() and
                # the stat call; close the handle instead of leaking it.
                fileobj.close()
                raise
            def _uptodate():
                return mtime == os.path.getmtime(filepath)
            return filepath, filename, fileobj, _uptodate
        return _load_from_directory
    directory = staticmethod(directory)

    def package(name, path):
        """Loader factory for loading templates from egg package data.

        :param name: the name of the package containing the resources
        :param path: the path inside the package data
        :return: the loader function to load templates from the given package
        :rtype: ``function``
        """
        from pkg_resources import resource_stream
        def _load_from_package(filename):
            filepath = os.path.join(path, filename)
            # No freshness callable is provided for package resources.
            return filepath, filename, resource_stream(name, filepath), None
        return _load_from_package
    package = staticmethod(package)

    def prefixed(**delegates):
        """Factory for a load function that delegates to other loaders
        depending on the prefix of the requested template path.

        The prefix is stripped from the filename when passing on the load
        request to the delegate.

        >>> load = prefixed(
        ...     app1 = lambda filename: ('app1', filename, None, None),
        ...     app2 = lambda filename: ('app2', filename, None, None)
        ... )
        >>> print load('app1/foo.html')
        ('app1', 'app1/foo.html', None, None)
        >>> print load('app2/bar.html')
        ('app2', 'app2/bar.html', None, None)

        :param delegates: mapping of path prefixes to loader functions
        :return: the loader function
        :rtype: ``function``
        """
        def _dispatch_by_prefix(filename):
            for prefix, delegate in delegates.items():
                if filename.startswith(prefix):
                    if isinstance(delegate, basestring):
                        delegate = directory(delegate)
                    filepath, _, fileobj, uptodate = delegate(
                        filename[len(prefix):].lstrip('/\\')
                    )
                    # Report the full, still prefixed name back to the caller.
                    return filepath, filename, fileobj, uptodate
            raise TemplateNotFound(filename, delegates.keys())
        return _dispatch_by_prefix
    prefixed = staticmethod(prefixed)
# Module-level aliases for the loader factories defined on `TemplateLoader`.
# `TemplateLoader.load` and the `prefixed` dispatcher call `directory` by this
# bare module-level name.
directory = TemplateLoader.directory
package = TemplateLoader.package
prefixed = TemplateLoader.prefixed

View File

@ -0,0 +1,305 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Markup templating engine."""
from itertools import chain
from calibre.utils.genshi.core import Attrs, Markup, Namespace, Stream, StreamEventKind
from calibre.utils.genshi.core import START, END, START_NS, END_NS, TEXT, PI, COMMENT
from calibre.utils.genshi.input import XMLParser
from calibre.utils.genshi.template.base import BadDirectiveError, Template, \
TemplateSyntaxError, _apply_directives, \
EXEC, INCLUDE, SUB
from calibre.utils.genshi.template.eval import Suite
from calibre.utils.genshi.template.interpolation import interpolate
from calibre.utils.genshi.template.directives import *
from calibre.utils.genshi.template.text import NewTextTemplate
__all__ = ['MarkupTemplate']
__docformat__ = 'restructuredtext en'
class MarkupTemplate(Template):
    """Implementation of the template language for XML-based templates.

    >>> tmpl = MarkupTemplate('''<ul xmlns:py="http://genshi.edgewall.org/">
    ... <li py:for="item in items">${item}</li>
    ... </ul>''')
    >>> print tmpl.generate(items=[1, 2, 3])
    <ul>
    <li>1</li><li>2</li><li>3</li>
    </ul>
    """

    DIRECTIVE_NAMESPACE = Namespace('http://genshi.edgewall.org/')
    XINCLUDE_NAMESPACE = Namespace('http://www.w3.org/2001/XInclude')

    directives = [('def', DefDirective),
                  ('match', MatchDirective),
                  ('when', WhenDirective),
                  ('otherwise', OtherwiseDirective),
                  ('for', ForDirective),
                  ('if', IfDirective),
                  ('choose', ChooseDirective),
                  ('with', WithDirective),
                  ('replace', ReplaceDirective),
                  ('content', ContentDirective),
                  ('attrs', AttrsDirective),
                  ('strip', StripDirective)]
    serializer = 'xml'
    _number_conv = Markup

    def _init_filters(self):
        """Set up the stream filters, placing the match filter before the
        include filter.
        """
        Template._init_filters(self)
        # Make sure the include filter comes after the match filter
        if self.loader:
            self.filters.remove(self._include)
        self.filters += [self._match]
        if self.loader:
            self.filters.append(self._include)

    def _parse(self, source, encoding):
        """Turn the XML source into the list of events of the "compiled"
        template.

        Directive elements and attributes are collected and wrapped into
        ``SUB`` events, XInclude elements become ``INCLUDE`` events,
        ``<?python ?>`` processing instructions become ``EXEC`` events, and
        attribute values and text nodes are run through `interpolate`.

        :param source: a `Stream`, or anything `XMLParser` accepts
        :param encoding: the encoding handed to `XMLParser`
        :return: the list of events making up the template
        :raise BadDirectiveError: for an unknown directive name
        :raise TemplateSyntaxError: for an include without ``href``, an
                                    invalid ``parse`` value, a Python block
                                    when ``allow_exec`` is off, or a syntax
                                    error inside a Python block
        """
        # Local import: only needed for sys.exc_info() below, which keeps the
        # exception binding parseable by both Python 2.5 and Python 3.
        import sys

        streams = [[]]  # stacked lists of events of the "compiled" template
        dirmap = {}  # temporary mapping of directives to elements
        ns_prefix = {}
        depth = 0
        fallbacks = []
        includes = []

        if not isinstance(source, Stream):
            source = XMLParser(source, filename=self.filename,
                               encoding=encoding)

        for kind, data, pos in source:
            stream = streams[-1]

            if kind is START_NS:
                # Strip out the namespace declaration for template directives
                prefix, uri = data
                ns_prefix[prefix] = uri
                if uri not in (self.DIRECTIVE_NAMESPACE,
                               self.XINCLUDE_NAMESPACE):
                    stream.append((kind, data, pos))

            elif kind is END_NS:
                uri = ns_prefix.pop(data, None)
                if uri and uri not in (self.DIRECTIVE_NAMESPACE,
                                       self.XINCLUDE_NAMESPACE):
                    stream.append((kind, data, pos))

            elif kind is START:
                # Record any directive attributes in start tags
                tag, attrs = data
                directives = []
                strip = False

                if tag in self.DIRECTIVE_NAMESPACE:
                    cls = self._dir_by_name.get(tag.localname)
                    if cls is None:
                        raise BadDirectiveError(tag.localname, self.filepath,
                                                pos[1])
                    args = dict([(name.localname, value) for name, value
                                 in attrs if not name.namespace])
                    directives.append((cls, args, ns_prefix.copy(), pos))
                    # A directive element itself is removed from the output.
                    strip = True

                new_attrs = []
                for name, value in attrs:
                    if name in self.DIRECTIVE_NAMESPACE:
                        cls = self._dir_by_name.get(name.localname)
                        if cls is None:
                            raise BadDirectiveError(name.localname,
                                                    self.filepath, pos[1])
                        directives.append((cls, value, ns_prefix.copy(), pos))
                    else:
                        if value:
                            value = list(interpolate(value, self.filepath,
                                                     pos[1], pos[2],
                                                     lookup=self.lookup))
                            # A value without expressions stays a plain string
                            if len(value) == 1 and value[0][0] is TEXT:
                                value = value[0][1]
                        else:
                            value = [(TEXT, u'', pos)]
                        new_attrs.append((name, value))
                new_attrs = Attrs(new_attrs)

                if directives:
                    # Order the directives by their position in _dir_order;
                    # list.sort is stable, so ties keep their source order
                    # exactly as with the former cmp-based sort.
                    index = self._dir_order.index
                    directives.sort(key=lambda item: index(item[0]))
                    dirmap[(depth, tag)] = (directives, len(stream), strip)

                if tag in self.XINCLUDE_NAMESPACE:
                    if tag.localname == 'include':
                        include_href = new_attrs.get('href')
                        if not include_href:
                            raise TemplateSyntaxError('Include misses required '
                                                      'attribute "href"',
                                                      self.filepath, *pos[1:])
                        includes.append((include_href, new_attrs.get('parse')))
                        streams.append([])
                    elif tag.localname == 'fallback':
                        streams.append([])
                        fallbacks.append(streams[-1])

                else:
                    stream.append((kind, (tag, new_attrs), pos))

                depth += 1

            elif kind is END:
                depth -= 1

                if fallbacks and data == self.XINCLUDE_NAMESPACE['fallback']:
                    # The pop must happen unconditionally; keeping it inside
                    # the assert would drop it when running under "python -O".
                    finished = streams.pop()
                    assert finished is fallbacks[-1]
                elif data == self.XINCLUDE_NAMESPACE['include']:
                    fallback = None
                    if len(fallbacks) == len(includes):
                        fallback = fallbacks.pop()
                    streams.pop()  # discard anything between the include tags
                                   # and the fallback element
                    stream = streams[-1]
                    href, parse = includes.pop()
                    try:
                        cls = {
                            'xml': MarkupTemplate,
                            'text': NewTextTemplate
                        }[parse or 'xml']
                    except KeyError:
                        raise TemplateSyntaxError('Invalid value for "parse" '
                                                  'attribute of include',
                                                  self.filepath, *pos[1:])
                    stream.append((INCLUDE, (href, cls, fallback), pos))
                else:
                    stream.append((kind, data, pos))

                # If there have been directive attributes with the
                # corresponding start tag, move the events in between into a
                # "subprogram"
                if (depth, data) in dirmap:
                    directives, start_offset, strip = dirmap.pop((depth, data))
                    substream = stream[start_offset:]
                    if strip:
                        substream = substream[1:-1]
                    stream[start_offset:] = [(SUB, (directives, substream),
                                              pos)]

            elif kind is PI and data[0] == 'python':
                if not self.allow_exec:
                    raise TemplateSyntaxError('Python code blocks not allowed',
                                              self.filepath, *pos[1:])
                try:
                    suite = Suite(data[1], self.filepath, pos[1],
                                  lookup=self.lookup)
                except SyntaxError:
                    err = sys.exc_info()[1]
                    raise TemplateSyntaxError(err, self.filepath,
                                              pos[1] + (err.lineno or 1) - 1,
                                              pos[2] + (err.offset or 0))
                stream.append((EXEC, suite, pos))

            elif kind is TEXT:
                # Separate names so the outer loop variables are not clobbered
                for sub_kind, sub_data, sub_pos in interpolate(
                        data, self.filepath, pos[1], pos[2],
                        lookup=self.lookup):
                    stream.append((sub_kind, sub_data, sub_pos))

            elif kind is COMMENT:
                # Comments starting with "!" are dropped from the template
                if not data.lstrip().startswith('!'):
                    stream.append((kind, data, pos))

            else:
                stream.append((kind, data, pos))

        assert len(streams) == 1
        return streams[0]

    def _match(self, stream, ctxt, match_templates=None, **vars):
        """Internal stream filter that applies any defined match templates
        to the stream.
        """
        if match_templates is None:
            match_templates = ctxt._match_templates

        tail = []
        def _strip(stream):
            depth = 1
            while 1:
                event = stream.next()
                if event[0] is START:
                    depth += 1
                elif event[0] is END:
                    depth -= 1
                if depth > 0:
                    yield event
                else:
                    tail[:] = [event]
                    break

        for event in stream:

            # We (currently) only care about start and end events for matching
            # We might care about namespace events in the future, though
            if not match_templates or (event[0] is not START and
                                       event[0] is not END):
                yield event
                continue

            for idx, (test, path, template, hints, namespaces, directives) \
                    in enumerate(match_templates):

                if test(event, namespaces, ctxt) is True:
                    if 'match_once' in hints:
                        del match_templates[idx]
                        idx -= 1

                    # Let the remaining match templates know about the event so
                    # they get a chance to update their internal state
                    for test in [mt[0] for mt in match_templates[idx + 1:]]:
                        test(event, namespaces, ctxt, updateonly=True)

                    # Consume and store all events until an end event
                    # corresponding to this start event is encountered
                    pre_match_templates = match_templates[:idx + 1]
                    if 'match_once' not in hints and 'not_recursive' in hints:
                        pre_match_templates.pop()
                    inner = _strip(stream)
                    if pre_match_templates:
                        inner = self._match(inner, ctxt, pre_match_templates)
                    content = self._include(chain([event], inner, tail), ctxt)
                    if 'not_buffered' not in hints:
                        content = list(content)

                    if tail:
                        for test in [mt[0] for mt in match_templates]:
                            test(tail[0], namespaces, ctxt, updateonly=True)

                    # Make the select() function available in the body of the
                    # match template
                    def select(path):
                        return Stream(content).select(path, namespaces, ctxt)
                    vars = dict(select=select)

                    # Recursively process the output
                    template = _apply_directives(template, directives, ctxt,
                                                 **vars)
                    for event in self._match(
                            self._exec(
                                self._eval(
                                    self._flatten(template, ctxt, **vars),
                                    ctxt, **vars),
                                ctxt, **vars),
                            ctxt, match_templates[idx + 1:], **vars):
                        yield event

                    break

            else:  # no matches
                yield event

View File

@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2007 Edgewall Software
# Copyright (C) 2006 Matthew Good
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Basic support for the template engine plugin API used by TurboGears and
CherryPy/Buffet.
"""
from pkg_resources import resource_filename
from calibre.utils.genshi.input import ET, HTML, XML
from calibre.utils.genshi.output import DocType
from calibre.utils.genshi.template.base import Template
from calibre.utils.genshi.template.loader import TemplateLoader
from calibre.utils.genshi.template.markup import MarkupTemplate
from calibre.utils.genshi.template.text import TextTemplate, NewTextTemplate
__all__ = ['ConfigurationError', 'AbstractTemplateEnginePlugin',
'MarkupTemplateEnginePlugin', 'TextTemplateEnginePlugin']
__docformat__ = 'restructuredtext en'
class ConfigurationError(ValueError):
    """Signals that a plugin option was given an invalid value."""
class AbstractTemplateEnginePlugin(object):
    """Implementation of the plugin API.

    Subclasses set ``template_class`` and ``extension`` and provide a
    ``default_format`` attribute, which `_get_render_options` reads.
    """

    template_class = None
    extension = None

    def __init__(self, extra_vars_func=None, options=None):
        """Configure the plugin and its `TemplateLoader` from `options`.

        :param extra_vars_func: optional callable stored as
                                ``get_extra_vars``
        :param options: mapping of ``genshi.*`` option names to values;
                        boolean options may be given as strings such as
                        ``"1"``/``"on"``/``"yes"``/``"true"``
        :raise ConfigurationError: if an option has an invalid value
        """
        self.get_extra_vars = extra_vars_func
        if options is None:
            options = {}
        self.options = options

        self.default_encoding = options.get('genshi.default_encoding', 'utf-8')
        auto_reload = options.get('genshi.auto_reload', '1')
        if isinstance(auto_reload, basestring):
            auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true')
        # Colon separated list of directories; empty entries are dropped.
        search_path = [entry for entry
                       in options.get('genshi.search_path', '').split(':')
                       if entry]
        # Without a search path, template names use dotted package notation.
        self.use_package_naming = not search_path
        try:
            max_cache_size = int(options.get('genshi.max_cache_size', 25))
        except ValueError:
            raise ConfigurationError('Invalid value for max_cache_size: "%s"' %
                                     options.get('genshi.max_cache_size'))

        loader_callback = options.get('genshi.loader_callback', None)
        if loader_callback and not callable(loader_callback):
            raise ConfigurationError('loader callback must be a function')

        lookup_errors = options.get('genshi.lookup_errors', 'strict')
        if lookup_errors not in ('lenient', 'strict'):
            raise ConfigurationError('Unknown lookup errors mode "%s"' %
                                     lookup_errors)

        # bool() on a string is true for any non-empty text, so "false" or
        # "0" from a config file used to *enable* Python code blocks. Parse
        # string values explicitly, the same way as auto_reload, and reject
        # anything unrecognised instead of guessing.
        allow_exec = options.get('genshi.allow_exec', True)
        if isinstance(allow_exec, basestring):
            flag = allow_exec.strip().lower()
            if flag in ('1', 'on', 'yes', 'true'):
                allow_exec = True
            elif flag in ('', '0', 'off', 'no', 'false'):
                allow_exec = False
            else:
                raise ConfigurationError('Invalid value for allow_exec "%s"' %
                                         options.get('genshi.allow_exec'))
        else:
            allow_exec = bool(allow_exec)

        self.loader = TemplateLoader(search_path,
                                     auto_reload=auto_reload,
                                     max_cache_size=max_cache_size,
                                     default_class=self.template_class,
                                     variable_lookup=lookup_errors,
                                     allow_exec=allow_exec,
                                     callback=loader_callback)

    def load_template(self, templatename, template_string=None):
        """Find a template specified in python 'dot' notation, or load one from
        a string.

        :param templatename: name of the template; ignored when
                             `template_string` is given
        :param template_string: template source to instantiate directly
        :return: the template object
        """
        if template_string is not None:
            return self.template_class(template_string)

        if self.use_package_naming:
            # "pkg.sub.name" -> resource "name<extension>" in package "pkg.sub"
            divider = templatename.rfind('.')
            if divider >= 0:
                package = templatename[:divider]
                basename = templatename[divider + 1:] + self.extension
                templatename = resource_filename(package, basename)

        return self.loader.load(templatename)

    def _get_render_options(self, format=None, fragment=False):
        """Build the keyword arguments passed to ``render()``.

        :param format: output method; falls back to ``default_format``
        :param fragment: not used by this base implementation
        :return: dict with ``method`` and, if a default encoding is set,
                 ``encoding``
        """
        if format is None:
            format = self.default_format
        kwargs = {'method': format}
        if self.default_encoding:
            kwargs['encoding'] = self.default_encoding
        return kwargs

    def render(self, info, format=None, fragment=False, template=None):
        """Render the template to a string using the provided info."""
        kwargs = self._get_render_options(format=format, fragment=fragment)
        return self.transform(info, template).render(**kwargs)

    def transform(self, info, template):
        """Render the output to an event stream."""
        if not isinstance(template, Template):
            # Anything that is not a template object is treated as a name.
            template = self.load_template(template)
        return template.generate(**info)
class MarkupTemplateEnginePlugin(AbstractTemplateEnginePlugin):
    """Plugin API implementation for markup templates."""

    template_class = MarkupTemplate
    extension = '.html'

    def __init__(self, extra_vars_func=None, options=None):
        """Configure the plugin, adding the markup-specific options
        ``genshi.default_doctype`` and ``genshi.default_format``.

        :raise ConfigurationError: for an unknown doctype or output format
        """
        AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)

        doctype = None
        doctype_name = self.options.get('genshi.default_doctype')
        if doctype_name:
            doctype = DocType.get(doctype_name)
            if doctype is None:
                raise ConfigurationError('Unknown doctype %r' % doctype_name)
        self.default_doctype = doctype

        output_format = self.options.get('genshi.default_format',
                                         'html').lower()
        if output_format not in ('html', 'xhtml', 'xml', 'text'):
            raise ConfigurationError('Unknown output format %r' % output_format)
        self.default_format = output_format

    def _get_render_options(self, format=None, fragment=False):
        """Extend the base render options with the default doctype, unless a
        fragment is being rendered.
        """
        render_options = super(MarkupTemplateEnginePlugin,
                               self)._get_render_options(format, fragment)
        if self.default_doctype and not fragment:
            render_options['doctype'] = self.default_doctype
        return render_options

    def transform(self, info, template):
        """Render the output to an event stream."""
        # Precedence, lowest to highest: built-in helpers, extra variables,
        # the caller supplied info.
        namespace = {'ET': ET, 'HTML': HTML, 'XML': XML}
        if self.get_extra_vars:
            namespace.update(self.get_extra_vars())
        namespace.update(info)
        base = super(MarkupTemplateEnginePlugin, self)
        return base.transform(namespace, template)
class TextTemplateEnginePlugin(AbstractTemplateEnginePlugin):
    """Plugin API implementation for text templates."""

    template_class = TextTemplate
    extension = '.txt'
    default_format = 'text'

    def __init__(self, extra_vars_func=None, options=None):
        """Configure the plugin; the ``genshi.new_text_syntax`` option
        switches the instance to `NewTextTemplate`.
        """
        if options is None:
            options = {}

        use_new_syntax = options.get('genshi.new_text_syntax')
        if isinstance(use_new_syntax, basestring):
            use_new_syntax = use_new_syntax.lower() in ('1', 'on', 'yes',
                                                        'true')
        if use_new_syntax:
            # Must be set before the base initializer runs, because that is
            # where template_class is handed to the loader.
            self.template_class = NewTextTemplate

        AbstractTemplateEnginePlugin.__init__(self, extra_vars_func, options)

View File

@ -0,0 +1,333 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Plain text templating engine.
This module implements two template language syntaxes, at least for a certain
transitional period. `OldTextTemplate` (aliased to just `TextTemplate`) defines
a syntax that was inspired by Cheetah/Velocity. `NewTextTemplate` on the other
hand is inspired by the syntax of the Django template language, which has more
explicit delimiting of directives, and is more flexible with regards to
white space and line breaks.
In a future release, `OldTextTemplate` will be phased out in favor of
`NewTextTemplate`, as the names imply. Therefore the new syntax is strongly
recommended for new projects, and existing projects may want to migrate to the
new syntax to remain compatible with future Genshi releases.
"""
import re
from calibre.utils.genshi.core import TEXT
from calibre.utils.genshi.template.base import BadDirectiveError, Template, \
TemplateSyntaxError, EXEC, INCLUDE, SUB
from calibre.utils.genshi.template.eval import Suite
from calibre.utils.genshi.template.directives import *
from calibre.utils.genshi.template.directives import Directive
from calibre.utils.genshi.template.interpolation import interpolate
__all__ = ['NewTextTemplate', 'OldTextTemplate', 'TextTemplate']
__docformat__ = 'restructuredtext en'
class NewTextTemplate(Template):
    r"""Implementation of a simple text-based template engine. This class will
    replace `OldTextTemplate` in a future release.

    It uses a more explicit delimiting style for directives: instead of the old
    style which required putting directives on separate lines that were prefixed
    with a ``#`` sign, directives and comments are enclosed in delimiter pairs
    (by default ``{% ... %}`` and ``{# ... #}``, respectively).

    Variable substitution uses the same interpolation syntax as for markup
    languages: simple references are prefixed with a dollar sign, more complex
    expression enclosed in curly braces.

    >>> tmpl = NewTextTemplate('''Dear $name,
    ...
    ... {# This is a comment #}
    ... We have the following items for you:
    ... {% for item in items %}
    ... * ${'Item %d' % item}
    ... {% end %}
    ... ''')
    >>> print tmpl.generate(name='Joe', items=[1, 2, 3]).render()
    Dear Joe,
    <BLANKLINE>
    <BLANKLINE>
    We have the following items for you:
    <BLANKLINE>
    * Item 1
    <BLANKLINE>
    * Item 2
    <BLANKLINE>
    * Item 3
    <BLANKLINE>
    <BLANKLINE>

    By default, no spaces or line breaks are removed. If a line break should
    not be included in the output, prefix it with a backslash:

    >>> tmpl = NewTextTemplate('''Dear $name,
    ...
    ... {# This is a comment #}\
    ... We have the following items for you:
    ... {% for item in items %}\
    ... * $item
    ... {% end %}\
    ... ''')
    >>> print tmpl.generate(name='Joe', items=[1, 2, 3]).render()
    Dear Joe,
    <BLANKLINE>
    We have the following items for you:
    * 1
    * 2
    * 3
    <BLANKLINE>

    Backslashes are also used to escape the start delimiter of directives and
    comments:

    >>> tmpl = NewTextTemplate('''Dear $name,
    ...
    ... \{# This is a comment #}
    ... We have the following items for you:
    ... {% for item in items %}\
    ... * $item
    ... {% end %}\
    ... ''')
    >>> print tmpl.generate(name='Joe', items=[1, 2, 3]).render()
    Dear Joe,
    <BLANKLINE>
    {# This is a comment #}
    We have the following items for you:
    * 1
    * 2
    * 3
    <BLANKLINE>

    :since: version 0.5
    """
    directives = [('def', DefDirective),
                  ('when', WhenDirective),
                  ('otherwise', OtherwiseDirective),
                  ('for', ForDirective),
                  ('if', IfDirective),
                  ('choose', ChooseDirective),
                  ('with', WithDirective)]
    serializer = 'text'

    _DIRECTIVE_RE = r'((?<!\\)%s\s*(\w+)\s*(.*?)\s*%s|(?<!\\)%s.*?%s)'
    _ESCAPE_RE = r'\\\n|\\(\\)|\\(%s)|\\(%s)'

    def __init__(self, source, filepath=None, filename=None, loader=None,
                 encoding=None, lookup='strict', allow_exec=False,
                 delims=('{%', '%}', '{#', '#}')):
        """Create the template.

        :param source: the template source (anything `Template` accepts)
        :param filepath: passed through to `Template`
        :param filename: passed through to `Template`
        :param loader: passed through to `Template`
        :param encoding: passed through to `Template`
        :param lookup: the variable lookup mechanism, passed through to
                       `Template`
        :param allow_exec: whether ``{% python %}`` blocks are allowed. It is
                           now forwarded to `Template`; it used to be accepted
                           but silently dropped, so a loader configured with
                           ``allow_exec=False`` could not turn code blocks off
                           for text templates.
        :param delims: four-item tuple ``(directive_start, directive_end,
                       comment_start, comment_end)``
        """
        # The delimiters must be in place first: they build the regular
        # expressions that _parse uses.
        self.delimiters = delims
        Template.__init__(self, source, filepath=filepath, filename=filename,
                          loader=loader, encoding=encoding, lookup=lookup,
                          allow_exec=allow_exec)

    def _get_delims(self):
        return self._delims

    def _set_delims(self, delims):
        if len(delims) != 4:
            raise ValueError('delimiters tuple must have exactly four elements')
        self._delims = delims
        # Matches a whole directive or comment, unless its start delimiter is
        # escaped with a backslash.
        self._directive_re = re.compile(self._DIRECTIVE_RE % tuple(
            map(re.escape, delims)
        ), re.DOTALL)
        # Only the two start delimiters (items 0 and 2) can be escaped.
        self._escape_re = re.compile(self._ESCAPE_RE % tuple(
            map(re.escape, delims[::2])
        ))

    delimiters = property(_get_delims, _set_delims, """\
    The delimiters for directives and comments. This should be a four item tuple
    of the form ``(directive_start, directive_end, comment_start,
    comment_end)``, where each item is a string.
    """)

    def _parse(self, source, encoding):
        """Parse the template from text input.

        :param source: file-like object providing the template text
        :param encoding: encoding used to decode byte strings; UTF-8 when
                         not given
        :return: the list of events making up the template
        :raise BadDirectiveError: for an unknown directive name
        :raise TemplateSyntaxError: for a Python block when ``allow_exec`` is
                                    off, or a syntax error inside one
        """
        # Local import: only needed for sys.exc_info() below, which keeps the
        # exception binding parseable by both Python 2.5 and Python 3.
        import sys

        stream = []  # list of events of the "compiled" template
        dirmap = {}  # temporary mapping of directives to elements
        depth = 0

        source = source.read()
        if isinstance(source, str):
            source = source.decode(encoding or 'utf-8', 'replace')
        offset = 0
        lineno = 1

        _escape_sub = self._escape_re.sub
        def _escape_repl(mo):
            # Replace an escape sequence by the character(s) it protects; an
            # escaped line break matches no group and is removed entirely.
            for group in mo.groups():
                if group:
                    return group
            return ''

        for mo in self._directive_re.finditer(source):
            start, end = mo.span(1)
            if start > offset:
                # Literal text in front of the directive
                text = _escape_sub(_escape_repl, source[offset:start])
                for kind, data, pos in interpolate(text, self.filepath, lineno,
                                                   lookup=self.lookup):
                    stream.append((kind, data, pos))
                lineno += len(text.splitlines())

            lineno += len(source[start:end].splitlines())
            # Both are None for a comment, which therefore adds no event.
            command, value = mo.group(2, 3)

            if command == 'include':
                pos = (self.filename, lineno, 0)
                value = list(interpolate(value, self.filepath, lineno, 0,
                                         lookup=self.lookup))
                if len(value) == 1 and value[0][0] is TEXT:
                    value = value[0][1]
                stream.append((INCLUDE, (value, None, []), pos))

            elif command == 'python':
                if not self.allow_exec:
                    raise TemplateSyntaxError('Python code blocks not allowed',
                                              self.filepath, lineno)
                try:
                    suite = Suite(value, self.filepath, lineno,
                                  lookup=self.lookup)
                except SyntaxError:
                    err = sys.exc_info()[1]
                    raise TemplateSyntaxError(err, self.filepath,
                                              lineno + (err.lineno or 1) - 1)
                pos = (self.filename, lineno, 0)
                stream.append((EXEC, suite, pos))

            elif command == 'end':
                depth -= 1
                if depth in dirmap:
                    # Wrap everything since the opening directive in a SUB
                    directive, start_offset = dirmap.pop(depth)
                    substream = stream[start_offset:]
                    stream[start_offset:] = [(SUB, ([directive], substream),
                                              (self.filepath, lineno, 0))]

            elif command:
                cls = self._dir_by_name.get(command)
                if cls is None:
                    # Report the location, as MarkupTemplate does
                    raise BadDirectiveError(command, self.filepath, lineno)
                directive = cls, value, None, (self.filepath, lineno, 0)
                dirmap[depth] = (directive, len(stream))
                depth += 1

            offset = end

        if offset < len(source):
            # Trailing literal text after the last directive
            text = _escape_sub(_escape_repl, source[offset:])
            for kind, data, pos in interpolate(text, self.filepath, lineno,
                                               lookup=self.lookup):
                stream.append((kind, data, pos))

        return stream
class OldTextTemplate(Template):
    """Legacy implementation of the old syntax text-based templates. This class
    is provided in a transition phase for backwards compatibility. New code
    should use the `NewTextTemplate` class and the improved syntax it provides.

    Directives occupy a line of their own that starts (after optional spaces
    or tabs) with an unescaped ``#``; a block directive is closed by a
    ``#end`` line.

    >>> tmpl = OldTextTemplate('''Dear $name,
    ...
    ... We have the following items for you:
    ... #for item in items
    ... * $item
    ... #end
    ...
    ... All the best,
    ... Foobar''')
    >>> print tmpl.generate(name='Joe', items=[1, 2, 3]).render()
    Dear Joe,
    <BLANKLINE>
    We have the following items for you:
    * 1
    * 2
    * 3
    <BLANKLINE>
    All the best,
    Foobar
    """
    # (name, class) pairs of the directives this syntax recognises.
    directives = [('def', DefDirective),
                  ('when', WhenDirective),
                  ('otherwise', OtherwiseDirective),
                  ('for', ForDirective),
                  ('if', IfDirective),
                  ('choose', ChooseDirective),
                  ('with', WithDirective)]
    serializer = 'text'

    # Matches a whole directive line. The first alternative is ``#end``
    # (anything after it on the line is ignored); the second is ``#`` followed
    # by a word or a second ``#`` plus the rest of the line. The negative
    # look-behind skips a ``#`` that is preceded by a backslash.
    _DIRECTIVE_RE = re.compile(r'(?:^[ \t]*(?<!\\)#(end).*\n?)|'
                               r'(?:^[ \t]*(?<!\\)#((?:\w+|#).*)\n?)',
                               re.MULTILINE)

    def _parse(self, source, encoding):
        """Parse the template from text input.

        :param source: an object with a ``read()`` method returning the
                       template text
        :param encoding: encoding used to decode byte-string input; UTF-8 is
                         used when this is false
        :return: a list of ``(kind, data, pos)`` events
        :raises BadDirectiveError: if a directive name is not registered
        """
        stream = [] # list of events of the "compiled" template
        dirmap = {} # temporary mapping of directives to elements
        depth = 0

        source = source.read()
        if isinstance(source, str):
            # Undecodable bytes are replaced rather than raising.
            source = source.decode(encoding or 'utf-8', 'replace')
        offset = 0
        lineno = 1

        for idx, mo in enumerate(self._DIRECTIVE_RE.finditer(source)):
            start, end = mo.span()
            if start > offset:
                # Plain text between the previous directive and this one.
                # NOTE(review): unlike the trailing text after the last
                # directive, ``\#`` escapes are not unescaped here -- confirm
                # whether that is intended.
                text = source[offset:start]
                for kind, data, pos in interpolate(text, self.filepath, lineno,
                                                   lookup=self.lookup):
                    stream.append((kind, data, pos))
                lineno += len(text.splitlines())

            # Drop leading whitespace and the introducing ``#``.
            text = source[start:end].lstrip()[1:]
            lineno += len(text.splitlines())
            # First word is the command, the remainder (if any) its argument.
            directive = text.split(None, 1)
            if len(directive) > 1:
                command, value = directive
            else:
                command, value = directive[0], None

            if command == 'end':
                depth -= 1
                if depth in dirmap:
                    # Wrap every event emitted since the matching opening
                    # directive into a single SUB event.
                    directive, start_offset = dirmap.pop(depth)
                    substream = stream[start_offset:]
                    stream[start_offset:] = [(SUB, ([directive], substream),
                                              (self.filepath, lineno, 0))]
            elif command == 'include':
                # NOTE(review): an ``#include`` line without an argument
                # leaves ``value`` as None, so ``value.strip()`` would raise
                # AttributeError.
                pos = (self.filename, lineno, 0)
                stream.append((INCLUDE, (value.strip(), None, []), pos))
            elif command != '#':
                # A command of exactly ``#`` (i.e. a ``## ...`` line) falls
                # through every branch and produces no event.
                cls = self._dir_by_name.get(command)
                if cls is None:
                    raise BadDirectiveError(command)
                directive = cls, value, None, (self.filepath, lineno, 0)
                # Remember where this directive's body starts in the stream.
                dirmap[depth] = (directive, len(stream))
                depth += 1

            offset = end

        if offset < len(source):
            # Text after the last directive; escaped hashes become literal.
            text = source[offset:].replace('\\#', '#')
            for kind, data, pos in interpolate(text, self.filepath, lineno,
                                               lookup=self.lookup):
                stream.append((kind, data, pos))

        return stream

# Alias: the plain ``TextTemplate`` name refers to the old-syntax class.
TextTemplate = OldTextTemplate

View File

@ -0,0 +1,250 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
"""Various utility classes and functions."""
import htmlentitydefs
import re
try:
set
except NameError:
from sets import ImmutableSet as frozenset
from sets import Set as set
__docformat__ = 'restructuredtext en'
class LRUCache(dict):
    """A dictionary-like object that stores only a certain number of items, and
    discards its least recently used item when full.

    >>> cache = LRUCache(3)
    >>> cache['A'] = 0
    >>> cache['B'] = 1
    >>> cache['C'] = 2
    >>> len(cache)
    3

    >>> cache['A']
    0

    Adding new items to the cache does not increase its size. Instead, the least
    recently used item is dropped:

    >>> cache['D'] = 3
    >>> len(cache)
    3
    >>> 'B' in cache
    False

    Iterating over the cache returns the keys, starting with the most recently
    used:

    >>> for key in cache:
    ...     print key
    D
    A
    C

    Items live in a private dictionary (``_dict``) and a doubly linked list
    running from ``head`` (most recently used) to ``tail`` (least recently
    used). Only the methods defined here consult that storage; other methods
    inherited from `dict` (``get``, ``keys``, ``items``, ...) are not
    overridden and therefore do not see the cached items.

    This code is based on the LRUCache class from ``myghtyutils.util``, written
    by Mike Bayer and released under the MIT license. See:

      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
    """

    class _Item(object):
        """Linked-list node holding one key/value pair."""

        def __init__(self, key, value):
            self.previous = self.next = None
            self.key = key
            self.value = value

        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity):
        """Create an empty cache holding at most `capacity` items."""
        self._dict = dict()
        self.capacity = capacity
        self.head = None
        self.tail = None

    def __contains__(self, key):
        # Membership tests do not count as a "use" of the item.
        return key in self._dict

    def __iter__(self):
        """Yield the keys from most to least recently used."""
        cur = self.head
        while cur is not None:
            yield cur.key
            cur = cur.next

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        """Return the value for `key` and mark it as most recently used.

        :raises KeyError: if `key` is not in the cache
        """
        item = self._dict[key]
        self._update_item(item)
        return item.value

    def __setitem__(self, key, value):
        """Store `value` under `key`, evicting old items if over capacity."""
        item = self._dict.get(key)
        if item is None:
            item = self._Item(key, value)
            self._dict[key] = item
            self._insert_item(item)
        else:
            item.value = value
            self._update_item(item)
            self._manage_size()

    def __repr__(self):
        return repr(self._dict)

    def _insert_item(self, item):
        """Link a new `item` in at the head of the list."""
        item.previous = None
        item.next = self.head
        if self.head is not None:
            self.head.previous = item
        else:
            # First item in an empty list is both head and tail.
            self.tail = item
        self.head = item
        self._manage_size()

    def _manage_size(self):
        """Drop items from the tail until the cache fits its capacity."""
        while len(self._dict) > self.capacity:
            del self._dict[self.tail.key]
            if self.tail is not self.head:
                self.tail = self.tail.previous
                self.tail.next = None
            else:
                # The last remaining item was evicted.
                self.head = self.tail = None

    def _update_item(self, item):
        """Move an already linked `item` to the head of the list."""
        if self.head is item:
            return

        # Unlink the item; it is not the head, so it has a predecessor.
        previous = item.previous
        previous.next = item.next
        if item.next is not None:
            item.next.previous = previous
        else:
            self.tail = previous

        # Relink at the front. Targets are assigned left to right, so the old
        # head's ``previous`` is set before ``self.head`` is rebound.
        item.previous = None
        item.next = self.head
        self.head.previous = self.head = item
def flatten(items):
    """Collapse an arbitrarily nested sequence into a single flat list.

    Nested lists, tuples, sets and frozensets are expanded recursively; every
    other value (strings included) is kept as a single element.

    :param items: the sequence to flatten
    :return: a new list with the leaf elements in iteration order

    >>> flatten((1, 2))
    [1, 2]
    >>> flatten([1, (2, 3), 4])
    [1, 2, 3, 4]
    >>> flatten([1, (2, [3, 4]), 5])
    [1, 2, 3, 4, 5]
    """
    flat = []
    for element in items:
        if not isinstance(element, (frozenset, list, set, tuple)):
            flat.append(element)
            continue
        flat.extend(flatten(element))
    return flat
def plaintext(text, keeplinebreaks=True):
    """Strip markup from `text`: tags are removed first, then entities are
    replaced, using `striptags` and `stripentities`.

    >>> plaintext('<b>1 &lt; 2</b>')
    u'1 < 2'

    Pass ``keeplinebreaks=False`` to have every newline replaced by a single
    space:

    >>> plaintext('''<b>1
    ... &lt;
    ... 2</b>''', keeplinebreaks=False)
    u'1 < 2'

    :param text: the text to convert to plain text
    :param keeplinebreaks: whether line breaks in the text should be kept intact
    :return: the text with tags and entities removed
    """
    stripped = stripentities(striptags(text))
    if keeplinebreaks:
        return stripped
    return stripped.replace(u'\n', u' ')
# Group 1: body of a numeric reference (decimal digits, or ``x``/``X`` plus hex
# digits; the trailing ``;`` is optional). Group 2: name of a named entity.
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent Unicode characters.

    >>> stripentities('1 &lt; 2')
    u'1 < 2'
    >>> stripentities('more &hellip;')
    u'more \u2026'
    >>> stripentities('&#8230;')
    u'\u2026'
    >>> stripentities('&#x2026;')
    u'\u2026'
    >>> stripentities('&#X2026;')
    u'\u2026'

    If the `keepxmlentities` parameter is provided and is a truth value, the
    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.

    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
    u'1 &lt; 2 \u2026'

    A numeric reference whose value is not a valid code point is left in the
    text unchanged. An unknown named entity is reduced to its bare name, or
    to ``&amp;name;`` when `keepxmlentities` is set.

    :param text: the text to process
    :param keepxmlentities: whether the five core XML entities are preserved
    :return: the text with entities replaced
    """
    def _replace_entity(match):
        if match.group(1): # numeric entity
            ref = match.group(1)
            # The pattern accepts both ``x`` and ``X`` as the hex marker, so
            # both must be handled here; otherwise ``int('X2026', 10)`` raises.
            if ref[0] in 'xX':
                codepoint = int(ref[1:], 16)
            else:
                codepoint = int(ref, 10)
            try:
                return unichr(codepoint)
            except (ValueError, OverflowError):
                # Not representable as a character: keep the reference as is.
                return match.group(0)
        else: # character entity
            ref = match.group(2)
            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
                return u'&%s;' % ref
            try:
                return unichr(htmlentitydefs.name2codepoint[ref])
            except KeyError:
                if keepxmlentities:
                    return u'&amp;%s;' % ref
                else:
                    return ref
    return _STRIPENTITIES_RE.sub(_replace_entity, text)
# DOTALL lets the comment alternative span line breaks; without it a
# multi-line comment is only partly consumed by the generic tag alternative
# and its closing ``-->`` is left in the output.
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)
def striptags(text):
    """Return a copy of the text with any XML/HTML tags removed.

    >>> striptags('<span>Foo</span> bar')
    'Foo bar'
    >>> striptags('<span class="bar">Foo</span>')
    'Foo'
    >>> striptags('Foo<br />')
    'Foo'

    HTML/XML comments are stripped, too, including ones that span several
    lines:

    >>> striptags('<!-- <blub>hehe</blah> -->test')
    'test'

    :param text: the string to remove tags from
    :return: the text with tags removed
    """
    return _STRIPTAGS_RE.sub('', text)

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
Enforces running of only a single application instance and allows for messaging between
applications using a local socket.
'''
import atexit
import atexit, os
from PyQt4.QtCore import QByteArray, QDataStream, QIODevice, SIGNAL, QObject, Qt, QString
from PyQt4.QtNetwork import QLocalSocket, QLocalServer
@ -94,8 +94,23 @@ class LocalServer(QLocalServer):
for conn in pop:
self.connections.remove(conn)
def listen(self, name):
    """Start listening on `name`, retrying once after removing the path
    reported by ``fullServerName()``.

    :param name: the server name passed on to ``QLocalServer.listen``
    :return: True if the server is listening, otherwise the result of the
             second ``QLocalServer.listen`` attempt
    """
    if QLocalServer.listen(self, name):
        return True
    # The first attempt failed; presumably a previous instance left its
    # socket file behind (TODO confirm). Removal is best effort, but only
    # ordinary errors are ignored so that KeyboardInterrupt and SystemExit
    # still propagate.
    try:
        os.unlink(self.fullServerName())
    except Exception:
        pass
    return QLocalServer.listen(self, name)
def send_message(msg, name, server_name='calibre_server', timeout=5000):
    """Send `msg` to the local server called `server_name`.

    The message is written as ``name:msg``, and only if the connection is
    established within ``timeout_connect`` and the first message read from
    the server equals `name`. Otherwise nothing is sent. `timeout` is passed
    on to ``write_message``.
    """
    connection = QLocalSocket()
    connection.connectToServer(server_name)
    connected = connection.waitForConnected(timeout_connect)
    if connected and read_message(connection) == name:
        write_message(connection, name+':'+msg, timeout)
class SingleApplication(QObject):
def __init__(self, name, parent=None, server_name='calibre_server'):
@ -124,8 +139,7 @@ class SingleApplication(QObject):
self.mr, Qt.QueuedConnection)
if not self.server.listen(self.server_name):
if not self.server.listen(self.server_name):
self.server = None
self.server = None
if self.server is not None:
atexit.register(self.server.close)

View File

@ -678,7 +678,6 @@ class BasicNewsRecipe(object, LoggingInterface):
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
cpath = getattr(self, 'cover_path', None)
@ -725,6 +724,8 @@ class BasicNewsRecipe(object, LoggingInterface):
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
@ -855,7 +856,7 @@ class CustomIndexRecipe(BasicNewsRecipe):
mi.author_sort = __appname__
mi = OPFCreator(self.output_dir, mi)
mi.create_manifest_from_files_in([self.output_dir])
mi.create_spine(['index.html'])
mi.create_spine([os.path.join(self.output_dir, 'index.html')])
mi.render(open(os.path.join(self.output_dir, 'index.opf'), 'wb'))
def download(self):

View File

@ -2,8 +2,8 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import datetime, locale
from genshi.template import MarkupTemplate
import datetime
from calibre.utils.genshi.template import MarkupTemplate
from calibre import preferred_encoding
@ -65,9 +65,9 @@ class NavBarTemplate(Template):
<py:if test="art == num - 1 and not bottom">
| <a href="${prefix}../../feed_${str(feed+1)}/index.html">Next</a>
</py:if>
| <a href="${prefix}../index.html#article_${str(art)}">Up one level</a>
| <a href="${prefix}../index.html#article_${str(art)}">Section menu</a>
<py:if test="two_levels">
| <a href="${prefix}../../index.html#feed_${str(feed)}">Up two levels</a>
| <a href="${prefix}../../index.html#feed_${str(feed)}">Main menu</a>
</py:if>
<py:if test="art != 0 and not bottom">
| <a href="${prefix}../article_${str(art-1)}/index.html">Previous</a>
@ -152,12 +152,12 @@ class FeedTemplate(Template):
</head>
<body style="page-break-before:always">
<h2>${feed.title}</h2>
<py:if test="feed.image">
<py:if test="getattr(feed, 'image', None)">
<div class="feed_image">
<img alt="${feed.image_alt}" src="${feed.image_url}" />
</div>
</py:if>
<div py:if="feed.description">
<div py:if="getattr(feed, 'description', None)">
${feed.description}<br />
</div>
<ul>

View File

@ -1,5 +1,5 @@
#!/usr/bin/python
import sys, os, shutil, time, tempfile, socket, fcntl, struct
import sys, os, shutil, time, tempfile, socket, fcntl, struct, cStringIO, pycurl, re
sys.path.append('src')
import subprocess
from subprocess import check_call as _check_call
@ -24,6 +24,7 @@ DOCS = PREFIX+"/htdocs/apidocs"
USER_MANUAL = PREFIX+'/htdocs/user_manual'
HTML2LRF = "src/calibre/ebooks/lrf/html/demo"
TXT2LRF = "src/calibre/ebooks/lrf/txt/demo"
MOBILEREAD = 'ftp://dev.mobileread.com/calibre/'
BUILD_SCRIPT ='''\
#!/bin/bash
cd ~/build && \
@ -62,17 +63,18 @@ def start_vm(vm, ssh_host, build_script, sleep=75):
subprocess.check_call(('scp', t.name, ssh_host+':build-'+PROJECT))
subprocess.check_call('ssh -t %s bash build-%s'%(ssh_host, PROJECT), shell=True)
def build_windows():
def build_windows(shutdown=True):
installer = installer_name('exe')
vm = '/vmware/Windows XP/Windows XP Professional.vmx'
start_vm(vm, 'windows', BUILD_SCRIPT%('python setup.py develop', 'python','windows_installer.py'))
subprocess.check_call(('scp', 'windows:build/%s/dist/*.exe'%PROJECT, 'dist'))
if not os.path.exists(installer):
raise Exception('Failed to build installer '+installer)
subprocess.Popen(('ssh', 'windows', 'shutdown', '-s', '-t', '0'))
if shutdown:
subprocess.Popen(('ssh', 'windows', 'shutdown', '-s', '-t', '0'))
return os.path.basename(installer)
def build_osx():
def build_osx(shutdown=True):
installer = installer_name('dmg')
vm = '/vmware/Mac OSX/Mac OSX.vmx'
python = '/Library/Frameworks/Python.framework/Versions/Current/bin/python'
@ -80,18 +82,20 @@ def build_osx():
subprocess.check_call(('scp', 'osx:build/%s/dist/*.dmg'%PROJECT, 'dist'))
if not os.path.exists(installer):
raise Exception('Failed to build installer '+installer)
subprocess.Popen(('ssh', 'osx', 'sudo', '/sbin/shutdown', '-h', 'now'))
if shutdown:
subprocess.Popen(('ssh', 'osx', 'sudo', '/sbin/shutdown', '-h', 'now'))
return os.path.basename(installer)
def build_linux():
def build_linux(shutdown=True):
installer = installer_name('tar.bz2')
vm = '/vmware/linux/libprs500-gentoo.vmx'
start_vm(vm, 'linux', BUILD_SCRIPT%('sudo python setup.py develop', 'python','linux_installer.py'))
subprocess.check_call(('scp', 'linux:/tmp/%s'%os.path.basename(installer), 'dist'))
if not os.path.exists(installer):
raise Exception('Failed to build installer '+installer)
subprocess.Popen(('ssh', 'linux', 'sudo', '/sbin/poweroff'))
if shutdown:
subprocess.Popen(('ssh', 'linux', 'sudo', '/sbin/poweroff'))
return os.path.basename(installer)
def build_installers():
@ -110,19 +114,72 @@ def upload_demo():
check_call('cd src/calibre/ebooks/lrf/txt/demo/ && zip -j /tmp/txt-demo.zip * /tmp/txt2lrf.lrf')
check_call('''scp /tmp/txt-demo.zip divok:%s/'''%(DOWNLOADS,))
def curl_list_dir(url=MOBILEREAD, listonly=1):
    """Return the directory listing of the FTP `url`.

    Login credentials are required to come from the user's netrc file.

    :param url: the directory URL to list
    :param listonly: value for curl's ``FTPLISTONLY`` option; when true the
                     response is split on whitespace, otherwise it is
                     returned as a list of lines
    :return: a list of strings
    """
    c = pycurl.Curl()
    b = cStringIO.StringIO()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(c.FTP_USE_EPSV, 1)
        c.setopt(c.NETRC, c.NETRC_REQUIRED)
        c.setopt(c.FTPLISTONLY, listonly)
        c.setopt(c.FTP_CREATE_MISSING_DIRS, 1)
        c.setopt(c.WRITEFUNCTION, b.write)
        c.perform()
    finally:
        # Release the handle even when the transfer raises.
        c.close()
    return b.getvalue().split() if listonly else b.getvalue().splitlines()
def curl_delete_file(path, url=MOBILEREAD):
c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(c.FTP_USE_EPSV, 1)
c.setopt(c.NETRC, c.NETRC_REQUIRED)
print 'Deleting file %s on %s'%(path, url)
c.setopt(c.QUOTE, ['dele '+ path])
c.perform()
c.close()
def curl_upload_file(stream, url):
c = pycurl.Curl()
c.setopt(pycurl.URL, url)
c.setopt(pycurl.UPLOAD, 1)
c.setopt(c.NETRC, c.NETRC_REQUIRED)
c.setopt(pycurl.READFUNCTION, stream.read)
stream.seek(0, 2)
c.setopt(pycurl.INFILESIZE_LARGE, stream.tell())
stream.seek(0)
c.setopt(c.NOPROGRESS, 0)
c.setopt(c.FTP_CREATE_MISSING_DIRS, 1)
print 'Uploading file %s to url %s' % (getattr(stream, 'name', ''), url)
try:
c.perform()
c.close()
except:
pass
files = curl_list_dir(listonly=0)
for line in files:
line = line.split()
if url.endswith(line[-1]):
size = long(line[4])
stream.seek(0,2)
if size != stream.tell():
raise RuntimeError('curl failed to upload %s correctly'%getattr(stream, 'name', ''))
def upload_installer(name):
    """Replace the installer on the MobileRead server with the file `name`.

    Every entry of the remote listing that matches the installer's file name,
    with any ``N.N.N`` version number in place of the current ``__version__``,
    is deleted before the new file is uploaded.

    :param name: local path of the installer to upload
    """
    bname = os.path.basename(name)
    # Escape the literal parts of the file name so that characters such as
    # ``.`` match only themselves; only the version is a wildcard.
    literal_parts = [re.escape(part) for part in bname.split(__version__)]
    pat = re.compile(r'\d+\.\d+\.\d+'.join(literal_parts))
    for f in curl_list_dir():
        if pat.search(f):
            curl_delete_file('/calibre/'+f)
    stream = open(name, 'rb')
    try:
        curl_upload_file(stream, MOBILEREAD+bname)
    finally:
        # Close the installer file whether or not the upload succeeds.
        stream.close()
def upload_installers():
exe, dmg, tbz2 = installer_name('exe'), installer_name('dmg'), installer_name('tar.bz2')
if exe and os.path.exists(exe):
check_call('''ssh divok rm -f %s/calibre\*.exe'''%(DOWNLOADS,))
check_call('''scp %s divok:%s/'''%(exe, DOWNLOADS))
if dmg and os.path.exists(dmg):
check_call('''ssh divok rm -f %s/calibre\*.dmg'''%(DOWNLOADS,))
check_call('''scp %s divok:%s/'''%(dmg, DOWNLOADS))
if tbz2 and os.path.exists(tbz2):
check_call('''ssh divok rm -f %s/calibre-\*-i686.tar.bz2 %s/latest-linux-binary.tar.bz2'''%(DOWNLOADS,DOWNLOADS))
check_call('''scp %s divok:%s/'''%(tbz2, DOWNLOADS))
check_call('''ssh divok ln -s %s/calibre-\*-i686.tar.bz2 %s/latest-linux-binary.tar.bz2'''%(DOWNLOADS,DOWNLOADS))
check_call('''ssh divok chmod a+r %s/\*'''%(DOWNLOADS,))
for i in ('dmg', 'exe', 'tar.bz2'):
upload_installer(installer_name(i))
check_call('''ssh divok echo %s \\> %s/latest_version'''%(__version__, DOWNLOADS))
def upload_docs():
check_call('''epydoc --config epydoc.conf''')

View File

@ -509,11 +509,18 @@ class BuildEXE(build_exe):
shutil.copytree(imfd, tg)
print
print 'Adding GUI main.py'
print 'Adding main scripts'
f = zipfile.ZipFile(os.path.join('build', 'py2exe', 'library.zip'), 'a', zipfile.ZIP_DEFLATED)
f.write('src\\calibre\\gui2\\main.py', 'calibre\\gui2\\main.py')
for i in scripts['console'] + scripts['gui']:
f.write(i, i.partition('\\')[-1])
f.close()
print
print 'Doing DLL redirection' # See http://msdn.microsoft.com/en-us/library/ms682600(VS.85).aspx
for f in glob.glob(os.path.join('build', 'py2exe', '*.exe')):
open(f + '.local', 'wb').write('\n')
print
print
print 'Building Installer'
@ -532,7 +539,7 @@ def main():
sys.argv[1:2] = ['py2exe']
console = [dict(dest_base=basenames['console'][i], script=scripts['console'][i])
for i in range(len(scripts['console']))]
for i in range(len(scripts['console']))]# if not 'parallel.py' in scripts['console'][i] ]
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
setup(
cmdclass = {'py2exe': BuildEXE},
@ -556,14 +563,17 @@ def main():
'sip', 'pkg_resources', 'PyQt4.QtSvg',
'mechanize', 'ClientForm', 'wmi',
'win32file', 'pythoncom', 'rtf2xml',
'win32process', 'win32api', 'msvcrt',
'win32event', 'calibre.ebooks.lrf.any.*',
'calibre.ebooks.lrf.feeds.*',
'lxml', 'lxml._elementpath', 'genshi',
'path', 'pydoc', 'IPython.Extensions.*',
'calibre.web.feeds.recipes.*', 'pydoc',
'calibre.web.feeds.recipes.*', 'PyQt4.QtWebKit',
],
'packages' : ['PIL'],
'excludes' : ["Tkconstants", "Tkinter", "tcl",
"_imagingtk", "ImageTk", "FixTk",
'pydoc'],
"_imagingtk", "ImageTk", "FixTk"
],
'dll_excludes' : ['mswsock.dll'],
},
},