Halve the startup time of worker processes by delay loading cssutils, oeb.stylizer and oeb.base

This commit is contained in:
Kovid Goyal 2011-04-19 12:24:41 -06:00
parent 6b52f4ad89
commit e835131c82
23 changed files with 75 additions and 52 deletions

View File

@ -33,9 +33,6 @@ if False:
fcntl, win32event, isfrozen, __author__, terminal_controller
winerror, win32api, isfreebsd, guess_type
import cssutils
cssutils.log.setLevel(logging.WARN)
def to_unicode(raw, encoding='utf-8', errors='strict'):
if isinstance(raw, unicode):
return raw
@ -679,4 +676,3 @@ main()
ipshell()
sys.argv = old_argv

View File

@ -9,7 +9,6 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
from calibre.constants import numeric_version
from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.utils.config import test_eight_code
# To archive plugins {{{
@ -98,6 +97,8 @@ class TXT2TXTZ(FileTypePlugin):
on_import = True
def _get_image_references(self, txt, base_dir):
from calibre.ebooks.oeb.base import OEB_IMAGES
images = []
# Textile

View File

@ -18,9 +18,6 @@ from lxml import etree
from calibre import prepare_string_for_xml
from calibre.constants import __appname__, __version__
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF
from calibre.utils.magick import Image
class FB2MLizer(object):
@ -71,7 +68,7 @@ class FB2MLizer(object):
return u'<?xml version="1.0" encoding="UTF-8"?>' + output
def clean_text(self, text):
# Condense empty paragraphs into a line break.
# Condense empty paragraphs into a line break.
text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line />', text)
# Remove empty paragraphs.
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
@ -100,6 +97,7 @@ class FB2MLizer(object):
return text
def fb2_header(self):
from calibre.ebooks.oeb.base import OPF
metadata = {}
metadata['title'] = self.oeb_book.metadata.title[0].value
metadata['appname'] = __appname__
@ -180,6 +178,8 @@ class FB2MLizer(object):
return u'</FictionBook>'
def get_cover(self):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
cover_href = None
# Get the raster cover if it's available.
@ -213,6 +213,8 @@ class FB2MLizer(object):
return u''
def get_text(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
text = ['<body>']
# Create main section if there are no others to create
@ -248,6 +250,8 @@ class FB2MLizer(object):
'''
This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
'''
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
images = []
for item in self.oeb_book.manifest:
# Don't write the image if it's not referenced in the document's text.
@ -344,6 +348,8 @@ class FB2MLizer(object):
@return: List of strings representing the XHTML converted to FB2 markup.
'''
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
# Ensure what we are converting is not a string and that the first tag is part of the XHTML namespace.
if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
return []

View File

@ -315,7 +315,8 @@ class HTMLInput(InputFormatPlugin):
from calibre import guess_type
from calibre.ebooks.oeb.transforms.metadata import \
meta_info_to_oeb_metadata
import cssutils
import cssutils, logging
cssutils.log.setLevel(logging.WARN)
self.OEB_STYLES = OEB_STYLES
oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False)

View File

@ -4,7 +4,6 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
class EasyMeta(object):
@ -12,6 +11,7 @@ class EasyMeta(object):
self.meta = meta
def __iter__(self):
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
meta = self.meta
for item_name in meta.items:
for item in meta[item_name]:

View File

@ -12,7 +12,6 @@ from os.path import dirname, abspath, relpath, exists, basename
from lxml import etree
from templite import Templite
from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
@ -51,6 +50,7 @@ class HTMLOutput(OutputFormatPlugin):
'''
Generate table of contents
'''
from calibre.ebooks.oeb.base import element
with CurrentDir(output_dir):
def build_node(current_node, parent=None):
if parent is None:

View File

@ -12,7 +12,6 @@ from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
@ -42,6 +41,8 @@ class HTMLZOutput(OutputFormatPlugin):
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
# HTML
if opts.htmlz_css_type == 'inline':
from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
@ -72,7 +73,7 @@ class HTMLZOutput(OutputFormatPlugin):
for item in oeb_book.manifest:
if item.media_type in OEB_IMAGES and item.href in images:
if item.media_type == SVG_MIME:
data = unicode(etree.tostring(item.data, encoding=unicode))
data = unicode(etree.tostring(item.data, encoding=unicode))
else:
data = item.data
fname = os.path.join(tdir, 'images', images[item.href])

View File

@ -15,11 +15,7 @@ from urlparse import urldefrag, urlparse, urlunparse, urljoin
from urllib import unquote as urlunquote
from lxml import etree, html
from cssutils import CSSParser, parseString, parseStyle, replaceUrls
from cssutils.css import CSSRule
import calibre
from calibre.constants import filesystem_encoding
from calibre.constants import filesystem_encoding, __version__
from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -179,6 +175,9 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
If the ``link_repl_func`` returns None, the attribute or
tag text will be removed completely.
'''
from cssutils import parseString, parseStyle, replaceUrls, log
log.setLevel(logging.WARN)
if resolve_base_href:
resolve_base_href(root)
for el, attrib, link, pos in iterlinks(root, find_links_in_css=False):
@ -1075,7 +1074,9 @@ class Manifest(object):
def _parse_css(self, data):
from cssutils.css import CSSRule
from cssutils import CSSParser, log
log.setLevel(logging.WARN)
def get_style_rules_from_import(import_rule):
ans = []
if not import_rule.styleSheet:
@ -2011,7 +2012,7 @@ class OEBBook(object):
name='dtb:uid', content=unicode(self.uid))
etree.SubElement(head, NCX('meta'),
name='dtb:depth', content=str(self.toc.depth()))
generator = ''.join(['calibre (', calibre.__version__, ')'])
generator = ''.join(['calibre (', __version__, ')'])
etree.SubElement(head, NCX('meta'),
name='dtb:generator', content=generator)
etree.SubElement(head, NCX('meta'),

View File

@ -14,7 +14,6 @@ from mimetypes import guess_type
from collections import defaultdict
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
DC_NSES, OPF, xml2text
@ -172,6 +171,7 @@ class OEBReader(object):
return bad
def _manifest_add_missing(self, invalid):
import cssutils
manifest = self.oeb.manifest
known = set(manifest.hrefs)
unchecked = set(manifest.values())

View File

@ -12,17 +12,18 @@ import os, itertools, re, logging, copy, unicodedata
from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError
import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, CSSFontFaceRule, cssproperties
from cssutils.css import (CSSStyleRule, CSSPageRule, CSSStyleDeclaration,
CSSValueList, CSSFontFaceRule, cssproperties)
from cssutils import profile as cssprofiles
from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
from calibre import force_unicode
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
from calibre.ebooks.oeb.profile import PROFILES
cssutils.log.setLevel(logging.WARN)
_html_css_stylesheet = None
def html_css_stylesheet():

View File

@ -9,7 +9,6 @@ import posixpath
from urlparse import urldefrag, urlparse
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
@ -25,6 +24,7 @@ class RenameFiles(object): # {{{
self.renamed_items_map = renamed_items_map
def __call__(self, oeb, opts):
import cssutils
self.log = oeb.logger
self.opts = opts
self.oeb = oeb

View File

@ -8,8 +8,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
from urlparse import urldefrag
import cssutils
from calibre.ebooks.oeb.base import CSS_MIME, OEB_DOCS
from calibre.ebooks.oeb.base import urlnormalize, iterlinks
@ -23,6 +21,7 @@ class ManifestTrimmer(object):
return cls()
def __call__(self, oeb, context):
import cssutils
oeb.logger.info('Trimming unused files from manifest...')
self.opts = context
used = set()

View File

@ -21,7 +21,6 @@ except ImportError:
import cStringIO
from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.pdb.header import PdbHeaderBuilder
from calibre.ebooks.pml.pmlml import PMLMLizer
@ -135,6 +134,7 @@ class Writer(FormatWriter):
62-...: Raw image data in 8 bit PNG format.
'''
images = []
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
for item in manifest:
if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():

View File

@ -18,7 +18,6 @@ from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.pml.pmlml import PMLMLizer
class PMLOutput(OutputFormatPlugin):
@ -60,6 +59,7 @@ class PMLOutput(OutputFormatPlugin):
pmlz.add_dir(tdir)
def write_images(self, manifest, image_hrefs, out_dir, opts):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
for item in manifest:
if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
if opts.full_image_depth:

View File

@ -12,8 +12,6 @@ import re
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.pdb.ereader import image_name
from calibre.ebooks.pml import unipmlcode
@ -110,6 +108,9 @@ class PMLMLizer(object):
return output
def get_cover_page(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = u''
if 'cover' in self.oeb_book.guide:
output += '\\m="cover.png"\n'
@ -125,6 +126,9 @@ class PMLMLizer(object):
return output
def get_text(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
text = [u'']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to PML markup...' % item.href)
@ -180,7 +184,7 @@ class PMLMLizer(object):
links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
for unused in anchors.difference(links):
text = text.replace('\\Q="%s"' % unused, '')
# Remove \Cn tags that are within \x and \Xn tags
text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text)
@ -214,6 +218,8 @@ class PMLMLizer(object):
return text
def dump_text(self, elem, stylizer, page, tag_stack=[]):
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
return []

View File

@ -11,8 +11,6 @@ Transform OEB content into RB compatible markup.
import re
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.rb import unique_name
TAGS = [
@ -81,6 +79,8 @@ class RBMLizer(object):
return output
def get_cover_page(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = u''
if 'cover' in self.oeb_book.guide:
if self.name_map.get(self.oeb_book.guide['cover'].href, None):
@ -109,6 +109,9 @@ class RBMLizer(object):
return ''.join(toc)
def get_text(self):
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XHTML
output = [u'']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
@ -137,6 +140,8 @@ class RBMLizer(object):
return text
def dump_text(self, elem, stylizer, page, tag_stack=[]):
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
return [u'']

View File

@ -18,7 +18,6 @@ import cStringIO
from calibre.ebooks.rb.rbml import RBMLizer
from calibre.ebooks.rb import HEADER
from calibre.ebooks.rb import unique_name
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.constants import __appname__, __version__
TEXT_RECORD_SIZE = 4096
@ -111,6 +110,7 @@ class RBWriter(object):
return (size, pages)
def _images(self, manifest):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
images = []
used_names = []

View File

@ -14,9 +14,6 @@ import cStringIO
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.filenames import ascii_text
from calibre.utils.magick.draw import save_cover_data_to, identify_data
@ -100,6 +97,8 @@ class RTFMLizer(object):
return self.mlize_spine()
def mlize_spine(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
output = self.header()
if 'titlepage' in self.oeb_book.guide:
href = self.oeb_book.guide['titlepage'].href
@ -154,6 +153,8 @@ class RTFMLizer(object):
return ' }'
def insert_images(self, text):
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
for item in self.oeb_book.manifest:
if item.media_type in OEB_RASTER_IMAGES:
src = os.path.basename(item.href)
@ -201,6 +202,8 @@ class RTFMLizer(object):
return text
def dump_text(self, elem, stylizer, tag_stack=[]):
from calibre.ebooks.oeb.base import XHTML_NS, namespace, barename
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
p = elem.getparent()

View File

@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
import os, uuid
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.oeb.base import DirContainer
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.filenames import ascii_filename
@ -30,6 +29,7 @@ class SNBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from calibre.ebooks.oeb.base import DirContainer
log.debug("Parsing SNB file...")
snbFile = SNBFile()
try:

View File

@ -13,8 +13,6 @@ import re
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
def ProcessFileName(fileName):
# Flatten the path
@ -81,6 +79,8 @@ class SNBMLizer(object):
body.append(entity)
def mlize(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
output = [ u'' ]
stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
@ -208,6 +208,7 @@ class SNBMLizer(object):
return text
def dump_text(self, subitems, elem, stylizer, end='', pre=False, li = ''):
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:

View File

@ -11,7 +11,6 @@ from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ebooks.txt.txtml import TXTMLizer
from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
from calibre.ptempfile import TemporaryDirectory, TemporaryFile
@ -103,12 +102,13 @@ class TXTOutput(OutputFormatPlugin):
class TXTZOutput(TXTOutput):
name = 'TXTZ Output'
author = 'John Schember'
file_type = 'txtz'
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.oeb.base import OEB_IMAGES
with TemporaryDirectory('_txtz_output') as tdir:
# TXT
with TemporaryFile('index.txt') as tf:
@ -123,10 +123,10 @@ class TXTZOutput(TXTOutput):
os.makedirs(path)
with open(os.path.join(tdir, item.href), 'wb') as imgf:
imgf.write(item.data)
# Metadata
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
txtz = ZipFile(output_path, 'w')
txtz.add_dir(tdir)

View File

@ -12,8 +12,6 @@ import re
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
BLOCK_TAGS = [
'div',
@ -58,12 +56,14 @@ class TXTMLizer(object):
self.toc_titles = []
self.toc_ids = []
self.last_was_heading = False
self.create_flat_toc(self.oeb_book.toc)
return self.mlize_spine()
def mlize_spine(self):
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.stylizer import Stylizer
output = [u'']
output.append(self.get_toc())
for item in self.oeb_book.spine:
@ -139,7 +139,7 @@ class TXTMLizer(object):
# when remove paragraph spacing is enabled.
text = re.sub('(?imu)^[ ]+', '', text)
text = re.sub('(?imu)[ ]+$', '', text)
# Remove empty space and newlines at the beginning of the document.
text = re.sub(r'(?u)^[ \n]+', '', text)
@ -185,6 +185,7 @@ class TXTMLizer(object):
@stylizer: The style information attached to the element.
@page: OEB page used to determine absolute urls.
'''
from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:

View File

@ -15,7 +15,6 @@ from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
from calibre.ebooks.chardet import substitute_entites
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.config import config_dir
from calibre.utils.date import format_date, isoformat, is_date_undefined, now as nowf
@ -4322,6 +4321,8 @@ Author '{0}':
'''
Generate description header from template
'''
from calibre.ebooks.oeb.base import XHTML_NS
def generate_html():
args = dict(
author=author,