Halve the startup time of worker processes by delay loading cssutils, oeb.stylizer and oeb.base

2025-08-11 09:13:57 -04:00 · 2011-04-19 12:24:41 -06:00 · 2011-04-19 12:24:41 -06:00 · e835131c82
commit e835131c82
parent 6b52f4ad89
23 changed files with 75 additions and 52 deletions
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -33,9 +33,6 @@ if False:
    fcntl, win32event, isfrozen, __author__, terminal_controller
    winerror, win32api, isfreebsd, guess_type
 import cssutils
 cssutils.log.setLevel(logging.WARN)
 def to_unicode(raw, encoding='utf-8', errors='strict'):
    if isinstance(raw, unicode):
        return raw
@ -679,4 +676,3 @@ main()
    ipshell()
    sys.argv = old_argv
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -9,7 +9,6 @@ from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.utils.config import test_eight_code
 # To archive plugins {{{
@ -98,6 +97,8 @@ class TXT2TXTZ(FileTypePlugin):
    on_import = True
    def _get_image_references(self, txt, base_dir):
        from calibre.ebooks.oeb.base import OEB_IMAGES
        images = []
        # Textile
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -18,9 +18,6 @@ from lxml import etree
 from calibre import prepare_string_for_xml
 from calibre.constants import __appname__, __version__
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES, OPF
 from calibre.utils.magick import Image
 class FB2MLizer(object):
@ -71,7 +68,7 @@ class FB2MLizer(object):
            return u'<?xml version="1.0" encoding="UTF-8"?>' + output
    def clean_text(self, text):
-        # Condense empty paragraphs into a line break. 
+        # Condense empty paragraphs into a line break.
        text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line />', text)
        # Remove empty paragraphs.
        text = re.sub(r'(?miu)<p>\s*</p>', '', text)
@ -100,6 +97,7 @@ class FB2MLizer(object):
        return text
    def fb2_header(self):
        from calibre.ebooks.oeb.base import OPF
        metadata = {}
        metadata['title'] = self.oeb_book.metadata.title[0].value
        metadata['appname'] = __appname__
@ -180,6 +178,8 @@ class FB2MLizer(object):
        return u'</FictionBook>'
    def get_cover(self):
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
        cover_href = None
        # Get the raster cover if it's available.
@ -213,6 +213,8 @@ class FB2MLizer(object):
        return u''
    def get_text(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        text = ['<body>']
        # Create main section if there are no others to create
@ -248,6 +250,8 @@ class FB2MLizer(object):
        '''
        This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function.
        '''
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
        images = []
        for item in self.oeb_book.manifest:
            # Don't write the image if it's not referenced in the document's text.
@ -344,6 +348,8 @@ class FB2MLizer(object):
        @return: List of string representing the XHTML converted to FB2 markup.
        '''
        from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
        # Ensure what we are converting is not a string and that the first tag is part of the XHTML namespace.
        if not isinstance(elem_tree.tag, basestring) or namespace(elem_tree.tag) != XHTML_NS:
            return []
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -315,7 +315,8 @@ class HTMLInput(InputFormatPlugin):
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
-        import cssutils
+        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
--- a/src/calibre/ebooks/html/meta.py
+++ b/src/calibre/ebooks/html/meta.py
@ -4,7 +4,6 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
 __docformat__ = 'restructuredtext en'
 from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
 class EasyMeta(object):
@ -12,6 +11,7 @@ class EasyMeta(object):
        self.meta = meta
    def __iter__(self):
        from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
        meta = self.meta
        for item_name in meta.items:
            for item in meta[item_name]:
--- a/src/calibre/ebooks/html/output.py
+++ b/src/calibre/ebooks/html/output.py
@ -12,7 +12,6 @@ from os.path import dirname, abspath, relpath, exists, basename
 from lxml import etree
 from templite import Templite
 from calibre.ebooks.oeb.base import element
 from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
 from calibre import CurrentDir
 from calibre.ptempfile import PersistentTemporaryDirectory
@ -51,6 +50,7 @@ class HTMLOutput(OutputFormatPlugin):
        '''
        Generate table of contents
        '''
        from calibre.ebooks.oeb.base import element
        with CurrentDir(output_dir):
            def build_node(current_node, parent=None):
                if parent is None:
--- a/src/calibre/ebooks/htmlz/output.py
+++ b/src/calibre/ebooks/htmlz/output.py
@ -12,7 +12,6 @@ from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
@ -42,6 +41,8 @@ class HTMLZOutput(OutputFormatPlugin):
    ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
@ -72,7 +73,7 @@ class HTMLZOutput(OutputFormatPlugin):
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
-                            data = unicode(etree.tostring(item.data, encoding=unicode)) 
+                            data = unicode(etree.tostring(item.data, encoding=unicode))
                        else:
                            data = item.data
                        fname = os.path.join(tdir, 'images', images[item.href])
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -15,11 +15,7 @@ from urlparse import urldefrag, urlparse, urlunparse, urljoin
 from urllib import unquote as urlunquote
 from lxml import etree, html
-from cssutils import CSSParser, parseString, parseStyle, replaceUrls
+from calibre.constants import filesystem_encoding, __version__
 from cssutils.css import CSSRule
 import calibre
 from calibre.constants import filesystem_encoding
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -179,6 +175,9 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
    If the ``link_repl_func`` returns None, the attribute or
    tag text will be removed completely.
    '''
    from cssutils import parseString, parseStyle, replaceUrls, log
    log.setLevel(logging.WARN)
    if resolve_base_href:
        resolve_base_href(root)
    for el, attrib, link, pos in iterlinks(root, find_links_in_css=False):
@ -1075,7 +1074,9 @@ class Manifest(object):
        def _parse_css(self, data):
-
+            from cssutils.css import CSSRule
            from cssutils import CSSParser, log
            log.setLevel(logging.WARN)
            def get_style_rules_from_import(import_rule):
                ans = []
                if not import_rule.styleSheet:
@ -2011,7 +2012,7 @@ class OEBBook(object):
            name='dtb:uid', content=unicode(self.uid))
        etree.SubElement(head, NCX('meta'),
            name='dtb:depth', content=str(self.toc.depth()))
-        generator = ''.join(['calibre (', calibre.__version__, ')'])
+        generator = ''.join(['calibre (', __version__, ')'])
        etree.SubElement(head, NCX('meta'),
            name='dtb:generator', content=generator)
        etree.SubElement(head, NCX('meta'),
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -14,7 +14,6 @@ from mimetypes import guess_type
 from collections import defaultdict
 from lxml import etree
 import cssutils
 from calibre.ebooks.oeb.base import OPF1_NS, OPF2_NS, OPF2_NSMAP, DC11_NS, \
    DC_NSES, OPF, xml2text
@ -172,6 +171,7 @@ class OEBReader(object):
        return bad
    def _manifest_add_missing(self, invalid):
        import cssutils
        manifest = self.oeb.manifest
        known = set(manifest.hrefs)
        unchecked = set(manifest.values())
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -12,17 +12,18 @@ import os, itertools, re, logging, copy, unicodedata
 from weakref import WeakKeyDictionary
 from xml.dom import SyntaxErr as CSSSyntaxError
 import cssutils
-from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
+from cssutils.css import (CSSStyleRule, CSSPageRule, CSSStyleDeclaration,
-    CSSValueList, CSSFontFaceRule, cssproperties
+    CSSValueList, CSSFontFaceRule, cssproperties)
 from cssutils import profile as cssprofiles
 from lxml import etree
 from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
 from calibre import force_unicode
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
 from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
 from calibre.ebooks.oeb.profile import PROFILES
 cssutils.log.setLevel(logging.WARN)
 _html_css_stylesheet = None
 def html_css_stylesheet():
--- a/src/calibre/ebooks/oeb/transforms/filenames.py
+++ b/src/calibre/ebooks/oeb/transforms/filenames.py
@ -9,7 +9,6 @@ import posixpath
 from urlparse import urldefrag, urlparse
 from lxml import etree
 import cssutils
 from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
@ -25,6 +24,7 @@ class RenameFiles(object): # {{{
        self.renamed_items_map = renamed_items_map
    def __call__(self, oeb, opts):
        import cssutils
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@ -8,8 +8,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 from urlparse import urldefrag
 import cssutils
 from calibre.ebooks.oeb.base import CSS_MIME, OEB_DOCS
 from calibre.ebooks.oeb.base import urlnormalize, iterlinks
@ -23,6 +21,7 @@ class ManifestTrimmer(object):
        return cls()
    def __call__(self, oeb, context):
        import cssutils
        oeb.logger.info('Trimming unused files from manifest...')
        self.opts = context
        used = set()
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@ -21,7 +21,6 @@ except ImportError:
 import cStringIO
 from calibre.ebooks.pdb.formatwriter import FormatWriter
 from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
 from calibre.ebooks.pdb.header import PdbHeaderBuilder
 from calibre.ebooks.pml.pmlml import PMLMLizer
@ -135,6 +134,7 @@ class Writer(FormatWriter):
        62-...: Raw image data in 8 bit PNG format.
        '''
        images = []
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
        for item in manifest:
            if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
--- a/src/calibre/ebooks/pml/output.py
+++ b/src/calibre/ebooks/pml/output.py
@ -18,7 +18,6 @@ from calibre.customize.conversion import OutputFormatPlugin
 from calibre.customize.conversion import OptionRecommendation
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
 from calibre.ebooks.pml.pmlml import PMLMLizer
 class PMLOutput(OutputFormatPlugin):
@ -60,6 +59,7 @@ class PMLOutput(OutputFormatPlugin):
            pmlz.add_dir(tdir)
    def write_images(self, manifest, image_hrefs, out_dir, opts):
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
        for item in manifest:
            if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
                if opts.full_image_depth:
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -12,8 +12,6 @@ import re
 from lxml import etree
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.pdb.ereader import image_name
 from calibre.ebooks.pml import unipmlcode
@ -110,6 +108,9 @@ class PMLMLizer(object):
        return output
    def get_cover_page(self):
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML
        output = u''
        if 'cover' in self.oeb_book.guide:
            output += '\\m="cover.png"\n'
@ -125,6 +126,9 @@ class PMLMLizer(object):
        return output
    def get_text(self):
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML
        text = [u'']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to PML markup...' % item.href)
@ -180,7 +184,7 @@ class PMLMLizer(object):
        links = set(re.findall(r'(?<=\\q="#).+?(?=")', text))
        for unused in anchors.difference(links):
            text = text.replace('\\Q="%s"' % unused, '')
-            
+
        # Remove \Cn tags that are within \x and \Xn tags
        text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text)
@ -214,6 +218,8 @@ class PMLMLizer(object):
        return text
    def dump_text(self, elem, stylizer, page, tag_stack=[]):
        from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return []
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@ -11,8 +11,6 @@ Transform OEB content into RB compatible markup.
 import re
 from calibre import prepare_string_for_xml
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.rb import unique_name
 TAGS = [
@ -81,6 +79,8 @@ class RBMLizer(object):
        return output
    def get_cover_page(self):
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML
        output = u''
        if 'cover' in self.oeb_book.guide:
            if self.name_map.get(self.oeb_book.guide['cover'].href, None):
@ -109,6 +109,9 @@ class RBMLizer(object):
        return ''.join(toc)
    def get_text(self):
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML
        output = [u'']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to RocketBook HTML...' % item.href)
@ -137,6 +140,8 @@ class RBMLizer(object):
        return text
    def dump_text(self, elem, stylizer, page, tag_stack=[]):
        from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return [u'']
--- a/src/calibre/ebooks/rb/writer.py
+++ b/src/calibre/ebooks/rb/writer.py
@ -18,7 +18,6 @@ import cStringIO
 from calibre.ebooks.rb.rbml import RBMLizer
 from calibre.ebooks.rb import HEADER
 from calibre.ebooks.rb import unique_name
 from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
 from calibre.constants import __appname__, __version__
 TEXT_RECORD_SIZE = 4096
@ -111,6 +110,7 @@ class RBWriter(object):
        return (size, pages)
    def _images(self, manifest):
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
        images = []
        used_names = []
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@ -14,9 +14,6 @@ import cStringIO
 from lxml import etree
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
    OEB_RASTER_IMAGES
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.filenames import ascii_text
 from calibre.utils.magick.draw import save_cover_data_to, identify_data
@ -100,6 +97,8 @@ class RTFMLizer(object):
        return self.mlize_spine()
    def mlize_spine(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        output = self.header()
        if 'titlepage' in self.oeb_book.guide:
            href = self.oeb_book.guide['titlepage'].href
@ -154,6 +153,8 @@ class RTFMLizer(object):
        return ' }'
    def insert_images(self, text):
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
        for item in self.oeb_book.manifest:
            if item.media_type in OEB_RASTER_IMAGES:
                src = os.path.basename(item.href)
@ -201,6 +202,8 @@ class RTFMLizer(object):
        return text
    def dump_text(self, elem, stylizer, tag_stack=[]):
        from calibre.ebooks.oeb.base import XHTML_NS, namespace, barename
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            p = elem.getparent()
--- a/src/calibre/ebooks/snb/input.py
+++ b/src/calibre/ebooks/snb/input.py
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os, uuid
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.oeb.base import DirContainer
 from calibre.ebooks.snb.snbfile import SNBFile
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.filenames import ascii_filename
@ -30,6 +29,7 @@ class SNBInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.oeb.base import DirContainer
        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
--- a/src/calibre/ebooks/snb/snbml.py
+++ b/src/calibre/ebooks/snb/snbml.py
@ -13,8 +13,6 @@ import re
 from lxml import etree
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 def ProcessFileName(fileName):
    # Flatten the path
@ -81,6 +79,8 @@ class SNBMLizer(object):
                    body.append(entity)
    def mlize(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        output = [ u'' ]
        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
        content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
@ -208,6 +208,7 @@ class SNBMLizer(object):
        return text
    def dump_text(self, subitems, elem, stylizer, end='', pre=False, li = ''):
        from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@ -11,7 +11,6 @@ from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 from calibre.ptempfile import TemporaryDirectory, TemporaryFile
@ -103,12 +102,13 @@ class TXTOutput(OutputFormatPlugin):
 class TXTZOutput(TXTOutput):
-    
+
    name = 'TXTZ Output'
    author = 'John Schember'
    file_type = 'txtz'
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.oeb.base import OEB_IMAGES
        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
            with TemporaryFile('index.txt') as tf:
@ -123,10 +123,10 @@ class TXTZOutput(TXTOutput):
                        os.makedirs(path)
                    with open(os.path.join(tdir, item.href), 'wb') as imgf:
                        imgf.write(item.data)
-            
+
            # Metadata
-            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: 
+            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
-            
+
            txtz = ZipFile(output_path, 'w')
            txtz.add_dir(tdir)
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@ -12,8 +12,6 @@ import re
 from lxml import etree
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 BLOCK_TAGS = [
    'div',
@ -58,12 +56,14 @@ class TXTMLizer(object):
        self.toc_titles = []
        self.toc_ids = []
        self.last_was_heading = False
-        
+
        self.create_flat_toc(self.oeb_book.toc)
        return self.mlize_spine()
    def mlize_spine(self):
        from calibre.ebooks.oeb.base import XHTML
        from calibre.ebooks.oeb.stylizer import Stylizer
        output = [u'']
        output.append(self.get_toc())
        for item in self.oeb_book.spine:
@ -139,7 +139,7 @@ class TXTMLizer(object):
        # when remove paragraph spacing is enabled.
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)
-        
+
        # Remove empty space and newlines at the beginning of the document.
        text = re.sub(r'(?u)^[ \n]+', '', text)
@ -185,6 +185,7 @@ class TXTMLizer(object):
        @stylizer: The style information attached to the element.
        @page: OEB page used to determine absolute urls.
        '''
        from calibre.ebooks.oeb.base import XHTML_NS, barename, namespace
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -15,7 +15,6 @@ from calibre.customize import CatalogPlugin
 from calibre.customize.conversion import OptionRecommendation, DummyReporter
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
 from calibre.ebooks.chardet import substitute_entites
 from calibre.ebooks.oeb.base import XHTML_NS
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.config import config_dir
 from calibre.utils.date import format_date, isoformat, is_date_undefined, now as nowf
@ -4322,6 +4321,8 @@ Author '{0}':
            '''
            Generate description header from template
            '''
            from calibre.ebooks.oeb.base import XHTML_NS
            def generate_html():
                args = dict(
                            author=author,