pdf get_cover returns cover image instead of nothing.

2025-12-17 18:45:04 -05:00 · 2009-04-18 07:54:56 -04:00 · 2009-04-18 07:54:56 -04:00 · b104286f61
commit b104286f61
parent 37b820b046 f969ed39fe
24 changed files with 405 additions and 210 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.pdf.input import PDFInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.txt.output import TXTOutput
@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput
 from calibre.customize.profiles import input_profiles, output_profiles
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
-        TXTInput, OEBOutput, TXTOutput, PDFOutput]
+        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -41,6 +41,11 @@ class ConversionOption(object):
    def __eq__(self, other):
        '''
        Two objects compare equal exactly when their hashes are equal.
        '''
        own_hash = hash(self)
        return own_hash == hash(other)
    def clone(self):
        '''
        Return a new :class:`ConversionOption` carrying this option's name,
        help text, long/short switches and choices. The attribute values
        themselves are passed through as-is, not copied.
        '''
        fields = ('name', 'help', 'long_switch', 'short_switch', 'choices')
        kwargs = dict((field, getattr(self, field)) for field in fields)
        return ConversionOption(**kwargs)
 class OptionRecommendation(object):
    LOW  = 1
    MED  = 2
@ -59,6 +64,10 @@ class OptionRecommendation(object):
        self.validate_parameters()
    def clone(self):
        '''
        Return a new :class:`OptionRecommendation` with the same recommended
        value and level, wrapping a clone of the underlying option.
        '''
        option_copy = self.option.clone()
        return OptionRecommendation(recommended_value=self.recommended_value,
                level=self.level, option=option_copy)
    def validate_parameters(self):
        if self.option.choices and self.recommended_value not in \
                                                    self.option.choices:
@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin):
            options.debug_input = os.path.abspath(options.debug_input)
            if not os.path.exists(options.debug_input):
                os.makedirs(options.debug_input)
-            shutil.rmtree(options.debug_input)
+            if isinstance(ret, basestring):
-            shutil.copytree(output_dir, options.debug_input)
+                shutil.rmtree(options.debug_input)
                shutil.copytree(output_dir, options.debug_input)
            else:
                from calibre.ebooks.oeb.writer import OEBWriter
                w = OEBWriter(pretty_print=options.pretty_print)
                w(ret, options.debug_input)
            log.info('Input debug saved to:', options.debug_input)
        return ret
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
        raise SystemExit(1)
    output = args[2]
-    if output.startswith('.'):
+    if output.startswith('.') and output != '.':
        output = os.path.splitext(os.path.basename(input))[0]+output
    output = os.path.abspath(output)
@ -171,7 +171,8 @@ def main(args=sys.argv):
    plumber.run()
-    log(_('Output saved to'), ' ', plumber.output)
+    if plumber.opts.debug_input is None:
        log(_('Output saved to'), ' ', plumber.output)
    return 0
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -32,8 +32,8 @@ class Plumber(object):
        :param input: Path to input file.
        :param output: Path to output file/directory
        '''
-        self.input = input
+        self.input = os.path.abspath(input)
-        self.output = output
+        self.output = os.path.abspath(output)
        self.log = log
        # Initialize the conversion options that are independent of input and
@ -188,15 +188,15 @@ OptionRecommendation(name='language',
 ]
-        input_fmt = os.path.splitext(input)[1]
+        input_fmt = os.path.splitext(self.input)[1]
        if not input_fmt:
            raise ValueError('Input file must have an extension')
        input_fmt = input_fmt[1:].lower()
-        if os.path.exists(output) and os.path.isdir(output):
+        if os.path.exists(self.output) and os.path.isdir(self.output):
            output_fmt = 'oeb'
        else:
-            output_fmt = os.path.splitext(output)[1]
+            output_fmt = os.path.splitext(self.output)[1]
            if not output_fmt:
                output_fmt = '.oeb'
            output_fmt = output_fmt[1:].lower()
@ -323,6 +323,9 @@ OptionRecommendation(name='language',
        self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
                                    self.input_fmt, self.log,
                                    accelerators, tdir)
        if self.opts.debug_input is not None:
            self.log('Debug input called, aborting the rest of the pipeline.')
            return
        if not hasattr(self.oeb, 'manifest'):
            self.oeb = create_oebbook(self.log, self.oeb, self.opts)
@ -365,18 +368,20 @@ OptionRecommendation(name='language',
        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
                self.opts, self.log)
-def create_oebbook(log, opfpath, opts):
+def create_oebbook(log, path_or_stream, opts, reader=None):
    '''
-    Create an OEBBook from an OPF file.
+    Create an OEBBook.
    '''
    from calibre.ebooks.oeb.reader import OEBReader
    from calibre.ebooks.oeb.base import OEBBook
    html_preprocessor = HTMLPreProcessor()
    reader = OEBReader()
    oeb = OEBBook(log, html_preprocessor=html_preprocessor,
            pretty_print=opts.pretty_print)
    # Read OEB Book into OEBBook
-    log.info('Parsing all content...')
+    log('Parsing all content...')
-    reader(oeb, opfpath)
+    if reader is None:
        from calibre.ebooks.oeb.reader import OEBReader
        reader = OEBReader
    reader()(oeb, path_or_stream)
    return oeb
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
                   )
        ),
        OptionRecommendation(name='dont_package',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                'files into a standard folder hierarchy. Only use this option '
                'if you know what you are doing as it can result in various '
                'nasty side effects in the rest of of the conversion pipeline.'
                )
        ),
    ])
    def convert(self, stream, opts, file_ext, log,
@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin):
            mi.render(open('metadata.opf', 'wb'))
            opfpath = os.path.abspath('metadata.opf')
        if opts.dont_package:
            return opfpath
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, opfpath, opts)
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@ -0,0 +1,24 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.customize.conversion import InputFormatPlugin
 class LITInput(InputFormatPlugin):
    '''
    Input plugin for files with the ``lit`` extension. The actual parsing
    is delegated to LitReader via create_oebbook.
    '''

    name = 'LIT Input'
    author = 'Marshall T. Vandegrift'
    description = 'Convert LIT files to HTML'
    file_types = set(['lit'])

    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Build and return an OEB book from the LIT data in `stream`.

        `file_ext` and `accelerators` are accepted but not used here.
        '''
        # Imported lazily, inside the method, as in the rest of the plugin code.
        from calibre.ebooks.lit.reader import LitReader
        from calibre.ebooks.conversion.plumber import create_oebbook
        book = create_oebbook(log, stream, options, reader=LitReader)
        return book
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -7,13 +7,12 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
    'and Marshall T. Vandegrift <llasram@gmail.com>'
-import sys, struct, os
+import struct, os
 import functools
 import re
 from urlparse import urldefrag
 from cStringIO import StringIO
 from urllib import unquote as urlunquote
 from lxml import etree
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 import calibre.ebooks.lit.mssha1 as mssha1
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@ -1,10 +1,10 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''
 import sys, os, cStringIO
 from threading import Thread
 from calibre import FileWrapper
 from calibre.ebooks.metadata import MetaInformation, authors_to_string
@ -13,7 +13,8 @@ from pyPdf import PdfFileReader, PdfFileWriter
 import Image
 try:
    from calibre.utils.PythonMagickWand import \
-        NewMagickWand, MagickReadImage, MagickSetImageFormat, MagickWriteImage
+        NewMagickWand, MagickReadImage, MagickSetImageFormat, \
        MagickWriteImage, ImageMagick
    _imagemagick_loaded = True
 except:
    _imagemagick_loaded = False
@ -51,9 +52,23 @@ def get_metadata(stream, extract_cover=True):
        print >>sys.stderr, msg.encode('utf8')
    return mi
 class MetadataWriter(Thread):
    '''
    Daemon thread that calls ``out_pdf.write(buf)`` in the background.

    A RuntimeError raised by the write is swallowed so the thread simply
    ends; any other exception propagates as usual for a thread.
    '''

    def __init__(self, out_pdf, buf):
        Thread.__init__(self)
        self.daemon = True
        self.out_pdf = out_pdf
        self.buf = buf

    def run(self):
        destination = self.buf
        try:
            self.out_pdf.write(destination)
        except RuntimeError:
            # Deliberately ignored: the write is best-effort.
            return
 def set_metadata(stream, mi):
    stream.seek(0)
-    # Use a cStringIO object for the pdf because we will want to over
+    # Use a StringIO object for the pdf because we will want to over
    # write it later and if we are working on the stream directly it
    # could cause some issues.
    raw = cStringIO.StringIO(stream.read())
@ -61,10 +76,18 @@ def set_metadata(stream, mi):
    title = mi.title if mi.title else orig_pdf.documentInfo.title
    author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
    out_pdf = PdfFileWriter(title=title, author=author)
    out_str = cStringIO.StringIO()
    writer = MetadataWriter(out_pdf, out_str)
    for page in orig_pdf.pages:
        out_pdf.addPage(page)
-    out_str = cStringIO.StringIO()
+    writer.start()
-    out_pdf.write(out_str)
+    writer.join(10) # Wait 10 secs for writing to complete
    out_pdf.killed = True
    writer.join()
    if out_pdf.killed:
        print 'Failed to set metadata: took too long'
        return
    stream.seek(0)
    stream.truncate()
    out_str.seek(0)
@ -72,35 +95,32 @@ def set_metadata(stream, mi):
    stream.seek(0)
 def get_cover(stream):
    stream.seek(0)
    data = cStringIO.StringIO()
    try:
-        with FileWrapper(stream) as stream:
+        pdf = PdfFileReader(stream)
-            pdf = PdfFileReader(stream)
+        output = PdfFileWriter()
            output = PdfFileWriter()
-            if len(pdf.pages) >= 1:
+        if len(pdf.pages) >= 1:
-                output.addPage(pdf.getPage(0))
+            output.addPage(pdf.getPage(0))
-            with TemporaryDirectory('_pdfmeta') as tdir:
+        with TemporaryDirectory('_pdfmeta') as tdir:
-                cover_path = os.path.join(tdir, 'cover.pdf')
+            cover_path = os.path.join(tdir, 'cover.pdf')
-                outputStream = file(cover_path, "wb")
+            with open(cover_path, "wb") as outputStream:
                output.write(outputStream)
                outputStream.close()
            with ImageMagick():
                wand = NewMagickWand()
                MagickReadImage(wand, cover_path)
                MagickSetImageFormat(wand, 'JPEG')
                MagickWriteImage(wand, '%s.jpg' % cover_path)
                img = Image.open('%s.jpg' % cover_path)
                img.save(data, 'JPEG')
    except:
        import traceback
        traceback.print_exc()
    return data.getvalue()
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -272,11 +272,7 @@ def XPath(expr):
 def xpath(elem, expr):
    """Evaluate the XPath expression `expr` on `elem`, passing the module's
    XPNSMAP as the namespace prefix map."""
    evaluate = elem.xpath
    return evaluate(expr, namespaces=XPNSMAP)
 def _prepare_xml_for_serialization(root):
    """Hook called before a tree is serialized; currently does nothing
    and leaves `root` untouched."""
    return None
 def xml2str(root, pretty_print=False, strip_comments=False):
    _prepare_xml_for_serialization(root)
    ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
                          pretty_print=pretty_print)
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
 def xml2unicode(root, pretty_print=False):
    """Serialize `root` with ``etree.tostring`` after running the
    pre-serialization hook on it.

    `pretty_print` is forwarded unchanged to ``etree.tostring``.
    """
    _prepare_xml_for_serialization(root)
    serialized = etree.tostring(root, pretty_print=pretty_print)
    return serialized
 ASCII_CHARS   = set(chr(x) for x in xrange(128))
@ -321,6 +316,25 @@ def urlnormalize(href):
    parts = (urlquote(part) for part in parts)
    return urlunparse(parts)
 class DummyHandler(logging.Handler):
    """Logging handler that relays records to an external log object.

    The handler is created at WARNING level with a bare ``%(message)s``
    format. Until :attr:`log` is set, records are silently dropped; once it
    is set, records at ERROR or above go to ``log.error`` and everything
    else goes to ``log.warn``.
    """

    def __init__(self):
        logging.Handler.__init__(self, logging.WARNING)
        self.log = None
        self.setFormatter(logging.Formatter('%(message)s'))

    def emit(self, record):
        target = self.log
        if target is None:
            # Nothing attached yet: drop the record.
            return
        text = self.format(record)
        if record.levelno >= logging.ERROR:
            target.error(text)
        else:
            target.warn(text)
 # Dedicated stdlib logger for CSS messages. Only WARNING and above are let
 # through, and they are routed to a DummyHandler, which forwards them to
 # whatever object is stored in its `log` attribute (None until assigned).
 _css_logger = logging.getLogger('calibre.css')
 _css_logger.setLevel(logging.WARNING)
 _css_log_handler = DummyHandler()
 _css_logger.addHandler(_css_log_handler)
 class OEBError(Exception):
    """Generic OEB-processing error."""
@ -778,7 +792,8 @@ class Manifest(object):
            data = self.oeb.css_preprocessor(data)
            data = XHTML_CSS_NAMESPACE + data
            parser = CSSParser(loglevel=logging.WARNING,
-                               fetcher=self._fetch_css)
+                               fetcher=self._fetch_css,
                               log=_css_logger)
            data = parser.parseString(data, href=self.href)
            data.namespaces['h'] = XHTML_NS
            return data
@ -1435,7 +1450,7 @@ class OEBBook(object):
        :attr:`pages`: List of "pages," such as indexed to a print edition of
            the same text.
        """
-
+        _css_log_handler.log = logger
        self.encoding = encoding
        self.html_preprocessor = html_preprocessor
        self.css_preprocessor = css_preprocessor
@ -1450,6 +1465,7 @@ class OEBBook(object):
        self.guide = Guide(self)
        self.toc = TOC()
        self.pages = PageList()
        self.auto_generated_toc = True
    @classmethod
    def generate(cls, opts):
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase
 from calibre.customize.ui import available_input_formats
 from calibre.ebooks.epub.from_html import TITLEPAGE
-from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
+from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.zipfile import safe_replace, ZipFile
 from calibre.utils.config import DynamicConfig
 from calibre.utils.logging import Log
 from calibre import CurrentDir
 def character_count(html):
    '''
@ -57,31 +56,21 @@ class FakeOpts(object):
    max_levels = 5
    input_encoding = None
 def html2opf(path, tdir, log):
    '''
    Create ``metadata.opf`` inside `tdir` for the HTML file at `path` and
    return the absolute path to it.

    The manifest and spine list every file returned by ``get_filelist``,
    each declared as ``application/xhtml+xml``; the guide is cleared.

    :param path: Path to the root HTML file.
    :param tdir: Directory in which the OPF is written (made the current
                 directory for the duration of the call).
    :param log: Log object handed to ``get_filelist``.
    '''
    from calibre.ebooks.html.input import get_filelist
    from calibre.ebooks.metadata.meta import get_metadata
    # Imported here because the module-level opf2 import only provides OPF;
    # without it the OPFCreator call below raises NameError.
    from calibre.ebooks.metadata.opf2 import OPFCreator
    with CurrentDir(tdir):
        fl = get_filelist(path, tdir, FakeOpts(), log)
        # Close the source file as soon as the metadata has been read.
        with open(path, 'rb') as src:
            mi = get_metadata(src, 'html')
        opf = OPFCreator(os.getcwdu(), mi)
        opf.guide = None
        entries = [(f.path, 'application/xhtml+xml') for f in fl]
        opf.create_manifest(entries)
        opf.create_spine([f.path for f in fl])
        # Ensure the OPF is flushed and closed before its path is returned.
        with open('metadata.opf', 'wb') as out:
            opf.render(out)
        opfpath = os.path.abspath('metadata.opf')
    return opfpath
 def opf2opf(path, tdir, opts):
    '''
    Identity conversion: return `path` unchanged. `tdir` and `opts` are
    accepted but ignored.
    '''
    return path
 def is_supported(path):
    '''
    Return whether the extension of `path` is among the available input
    formats. The extension is lower-cased, stripped of dots, and any
    ``htm``/``html``/``xhtm``/``xhtml`` occurrence in it is rewritten to
    ``html`` before the lookup.
    '''
    suffix = os.path.splitext(path)[1]
    ext = suffix.replace('.', '').lower()
    ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
    supported = available_input_formats()
    return ext in supported
 def write_oebbook(oeb, path):
    '''
    Write `oeb` to `path` with a default OEBWriter, then return the first
    entry yielded by ``walk(path)`` whose name ends in ``.opf``. Returns
    None when no such entry is found.
    '''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    writer = OEBWriter()
    writer(oeb, path)
    for candidate in walk(path):
        if not candidate.endswith('.opf'):
            continue
        return candidate
    return None
 class EbookIterator(object):
    CHARACTERS_PER_PAGE = 1000
@ -131,17 +120,16 @@ class EbookIterator(object):
    def __enter__(self):
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base  = self._tdir.__enter__()
-        if self.ebook_ext == 'opf':
+        from calibre.ebooks.conversion.plumber import Plumber
-            self.pathtoopf = self.pathtoebook
+        plumber = Plumber(self.pathtoebook, self.base, self.log)
-        elif self.ebook_ext == 'html':
+        plumber.setup_options()
-            self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
+        if hasattr(plumber.opts, 'dont_package'):
-        else:
+            plumber.opts.dont_package = True
-            from calibre.ebooks.conversion.plumber import Plumber
+        self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
-            plumber = Plumber(self.pathtoebook, self.base, self.log)
+                plumber.opts, plumber.input_fmt, self.log,
-            plumber.setup_options()
+                {}, self.base)
-            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
+        if hasattr(self.pathtoopf, 'manifest'):
-                    plumber.opts, plumber.input_fmt, self.log,
+            self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
                    {}, self.base)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
    author = 'Kovid Goyal'
    file_type = 'oeb'
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        self.log, self.opts = log, opts
        if not os.path.exists(output_path):
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -349,6 +349,7 @@ class OEBReader(object):
    def _toc_from_ncx(self, item):
        if item is None:
            return False
        self.log.debug('Reading TOC from NCX...')
        ncx = item.data
        title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
        title = COLLAPSE_RE.sub(' ', title.strip())
@ -364,6 +365,7 @@ class OEBReader(object):
        result = xpath(opf, 'o2:tours/o2:tour')
        if not result:
            return False
        self.log.debug('Reading TOC from tour...')
        tour = result[0]
        toc = self.oeb.toc
        toc.title = tour.get('title')
@ -384,6 +386,7 @@ class OEBReader(object):
    def _toc_from_html(self, opf):
        if 'toc' not in self.oeb.guide:
            return False
        self.log.debug('Reading TOC from HTML...')
        itempath, frag = urldefrag(self.oeb.guide['toc'].href)
        item = self.oeb.manifest.hrefs[itempath]
        html = item.data
@ -414,6 +417,7 @@ class OEBReader(object):
        return True
    def _toc_from_spine(self, opf):
        self.log.warn('Generating default TOC from spine...')
        toc = self.oeb.toc
        titles = []
        headers = []
@ -441,11 +445,14 @@ class OEBReader(object):
        return True
    def _toc_from_opf(self, opf, item):
        self.oeb.auto_generated_toc = False
        if self._toc_from_ncx(item): return
-        if self._toc_from_tour(opf): return
+        # Prefer HTML to tour based TOC, since several LIT files
-        self.logger.warn('No metadata table of contents found')
+        # have good HTML TOCs but bad tour based TOCs
        if self._toc_from_html(opf): return
        if self._toc_from_tour(opf): return
        self._toc_from_spine(opf)
        self.oeb.auto_generated_toc = True
    def _pages_from_ncx(self, opf, item):
        if item is None:
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -51,8 +51,8 @@ class Split(object):
        self.log = oeb.log
        self.map = {}
        self.page_break_selectors = None
-        for item in self.oeb.manifest.items:
+        for item in list(self.oeb.manifest.items):
-            if etree.iselement(item.data):
+            if item.spine_position is not None and etree.iselement(item.data):
                self.split_item(item)
        self.fix_links()
@ -74,31 +74,34 @@ class Split(object):
            self.page_break_selectors = set([])
            stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
                    OEB_STYLES]
-        page_break_selectors = set([])
+            for rule in rules(stylesheets):
-        for rule in rules(stylesheets):
+                before = getattr(rule.style.getPropertyCSSValue(
-            before = getattr(rule.style.getPropertyCSSValue(
+                    'page-break-before'), 'cssText', '').strip().lower()
-                'page-break-before'), 'cssText', '').strip().lower()
+                after  = getattr(rule.style.getPropertyCSSValue(
-            after  = getattr(rule.style.getPropertyCSSValue(
+                    'page-break-after'), 'cssText', '').strip().lower()
-                'page-break-after'), 'cssText', '').strip().lower()
+                try:
-            try:
+                    if before and before != 'avoid':
-                if before and before != 'avoid':
+                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
-                    page_break_selectors.add((CSSSelector(rule.selectorText),
+                            True))
-                        True))
+                except:
-            except:
+                    pass
-                pass
+                try:
-            try:
+                    if after and after != 'avoid':
-                if after and after != 'avoid':
+                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
-                    page_break_selectors.add((CSSSelector(rule.selectorText),
+                            False))
-                        False))
+                except:
-            except:
+                    pass
                pass
        page_breaks = set([])
-        for selector, before in page_break_selectors:
+        for selector, before in self.page_break_selectors:
-            for elem in selector(item.data):
+            body = item.data.xpath('//h:body', namespaces=NAMESPACES)
-                if before:
+            if not body:
-                    elem.set('pb_before', '1')
+                continue
-                page_breaks.add(elem)
+            for elem in selector(body[0]):
                if elem not in body:
                    if before:
                        elem.set('pb_before', '1')
                    page_breaks.add(elem)
        for i, elem in enumerate(item.data.iter()):
            elem.set('pb_order', str(i))
@ -136,8 +139,10 @@ class Split(object):
        if href in self.map:
            anchor_map = self.map[href]
            nhref = anchor_map[frag if frag else None]
            nhref = self.current_item.relhref(nhref)
            if frag:
-                nhref = '#'.join(href, frag)
+                nhref = '#'.join((nhref, frag))
            return nhref
        return url
@ -153,7 +158,7 @@ class FlowSplitter(object):
        self.page_breaks    = page_breaks
        self.page_break_ids = page_break_ids
        self.max_flow_size  = max_flow_size
-        self.base           = item.abshref(item.href)
+        self.base           = item.href
        base, ext = os.path.splitext(self.base)
        self.base = base.replace('%', '%%')+'_split_%d'+ext
@ -192,9 +197,9 @@ class FlowSplitter(object):
        self.trees = []
        tree = orig_tree
        for pattern, before in ordered_ids:
            self.log.debug('\t\tSplitting on page-break')
            elem = pattern(tree)
            if elem:
                self.log.debug('\t\tSplitting on page-break')
                before, after = self.do_split(tree, elem[0], before)
                self.trees.append(before)
                tree = after
@ -414,13 +419,14 @@ class FlowSplitter(object):
                elem.attrib.pop(SPLIT_ATTR, None)
                elem.attrib.pop(SPLIT_POINT_ATTR, '0')
-        spine_pos = self.item.spine_pos
+        spine_pos = self.item.spine_position
-        for current, tree in zip(map(reversed, (self.files, self.trees))):
+        for current, tree in zip(*map(reversed, (self.files, self.trees))):
            for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
                href = a.get('href').strip()
                if href.startswith('#'):
                    anchor = href[1:]
                    file = self.anchor_map[anchor]
                    file = self.item.relhref(file)
                    if file != current:
                        a.set('href', file+href)
@ -430,12 +436,12 @@ class FlowSplitter(object):
            self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
        if self.oeb.guide:
-            for ref in self.oeb.guide:
+            for ref in self.oeb.guide.values():
                href, frag = urldefrag(ref.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
-                        nhref = '#'.join(nhref, frag)
+                        nhref = '#'.join((nhref, frag))
                    ref.href = nhref
        def fix_toc_entry(toc):
@ -444,7 +450,7 @@ class FlowSplitter(object):
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
-                        nhref = '#'.join(nhref, frag)
+                        nhref = '#'.join((nhref, frag))
                    toc.href = nhref
            for x in toc:
                fix_toc_entry(x)
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@ -49,7 +49,7 @@ class OEBWriter(object):
    def __call__(self, oeb, path):
        """
-        Read the book in the :class:`OEBBook` object :param:`oeb` to a file
+        Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
        at :param:`path`.
        """
        version = int(self.version[0])
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -319,6 +319,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        self.cover_changed = True
    def initialize_series(self):
        self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)
        all_series = self.db.all_series()
        all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
        series_id = self.db.series_id(self.row)
@ -335,13 +336,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            self.series.setCurrentIndex(idx)
            self.enable_series_index()
        pl = self.series.parentWidget().layout()
        for i in range(pl.count()):
            l =  pl.itemAt(i).layout()
            if l:
                l.invalidate()
                l.activate()
    def initialize_series_and_publisher(self):
        self.initialize_series()
        all_publishers = self.db.all_publishers()
--- a/src/calibre/gui2/images/news/der_standard.png
+++ b/src/calibre/gui2/images/news/der_standard.png
--- a/src/calibre/gui2/images/news/diepresse.png
+++ b/src/calibre/gui2/images/news/diepresse.png
--- a/src/calibre/gui2/images/news/seattle_times.png
+++ b/src/calibre/gui2/images/news/seattle_times.png
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -40,6 +40,7 @@ recipe_modules = ['recipe_' + r for r in (
           'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
           'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
           'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
           'seattle_times',
          )]
 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_der_standard.py
+++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py
@ -1,14 +1,37 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 ''' http://www.derstandard.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class DerStandardRecipe(BasicNewsRecipe):
-    title          = u'derStandard'
+    title = u'derStandard'
-    __author__  = 'Gerhard Aigner'
+    __author__ = 'Gerhard Aigner'
-
+    description = u'Nachrichten aus Österreich' 
    publisher ='derStandard.at'
    category = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds = True
    lang = 'de-AT'
    no_stylesheets = True
    encoding = 'utf-8'
    language = _('German')
    recursions = 0
    oldest_article = 1
    max_articles_per_feed = 100
    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    feeds          = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
        (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
        (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
@ -20,14 +43,10 @@ class DerStandardRecipe(BasicNewsRecipe):
        (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
        (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
        (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
    encoding = 'utf-8'
    language = _('German')
    recursions = 0
    remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
        dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
    preprocess_regexps = [
-        (re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
+        (re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]
@ -40,3 +59,10 @@ class DerStandardRecipe(BasicNewsRecipe):
        if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
            return None
        return article.link
    def preprocess_html(self, soup):
        '''
        Stamp the document with the recipe's language and encoding.

        Sets ``xml:lang`` and ``lang`` on the ``html`` element to
        ``self.lang`` and inserts a Content-Type meta tag naming
        ``self.encoding`` as the first child of ``head``. Returns `soup`.
        '''
        for attr in ('xml:lang', 'lang'):
            soup.html[attr] = self.lang
        meta_open = '<meta http-equiv="Content-Type" content="text/html; charset='
        mtag = meta_open + self.encoding + '">'
        soup.head.insert(0, mtag)
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_diepresse.py
+++ b/src/calibre/web/feeds/recipes/recipe_diepresse.py
@ -1,18 +1,42 @@
-import re
+# -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 ''' http://www.diepresse.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class DiePresseRecipe(BasicNewsRecipe):
-    title          = u'diePresse'
+    title = u'diePresse'
    __author__ = 'Gerhard Aigner'
    description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' 
    publisher ='DiePresse.com'
    category = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds = True
    lang = 'de-AT'
    no_stylesheets = True
    encoding = 'ISO-8859-1'
    language = _('German')
    recursions = 0
    oldest_article = 1
    max_articles_per_feed = 100
-    recursions = 0
+  
-    language = _('German')
+    html2lrf_options = [
-    __author__ = 'Gerhard Aigner'
+                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [
 	(re.compile(r'Textversion', re.DOTALL), lambda match: ''),
    ]
    remove_tags = [dict(name='hr'),
 	dict(name='br'),
 	dict(name='small'),
@ -21,6 +45,7 @@ class DiePresseRecipe(BasicNewsRecipe):
 	dict(name='h1', attrs={'class':'titel'}),
 	dict(name='a', attrs={'class':'print'}),
 	dict(name='div', attrs={'class':'hline'})]
    feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
 	(u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
 	(u'Europa', u'http://diepresse.com/rss/EU'),
@ -29,7 +54,7 @@ class DiePresseRecipe(BasicNewsRecipe):
 	(u'Kultur', u'http://diepresse.com/rss/Kultur'),
 	(u'Leben', u'http://diepresse.com/rss/Leben'),
 	(u'Tech', u'http://diepresse.com/rss/Tech'),
-	(u'Science', u'http://diepresse.com/rss/Science'),
+	(u'Wissenschaft', u'http://diepresse.com/rss/Science'),
 	(u'Bildung', u'http://diepresse.com/rss/Bildung'),
 	(u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
 	(u'Recht', u'http://diepresse.com/rss/Recht'),
@ -38,3 +63,10 @@ class DiePresseRecipe(BasicNewsRecipe):
    def print_version(self, url):
        '''Return the text-only variant of an article URL.

        Every occurrence of ``home`` in ``url`` is rewritten to ``text/home``;
        a URL without ``home`` comes back unchanged.
        '''
        text_url = url.replace('home', 'text/home')
        return text_url
    def preprocess_html(self, soup):
        '''Stamp the parsed page with the recipe language and a utf-8 charset hint.

        Sets the ``xml:lang`` and ``lang`` entries of ``soup.html`` to
        ``self.lang``, inserts a Content-Type meta string at index 0 of
        ``soup.head`` and returns the same ``soup`` object.
        '''
        # Indentation is spaces only: the previous mix of tabs and spaces in
        # this method is rejected by ``python -tt`` and by Python 3.
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        # NOTE(review): the charset is hard-coded to utf-8 although the recipe
        # declares encoding = 'ISO-8859-1' -- presumably the page has been
        # re-encoded by the time this hook runs; confirm against the fetcher.
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0, mtag)
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_seattle_times.py
+++ b/src/calibre/web/feeds/recipes/recipe_seattle_times.py
@ -0,0 +1,50 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 seattletimes.nwsource.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class SeattleTimes(BasicNewsRecipe):
    '''Recipe for The Seattle Times, driven by a single RSS feed.'''

    # Descriptive metadata; description, publisher and category are reused
    # below when building the converter option values.
    title = 'The Seattle Times'
    __author__ = 'Darko Miletic'
    description = 'News from Seattle and USA'
    publisher = 'The Seattle Times'
    category = 'news, politics, USA'
    language = _('English')

    # Download settings.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    feeds = [
        (u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml'),
    ]

    remove_tags = [
        dict(name=['object', 'link', 'script']),
        dict(name='p', attrs={'class': 'permission'}),
    ]

    def print_version(self, url):
        '''Build the print-story URL for an article link.

        The document id is taken from ``url``: the part before the last
        ``_`` is kept, and of that, the part after the last ``/``.
        '''
        before_underscore = url.rpartition('_')[0]
        article_id = before_underscore.rpartition('/')[2]
        return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id

    def preprocess_html(self, soup):
        '''Insert a Content-Language meta string and drop inline styles.

        The meta string goes in at index 0 of ``soup.head``; every element
        returned by ``soup.findAll(style=True)`` has its ``style`` entry
        deleted. The same ``soup`` object is returned.
        '''
        language_meta = '<meta http-equiv="Content-Language" content="en-US"/>'
        soup.head.insert(0, language_meta)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
--- a/src/pyPdf/generic.py
+++ b/src/pyPdf/generic.py
@ -299,7 +299,7 @@ def readStringFromStream(stream):
            elif tok == "t":
                tok = "\t"
            elif tok == "b":
-                tok == "\b"
+                tok = "\b"
            elif tok == "f":
                tok = "\f"
            elif tok == "(":
--- a/src/pyPdf/pdf.py
+++ b/src/pyPdf/pdf.py
@ -39,15 +39,12 @@ __author__ = "Mathieu Fenniak"
 __author_email__ = "biziqe@mathieu.fenniak.net"
 import struct
-try:
+from cStringIO import StringIO
    from cStringIO import StringIO
 except ImportError:
    from StringIO import StringIO
-import filters
+from generic import DictionaryObject, NameObject, NumberObject, \
-import utils
+createStringObject, ArrayObject, ByteStringObject, StreamObject, \
-import warnings
+IndirectObject, utils, readObject, TextStringObject, BooleanObject, \
-from generic import *
+RectangleObject, DecodedStreamObject
 from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
@ -56,6 +53,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
 # class (typically {@link #PdfFileReader PdfFileReader}).
 class PdfFileWriter(object):
    def __init__(self,title=u"Unknown",author=u"Unknown"):
        self.killed = False
        self._header = "%PDF-1.3"
        self._objects = []  # array of indirect objects
@ -162,7 +160,7 @@ class PdfFileWriter(object):
    # @param stream An object to write the file to.  The object must support
    # the write method, and the tell method, similar to a file object.
    def write(self, stream):
-        import struct, md5
+        import md5
        externalReferenceMap = {}
        self.stack = []
@ -214,6 +212,8 @@ class PdfFileWriter(object):
        stream.write("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))
    def _sweepIndirectReferences(self, externMap, data):
        if self.killed:
            raise RuntimeError('Writer killed')
        if isinstance(data, DictionaryObject):
            for key, value in data.items():
                origvalue = value