Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 18:54:09 -04:00)

Commit b104286f61: pdf get_cover returns cover image instead of nothing.
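The headline change is to get_cover() in src/calibre/ebooks/metadata/pdf.py: it now renders the first page of the PDF to JPEG via ImageMagick and returns the image bytes instead of nothing. A minimal caller sketch, assuming a local file named book.pdf (hypothetical, not part of this commit):

    # Hypothetical caller of the fixed get_cover(); 'book.pdf' is a placeholder path.
    from calibre.ebooks.metadata.pdf import get_cover

    stream = open('book.pdf', 'rb')
    jpeg_bytes = get_cover(stream)   # now returns JPEG data for page 1, or '' on failure
    stream.close()
    if jpeg_bytes:
        out = open('cover.jpg', 'wb')
        out.write(jpeg_bytes)
        out.close()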
@ -263,14 +263,14 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
|
||||
def set_metadata(self, stream, mi, type):
|
||||
from calibre.ebooks.metadata.mobi import set_metadata
|
||||
set_metadata(stream, mi)
|
||||
|
||||
|
||||
class PDFMetadataWriter(MetadataWriterPlugin):
|
||||
|
||||
name = 'Set PDF metadata'
|
||||
file_types = set(['pdf'])
|
||||
description = _('Set metadata in %s files') % 'PDF'
|
||||
author = 'John Schember'
|
||||
|
||||
|
||||
def set_metadata(self, stream, mi, type):
|
||||
from calibre.ebooks.metadata.pdf import set_metadata
|
||||
set_metadata(stream, mi)
|
||||
@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput
|
||||
from calibre.ebooks.mobi.input import MOBIInput
|
||||
from calibre.ebooks.pdf.input import PDFInput
|
||||
from calibre.ebooks.txt.input import TXTInput
|
||||
from calibre.ebooks.lit.input import LITInput
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.ebooks.oeb.output import OEBOutput
|
||||
from calibre.ebooks.txt.output import TXTOutput
|
||||
@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput
|
||||
from calibre.customize.profiles import input_profiles, output_profiles
|
||||
|
||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
|
||||
TXTInput, OEBOutput, TXTOutput, PDFOutput]
|
||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
|
@ -41,6 +41,11 @@ class ConversionOption(object):
|
||||
def __eq__(self, other):
|
||||
return hash(self) == hash(other)
|
||||
|
||||
def clone(self):
|
||||
return ConversionOption(name=self.name, help=self.help,
|
||||
long_switch=self.long_switch, short_switch=self.short_switch,
|
||||
choices=self.choices)
|
||||
|
||||
class OptionRecommendation(object):
|
||||
LOW = 1
|
||||
MED = 2
|
||||
@ -59,6 +64,10 @@ class OptionRecommendation(object):
|
||||
|
||||
self.validate_parameters()
|
||||
|
||||
def clone(self):
|
||||
return OptionRecommendation(recommended_value=self.recommended_value,
|
||||
level=self.level, option=self.option.clone())
|
||||
|
||||
def validate_parameters(self):
|
||||
if self.option.choices and self.recommended_value not in \
|
||||
self.option.choices:
|
||||
@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin):
|
||||
options.debug_input = os.path.abspath(options.debug_input)
|
||||
if not os.path.exists(options.debug_input):
|
||||
os.makedirs(options.debug_input)
|
||||
shutil.rmtree(options.debug_input)
|
||||
shutil.copytree(output_dir, options.debug_input)
|
||||
if isinstance(ret, basestring):
|
||||
shutil.rmtree(options.debug_input)
|
||||
shutil.copytree(output_dir, options.debug_input)
|
||||
else:
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
w = OEBWriter(pretty_print=options.pretty_print)
|
||||
w(ret, options.debug_input)
|
||||
|
||||
log.info('Input debug saved to:', options.debug_input)
|
||||
|
||||
return ret
|
||||
|
@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
|
||||
raise SystemExit(1)
|
||||
|
||||
output = args[2]
|
||||
if output.startswith('.'):
|
||||
if output.startswith('.') and output != '.':
|
||||
output = os.path.splitext(os.path.basename(input))[0]+output
|
||||
output = os.path.abspath(output)
|
||||
|
||||
@ -171,7 +171,8 @@ def main(args=sys.argv):
|
||||
|
||||
plumber.run()
|
||||
|
||||
log(_('Output saved to'), ' ', plumber.output)
|
||||
if plumber.opts.debug_input is None:
|
||||
log(_('Output saved to'), ' ', plumber.output)
|
||||
|
||||
return 0
|
||||
|
||||
|
@ -32,8 +32,8 @@ class Plumber(object):
|
||||
:param input: Path to input file.
|
||||
:param output: Path to output file/directory
|
||||
'''
|
||||
self.input = input
|
||||
self.output = output
|
||||
self.input = os.path.abspath(input)
|
||||
self.output = os.path.abspath(output)
|
||||
self.log = log
|
||||
|
||||
# Initialize the conversion options that are independent of input and
|
||||
@ -188,15 +188,15 @@ OptionRecommendation(name='language',
|
||||
]
|
||||
|
||||
|
||||
input_fmt = os.path.splitext(input)[1]
|
||||
input_fmt = os.path.splitext(self.input)[1]
|
||||
if not input_fmt:
|
||||
raise ValueError('Input file must have an extension')
|
||||
input_fmt = input_fmt[1:].lower()
|
||||
|
||||
if os.path.exists(output) and os.path.isdir(output):
|
||||
if os.path.exists(self.output) and os.path.isdir(self.output):
|
||||
output_fmt = 'oeb'
|
||||
else:
|
||||
output_fmt = os.path.splitext(output)[1]
|
||||
output_fmt = os.path.splitext(self.output)[1]
|
||||
if not output_fmt:
|
||||
output_fmt = '.oeb'
|
||||
output_fmt = output_fmt[1:].lower()
|
||||
@ -323,6 +323,9 @@ OptionRecommendation(name='language',
|
||||
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
||||
self.input_fmt, self.log,
|
||||
accelerators, tdir)
|
||||
if self.opts.debug_input is not None:
|
||||
self.log('Debug input called, aborting the rest of the pipeline.')
|
||||
return
|
||||
if not hasattr(self.oeb, 'manifest'):
|
||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
||||
|
||||
@ -365,18 +368,20 @@ OptionRecommendation(name='language',
|
||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||
self.opts, self.log)
|
||||
|
||||
def create_oebbook(log, opfpath, opts):
|
||||
def create_oebbook(log, path_or_stream, opts, reader=None):
|
||||
'''
|
||||
Create an OEBBook from an OPF file.
|
||||
Create an OEBBook.
|
||||
'''
|
||||
from calibre.ebooks.oeb.reader import OEBReader
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
html_preprocessor = HTMLPreProcessor()
|
||||
reader = OEBReader()
|
||||
oeb = OEBBook(log, html_preprocessor=html_preprocessor,
|
||||
pretty_print=opts.pretty_print)
|
||||
# Read OEB Book into OEBBook
|
||||
log.info('Parsing all content...')
|
||||
reader(oeb, opfpath)
|
||||
log('Parsing all content...')
|
||||
if reader is None:
|
||||
from calibre.ebooks.oeb.reader import OEBReader
|
||||
reader = OEBReader
|
||||
|
||||
reader()(oeb, path_or_stream)
|
||||
return oeb
|
||||
|
||||
|
@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='dont_package',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Normally this input plugin re-arranges all the input '
|
||||
'files into a standard folder hierarchy. Only use this option '
|
||||
'if you know what you are doing as it can result in various '
|
||||
'nasty side effects in the rest of the conversion pipeline.'
|
||||
)
|
||||
),
|
||||
])
|
||||
|
||||
def convert(self, stream, opts, file_ext, log,
|
||||
@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin):
|
||||
mi.render(open('metadata.opf', 'wb'))
|
||||
opfpath = os.path.abspath('metadata.opf')
|
||||
|
||||
if opts.dont_package:
|
||||
return opfpath
|
||||
|
||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||
oeb = create_oebbook(log, opfpath, opts)
|
||||
|
||||
|
src/calibre/ebooks/lit/input.py (new file, 24 lines)
@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
|
||||
class LITInput(InputFormatPlugin):
|
||||
|
||||
name = 'LIT Input'
|
||||
author = 'Marshall T. Vandegrift'
|
||||
description = 'Convert LIT files to HTML'
|
||||
file_types = set(['lit'])
|
||||
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.lit.reader import LitReader
|
||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||
return create_oebbook(log, stream, options, reader=LitReader)
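The reader argument added to create_oebbook() in the plumber (see the conversion.plumber hunk above) is a class, not an instance: the plumber calls reader()(oeb, path_or_stream) and falls back to OEBReader when nothing is passed, which is how this new LIT plugin hands in LitReader. A minimal sketch of a standalone caller that relies on the default reader, assuming an OPF file on disk and a bare options object (hypothetical, not part of this commit):

    # Hypothetical standalone use of create_oebbook() with the default OEBReader.
    from calibre.utils.logging import Log
    from calibre.ebooks.conversion.plumber import create_oebbook

    class Opts(object):
        pretty_print = False   # the only option create_oebbook() reads in this sketch

    oeb = create_oebbook(Log(), 'metadata.opf', Opts())   # 'metadata.opf' is a placeholder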
|
||||
|
||||
|
@ -7,13 +7,12 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys, struct, os
|
||||
import struct, os
|
||||
import functools
|
||||
import re
|
||||
from urlparse import urldefrag
|
||||
from cStringIO import StringIO
|
||||
from urllib import unquote as urlunquote
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
@ -29,12 +28,12 @@ __all__ = ["LitReader"]
|
||||
XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
"""
|
||||
OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE package
|
||||
<!DOCTYPE package
|
||||
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
|
||||
"""
|
||||
HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!DOCTYPE html PUBLIC
|
||||
<!DOCTYPE html PUBLIC
|
||||
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
|
||||
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
|
||||
"""
|
||||
@ -73,7 +72,7 @@ def encint(bytes, remaining):
|
||||
val <<= 7
|
||||
val |= (b & 0x7f)
|
||||
if b & 0x80 == 0: break
|
||||
return val, bytes[pos:], remaining
|
||||
return val, bytes[pos:], remaining
|
||||
|
||||
def msguid(bytes):
|
||||
values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
|
||||
@ -123,7 +122,7 @@ class UnBinary(object):
|
||||
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
|
||||
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
|
||||
EMPTY_ATOMS = ({},{})
|
||||
|
||||
|
||||
def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
|
||||
self.manifest = manifest
|
||||
self.tag_map, self.attr_map, self.tag_to_attr_map = map
|
||||
@ -143,7 +142,7 @@ class UnBinary(object):
|
||||
raw = self.CLOSE_ANGLE_RE.sub(r'>', raw)
|
||||
raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
|
||||
self.raw = raw
|
||||
|
||||
|
||||
def item_path(self, internal_id):
|
||||
try:
|
||||
target = self.manifest[internal_id].path
|
||||
@ -159,7 +158,7 @@ class UnBinary(object):
|
||||
index += 1
|
||||
relpath = (['..'] * (len(base) - index)) + target[index:]
|
||||
return '/'.join(relpath)
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
return self.raw.decode('utf-8')
|
||||
|
||||
@ -172,11 +171,11 @@ class UnBinary(object):
|
||||
in_censorship = is_goingdown = False
|
||||
state = 'text'
|
||||
flags = 0
|
||||
|
||||
|
||||
while index < len(bin):
|
||||
c, index = read_utf8_char(bin, index)
|
||||
oc = ord(c)
|
||||
|
||||
|
||||
if state == 'text':
|
||||
if oc == 0:
|
||||
state = 'get flags'
|
||||
@ -188,14 +187,14 @@ class UnBinary(object):
|
||||
elif c == '<':
|
||||
c = '<<'
|
||||
buf.write(encode(c))
|
||||
|
||||
|
||||
elif state == 'get flags':
|
||||
if oc == 0:
|
||||
state = 'text'
|
||||
continue
|
||||
flags = oc
|
||||
state = 'get tag'
|
||||
|
||||
|
||||
elif state == 'get tag':
|
||||
state = 'text' if oc == 0 else 'get attr'
|
||||
if flags & FLAG_OPENING:
|
||||
@ -226,7 +225,7 @@ class UnBinary(object):
|
||||
if depth == 0:
|
||||
raise LitError('Extra closing tag')
|
||||
return index
|
||||
|
||||
|
||||
elif state == 'get attr':
|
||||
in_censorship = False
|
||||
if oc == 0:
|
||||
@ -265,7 +264,7 @@ class UnBinary(object):
|
||||
state = 'get href length'
|
||||
else:
|
||||
state = 'get value length'
|
||||
|
||||
|
||||
elif state == 'get value length':
|
||||
if not in_censorship:
|
||||
buf.write('"')
|
||||
@ -281,7 +280,7 @@ class UnBinary(object):
|
||||
continue
|
||||
if count < 0 or count > (len(bin) - index):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
|
||||
|
||||
elif state == 'get value':
|
||||
if count == 0xfffe:
|
||||
if not in_censorship:
|
||||
@ -301,7 +300,7 @@ class UnBinary(object):
|
||||
buf.write('"')
|
||||
in_censorship = False
|
||||
state = 'get attr'
|
||||
|
||||
|
||||
elif state == 'get custom length':
|
||||
count = oc - 1
|
||||
if count <= 0 or count > len(bin)-index:
|
||||
@ -309,21 +308,21 @@ class UnBinary(object):
|
||||
dynamic_tag += 1
|
||||
state = 'get custom'
|
||||
tag_name = ''
|
||||
|
||||
|
||||
elif state == 'get custom':
|
||||
tag_name += c
|
||||
count -= 1
|
||||
if count == 0:
|
||||
buf.write(encode(tag_name))
|
||||
state = 'get attr'
|
||||
|
||||
|
||||
elif state == 'get attr length':
|
||||
count = oc - 1
|
||||
if count <= 0 or count > (len(bin) - index):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
buf.write(' ')
|
||||
state = 'get custom attr'
|
||||
|
||||
|
||||
elif state == 'get custom attr':
|
||||
buf.write(encode(c))
|
||||
count -= 1
|
||||
@ -337,7 +336,7 @@ class UnBinary(object):
|
||||
raise LitError('Invalid character count %d' % count)
|
||||
href = ''
|
||||
state = 'get href'
|
||||
|
||||
|
||||
elif state == 'get href':
|
||||
href += c
|
||||
count -= 1
|
||||
@ -350,7 +349,7 @@ class UnBinary(object):
|
||||
buf.write(encode(u'"%s"' % path))
|
||||
state = 'get attr'
|
||||
return index
|
||||
|
||||
|
||||
|
||||
class DirectoryEntry(object):
|
||||
def __init__(self, name, section, offset, size):
|
||||
@ -358,11 +357,11 @@ class DirectoryEntry(object):
|
||||
self.section = section
|
||||
self.offset = offset
|
||||
self.size = size
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
|
||||
% (repr(self.name), self.section, self.offset, self.size)
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return repr(self)
|
||||
|
||||
@ -382,12 +381,12 @@ class ManifestItem(object):
|
||||
path = os.path.normpath(path).replace('\\', '/')
|
||||
while path.startswith('../'): path = path[3:]
|
||||
self.path = path
|
||||
|
||||
|
||||
def __eq__(self, other):
|
||||
if hasattr(other, 'internal'):
|
||||
return self.internal == other.internal
|
||||
return self.internal == other
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \
|
||||
"offset=%d, root=%r, state=%r)" \
|
||||
@ -404,7 +403,7 @@ def preserve(function):
|
||||
self.stream.seek(opos)
|
||||
functools.update_wrapper(wrapper, function)
|
||||
return wrapper
|
||||
|
||||
|
||||
class LitFile(object):
|
||||
PIECE_SIZE = 16
|
||||
|
||||
@ -438,14 +437,14 @@ class LitFile(object):
|
||||
return self.stream.read(8)
|
||||
return property(fget=fget)
|
||||
magic = magic()
|
||||
|
||||
|
||||
def version():
|
||||
def fget(self):
|
||||
self.stream.seek(8)
|
||||
return u32(self.stream.read(4))
|
||||
return property(fget=fget)
|
||||
version = version()
|
||||
|
||||
|
||||
def hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -453,7 +452,7 @@ class LitFile(object):
|
||||
return int32(self.stream.read(4))
|
||||
return property(fget=fget)
|
||||
hdr_len = hdr_len()
|
||||
|
||||
|
||||
def num_pieces():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -461,7 +460,7 @@ class LitFile(object):
|
||||
return int32(self.stream.read(4))
|
||||
return property(fget=fget)
|
||||
num_pieces = num_pieces()
|
||||
|
||||
|
||||
def sec_hdr_len():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -469,7 +468,7 @@ class LitFile(object):
|
||||
return int32(self.stream.read(4))
|
||||
return property(fget=fget)
|
||||
sec_hdr_len = sec_hdr_len()
|
||||
|
||||
|
||||
def guid():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -477,7 +476,7 @@ class LitFile(object):
|
||||
return self.stream.read(16)
|
||||
return property(fget=fget)
|
||||
guid = guid()
|
||||
|
||||
|
||||
def header():
|
||||
@preserve
|
||||
def fget(self):
|
||||
@ -488,7 +487,7 @@ class LitFile(object):
|
||||
return self.stream.read(size)
|
||||
return property(fget=fget)
|
||||
header = header()
|
||||
|
||||
|
||||
@preserve
|
||||
def __len__(self):
|
||||
self.stream.seek(0, 2)
|
||||
@ -501,7 +500,7 @@ class LitFile(object):
|
||||
|
||||
def read_content(self, offset, size):
|
||||
return self.read_raw(self.content_offset + offset, size)
|
||||
|
||||
|
||||
def read_secondary_header(self):
|
||||
offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
|
||||
bytes = self.read_raw(offset, self.sec_hdr_len)
|
||||
@ -526,12 +525,12 @@ class LitFile(object):
|
||||
if u32(bytes[offset+4+16:]):
|
||||
raise LitError('This file has a 64bit content offset')
|
||||
self.content_offset = u32(bytes[offset+16:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.timestamp = u32(bytes[offset+24:])
|
||||
self.language_id = u32(bytes[offset+28:])
|
||||
offset += 48
|
||||
if not hasattr(self, 'content_offset'):
|
||||
raise LitError('Could not figure out the content offset')
|
||||
|
||||
|
||||
def read_header_pieces(self):
|
||||
src = self.header[self.hdr_len:]
|
||||
for i in xrange(self.num_pieces):
|
||||
@ -556,7 +555,7 @@ class LitFile(object):
|
||||
self.piece3_guid = piece
|
||||
elif i == 4:
|
||||
self.piece4_guid = piece
|
||||
|
||||
|
||||
def read_directory(self, piece):
|
||||
if not piece.startswith('IFCM'):
|
||||
raise LitError('Header piece #1 is not main directory.')
|
||||
@ -760,9 +759,9 @@ class LitFile(object):
|
||||
raise LitError("Reset table is too short")
|
||||
if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
|
||||
raise LitError("Reset table has 64bit value for UCLENGTH")
|
||||
|
||||
|
||||
result = []
|
||||
|
||||
|
||||
window_size = 14
|
||||
u = u32(control[CONTROL_WINDOW_SIZE:])
|
||||
while u > 0:
|
||||
@ -847,13 +846,13 @@ class LitContainer(object):
|
||||
|
||||
def __init__(self, filename_or_stream):
|
||||
self._litfile = LitFile(filename_or_stream)
|
||||
|
||||
|
||||
def namelist(self):
|
||||
return self._litfile.paths.keys()
|
||||
|
||||
def exists(self, name):
|
||||
return urlunquote(name) in self._litfile.paths
|
||||
|
||||
|
||||
def read(self, name):
|
||||
entry = self._litfile.paths[urlunquote(name)] if name else None
|
||||
if entry is None:
|
||||
@ -869,7 +868,7 @@ class LitContainer(object):
|
||||
internal = '/'.join(('/data', entry.internal))
|
||||
content = self._litfile.get_file(internal)
|
||||
return content
|
||||
|
||||
|
||||
def _read_meta(self):
|
||||
path = 'content.opf'
|
||||
raw = self._litfile.get_file('/meta')
|
||||
|
@ -1,10 +1,10 @@
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''Read meta information from PDF files'''
|
||||
|
||||
import sys, os, cStringIO
|
||||
from threading import Thread
|
||||
|
||||
from calibre import FileWrapper
|
||||
from calibre.ebooks.metadata import MetaInformation, authors_to_string
|
||||
@ -13,7 +13,8 @@ from pyPdf import PdfFileReader, PdfFileWriter
|
||||
import Image
|
||||
try:
|
||||
from calibre.utils.PythonMagickWand import \
|
||||
NewMagickWand, MagickReadImage, MagickSetImageFormat, MagickWriteImage
|
||||
NewMagickWand, MagickReadImage, MagickSetImageFormat, \
|
||||
MagickWriteImage, ImageMagick
|
||||
_imagemagick_loaded = True
|
||||
except:
|
||||
_imagemagick_loaded = False
|
||||
@ -51,9 +52,23 @@ def get_metadata(stream, extract_cover=True):
|
||||
print >>sys.stderr, msg.encode('utf8')
|
||||
return mi
|
||||
|
||||
class MetadataWriter(Thread):
|
||||
|
||||
def __init__(self, out_pdf, buf):
|
||||
self.out_pdf = out_pdf
|
||||
self.buf = buf
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
self.out_pdf.write(self.buf)
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
def set_metadata(stream, mi):
|
||||
stream.seek(0)
|
||||
# Use a cStringIO object for the pdf because we will want to over
|
||||
# Use a StringIO object for the pdf because we will want to over
|
||||
# write it later and if we are working on the stream directly it
|
||||
# could cause some issues.
|
||||
raw = cStringIO.StringIO(stream.read())
|
||||
@ -61,10 +76,18 @@ def set_metadata(stream, mi):
|
||||
title = mi.title if mi.title else orig_pdf.documentInfo.title
|
||||
author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
|
||||
out_pdf = PdfFileWriter(title=title, author=author)
|
||||
out_str = cStringIO.StringIO()
|
||||
writer = MetadataWriter(out_pdf, out_str)
|
||||
for page in orig_pdf.pages:
|
||||
out_pdf.addPage(page)
|
||||
out_str = cStringIO.StringIO()
|
||||
out_pdf.write(out_str)
|
||||
writer.start()
|
||||
writer.join(10) # Wait 10 secs for writing to complete
|
||||
out_pdf.killed = True
|
||||
writer.join()
|
||||
if out_pdf.killed:
|
||||
print 'Failed to set metadata: took too long'
|
||||
return
|
||||
|
||||
stream.seek(0)
|
||||
stream.truncate()
|
||||
out_str.seek(0)
|
||||
@ -72,35 +95,32 @@ def set_metadata(stream, mi):
|
||||
stream.seek(0)
|
||||
|
||||
def get_cover(stream):
|
||||
stream.seek(0)
|
||||
|
||||
data = cStringIO.StringIO()
|
||||
|
||||
try:
|
||||
with FileWrapper(stream) as stream:
|
||||
pdf = PdfFileReader(stream)
|
||||
output = PdfFileWriter()
|
||||
|
||||
if len(pdf.pages) >= 1:
|
||||
output.addPage(pdf.getPage(0))
|
||||
|
||||
with TemporaryDirectory('_pdfmeta') as tdir:
|
||||
cover_path = os.path.join(tdir, 'cover.pdf')
|
||||
|
||||
outputStream = file(cover_path, "wb")
|
||||
pdf = PdfFileReader(stream)
|
||||
output = PdfFileWriter()
|
||||
|
||||
if len(pdf.pages) >= 1:
|
||||
output.addPage(pdf.getPage(0))
|
||||
|
||||
with TemporaryDirectory('_pdfmeta') as tdir:
|
||||
cover_path = os.path.join(tdir, 'cover.pdf')
|
||||
|
||||
with open(cover_path, "wb") as outputStream:
|
||||
output.write(outputStream)
|
||||
outputStream.close()
|
||||
|
||||
|
||||
with ImageMagick():
|
||||
wand = NewMagickWand()
|
||||
MagickReadImage(wand, cover_path)
|
||||
MagickSetImageFormat(wand, 'JPEG')
|
||||
MagickWriteImage(wand, '%s.jpg' % cover_path)
|
||||
|
||||
|
||||
img = Image.open('%s.jpg' % cover_path)
|
||||
|
||||
img.save(data, 'JPEG')
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
return data.getvalue()
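The MetadataWriter thread added to set_metadata() above is a watchdog for pyPdf: the write runs on a daemon thread, the caller waits at most 10 seconds, then sets out_pdf.killed so that _sweepIndirectReferences() (see the pyPdf hunk further down) raises RuntimeError and aborts the write. A self-contained sketch of the same cooperative-kill pattern, not calibre code:

    import threading
    import time

    class KillableTask(threading.Thread):
        # Runs work() on a daemon thread; work() polls self.killed cooperatively.
        def __init__(self):
            threading.Thread.__init__(self)
            self.daemon = True
            self.killed = False

        def run(self):
            try:
                self.work()
            except RuntimeError:
                pass   # the task noticed self.killed and bailed out

        def work(self):
            for _ in range(100):        # stand-in for out_pdf.write(buf)
                if self.killed:
                    raise RuntimeError('killed')
                time.sleep(0.1)

    task = KillableTask()
    task.start()
    task.join(10)        # like writer.join(10): wait up to 10 seconds
    task.killed = True   # ask a still-running task to stop
    task.join()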
|
||||
|
||||
|
@ -272,11 +272,7 @@ def XPath(expr):
|
||||
def xpath(elem, expr):
|
||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||
|
||||
def _prepare_xml_for_serialization(root):
|
||||
pass
|
||||
|
||||
def xml2str(root, pretty_print=False, strip_comments=False):
|
||||
_prepare_xml_for_serialization(root)
|
||||
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
||||
pretty_print=pretty_print)
|
||||
|
||||
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
|
||||
|
||||
|
||||
def xml2unicode(root, pretty_print=False):
|
||||
_prepare_xml_for_serialization(root)
|
||||
return etree.tostring(root, pretty_print=pretty_print)
|
||||
|
||||
ASCII_CHARS = set(chr(x) for x in xrange(128))
|
||||
@ -321,6 +316,25 @@ def urlnormalize(href):
|
||||
parts = (urlquote(part) for part in parts)
|
||||
return urlunparse(parts)
|
||||
|
||||
class DummyHandler(logging.Handler):
|
||||
|
||||
def __init__(self):
|
||||
logging.Handler.__init__(self, logging.WARNING)
|
||||
self.setFormatter(logging.Formatter('%(message)s'))
|
||||
self.log = None
|
||||
|
||||
def emit(self, record):
|
||||
if self.log is not None:
|
||||
msg = self.format(record)
|
||||
f = self.log.error if record.levelno >= logging.ERROR \
|
||||
else self.log.warn
|
||||
f(msg)
|
||||
|
||||
|
||||
_css_logger = logging.getLogger('calibre.css')
|
||||
_css_logger.setLevel(logging.WARNING)
|
||||
_css_log_handler = DummyHandler()
|
||||
_css_logger.addHandler(_css_log_handler)
|
||||
|
||||
class OEBError(Exception):
|
||||
"""Generic OEB-processing error."""
|
||||
@ -778,7 +792,8 @@ class Manifest(object):
|
||||
data = self.oeb.css_preprocessor(data)
|
||||
data = XHTML_CSS_NAMESPACE + data
|
||||
parser = CSSParser(loglevel=logging.WARNING,
|
||||
fetcher=self._fetch_css)
|
||||
fetcher=self._fetch_css,
|
||||
log=_css_logger)
|
||||
data = parser.parseString(data, href=self.href)
|
||||
data.namespaces['h'] = XHTML_NS
|
||||
return data
|
||||
@ -1435,7 +1450,7 @@ class OEBBook(object):
|
||||
:attr:`pages`: List of "pages," such as indexed to a print edition of
|
||||
the same text.
|
||||
"""
|
||||
|
||||
_css_log_handler.log = logger
|
||||
self.encoding = encoding
|
||||
self.html_preprocessor = html_preprocessor
|
||||
self.css_preprocessor = css_preprocessor
|
||||
@ -1450,6 +1465,7 @@ class OEBBook(object):
|
||||
self.guide = Guide(self)
|
||||
self.toc = TOC()
|
||||
self.pages = PageList()
|
||||
self.auto_generated_toc = True
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
|
@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase
|
||||
|
||||
from calibre.customize.ui import available_input_formats
|
||||
from calibre.ebooks.epub.from_html import TITLEPAGE
|
||||
from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.zipfile import safe_replace, ZipFile
|
||||
from calibre.utils.config import DynamicConfig
|
||||
from calibre.utils.logging import Log
|
||||
from calibre import CurrentDir
|
||||
|
||||
def character_count(html):
|
||||
'''
|
||||
@ -57,31 +56,21 @@ class FakeOpts(object):
|
||||
max_levels = 5
|
||||
input_encoding = None
|
||||
|
||||
def html2opf(path, tdir, log):
|
||||
from calibre.ebooks.html.input import get_filelist
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
with CurrentDir(tdir):
|
||||
fl = get_filelist(path, tdir, FakeOpts(), log)
|
||||
mi = get_metadata(open(path, 'rb'), 'html')
|
||||
mi = OPFCreator(os.getcwdu(), mi)
|
||||
mi.guide = None
|
||||
entries = [(f.path, 'application/xhtml+xml') for f in fl]
|
||||
mi.create_manifest(entries)
|
||||
mi.create_spine([f.path for f in fl])
|
||||
|
||||
mi.render(open('metadata.opf', 'wb'))
|
||||
opfpath = os.path.abspath('metadata.opf')
|
||||
|
||||
return opfpath
|
||||
|
||||
def opf2opf(path, tdir, opts):
|
||||
return path
|
||||
|
||||
def is_supported(path):
|
||||
ext = os.path.splitext(path)[1].replace('.', '').lower()
|
||||
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
|
||||
return ext in available_input_formats()
|
||||
|
||||
|
||||
def write_oebbook(oeb, path):
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
from calibre import walk
|
||||
w = OEBWriter()
|
||||
w(oeb, path)
|
||||
for f in walk(path):
|
||||
if f.endswith('.opf'):
|
||||
return f
|
||||
|
||||
class EbookIterator(object):
|
||||
|
||||
CHARACTERS_PER_PAGE = 1000
|
||||
@ -131,17 +120,16 @@ class EbookIterator(object):
|
||||
def __enter__(self):
|
||||
self._tdir = TemporaryDirectory('_ebook_iter')
|
||||
self.base = self._tdir.__enter__()
|
||||
if self.ebook_ext == 'opf':
|
||||
self.pathtoopf = self.pathtoebook
|
||||
elif self.ebook_ext == 'html':
|
||||
self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
|
||||
else:
|
||||
from calibre.ebooks.conversion.plumber import Plumber
|
||||
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
||||
plumber.setup_options()
|
||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||
plumber.opts, plumber.input_fmt, self.log,
|
||||
{}, self.base)
|
||||
from calibre.ebooks.conversion.plumber import Plumber
|
||||
plumber = Plumber(self.pathtoebook, self.base, self.log)
|
||||
plumber.setup_options()
|
||||
if hasattr(plumber.opts, 'dont_package'):
|
||||
plumber.opts.dont_package = True
|
||||
self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
|
||||
plumber.opts, plumber.input_fmt, self.log,
|
||||
{}, self.base)
|
||||
if hasattr(self.pathtoopf, 'manifest'):
|
||||
self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
|
||||
|
||||
|
||||
self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
|
||||
|
@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
|
||||
author = 'Kovid Goyal'
|
||||
file_type = 'oeb'
|
||||
|
||||
|
||||
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||
self.log, self.opts = log, opts
|
||||
if not os.path.exists(output_path):
|
||||
|
@ -349,6 +349,7 @@ class OEBReader(object):
|
||||
def _toc_from_ncx(self, item):
|
||||
if item is None:
|
||||
return False
|
||||
self.log.debug('Reading TOC from NCX...')
|
||||
ncx = item.data
|
||||
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
|
||||
title = COLLAPSE_RE.sub(' ', title.strip())
|
||||
@ -364,6 +365,7 @@ class OEBReader(object):
|
||||
result = xpath(opf, 'o2:tours/o2:tour')
|
||||
if not result:
|
||||
return False
|
||||
self.log.debug('Reading TOC from tour...')
|
||||
tour = result[0]
|
||||
toc = self.oeb.toc
|
||||
toc.title = tour.get('title')
|
||||
@ -384,6 +386,7 @@ class OEBReader(object):
|
||||
def _toc_from_html(self, opf):
|
||||
if 'toc' not in self.oeb.guide:
|
||||
return False
|
||||
self.log.debug('Reading TOC from HTML...')
|
||||
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
|
||||
item = self.oeb.manifest.hrefs[itempath]
|
||||
html = item.data
|
||||
@ -414,6 +417,7 @@ class OEBReader(object):
|
||||
return True
|
||||
|
||||
def _toc_from_spine(self, opf):
|
||||
self.log.warn('Generating default TOC from spine...')
|
||||
toc = self.oeb.toc
|
||||
titles = []
|
||||
headers = []
|
||||
@ -441,11 +445,14 @@ class OEBReader(object):
|
||||
return True
|
||||
|
||||
def _toc_from_opf(self, opf, item):
|
||||
self.oeb.auto_generated_toc = False
|
||||
if self._toc_from_ncx(item): return
|
||||
if self._toc_from_tour(opf): return
|
||||
self.logger.warn('No metadata table of contents found')
|
||||
# Prefer HTML to tour based TOC, since several LIT files
|
||||
# have good HTML TOCs but bad tour based TOCs
|
||||
if self._toc_from_html(opf): return
|
||||
if self._toc_from_tour(opf): return
|
||||
self._toc_from_spine(opf)
|
||||
self.oeb.auto_generated_toc = True
|
||||
|
||||
def _pages_from_ncx(self, opf, item):
|
||||
if item is None:
|
||||
|
@ -51,8 +51,8 @@ class Split(object):
|
||||
self.log = oeb.log
|
||||
self.map = {}
|
||||
self.page_break_selectors = None
|
||||
for item in self.oeb.manifest.items:
|
||||
if etree.iselement(item.data):
|
||||
for item in list(self.oeb.manifest.items):
|
||||
if item.spine_position is not None and etree.iselement(item.data):
|
||||
self.split_item(item)
|
||||
|
||||
self.fix_links()
|
||||
@ -74,31 +74,34 @@ class Split(object):
|
||||
self.page_break_selectors = set([])
|
||||
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
||||
OEB_STYLES]
|
||||
page_break_selectors = set([])
|
||||
for rule in rules(stylesheets):
|
||||
before = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-before'), 'cssText', '').strip().lower()
|
||||
after = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-after'), 'cssText', '').strip().lower()
|
||||
try:
|
||||
if before and before != 'avoid':
|
||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
True))
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
if after and after != 'avoid':
|
||||
page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
False))
|
||||
except:
|
||||
pass
|
||||
for rule in rules(stylesheets):
|
||||
before = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-before'), 'cssText', '').strip().lower()
|
||||
after = getattr(rule.style.getPropertyCSSValue(
|
||||
'page-break-after'), 'cssText', '').strip().lower()
|
||||
try:
|
||||
if before and before != 'avoid':
|
||||
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
True))
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
if after and after != 'avoid':
|
||||
self.page_break_selectors.add((CSSSelector(rule.selectorText),
|
||||
False))
|
||||
except:
|
||||
pass
|
||||
|
||||
page_breaks = set([])
|
||||
for selector, before in page_break_selectors:
|
||||
for elem in selector(item.data):
|
||||
if before:
|
||||
elem.set('pb_before', '1')
|
||||
page_breaks.add(elem)
|
||||
for selector, before in self.page_break_selectors:
|
||||
body = item.data.xpath('//h:body', namespaces=NAMESPACES)
|
||||
if not body:
|
||||
continue
|
||||
for elem in selector(body[0]):
|
||||
if elem not in body:
|
||||
if before:
|
||||
elem.set('pb_before', '1')
|
||||
page_breaks.add(elem)
|
||||
|
||||
for i, elem in enumerate(item.data.iter()):
|
||||
elem.set('pb_order', str(i))
|
||||
@ -136,8 +139,10 @@ class Split(object):
|
||||
if href in self.map:
|
||||
anchor_map = self.map[href]
|
||||
nhref = anchor_map[frag if frag else None]
|
||||
nhref = self.current_item.relhref(nhref)
|
||||
if frag:
|
||||
nhref = '#'.join(href, frag)
|
||||
nhref = '#'.join((nhref, frag))
|
||||
|
||||
return nhref
|
||||
return url
|
||||
|
||||
@ -153,7 +158,7 @@ class FlowSplitter(object):
|
||||
self.page_breaks = page_breaks
|
||||
self.page_break_ids = page_break_ids
|
||||
self.max_flow_size = max_flow_size
|
||||
self.base = item.abshref(item.href)
|
||||
self.base = item.href
|
||||
|
||||
base, ext = os.path.splitext(self.base)
|
||||
self.base = base.replace('%', '%%')+'_split_%d'+ext
|
||||
@ -192,9 +197,9 @@ class FlowSplitter(object):
|
||||
self.trees = []
|
||||
tree = orig_tree
|
||||
for pattern, before in ordered_ids:
|
||||
self.log.debug('\t\tSplitting on page-break')
|
||||
elem = pattern(tree)
|
||||
if elem:
|
||||
self.log.debug('\t\tSplitting on page-break')
|
||||
before, after = self.do_split(tree, elem[0], before)
|
||||
self.trees.append(before)
|
||||
tree = after
|
||||
@ -414,13 +419,14 @@ class FlowSplitter(object):
|
||||
elem.attrib.pop(SPLIT_ATTR, None)
|
||||
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
|
||||
|
||||
spine_pos = self.item.spine_pos
|
||||
for current, tree in zip(map(reversed, (self.files, self.trees))):
|
||||
spine_pos = self.item.spine_position
|
||||
for current, tree in zip(*map(reversed, (self.files, self.trees))):
|
||||
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
|
||||
href = a.get('href').strip()
|
||||
if href.startswith('#'):
|
||||
anchor = href[1:]
|
||||
file = self.anchor_map[anchor]
|
||||
file = self.item.relhref(file)
|
||||
if file != current:
|
||||
a.set('href', file+href)
|
||||
|
||||
@ -430,12 +436,12 @@ class FlowSplitter(object):
|
||||
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
|
||||
|
||||
if self.oeb.guide:
|
||||
for ref in self.oeb.guide:
|
||||
for ref in self.oeb.guide.values():
|
||||
href, frag = urldefrag(ref.href)
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
nhref = '#'.join(nhref, frag)
|
||||
nhref = '#'.join((nhref, frag))
|
||||
ref.href = nhref
|
||||
|
||||
def fix_toc_entry(toc):
|
||||
@ -444,7 +450,7 @@ class FlowSplitter(object):
|
||||
if href == self.item.href:
|
||||
nhref = self.anchor_map[frag if frag else None]
|
||||
if frag:
|
||||
nhref = '#'.join(nhref, frag)
|
||||
nhref = '#'.join((nhref, frag))
|
||||
toc.href = nhref
|
||||
for x in toc:
|
||||
fix_toc_entry(x)
|
||||
|
@ -49,7 +49,7 @@ class OEBWriter(object):
|
||||
|
||||
def __call__(self, oeb, path):
|
||||
"""
|
||||
Read the book in the :class:`OEBBook` object :param:`oeb` to a file
|
||||
Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
|
||||
at :param:`path`.
|
||||
"""
|
||||
version = int(self.version[0])
|
||||
|
@ -319,6 +319,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
self.cover_changed = True
|
||||
|
||||
def initialize_series(self):
|
||||
self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)
|
||||
all_series = self.db.all_series()
|
||||
all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
|
||||
series_id = self.db.series_id(self.row)
|
||||
@ -335,13 +336,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
||||
self.series.setCurrentIndex(idx)
|
||||
self.enable_series_index()
|
||||
|
||||
pl = self.series.parentWidget().layout()
|
||||
for i in range(pl.count()):
|
||||
l = pl.itemAt(i).layout()
|
||||
if l:
|
||||
l.invalidate()
|
||||
l.activate()
|
||||
|
||||
def initialize_series_and_publisher(self):
|
||||
self.initialize_series()
|
||||
all_publishers = self.db.all_publishers()
|
||||
|
BIN src/calibre/gui2/images/news/der_standard.png (new binary file, 509 B)
BIN src/calibre/gui2/images/news/diepresse.png (new binary file, 637 B)
BIN src/calibre/gui2/images/news/seattle_times.png (new binary file, 746 B)
@ -40,6 +40,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
|
||||
'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
|
||||
'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
|
||||
'seattle_times',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
@ -1,14 +1,37 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
||||
|
||||
''' http://www.derstandard.at - Austrian Newspaper '''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DerStandardRecipe(BasicNewsRecipe):
|
||||
title = u'derStandard'
|
||||
__author__ = 'Gerhard Aigner'
|
||||
|
||||
title = u'derStandard'
|
||||
__author__ = 'Gerhard Aigner'
|
||||
description = u'Nachrichten aus Österreich'
|
||||
publisher ='derStandard.at'
|
||||
category = 'news, politics, nachrichten, Austria'
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
lang = 'de-AT'
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
language = _('German')
|
||||
recursions = 0
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
|
||||
(u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
|
||||
(u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
|
||||
@ -20,17 +43,13 @@ class DerStandardRecipe(BasicNewsRecipe):
|
||||
(u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
|
||||
(u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
|
||||
(u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
|
||||
|
||||
encoding = 'utf-8'
|
||||
language = _('German')
|
||||
recursions = 0
|
||||
remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
|
||||
dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
|
||||
]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('?id=', 'txt/?id=')
|
||||
|
||||
@ -40,3 +59,10 @@ class DerStandardRecipe(BasicNewsRecipe):
|
||||
if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
|
||||
return None
|
||||
return article.link
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
@ -1,18 +1,42 @@
|
||||
import re
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
||||
|
||||
''' http://www.diepresse.at - Austrian Newspaper '''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DiePresseRecipe(BasicNewsRecipe):
|
||||
title = u'diePresse'
|
||||
title = u'diePresse'
|
||||
__author__ = 'Gerhard Aigner'
|
||||
description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.'
|
||||
publisher ='DiePresse.com'
|
||||
category = 'news, politics, nachrichten, Austria'
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
lang = 'de-AT'
|
||||
no_stylesheets = True
|
||||
encoding = 'ISO-8859-1'
|
||||
language = _('German')
|
||||
recursions = 0
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
recursions = 0
|
||||
language = _('German')
|
||||
__author__ = 'Gerhard Aigner'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'Textversion', re.DOTALL), lambda match: ''),
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='hr'),
|
||||
dict(name='br'),
|
||||
dict(name='small'),
|
||||
@ -21,6 +45,7 @@ class DiePresseRecipe(BasicNewsRecipe):
|
||||
dict(name='h1', attrs={'class':'titel'}),
|
||||
dict(name='a', attrs={'class':'print'}),
|
||||
dict(name='div', attrs={'class':'hline'})]
|
||||
|
||||
feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
|
||||
(u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
|
||||
(u'Europa', u'http://diepresse.com/rss/EU'),
|
||||
@ -29,7 +54,7 @@ class DiePresseRecipe(BasicNewsRecipe):
|
||||
(u'Kultur', u'http://diepresse.com/rss/Kultur'),
|
||||
(u'Leben', u'http://diepresse.com/rss/Leben'),
|
||||
(u'Tech', u'http://diepresse.com/rss/Tech'),
|
||||
(u'Science', u'http://diepresse.com/rss/Science'),
|
||||
(u'Wissenschaft', u'http://diepresse.com/rss/Science'),
|
||||
(u'Bildung', u'http://diepresse.com/rss/Bildung'),
|
||||
(u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
|
||||
(u'Recht', u'http://diepresse.com/rss/Recht'),
|
||||
@ -38,3 +63,10 @@ class DiePresseRecipe(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('home','text/home')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
src/calibre/web/feeds/recipes/recipe_seattle_times.py (new file, 50 lines)
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
seattletimes.nwsource.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SeattleTimes(BasicNewsRecipe):
|
||||
title = 'The Seattle Times'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Seattle and USA'
|
||||
publisher = 'The Seattle Times'
|
||||
category = 'news, politics, USA'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
language = _('English')
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
feeds = [(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script'])
|
||||
,dict(name='p', attrs={'class':'permission'})
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
start_url, sep, rest_url = url.rpartition('_')
|
||||
rurl, rsep, article_id = start_url.rpartition('/')
|
||||
return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="en-US"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
@ -299,7 +299,7 @@ def readStringFromStream(stream):
|
||||
elif tok == "t":
|
||||
tok = "\t"
|
||||
elif tok == "b":
|
||||
tok == "\b"
|
||||
tok = "\b"
|
||||
elif tok == "f":
|
||||
tok = "\f"
|
||||
elif tok == "(":
|
||||
@ -673,7 +673,7 @@ class RectangleObject(ArrayObject):
|
||||
|
||||
def getUpperLeft_x(self):
|
||||
return self.getLowerLeft_x()
|
||||
|
||||
|
||||
def getUpperLeft_y(self):
|
||||
return self.getUpperRight_y()
|
||||
|
||||
|
@ -39,15 +39,12 @@ __author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
import struct
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
from cStringIO import StringIO
|
||||
|
||||
import filters
|
||||
import utils
|
||||
import warnings
|
||||
from generic import *
|
||||
from generic import DictionaryObject, NameObject, NumberObject, \
|
||||
createStringObject, ArrayObject, ByteStringObject, StreamObject, \
|
||||
IndirectObject, utils, readObject, TextStringObject, BooleanObject, \
|
||||
RectangleObject, DecodedStreamObject
|
||||
from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
|
||||
|
||||
|
||||
@ -56,6 +53,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
|
||||
# class (typically {@link #PdfFileReader PdfFileReader}).
|
||||
class PdfFileWriter(object):
|
||||
def __init__(self,title=u"Unknown",author=u"Unknown"):
|
||||
self.killed = False
|
||||
self._header = "%PDF-1.3"
|
||||
self._objects = [] # array of indirect objects
|
||||
|
||||
@ -162,7 +160,7 @@ class PdfFileWriter(object):
|
||||
# @param stream An object to write the file to. The object must support
|
||||
# the write method, and the tell method, similar to a file object.
|
||||
def write(self, stream):
|
||||
import struct, md5
|
||||
import md5
|
||||
|
||||
externalReferenceMap = {}
|
||||
self.stack = []
|
||||
@ -209,11 +207,13 @@ class PdfFileWriter(object):
|
||||
if hasattr(self, "_encrypt"):
|
||||
trailer[NameObject("/Encrypt")] = self._encrypt
|
||||
trailer.writeToStream(stream, None)
|
||||
|
||||
|
||||
# eof
|
||||
stream.write("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))
|
||||
|
||||
def _sweepIndirectReferences(self, externMap, data):
|
||||
if self.killed:
|
||||
raise RuntimeError('Writer killed')
|
||||
if isinstance(data, DictionaryObject):
|
||||
for key, value in data.items():
|
||||
origvalue = value
|
||||
@ -356,8 +356,8 @@ class PdfFileReader(object):
|
||||
return self.flattenedPages[pageNumber]
|
||||
|
||||
##
|
||||
# Read-only property that accesses the
|
||||
# {@link #PdfFileReader.getNamedDestinations
|
||||
# Read-only property that accesses the
|
||||
# {@link #PdfFileReader.getNamedDestinations
|
||||
# getNamedDestinations} function.
|
||||
# <p>
|
||||
# Stability: Added in v1.10, will exist for all future v1.x releases.
|
||||
@ -374,7 +374,7 @@ class PdfFileReader(object):
|
||||
if retval == None:
|
||||
retval = {}
|
||||
catalog = self.trailer["/Root"]
|
||||
|
||||
|
||||
# get the name tree
|
||||
if catalog.has_key("/Dests"):
|
||||
tree = catalog["/Dests"]
|
||||
@ -382,7 +382,7 @@ class PdfFileReader(object):
|
||||
names = catalog['/Names']
|
||||
if names.has_key("/Dests"):
|
||||
tree = names['/Dests']
|
||||
|
||||
|
||||
if tree == None:
|
||||
return retval
|
||||
|
||||
@ -420,17 +420,17 @@ class PdfFileReader(object):
|
||||
if outlines == None:
|
||||
outlines = []
|
||||
catalog = self.trailer["/Root"]
|
||||
|
||||
|
||||
# get the outline dictionary and named destinations
|
||||
if catalog.has_key("/Outlines"):
|
||||
lines = catalog["/Outlines"]
|
||||
if lines.has_key("/First"):
|
||||
node = lines["/First"]
|
||||
self._namedDests = self.getNamedDestinations()
|
||||
|
||||
|
||||
if node == None:
|
||||
return outlines
|
||||
|
||||
|
||||
# see if there are any more outlines
|
||||
while 1:
|
||||
outline = self._buildOutline(node)
|
||||
@ -454,10 +454,10 @@ class PdfFileReader(object):
|
||||
page, typ = array[0:2]
|
||||
array = array[2:]
|
||||
return Destination(title, page, typ, *array)
|
||||
|
||||
|
||||
def _buildOutline(self, node):
|
||||
dest, title, outline = None, None, None
|
||||
|
||||
|
||||
if node.has_key("/A") and node.has_key("/Title"):
|
||||
# Action, section 8.5 (only type GoTo supported)
|
||||
title = node["/Title"]
|
||||
@ -951,7 +951,7 @@ class PageObject(DictionaryObject):
|
||||
|
||||
def _pushPopGS(contents, pdf):
|
||||
# adds a graphics state "push" and "pop" to the beginning and end
|
||||
# of a content stream. This isolates it from changes such as
|
||||
# of a content stream. This isolates it from changes such as
|
||||
# transformation matricies.
|
||||
stream = ContentStream(contents, pdf)
|
||||
stream.operations.insert(0, [[], "q"])
|
||||
@ -1291,7 +1291,7 @@ class Destination(DictionaryObject):
|
||||
self[NameObject("/Title")] = title
|
||||
self[NameObject("/Page")] = page
|
||||
self[NameObject("/Type")] = typ
|
||||
|
||||
|
||||
# from table 8.2 of the PDF 1.6 reference.
|
||||
if typ == "/XYZ":
|
||||
(self[NameObject("/Left")], self[NameObject("/Top")],
|
||||
@ -1307,7 +1307,7 @@ class Destination(DictionaryObject):
|
||||
pass
|
||||
else:
|
||||
raise utils.PdfReadError("Unknown Destination Type: %r" % typ)
|
||||
|
||||
|
||||
##
|
||||
# Read-only property accessing the destination title.
|
||||
# @return A string.
|
||||
@ -1474,25 +1474,25 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
||||
# described in Algorithm 3.2.
|
||||
key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
|
||||
# 2. Initialize the MD5 hash function and pass the 32-byte padding string
|
||||
# shown in step 1 of Algorithm 3.2 as input to this function.
|
||||
# shown in step 1 of Algorithm 3.2 as input to this function.
|
||||
import md5
|
||||
m = md5.new()
|
||||
m.update(_encryption_padding)
|
||||
# 3. Pass the first element of the file's file identifier array (the value
|
||||
# of the ID entry in the document's trailer dictionary; see Table 3.13 on
|
||||
# page 73) to the hash function and finish the hash. (See implementation
|
||||
# note 25 in Appendix H.)
|
||||
# note 25 in Appendix H.)
|
||||
m.update(id1_entry)
|
||||
md5_hash = m.digest()
|
||||
# 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
|
||||
# function with the encryption key from step 1.
|
||||
# function with the encryption key from step 1.
|
||||
val = utils.RC4_encrypt(key, md5_hash)
|
||||
# 5. Do the following 19 times: Take the output from the previous
|
||||
# invocation of the RC4 function and pass it as input to a new invocation
|
||||
# of the function; use an encryption key generated by taking each byte of
|
||||
# the original encryption key (obtained in step 2) and performing an XOR
|
||||
# operation between that byte and the single-byte value of the iteration
|
||||
# counter (from 1 to 19).
|
||||
# counter (from 1 to 19).
|
||||
for i in range(1, 20):
|
||||
new_key = ''
|
||||
for l in range(len(key)):
|
||||
@ -1500,7 +1500,7 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
|
||||
val = utils.RC4_encrypt(new_key, val)
|
||||
# 6. Append 16 bytes of arbitrary padding to the output from the final
|
||||
# invocation of the RC4 function and store the 32-byte result as the value
|
||||
# of the U entry in the encryption dictionary.
|
||||
# of the U entry in the encryption dictionary.
|
||||
# (implementator note: I don't know what "arbitrary padding" is supposed to
|
||||
# mean, so I have used null bytes. This seems to match a few other
|
||||
# people's implementations)
|