pdf get_cover returns cover image instead of nothing.

2025-07-09 03:04:10 -04:00 · 2009-04-18 07:54:56 -04:00 · 2009-04-18 07:54:56 -04:00 · b104286f61
commit b104286f61
parent 37b820b046 f969ed39fe
24 changed files with 405 additions and 210 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -263,14 +263,14 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
    def set_metadata(self, stream, mi, type):
        '''
        Hand ``stream`` and ``mi`` to
        :func:`calibre.ebooks.metadata.mobi.set_metadata`.

        ``type`` is not used by this implementation.
        '''
        # Imported lazily, inside the call, exactly as before.
        from calibre.ebooks.metadata.mobi import set_metadata as write_mobi
        write_mobi(stream, mi)
-        
+
 class PDFMetadataWriter(MetadataWriterPlugin):
    name        = 'Set PDF metadata'
    file_types  = set(['pdf'])
    description = _('Set metadata in %s files') % 'PDF'
    author      = 'John Schember'
-    
+
    def set_metadata(self, stream, mi, type):
        '''
        Hand ``stream`` and ``mi`` to
        :func:`calibre.ebooks.metadata.pdf.set_metadata`.

        ``type`` is not used by this implementation.
        '''
        # Imported lazily, inside the call, exactly as before.
        from calibre.ebooks.metadata.pdf import set_metadata as write_pdf
        write_pdf(stream, mi)
@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.pdf.input import PDFInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.txt.output import TXTOutput
@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput
 from calibre.customize.profiles import input_profiles, output_profiles
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
-        TXTInput, OEBOutput, TXTOutput, PDFOutput]
+        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -41,6 +41,11 @@ class ConversionOption(object):
    def __eq__(self, other):
        '''
        Compare by hash: the result is True exactly when ``hash(self)``
        and ``hash(other)`` are the same value.
        '''
        mine = hash(self)
        theirs = hash(other)
        return mine == theirs
    def clone(self):
        '''
        Return a new ConversionOption built from this option's name, help,
        long_switch, short_switch and choices.

        The attribute values themselves are passed through as-is, they are
        not copied.
        '''
        fields = ('name', 'help', 'long_switch', 'short_switch', 'choices')
        settings = dict((key, getattr(self, key)) for key in fields)
        return ConversionOption(**settings)
 class OptionRecommendation(object):
    LOW  = 1
    MED  = 2
@ -59,6 +64,10 @@ class OptionRecommendation(object):
        self.validate_parameters()
    def clone(self):
        '''
        Return a new OptionRecommendation with the same recommended value
        and level, wrapping a clone of this recommendation's option.
        '''
        value = self.recommended_value
        level = self.level
        # The wrapped option is cloned as well, so the result does not share
        # the option object with this instance.
        option_copy = self.option.clone()
        return OptionRecommendation(recommended_value=value, level=level,
                option=option_copy)
    def validate_parameters(self):
        if self.option.choices and self.recommended_value not in \
                                                    self.option.choices:
@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin):
            options.debug_input = os.path.abspath(options.debug_input)
            if not os.path.exists(options.debug_input):
                os.makedirs(options.debug_input)
-            shutil.rmtree(options.debug_input)
+            if isinstance(ret, basestring):
-            shutil.copytree(output_dir, options.debug_input)
+                shutil.rmtree(options.debug_input)
                shutil.copytree(output_dir, options.debug_input)
            else:
                from calibre.ebooks.oeb.writer import OEBWriter
                w = OEBWriter(pretty_print=options.pretty_print)
                w(ret, options.debug_input)
            log.info('Input debug saved to:', options.debug_input)
        return ret
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log):
        raise SystemExit(1)
    output = args[2]
-    if output.startswith('.'):
+    if output.startswith('.') and output != '.':
        output = os.path.splitext(os.path.basename(input))[0]+output
    output = os.path.abspath(output)
@ -171,7 +171,8 @@ def main(args=sys.argv):
    plumber.run()
-    log(_('Output saved to'), ' ', plumber.output)
+    if plumber.opts.debug_input is None:
        log(_('Output saved to'), ' ', plumber.output)
    return 0
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -32,8 +32,8 @@ class Plumber(object):
        :param input: Path to input file.
        :param output: Path to output file/directory
        '''
-        self.input = input
+        self.input = os.path.abspath(input)
-        self.output = output
+        self.output = os.path.abspath(output)
        self.log = log
        # Initialize the conversion options that are independent of input and
@ -188,15 +188,15 @@ OptionRecommendation(name='language',
 ]
-        input_fmt = os.path.splitext(input)[1]
+        input_fmt = os.path.splitext(self.input)[1]
        if not input_fmt:
            raise ValueError('Input file must have an extension')
        input_fmt = input_fmt[1:].lower()
-        if os.path.exists(output) and os.path.isdir(output):
+        if os.path.exists(self.output) and os.path.isdir(self.output):
            output_fmt = 'oeb'
        else:
-            output_fmt = os.path.splitext(output)[1]
+            output_fmt = os.path.splitext(self.output)[1]
            if not output_fmt:
                output_fmt = '.oeb'
            output_fmt = output_fmt[1:].lower()
@ -323,6 +323,9 @@ OptionRecommendation(name='language',
        self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
                                    self.input_fmt, self.log,
                                    accelerators, tdir)
        if self.opts.debug_input is not None:
            self.log('Debug input called, aborting the rest of the pipeline.')
            return
        if not hasattr(self.oeb, 'manifest'):
            self.oeb = create_oebbook(self.log, self.oeb, self.opts)
@ -365,18 +368,20 @@ OptionRecommendation(name='language',
        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
                self.opts, self.log)
-def create_oebbook(log, opfpath, opts):
+def create_oebbook(log, path_or_stream, opts, reader=None):
    '''
-    Create an OEBBook from an OPF file.
+    Create an OEBBook.
    '''
    from calibre.ebooks.oeb.reader import OEBReader
    from calibre.ebooks.oeb.base import OEBBook
    html_preprocessor = HTMLPreProcessor()
    reader = OEBReader()
    oeb = OEBBook(log, html_preprocessor=html_preprocessor,
            pretty_print=opts.pretty_print)
    # Read OEB Book into OEBBook
-    log.info('Parsing all content...')
+    log('Parsing all content...')
-    reader(oeb, opfpath)
+    if reader is None:
        from calibre.ebooks.oeb.reader import OEBReader
        reader = OEBReader
    reader()(oeb, path_or_stream)
    return oeb
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin):
                   )
        ),
        OptionRecommendation(name='dont_package',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                'files into a standard folder hierarchy. Only use this option '
                'if you know what you are doing as it can result in various '
                'nasty side effects in the rest of of the conversion pipeline.'
                )
        ),
    ])
    def convert(self, stream, opts, file_ext, log,
@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin):
            mi.render(open('metadata.opf', 'wb'))
            opfpath = os.path.abspath('metadata.opf')
        if opts.dont_package:
            return opfpath
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, opfpath, opts)
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@ -0,0 +1,24 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.customize.conversion import InputFormatPlugin
 class LITInput(InputFormatPlugin):
    '''
    Input plugin registered for the ``lit`` file type.
    '''
    name        = 'LIT Input'
    author      = 'Marshall T. Vandegrift'
    description = 'Convert LIT files to HTML'
    file_types  = set(['lit'])
    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Build and return an OEB book from ``stream`` by calling
        ``create_oebbook`` with ``LitReader`` as the reader.

        ``file_ext`` and ``accelerators`` are not used here.
        '''
        # Both imports stay local to the call, in the original order.
        from calibre.ebooks.lit.reader import LitReader
        from calibre.ebooks.conversion.plumber import create_oebbook
        book = create_oebbook(log, stream, options, reader=LitReader)
        return book
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -7,13 +7,12 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
    'and Marshall T. Vandegrift <llasram@gmail.com>'
-import sys, struct, os
+import struct, os
 import functools
 import re
 from urlparse import urldefrag
 from cStringIO import StringIO
 from urllib import unquote as urlunquote
 from lxml import etree
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 import calibre.ebooks.lit.mssha1 as mssha1
@ -29,12 +28,12 @@ __all__ = ["LitReader"]
 XML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
 """
 OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
-<!DOCTYPE package 
+<!DOCTYPE package
  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
 """
 HTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
-<!DOCTYPE html PUBLIC 
+<!DOCTYPE html PUBLIC
 "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
 "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
 """
@ -73,7 +72,7 @@ def encint(bytes, remaining):
        val <<= 7
        val |= (b & 0x7f)
        if b & 0x80 == 0: break
-    return val, bytes[pos:], remaining 
+    return val, bytes[pos:], remaining
 def msguid(bytes):
    values = struct.unpack("<LHHBBBBBBBB", bytes[:16])
@ -123,7 +122,7 @@ class UnBinary(object):
    CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
    DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
    EMPTY_ATOMS = ({},{})
-    
+
    def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS):
        self.manifest = manifest
        self.tag_map, self.attr_map, self.tag_to_attr_map = map
@ -143,7 +142,7 @@ class UnBinary(object):
        raw = self.CLOSE_ANGLE_RE.sub(r'&gt;', raw)
        raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
        self.raw = raw
-    
+
    def item_path(self, internal_id):
        try:
            target = self.manifest[internal_id].path
@ -159,7 +158,7 @@ class UnBinary(object):
            index += 1
        relpath = (['..'] * (len(base) - index)) + target[index:]
        return '/'.join(relpath)
-    
+
    def __unicode__(self):
        '''Return ``self.raw`` decoded as UTF-8.'''
        encoded = self.raw
        return encoded.decode('utf-8')
@ -172,11 +171,11 @@ class UnBinary(object):
        in_censorship = is_goingdown = False
        state = 'text'
        flags = 0
-        
+
        while index < len(bin):
            c, index = read_utf8_char(bin, index)
            oc = ord(c)
-            
+
            if state == 'text':
                if oc == 0:
                    state = 'get flags'
@ -188,14 +187,14 @@ class UnBinary(object):
                elif c == '<':
                    c = '<<'
                buf.write(encode(c))
-            
+
            elif state == 'get flags':
                if oc == 0:
                    state = 'text'
                    continue
                flags = oc
                state = 'get tag'
-            
+
            elif state == 'get tag':
                state = 'text' if oc == 0 else 'get attr'
                if flags & FLAG_OPENING:
@ -226,7 +225,7 @@ class UnBinary(object):
                    if depth == 0:
                        raise LitError('Extra closing tag')
                    return index
-            
+
            elif state == 'get attr':
                in_censorship = False
                if oc == 0:
@ -265,7 +264,7 @@ class UnBinary(object):
                        state = 'get href length'
                    else:
                        state = 'get value length'
-            
+
            elif state == 'get value length':
                if not in_censorship:
                    buf.write('"')
@ -281,7 +280,7 @@ class UnBinary(object):
                    continue
                if count < 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)
-            
+
            elif state == 'get value':
                if count == 0xfffe:
                    if not in_censorship:
@ -301,7 +300,7 @@ class UnBinary(object):
                        buf.write('"')
                    in_censorship = False
                    state = 'get attr'
-            
+
            elif state == 'get custom length':
                count = oc - 1
                if count <= 0 or count > len(bin)-index:
@ -309,21 +308,21 @@ class UnBinary(object):
                dynamic_tag += 1
                state = 'get custom'
                tag_name = ''
-            
+
            elif state == 'get custom':
                tag_name += c
                count -= 1
                if count == 0:
                    buf.write(encode(tag_name))
                    state = 'get attr'
-            
+
            elif state == 'get attr length':
                count = oc - 1
                if count <= 0 or count > (len(bin) - index):
                    raise LitError('Invalid character count %d' % count)
                buf.write(' ')
                state = 'get custom attr'
-            
+
            elif state == 'get custom attr':
                buf.write(encode(c))
                count -= 1
@ -337,7 +336,7 @@ class UnBinary(object):
                    raise LitError('Invalid character count %d' % count)
                href = ''
                state = 'get href'
-                    
+
            elif state == 'get href':
                href += c
                count -= 1
@ -350,7 +349,7 @@ class UnBinary(object):
                    buf.write(encode(u'"%s"' % path))
                    state = 'get attr'
        return index
-    
+
 class DirectoryEntry(object):
    def __init__(self, name, section, offset, size):
@ -358,11 +357,11 @@ class DirectoryEntry(object):
        self.section = section
        self.offset = offset
        self.size = size
-        
+
    def __repr__(self):
        '''
        Return a constructor-like description of the entry showing its
        name, section, offset and size.
        '''
        template = "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)"
        values = (repr(self.name), self.section, self.offset, self.size)
        return template % values
-        
+
    def __str__(self):
        '''Return the same text as ``repr()`` of this entry.'''
        text = repr(self)
        return text
@ -382,12 +381,12 @@ class ManifestItem(object):
        path = os.path.normpath(path).replace('\\', '/')
        while path.startswith('../'): path = path[3:]
        self.path = path
-        
+
    def __eq__(self, other):
        '''
        Compare on the ``internal`` attribute.

        If ``other`` has an ``internal`` attribute the two ``internal``
        values are compared; otherwise ``other`` itself is compared against
        ``self.internal``.
        '''
        if not hasattr(other, 'internal'):
            # Plain value: compare it directly with our internal id.
            return self.internal == other
        return self.internal == other.internal
-    
+
    def __repr__(self):
        return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \
            "offset=%d, root=%r, state=%r)" \
@ -404,7 +403,7 @@ def preserve(function):
            self.stream.seek(opos)
    functools.update_wrapper(wrapper, function)
    return wrapper
-    
+
 class LitFile(object):
    PIECE_SIZE = 16
@ -438,14 +437,14 @@ class LitFile(object):
            return self.stream.read(8)
        return property(fget=fget)
    magic = magic()
-    
+
    def version():
        # Property factory: builds the read-only ``version`` property. The
        # statement after this function replaces the factory with the
        # property it returns.
        def fget(self):
            # The value is the 4 bytes read at stream offset 8, decoded with
            # u32().
            # NOTE(review): unlike the sibling getters (hdr_len, num_pieces,
            # ...) this one is not decorated with @preserve, so the stream
            # position is left wherever the read ends -- confirm intended.
            self.stream.seek(8)
            return u32(self.stream.read(4))
        return property(fget=fget)
    version = version()
-    
+
    def hdr_len():
        @preserve
        def fget(self):
@ -453,7 +452,7 @@ class LitFile(object):
            return int32(self.stream.read(4))
        return property(fget=fget)
    hdr_len = hdr_len()
-    
+
    def num_pieces():
        @preserve
        def fget(self):
@ -461,7 +460,7 @@ class LitFile(object):
            return int32(self.stream.read(4))
        return property(fget=fget)
    num_pieces = num_pieces()
-    
+
    def sec_hdr_len():
        @preserve
        def fget(self):
@ -469,7 +468,7 @@ class LitFile(object):
            return int32(self.stream.read(4))
        return property(fget=fget)
    sec_hdr_len = sec_hdr_len()
-    
+
    def guid():
        @preserve
        def fget(self):
@ -477,7 +476,7 @@ class LitFile(object):
            return self.stream.read(16)
        return property(fget=fget)
    guid = guid()
-    
+
    def header():
        @preserve
        def fget(self):
@ -488,7 +487,7 @@ class LitFile(object):
            return self.stream.read(size)
        return property(fget=fget)
    header = header()
-    
+
    @preserve
    def __len__(self):
        self.stream.seek(0, 2)
@ -501,7 +500,7 @@ class LitFile(object):
    def read_content(self, offset, size):
        '''
        Read ``size`` bytes via ``read_raw``, with ``offset`` taken relative
        to ``self.content_offset``.
        '''
        absolute = self.content_offset + offset
        return self.read_raw(absolute, size)
-    
+
    def read_secondary_header(self):
        offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
        bytes = self.read_raw(offset, self.sec_hdr_len)
@ -526,12 +525,12 @@ class LitFile(object):
                if u32(bytes[offset+4+16:]):
                    raise LitError('This file has a 64bit content offset')
                self.content_offset = u32(bytes[offset+16:])
-                self.timestamp      = u32(bytes[offset+24:]) 
+                self.timestamp      = u32(bytes[offset+24:])
                self.language_id    = u32(bytes[offset+28:])
                offset += 48
        if not hasattr(self, 'content_offset'):
            raise LitError('Could not figure out the content offset')
-    
+
    def read_header_pieces(self):
        src = self.header[self.hdr_len:]
        for i in xrange(self.num_pieces):
@ -556,7 +555,7 @@ class LitFile(object):
                self.piece3_guid = piece
            elif i == 4:
                self.piece4_guid = piece
-                
+
    def read_directory(self, piece):
        if not piece.startswith('IFCM'):
            raise LitError('Header piece #1 is not main directory.')
@ -760,9 +759,9 @@ class LitFile(object):
            raise LitError("Reset table is too short")
        if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
            raise LitError("Reset table has 64bit value for UCLENGTH")
-        
+
        result = []
-        
+
        window_size = 14
        u = u32(control[CONTROL_WINDOW_SIZE:])
        while u > 0:
@ -847,13 +846,13 @@ class LitContainer(object):
    def __init__(self, filename_or_stream):
        self._litfile = LitFile(filename_or_stream)
-    
+
    def namelist(self):
        '''Return the keys of the underlying LIT file's ``paths`` mapping.'''
        paths = self._litfile.paths
        return paths.keys()
    def exists(self, name):
        '''
        Return True if ``name``, after URL-unquoting, is a key of the
        underlying LIT file's ``paths`` mapping.
        '''
        decoded = urlunquote(name)
        return decoded in self._litfile.paths
-    
+
    def read(self, name):
        entry = self._litfile.paths[urlunquote(name)] if name else None
        if entry is None:
@ -869,7 +868,7 @@ class LitContainer(object):
            internal = '/'.join(('/data', entry.internal))
            content = self._litfile.get_file(internal)
        return content
-    
+
    def _read_meta(self):
        path = 'content.opf'
        raw = self._litfile.get_file('/meta')
--- a/src/calibre/ebooks/metadata/pdf.py
+++ b/src/calibre/ebooks/metadata/pdf.py
@ -1,10 +1,10 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''Read meta information from PDF files'''
 import sys, os, cStringIO
 from threading import Thread
 from calibre import FileWrapper
 from calibre.ebooks.metadata import MetaInformation, authors_to_string
@ -13,7 +13,8 @@ from pyPdf import PdfFileReader, PdfFileWriter
 import Image
 try:
    from calibre.utils.PythonMagickWand import \
-        NewMagickWand, MagickReadImage, MagickSetImageFormat, MagickWriteImage
+        NewMagickWand, MagickReadImage, MagickSetImageFormat, \
        MagickWriteImage, ImageMagick
    _imagemagick_loaded = True
 except:
    _imagemagick_loaded = False
@ -51,9 +52,23 @@ def get_metadata(stream, extract_cover=True):
        print >>sys.stderr, msg.encode('utf8')
    return mi
 class MetadataWriter(Thread):
    '''
    Daemon thread whose only job is to call ``out_pdf.write(buf)``.

    Running the write in a thread lets the caller wait on it with a timeout
    instead of blocking on the write itself.
    '''
    def __init__(self, out_pdf, buf):
        Thread.__init__(self)
        self.daemon = True
        self.out_pdf, self.buf = out_pdf, buf
    def run(self):
        writer, sink = self.out_pdf, self.buf
        try:
            writer.write(sink)
        except RuntimeError:
            # Deliberately ignored. Presumably this is how an aborted
            # (killed) write surfaces -- TODO confirm against the writer.
            return
 def set_metadata(stream, mi):
    stream.seek(0)
-    # Use a cStringIO object for the pdf because we will want to over
+    # Use a StringIO object for the pdf because we will want to over
    # write it later and if we are working on the stream directly it
    # could cause some issues.
    raw = cStringIO.StringIO(stream.read())
@ -61,10 +76,18 @@ def set_metadata(stream, mi):
    title = mi.title if mi.title else orig_pdf.documentInfo.title
    author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author
    out_pdf = PdfFileWriter(title=title, author=author)
    out_str = cStringIO.StringIO()
    writer = MetadataWriter(out_pdf, out_str)
    for page in orig_pdf.pages:
        out_pdf.addPage(page)
-    out_str = cStringIO.StringIO()
+    writer.start()
-    out_pdf.write(out_str)
+    writer.join(10) # Wait 10 secs for writing to complete
    out_pdf.killed = True
    writer.join()
    if out_pdf.killed:
        print 'Failed to set metadata: took too long'
        return
    stream.seek(0)
    stream.truncate()
    out_str.seek(0)
@ -72,35 +95,32 @@ def set_metadata(stream, mi):
    stream.seek(0)
 def get_cover(stream):
    stream.seek(0)
    data = cStringIO.StringIO()
    try:
-        with FileWrapper(stream) as stream:
+        pdf = PdfFileReader(stream)
-            pdf = PdfFileReader(stream)
+        output = PdfFileWriter()
-            output = PdfFileWriter()
+
-    
+        if len(pdf.pages) >= 1:
-            if len(pdf.pages) >= 1:
+            output.addPage(pdf.getPage(0))
-                output.addPage(pdf.getPage(0))
+
-    
+        with TemporaryDirectory('_pdfmeta') as tdir:
-            with TemporaryDirectory('_pdfmeta') as tdir:
+            cover_path = os.path.join(tdir, 'cover.pdf')
-                cover_path = os.path.join(tdir, 'cover.pdf')
+
-    
+            with open(cover_path, "wb") as outputStream:
                outputStream = file(cover_path, "wb")
                output.write(outputStream)
-                outputStream.close()
+                
-    
+            with ImageMagick():
                wand = NewMagickWand()
                MagickReadImage(wand, cover_path)
                MagickSetImageFormat(wand, 'JPEG')
                MagickWriteImage(wand, '%s.jpg' % cover_path)
-    
+
                img = Image.open('%s.jpg' % cover_path)
                img.save(data, 'JPEG')
    except:
        import traceback
        traceback.print_exc()
    return data.getvalue()
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -272,11 +272,7 @@ def XPath(expr):
 def xpath(elem, expr):
    '''
    Evaluate the XPath expression ``expr`` on ``elem`` with the module's
    ``XPNSMAP`` namespace mapping and return the result.
    '''
    matches = elem.xpath(expr, namespaces=XPNSMAP)
    return matches
 def _prepare_xml_for_serialization(root):
    pass
 def xml2str(root, pretty_print=False, strip_comments=False):
    _prepare_xml_for_serialization(root)
    ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
                          pretty_print=pretty_print)
@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False):
 def xml2unicode(root, pretty_print=False):
    _prepare_xml_for_serialization(root)
    return etree.tostring(root, pretty_print=pretty_print)
 ASCII_CHARS   = set(chr(x) for x in xrange(128))
@ -321,6 +316,25 @@ def urlnormalize(href):
    parts = (urlquote(part) for part in parts)
    return urlunparse(parts)
 class DummyHandler(logging.Handler):
    '''
    Logging handler that forwards formatted records to whatever object is
    stored in ``self.log``.

    The handler level is WARNING and only the bare message is formatted.
    While ``self.log`` is None, records are dropped silently.
    '''
    def __init__(self):
        logging.Handler.__init__(self, logging.WARNING)
        self.log = None
        self.setFormatter(logging.Formatter('%(message)s'))
    def emit(self, record):
        target = self.log
        if target is None:
            # No sink has been attached yet, so there is nowhere to send it.
            return
        text = self.format(record)
        # ERROR and above go to error(); everything else goes to warn().
        if record.levelno >= logging.ERROR:
            target.error(text)
        else:
            target.warn(text)
 _css_logger = logging.getLogger('calibre.css')
 _css_logger.setLevel(logging.WARNING)
 _css_log_handler = DummyHandler()
 _css_logger.addHandler(_css_log_handler)
 class OEBError(Exception):
    """Generic OEB-processing error."""
@ -778,7 +792,8 @@ class Manifest(object):
            data = self.oeb.css_preprocessor(data)
            data = XHTML_CSS_NAMESPACE + data
            parser = CSSParser(loglevel=logging.WARNING,
-                               fetcher=self._fetch_css)
+                               fetcher=self._fetch_css,
                               log=_css_logger)
            data = parser.parseString(data, href=self.href)
            data.namespaces['h'] = XHTML_NS
            return data
@ -1435,7 +1450,7 @@ class OEBBook(object):
        :attr:`pages`: List of "pages," such as indexed to a print edition of
            the same text.
        """
-
+        _css_log_handler.log = logger
        self.encoding = encoding
        self.html_preprocessor = html_preprocessor
        self.css_preprocessor = css_preprocessor
@ -1450,6 +1465,7 @@ class OEBBook(object):
        self.guide = Guide(self)
        self.toc = TOC()
        self.pages = PageList()
        self.auto_generated_toc = True
    @classmethod
    def generate(cls, opts):
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase
 from calibre.customize.ui import available_input_formats
 from calibre.ebooks.epub.from_html import TITLEPAGE
-from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
+from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.zipfile import safe_replace, ZipFile
 from calibre.utils.config import DynamicConfig
 from calibre.utils.logging import Log
 from calibre import CurrentDir
 def character_count(html):
    '''
@ -57,31 +56,21 @@ class FakeOpts(object):
    max_levels = 5
    input_encoding = None
 def html2opf(path, tdir, log):
    from calibre.ebooks.html.input import get_filelist
    from calibre.ebooks.metadata.meta import get_metadata
    with CurrentDir(tdir):
        fl = get_filelist(path, tdir, FakeOpts(), log)
        mi = get_metadata(open(path, 'rb'), 'html')
        mi = OPFCreator(os.getcwdu(), mi)
        mi.guide = None
        entries = [(f.path, 'application/xhtml+xml') for f in fl]
        mi.create_manifest(entries)
        mi.create_spine([f.path for f in fl])
        mi.render(open('metadata.opf', 'wb'))
        opfpath = os.path.abspath('metadata.opf')
    return opfpath
 def opf2opf(path, tdir, opts):
    return path
 def is_supported(path):
    '''
    Return True if the extension of ``path`` is one of the formats reported
    by ``available_input_formats()``.

    The extension is lower-cased with its dots removed, and the
    htm/html/xhtm/xhtml spellings are folded to ``html`` before the lookup.
    '''
    suffix = os.path.splitext(path)[1]
    fmt = suffix.replace('.', '').lower()
    fmt = re.sub(r'(x{0,1})htm(l{0,1})', 'html', fmt)
    return fmt in available_input_formats()
 def write_oebbook(oeb, path):
    '''
    Write ``oeb`` to ``path`` with an ``OEBWriter`` and return the first
    file under ``path`` whose name ends in ``.opf``.

    Returns None when no such file is found.
    '''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    writer = OEBWriter()
    writer(oeb, path)
    for candidate in walk(path):
        if not candidate.endswith('.opf'):
            continue
        return candidate
    return None
 class EbookIterator(object):
    CHARACTERS_PER_PAGE = 1000
@ -131,17 +120,16 @@ class EbookIterator(object):
    def __enter__(self):
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base  = self._tdir.__enter__()
-        if self.ebook_ext == 'opf':
+        from calibre.ebooks.conversion.plumber import Plumber
-            self.pathtoopf = self.pathtoebook
+        plumber = Plumber(self.pathtoebook, self.base, self.log)
-        elif self.ebook_ext == 'html':
+        plumber.setup_options()
-            self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log)
+        if hasattr(plumber.opts, 'dont_package'):
-        else:
+            plumber.opts.dont_package = True
-            from calibre.ebooks.conversion.plumber import Plumber
+        self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
-            plumber = Plumber(self.pathtoebook, self.base, self.log)
+                plumber.opts, plumber.input_fmt, self.log,
-            plumber.setup_options()
+                {}, self.base)
-            self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
+        if hasattr(self.pathtoopf, 'manifest'):
-                    plumber.opts, plumber.input_fmt, self.log,
+            self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir)
                    {}, self.base)
        self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin):
    author = 'Kovid Goyal'
    file_type = 'oeb'
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        self.log, self.opts = log, opts
        if not os.path.exists(output_path):
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -349,6 +349,7 @@ class OEBReader(object):
    def _toc_from_ncx(self, item):
        if item is None:
            return False
        self.log.debug('Reading TOC from NCX...')
        ncx = item.data
        title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
        title = COLLAPSE_RE.sub(' ', title.strip())
@ -364,6 +365,7 @@ class OEBReader(object):
        result = xpath(opf, 'o2:tours/o2:tour')
        if not result:
            return False
        self.log.debug('Reading TOC from tour...')
        tour = result[0]
        toc = self.oeb.toc
        toc.title = tour.get('title')
@ -384,6 +386,7 @@ class OEBReader(object):
    def _toc_from_html(self, opf):
        if 'toc' not in self.oeb.guide:
            return False
        self.log.debug('Reading TOC from HTML...')
        itempath, frag = urldefrag(self.oeb.guide['toc'].href)
        item = self.oeb.manifest.hrefs[itempath]
        html = item.data
@ -414,6 +417,7 @@ class OEBReader(object):
        return True
    def _toc_from_spine(self, opf):
        self.log.warn('Generating default TOC from spine...')
        toc = self.oeb.toc
        titles = []
        headers = []
@ -441,11 +445,14 @@ class OEBReader(object):
        return True
    def _toc_from_opf(self, opf, item):
        self.oeb.auto_generated_toc = False
        if self._toc_from_ncx(item): return
-        if self._toc_from_tour(opf): return
+        # Prefer HTML to tour based TOC, since several LIT files
-        self.logger.warn('No metadata table of contents found')
+        # have good HTML TOCs but bad tour based TOCs
        if self._toc_from_html(opf): return
        if self._toc_from_tour(opf): return
        self._toc_from_spine(opf)
        self.oeb.auto_generated_toc = True
    def _pages_from_ncx(self, opf, item):
        if item is None:
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -51,8 +51,8 @@ class Split(object):
        self.log = oeb.log
        self.map = {}
        self.page_break_selectors = None
-        for item in self.oeb.manifest.items:
+        for item in list(self.oeb.manifest.items):
-            if etree.iselement(item.data):
+            if item.spine_position is not None and etree.iselement(item.data):
                self.split_item(item)
        self.fix_links()
@ -74,31 +74,34 @@ class Split(object):
            self.page_break_selectors = set([])
            stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
                    OEB_STYLES]
-        page_break_selectors = set([])
+            for rule in rules(stylesheets):
-        for rule in rules(stylesheets):
+                before = getattr(rule.style.getPropertyCSSValue(
-            before = getattr(rule.style.getPropertyCSSValue(
+                    'page-break-before'), 'cssText', '').strip().lower()
-                'page-break-before'), 'cssText', '').strip().lower()
+                after  = getattr(rule.style.getPropertyCSSValue(
-            after  = getattr(rule.style.getPropertyCSSValue(
+                    'page-break-after'), 'cssText', '').strip().lower()
-                'page-break-after'), 'cssText', '').strip().lower()
+                try:
-            try:
+                    if before and before != 'avoid':
-                if before and before != 'avoid':
+                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
-                    page_break_selectors.add((CSSSelector(rule.selectorText),
+                            True))
-                        True))
+                except:
-            except:
+                    pass
-                pass
+                try:
-            try:
+                    if after and after != 'avoid':
-                if after and after != 'avoid':
+                        self.page_break_selectors.add((CSSSelector(rule.selectorText),
-                    page_break_selectors.add((CSSSelector(rule.selectorText),
+                            False))
-                        False))
+                except:
-            except:
+                    pass
                pass
        page_breaks = set([])
-        for selector, before in page_break_selectors:
+        for selector, before in self.page_break_selectors:
-            for elem in selector(item.data):
+            body = item.data.xpath('//h:body', namespaces=NAMESPACES)
-                if before:
+            if not body:
-                    elem.set('pb_before', '1')
+                continue
-                page_breaks.add(elem)
+            for elem in selector(body[0]):
                if elem not in body:
                    if before:
                        elem.set('pb_before', '1')
                    page_breaks.add(elem)
        for i, elem in enumerate(item.data.iter()):
            elem.set('pb_order', str(i))
@ -136,8 +139,10 @@ class Split(object):
        if href in self.map:
            anchor_map = self.map[href]
            nhref = anchor_map[frag if frag else None]
            nhref = self.current_item.relhref(nhref)
            if frag:
-                nhref = '#'.join(href, frag)
+                nhref = '#'.join((nhref, frag))
            return nhref
        return url
@ -153,7 +158,7 @@ class FlowSplitter(object):
        self.page_breaks    = page_breaks
        self.page_break_ids = page_break_ids
        self.max_flow_size  = max_flow_size
-        self.base           = item.abshref(item.href)
+        self.base           = item.href
        base, ext = os.path.splitext(self.base)
        self.base = base.replace('%', '%%')+'_split_%d'+ext
@ -192,9 +197,9 @@ class FlowSplitter(object):
        self.trees = []
        tree = orig_tree
        for pattern, before in ordered_ids:
            self.log.debug('\t\tSplitting on page-break')
            elem = pattern(tree)
            if elem:
                self.log.debug('\t\tSplitting on page-break')
                before, after = self.do_split(tree, elem[0], before)
                self.trees.append(before)
                tree = after
@ -414,13 +419,14 @@ class FlowSplitter(object):
                elem.attrib.pop(SPLIT_ATTR, None)
                elem.attrib.pop(SPLIT_POINT_ATTR, '0')
-        spine_pos = self.item.spine_pos
+        spine_pos = self.item.spine_position
-        for current, tree in zip(map(reversed, (self.files, self.trees))):
+        for current, tree in zip(*map(reversed, (self.files, self.trees))):
            for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
                href = a.get('href').strip()
                if href.startswith('#'):
                    anchor = href[1:]
                    file = self.anchor_map[anchor]
                    file = self.item.relhref(file)
                    if file != current:
                        a.set('href', file+href)
@ -430,12 +436,12 @@ class FlowSplitter(object):
            self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
        if self.oeb.guide:
-            for ref in self.oeb.guide:
+            for ref in self.oeb.guide.values():
                href, frag = urldefrag(ref.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
-                        nhref = '#'.join(nhref, frag)
+                        nhref = '#'.join((nhref, frag))
                    ref.href = nhref
        def fix_toc_entry(toc):
@ -444,7 +450,7 @@ class FlowSplitter(object):
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
-                        nhref = '#'.join(nhref, frag)
+                        nhref = '#'.join((nhref, frag))
                    toc.href = nhref
            for x in toc:
                fix_toc_entry(x)
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@ -49,7 +49,7 @@ class OEBWriter(object):
    def __call__(self, oeb, path):
        """
-        Read the book in the :class:`OEBBook` object :param:`oeb` to a file
+        Write the book in the :class:`OEBBook` object :param:`oeb` to a folder
        at :param:`path`.
        """
        version = int(self.version[0])
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -319,6 +319,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        self.cover_changed = True
    def initialize_series(self):
        self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow)
        all_series = self.db.all_series()
        all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
        series_id = self.db.series_id(self.row)
@ -335,13 +336,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
            self.series.setCurrentIndex(idx)
            self.enable_series_index()
        pl = self.series.parentWidget().layout()
        for i in range(pl.count()):
            l =  pl.itemAt(i).layout()
            if l:
                l.invalidate()
                l.activate()
    def initialize_series_and_publisher(self):
        self.initialize_series()
        all_publishers = self.db.all_publishers()
--- a/src/calibre/gui2/images/news/der_standard.png
+++ b/src/calibre/gui2/images/news/der_standard.png
--- a/src/calibre/gui2/images/news/diepresse.png
+++ b/src/calibre/gui2/images/news/diepresse.png
--- a/src/calibre/gui2/images/news/seattle_times.png
+++ b/src/calibre/gui2/images/news/seattle_times.png
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -40,6 +40,7 @@ recipe_modules = ['recipe_' + r for r in (
           'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet',
           'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en',
           'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
           'seattle_times',
          )]
 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_der_standard.py
+++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py
@ -1,14 +1,37 @@
 #!/usr/bin/env  python
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 ''' http://www.derstandard.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class DerStandardRecipe(BasicNewsRecipe):
-    title          = u'derStandard'
+    title = u'derStandard'
-    __author__  = 'Gerhard Aigner'
+    __author__ = 'Gerhard Aigner'
-
+    description = u'Nachrichten aus Österreich' 
    publisher ='derStandard.at'
    category = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds = True
    lang = 'de-AT'
    no_stylesheets = True
    encoding = 'utf-8'
    language = _('German')
    recursions = 0
    oldest_article = 1
    max_articles_per_feed = 100
    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    feeds          = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
        (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
        (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
@ -20,17 +43,13 @@ class DerStandardRecipe(BasicNewsRecipe):
        (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
        (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
        (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
    encoding = 'utf-8'
    language = _('German')
    recursions = 0
    remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
        dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
    preprocess_regexps = [
-        (re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
+        (re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]
-
+    
    def print_version(self, url):
        '''Return the URL of the text-only rendition of the article at `url`.'''
        # Every '?id=' marker is rewritten to 'txt/?id='; a URL without the
        # marker comes back unchanged.
        pieces = url.split('?id=')
        return 'txt/?id='.join(pieces)
@ -40,3 +59,10 @@ class DerStandardRecipe(BasicNewsRecipe):
        if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
            return None
        return article.link
    def preprocess_html(self, soup):
        '''
        Stamp the recipe language on the root element and prepend a
        Content-Type declaration to the document head.

        Returns the same `soup` object, modified in place.
        '''
        root = soup.html
        for attr in ('xml:lang', 'lang'):
            root[attr] = self.lang
        # The advertised charset is whatever the recipe declares as its
        # `encoding`.
        charset_meta = ''.join((
            '<meta http-equiv="Content-Type" content="text/html; charset=',
            self.encoding,
            '">',
        ))
        soup.head.insert(0, charset_meta)
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_diepresse.py
+++ b/src/calibre/web/feeds/recipes/recipe_diepresse.py
@ -1,18 +1,42 @@
-import re
+# -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
 ''' http://www.diepresse.at - Austrian Newspaper '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class DiePresseRecipe(BasicNewsRecipe):
-    title          = u'diePresse'
+    title = u'diePresse'
    __author__ = 'Gerhard Aigner'
    description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' 
    publisher ='DiePresse.com'
    category = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds = True
    lang = 'de-AT'
    no_stylesheets = True
    encoding = 'ISO-8859-1'
    language = _('German')
    recursions = 0
    oldest_article = 1
    max_articles_per_feed = 100
-    recursions = 0
+  
-    language = _('German')
+    html2lrf_options = [
-    __author__ = 'Gerhard Aigner'
+                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]
    html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' 
    preprocess_regexps = [
 	(re.compile(r'Textversion', re.DOTALL), lambda match: ''),
    ]
    remove_tags = [dict(name='hr'),
 	dict(name='br'),
 	dict(name='small'),
@ -21,6 +45,7 @@ class DiePresseRecipe(BasicNewsRecipe):
 	dict(name='h1', attrs={'class':'titel'}),
 	dict(name='a', attrs={'class':'print'}),
 	dict(name='div', attrs={'class':'hline'})]
    feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
 	(u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
 	(u'Europa', u'http://diepresse.com/rss/EU'),
@ -29,7 +54,7 @@ class DiePresseRecipe(BasicNewsRecipe):
 	(u'Kultur', u'http://diepresse.com/rss/Kultur'),
 	(u'Leben', u'http://diepresse.com/rss/Leben'),
 	(u'Tech', u'http://diepresse.com/rss/Tech'),
-	(u'Science', u'http://diepresse.com/rss/Science'),
+	(u'Wissenschaft', u'http://diepresse.com/rss/Science'),
 	(u'Bildung', u'http://diepresse.com/rss/Bildung'),
 	(u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
 	(u'Recht', u'http://diepresse.com/rss/Recht'),
@ -38,3 +63,10 @@ class DiePresseRecipe(BasicNewsRecipe):
    def print_version(self, url):
        '''Return the URL of the text rendition of the article at `url`.'''
        # Every 'home' in the URL is rewritten to 'text/home'; a URL without
        # it comes back unchanged.
        pieces = url.split('home')
        return 'text/home'.join(pieces)
    def preprocess_html(self, soup):
        '''
        Stamp the recipe language on the root element and prepend a
        Content-Type declaration to the document head.

        Returns the same `soup` object, modified in place.
        '''
        # Indentation is spaces only: the previous body mixed tab-indented
        # and space-indented lines in one suite, which is rejected by
        # `python -tt` and by Python 3.
        soup.html['xml:lang'] = self.lang
        soup.html['lang']     = self.lang
        # NOTE(review): the charset is hard-coded to utf-8 although this
        # recipe sets encoding = 'ISO-8859-1' -- presumably the article is
        # re-encoded before it is written out; confirm.
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0,mtag)
        return soup
--- a/src/calibre/web/feeds/recipes/recipe_seattle_times.py
+++ b/src/calibre/web/feeds/recipes/recipe_seattle_times.py
@ -0,0 +1,50 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 seattletimes.nwsource.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class SeattleTimes(BasicNewsRecipe):
    '''
    News recipe for The Seattle Times (seattletimes.nwsource.com).

    Articles come from a single RSS feed and are fetched through the
    site's PrintStory.pl print view.
    '''

    # Descriptive metadata
    title       = 'The Seattle Times'
    __author__  = 'Darko Miletic'
    description = 'News from Seattle and USA'
    publisher   = 'The Seattle Times'
    category    = 'news, politics, USA'
    language    = _('English')

    # Download behaviour
    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'cp1252'

    # The same three metadata fields, in the two option formats.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]
    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    feeds = [
        (u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml'),
    ]

    remove_tags = [
        {'name': ['object', 'link', 'script']},
        {'name': 'p', 'attrs': {'class': 'permission'}},
    ]

    def print_version(self, url):
        '''Return the print-view URL for the article at `url`.'''
        # The document id is whatever sits between the last '/' and the
        # final '_' of the article URL.
        before_underscore = url.rpartition('_')[0]
        document_id = before_underscore.rpartition('/')[2]
        return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + document_id

    def preprocess_html(self, soup):
        '''
        Prepend a Content-Language declaration to the head and remove every
        inline `style` attribute.

        Returns the same `soup` object, modified in place.
        '''
        language_meta = '<meta http-equiv="Content-Language" content="en-US"/>'
        soup.head.insert(0, language_meta)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup
--- a/src/pyPdf/generic.py
+++ b/src/pyPdf/generic.py
@ -299,7 +299,7 @@ def readStringFromStream(stream):
            elif tok == "t":
                tok = "\t"
            elif tok == "b":
-                tok == "\b"
+                tok = "\b"
            elif tok == "f":
                tok = "\f"
            elif tok == "(":
@ -673,7 +673,7 @@ class RectangleObject(ArrayObject):
    def getUpperLeft_x(self):
        return self.getLowerLeft_x()
-    
+
    def getUpperLeft_y(self):
        return self.getUpperRight_y()
--- a/src/pyPdf/pdf.py
+++ b/src/pyPdf/pdf.py
@ -39,15 +39,12 @@ __author__ = "Mathieu Fenniak"
 __author_email__ = "biziqe@mathieu.fenniak.net"
 import struct
-try:
+from cStringIO import StringIO
    from cStringIO import StringIO
 except ImportError:
    from StringIO import StringIO
-import filters
+from generic import DictionaryObject, NameObject, NumberObject, \
-import utils
+createStringObject, ArrayObject, ByteStringObject, StreamObject, \
-import warnings
+IndirectObject, utils, readObject, TextStringObject, BooleanObject, \
-from generic import *
+RectangleObject, DecodedStreamObject
 from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
@ -56,6 +53,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt
 # class (typically {@link #PdfFileReader PdfFileReader}).
 class PdfFileWriter(object):
    def __init__(self,title=u"Unknown",author=u"Unknown"):
        self.killed = False
        self._header = "%PDF-1.3"
        self._objects = []  # array of indirect objects
@ -162,7 +160,7 @@ class PdfFileWriter(object):
    # @param stream An object to write the file to.  The object must support
    # the write method, and the tell method, similar to a file object.
    def write(self, stream):
-        import struct, md5
+        import md5
        externalReferenceMap = {}
        self.stack = []
@ -209,11 +207,13 @@ class PdfFileWriter(object):
        if hasattr(self, "_encrypt"):
            trailer[NameObject("/Encrypt")] = self._encrypt
        trailer.writeToStream(stream, None)
-        
+
        # eof
        stream.write("\nstartxref\n%s\n%%%%EOF\n" % (xref_location))
    def _sweepIndirectReferences(self, externMap, data):
        if self.killed:
            raise RuntimeError('Writer killed')
        if isinstance(data, DictionaryObject):
            for key, value in data.items():
                origvalue = value
@ -356,8 +356,8 @@ class PdfFileReader(object):
        return self.flattenedPages[pageNumber]
    ##
-    # Read-only property that accesses the 
+    # Read-only property that accesses the
-    # {@link #PdfFileReader.getNamedDestinations 
+    # {@link #PdfFileReader.getNamedDestinations
    # getNamedDestinations} function.
    # <p>
    # Stability: Added in v1.10, will exist for all future v1.x releases.
@ -374,7 +374,7 @@ class PdfFileReader(object):
        if retval == None:
            retval = {}
            catalog = self.trailer["/Root"]
-            
+
            # get the name tree
            if catalog.has_key("/Dests"):
                tree = catalog["/Dests"]
@ -382,7 +382,7 @@ class PdfFileReader(object):
                names = catalog['/Names']
                if names.has_key("/Dests"):
                    tree = names['/Dests']
-        
+
        if tree == None:
            return retval
@ -420,17 +420,17 @@ class PdfFileReader(object):
        if outlines == None:
            outlines = []
            catalog = self.trailer["/Root"]
-            
+
            # get the outline dictionary and named destinations
            if catalog.has_key("/Outlines"):
                lines = catalog["/Outlines"]
                if lines.has_key("/First"):
                    node = lines["/First"]
            self._namedDests = self.getNamedDestinations()
-            
+
        if node == None:
          return outlines
-          
+
        # see if there are any more outlines
        while 1:
            outline = self._buildOutline(node)
@ -454,10 +454,10 @@ class PdfFileReader(object):
        page, typ = array[0:2]
        array = array[2:]
        return Destination(title, page, typ, *array)
-          
+
    def _buildOutline(self, node):
        dest, title, outline = None, None, None
-        
+
        if node.has_key("/A") and node.has_key("/Title"):
            # Action, section 8.5 (only type GoTo supported)
            title  = node["/Title"]
@ -951,7 +951,7 @@ class PageObject(DictionaryObject):
    def _pushPopGS(contents, pdf):
        # adds a graphics state "push" and "pop" to the beginning and end
-        # of a content stream.  This isolates it from changes such as 
+        # of a content stream.  This isolates it from changes such as
        # transformation matricies.
        stream = ContentStream(contents, pdf)
        stream.operations.insert(0, [[], "q"])
@ -1291,7 +1291,7 @@ class Destination(DictionaryObject):
        self[NameObject("/Title")] = title
        self[NameObject("/Page")] = page
        self[NameObject("/Type")] = typ
-        
+
        # from table 8.2 of the PDF 1.6 reference.
        if typ == "/XYZ":
            (self[NameObject("/Left")], self[NameObject("/Top")],
@ -1307,7 +1307,7 @@ class Destination(DictionaryObject):
            pass
        else:
            raise utils.PdfReadError("Unknown Destination Type: %r" % typ)
-          
+
    ##
    # Read-only property accessing the destination title.
    # @return A string.
@ -1474,25 +1474,25 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
    # described in Algorithm 3.2.
    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
-    # shown in step 1 of Algorithm 3.2 as input to this function. 
+    # shown in step 1 of Algorithm 3.2 as input to this function.
    import md5
    m = md5.new()
    m.update(_encryption_padding)
    # 3. Pass the first element of the file's file identifier array (the value
    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
    # page 73) to the hash function and finish the hash.  (See implementation
-    # note 25 in Appendix H.) 
+    # note 25 in Appendix H.)
    m.update(id1_entry)
    md5_hash = m.digest()
    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
-    # function with the encryption key from step 1. 
+    # function with the encryption key from step 1.
    val = utils.RC4_encrypt(key, md5_hash)
    # 5. Do the following 19 times: Take the output from the previous
    # invocation of the RC4 function and pass it as input to a new invocation
    # of the function; use an encryption key generated by taking each byte of
    # the original encryption key (obtained in step 2) and performing an XOR
    # operation between that byte and the single-byte value of the iteration
-    # counter (from 1 to 19). 
+    # counter (from 1 to 19).
    for i in range(1, 20):
        new_key = ''
        for l in range(len(key)):
@ -1500,7 +1500,7 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr
        val = utils.RC4_encrypt(new_key, val)
    # 6. Append 16 bytes of arbitrary padding to the output from the final
    # invocation of the RC4 function and store the 32-byte result as the value
-    # of the U entry in the encryption dictionary. 
+    # of the U entry in the encryption dictionary.
    # (implementator note: I don't know what "arbitrary padding" is supposed to
    # mean, so I have used null bytes.  This seems to match a few other
    # people's implementations)