diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index aa6c003114..08824a3591 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -263,14 +263,14 @@ class MOBIMetadataWriter(MetadataWriterPlugin): def set_metadata(self, stream, mi, type): from calibre.ebooks.metadata.mobi import set_metadata set_metadata(stream, mi) - + class PDFMetadataWriter(MetadataWriterPlugin): name = 'Set PDF metadata' file_types = set(['pdf']) description = _('Set metadata in %s files') % 'PDF' author = 'John Schember' - + def set_metadata(self, stream, mi, type): from calibre.ebooks.metadata.pdf import set_metadata set_metadata(stream, mi) @@ -280,6 +280,7 @@ from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.mobi.input import MOBIInput from calibre.ebooks.pdf.input import PDFInput from calibre.ebooks.txt.input import TXTInput +from calibre.ebooks.lit.input import LITInput from calibre.ebooks.html.input import HTMLInput from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.txt.output import TXTOutput @@ -287,7 +288,7 @@ from calibre.ebooks.pdf.output import PDFOutput from calibre.customize.profiles import input_profiles, output_profiles plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput, - TXTInput, OEBOutput, TXTOutput, PDFOutput] + TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index 77cdb0b7da..b334816adf 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -41,6 +41,11 @@ class ConversionOption(object): def __eq__(self, other): return hash(self) == hash(other) + def clone(self): + return ConversionOption(name=self.name, help=self.help, + long_switch=self.long_switch, short_switch=self.short_switch, + choices=self.choices) + class OptionRecommendation(object): LOW = 1 MED = 2 @@ -59,6 +64,10 @@ class OptionRecommendation(object): self.validate_parameters() + def clone(self): + return OptionRecommendation(recommended_value=self.recommended_value, + level=self.level, option=self.option.clone()) + def validate_parameters(self): if self.option.choices and self.recommended_value not in \ self.option.choices: @@ -170,8 +179,14 @@ class InputFormatPlugin(Plugin): options.debug_input = os.path.abspath(options.debug_input) if not os.path.exists(options.debug_input): os.makedirs(options.debug_input) - shutil.rmtree(options.debug_input) - shutil.copytree(output_dir, options.debug_input) + if isinstance(ret, basestring): + shutil.rmtree(options.debug_input) + shutil.copytree(output_dir, options.debug_input) + else: + from calibre.ebooks.oeb.writer import OEBWriter + w = OEBWriter(pretty_print=options.pretty_print) + w(ret, options.debug_input) + log.info('Input debug saved to:', options.debug_input) return ret diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index a3d57be191..b7336ab30a 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -57,7 +57,7 @@ def check_command_line_options(parser, args, log): raise SystemExit(1) output = args[2] - if output.startswith('.'): + if output.startswith('.') and output != '.': output = os.path.splitext(os.path.basename(input))[0]+output output = os.path.abspath(output) @@ -171,7 +171,8 @@ def main(args=sys.argv): plumber.run() - log(_('Output saved to'), ' ', plumber.output) + if plumber.opts.debug_input is None: + log(_('Output saved to'), ' ', plumber.output) return 0 diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 93fc376bea..1edeed8d9c 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -32,8 +32,8 @@ class Plumber(object): :param input: Path to input file. :param output: Path to output file/directory ''' - self.input = input - self.output = output + self.input = os.path.abspath(input) + self.output = os.path.abspath(output) self.log = log # Initialize the conversion options that are independent of input and @@ -188,15 +188,15 @@ OptionRecommendation(name='language', ] - input_fmt = os.path.splitext(input)[1] + input_fmt = os.path.splitext(self.input)[1] if not input_fmt: raise ValueError('Input file must have an extension') input_fmt = input_fmt[1:].lower() - if os.path.exists(output) and os.path.isdir(output): + if os.path.exists(self.output) and os.path.isdir(self.output): output_fmt = 'oeb' else: - output_fmt = os.path.splitext(output)[1] + output_fmt = os.path.splitext(self.output)[1] if not output_fmt: output_fmt = '.oeb' output_fmt = output_fmt[1:].lower() @@ -323,6 +323,9 @@ OptionRecommendation(name='language', self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts, self.input_fmt, self.log, accelerators, tdir) + if self.opts.debug_input is not None: + self.log('Debug input called, aborting the rest of the pipeline.') + return if not hasattr(self.oeb, 'manifest'): self.oeb = create_oebbook(self.log, self.oeb, self.opts) @@ -365,18 +368,20 @@ OptionRecommendation(name='language', self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts, self.log) -def create_oebbook(log, opfpath, opts): +def create_oebbook(log, path_or_stream, opts, reader=None): ''' - Create an OEBBook from an OPF file. + Create an OEBBook. ''' - from calibre.ebooks.oeb.reader import OEBReader from calibre.ebooks.oeb.base import OEBBook html_preprocessor = HTMLPreProcessor() - reader = OEBReader() oeb = OEBBook(log, html_preprocessor=html_preprocessor, pretty_print=opts.pretty_print) # Read OEB Book into OEBBook - log.info('Parsing all content...') - reader(oeb, opfpath) + log('Parsing all content...') + if reader is None: + from calibre.ebooks.oeb.reader import OEBReader + reader = OEBReader + + reader()(oeb, path_or_stream) return oeb diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 5b9a085b1d..252032a23d 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -252,6 +252,14 @@ class HTMLInput(InputFormatPlugin): ) ), + OptionRecommendation(name='dont_package', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Normally this input plugin re-arranges all the input ' + 'files into a standard folder hierarchy. Only use this option ' + 'if you know what you are doing as it can result in various ' + 'nasty side effects in the rest of of the conversion pipeline.' + ) + ), ]) def convert(self, stream, opts, file_ext, log, @@ -276,6 +284,9 @@ class HTMLInput(InputFormatPlugin): mi.render(open('metadata.opf', 'wb')) opfpath = os.path.abspath('metadata.opf') + if opts.dont_package: + return opfpath + from calibre.ebooks.conversion.plumber import create_oebbook oeb = create_oebbook(log, opfpath, opts) diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py new file mode 100644 index 0000000000..2d726f7eeb --- /dev/null +++ b/src/calibre/ebooks/lit/input.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.customize.conversion import InputFormatPlugin + +class LITInput(InputFormatPlugin): + + name = 'LIT Input' + author = 'Marshall T. Vandegrift' + description = 'Convert LIT files to HTML' + file_types = set(['lit']) + + def convert(self, stream, options, file_ext, log, + accelerators): + from calibre.ebooks.lit.reader import LitReader + from calibre.ebooks.conversion.plumber import create_oebbook + return create_oebbook(log, stream, options, reader=LitReader) + + diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index f32a65e010..79249fe7c3 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -7,13 +7,12 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' \ 'and Marshall T. Vandegrift ' -import sys, struct, os +import struct, os import functools import re from urlparse import urldefrag from cStringIO import StringIO from urllib import unquote as urlunquote -from lxml import etree from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP import calibre.ebooks.lit.mssha1 as mssha1 @@ -29,12 +28,12 @@ __all__ = ["LitReader"] XML_DECL = """ """ OPF_DECL = """ - """ HTML_DECL = """ - """ @@ -73,7 +72,7 @@ def encint(bytes, remaining): val <<= 7 val |= (b & 0x7f) if b & 0x80 == 0: break - return val, bytes[pos:], remaining + return val, bytes[pos:], remaining def msguid(bytes): values = struct.unpack(">(?=>>|[^>])') DOUBLE_ANGLE_RE = re.compile(r'([<>])\1') EMPTY_ATOMS = ({},{}) - + def __init__(self, bin, path, manifest={}, map=HTML_MAP, atoms=EMPTY_ATOMS): self.manifest = manifest self.tag_map, self.attr_map, self.tag_to_attr_map = map @@ -143,7 +142,7 @@ class UnBinary(object): raw = self.CLOSE_ANGLE_RE.sub(r'>', raw) raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw) self.raw = raw - + def item_path(self, internal_id): try: target = self.manifest[internal_id].path @@ -159,7 +158,7 @@ class UnBinary(object): index += 1 relpath = (['..'] * (len(base) - index)) + target[index:] return '/'.join(relpath) - + def __unicode__(self): return self.raw.decode('utf-8') @@ -172,11 +171,11 @@ class UnBinary(object): in_censorship = is_goingdown = False state = 'text' flags = 0 - + while index < len(bin): c, index = read_utf8_char(bin, index) oc = ord(c) - + if state == 'text': if oc == 0: state = 'get flags' @@ -188,14 +187,14 @@ class UnBinary(object): elif c == '<': c = '<<' buf.write(encode(c)) - + elif state == 'get flags': if oc == 0: state = 'text' continue flags = oc state = 'get tag' - + elif state == 'get tag': state = 'text' if oc == 0 else 'get attr' if flags & FLAG_OPENING: @@ -226,7 +225,7 @@ class UnBinary(object): if depth == 0: raise LitError('Extra closing tag') return index - + elif state == 'get attr': in_censorship = False if oc == 0: @@ -265,7 +264,7 @@ class UnBinary(object): state = 'get href length' else: state = 'get value length' - + elif state == 'get value length': if not in_censorship: buf.write('"') @@ -281,7 +280,7 @@ class UnBinary(object): continue if count < 0 or count > (len(bin) - index): raise LitError('Invalid character count %d' % count) - + elif state == 'get value': if count == 0xfffe: if not in_censorship: @@ -301,7 +300,7 @@ class UnBinary(object): buf.write('"') in_censorship = False state = 'get attr' - + elif state == 'get custom length': count = oc - 1 if count <= 0 or count > len(bin)-index: @@ -309,21 +308,21 @@ class UnBinary(object): dynamic_tag += 1 state = 'get custom' tag_name = '' - + elif state == 'get custom': tag_name += c count -= 1 if count == 0: buf.write(encode(tag_name)) state = 'get attr' - + elif state == 'get attr length': count = oc - 1 if count <= 0 or count > (len(bin) - index): raise LitError('Invalid character count %d' % count) buf.write(' ') state = 'get custom attr' - + elif state == 'get custom attr': buf.write(encode(c)) count -= 1 @@ -337,7 +336,7 @@ class UnBinary(object): raise LitError('Invalid character count %d' % count) href = '' state = 'get href' - + elif state == 'get href': href += c count -= 1 @@ -350,7 +349,7 @@ class UnBinary(object): buf.write(encode(u'"%s"' % path)) state = 'get attr' return index - + class DirectoryEntry(object): def __init__(self, name, section, offset, size): @@ -358,11 +357,11 @@ class DirectoryEntry(object): self.section = section self.offset = offset self.size = size - + def __repr__(self): return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \ % (repr(self.name), self.section, self.offset, self.size) - + def __str__(self): return repr(self) @@ -382,12 +381,12 @@ class ManifestItem(object): path = os.path.normpath(path).replace('\\', '/') while path.startswith('../'): path = path[3:] self.path = path - + def __eq__(self, other): if hasattr(other, 'internal'): return self.internal == other.internal return self.internal == other - + def __repr__(self): return "ManifestItem(internal=%r, path=%r, mime_type=%r, " \ "offset=%d, root=%r, state=%r)" \ @@ -404,7 +403,7 @@ def preserve(function): self.stream.seek(opos) functools.update_wrapper(wrapper, function) return wrapper - + class LitFile(object): PIECE_SIZE = 16 @@ -438,14 +437,14 @@ class LitFile(object): return self.stream.read(8) return property(fget=fget) magic = magic() - + def version(): def fget(self): self.stream.seek(8) return u32(self.stream.read(4)) return property(fget=fget) version = version() - + def hdr_len(): @preserve def fget(self): @@ -453,7 +452,7 @@ class LitFile(object): return int32(self.stream.read(4)) return property(fget=fget) hdr_len = hdr_len() - + def num_pieces(): @preserve def fget(self): @@ -461,7 +460,7 @@ class LitFile(object): return int32(self.stream.read(4)) return property(fget=fget) num_pieces = num_pieces() - + def sec_hdr_len(): @preserve def fget(self): @@ -469,7 +468,7 @@ class LitFile(object): return int32(self.stream.read(4)) return property(fget=fget) sec_hdr_len = sec_hdr_len() - + def guid(): @preserve def fget(self): @@ -477,7 +476,7 @@ class LitFile(object): return self.stream.read(16) return property(fget=fget) guid = guid() - + def header(): @preserve def fget(self): @@ -488,7 +487,7 @@ class LitFile(object): return self.stream.read(size) return property(fget=fget) header = header() - + @preserve def __len__(self): self.stream.seek(0, 2) @@ -501,7 +500,7 @@ class LitFile(object): def read_content(self, offset, size): return self.read_raw(self.content_offset + offset, size) - + def read_secondary_header(self): offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE) bytes = self.read_raw(offset, self.sec_hdr_len) @@ -526,12 +525,12 @@ class LitFile(object): if u32(bytes[offset+4+16:]): raise LitError('This file has a 64bit content offset') self.content_offset = u32(bytes[offset+16:]) - self.timestamp = u32(bytes[offset+24:]) + self.timestamp = u32(bytes[offset+24:]) self.language_id = u32(bytes[offset+28:]) offset += 48 if not hasattr(self, 'content_offset'): raise LitError('Could not figure out the content offset') - + def read_header_pieces(self): src = self.header[self.hdr_len:] for i in xrange(self.num_pieces): @@ -556,7 +555,7 @@ class LitFile(object): self.piece3_guid = piece elif i == 4: self.piece4_guid = piece - + def read_directory(self, piece): if not piece.startswith('IFCM'): raise LitError('Header piece #1 is not main directory.') @@ -760,9 +759,9 @@ class LitFile(object): raise LitError("Reset table is too short") if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: raise LitError("Reset table has 64bit value for UCLENGTH") - + result = [] - + window_size = 14 u = u32(control[CONTROL_WINDOW_SIZE:]) while u > 0: @@ -847,13 +846,13 @@ class LitContainer(object): def __init__(self, filename_or_stream): self._litfile = LitFile(filename_or_stream) - + def namelist(self): return self._litfile.paths.keys() def exists(self, name): return urlunquote(name) in self._litfile.paths - + def read(self, name): entry = self._litfile.paths[urlunquote(name)] if name else None if entry is None: @@ -869,7 +868,7 @@ class LitContainer(object): internal = '/'.join(('/data', entry.internal)) content = self._litfile.get_file(internal) return content - + def _read_meta(self): path = 'content.opf' raw = self._litfile.get_file('/meta') diff --git a/src/calibre/ebooks/metadata/pdf.py b/src/calibre/ebooks/metadata/pdf.py index 4dc98509e2..ec713b5adf 100644 --- a/src/calibre/ebooks/metadata/pdf.py +++ b/src/calibre/ebooks/metadata/pdf.py @@ -1,10 +1,10 @@ from __future__ import with_statement - __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' '''Read meta information from PDF files''' import sys, os, cStringIO +from threading import Thread from calibre import FileWrapper from calibre.ebooks.metadata import MetaInformation, authors_to_string @@ -13,7 +13,8 @@ from pyPdf import PdfFileReader, PdfFileWriter import Image try: from calibre.utils.PythonMagickWand import \ - NewMagickWand, MagickReadImage, MagickSetImageFormat, MagickWriteImage + NewMagickWand, MagickReadImage, MagickSetImageFormat, \ + MagickWriteImage, ImageMagick _imagemagick_loaded = True except: _imagemagick_loaded = False @@ -51,9 +52,23 @@ def get_metadata(stream, extract_cover=True): print >>sys.stderr, msg.encode('utf8') return mi +class MetadataWriter(Thread): + + def __init__(self, out_pdf, buf): + self.out_pdf = out_pdf + self.buf = buf + Thread.__init__(self) + self.daemon = True + + def run(self): + try: + self.out_pdf.write(self.buf) + except RuntimeError: + pass + def set_metadata(stream, mi): stream.seek(0) - # Use a cStringIO object for the pdf because we will want to over + # Use a StringIO object for the pdf because we will want to over # write it later and if we are working on the stream directly it # could cause some issues. raw = cStringIO.StringIO(stream.read()) @@ -61,10 +76,18 @@ def set_metadata(stream, mi): title = mi.title if mi.title else orig_pdf.documentInfo.title author = authors_to_string(mi.authors) if mi.authors else orig_pdf.documentInfo.author out_pdf = PdfFileWriter(title=title, author=author) + out_str = cStringIO.StringIO() + writer = MetadataWriter(out_pdf, out_str) for page in orig_pdf.pages: out_pdf.addPage(page) - out_str = cStringIO.StringIO() - out_pdf.write(out_str) + writer.start() + writer.join(10) # Wait 10 secs for writing to complete + out_pdf.killed = True + writer.join() + if out_pdf.killed: + print 'Failed to set metadata: took too long' + return + stream.seek(0) stream.truncate() out_str.seek(0) @@ -72,35 +95,32 @@ def set_metadata(stream, mi): stream.seek(0) def get_cover(stream): - stream.seek(0) - data = cStringIO.StringIO() try: - with FileWrapper(stream) as stream: - pdf = PdfFileReader(stream) - output = PdfFileWriter() - - if len(pdf.pages) >= 1: - output.addPage(pdf.getPage(0)) - - with TemporaryDirectory('_pdfmeta') as tdir: - cover_path = os.path.join(tdir, 'cover.pdf') - - outputStream = file(cover_path, "wb") + pdf = PdfFileReader(stream) + output = PdfFileWriter() + + if len(pdf.pages) >= 1: + output.addPage(pdf.getPage(0)) + + with TemporaryDirectory('_pdfmeta') as tdir: + cover_path = os.path.join(tdir, 'cover.pdf') + + with open(cover_path, "wb") as outputStream: output.write(outputStream) - outputStream.close() - + + with ImageMagick(): wand = NewMagickWand() MagickReadImage(wand, cover_path) MagickSetImageFormat(wand, 'JPEG') MagickWriteImage(wand, '%s.jpg' % cover_path) - + img = Image.open('%s.jpg' % cover_path) - img.save(data, 'JPEG') except: import traceback traceback.print_exc() return data.getvalue() + diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 5d2c51c4ba..dda36a7500 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -272,11 +272,7 @@ def XPath(expr): def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) -def _prepare_xml_for_serialization(root): - pass - def xml2str(root, pretty_print=False, strip_comments=False): - _prepare_xml_for_serialization(root) ans = etree.tostring(root, encoding='utf-8', xml_declaration=True, pretty_print=pretty_print) @@ -287,7 +283,6 @@ def xml2str(root, pretty_print=False, strip_comments=False): def xml2unicode(root, pretty_print=False): - _prepare_xml_for_serialization(root) return etree.tostring(root, pretty_print=pretty_print) ASCII_CHARS = set(chr(x) for x in xrange(128)) @@ -321,6 +316,25 @@ def urlnormalize(href): parts = (urlquote(part) for part in parts) return urlunparse(parts) +class DummyHandler(logging.Handler): + + def __init__(self): + logging.Handler.__init__(self, logging.WARNING) + self.setFormatter(logging.Formatter('%(message)s')) + self.log = None + + def emit(self, record): + if self.log is not None: + msg = self.format(record) + f = self.log.error if record.levelno >= logging.ERROR \ + else self.log.warn + f(msg) + + +_css_logger = logging.getLogger('calibre.css') +_css_logger.setLevel(logging.WARNING) +_css_log_handler = DummyHandler() +_css_logger.addHandler(_css_log_handler) class OEBError(Exception): """Generic OEB-processing error.""" @@ -778,7 +792,8 @@ class Manifest(object): data = self.oeb.css_preprocessor(data) data = XHTML_CSS_NAMESPACE + data parser = CSSParser(loglevel=logging.WARNING, - fetcher=self._fetch_css) + fetcher=self._fetch_css, + log=_css_logger) data = parser.parseString(data, href=self.href) data.namespaces['h'] = XHTML_NS return data @@ -1435,7 +1450,7 @@ class OEBBook(object): :attr:`pages`: List of "pages," such as indexed to a print edition of the same text. """ - + _css_log_handler.log = logger self.encoding = encoding self.html_preprocessor = html_preprocessor self.css_preprocessor = css_preprocessor @@ -1450,6 +1465,7 @@ class OEBBook(object): self.guide = Guide(self) self.toc = TOC() self.pages = PageList() + self.auto_generated_toc = True @classmethod def generate(cls, opts): diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py index df4f3b88f1..81e1f89029 100644 --- a/src/calibre/ebooks/oeb/iterator.py +++ b/src/calibre/ebooks/oeb/iterator.py @@ -13,13 +13,12 @@ from PyQt4.Qt import QFontDatabase from calibre.customize.ui import available_input_formats from calibre.ebooks.epub.from_html import TITLEPAGE -from calibre.ebooks.metadata.opf2 import OPF, OPFCreator +from calibre.ebooks.metadata.opf2 import OPF from calibre.ptempfile import TemporaryDirectory from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.zipfile import safe_replace, ZipFile from calibre.utils.config import DynamicConfig from calibre.utils.logging import Log -from calibre import CurrentDir def character_count(html): ''' @@ -57,31 +56,21 @@ class FakeOpts(object): max_levels = 5 input_encoding = None -def html2opf(path, tdir, log): - from calibre.ebooks.html.input import get_filelist - from calibre.ebooks.metadata.meta import get_metadata - with CurrentDir(tdir): - fl = get_filelist(path, tdir, FakeOpts(), log) - mi = get_metadata(open(path, 'rb'), 'html') - mi = OPFCreator(os.getcwdu(), mi) - mi.guide = None - entries = [(f.path, 'application/xhtml+xml') for f in fl] - mi.create_manifest(entries) - mi.create_spine([f.path for f in fl]) - - mi.render(open('metadata.opf', 'wb')) - opfpath = os.path.abspath('metadata.opf') - - return opfpath - -def opf2opf(path, tdir, opts): - return path - def is_supported(path): ext = os.path.splitext(path)[1].replace('.', '').lower() ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext) return ext in available_input_formats() + +def write_oebbook(oeb, path): + from calibre.ebooks.oeb.writer import OEBWriter + from calibre import walk + w = OEBWriter() + w(oeb, path) + for f in walk(path): + if f.endswith('.opf'): + return f + class EbookIterator(object): CHARACTERS_PER_PAGE = 1000 @@ -131,17 +120,16 @@ class EbookIterator(object): def __enter__(self): self._tdir = TemporaryDirectory('_ebook_iter') self.base = self._tdir.__enter__() - if self.ebook_ext == 'opf': - self.pathtoopf = self.pathtoebook - elif self.ebook_ext == 'html': - self.pathtoopf = html2opf(self.pathtoebook, self.base, self.log) - else: - from calibre.ebooks.conversion.plumber import Plumber - plumber = Plumber(self.pathtoebook, self.base, self.log) - plumber.setup_options() - self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'), - plumber.opts, plumber.input_fmt, self.log, - {}, self.base) + from calibre.ebooks.conversion.plumber import Plumber + plumber = Plumber(self.pathtoebook, self.base, self.log) + plumber.setup_options() + if hasattr(plumber.opts, 'dont_package'): + plumber.opts.dont_package = True + self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'), + plumber.opts, plumber.input_fmt, self.log, + {}, self.base) + if hasattr(self.pathtoopf, 'manifest'): + self.pathtoopf = write_oebbook(self.pathtoebook, self._tdir) self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf)) diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py index 480ca3776e..ba62897215 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/oeb/output.py @@ -16,7 +16,6 @@ class OEBOutput(OutputFormatPlugin): author = 'Kovid Goyal' file_type = 'oeb' - def convert(self, oeb_book, output_path, input_plugin, opts, log): self.log, self.opts = log, opts if not os.path.exists(output_path): diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index faeff4b825..6f0ff44bc9 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -349,6 +349,7 @@ class OEBReader(object): def _toc_from_ncx(self, item): if item is None: return False + self.log.debug('Reading TOC from NCX...') ncx = item.data title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()')) title = COLLAPSE_RE.sub(' ', title.strip()) @@ -364,6 +365,7 @@ class OEBReader(object): result = xpath(opf, 'o2:tours/o2:tour') if not result: return False + self.log.debug('Reading TOC from tour...') tour = result[0] toc = self.oeb.toc toc.title = tour.get('title') @@ -384,6 +386,7 @@ class OEBReader(object): def _toc_from_html(self, opf): if 'toc' not in self.oeb.guide: return False + self.log.debug('Reading TOC from HTML...') itempath, frag = urldefrag(self.oeb.guide['toc'].href) item = self.oeb.manifest.hrefs[itempath] html = item.data @@ -414,6 +417,7 @@ class OEBReader(object): return True def _toc_from_spine(self, opf): + self.log.warn('Generating default TOC from spine...') toc = self.oeb.toc titles = [] headers = [] @@ -441,11 +445,14 @@ class OEBReader(object): return True def _toc_from_opf(self, opf, item): + self.oeb.auto_generated_toc = False if self._toc_from_ncx(item): return - if self._toc_from_tour(opf): return - self.logger.warn('No metadata table of contents found') + # Prefer HTML to tour based TOC, since several LIT files + # have good HTML TOCs but bad tour based TOCs if self._toc_from_html(opf): return + if self._toc_from_tour(opf): return self._toc_from_spine(opf) + self.oeb.auto_generated_toc = True def _pages_from_ncx(self, opf, item): if item is None: diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 1bb5b50d06..33ab14b73d 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -51,8 +51,8 @@ class Split(object): self.log = oeb.log self.map = {} self.page_break_selectors = None - for item in self.oeb.manifest.items: - if etree.iselement(item.data): + for item in list(self.oeb.manifest.items): + if item.spine_position is not None and etree.iselement(item.data): self.split_item(item) self.fix_links() @@ -74,31 +74,34 @@ class Split(object): self.page_break_selectors = set([]) stylesheets = [x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES] - page_break_selectors = set([]) - for rule in rules(stylesheets): - before = getattr(rule.style.getPropertyCSSValue( - 'page-break-before'), 'cssText', '').strip().lower() - after = getattr(rule.style.getPropertyCSSValue( - 'page-break-after'), 'cssText', '').strip().lower() - try: - if before and before != 'avoid': - page_break_selectors.add((CSSSelector(rule.selectorText), - True)) - except: - pass - try: - if after and after != 'avoid': - page_break_selectors.add((CSSSelector(rule.selectorText), - False)) - except: - pass + for rule in rules(stylesheets): + before = getattr(rule.style.getPropertyCSSValue( + 'page-break-before'), 'cssText', '').strip().lower() + after = getattr(rule.style.getPropertyCSSValue( + 'page-break-after'), 'cssText', '').strip().lower() + try: + if before and before != 'avoid': + self.page_break_selectors.add((CSSSelector(rule.selectorText), + True)) + except: + pass + try: + if after and after != 'avoid': + self.page_break_selectors.add((CSSSelector(rule.selectorText), + False)) + except: + pass page_breaks = set([]) - for selector, before in page_break_selectors: - for elem in selector(item.data): - if before: - elem.set('pb_before', '1') - page_breaks.add(elem) + for selector, before in self.page_break_selectors: + body = item.data.xpath('//h:body', namespaces=NAMESPACES) + if not body: + continue + for elem in selector(body[0]): + if elem not in body: + if before: + elem.set('pb_before', '1') + page_breaks.add(elem) for i, elem in enumerate(item.data.iter()): elem.set('pb_order', str(i)) @@ -136,8 +139,10 @@ class Split(object): if href in self.map: anchor_map = self.map[href] nhref = anchor_map[frag if frag else None] + nhref = self.current_item.relhref(nhref) if frag: - nhref = '#'.join(href, frag) + nhref = '#'.join((nhref, frag)) + return nhref return url @@ -153,7 +158,7 @@ class FlowSplitter(object): self.page_breaks = page_breaks self.page_break_ids = page_break_ids self.max_flow_size = max_flow_size - self.base = item.abshref(item.href) + self.base = item.href base, ext = os.path.splitext(self.base) self.base = base.replace('%', '%%')+'_split_%d'+ext @@ -192,9 +197,9 @@ class FlowSplitter(object): self.trees = [] tree = orig_tree for pattern, before in ordered_ids: - self.log.debug('\t\tSplitting on page-break') elem = pattern(tree) if elem: + self.log.debug('\t\tSplitting on page-break') before, after = self.do_split(tree, elem[0], before) self.trees.append(before) tree = after @@ -414,13 +419,14 @@ class FlowSplitter(object): elem.attrib.pop(SPLIT_ATTR, None) elem.attrib.pop(SPLIT_POINT_ATTR, '0') - spine_pos = self.item.spine_pos - for current, tree in zip(map(reversed, (self.files, self.trees))): + spine_pos = self.item.spine_position + for current, tree in zip(*map(reversed, (self.files, self.trees))): for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES): href = a.get('href').strip() if href.startswith('#'): anchor = href[1:] file = self.anchor_map[anchor] + file = self.item.relhref(file) if file != current: a.set('href', file+href) @@ -430,12 +436,12 @@ class FlowSplitter(object): self.oeb.spine.insert(spine_pos, new_item, self.item.linear) if self.oeb.guide: - for ref in self.oeb.guide: + for ref in self.oeb.guide.values(): href, frag = urldefrag(ref.href) if href == self.item.href: nhref = self.anchor_map[frag if frag else None] if frag: - nhref = '#'.join(nhref, frag) + nhref = '#'.join((nhref, frag)) ref.href = nhref def fix_toc_entry(toc): @@ -444,7 +450,7 @@ class FlowSplitter(object): if href == self.item.href: nhref = self.anchor_map[frag if frag else None] if frag: - nhref = '#'.join(nhref, frag) + nhref = '#'.join((nhref, frag)) toc.href = nhref for x in toc: fix_toc_entry(x) diff --git a/src/calibre/ebooks/oeb/writer.py b/src/calibre/ebooks/oeb/writer.py index ef72414f5a..f71eb88ea5 100644 --- a/src/calibre/ebooks/oeb/writer.py +++ b/src/calibre/ebooks/oeb/writer.py @@ -49,7 +49,7 @@ class OEBWriter(object): def __call__(self, oeb, path): """ - Read the book in the :class:`OEBBook` object :param:`oeb` to a file + Write the book in the :class:`OEBBook` object :param:`oeb` to a folder at :param:`path`. """ version = int(self.version[0]) diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index c48c7c3640..4a74c87097 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -319,6 +319,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.cover_changed = True def initialize_series(self): + self.series.setSizeAdjustPolicy(self.series.AdjustToContentsOnFirstShow) all_series = self.db.all_series() all_series.sort(cmp=lambda x, y : cmp(x[1], y[1])) series_id = self.db.series_id(self.row) @@ -335,13 +336,6 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.series.setCurrentIndex(idx) self.enable_series_index() - pl = self.series.parentWidget().layout() - for i in range(pl.count()): - l = pl.itemAt(i).layout() - if l: - l.invalidate() - l.activate() - def initialize_series_and_publisher(self): self.initialize_series() all_publishers = self.db.all_publishers() diff --git a/src/calibre/gui2/images/news/der_standard.png b/src/calibre/gui2/images/news/der_standard.png new file mode 100644 index 0000000000..4d750fe5a8 Binary files /dev/null and b/src/calibre/gui2/images/news/der_standard.png differ diff --git a/src/calibre/gui2/images/news/diepresse.png b/src/calibre/gui2/images/news/diepresse.png new file mode 100644 index 0000000000..41bbdcbf1b Binary files /dev/null and b/src/calibre/gui2/images/news/diepresse.png differ diff --git a/src/calibre/gui2/images/news/seattle_times.png b/src/calibre/gui2/images/news/seattle_times.png new file mode 100644 index 0000000000..b885684992 Binary files /dev/null and b/src/calibre/gui2/images/news/seattle_times.png differ diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 9e2ef1969d..405fd265a7 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -40,6 +40,7 @@ recipe_modules = ['recipe_' + r for r in ( 'krstarica', 'krstarica_en', 'tanjug', 'laprensa_ni', 'azstarnet', 'corriere_della_sera_it', 'corriere_della_sera_en', 'msdnmag_en', 'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna', + 'seattle_times', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_der_standard.py b/src/calibre/web/feeds/recipes/recipe_der_standard.py index eec4c4e74d..c053d74cfb 100644 --- a/src/calibre/web/feeds/recipes/recipe_der_standard.py +++ b/src/calibre/web/feeds/recipes/recipe_der_standard.py @@ -1,14 +1,37 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2009, Gerhard Aigner ' ''' http://www.derstandard.at - Austrian Newspaper ''' import re from calibre.web.feeds.news import BasicNewsRecipe class DerStandardRecipe(BasicNewsRecipe): - title = u'derStandard' - __author__ = 'Gerhard Aigner' - + title = u'derStandard' + __author__ = 'Gerhard Aigner' + description = u'Nachrichten aus Österreich' + publisher ='derStandard.at' + category = 'news, politics, nachrichten, Austria' + use_embedded_content = False + remove_empty_feeds = True + lang = 'de-AT' + no_stylesheets = True + encoding = 'utf-8' + language = _('German') + recursions = 0 oldest_article = 1 max_articles_per_feed = 100 + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'), (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'), (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'), @@ -20,17 +43,13 @@ class DerStandardRecipe(BasicNewsRecipe): (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'), (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'), (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')] - - encoding = 'utf-8' - language = _('German') - recursions = 0 remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'), dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')] preprocess_regexps = [ - (re.compile(r'\[[\d*]\]', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '') ] - + def print_version(self, url): return url.replace('?id=', 'txt/?id=') @@ -40,3 +59,10 @@ class DerStandardRecipe(BasicNewsRecipe): if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0): return None return article.link + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mtag = '' + soup.head.insert(0,mtag) + return soup \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_diepresse.py b/src/calibre/web/feeds/recipes/recipe_diepresse.py index c806575356..362a08fb3a 100644 --- a/src/calibre/web/feeds/recipes/recipe_diepresse.py +++ b/src/calibre/web/feeds/recipes/recipe_diepresse.py @@ -1,18 +1,42 @@ -import re +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2009, Gerhard Aigner ' + +''' http://www.diepresse.at - Austrian Newspaper ''' + +import re from calibre.web.feeds.news import BasicNewsRecipe class DiePresseRecipe(BasicNewsRecipe): - title = u'diePresse' + title = u'diePresse' + __author__ = 'Gerhard Aigner' + description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' + publisher ='DiePresse.com' + category = 'news, politics, nachrichten, Austria' + use_embedded_content = False + remove_empty_feeds = True + lang = 'de-AT' + no_stylesheets = True + encoding = 'ISO-8859-1' + language = _('German') + recursions = 0 oldest_article = 1 max_articles_per_feed = 100 - recursions = 0 - language = _('German') - __author__ = 'Gerhard Aigner' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + preprocess_regexps = [ (re.compile(r'Textversion', re.DOTALL), lambda match: ''), ] + remove_tags = [dict(name='hr'), dict(name='br'), dict(name='small'), @@ -21,6 +45,7 @@ class DiePresseRecipe(BasicNewsRecipe): dict(name='h1', attrs={'class':'titel'}), dict(name='a', attrs={'class':'print'}), dict(name='div', attrs={'class':'hline'})] + feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'), (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'), (u'Europa', u'http://diepresse.com/rss/EU'), @@ -29,7 +54,7 @@ class DiePresseRecipe(BasicNewsRecipe): (u'Kultur', u'http://diepresse.com/rss/Kultur'), (u'Leben', u'http://diepresse.com/rss/Leben'), (u'Tech', u'http://diepresse.com/rss/Tech'), - (u'Science', u'http://diepresse.com/rss/Science'), + (u'Wissenschaft', u'http://diepresse.com/rss/Science'), (u'Bildung', u'http://diepresse.com/rss/Bildung'), (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'), (u'Recht', u'http://diepresse.com/rss/Recht'), @@ -38,3 +63,10 @@ class DiePresseRecipe(BasicNewsRecipe): def print_version(self, url): return url.replace('home','text/home') + + def preprocess_html(self, soup): + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mtag = '' + soup.head.insert(0,mtag) + return soup \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_seattle_times.py b/src/calibre/web/feeds/recipes/recipe_seattle_times.py new file mode 100644 index 0000000000..695a82b5b4 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_seattle_times.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +seattletimes.nwsource.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class SeattleTimes(BasicNewsRecipe): + title = 'The Seattle Times' + __author__ = 'Darko Miletic' + description = 'News from Seattle and USA' + publisher = 'The Seattle Times' + category = 'news, politics, USA' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'cp1252' + language = _('English') + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + + feeds = [(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')] + + remove_tags = [ + dict(name=['object','link','script']) + ,dict(name='p', attrs={'class':'permission'}) + ] + + def print_version(self, url): + start_url, sep, rest_url = url.rpartition('_') + rurl, rsep, article_id = start_url.rpartition('/') + return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id + + def preprocess_html(self, soup): + mtag = '' + soup.head.insert(0,mtag) + for item in soup.findAll(style=True): + del item['style'] + return soup + diff --git a/src/pyPdf/generic.py b/src/pyPdf/generic.py index fb75ef3b3f..5447ef5fbc 100644 --- a/src/pyPdf/generic.py +++ b/src/pyPdf/generic.py @@ -299,7 +299,7 @@ def readStringFromStream(stream): elif tok == "t": tok = "\t" elif tok == "b": - tok == "\b" + tok = "\b" elif tok == "f": tok = "\f" elif tok == "(": @@ -673,7 +673,7 @@ class RectangleObject(ArrayObject): def getUpperLeft_x(self): return self.getLowerLeft_x() - + def getUpperLeft_y(self): return self.getUpperRight_y() diff --git a/src/pyPdf/pdf.py b/src/pyPdf/pdf.py index 362879a39a..710d128ad0 100644 --- a/src/pyPdf/pdf.py +++ b/src/pyPdf/pdf.py @@ -39,15 +39,12 @@ __author__ = "Mathieu Fenniak" __author_email__ = "biziqe@mathieu.fenniak.net" import struct -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +from cStringIO import StringIO -import filters -import utils -import warnings -from generic import * +from generic import DictionaryObject, NameObject, NumberObject, \ +createStringObject, ArrayObject, ByteStringObject, StreamObject, \ +IndirectObject, utils, readObject, TextStringObject, BooleanObject, \ +RectangleObject, DecodedStreamObject from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList @@ -56,6 +53,7 @@ from utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirt # class (typically {@link #PdfFileReader PdfFileReader}). class PdfFileWriter(object): def __init__(self,title=u"Unknown",author=u"Unknown"): + self.killed = False self._header = "%PDF-1.3" self._objects = [] # array of indirect objects @@ -162,7 +160,7 @@ class PdfFileWriter(object): # @param stream An object to write the file to. The object must support # the write method, and the tell method, similar to a file object. def write(self, stream): - import struct, md5 + import md5 externalReferenceMap = {} self.stack = [] @@ -209,11 +207,13 @@ class PdfFileWriter(object): if hasattr(self, "_encrypt"): trailer[NameObject("/Encrypt")] = self._encrypt trailer.writeToStream(stream, None) - + # eof stream.write("\nstartxref\n%s\n%%%%EOF\n" % (xref_location)) def _sweepIndirectReferences(self, externMap, data): + if self.killed: + raise RuntimeError('Writer killed') if isinstance(data, DictionaryObject): for key, value in data.items(): origvalue = value @@ -356,8 +356,8 @@ class PdfFileReader(object): return self.flattenedPages[pageNumber] ## - # Read-only property that accesses the - # {@link #PdfFileReader.getNamedDestinations + # Read-only property that accesses the + # {@link #PdfFileReader.getNamedDestinations # getNamedDestinations} function. #

# Stability: Added in v1.10, will exist for all future v1.x releases. @@ -374,7 +374,7 @@ class PdfFileReader(object): if retval == None: retval = {} catalog = self.trailer["/Root"] - + # get the name tree if catalog.has_key("/Dests"): tree = catalog["/Dests"] @@ -382,7 +382,7 @@ class PdfFileReader(object): names = catalog['/Names'] if names.has_key("/Dests"): tree = names['/Dests'] - + if tree == None: return retval @@ -420,17 +420,17 @@ class PdfFileReader(object): if outlines == None: outlines = [] catalog = self.trailer["/Root"] - + # get the outline dictionary and named destinations if catalog.has_key("/Outlines"): lines = catalog["/Outlines"] if lines.has_key("/First"): node = lines["/First"] self._namedDests = self.getNamedDestinations() - + if node == None: return outlines - + # see if there are any more outlines while 1: outline = self._buildOutline(node) @@ -454,10 +454,10 @@ class PdfFileReader(object): page, typ = array[0:2] array = array[2:] return Destination(title, page, typ, *array) - + def _buildOutline(self, node): dest, title, outline = None, None, None - + if node.has_key("/A") and node.has_key("/Title"): # Action, section 8.5 (only type GoTo supported) title = node["/Title"] @@ -951,7 +951,7 @@ class PageObject(DictionaryObject): def _pushPopGS(contents, pdf): # adds a graphics state "push" and "pop" to the beginning and end - # of a content stream. This isolates it from changes such as + # of a content stream. This isolates it from changes such as # transformation matricies. stream = ContentStream(contents, pdf) stream.operations.insert(0, [[], "q"]) @@ -1291,7 +1291,7 @@ class Destination(DictionaryObject): self[NameObject("/Title")] = title self[NameObject("/Page")] = page self[NameObject("/Type")] = typ - + # from table 8.2 of the PDF 1.6 reference. if typ == "/XYZ": (self[NameObject("/Left")], self[NameObject("/Top")], @@ -1307,7 +1307,7 @@ class Destination(DictionaryObject): pass else: raise utils.PdfReadError("Unknown Destination Type: %r" % typ) - + ## # Read-only property accessing the destination title. # @return A string. @@ -1474,25 +1474,25 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr # described in Algorithm 3.2. key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry) # 2. Initialize the MD5 hash function and pass the 32-byte padding string - # shown in step 1 of Algorithm 3.2 as input to this function. + # shown in step 1 of Algorithm 3.2 as input to this function. import md5 m = md5.new() m.update(_encryption_padding) # 3. Pass the first element of the file's file identifier array (the value # of the ID entry in the document's trailer dictionary; see Table 3.13 on # page 73) to the hash function and finish the hash. (See implementation - # note 25 in Appendix H.) + # note 25 in Appendix H.) m.update(id1_entry) md5_hash = m.digest() # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption - # function with the encryption key from step 1. + # function with the encryption key from step 1. val = utils.RC4_encrypt(key, md5_hash) # 5. Do the following 19 times: Take the output from the previous # invocation of the RC4 function and pass it as input to a new invocation # of the function; use an encryption key generated by taking each byte of # the original encryption key (obtained in step 2) and performing an XOR # operation between that byte and the single-byte value of the iteration - # counter (from 1 to 19). + # counter (from 1 to 19). for i in range(1, 20): new_key = '' for l in range(len(key)): @@ -1500,7 +1500,7 @@ def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encr val = utils.RC4_encrypt(new_key, val) # 6. Append 16 bytes of arbitrary padding to the output from the final # invocation of the RC4 function and store the 32-byte result as the value - # of the U entry in the encryption dictionary. + # of the U entry in the encryption dictionary. # (implementator note: I don't know what "arbitrary padding" is supposed to # mean, so I have used null bytes. This seems to match a few other # people's implementations)