PDF manipulation via the pdfmanipulate command. TXT and PDF output.

2025-07-09 03:04:10 -04:00 · 2009-04-01 14:07:43 -07:00 · 2009-04-01 14:07:43 -07:00 · b2e8618354
commit b2e8618354
parent 9ab8caf6f5 118fd6ece0
19 changed files with 1322 additions and 176 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -160,6 +160,16 @@ class ODTMetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata.odt import get_metadata
        return get_metadata(stream)
 class TXTMetadataReader(MetadataReaderPlugin):
    '''Metadata reader plugin registered for the ``txt`` file type.'''
    name = 'Read TXT metadata'
    file_types = set(('txt',))
    description = _('Read metadata from %s files') % 'TXT'

    def get_metadata(self, stream, ftype):
        '''Return the metadata read from ``stream``; ``ftype`` is not used.'''
        # The reader module is imported on first use, inside the method.
        from calibre.ebooks.metadata import txt as txt_metadata
        return txt_metadata.get_metadata(stream)
 class LRXMetadataReader(MetadataReaderPlugin):
    name        = 'Read LRX metadata'
@ -256,9 +266,11 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.customize.profiles import input_profiles, output_profiles
-plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
+plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput, TXTOutput, PDFOutput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -196,7 +196,7 @@ OptionRecommendation(name='language',
        self.input_fmt = input_fmt
        self.output_fmt = output_fmt
-        # Build set of all possible options. Two options are equal iff their
+        # Build set of all possible options. Two options are equal if and only if their
        # names are the same.
        self.input_options  = self.input_plugin.options.union(
                                    self.input_plugin.common_options)
--- a/src/calibre/ebooks/htmlsymbols.py
+++ b/src/calibre/ebooks/htmlsymbols.py
@ -0,0 +1,310 @@
 # -*- coding: utf-8 -*-
 '''
 Mapping of non-ASCII symbols and their corresponding HTML entity number and name
 '''
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 # http://www.w3schools.com/tags/ref_symbols.asp
 HTML_SYMBOLS = {
                # Math Symbols
                u'∀' : ['&#8704;', '&forall;'], # for all
                u'∂' : ['&#8706;', '&part;'], # part
                u'∃' : ['&#8707;', '&exists;'], # exists
                u'∅' : ['&#8709;', '&empty;'], # empty
                u'∇' : ['&#8711;', '&nabla;'], # nabla
                u'∈' : ['&#8712;', '&isin;'], # isin
                u'∉' : ['&#8713;', '&notin;'], # notin
                u'∋' : ['&#8715;', '&ni;'], # ni
                u'∏' : ['&#8719;', '&prod;'], # prod
                u'∑' : ['&#8721;', '&sum;'], # sum
                u'−' : ['&#8722;', '&minus;'], # minus
                u'∗' : ['&#8727;', '&lowast;'], # lowast
                u'√' : ['&#8730;', '&radic;'], # square root
                u'∝' : ['&#8733;', '&prop;'], # proportional to
                u'∞' : ['&#8734;', '&infin;'], # infinity
                u'∠' : ['&#8736;', '&ang;'], # angle
                u'∧' : ['&#8743;', '&and;'], # and
                u'∨' : ['&#8744;', '&or;'], # or
                u'∩' : ['&#8745;', '&cap;'], # cap
                u'∪' : ['&#8746;', '&cup;'], # cup
                u'∫' : ['&#8747;', '&int;'], # integral
                u'∴' : ['&#8756;', '&there4;'], # therefore
                u'∼' : ['&#8764;', '&sim;'], # similar to
                u'≅' : ['&#8773;', '&cong;'], # approximately equal
                u'≈' : ['&#8776;', '&asymp;'], # almost equal
                u'≠' : ['&#8800;', '&ne;'], # not equal
                u'≡' : ['&#8801;', '&equiv;'], # equivalent
                u'≤' : ['&#8804;', '&le;'], # less or equal
                u'≥' : ['&#8805;', '&ge;'], # greater or equal
                u'⊂' : ['&#8834;', '&sub;'], # subset of
                u'⊃' : ['&#8835;', '&sup;'], # superset of
                u'⊄' : ['&#8836;', '&nsub;'], # not subset of
                u'⊆' : ['&#8838;', '&sube;'], # subset or equal
                u'⊇' : ['&#8839;', '&supe;'], # superset or equal
                u'⊕' : ['&#8853;', '&oplus;'], # circled plus
                u'⊗' : ['&#8855;', '&otimes;'], # circled times
                u'⊥' : ['&#8869;', '&perp;'], # perpendicular
                u'⋅' : ['&#8901;', '&sdot;'], # dot operator
                # Greek Letters
                u'Α' : ['&#913;', '&Alpha;'], # Alpha
                u'Β' : ['&#914;', '&Beta;'], # Beta
                u'Γ' : ['&#915;', '&Gamma;'], # Gamma
                u'Δ' : ['&#916;', '&Delta;'], # Delta
                u'Ε' : ['&#917;', '&Epsilon;'], # Epsilon
                u'Ζ' : ['&#918;', '&Zeta;'], # Zeta
                u'Η' : ['&#919;', '&Eta;'], # Eta
                u'Θ' : ['&#920;', '&Theta;'], # Theta
                u'Ι' : ['&#921;', '&Iota;'], # Iota
                u'Κ' : ['&#922;', '&Kappa;'], # Kappa
                u'Λ' : ['&#923;', '&Lambda;'], # Lambda
                u'Μ' : ['&#924;', '&Mu;'], # Mu
                u'Ν' : ['&#925;', '&Nu;'], # Nu
                u'Ξ' : ['&#926;', '&Xi;'], # Xi
                u'Ο' : ['&#927;', '&Omicron;'], # Omicron
                u'Π' : ['&#928;', '&Pi;'], # Pi
                u'Ρ' : ['&#929;', '&Rho;'], # Rho
                u'Σ' : ['&#931;', '&Sigma;'], # Sigma
                u'Τ' : ['&#932;', '&Tau;'], # Tau
                u'Υ' : ['&#933;', '&Upsilon;'], # Upsilon
                u'Φ' : ['&#934;', '&Phi;'], # Phi
                u'Χ' : ['&#935;', '&Chi;'], # Chi
                u'Ψ' : ['&#936;', '&Psi;'], # Psi
                u'ω' : ['&#969;', '&omega;'], # omega
                u'ϑ' : ['&#977;', '&thetasym;'], # theta symbol
                u'ϒ' : ['&#978;', '&upsih;'], # upsilon symbol
                u'ϖ' : ['&#982;', '&piv;'], # pi symbol
                # Other
                u'Œ' : ['&#338;', '&OElig;'], # capital ligature OE
                u'œ' : ['&#339;', '&oelig;'], # small ligature oe
                u'Š' : ['&#352;', '&Scaron;'], # capital S with caron
                u'š' : ['&#353;', '&scaron;'], # small S with caron
                u'Ÿ' : ['&#376;', '&Yuml;'], # capital Y with diaeresis
                u'ƒ' : ['&#402;', '&fnof;'], # f with hook
                u'ˆ' : ['&#710;', '&circ;'], # modifier letter circumflex accent
                u'˜' : ['&#732;', '&tilde;'], # small tilde
                u'–' : ['&#8211;', '&ndash;'], # en dash
                u'—' : ['&#8212;', '&mdash;'], # em dash
                u'‘' : ['&#8216;', '&lsquo;'], # left single quotation mark
                u'’' : ['&#8217;', '&rsquo;'], # right single quotation mark
                u'‚' : ['&#8218;', '&sbquo;'], # single low-9 quotation mark
                u'“' : ['&#8220;', '&ldquo;'], # left double quotation mark
                u'”' : ['&#8221;', '&rdquo;'], # right double quotation mark
                u'„' : ['&#8222;', '&bdquo;'], # double low-9 quotation mark
                u'†' : ['&#8224;', '&dagger;'], # dagger
                u'‡' : ['&#8225;', '&Dagger;'], # double dagger
                u'•' : ['&#8226;', '&bull;'], # bullet
                u'…' : ['&#8230;', '&hellip;'], # horizontal ellipsis
                u'‰' : ['&#8240;', '&permil;'], # per mille 
                u'′' : ['&#8242;', '&prime;'], # minutes
                u'″' : ['&#8243;', '&Prime;'], # seconds
                u'‹' : ['&#8249;', '&lsaquo;'], # single left angle quotation
                u'›' : ['&#8250;', '&rsaquo;'], # single right angle quotation
                u'‾' : ['&#8254;', '&oline;'], # overline
                u'€' : ['&#8364;', '&euro;'], # euro
                u'™' : ['&#8482;', '&trade;'], # trademark
                u'←' : ['&#8592;', '&larr;'], # left arrow
                u'↑' : ['&#8593;', '&uarr;'], # up arrow
                u'→' : ['&#8594;', '&rarr;'], # right arrow
                u'↓' : ['&#8595;', '&darr;'], # down arrow
                u'↔' : ['&#8596;', '&harr;'], # left right arrow
                u'↵' : ['&#8629;', '&crarr;'], # carriage return arrow
                u'⌈' : ['&#8968;', '&lceil;'], # left ceiling
                u'⌉' : ['&#8969;', '&rceil;'], # right ceiling
                u'⌊' : ['&#8970;', '&lfloor;'], # left floor
                u'⌋' : ['&#8971;', '&rfloor;'], # right floor
                u'◊' : ['&#9674;', '&loz;'], # lozenge
                u'♠' : ['&#9824;', '&spades;'], # spade
                u'♣' : ['&#9827;', '&clubs;'], # club
                u'♥' : ['&#9829;', '&hearts;'], # heart
                u'♦' : ['&#9830;', '&diams;'], # diamond
                # Extra http://www.ascii.cl/htmlcodes.htm
                u' ' : ['&#32;'], # space
                u'!' : ['&#33;'], # exclamation point
                u'#' : ['&#35;'], # number sign
                u'$' : ['&#36;'], # dollar sign
                u'%' : ['&#37;'], # percent sign
                u'\'' : ['&#39;'], # single quote
                u'(' : ['&#40;'], # opening parenthesis
                u')' : ['&#41;'], # closing parenthesis
                u'*' : ['&#42;'], # asterisk
                u'+' : ['&#43;'], # plus sign
                u',' : ['&#44;'], # comma
                u'-' : ['&#45;'], # minus sign - hyphen
                u'.' : ['&#46;'], # period
                u'/' : ['&#47;'], # slash
                u'0' : ['&#48;'], # zero
                u'1' : ['&#49;'], # one
                u'2' : ['&#50;'], # two
                u'3' : ['&#51;'], # three
                u'4' : ['&#52;'], # four
                u'5' : ['&#53;'], # five
                u'6' : ['&#54;'], # six
                u'7' : ['&#55;'], # seven
                u'8' : ['&#56;'], # eight
                u'9' : ['&#57;'], # nine
                u':' : ['&#58;'], # colon
                u';' : ['&#59;'], # semicolon
                u'=' : ['&#61;'], # equal sign
                u'?' : ['&#63;'], # question mark
                u'@' : ['&#64;'], # at symbol
                u'A' : ['&#65;'], # 
                u'B' : ['&#66;'], # 
                u'C' : ['&#67;'], # 
                u'D' : ['&#68;'], # 
                u'E' : ['&#69;'], # 
                u'F' : ['&#70;'], # 
                u'G' : ['&#71;'], # 
                u'H' : ['&#72;'], # 
                u'I' : ['&#73;'], # 
                u'J' : ['&#74;'], # 
                u'K' : ['&#75;'], # 
                u'L' : ['&#76;'], # 
                u'M' : ['&#77;'], # 
                u'N' : ['&#78;'], # 
                u'O' : ['&#79;'], # 
                u'P' : ['&#80;'], # 
                u'Q' : ['&#81;'], # 
                u'R' : ['&#82;'], # 
                u'S' : ['&#83;'], # 
                u'T' : ['&#84;'], # 
                u'U' : ['&#85;'], # 
                u'V' : ['&#86;'], # 
                u'W' : ['&#87;'], # 
                u'X' : ['&#88;'], # 
                u'Y' : ['&#89;'], # 
                u'Z' : ['&#90;'], # 
                u'[' : ['&#91;'], # opening bracket
                u'\\' : ['&#92;'], # backslash
                u']' : ['&#93;'], # closing bracket
                u'^' : ['&#94;'], # caret - circumflex
                u'_' : ['&#95;'], # underscore
                u'`' : ['&#96;'], # grave accent
                u'a' : ['&#97;'], # 
                u'b' : ['&#98;'], # 
                u'c' : ['&#99;'], # 
                u'd' : ['&#100;'], # 
                u'e' : ['&#101;'], # 
                u'f' : ['&#102;'], # 
                u'g' : ['&#103;'], # 
                u'h' : ['&#104;'], # 
                u'i' : ['&#105;'], # 
                u'j' : ['&#106;'], # 
                u'k' : ['&#107;'], # 
                u'l' : ['&#108;'], # 
                u'm' : ['&#109;'], # 
                u'n' : ['&#110;'], # 
                u'o' : ['&#111;'], # 
                u'p' : ['&#112;'], # 
                u'q' : ['&#113;'], # 
                u'r' : ['&#114;'], # 
                u's' : ['&#115;'], # 
                u't' : ['&#116;'], # 
                u'u' : ['&#117;'], # 
                u'v' : ['&#118;'], # 
                u'w' : ['&#119;'], # 
                u'x' : ['&#120;'], # 
                u'y' : ['&#121;'], # 
                u'z' : ['&#122;'], # 
                u'{' : ['&#123;'], # opening brace
                u'|' : ['&#124;'], # vertical bar
                u'}' : ['&#125;'], # closing brace
                u'~' : ['&#126;'], # equivalency sign - tilde
                u'<' : ['&#60;', '&lt;'], # less than sign
                u'>' : ['&#62;', '&gt;'], # greater than sign
                u'¡' : ['&#161;', '&iexcl;'], # inverted exclamation mark
                u'¢' : ['&#162;', '&cent;'], # cent sign
                u'£' : ['&#163;', '&pound;'], # pound sign
                u'¤' : ['&#164;', '&curren;'], # currency sign
                u'¥' : ['&#165;', '&yen;'], # yen sign
                u'¦' : ['&#166;', '&brvbar;'], # broken vertical bar
                u'§' : ['&#167;', '&sect;'], # section sign
                u'¨' : ['&#168;', '&uml;'], # spacing diaeresis - umlaut
                u'©' : ['&#169;', '&copy;'], # copyright sign
                u'ª' : ['&#170;', '&ordf;'], # feminine ordinal indicator
                u'«' : ['&#171;', '&laquo;'], # left double angle quotes
                u'¬' : ['&#172;', '&not;'], # not sign
                u'®' : ['&#174;', '&reg;'], # registered trade mark sign
                u'¯' : ['&#175;', '&macr;'], # spacing macron - overline
                u'°' : ['&#176;', '&deg;'], # degree sign
                u'±' : ['&#177;', '&plusmn;'], # plus-or-minus sign
                u'²' : ['&#178;', '&sup2;'], # superscript two - squared
                u'³' : ['&#179;', '&sup3;'], # superscript three - cubed
                u'´' : ['&#180;', '&acute;'], # acute accent - spacing acute
                u'µ' : ['&#181;', '&micro;'], # micro sign
                u'¶' : ['&#182;', '&para;'], # pilcrow sign - paragraph sign
                u'·' : ['&#183;', '&middot;'], # middle dot - Georgian comma
                u'¸' : ['&#184;', '&cedil;'], # spacing cedilla
                u'¹' : ['&#185;', '&sup1;'], # superscript one
                u'º' : ['&#186;', '&ordm;'], # masculine ordinal indicator
                u'»' : ['&#187;', '&raquo;'], # right double angle quotes
                u'¼' : ['&#188;', '&frac14;'], # fraction one quarter
                u'½' : ['&#189;', '&frac12;'], # fraction one half
                u'¾' : ['&#190;', '&frac34;'], # fraction three quarters
                u'¿' : ['&#191;', '&iquest;'], # inverted question mark
                u'À' : ['&#192;', '&Agrave;'], # latin capital letter A with grave
                u'Á' : ['&#193;', '&Aacute;'], # latin capital letter A with acute
                u'Â' : ['&#194;', '&Acirc;'], # latin capital letter A with circumflex
                u'Ã' : ['&#195;', '&Atilde;'], # latin capital letter A with tilde
                u'Ä' : ['&#196;', '&Auml;'], # latin capital letter A with diaeresis
                u'Å' : ['&#197;', '&Aring;'], # latin capital letter A with ring above
                u'Æ' : ['&#198;', '&AElig;'], # latin capital letter AE
                u'Ç' : ['&#199;', '&Ccedil;'], # latin capital letter C with cedilla
                u'È' : ['&#200;', '&Egrave;'], # latin capital letter E with grave
                u'É' : ['&#201;', '&Eacute;'], # latin capital letter E with acute
                u'Ê' : ['&#202;', '&Ecirc;'], # latin capital letter E with circumflex
                u'Ë' : ['&#203;', '&Euml;'], # latin capital letter E with diaeresis
                u'Ì' : ['&#204;', '&Igrave;'], # latin capital letter I with grave
                u'Í' : ['&#205;', '&Iacute;'], # latin capital letter I with acute
                u'Î' : ['&#206;', '&Icirc;'], # latin capital letter I with circumflex
                u'Ï' : ['&#207;', '&Iuml;'], # latin capital letter I with diaeresis
                u'Ð' : ['&#208;', '&ETH;'], # latin capital letter ETH
                u'Ñ' : ['&#209;', '&Ntilde;'], # latin capital letter N with tilde
                u'Ò' : ['&#210;', '&Ograve;'], # latin capital letter O with grave
                u'Ó' : ['&#211;', '&Oacute;'], # latin capital letter O with acute
                u'Ô' : ['&#212;', '&Ocirc;'], # latin capital letter O with circumflex
                u'Õ' : ['&#213;', '&Otilde;'], # latin capital letter O with tilde
                u'Ö' : ['&#214;', '&Ouml;'], # latin capital letter O with diaeresis
                u'×' : ['&#215;', '&times;'], # multiplication sign
                u'Ø' : ['&#216;', '&Oslash;'], # latin capital letter O with slash
                u'Ù' : ['&#217;', '&Ugrave;'], # latin capital letter U with grave
                u'Ú' : ['&#218;', '&Uacute;'], # latin capital letter U with acute
                u'Û' : ['&#219;', '&Ucirc;'], # latin capital letter U with circumflex
                u'Ü' : ['&#220;', '&Uuml;'], # latin capital letter U with diaeresis
                u'Ý' : ['&#221;', '&Yacute;'], # latin capital letter Y with acute
                u'Þ' : ['&#222;', '&THORN;'], # latin capital letter THORN
                u'ß' : ['&#223;', '&szlig;'], # latin small letter sharp s - ess-zed
                u'à' : ['&#224;', '&agrave;'], # latin small letter a with grave
                u'á' : ['&#225;', '&aacute;'], # latin small letter a with acute
                u'â' : ['&#226;', '&acirc;'], # latin small letter a with circumflex
                u'ã' : ['&#227;', '&atilde;'], # latin small letter a with tilde
                u'ä' : ['&#228;', '&auml;'], # latin small letter a with diaeresis
                u'å' : ['&#229;', '&aring;'], # latin small letter a with ring above
                u'æ' : ['&#230;', '&aelig;'], # latin small letter ae
                u'ç' : ['&#231;', '&ccedil;'], # latin small letter c with cedilla
                u'è' : ['&#232;', '&egrave;'], # latin small letter e with grave
                u'é' : ['&#233;', '&eacute;'], # latin small letter e with acute
                u'ê' : ['&#234;', '&ecirc;'], # latin small letter e with circumflex
                u'ë' : ['&#235;', '&euml;'], # latin small letter e with diaeresis
                u'ì' : ['&#236;', '&igrave;'], # latin small letter i with grave
                u'í' : ['&#237;', '&iacute;'], # latin small letter i with acute
                u'î' : ['&#238;', '&icirc;'], # latin small letter i with circumflex
                u'ï' : ['&#239;', '&iuml;'], # latin small letter i with diaeresis
                u'ð' : ['&#240;', '&eth;'], # latin small letter eth
                u'ñ' : ['&#241;', '&ntilde;'], # latin small letter n with tilde
                u'ò' : ['&#242;', '&ograve;'], # latin small letter o with grave
                u'ó' : ['&#243;', '&oacute;'], # latin small letter o with acute
                u'ô' : ['&#244;', '&ocirc;'], # latin small letter o with circumflex
                u'õ' : ['&#245;', '&otilde;'], # latin small letter o with tilde
                u'ö' : ['&#246;', '&ouml;'], # latin small letter o with diaeresis
                u'÷' : ['&#247;', '&divide;'], # division sign
                u'ø' : ['&#248;', '&oslash;'], # latin small letter o with slash
                u'ù' : ['&#249;', '&ugrave;'], # latin small letter u with grave
                u'ú' : ['&#250;', '&uacute;'], # latin small letter u with acute
                u'û' : ['&#251;', '&ucirc;'], # latin small letter u with circumflex
                u'ü' : ['&#252;', '&uuml;'], # latin small letter u with diaeresis
                u'ý' : ['&#253;', '&yacute;'], # latin small letter y with acute
                u'þ' : ['&#254;', '&thorn;'], # latin small letter thorn
                u'ÿ' : ['&#255;', '&yuml;'], # latin small letter y with diaeresis
               }
--- a/src/calibre/ebooks/metadata/txt.py
+++ b/src/calibre/ebooks/metadata/txt.py
@ -0,0 +1,30 @@
 '''Read meta information from TXT files'''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 import re
 from calibre.ebooks.metadata import MetaInformation
 def get_metadata(stream, extract_cover=True):
    """
    Return metadata read from the head of a TXT file as a L{MetaInformation}.

    The first four lines of C{stream} are expected to hold the title line,
    two empty lines and a line with a comma separated list of authors. When
    they do not match that layout, title and authors keep the translated
    'Unknown' default.

    @param stream: Seekable file-like object; it is rewound before reading.
    @param extract_cover: Not used by this reader.
    @return: The L{MetaInformation} object that was filled in.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)
    head = []
    for lineno in range(4):
        line = stream.readline()
        if not line:
            # End of file reached before four lines were read.
            break
        head.append(line)
    mdata = ''.join(head)
    mo = re.search(r'(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        # '.+' is greedy and also matches '\r' and spaces, so the captured
        # groups can end in a stray carriage return (CRLF files) or in
        # trailing blanks; strip them off.
        mi.title = mo.group('title').strip()
        # Authors are usually written as "A, B": drop the blanks around each
        # name and ignore empty entries.
        authors = [a.strip() for a in mo.group('author').split(',')]
        authors = [a for a in authors if a]
        if authors:
            mi.authors = authors
    return mi
--- a/src/calibre/ebooks/pdf/from_any.py
+++ b/src/calibre/ebooks/pdf/from_any.py
@ -1,69 +0,0 @@
 '''
 Convert any ebook format to PDF.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
    'and Marshall T. Vandegrift <llasram@gmail.com>' \
    'and John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import sys, os, glob, logging
 from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
 from calibre.ebooks.epub import config as common_config
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.pdf.writer import oeb2pdf, config as pdf_config
 def config(defaults=None):
    '''
    Return the common conversion configuration, named 'pdf', without its
    'profile' option and updated with the PDF writer configuration.
    '''
    merged = common_config(defaults=defaults, name='pdf')
    merged.remove_opt('profile')
    merged.update(pdf_config(defaults=defaults))
    return merged
 def option_parser(usage=USAGE):
    '''
    Return the option parser built from config(), with ``usage`` filled in
    with 'PDF' and the result of formats().
    '''
    filled_usage = usage % ('PDF', formats())
    return config().option_parser(usage=filled_usage)
 def any2pdf(opts, path, notification=None):
    '''
    Convert the ebook at ``path`` to PDF by way of an intermediate OEB
    directory created inside a temporary directory.

    ``opts.output`` defaults to the base name of the input with a ``.pdf``
    extension and is made absolute. ``opts.profile`` and
    ``opts.dont_split_on_page_breaks`` are overwritten and stay that way;
    ``opts.output`` and ``opts.base_font_size2`` are only changed while the
    intermediate conversion runs. ``notification`` is not used.

    Raises ValueError when ``path`` has no file extension.
    '''
    if not os.path.splitext(path)[1]:
        raise ValueError('Unknown file type: '+path)
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.pdf'
    opts.output = os.path.abspath(opts.output)
    orig_output = opts.output
    with TemporaryDirectory('_any2pdf') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
        orig_bfs = opts.base_font_size2
        opts.output = os.path.join(tdir, 'dummy.epub')
        opts.profile = 'None'
        opts.dont_split_on_page_breaks = True
        opts.base_font_size2 = 0
        try:
            any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir)
        finally:
            # Undo the temporary overrides even when the conversion fails,
            # so the caller's options never keep pointing into the
            # temporary directory.
            opts.base_font_size2 = orig_bfs
            opts.output = orig_output
        # An IndexError here means that no OPF file was found in oebdir.
        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
        logging.getLogger('html2epub').info(_('Creating PDF file from EPUB...'))
        oeb2pdf(opts, opf)
 def main(args=sys.argv):
    '''
    Command line entry point: convert the file named by the first
    positional argument. Returns 1 when no input file was given.
    '''
    parser = option_parser()
    opts, positional = parser.parse_args(args)
    if len(positional) >= 2:
        any2pdf(opts, positional[1])
        return None
    parser.print_help()
    print('No input file specified.')
    return 1
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/info.py
+++ b/src/calibre/ebooks/pdf/info.py
@ -0,0 +1,90 @@
 '''
 Get information about PDF files.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os, re, sys, time
 from calibre.utils.config import Config, StringConfig
 from pyPdf import PdfFileWriter, PdfFileReader
 def config(defaults=None):
    '''
    Return the configuration object for this command: a StringConfig built
    from ``defaults`` when they are given, the 'manipulatepdf' Config
    otherwise.
    '''
    desc = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, desc)
    return Config('manipulatepdf', desc)
 def option_parser(name):
    '''
    Return the option parser for this command.

    ``name`` is the sub-command name shown in the usage message.
    '''
    c = config()
    # Translate the fixed template first and insert the command name
    # afterwards; substituting before the lookup would make the looked-up
    # message depend on ``name``.
    usage = _('''\
 	%prog %%name [options] file.pdf ...
 	Get info about a PDF.
 	''')
    return c.option_parser(usage=usage.replace('%%name', name))
 def print_info(pdf_path):
    '''
    Print information about the PDF file at ``pdf_path`` to standard output.

    The document information fields, page count and encryption flag come
    from the PDF reader. The two dates are the file's ctime and mtime as
    reported by the file system, formatted in GMT. File size and PDF
    version are printed on a best-effort basis.
    '''
    date_format = '%a %b %d %H:%M:%S %Y'
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        doc_info = pdf.documentInfo
        # Each label is translated before the value is inserted, so the
        # message looked up is the fixed label and not the formatted text.
        print(_('Title:                 %s') % doc_info.title)
        print(_('Author:                %s') % doc_info.author)
        print(_('Subject:               %s') % doc_info.subject)
        print(_('Creator:               %s') % doc_info.creator)
        print(_('Producer:              %s') % doc_info.producer)
        print(_('Creation Date:         %s') % time.strftime(date_format, time.gmtime(os.path.getctime(pdf_path))))
        print(_('Modification Date:     %s') % time.strftime(date_format, time.gmtime(os.path.getmtime(pdf_path))))
        print(_('Pages:                 %s') % pdf.numPages)
        print(_('Encrypted:             %s') % pdf.isEncrypted)
        try:
            print(_('File Size:             %s bytes') % os.path.getsize(pdf_path))
        except Exception:
            # Best effort: the size line is simply left out on failure.
            pass
        try:
            # The version is taken from the header on the first line of the
            # file, e.g. "%PDF-1.4".
            pdf_file.seek(0)
            vline = pdf_file.readline()
            mo = re.search(r'(?iu)^%...-(?P<version>\d+\.\d+)', vline)
            if mo is not None:
                print(_('PDF Version:           %s') % mo.group('version'))
        except Exception:
            # Best effort: the version line is simply left out on failure.
            pass
 def verify_files(files):
    '''
    Return the entries of ``files`` that could not be opened and loaded
    with the PDF reader, in their original order.
    '''
    invalid = []
    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                PdfFileReader(pdf_file)
        except Exception:
            # Any failure marks the file as unusable. KeyboardInterrupt and
            # SystemExit are deliberately not caught here.
            invalid.append(pdf_path)
    return invalid
 def main(args=sys.argv, name=''):
    '''
    Entry point of the info command: print information about every PDF
    given on the command line.

    ``args`` is the argument list including the program name and ``name``
    is the sub-command name used in the usage message. Returns 0 on
    success and 2 when no file was given or a file could not be read.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    # Drop the program name; what is left are the PDF paths.
    args = args[1:]
    if not args:
        print('Error: No PDF specified.\n')
        print(parser.get_usage())
        return 2
    bad_pdfs = verify_files(args)
    if bad_pdfs:
        for pdf in bad_pdfs:
            print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2
    for pdf in args:
        print_info(pdf)
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/manipulate.py
+++ b/src/calibre/ebooks/pdf/manipulate.py
@ -0,0 +1,69 @@
 '''
 Command line interface to run pdf manipulation commands.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import string, sys
 from calibre.utils.config import Config, StringConfig
 from calibre.ebooks.pdf import info, merge, reverse, split, trim
 # Maps each sub-command name accepted on the command line to the imported
 # module whose main() is called for it.
 COMMANDS = dict(
    info=info,
    merge=merge,
    reverse=reverse,
    split=split,
    trim=trim,
 )
 def config(defaults=None):
    '''
    Build the configuration object of the command: the 'manipulatepdf'
    Config, or a StringConfig when ``defaults`` are supplied.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
 def option_parser():
    '''
    Return the option parser of the top-level command. Its usage message
    lists the names of all known sub-commands in sorted order.
    '''
    c = config()
    # Translate the fixed template first and insert the command list
    # afterwards; substituting before the lookup would make the looked-up
    # message depend on the contents of COMMANDS.
    usage = _('''\
 	%prog command ...
 	command can be one of the following:
 	[%%commands]
 	Use %prog command --help to get more information about a specific command
 	Manipulate a PDF.
 	''')
    return c.option_parser(usage=usage.replace('%%commands', ', '.join(sorted(COMMANDS))))
 def main(args=sys.argv):
    '''
    Entry point of the top-level command: dispatch to the sub-command named
    by the first argument.

    ``args`` is the argument list including the program name. Returns the
    result of the sub-command's main(), or 2 when the command is missing or
    unknown.
    '''
    parser = option_parser()
    if len(args) < 2:
        print('Error: No command specified.\n')
        print(parser.get_usage())
        return 2
    command = args[1].lower().strip()
    if command in COMMANDS:
        # Hand the sub-command a copy of the arguments without the command
        # name. Deleting from ``args`` in place would also modify sys.argv
        # when the default argument is in use.
        sub_args = list(args[:1]) + list(args[2:])
        return COMMANDS[command].main(sub_args, command)
    # The parser gets to see the arguments before the error is reported
    # (presumably so that options such as --help still work - confirm).
    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print(parser.get_usage())
    return 2
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/merge.py
+++ b/src/calibre/ebooks/pdf/merge.py
@ -0,0 +1,91 @@
 '''
 Merge PDF files into a single PDF document.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os, sys
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.config import Config, StringConfig
 from pyPdf import PdfFileWriter, PdfFileReader
 def config(defaults=None):
    '''
    Return the configuration object of the merge command, with its
    -o/--output option (default 'merged.pdf') added.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('mergepdf', description)
    output_help = _('Path to output file. By default a file is created in the current directory.')
    conf.add_opt('output', ['-o', '--output'], default='merged.pdf', help=output_help)
    return conf
 def option_parser(name):
    '''
    Return the option parser for the merge command.

    ``name`` is the sub-command name shown in the usage message.
    '''
    c = config()
    # Translate the fixed template first and insert the command name
    # afterwards; substituting before the lookup would make the looked-up
    # message depend on ``name``.
    usage = _('''\
 	%prog %%name [options] file1.pdf file2.pdf ...
 	Merges individual PDFs. Metadata will be used from the first PDF specified.
 	''')
    return c.option_parser(usage=usage.replace('%%name', name))
 def merge_files(in_paths, out_path, metadata=None):
    '''
    Write the pages of all PDFs in ``in_paths``, in order, to ``out_path``.

    ``metadata`` supplies the title and authors of the merged document;
    when it is None both are set to the translated 'Unknown'.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)
    out_pdf = PdfFileWriter(title=title, author=author)
    opened = []
    try:
        for pdf_path in in_paths:
            pdf_file = open(os.path.abspath(pdf_path), 'rb')
            opened.append(pdf_file)
            for page in PdfFileReader(pdf_file).pages:
                out_pdf.addPage(page)
        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        # The inputs stay open until the output has been written, as in the
        # original code (NOTE(review): the page objects presumably still
        # read from them - confirm against pyPdf), and are then closed
        # instead of being leaked.
        for pdf_file in opened:
            pdf_file.close()
 def verify_files(files):
    '''
    Return the entries of ``files`` that cannot be merged, in their
    original order: files that fail to open or load, are encrypted, or
    have no pages.
    '''
    invalid = []
    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                pdf = PdfFileReader(pdf_file)
                usable = not (pdf.isEncrypted or pdf.numPages <= 0)
        except Exception:
            # Any failure marks the file as unusable. KeyboardInterrupt and
            # SystemExit are deliberately not caught here.
            usable = False
        if not usable:
            invalid.append(pdf_path)
    return invalid
 def main(args=sys.argv, name=''):
    '''
    Entry point of the merge command: merge the PDFs given on the command
    line into the file named by the output option.

    ``args`` is the argument list including the program name and ``name``
    is the sub-command name used in the usage message. The metadata of the
    first PDF is passed on to the merged file. Returns 0 on success and 2
    when fewer than two files were given or a file is not usable.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    # Drop the program name; what is left are the PDF paths.
    args = args[1:]
    if len(args) < 2:
        print('Error: Two or more PDF files are required.\n\n')
        print(parser.get_usage())
        return 2
    bad_pdfs = verify_files(args)
    if bad_pdfs:
        for pdf in bad_pdfs:
            print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2
    mi = metadata_from_formats([args[0]])
    merge_files(args, opts.output, mi)
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/output.py
+++ b/src/calibre/ebooks/pdf/output.py
@ -0,0 +1,85 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Convert OEB ebook format to PDF.
 '''
 #unit, papersize, orientation, custom_size, profile
 import os
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.pdf.writer import PDFWriter
 from calibre.ebooks.pdf.pageoptions import UNITS, unit, PAPER_SIZES, \
    paper_size, ORIENTATIONS, orientation, PageOptions
 class PDFOutput(OutputFormatPlugin):
    """Output plugin that renders an OEB book to PDF through PDFWriter.

    The options control the page margins, the unit the margins are expressed
    in, the paper size and the page orientation.
    """

    name = 'PDF Output'
    author = 'John Schember'
    file_type = 'pdf'

    # The help templates are translated first and interpolated afterwards, so
    # the string handed to _() is the constant template rather than text that
    # already contains the list of choices.
    options = set([
                    OptionRecommendation(name='margin_top', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_top',
                        help=_('The top margin around the document.')),
                    OptionRecommendation(name='margin_bottom', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_bottom',
                        help=_('The bottom margin around the document.')),
                    OptionRecommendation(name='margin_left', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_left',
                        help=_('The left margin around the document.')),
                    OptionRecommendation(name='margin_right', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_right',
                        help=_('The right margin around the document.')),
                    OptionRecommendation(name='unit', recommended_value='inch',
                        level=OptionRecommendation.LOW, short_switch='u',
                        long_switch='unit', choices=UNITS.keys(),
                        help=_('The unit of measure. Default is inch. Choices '
                        'are %s') % UNITS.keys()),
                    OptionRecommendation(name='paper_size', recommended_value='letter',
                        level=OptionRecommendation.LOW,
                        long_switch='paper_size', choices=PAPER_SIZES.keys(),
                        help=_('The size of the paper. Default is letter. Choices '
                        'are %s') % PAPER_SIZES.keys()),
                    OptionRecommendation(name='orientation', recommended_value='portrait',
                        level=OptionRecommendation.LOW,
                        long_switch='orientation', choices=ORIENTATIONS.keys(),
                        help=_('The orientation of the page. Default is portrait. Choices '
                        'are %s') % ORIENTATIONS.keys()),
                 ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Render the spine of `oeb_book` to PDF.

        `output_path` is either a filesystem path or an object with a
        ``write`` method. A path is opened here (missing parent directories
        are created) and closed again even if rendering fails; a stream
        passed in by the caller is left open.
        """
        popts = PageOptions()
        popts.set_margin_top(opts.margin_top)
        popts.set_margin_bottom(opts.margin_bottom)
        popts.set_margin_left(opts.margin_left)
        popts.set_margin_right(opts.margin_right)
        popts.unit = unit(opts.unit)
        popts.paper_size = paper_size(opts.paper_size)
        popts.orientation = orientation(opts.orientation)

        writer = PDFWriter(log, popts)

        close = not hasattr(output_path, 'write')
        if close:
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            # Start from an empty stream whatever it held before.
            out_stream.seek(0)
            out_stream.truncate()
            writer.dump(oeb_book.spine, out_stream)
        finally:
            if close:
                out_stream.close()
--- a/src/calibre/ebooks/pdf/pageoptions.py
+++ b/src/calibre/ebooks/pdf/pageoptions.py
@ -0,0 +1,98 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from PyQt4.Qt import QPrinter
 # Maps the unit names accepted on the command line to QPrinter unit constants.
 UNITS = {
            'millimeter' : QPrinter.Millimeter,
            'point' : QPrinter.Point,
            'inch' : QPrinter.Inch,
            'pica' : QPrinter.Pica,
            'didot' : QPrinter.Didot,
            'cicero' : QPrinter.Cicero,
            'devicepixel' : QPrinter.DevicePixel,
        }
 def unit(unit):
    """Return the QPrinter unit constant named `unit`.

    Names that are not keys of UNITS yield QPrinter.Inch.
    """
    if unit in UNITS:
        return UNITS[unit]
    return QPrinter.Inch
 # Maps lower-case paper size names to QPrinter paper size constants.
 # The trailing comment on each entry gives the physical dimensions.
 PAPER_SIZES = {
                'a0' : QPrinter.A0, # 841 x 1189 mm
                'a1' : QPrinter.A1, # 594 x 841 mm
                'a2' : QPrinter.A2, # 420 x 594 mm
                'a3' : QPrinter.A3, # 297 x 420 mm
                'a4' : QPrinter.A4, # 210 x 297 mm, 8.26 x 11.69 inches
                'a5' : QPrinter.A5, # 148 x 210 mm
                'a6' : QPrinter.A6, # 105 x 148 mm
                'a7' : QPrinter.A7, # 74 x 105 mm
                'a8' : QPrinter.A8, # 52 x 74 mm
                'a9' : QPrinter.A9, # 37 x 52 mm
                'b0' : QPrinter.B0, # 1030 x 1456 mm
                'b1' : QPrinter.B1, # 728 x 1030 mm
                'b2' : QPrinter.B2, # 515 x 728 mm
                'b3' : QPrinter.B3, # 364 x 515 mm
                'b4' : QPrinter.B4, # 257 x 364 mm
                'b5' : QPrinter.B5, # 182 x 257 mm, 7.17 x 10.13 inches
                'b6' : QPrinter.B6, # 128 x 182 mm
                'b7' : QPrinter.B7, # 91 x 128 mm
                'b8' : QPrinter.B8, # 64 x 91 mm
                'b9' : QPrinter.B9, # 45 x 64 mm
                'b10' : QPrinter.B10, # 32 x 45 mm
                'c5e' : QPrinter.C5E, # 163 x 229 mm
                'comm10e' : QPrinter.Comm10E, # 105 x 241 mm, U.S. Common 10 Envelope
                'dle' : QPrinter.DLE, # 110 x 220 mm
                'executive' : QPrinter.Executive, # 7.5 x 10 inches, 191 x 254 mm
                'folio' : QPrinter.Folio, # 210 x 330 mm
                'ledger' : QPrinter.Ledger, # 432 x 279 mm
                'legal' : QPrinter.Legal, # 8.5 x 14 inches, 216 x 356 mm
                'letter' : QPrinter.Letter, # 8.5 x 11 inches, 216 x 279 mm
                'tabloid' : QPrinter.Tabloid, #  279 x 432 mm
                #'custom' : QPrinter.Custom, # Unknown, or a user defined size.
             }
 def paper_size(size):
    """Return the QPrinter paper size constant named `size`.

    Names that are not keys of PAPER_SIZES yield QPrinter.Letter.
    """
    if size in PAPER_SIZES:
        return PAPER_SIZES[size]
    return QPrinter.Letter
 # Maps orientation names to QPrinter orientation constants.
 ORIENTATIONS = {
                'portrait' : QPrinter.Portrait,
                'landscape' : QPrinter.Landscape,
               }
 def orientation(orientation):
    """Return the QPrinter orientation constant named `orientation`.

    Names that are not keys of ORIENTATIONS yield QPrinter.Portrait.
    """
    if orientation in ORIENTATIONS:
        return ORIENTATIONS[orientation]
    return QPrinter.Portrait
 class PageOptions(object):
    """Page layout settings: four margins, their unit, paper size, orientation.

    The class attributes are the defaults. The ``set_margin_*`` methods
    accept numbers or numeric strings, including fractional values such as
    '0.5'; a value that cannot be parsed resets that margin to 1.
    """

    margin_top = 1
    margin_bottom = 1
    margin_left = 1
    margin_right = 1
    unit = QPrinter.Inch
    paper_size = QPrinter.Letter
    orientation = QPrinter.Portrait

    @staticmethod
    def _parse_margin(size):
        # Parse as float so that fractional margins are kept instead of
        # being rejected (strings) or truncated (floats) by int().
        try:
            return float(size)
        except (TypeError, ValueError):
            return 1

    def set_margin_top(self, size):
        """Set the top margin from `size`; unparsable values become 1."""
        self.margin_top = self._parse_margin(size)

    def set_margin_bottom(self, size):
        """Set the bottom margin from `size`; unparsable values become 1."""
        self.margin_bottom = self._parse_margin(size)

    def set_margin_left(self, size):
        """Set the left margin from `size`; unparsable values become 1."""
        self.margin_left = self._parse_margin(size)

    def set_margin_right(self, size):
        """Set the right margin from `size`; unparsable values become 1."""
        self.margin_right = self._parse_margin(size)
--- a/src/calibre/ebooks/pdf/reverse.py
+++ b/src/calibre/ebooks/pdf/reverse.py
@ -0,0 +1,88 @@
 # -*- coding: utf-8 -*-
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Reverse content of PDF.
 '''
 import os, sys
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.config import Config, StringConfig
 from pyPdf import PdfFileWriter, PdfFileReader
 def config(defaults=None):
    """Return the configuration object for the reverse command.

    A Config named 'reversepdf' is used when `defaults` is None, otherwise a
    StringConfig built from `defaults`. The single option is ``output``.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('reversepdf', description)
    output_help = _('Path to output file. By default a file is created in the current directory.')
    conf.add_opt('output', ['-o', '--output'], default='reversed.pdf',
          help=output_help)
    return conf
 def option_parser(name):
    """Build the command-line option parser for the reverse command.

    `name` is substituted for the ``%%name`` placeholder in the usage text
    before the text is passed through the translation function.
    """
    usage = '''\
 	%prog %%name [options] file1.pdf
 	Reverse PDF.
 	'''.replace('%%name', name)
    return config().option_parser(usage=_(usage))
 def reverse(pdf_path, out_path, metadata=None):
    """Write the pages of the PDF at `pdf_path` to `out_path` in reverse order.

    `metadata`, when given, supplies the title and authors of the output;
    otherwise both are set to the translated string 'Unknown'.
    """
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input stays open until the output has been written, because the
    # reader may still pull page data from it while the writer serialises;
    # the with-block then closes it instead of leaking the handle.
    with open(os.path.abspath(pdf_path), 'rb') as in_file:
        pdf = PdfFileReader(in_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
 def valid_pdf(pdf_path):
    """Return True if the file at `pdf_path` is a usable PDF.

    The file must open and parse, must not be encrypted and must contain at
    least one page. Any failure while opening or parsing yields False.
    """
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not pdf.isEncrypted and pdf.numPages > 0
    except Exception:
        # Unreadable or malformed input; KeyboardInterrupt and SystemExit
        # are deliberately not caught here.
        return False
 def main(args=sys.argv, name=''):
    """Command-line entry point for reversing a PDF.

    `args` is an argv-style list (its first positional element is skipped)
    and `name` is the sub-command name shown in the usage text.
    Returns 0 on success and 2 when no file is given or the file is not a
    readable, unencrypted PDF.
    """
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]

    if len(args) < 1:
        print('Error: A PDF file is required.\n\n')
        print(parser.get_usage())
        return 2

    if not valid_pdf(args[0]):
        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % args[0])
        return 2

    mi = metadata_from_formats([args[0]])
    reverse(args[0], opts.output, mi)
    return 0
 # When run as a script, call main() and use its return value as the exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/split.py
+++ b/src/calibre/ebooks/pdf/split.py
@ -0,0 +1,186 @@
 '''
 Split PDF file into multiple PDF documents.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os, sys, re
 from calibre.ebooks.metadata.meta import metadata_from_formats
 from calibre.ebooks.metadata import authors_to_string
 from calibre.utils.config import Config, StringConfig
 from pyPdf import PdfFileWriter, PdfFileReader
 def config(defaults=None):
    """Return the configuration object for the split command.

    A Config named 'splitpdf' is used when `defaults` is None, otherwise a
    StringConfig built from `defaults`. The single option is ``output``.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('splitpdf', description)
    conf.add_opt('output', ['-o', '--output'], default='split.pdf',
          help=_('Path to output file. By default a file is created in the current directory. \
            The file name will be the base name for the output.'))
    return conf
 def option_parser(name):
    """Build the command-line option parser for the split command.

    `name` is substituted for the ``%%name`` placeholder in the usage text
    before the text is passed through the translation function.
    """
    usage = '''\
 	%prog %%name [options] file.pdf page_to_split_on ...
 	%prog %%name [options] file.pdf page_range_to_split_on ...
 	Ex.
 	%prog %%name file.pdf 6
 	%prog %%name file.pdf 6-12
 	%prog %%name file.pdf 6-12 8 10 9-20
 	Split a PDF.
 	'''.replace('%%name', name)
    return config().option_parser(usage=_(usage))
 def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
    """Write one output PDF per entry of `pages` and `page_ranges`.

    `pages` holds zero-based page indexes; each produces a file covering
    that page through the last page of the input. `page_ranges` holds
    zero-based ``[start, end]`` pairs; each produces a file covering that
    inclusive range. Output names are `out_name` followed by the one-based
    page number or range and ``.pdf``.
    """
    # The input is closed once every part has been written instead of being
    # left open for the lifetime of the process.
    with open(os.path.abspath(in_path), 'rb') as in_file:
        pdf = PdfFileReader(in_file)
        last_page = pdf.numPages - 1

        for index in pages:
            write_pdf(pdf, out_name, '%s' % (index + 1), index, last_page, metadata)
        for start, end in page_ranges:
            write_pdf(pdf, out_name, '%s-%s' % (start + 1, end + 1), start, end, metadata)
 def write_pdf(pdf, name, suffix, start, end, metadata=None):
    """Write pages `start` through `end` (inclusive) of `pdf` to a new file.

    The file is named ``<name><suffix>.pdf``. `metadata`, when given,
    supplies the title and authors; otherwise both are set to the
    translated string 'Unknown'.
    """
    # Identity test: a metadata object's own equality rules must not decide
    # whether metadata was supplied.
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)
    for page_num in range(start, end + 1):
        out_pdf.addPage(pdf.getPage(page_num))

    with open('%s%s.pdf' % (name, suffix), 'wb') as out_file:
        out_pdf.write(out_file)
 def split_args(args):
    """Classify command-line arguments for the split command.

    Returns ``(pdf, pages, page_ranges, bad)``:

    * `pdf` - the first argument ending in ``.pdf`` (case-insensitive), or
      '' when there is none;
    * `pages` - single page numbers, as strings, in the order given;
    * `page_ranges` - ``[start, end]`` pairs of strings for ``N-M``
      arguments (``N-N`` is stored in `pages` instead);
    * `bad` - the sorted, de-duplicated arguments that fit none of the
      above, including any additional ``.pdf`` arguments.
    """
    pdf = ''
    pages = []
    page_ranges = []
    bad = []

    for arg in args:
        arg = arg.strip()

        # Find the pdf input
        if re.search(r'(?iu)^.*?\.pdf[ ]*$', arg) is not None:
            if pdf == '':
                pdf = arg
            else:
                bad.append(arg)
            continue

        # Find single indexes
        if re.search(r'^[ ]*\d+[ ]*$', arg) is not None:
            pages.append(arg)
            continue

        # Find index ranges; one match both tests and extracts the bounds.
        mo = re.search(r'^[ ]*(?P<start>\d+)[ ]*-[ ]*(?P<end>\d+)[ ]*$', arg)
        if mo is None:
            bad.append(arg)
            continue

        start = mo.group('start')
        end = mo.group('end')
        # A range whose bounds are equal is really a single index.
        if start == end:
            pages.append(start)
        else:
            page_ranges.append([start, end])

    return pdf, pages, page_ranges, sorted(set(bad))
 def clean_page_list(pdf_path, pages, page_ranges):
    """Normalise the requested split points against the real page count.

    `pages` and `page_ranges` hold one-based page numbers (strings or ints).
    Numbers beyond the last page are set to the last page and numbers below
    1 are set to the first page. A range lying entirely beyond the last
    page becomes the single last page. Duplicates are removed.

    Returns ``(pages, page_ranges)`` as a sorted list of zero-based ints and
    a sorted list of zero-based ``[start, end]`` pairs.
    """
    # Only the page count is needed, so the file is closed straight away.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        total_pages = PdfFileReader(pdf_file).numPages

    def to_index(page_number):
        # Clamp into 1..total_pages before converting to a zero-based index,
        # so that page 0 cannot turn into index -1.
        return max(1, min(int(page_number), total_pages)) - 1

    single_pages = set(to_index(index) for index in pages)

    cleaned_ranges = []
    for start, end in page_ranges:
        if int(start) > total_pages and int(end) > total_pages:
            single_pages.add(total_pages - 1)
            continue
        page_range = sorted([to_index(start), to_index(end)])
        if page_range not in cleaned_ranges:
            cleaned_ranges.append(page_range)

    return sorted(single_pages), sorted(cleaned_ranges)
 def valid_pdf(pdf_path):
    """Return True if the file at `pdf_path` is a usable PDF.

    The file must open and parse, must not be encrypted and must contain at
    least one page. Any failure while opening or parsing yields False.
    """
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not pdf.isEncrypted and pdf.numPages > 0
    except Exception:
        # Unreadable or malformed input; KeyboardInterrupt and SystemExit
        # are deliberately not caught here.
        return False
 def main(args=sys.argv, name=''):
    """Command-line entry point for splitting a PDF.

    `args` is an argv-style list (its first positional element is skipped)
    and `name` is the sub-command name shown in the usage text.
    Returns 0 on success and 2 when the PDF or the split points are missing,
    when an argument is not understood, or when the PDF cannot be read.
    """
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    pdf, pages, page_ranges, unknown = split_args(args[1:])

    # Both a PDF and at least one split point (page or range) are required.
    if pdf == '' or (pages == [] and page_ranges == []):
        print('Error: PDF and where to split is required.\n\n')
        print(parser.get_usage())
        return 2

    if unknown:
        for arg in unknown:
            print('Error: Unknown argument `%s`' % arg)
        print(parser.get_usage())
        return 2

    if not valid_pdf(pdf):
        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
    mi = metadata_from_formats([pdf])
    # The extension of the output option is dropped; the rest is the base
    # name for the generated files.
    split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi)
    return 0
 # When run as a script, call main() and use its return value as the exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/pdf/pdftrim.py
+++ b/src/calibre/ebooks/pdf/pdftrim.py
@ -16,8 +16,6 @@ def config(defaults=None):
        c = Config('trimpdf', desc)
    else:
        c = StringConfig(defaults, desc)
    c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
          help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
    c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
          help=_('Path to output file. By default a file is created in the current directory.'))
    c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
@ -33,16 +31,16 @@ def config(defaults=None):
    return c
-def option_parser():
+def option_parser(name):
    c = config()
    return c.option_parser(usage=_('''\
-	%prog [options] file.pdf
+	%prog %%name [options] file.pdf
 	Crops a pdf. 
-	'''))
+	'''.replace('%%name', name)))
-def main(args=sys.argv):
+def main(args=sys.argv, name=''):
-    parser = option_parser()
+    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    try:
        source = os.path.abspath(args[1])
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@ -1,19 +1,18 @@
-'''
+# -*- coding: utf-8 -*-
 Write content to PDF.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import os, logging, shutil, sys
+'''
 Write content to PDF.
 '''
 import os, shutil, sys
 from calibre import LoggingInterface
 from calibre.ebooks.epub.iterator import SpineItem
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.customize.ui import run_plugins_on_postprocess
+from calibre.ebooks.pdf.pageoptions import PageOptions
 from calibre.utils.config import Config, StringConfig
 from PyQt4 import QtCore
 from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, \
@ -22,20 +21,13 @@ from PyQt4.QtWebKit import QWebView
 from pyPdf import PdfFileWriter, PdfFileReader
 class PDFMargins:
    """Holds the four page margins; all start at the same `margin` value."""

    def __init__(self, margin=1):
        self.top = self.bottom = self.left = self.right = margin
 class PDFWriter(QObject):
-    def __init__(self, margins=PDFMargins()):
+    def __init__(self, log, popts=PageOptions()):
        if QApplication.instance() is None:
            QApplication([])
        QObject.__init__(self)
-        self.logger = logging.getLogger('oeb2pdf')
+        self.logger = log
        self.loop = QEventLoop()
        self.view = QWebView()
@ -43,15 +35,14 @@ class PDFWriter(QObject):
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory('_any2pdf_parts')
-        self.margins = margins
+        self.popts = popts
-    def dump(self, oebpath, path):
+    def dump(self, spine, out_stream):
        self._delete_tmpdir()
-        opf = OPF(oebpath, os.path.dirname(oebpath))
+        self.render_queue = spine[:]
        self.render_queue = [SpineItem(i.path) for i in opf.spine]
        self.combine_queue = []
-        self.path = path
+        self.out_stream = out_stream
        QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
        self.loop.exec_()
@ -78,7 +69,9 @@ class PDFWriter(QObject):
            self.logger.debug('\tRendering item as %s' % item_path)
            printer = QPrinter(QPrinter.HighResolution)
-            printer.setPageMargins(self.margins.left, self.margins.top, self.margins.right, self.margins.bottom, QPrinter.Inch)
+            printer.setPageMargins(self.popts.margin_left, self.popts.margin_top, self.popts.margin_right, self.popts.margin_bottom, self.popts.unit)
            printer.setPaperSize(self.popts.paper_size)
            printer.setOrientation(self.popts.orientation)
            printer.setOutputFormat(QPrinter.PdfFormat)
            printer.setOutputFileName(item_path)
            self.view.print_(printer)
@ -98,75 +91,7 @@ class PDFWriter(QObject):
                inputPDF = PdfFileReader(file(item, 'rb'))
                for page in inputPDF.pages:
                    outPDF.addPage(page)
-            outputStream = file(self.path, 'wb')
+            outPDF.write(self.out_stream)
            outPDF.write(outputStream)
            outputStream.close()
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
 def config(defaults=None):
    """Return the configuration object for PDF conversion.

    A Config named 'pdf' is used when `defaults` is None, otherwise a
    StringConfig built from `defaults`. A 'PDF' option group is added with
    the four margin options, each defaulting to 1.
    """
    description = _('Options to control the conversion to PDF')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('pdf', description)

    add_pdf_opt = conf.add_group('PDF', _('PDF options.'))
    margin_options = (
        ('margin_top', _('The top margin around the document in inches.')),
        ('margin_bottom', _('The bottom margin around the document in inches.')),
        ('margin_left', _('The left margin around the document in inches.')),
        ('margin_right', _('The right margin around the document in inches.')),
    )
    for opt_name, help_text in margin_options:
        add_pdf_opt(opt_name, ['--' + opt_name], default=1, help=help_text)
    return conf
 def option_parser():
    """Build the command-line parser: config() options plus -o/--output and -v/--verbose."""
    conf = config()
    usage_text = '%prog ' + _('[options]') + ' file.opf'
    parser = conf.option_parser(usage=usage_text)

    output_help = _('Output file. Default is derived from input filename.')
    parser.add_option('-o', '--output', default=None, help=output_help)

    verbose_help = _('Useful for debugging.')
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help=verbose_help)
    return parser
 def oeb2pdf(opts, inpath):
    """Convert the OPF at `inpath` to PDF using the settings in `opts`.

    When ``opts.output`` is None the output name is the input's base name
    with its extension replaced by ``.pdf``. The post-process plugins are
    run on the result and the output location is logged.
    """
    logger = LoggingInterface(logging.getLogger('oeb2pdf'))
    logger.setup_cli_handler(opts.verbose)

    outpath = opts.output
    if outpath is None:
        stem = os.path.splitext(os.path.basename(inpath))[0]
        outpath = stem + '.pdf'

    margins = PDFMargins()
    for side in ('top', 'bottom', 'left', 'right'):
        setattr(margins, side, getattr(opts, 'margin_' + side))

    PDFWriter(margins).dump(inpath, outpath)
    run_plugins_on_postprocess(outpath, 'pdf')
    logger.log_info(_('Output written to ') + outpath)
 def main(argv=sys.argv):
    """Command-line entry point: convert exactly one input file.

    Prints the help text and returns 1 unless exactly one positional
    argument is given; otherwise returns the result of oeb2pdf().
    """
    parser = option_parser()
    opts, positional = parser.parse_args(argv[1:])
    if len(positional) == 1:
        return oeb2pdf(opts, positional[0])
    parser.print_help()
    return 1
 # When run as a script, call main() and use its return value as the exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/txt/init.py
+++ b/src/calibre/ebooks/txt/init.py
@ -0,0 +1,9 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, John Schember john@nachtimwald.com'
 __docformat__ = 'restructuredtext en'
 '''
 Used for txt output
 '''
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@ -0,0 +1,63 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
 from calibre.ebooks.metadata import authors_to_string
 class TXTOutput(OutputFormatPlugin):
    """Output plugin that writes an OEB book as plain text through TxtWriter.

    The options select the newline style and whether the author and title
    are written at the top of the file.
    """

    name = 'TXT Output'
    author = 'John Schember'
    file_type = 'txt'

    # The newline help template is translated first and interpolated
    # afterwards, so the string handed to _() is the constant template.
    options = set([
                    OptionRecommendation(name='newline', recommended_value='system',
                        level=OptionRecommendation.LOW, long_switch='newline',
                        short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
                        help=_('Type of newline to use. Options are %s. Default is \'system\'. '
                            'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
                            'For Mac OS X use \'unix\'. \'system\' will default to the newline '
                            'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
                    OptionRecommendation(name='prepend_author', recommended_value='true',
                        level=OptionRecommendation.LOW, long_switch='prepend_author',
                        choices=['true', 'false'],
                        help=_('Write the author to the beginning of the file. '
                            'Default is \'true\'. Use \'false\' to disable.')),
                    OptionRecommendation(name='prepend_title', recommended_value='true',
                        choices=['true', 'false'],
                        level=OptionRecommendation.LOW, long_switch='prepend_title',
                        help=_('Write the title to the beginning of the file. '
                            'Default is \'true\'. Use \'false\' to disable.'))
                 ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write the text of `oeb_book` to `output_path` as UTF-8.

        `output_path` is either a filesystem path or an object with a
        ``write`` method. A path is opened here (missing parent directories
        are created) and closed again even if writing fails; a stream
        passed in by the caller is left open. Author and title come from
        `opts` when set there, otherwise from the book's metadata.
        """
        metadata = TxtMetadata()
        if opts.prepend_author.lower() == 'true':
            metadata.author = opts.authors if opts.authors else authors_to_string(oeb_book.metadata.authors)
        if opts.prepend_title.lower() == 'true':
            metadata.title = opts.title if opts.title else oeb_book.metadata.title

        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
        txt = writer.dump(oeb_book.spine, metadata)

        close = not hasattr(output_path, 'write')
        if close:
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            # The writer returns unicode text and the stream is binary:
            # encode explicitly so non-ASCII characters do not hit the
            # implicit ASCII codec.
            out_stream.write(txt.encode('utf-8'))
        finally:
            if close:
                out_stream.close()
--- a/src/calibre/ebooks/txt/writer.py
+++ b/src/calibre/ebooks/txt/writer.py
@ -0,0 +1,158 @@
 # -*- coding: utf-8 -*-
 from __future__ import with_statement
 '''
 Write content to TXT.
 '''
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os, re, sys
 from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
 from BeautifulSoup import BeautifulSoup
 class TxtWriter(object):
    """Convert the HTML documents of a book spine into one plain-text string.

    `newline` is the line terminator used in the result and `log` is the
    logger supplied by the caller.
    """

    def __init__(self, newline, log):
        self.newline = newline
        self.log = log

    def dump(self, spine, metadata):
        """Return the text of every item in `spine` as one unicode string.

        Each item is opened as a file and decoded with its ``encoding``
        attribute. When set, ``metadata.author`` and ``metadata.title`` are
        written in upper case at the top (title first), each followed by
        three newlines. The result is padded so that it ends with three
        newlines.
        """
        parts = []
        for item in spine:
            # Binary mode: the bytes are decoded explicitly below, so no
            # platform newline translation should happen first.
            with open(item, 'rb') as itemf:
                content = itemf.read().decode(item.encoding)

            # Convert newlines to unix style \n for processing. These
            # will be changed to the specified type later in the process.
            content = self.unix_newlines(content)
            content = self.strip_html(content)
            content = self.replace_html_symbols(content)
            content = self.cleanup_text(content)
            content = self.specified_newlines(content)
            parts.append(content)
        out = u''.join(parts)

        # Prepend metadata
        if metadata.author:
            out = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + out
        if metadata.title:
            out = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + out

        # Put two blank lines at end of file, whether or not a title was
        # written.
        end = out[-3 * len(self.newline):]
        out += self.newline * (3 - end.count(self.newline))

        return out

    def strip_html(self, html):
        """Return the text inside the ``body`` elements of `html`, tags removed.

        Heading tags are replaced by marker strings that cleanup_text later
        turns into runs of newlines; block-level closing tags and hr/br
        become blank lines; table cells are separated by a space.
        """
        stripped = u''

        for dom_tree in BeautifulSoup(html).findAll('body'):
            text = unicode(dom_tree)

            # Remove unnecessary tags together with their content. The match
            # is non-greedy and spans lines so that each element is removed
            # on its own and multi-line scripts/styles are caught.
            for tag in ['script', 'style']:
                text = re.sub('(?imsu)<[ ]*%s[ ]*.*?>(.*?)</[ ]*%s[ ]*>' % (tag, tag), '', text)
            text = re.sub('(?s)<!--.*?-->', '', text)
            text = re.sub('<\\?.*?\\?>', '', text)
            text = re.sub('<@.*?@>', '', text)
            text = re.sub('<%.*?%>', '', text)

            # Headings usually indicate Chapters.
            # We are going to use a marker to insert the proper number of
            # newline characters at the end of cleanup_text because
            # cleanup_text removes excessive (more than 2) newlines.
            for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                text = re.sub('(?imu)<[ ]*%s[ ]*.*?>' % tag, '-vzxedxy-', text)
                text = re.sub('(?imu)</[ ]*%s[ ]*>' % tag, '-vlgzxey-', text)

            # Separate content with space.
            for tag in ['td']:
                text = re.sub('(?imu)</[ ]*%s[ ]*>' % tag, ' ', text)

            # Separate content with empty line.
            for tag in ['p', 'div', 'pre', 'li', 'table', 'tr']:
                text = re.sub('(?imu)</[ ]*%s[ ]*>' % tag, '\n\n', text)

            for tag in ['hr', 'br']:
                text = re.sub('(?imu)<[ ]*%s[ ]*/*?>' % tag, '\n\n', text)

            # Remove any tags that do not need special processing.
            text = re.sub('<.*?>', '', text)

            stripped = stripped + text

        return stripped

    def replace_html_symbols(self, content):
        """Replace every code listed in HTML_SYMBOLS with its symbol."""
        for symbol in HTML_SYMBOLS:
            for code in HTML_SYMBOLS[symbol]:
                content = content.replace(code, symbol)

        return content

    def cleanup_text(self, text):
        """Normalise whitespace in `text` (which must use \\n newlines).

        Runs of tabs, vertical tabs and form feeds become one space; a
        single newline between two characters becomes a space; runs of
        spaces collapse; three or more newlines collapse to two; heading
        markers are expanded; spaces at line starts and ends are removed.
        """
        # Replace bad characters.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')

        # Replace tabs, vertical tabs and form feeds with single space.
        text = re.sub('[\t\v\f]+', ' ', text)

        # Single line paragraph: a lone newline with a character on each
        # side joins the two lines. Done in one pass with lookarounds.
        text = re.sub('(?<=.)\n(?=.)', ' ', text)

        # Remove multiple spaces.
        text = re.sub('[ ]+', ' ', text)

        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)
        text = re.sub('\n{3,}', '\n\n', text)

        # Replace markers with the proper characters.
        text = text.replace('-vzxedxy-', '\n\n\n\n\n')
        text = text.replace('-vlgzxey-', '\n\n\n')

        # Replace spaces at the beginning and end of lines
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        return text

    def unix_newlines(self, text):
        """Return `text` with \\r\\n and lone \\r converted to \\n."""
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')

        return text

    def specified_newlines(self, text):
        """Return `text` with every \\n replaced by the configured newline."""
        if self.newline == '\n':
            return text

        return text.replace('\n', self.newline)
 class TxtNewlines(object):
    """Resolve a newline type name to the line terminator it stands for.

    The lookup is case-insensitive; a name that is not in NEWLINE_TYPES
    resolves to ``os.linesep``. The result is stored in ``self.newline``.
    """

    NEWLINE_TYPES = {
                        'system'  : os.linesep,
                        'unix'    : '\n',
                        'old_mac' : '\r',
                        'windows' : '\r\n'
                     }

    def __init__(self, newline_type):
        key = newline_type.lower()
        if key in self.NEWLINE_TYPES:
            self.newline = self.NEWLINE_TYPES[key]
        else:
            self.newline = os.linesep
 class TxtMetadata(object):
    """Title and author to write at the top of the text; both start as None."""

    def __init__(self):
        self.title = self.author = None
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@ -713,6 +713,9 @@ class BooksView(TableView):
    def set_editable(self, editable):
        """Forward the `editable` flag to the underlying model."""
        # This method was defined twice in a row with identical bodies; a
        # single definition is kept.
        self._model.set_editable(editable)
    def connect_to_search_box(self, sb):
        QObject.connect(sb, SIGNAL('search(PyQt_PyObject, PyQt_PyObject)'),
                        self._model.search)
@ -1007,6 +1010,10 @@ class DeviceBooksModel(BooksModel):
        self.editable = editable
    # Store the editable flag on this model.
    # NOTE(review): the statement directly above this definition is identical
    # to this body - confirm this method is not a duplicate of the preceding one.
    def set_editable(self, editable):
        self.editable = editable
 class SearchBox(QLineEdit):
    INTERVAL = 1000 #: Time to wait before emitting search signal
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -39,10 +39,9 @@ entry_points = {
             'calibre-fontconfig = calibre.utils.fontconfig:main',
             'calibre-parallel   = calibre.parallel:main',
             'calibre-customize  = calibre.customize.ui:main',
-             'pdftrim            = calibre.ebooks.pdf.pdftrim:main' ,
+             'pdfmanipulate      = calibre.ebooks.pdf.manipulate:main',
             'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
             'calibre-smtp = calibre.utils.smtp:main',
        ],
        'gui_scripts'    : [
            __appname__+' = calibre.gui2.main:main',
@ -548,6 +547,3 @@ main = post_install
 # When run as a script, run the post-install step.
 if __name__ == '__main__':
    post_install()