PDF manipulation via the pdfmanipulate command. TXT and PDF output.

2025-07-09 03:04:10 -04:00 · 2009-04-01 14:07:43 -07:00 · 2009-04-01 14:07:43 -07:00 · b2e8618354
commit b2e8618354
parent 9ab8caf6f5 118fd6ece0
19 changed files with 1322 additions and 176 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -159,6 +159,16 @@ class ODTMetadataReader(MetadataReaderPlugin):
    def get_metadata(self, stream, ftype):
        from calibre.ebooks.metadata.odt import get_metadata
        return get_metadata(stream)
+        
+class TXTMetadataReader(MetadataReaderPlugin):
+    # Metadata reader plugin registered for files with the 'txt' extension.
+    
+    name        = 'Read TXT metadata'
+    file_types  = set(['txt'])
+    description = _('Read metadata from %s files') % 'TXT'
+    
+    def get_metadata(self, stream, ftype):
+        # Delegates to calibre.ebooks.metadata.txt.get_metadata; ftype is
+        # accepted but not used. The import is done inside the method rather
+        # than at module level (presumably to defer loading until needed).
+        from calibre.ebooks.metadata.txt import get_metadata
+        return get_metadata(stream)

 class LRXMetadataReader(MetadataReaderPlugin):
    
@ -256,9 +266,11 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.oeb.output import OEBOutput
+from calibre.ebooks.txt.output import TXTOutput
+from calibre.ebooks.pdf.output import PDFOutput
 from calibre.customize.profiles import input_profiles, output_profiles

-plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
+plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput, TXTOutput, PDFOutput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -196,7 +196,7 @@ OptionRecommendation(name='language',
        self.input_fmt = input_fmt
        self.output_fmt = output_fmt

-        # Build set of all possible options. Two options are equal iff their
+        # Build set of all possible options. Two options are equal if their
        # names are the same.
        self.input_options  = self.input_plugin.options.union(
                                    self.input_plugin.common_options)
--- a/src/calibre/ebooks/htmlsymbols.py
+++ b/src/calibre/ebooks/htmlsymbols.py
@ -0,0 +1,310 @@
+# -*- coding: utf-8 -*-
+'''
+Mapping of symbols (ASCII and non-ASCII) to their corresponding HTML entity number and, where one exists, name
+'''
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+
+# http://www.w3schools.com/tags/ref_symbols.asp
+HTML_SYMBOLS = {
+                # Math Symbols
+                u'∀' : ['&#8704;', '&forall;'], # for all
+                u'∂' : ['&#8706;', '&part;'], # part
+                u'∃' : ['&#8707;', '&exists;'], # exists
+                u'∅' : ['&#8709;', '&empty;'], # empty
+                u'∇' : ['&#8711;', '&nabla;'], # nabla
+                u'∈' : ['&#8712;', '&isin;'], # isin
+                u'∉' : ['&#8713;', '&notin;'], # notin
+                u'∋' : ['&#8715;', '&ni;'], # ni
+                u'∏' : ['&#8719;', '&prod;'], # prod
+                u'∑' : ['&#8721;', '&sum;'], # sum
+                u'−' : ['&#8722;', '&minus;'], # minus
+                u'∗' : ['&#8727;', '&lowast;'], # lowast
+                u'√' : ['&#8730;', '&radic;'], # square root
+                u'∝' : ['&#8733;', '&prop;'], # proportional to
+                u'∞' : ['&#8734;', '&infin;'], # infinity
+                u'∠' : ['&#8736;', '&ang;'], # angle
+                u'∧' : ['&#8743;', '&and;'], # and
+                u'∨' : ['&#8744;', '&or;'], # or
+                u'∩' : ['&#8745;', '&cap;'], # cap
+                u'∪' : ['&#8746;', '&cup;'], # cup
+                u'∫' : ['&#8747;', '&int;'], # integral
+                u'∴' : ['&#8756;', '&there4;'], # therefore
+                u'∼' : ['&#8764;', '&sim;'], # similar to
+                u'≅' : ['&#8773;', '&cong;'], # approximately equal
+                u'≈' : ['&#8776;', '&asymp;'], # almost equal
+                u'≠' : ['&#8800;', '&ne;'], # not equal
+                u'≡' : ['&#8801;', '&equiv;'], # equivalent
+                u'≤' : ['&#8804;', '&le;'], # less or equal
+                u'≥' : ['&#8805;', '&ge;'], # greater or equal
+                u'⊂' : ['&#8834;', '&sub;'], # subset of
+                u'⊃' : ['&#8835;', '&sup;'], # superset of
+                u'⊄' : ['&#8836;', '&nsub;'], # not subset of
+                u'⊆' : ['&#8838;', '&sube;'], # subset or equal
+                u'⊇' : ['&#8839;', '&supe;'], # superset or equal
+                u'⊕' : ['&#8853;', '&oplus;'], # circled plus
+                u'⊗' : ['&#8855;', '&otimes;'], # circled times
+                u'⊥' : ['&#8869;', '&perp;'], # perpendicular
+                u'⋅' : ['&#8901;', '&sdot;'], # dot operator
+                # Greek Letters
+                u'Α' : ['&#913;', '&Alpha;'], # Alpha
+                u'Β' : ['&#914;', '&Beta;'], # Beta
+                u'Γ' : ['&#915;', '&Gamma;'], # Gamma
+                u'Δ' : ['&#916;', '&Delta;'], # Delta
+                u'Ε' : ['&#917;', '&Epsilon;'], # Epsilon
+                u'Ζ' : ['&#918;', '&Zeta;'], # Zeta
+                u'Η' : ['&#919;', '&Eta;'], # Eta
+                u'Θ' : ['&#920;', '&Theta;'], # Theta
+                u'Ι' : ['&#921;', '&Iota;'], # Iota
+                u'Κ' : ['&#922;', '&Kappa;'], # Kappa
+                u'Λ' : ['&#923;', '&Lambda;'], # Lambda
+                u'Μ' : ['&#924;', '&Mu;'], # Mu
+                u'Ν' : ['&#925;', '&Nu;'], # Nu
+                u'Ξ' : ['&#926;', '&Xi;'], # Xi
+                u'Ο' : ['&#927;', '&Omicron;'], # Omicron
+                u'Π' : ['&#928;', '&Pi;'], # Pi
+                u'Ρ' : ['&#929;', '&Rho;'], # Rho
+                u'Σ' : ['&#931;', '&Sigma;'], # Sigma
+                u'Τ' : ['&#932;', '&Tau;'], # Tau
+                u'Υ' : ['&#933;', '&Upsilon;'], # Upsilon
+                u'Φ' : ['&#934;', '&Phi;'], # Phi
+                u'Χ' : ['&#935;', '&Chi;'], # Chi
+                u'Ψ' : ['&#936;', '&Psi;'], # Psi
+                u'ω' : ['&#969;', '&omega;'], # omega
+                u'ϑ' : ['&#977;', '&thetasym;'], # theta symbol
+                u'ϒ' : ['&#978;', '&upsih;'], # upsilon symbol
+                u'ϖ' : ['&#982;', '&piv;'], # pi symbol
+                # Other
+                u'Œ' : ['&#338;', '&OElig;'], # capital ligature OE
+                u'œ' : ['&#339;', '&oelig;'], # small ligature oe
+                u'Š' : ['&#352;', '&Scaron;'], # capital S with caron
+                u'š' : ['&#353;', '&scaron;'], # small S with caron
+                u'Ÿ' : ['&#376;', '&Yuml;'], # capital Y with diaeresis
+                u'ƒ' : ['&#402;', '&fnof;'], # f with hook
+                u'ˆ' : ['&#710;', '&circ;'], # modifier letter circumflex accent
+                u'˜' : ['&#732;', '&tilde;'], # small tilde
+                u'–' : ['&#8211;', '&ndash;'], # en dash
+                u'—' : ['&#8212;', '&mdash;'], # em dash
+                u'‘' : ['&#8216;', '&lsquo;'], # left single quotation mark
+                u'’' : ['&#8217;', '&rsquo;'], # right single quotation mark
+                u'‚' : ['&#8218;', '&sbquo;'], # single low-9 quotation mark
+                u'“' : ['&#8220;', '&ldquo;'], # left double quotation mark
+                u'”' : ['&#8221;', '&rdquo;'], # right double quotation mark
+                u'„' : ['&#8222;', '&bdquo;'], # double low-9 quotation mark
+                u'†' : ['&#8224;', '&dagger;'], # dagger
+                u'‡' : ['&#8225;', '&Dagger;'], # double dagger
+                u'•' : ['&#8226;', '&bull;'], # bullet
+                u'…' : ['&#8230;', '&hellip;'], # horizontal ellipsis
+                u'‰' : ['&#8240;', '&permil;'], # per mille 
+                u'′' : ['&#8242;', '&prime;'], # minutes
+                u'″' : ['&#8243;', '&Prime;'], # seconds
+                u'‹' : ['&#8249;', '&lsaquo;'], # single left angle quotation
+                u'›' : ['&#8250;', '&rsaquo;'], # single right angle quotation
+                u'‾' : ['&#8254;', '&oline;'], # overline
+                u'€' : ['&#8364;', '&euro;'], # euro
+                u'™' : ['&#8482;', '&trade;'], # trademark
+                u'←' : ['&#8592;', '&larr;'], # left arrow
+                u'↑' : ['&#8593;', '&uarr;'], # up arrow
+                u'→' : ['&#8594;', '&rarr;'], # right arrow
+                u'↓' : ['&#8595;', '&darr;'], # down arrow
+                u'↔' : ['&#8596;', '&harr;'], # left right arrow
+                u'↵' : ['&#8629;', '&crarr;'], # carriage return arrow
+                u'⌈' : ['&#8968;', '&lceil;'], # left ceiling
+                u'⌉' : ['&#8969;', '&rceil;'], # right ceiling
+                u'⌊' : ['&#8970;', '&lfloor;'], # left floor
+                u'⌋' : ['&#8971;', '&rfloor;'], # right floor
+                u'◊' : ['&#9674;', '&loz;'], # lozenge
+                u'♠' : ['&#9824;', '&spades;'], # spade
+                u'♣' : ['&#9827;', '&clubs;'], # club
+                u'♥' : ['&#9829;', '&hearts;'], # heart
+                u'♦' : ['&#9830;', '&diams;'], # diamond
+                # Extra http://www.ascii.cl/htmlcodes.htm
+                u' ' : ['&#32;'], # space
+                u'!' : ['&#33;'], # exclamation point
+                u'#' : ['&#35;'], # number sign
+                u'$' : ['&#36;'], # dollar sign
+                u'%' : ['&#37;'], # percent sign
+                u'\'' : ['&#39;'], # single quote
+                u'(' : ['&#40;'], # opening parenthesis
+                u')' : ['&#41;'], # closing parenthesis
+                u'*' : ['&#42;'], # asterisk
+                u'+' : ['&#43;'], # plus sign
+                u',' : ['&#44;'], # comma
+                u'-' : ['&#45;'], # minus sign - hyphen
+                u'.' : ['&#46;'], # period
+                u'/' : ['&#47;'], # slash
+                u'0' : ['&#48;'], # zero
+                u'1' : ['&#49;'], # one
+                u'2' : ['&#50;'], # two
+                u'3' : ['&#51;'], # three
+                u'4' : ['&#52;'], # four
+                u'5' : ['&#53;'], # five
+                u'6' : ['&#54;'], # six
+                u'7' : ['&#55;'], # seven
+                u'8' : ['&#56;'], # eight
+                u'9' : ['&#57;'], # nine
+                u':' : ['&#58;'], # colon
+                u';' : ['&#59;'], # semicolon
+                u'=' : ['&#61;'], # equal sign
+                u'?' : ['&#63;'], # question mark
+                u'@' : ['&#64;'], # at symbol
+                u'A' : ['&#65;'], # 
+                u'B' : ['&#66;'], # 
+                u'C' : ['&#67;'], # 
+                u'D' : ['&#68;'], # 
+                u'E' : ['&#69;'], # 
+                u'F' : ['&#70;'], # 
+                u'G' : ['&#71;'], # 
+                u'H' : ['&#72;'], # 
+                u'I' : ['&#73;'], # 
+                u'J' : ['&#74;'], # 
+                u'K' : ['&#75;'], # 
+                u'L' : ['&#76;'], # 
+                u'M' : ['&#77;'], # 
+                u'N' : ['&#78;'], # 
+                u'O' : ['&#79;'], # 
+                u'P' : ['&#80;'], # 
+                u'Q' : ['&#81;'], # 
+                u'R' : ['&#82;'], # 
+                u'S' : ['&#83;'], # 
+                u'T' : ['&#84;'], # 
+                u'U' : ['&#85;'], # 
+                u'V' : ['&#86;'], # 
+                u'W' : ['&#87;'], # 
+                u'X' : ['&#88;'], # 
+                u'Y' : ['&#89;'], # 
+                u'Z' : ['&#90;'], # 
+                u'[' : ['&#91;'], # opening bracket
+                u'\\' : ['&#92;'], # backslash
+                u']' : ['&#93;'], # closing bracket
+                u'^' : ['&#94;'], # caret - circumflex
+                u'_' : ['&#95;'], # underscore
+                u'`' : ['&#96;'], # grave accent
+                u'a' : ['&#97;'], # 
+                u'b' : ['&#98;'], # 
+                u'c' : ['&#99;'], # 
+                u'd' : ['&#100;'], # 
+                u'e' : ['&#101;'], # 
+                u'f' : ['&#102;'], # 
+                u'g' : ['&#103;'], # 
+                u'h' : ['&#104;'], # 
+                u'i' : ['&#105;'], # 
+                u'j' : ['&#106;'], # 
+                u'k' : ['&#107;'], # 
+                u'l' : ['&#108;'], # 
+                u'm' : ['&#109;'], # 
+                u'n' : ['&#110;'], # 
+                u'o' : ['&#111;'], # 
+                u'p' : ['&#112;'], # 
+                u'q' : ['&#113;'], # 
+                u'r' : ['&#114;'], # 
+                u's' : ['&#115;'], # 
+                u't' : ['&#116;'], # 
+                u'u' : ['&#117;'], # 
+                u'v' : ['&#118;'], # 
+                u'w' : ['&#119;'], # 
+                u'x' : ['&#120;'], # 
+                u'y' : ['&#121;'], # 
+                u'z' : ['&#122;'], # 
+                u'{' : ['&#123;'], # opening brace
+                u'|' : ['&#124;'], # vertical bar
+                u'}' : ['&#125;'], # closing brace
+                u'~' : ['&#126;'], # equivalency sign - tilde
+                u'<' : ['&#60;', '&lt;'], # less than sign
+                u'>' : ['&#62;', '&gt;'], # greater than sign
+                u'¡' : ['&#161;', '&iexcl;'], # inverted exclamation mark
+                u'¢' : ['&#162;', '&cent;'], # cent sign
+                u'£' : ['&#163;', '&pound;'], # pound sign
+                u'¤' : ['&#164;', '&curren;'], # currency sign
+                u'¥' : ['&#165;', '&yen;'], # yen sign
+                u'¦' : ['&#166;', '&brvbar;'], # broken vertical bar
+                u'§' : ['&#167;', '&sect;'], # section sign
+                u'¨' : ['&#168;', '&uml;'], # spacing diaeresis - umlaut
+                u'©' : ['&#169;', '&copy;'], # copyright sign
+                u'ª' : ['&#170;', '&ordf;'], # feminine ordinal indicator
+                u'«' : ['&#171;', '&laquo;'], # left double angle quotes
+                u'¬' : ['&#172;', '&not;'], # not sign
+                u'®' : ['&#174;', '&reg;'], # registered trade mark sign
+                u'¯' : ['&#175;', '&macr;'], # spacing macron - overline
+                u'°' : ['&#176;', '&deg;'], # degree sign
+                u'±' : ['&#177;', '&plusmn;'], # plus-or-minus sign
+                u'²' : ['&#178;', '&sup2;'], # superscript two - squared
+                u'³' : ['&#179;', '&sup3;'], # superscript three - cubed
+                u'´' : ['&#180;', '&acute;'], # acute accent - spacing acute
+                u'µ' : ['&#181;', '&micro;'], # micro sign
+                u'¶' : ['&#182;', '&para;'], # pilcrow sign - paragraph sign
+                u'·' : ['&#183;', '&middot;'], # middle dot - Georgian comma
+                u'¸' : ['&#184;', '&cedil;'], # spacing cedilla
+                u'¹' : ['&#185;', '&sup1;'], # superscript one
+                u'º' : ['&#186;', '&ordm;'], # masculine ordinal indicator
+                u'»' : ['&#187;', '&raquo;'], # right double angle quotes
+                u'¼' : ['&#188;', '&frac14;'], # fraction one quarter
+                u'½' : ['&#189;', '&frac12;'], # fraction one half
+                u'¾' : ['&#190;', '&frac34;'], # fraction three quarters
+                u'¿' : ['&#191;', '&iquest;'], # inverted question mark
+                u'À' : ['&#192;', '&Agrave;'], # latin capital letter A with grave
+                u'Á' : ['&#193;', '&Aacute;'], # latin capital letter A with acute
+                u'Â' : ['&#194;', '&Acirc;'], # latin capital letter A with circumflex
+                u'Ã' : ['&#195;', '&Atilde;'], # latin capital letter A with tilde
+                u'Ä' : ['&#196;', '&Auml;'], # latin capital letter A with diaeresis
+                u'Å' : ['&#197;', '&Aring;'], # latin capital letter A with ring above
+                u'Æ' : ['&#198;', '&AElig;'], # latin capital letter AE
+                u'Ç' : ['&#199;', '&Ccedil;'], # latin capital letter C with cedilla
+                u'È' : ['&#200;', '&Egrave;'], # latin capital letter E with grave
+                u'É' : ['&#201;', '&Eacute;'], # latin capital letter E with acute
+                u'Ê' : ['&#202;', '&Ecirc;'], # latin capital letter E with circumflex
+                u'Ë' : ['&#203;', '&Euml;'], # latin capital letter E with diaeresis
+                u'Ì' : ['&#204;', '&Igrave;'], # latin capital letter I with grave
+                u'Í' : ['&#205;', '&Iacute;'], # latin capital letter I with acute
+                u'Î' : ['&#206;', '&Icirc;'], # latin capital letter I with circumflex
+                u'Ï' : ['&#207;', '&Iuml;'], # latin capital letter I with diaeresis
+                u'Ð' : ['&#208;', '&ETH;'], # latin capital letter ETH
+                u'Ñ' : ['&#209;', '&Ntilde;'], # latin capital letter N with tilde
+                u'Ò' : ['&#210;', '&Ograve;'], # latin capital letter O with grave
+                u'Ó' : ['&#211;', '&Oacute;'], # latin capital letter O with acute
+                u'Ô' : ['&#212;', '&Ocirc;'], # latin capital letter O with circumflex
+                u'Õ' : ['&#213;', '&Otilde;'], # latin capital letter O with tilde
+                u'Ö' : ['&#214;', '&Ouml;'], # latin capital letter O with diaeresis
+                u'×' : ['&#215;', '&times;'], # multiplication sign
+                u'Ø' : ['&#216;', '&Oslash;'], # latin capital letter O with slash
+                u'Ù' : ['&#217;', '&Ugrave;'], # latin capital letter U with grave
+                u'Ú' : ['&#218;', '&Uacute;'], # latin capital letter U with acute
+                u'Û' : ['&#219;', '&Ucirc;'], # latin capital letter U with circumflex
+                u'Ü' : ['&#220;', '&Uuml;'], # latin capital letter U with diaeresis
+                u'Ý' : ['&#221;', '&Yacute;'], # latin capital letter Y with acute
+                u'Þ' : ['&#222;', '&THORN;'], # latin capital letter THORN
+                u'ß' : ['&#223;', '&szlig;'], # latin small letter sharp s - ess-zed
+                u'à' : ['&#224;', '&agrave;'], # latin small letter a with grave
+                u'á' : ['&#225;', '&aacute;'], # latin small letter a with acute
+                u'â' : ['&#226;', '&acirc;'], # latin small letter a with circumflex
+                u'ã' : ['&#227;', '&atilde;'], # latin small letter a with tilde
+                u'ä' : ['&#228;', '&auml;'], # latin small letter a with diaeresis
+                u'å' : ['&#229;', '&aring;'], # latin small letter a with ring above
+                u'æ' : ['&#230;', '&aelig;'], # latin small letter ae
+                u'ç' : ['&#231;', '&ccedil;'], # latin small letter c with cedilla
+                u'è' : ['&#232;', '&egrave;'], # latin small letter e with grave
+                u'é' : ['&#233;', '&eacute;'], # latin small letter e with acute
+                u'ê' : ['&#234;', '&ecirc;'], # latin small letter e with circumflex
+                u'ë' : ['&#235;', '&euml;'], # latin small letter e with diaeresis
+                u'ì' : ['&#236;', '&igrave;'], # latin small letter i with grave
+                u'í' : ['&#237;', '&iacute;'], # latin small letter i with acute
+                u'î' : ['&#238;', '&icirc;'], # latin small letter i with circumflex
+                u'ï' : ['&#239;', '&iuml;'], # latin small letter i with diaeresis
+                u'ð' : ['&#240;', '&eth;'], # latin small letter eth
+                u'ñ' : ['&#241;', '&ntilde;'], # latin small letter n with tilde
+                u'ò' : ['&#242;', '&ograve;'], # latin small letter o with grave
+                u'ó' : ['&#243;', '&oacute;'], # latin small letter o with acute
+                u'ô' : ['&#244;', '&ocirc;'], # latin small letter o with circumflex
+                u'õ' : ['&#245;', '&otilde;'], # latin small letter o with tilde
+                u'ö' : ['&#246;', '&ouml;'], # latin small letter o with diaeresis
+                u'÷' : ['&#247;', '&divide;'], # division sign
+                u'ø' : ['&#248;', '&oslash;'], # latin small letter o with slash
+                u'ù' : ['&#249;', '&ugrave;'], # latin small letter u with grave
+                u'ú' : ['&#250;', '&uacute;'], # latin small letter u with acute
+                u'û' : ['&#251;', '&ucirc;'], # latin small letter u with circumflex
+                u'ü' : ['&#252;', '&uuml;'], # latin small letter u with diaeresis
+                u'ý' : ['&#253;', '&yacute;'], # latin small letter y with acute
+                u'þ' : ['&#254;', '&thorn;'], # latin small letter thorn
+                u'ÿ' : ['&#255;', '&yuml;'], # latin small letter y with diaeresis
+               }
+
--- a/src/calibre/ebooks/metadata/txt.py
+++ b/src/calibre/ebooks/metadata/txt.py
@ -0,0 +1,30 @@
+'''Read meta information from TXT files'''
+
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+
+import re
+
+from calibre.ebooks.metadata import MetaInformation
+
+def get_metadata(stream, extract_cover=True):
+    """
+    Return metadata as a L{MetaInformation} object.
+
+    Only the first four lines of C{stream} are examined. They are expected
+    to hold the title, two empty lines and then a comma separated list of
+    authors. When that layout is not found the title and authors are left
+    at their 'Unknown' defaults.
+
+    @param stream: Seekable file-like object containing the text.
+    @param extract_cover: Accepted for signature compatibility; not used.
+    """
+    mi = MetaInformation(_('Unknown'), [_('Unknown')])
+    stream.seek(0)
+
+    # The header can only live in the first four lines, so read no further.
+    lines = []
+    for x in range(4):
+        line = stream.readline()
+        if not line:
+            break
+        lines.append(line)
+    mdata = ''.join(lines)
+
+    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
+    if mo is not None:
+        # The greedy '.+' groups keep trailing blanks and, for CRLF files,
+        # a trailing '\r' on the author, so strip what was captured.
+        title = mo.group('title').strip()
+        if title:
+            mi.title = title
+        authors = [a.strip() for a in mo.group('author').split(',')]
+        authors = [a for a in authors if a]
+        if authors:
+            mi.authors = authors
+
+    return mi
--- a/src/calibre/ebooks/pdf/from_any.py
+++ b/src/calibre/ebooks/pdf/from_any.py
@ -1,69 +0,0 @@
-'''
-Convert any ebook format to PDF.
-'''
-
-from __future__ import with_statement
-
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
-    'and Marshall T. Vandegrift <llasram@gmail.com>' \
-    'and John Schember <john@nachtimwald.com>'
-__docformat__ = 'restructuredtext en'
-
-import sys, os, glob, logging
-
-from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
-from calibre.ebooks.epub import config as common_config
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.pdf.writer import oeb2pdf, config as pdf_config
-
-def config(defaults=None):
-    c = common_config(defaults=defaults, name='pdf')
-    c.remove_opt('profile')
-    pdfc = pdf_config(defaults=defaults)
-    c.update(pdfc)
-    return c 
-
-def option_parser(usage=USAGE):
-    usage = usage % ('PDF', formats())
-    parser = config().option_parser(usage=usage)
-    return parser
-
-def any2pdf(opts, path, notification=None):
-    ext = os.path.splitext(path)[1]
-    if not ext:
-        raise ValueError('Unknown file type: '+path)
-    ext = ext.lower()[1:]
-    
-    if opts.output is None:
-        opts.output = os.path.splitext(os.path.basename(path))[0]+'.pdf'
-    
-    opts.output = os.path.abspath(opts.output)
-    orig_output = opts.output
-    
-    with TemporaryDirectory('_any2pdf') as tdir:
-        oebdir = os.path.join(tdir, 'oeb')
-        os.mkdir(oebdir)
-        opts.output = os.path.join(tdir, 'dummy.epub')
-        opts.profile = 'None'
-        opts.dont_split_on_page_breaks = True
-        orig_bfs = opts.base_font_size2
-        opts.base_font_size2 = 0
-        any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir)
-        opts.base_font_size2 = orig_bfs
-        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
-        opts.output = orig_output
-        logging.getLogger('html2epub').info(_('Creating PDF file from EPUB...'))
-        oeb2pdf(opts, opf)
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) < 2:
-        parser.print_help()
-        print 'No input file specified.'
-        return 1
-    any2pdf(opts, args[1])
-    
-if __name__ == '__main__':
-    sys.exit(main())
--- a/src/calibre/ebooks/pdf/info.py
+++ b/src/calibre/ebooks/pdf/info.py
@ -0,0 +1,90 @@
+'''
+Display information about PDF files.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os, re, sys, time
+
+from calibre.utils.config import Config, StringConfig
+
+from pyPdf import PdfFileWriter, PdfFileReader
+
+
+def config(defaults=None):
+    '''
+    Return the configuration object for this command.
+
+    :param defaults: When given, it is passed to ``StringConfig``; when
+        ``None`` a ``Config`` named 'manipulatepdf' is created instead.
+    '''
+    desc = _('Options to control the transformation of pdf')
+    if defaults is not None:
+        return StringConfig(defaults, desc)
+    return Config('manipulatepdf', desc)
+
+def option_parser(name):
+    '''
+    Build the command line option parser for the info command.
+
+    :param name: Command name substituted for ``%%name`` in the usage text.
+    :return: The parser produced by ``config().option_parser``.
+    '''
+    # Translate the untouched template first and substitute afterwards.
+    # Substituting inside _() changes the lookup key, so the catalog entry
+    # for the literal template could never match.
+    usage = _('''\
+	%prog %%name [options] file.pdf ...
+
+	Get info about a PDF.
+	''')
+    return config().option_parser(usage=usage.replace('%%name', name))
+
+def print_info(pdf_path):
+    '''
+    Print a summary of the PDF at ``pdf_path`` to standard output.
+
+    The creation and modification dates come from the file's timestamps on
+    disk (``os.path.getctime`` / ``os.path.getmtime``), not from the PDF
+    document itself. The file size and PDF version lines are best effort
+    and are silently omitted when they cannot be determined.
+
+    :param pdf_path: Path to the PDF file to describe.
+    '''
+    date_format = '%a %b %d %H:%M:%S %Y'
+    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+        pdf = PdfFileReader(pdf_file)
+        # Interpolate after the _() lookup: an already formatted string
+        # would never match an entry in the translation catalog.
+        print _('Title:                 %s') % pdf.documentInfo.title
+        print _('Author:                %s') % pdf.documentInfo.author
+        print _('Subject:               %s') % pdf.documentInfo.subject
+        print _('Creator:               %s') % pdf.documentInfo.creator
+        print _('Producer:              %s') % pdf.documentInfo.producer
+        created = time.gmtime(os.path.getctime(pdf_path))
+        print _('Creation Date:         %s') % time.strftime(date_format, created)
+        modified = time.gmtime(os.path.getmtime(pdf_path))
+        print _('Modification Date:     %s') % time.strftime(date_format, modified)
+        print _('Pages:                 %s') % pdf.numPages
+        print _('Encrypted:             %s') % pdf.isEncrypted
+        try:
+            print _('File Size:             %s bytes') % os.path.getsize(pdf_path)
+        except Exception:
+            # Best effort only; Exception (not a bare except) so that
+            # KeyboardInterrupt and SystemExit still propagate.
+            pass
+        try:
+            # The version is parsed from the first line of the file.
+            pdf_file.seek(0)
+            vline = pdf_file.readline()
+            mo = re.search('(?iu)^%...-(?P<version>\d+\.\d+)', vline)
+            if mo is not None:
+                print _('PDF Version:           %s') % mo.group('version')
+        except Exception:
+            pass
+
+def verify_files(files):
+    '''
+    Return the paths from ``files`` that could not be read as a PDF.
+
+    A path is reported when opening it or constructing a ``PdfFileReader``
+    for it raises an exception.
+
+    :param files: Iterable of paths to check.
+    :return: List of the paths that failed, in input order.
+    '''
+    invalid = []
+
+    for pdf_path in files:
+        try:
+            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+                PdfFileReader(pdf_file)
+        except Exception:
+            # Exception rather than a bare except, so Ctrl-C and
+            # SystemExit are not mistaken for an unreadable file.
+            invalid.append(pdf_path)
+    return invalid
+
+def main(args=sys.argv, name=''):
+    '''
+    Entry point of the info command: print details for each given PDF.
+
+    :param args: Argument list with the program name first, followed by
+        options and PDF paths.
+    :param name: Command name shown in the usage text.
+    :return: 0 on success, 2 when no file was given or a file is unreadable.
+    '''
+    parser = option_parser(name)
+    opts, args = parser.parse_args(args)
+    args = args[1:]
+
+    if not args:
+        print 'Error: No PDF specified.\n'
+        print parser.get_usage()
+        return 2
+
+    # Check every file up front so nothing is printed if any is unreadable.
+    bad_pdfs = verify_files(args)
+    if bad_pdfs:
+        for pdf in bad_pdfs:
+            print 'Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf
+        return 2
+
+    for pdf in args:
+        print_info(pdf)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
+
--- a/src/calibre/ebooks/pdf/manipulate.py
+++ b/src/calibre/ebooks/pdf/manipulate.py
@ -0,0 +1,69 @@
+'''
+Command line interface to run pdf manipulation commands.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import string, sys
+
+from calibre.utils.config import Config, StringConfig
+from calibre.ebooks.pdf import info, merge, reverse, split, trim
+
+COMMANDS = {
+             'info'    : info,
+             'merge'   : merge,
+             'reverse' : reverse,
+             'split'   : split,
+             'trim'    : trim,
+           }
+
+def config(defaults=None):
+    '''
+    Return the configuration object for the manipulate command.
+
+    :param defaults: When given, it is passed to ``StringConfig``; when
+        ``None`` a ``Config`` named 'manipulatepdf' is created instead.
+    '''
+    desc = _('Options to control the transformation of pdf')
+    if defaults is not None:
+        return StringConfig(defaults, desc)
+    return Config('manipulatepdf', desc)
+
+def option_parser():
+    '''
+    Build the top level option parser, listing the available commands.
+
+    :return: The parser produced by ``config().option_parser`` with the
+        sorted ``COMMANDS`` names substituted into the usage text.
+    '''
+    # Translate the untouched template first and substitute afterwards.
+    # Substituting inside _() changes the lookup key, so the catalog entry
+    # for the literal template could never match.
+    usage = _('''\
+    
+	%prog command ...
+	
+	command can be one of the following:
+	[%%commands]
+	
+	Use %prog command --help to get more information about a specific command
+	
+	Manipulate a PDF.
+	''')
+    commands = ', '.join(sorted(COMMANDS))
+    return config().option_parser(usage=usage.replace('%%commands', commands))
+
+def main(args=sys.argv):
+    '''
+    Dispatch to the command named by ``args[1]``.
+
+    :param args: Argument list with the program name first and the command
+        name second. The list is not modified.
+    :return: The selected command's return value, or 2 when no command or
+        an unknown command was given.
+    '''
+    parser = option_parser()
+
+    if len(args) < 2:
+        print 'Error: No command specified.\n'
+        print parser.get_usage()
+        return 2
+
+    command = args[1].lower().strip()
+
+    if command not in COMMANDS:
+        # Parse before reporting the error (presumably so that options
+        # such as --help are still handled by the parser).
+        parser.parse_args(args)
+        print 'Unknown command %s.\n' % command
+        print parser.get_usage()
+        return 2
+
+    # Pass a copy without the command word. Deleting it in place would
+    # modify the caller's list, which is sys.argv itself by default.
+    return COMMANDS[command].main(args[:1] + args[2:], command)
+
+if __name__ == '__main__':
+    sys.exit(main())
+
--- a/src/calibre/ebooks/pdf/merge.py
+++ b/src/calibre/ebooks/pdf/merge.py
@ -0,0 +1,91 @@
+'''
+Merge PDF files into a single PDF document.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os, sys
+
+from calibre.ebooks.metadata.meta import metadata_from_formats
+from calibre.ebooks.metadata import authors_to_string
+from calibre.utils.config import Config, StringConfig
+
+from pyPdf import PdfFileWriter, PdfFileReader
+
+def config(defaults=None):
+    '''
+    Return the configuration object for the merge command.
+
+    Adds the ``output`` option (``-o`` / ``--output``, default 'merged.pdf').
+
+    :param defaults: When given, it is passed to ``StringConfig``; when
+        ``None`` a ``Config`` named 'mergepdf' is created instead.
+    '''
+    desc = _('Options to control the transformation of pdf')
+    if defaults is not None:
+        conf = StringConfig(defaults, desc)
+    else:
+        conf = Config('mergepdf', desc)
+    conf.add_opt(
+        'output', ['-o', '--output'], default='merged.pdf',
+        help=_('Path to output file. By default a file is created in the current directory.'))
+    return conf
+
+def option_parser(name):
+    '''
+    Build the command line option parser for the merge command.
+
+    :param name: Command name substituted for ``%%name`` in the usage text.
+    :return: The parser produced by ``config().option_parser``.
+    '''
+    # Translate the untouched template first and substitute afterwards.
+    # Substituting inside _() changes the lookup key, so the catalog entry
+    # for the literal template could never match.
+    usage = _('''\
+	%prog %%name [options] file1.pdf file2.pdf ...
+
+	Merges individual PDFs. Metadata will be used from the first PDF specified.
+	''')
+    return config().option_parser(usage=usage.replace('%%name', name))
+
+def merge_files(in_paths, out_path, metadata=None):
+    '''
+    Concatenate the pages of several PDFs into one output file.
+
+    :param in_paths: Paths of the PDFs to merge, in output order.
+    :param out_path: Path the merged PDF is written to.
+    :param metadata: Object providing ``title`` and ``authors`` for the
+        output; when ``None`` both are set to the translated 'Unknown'.
+    '''
+    if metadata is None:
+        title = _('Unknown')
+        author = _('Unknown')
+    else:
+        title = metadata.title
+        author = authors_to_string(metadata.authors)
+
+    out_pdf = PdfFileWriter(title=title, author=author)
+
+    # The inputs stay open until the output has been written, because the
+    # added pages may still be read from their source streams at write
+    # time. They are then closed explicitly instead of being leaked.
+    in_files = []
+    try:
+        for pdf_path in in_paths:
+            in_file = open(os.path.abspath(pdf_path), 'rb')
+            in_files.append(in_file)
+            for page in PdfFileReader(in_file).pages:
+                out_pdf.addPage(page)
+
+        with open(out_path, 'wb') as out_file:
+            out_pdf.write(out_file)
+    finally:
+        for in_file in in_files:
+            in_file.close()
+    
+def verify_files(files):
+    '''
+    Return the paths from ``files`` that cannot be used for merging.
+
+    A path is reported when it cannot be opened or parsed by
+    ``PdfFileReader``, when the PDF is encrypted, or when it has no pages.
+
+    :param files: Iterable of paths to check.
+    :return: List of the rejected paths, in input order.
+    '''
+    invalid = []
+
+    for pdf_path in files:
+        try:
+            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+                pdf = PdfFileReader(pdf_file)
+                usable = not pdf.isEncrypted and pdf.numPages > 0
+        except Exception:
+            # Exception rather than a bare except, so Ctrl-C and
+            # SystemExit are not mistaken for an unreadable file.
+            usable = False
+        if not usable:
+            invalid.append(pdf_path)
+    return invalid
+
+def main(args=sys.argv, name=''):
+    '''
+    Entry point of the merge command: merge the given PDFs into one file.
+
+    Metadata for the output is taken from the first PDF given.
+
+    :param args: Argument list with the program name first, followed by
+        options and at least two PDF paths.
+    :param name: Command name shown in the usage text.
+    :return: 0 on success, 2 when fewer than two files were given or a
+        file is unusable.
+    '''
+    parser = option_parser(name)
+    opts, args = parser.parse_args(args)
+    args = args[1:]
+
+    if len(args) < 2:
+        print 'Error: Two or more PDF files are required.\n\n'
+        print parser.get_usage()
+        return 2
+
+    # Check every file up front so no output is written if any is unusable.
+    bad_pdfs = verify_files(args)
+    if bad_pdfs:
+        for pdf in bad_pdfs:
+            print 'Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf
+        return 2
+
+    mi = metadata_from_formats([args[0]])
+
+    merge_files(args, opts.output, mi)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
+
--- a/src/calibre/ebooks/pdf/output.py
+++ b/src/calibre/ebooks/pdf/output.py
@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Convert OEB ebook format to PDF.
+'''
+
+#unit, papersize, orientation, custom_size, profile
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin, \
+    OptionRecommendation
+from calibre.ebooks.pdf.writer import PDFWriter
+from calibre.ebooks.pdf.pageoptions import UNITS, unit, PAPER_SIZES, \
+    paper_size, ORIENTATIONS, orientation, PageOptions
+
+class PDFOutput(OutputFormatPlugin):
+    '''
+    Output plugin that converts an OEB book to PDF.
+
+    The page geometry (margins, unit, paper size, orientation) is taken from
+    the conversion options and handed to ``PDFWriter`` via ``PageOptions``.
+    '''
+
+    name = 'PDF Output'
+    author = 'John Schember'
+    file_type = 'pdf'
+
+    # The lists of choices are interpolated AFTER translation so that the
+    # string given to _() is the constant message id.
+    options = set([
+                    OptionRecommendation(name='margin_top', recommended_value='1',
+                        level=OptionRecommendation.LOW, long_switch='margin_top',
+                        help=_('The top margin around the document.')),
+                    OptionRecommendation(name='margin_bottom', recommended_value='1',
+                        level=OptionRecommendation.LOW, long_switch='margin_bottom',
+                        help=_('The bottom margin around the document.')),
+                    OptionRecommendation(name='margin_left', recommended_value='1',
+                        level=OptionRecommendation.LOW, long_switch='margin_left',
+                        help=_('The left margin around the document.')),
+                    OptionRecommendation(name='margin_right', recommended_value='1',
+                        level=OptionRecommendation.LOW, long_switch='margin_right',
+                        help=_('The right margin around the document.')),
+
+                    OptionRecommendation(name='unit', recommended_value='inch',
+                        level=OptionRecommendation.LOW, short_switch='u',
+                        long_switch='unit', choices=UNITS.keys(),
+                        help=_('The unit of measure. Default is inch. Choices '
+                        'are %s') % UNITS.keys()),
+                    OptionRecommendation(name='paper_size', recommended_value='letter',
+                        level=OptionRecommendation.LOW,
+                        long_switch='paper_size', choices=PAPER_SIZES.keys(),
+                        help=_('The size of the paper. Default is letter. Choices '
+                        'are %s') % PAPER_SIZES.keys()),
+                    OptionRecommendation(name='orientation', recommended_value='portrait',
+                        level=OptionRecommendation.LOW,
+                        long_switch='orientation', choices=ORIENTATIONS.keys(),
+                        help=_('The orientation of the page. Default is portrait. Choices '
+                        'are %s') % ORIENTATIONS.keys()),
+                 ])
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Render the spine of ``oeb_book`` to PDF.
+
+        :param oeb_book: Book whose ``spine`` is rendered.
+        :param output_path: Either a file system path or an object with a
+                            ``write`` method. A path is created (including
+                            missing parent directories) and closed here; a
+                            stream is left open for the caller.
+        :param input_plugin: Not used by this plugin.
+        :param opts: Conversion options providing the margin, unit,
+                     paper_size and orientation values.
+        :param log: Logger handed to the PDF writer.
+        '''
+        popts = PageOptions()
+
+        popts.set_margin_top(opts.margin_top)
+        popts.set_margin_bottom(opts.margin_bottom)
+        popts.set_margin_left(opts.margin_left)
+        popts.set_margin_right(opts.margin_right)
+
+        popts.unit = unit(opts.unit)
+        popts.paper_size = paper_size(opts.paper_size)
+        popts.orientation = orientation(opts.orientation)
+
+        writer = PDFWriter(log, popts)
+
+        owns_stream = not hasattr(output_path, 'write')
+        if owns_stream:
+            out_dir = os.path.dirname(output_path)
+            if out_dir != '' and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+
+        try:
+            # Start from an empty stream, whatever it contained before.
+            out_stream.seek(0)
+            out_stream.truncate()
+            writer.dump(oeb_book.spine, out_stream)
+        finally:
+            # Only close what was opened here, also when rendering fails.
+            if owns_stream:
+                out_stream.close()
--- a/src/calibre/ebooks/pdf/pageoptions.py
+++ b/src/calibre/ebooks/pdf/pageoptions.py
@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from PyQt4.Qt import QPrinter
+
+UNITS = {
+            'millimeter' : QPrinter.Millimeter,
+            'point' : QPrinter.Point,
+            'inch' : QPrinter.Inch,
+            'pica' : QPrinter.Pica,
+            'didot' : QPrinter.Didot,
+            'cicero' : QPrinter.Cicero,
+            'devicepixel' : QPrinter.DevicePixel,
+        }
+
+def unit(unit):
+    '''Return the QPrinter unit for a unit name, QPrinter.Inch if unknown.'''
+    if unit in UNITS:
+        return UNITS[unit]
+    return QPrinter.Inch
+
+PAPER_SIZES = {
+                'a0' : QPrinter.A0, # 841 x 1189 mm
+                'a1' : QPrinter.A1, # 594 x 841 mm
+                'a2' : QPrinter.A2, # 420 x 594 mm
+                'a3' : QPrinter.A3, # 297 x 420 mm
+                'a4' : QPrinter.A4, # 210 x 297 mm, 8.26 x 11.69 inches
+                'a5' : QPrinter.A5, # 148 x 210 mm
+                'a6' : QPrinter.A6, # 105 x 148 mm
+                'a7' : QPrinter.A7, # 74 x 105 mm
+                'a8' : QPrinter.A8, # 52 x 74 mm
+                'a9' : QPrinter.A9, # 37 x 52 mm
+                'b0' : QPrinter.B0, # 1030 x 1456 mm
+                'b1' : QPrinter.B1, # 728 x 1030 mm
+                'b2' : QPrinter.B2, # 515 x 728 mm
+                'b3' : QPrinter.B3, # 364 x 515 mm
+                'b4' : QPrinter.B4, # 257 x 364 mm
+                'b5' : QPrinter.B5, # 182 x 257 mm, 7.17 x 10.13 inches
+                'b6' : QPrinter.B6, # 128 x 182 mm
+                'b7' : QPrinter.B7, # 91 x 128 mm
+                'b8' : QPrinter.B8, # 64 x 91 mm
+                'b9' : QPrinter.B9, # 45 x 64 mm
+                'b10' : QPrinter.B10, # 32 x 45 mm
+                'c5e' : QPrinter.C5E, # 163 x 229 mm
+                'comm10e' : QPrinter.Comm10E, # 105 x 241 mm, U.S. Common 10 Envelope
+                'dle' : QPrinter.DLE, # 110 x 220 mm
+                'executive' : QPrinter.Executive, # 7.5 x 10 inches, 191 x 254 mm
+                'folio' : QPrinter.Folio, # 210 x 330 mm
+                'ledger' : QPrinter.Ledger, # 432 x 279 mm
+                'legal' : QPrinter.Legal, # 8.5 x 14 inches, 216 x 356 mm
+                'letter' : QPrinter.Letter, # 8.5 x 11 inches, 216 x 279 mm
+                'tabloid' : QPrinter.Tabloid, #  279 x 432 mm
+                #'custom' : QPrinter.Custom, # Unknown, or a user defined size.
+             }
+
+def paper_size(size):
+    '''Return the QPrinter paper size for a name, QPrinter.Letter if unknown.'''
+    if size in PAPER_SIZES:
+        return PAPER_SIZES[size]
+    return QPrinter.Letter
+
+ORIENTATIONS = {
+                'portrait' : QPrinter.Portrait,
+                'landscape' : QPrinter.Landscape,
+               }
+
+def orientation(orientation):
+    '''Return the QPrinter orientation for a name, QPrinter.Portrait if unknown.'''
+    if orientation in ORIENTATIONS:
+        return ORIENTATIONS[orientation]
+    return QPrinter.Portrait
+
+
+class PageOptions(object):
+    '''
+    Page geometry used when printing to PDF.
+
+    The margins are plain numbers expressed in ``unit``. The setters accept
+    anything ``float`` can parse and fall back to the default margin of 1
+    for values that cannot be parsed.
+    '''
+    margin_top = 1
+    margin_bottom = 1
+    margin_left = 1
+    margin_right = 1
+    unit = QPrinter.Inch
+    paper_size = QPrinter.Letter
+    orientation = QPrinter.Portrait
+
+    @staticmethod
+    def _parse_margin(size):
+        '''Convert ``size`` to a number; return 1 when it is not numeric.'''
+        # float, not int: with inches as the default unit a margin such as
+        # '0.5' must be usable instead of silently turning into 1.
+        try:
+            return float(size)
+        except (TypeError, ValueError, OverflowError):
+            return 1
+
+    def set_margin_top(self, size):
+        '''Set the top margin from a number or numeric string.'''
+        self.margin_top = self._parse_margin(size)
+
+    def set_margin_bottom(self, size):
+        '''Set the bottom margin from a number or numeric string.'''
+        self.margin_bottom = self._parse_margin(size)
+
+    def set_margin_left(self, size):
+        '''Set the left margin from a number or numeric string.'''
+        self.margin_left = self._parse_margin(size)
+
+    def set_margin_right(self, size):
+        '''Set the right margin from a number or numeric string.'''
+        self.margin_right = self._parse_margin(size)
--- a/src/calibre/ebooks/pdf/reverse.py
+++ b/src/calibre/ebooks/pdf/reverse.py
@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Reverse content of PDF.
+'''
+
+import os, sys
+
+from calibre.ebooks.metadata.meta import metadata_from_formats
+from calibre.ebooks.metadata import authors_to_string
+from calibre.utils.config import Config, StringConfig
+
+from pyPdf import PdfFileWriter, PdfFileReader
+
+def config(defaults=None):
+    '''
+    Create the option store of the reverse command.
+
+    Without ``defaults`` a ``Config`` named 'reversepdf' is used, otherwise a
+    ``StringConfig`` built from ``defaults``. The single option is the
+    output path.
+    '''
+    desc = _('Options to control the transformation of pdf')
+    c = Config('reversepdf', desc) if defaults is None else StringConfig(defaults, desc)
+    c.add_opt(
+        'output', ['-o', '--output'], default='reversed.pdf',
+        help=_('Path to output file. By default a file is created in the current directory.'))
+    return c
+
+def option_parser(name):
+    '''
+    Build the command line parser for the reverse command.
+
+    :param name: Sub-command name substituted for the ``%%name`` placeholder
+                 of the usage text.
+    :return: The parser produced by ``config().option_parser``.
+    '''
+    # Translate the constant template first and substitute the command name
+    # afterwards. Substituting first hands gettext a string that is not a
+    # message id of the catalog whenever ``name`` is non-empty.
+    usage = _('''\
+	%prog %%name [options] file1.pdf
+
+	Reverse PDF.
+	''').replace('%%name', name)
+    return config().option_parser(usage=usage)
+
+def reverse(pdf_path, out_path, metadata=None):
+    '''
+    Write a copy of a PDF with its pages in reverse order.
+
+    :param pdf_path: Path of the PDF to read.
+    :param out_path: Path of the PDF to create.
+    :param metadata: Object providing ``title`` and ``authors``. When it is
+                     ``None`` title and author are set to "Unknown".
+    '''
+    if metadata is None:
+        title = _('Unknown')
+        author = _('Unknown')
+    else:
+        title = metadata.title
+        author = authors_to_string(metadata.authors)
+
+    out_pdf = PdfFileWriter(title=title, author=author)
+
+    # The input stays open until the output is written (the pages handed to
+    # the writer may still refer back to it) and is closed afterwards.
+    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+        pdf = PdfFileReader(pdf_file)
+        for page in reversed(pdf.pages):
+            out_pdf.addPage(page)
+
+        with open(out_path, 'wb') as out_file:
+            out_pdf.write(out_file)
+
+def valid_pdf(pdf_path):
+    '''
+    Return True if the file can be parsed as a PDF, is not encrypted and has
+    at least one page; return False otherwise.
+    '''
+    try:
+        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+            pdf = PdfFileReader(pdf_file)
+            return not pdf.isEncrypted and pdf.numPages > 0
+    except Exception:
+        # Unreadable or unparsable. Only Exception is caught so that Ctrl-C
+        # and sys.exit still propagate.
+        return False
+
+
+def main(args=sys.argv, name=''):
+    '''
+    Command line entry point of the reverse command.
+
+    :param args: Argument vector; the first positional left after option
+                 parsing is skipped, the next one is the PDF to reverse.
+    :param name: Sub-command name shown in the usage text.
+    :return: 0 on success, 2 when no file was given or it cannot be read.
+    '''
+    parser = option_parser(name)
+    opts, args = parser.parse_args(args)
+    args = args[1:]
+
+    if len(args) < 1:
+        print('Error: A PDF file is required.\n\n')
+        print(parser.get_usage())
+        return 2
+
+    if not valid_pdf(args[0]):
+        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % args[0])
+        return 2
+
+    mi = metadata_from_formats([args[0]])
+
+    reverse(args[0], opts.output, mi)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/pdf/split.py
+++ b/src/calibre/ebooks/pdf/split.py
@ -0,0 +1,186 @@
+'''
+Split PDF file into multiple PDF documents.
+'''
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os, sys, re
+
+from calibre.ebooks.metadata.meta import metadata_from_formats
+from calibre.ebooks.metadata import authors_to_string
+from calibre.utils.config import Config, StringConfig
+
+from pyPdf import PdfFileWriter, PdfFileReader
+
+def config(defaults=None):
+    '''
+    Create the option store of the split command.
+
+    Without ``defaults`` a ``Config`` named 'splitpdf' is used, otherwise a
+    ``StringConfig`` built from ``defaults``. The single option is the
+    output path, whose file name serves as base name of the files written.
+    '''
+    desc = _('Options to control the transformation of pdf')
+    if defaults is None:
+        c = Config('splitpdf', desc)
+    else:
+        c = StringConfig(defaults, desc)
+    # Adjacent literals instead of a backslash continuation inside the
+    # string, which embedded the indentation of the second source line in
+    # the help text.
+    c.add_opt('output', ['-o', '--output'], default='split.pdf',
+          help=_('Path to output file. By default a file is created in the current directory. '
+                 'The file name will be the base name for the output.'))
+    return c
+
+def option_parser(name):
+    '''
+    Build the command line parser for the split command.
+
+    :param name: Sub-command name substituted for the ``%%name`` placeholder
+                 of the usage text.
+    :return: The parser produced by ``config().option_parser``.
+    '''
+    # Translate the constant template first and substitute the command name
+    # afterwards. Substituting first hands gettext a string that is not a
+    # message id of the catalog whenever ``name`` is non-empty.
+    usage = _('''\
+    
+	%prog %%name [options] file.pdf page_to_split_on ...
+	%prog %%name [options] file.pdf page_range_to_split_on ...
+	
+	Ex.
+	
+	%prog %%name file.pdf 6
+	%prog %%name file.pdf 6-12
+	%prog %%name file.pdf 6-12 8 10 9-20
+
+	Split a PDF.
+	''').replace('%%name', name)
+    return config().option_parser(usage=usage)
+
+def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
+    '''
+    Write one PDF per requested split point.
+
+    :param in_path: Path of the PDF to split.
+    :param pages: Zero based page indexes. Each one produces a file that
+                  runs from that page to the last page of the document.
+    :param page_ranges: ``[start, end]`` pairs of zero based, inclusive
+                        page indexes. Each one produces a file with exactly
+                        those pages.
+    :param out_name: Base name of the output files; ``write_pdf`` appends
+                     the one based page number or range and ``.pdf``.
+    :param metadata: Passed through to ``write_pdf``.
+    '''
+    # Close the input once every part has been written.
+    with open(os.path.abspath(in_path), 'rb') as in_file:
+        pdf = PdfFileReader(in_file)
+        last_page = pdf.numPages - 1
+
+        for index in pages:
+            write_pdf(pdf, out_name, '%s' % (index + 1), index, last_page, metadata)
+        for start, end in page_ranges:
+            write_pdf(pdf, out_name, '%s-%s' % (start + 1, end + 1), start, end, metadata)
+        
+def write_pdf(pdf, name, suffix, start, end, metadata=None):
+    '''
+    Write the pages ``start`` to ``end`` (zero based, inclusive) of ``pdf``
+    to the file ``<name><suffix>.pdf``.
+
+    :param pdf: Reader object the pages are taken from via ``getPage``.
+    :param name: Base of the output file name.
+    :param suffix: Text appended to ``name`` before the ``.pdf`` extension.
+    :param metadata: Object providing ``title`` and ``authors``. When it is
+                     ``None`` title and author are set to "Unknown".
+    '''
+    if metadata is None:
+        title = _('Unknown')
+        author = _('Unknown')
+    else:
+        title = metadata.title
+        author = authors_to_string(metadata.authors)
+
+    out_pdf = PdfFileWriter(title=title, author=author)
+    for page_num in range(start, end + 1):
+        out_pdf.addPage(pdf.getPage(page_num))
+    with open('%s%s.pdf' % (name, suffix), 'wb') as out_file:
+        out_pdf.write(out_file)
+    
+def split_args(args):
+    '''
+    Sort the positional arguments of the split command into categories.
+
+    :return: Tuple ``(pdf, pages, page_ranges, bad)``. ``pdf`` is the first
+             argument ending in ``.pdf`` (or ''), ``pages`` the single page
+             numbers and ``page_ranges`` the ``[start, end]`` pairs, all
+             still as strings. ``bad`` is the sorted, duplicate free list of
+             everything else, including any further ``.pdf`` argument.
+    '''
+    pdf_re = re.compile(r'(?iu)^.*?\.pdf[ ]*$')
+    single_re = re.compile(r'^[ ]*\d+[ ]*$')
+    range_re = re.compile(r'^[ ]*(?P<start>\d+)[ ]*-[ ]*(?P<end>\d+)[ ]*$')
+
+    pdf = ''
+    pages, page_ranges, rejected = [], [], []
+
+    for raw in args:
+        token = raw.strip()
+
+        if pdf_re.search(token) is not None:
+            # Only the first pdf is the input, later ones are errors.
+            if pdf == '':
+                pdf = token
+            else:
+                rejected.append(token)
+            continue
+
+        if single_re.search(token) is not None:
+            pages.append(token)
+            continue
+
+        match = range_re.search(token)
+        if match is None:
+            rejected.append(token)
+            continue
+
+        first, last = match.group('start', 'end')
+        if first == last:
+            # A range such as 4-4 is really a single index.
+            pages.append(first)
+        else:
+            page_ranges.append([first, last])
+
+    return pdf, pages, page_ranges, sorted(set(rejected))
+
+def clean_page_list(pdf_path, pages, page_ranges):
+    '''
+    Normalize the split points given on the command line.
+
+    Page numbers are one based on input and zero based on output. Numbers
+    beyond the end of the document are set to the last page and 0 is set to
+    the first page. Duplicates are removed and both lists are sorted.
+
+    :param pdf_path: PDF whose page count bounds the page numbers.
+    :param pages: Single page numbers (strings or ints).
+    :param page_ranges: ``(start, end)`` pairs (strings or ints), in either
+                        order.
+    :return: Tuple ``(pages, page_ranges)`` of zero based ints; every range
+             is a sorted ``[start, end]`` list.
+    '''
+    # Only the page count is needed, so the file is closed right away.
+    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+        total_pages = PdfFileReader(pdf_file).numPages
+
+    def to_index(page):
+        # Clamp into 1..total_pages before converting to a zero based index.
+        # Without the lower bound page 0 became index -1, i.e. the last page.
+        return max(1, min(page, total_pages)) - 1
+
+    sorted_pages = [to_index(int(index)) for index in pages]
+    sorted_ranges = []
+
+    for start, end in page_ranges:
+        start = int(start)
+        end = int(end)
+
+        # A range entirely behind the document collapses to the last page.
+        if start > total_pages and end > total_pages:
+            sorted_pages.append(total_pages - 1)
+            continue
+
+        page_range = sorted([to_index(start), to_index(end)])
+        if page_range not in sorted_ranges:
+            sorted_ranges.append(page_range)
+
+    return sorted(set(sorted_pages)), sorted(sorted_ranges)
+
+def valid_pdf(pdf_path):
+    '''
+    Return True if the file can be parsed as a PDF, is not encrypted and has
+    at least one page; return False otherwise.
+    '''
+    try:
+        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
+            pdf = PdfFileReader(pdf_file)
+            return not pdf.isEncrypted and pdf.numPages > 0
+    except Exception:
+        # Unreadable or unparsable. Only Exception is caught so that Ctrl-C
+        # and sys.exit still propagate.
+        return False
+
+def main(args=sys.argv, name=''):
+    '''
+    Command line entry point of the split command.
+
+    :param args: Argument vector; the first positional left after option
+                 parsing is skipped, the rest are the PDF and split points.
+    :param name: Sub-command name shown in the usage text.
+    :return: 0 on success, 2 on missing or unknown arguments or when the
+             PDF cannot be read.
+    '''
+    parser = option_parser(name)
+    opts, args = parser.parse_args(args)
+
+    pdf, pages, page_ranges, unknown = split_args(args[1:])
+
+    # Both are required: a PDF and at least one page or page range. The
+    # previous test only fired when the PDF was missing AND one of the two
+    # lists was empty, so a lone PDF or lone page numbers slipped through.
+    if pdf == '' or not (pages or page_ranges):
+        print('Error: PDF and where to split is required.\n\n')
+        print(parser.get_usage())
+        return 2
+
+    if unknown:
+        for arg in unknown:
+            print('Error: Unknown argument `%s`' % arg)
+        print(parser.get_usage())
+        return 2
+
+    if not valid_pdf(pdf):
+        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
+        return 2
+
+    pages, page_ranges = clean_page_list(pdf, pages, page_ranges)
+
+    mi = metadata_from_formats([pdf])
+
+    # The extension of the output option is dropped; the rest is the base
+    # name of every file written.
+    split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
+
--- a/src/calibre/ebooks/pdf/pdftrim.py
+++ b/src/calibre/ebooks/pdf/pdftrim.py
@ -16,8 +16,6 @@ def config(defaults=None):
        c = Config('trimpdf', desc)
    else:
        c = StringConfig(defaults, desc)
-    c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
-          help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
    c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
          help=_('Path to output file. By default a file is created in the current directory.'))
    c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
@ -33,16 +31,16 @@ def config(defaults=None):
    return c


-def option_parser():
+def option_parser(name):
    c = config()
    return c.option_parser(usage=_('''\
-	%prog [options] file.pdf
+	%prog %%name [options] file.pdf

 	Crops a pdf. 
-	'''))
+	'''.replace('%%name', name)))

-def main(args=sys.argv):
-    parser = option_parser()
+def main(args=sys.argv, name=''):
+    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    try:
        source = os.path.abspath(args[1])
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@ -1,19 +1,18 @@
-'''
-Write content to PDF.
-'''
+# -*- coding: utf-8 -*-
 from __future__ import with_statement

 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'

-import os, logging, shutil, sys
+'''
+Write content to PDF.
+'''
+
+import os, shutil, sys

-from calibre import LoggingInterface
-from calibre.ebooks.epub.iterator import SpineItem
-from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre.customize.ui import run_plugins_on_postprocess
-from calibre.utils.config import Config, StringConfig
+from calibre.ebooks.pdf.pageoptions import PageOptions

 from PyQt4 import QtCore
 from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, \
@ -21,21 +20,14 @@ from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter,
 from PyQt4.QtWebKit import QWebView

 from pyPdf import PdfFileWriter, PdfFileReader
-    
-class PDFMargins:
-    def __init__(self, margin=1):
-        self.top    = margin
-        self.bottom = margin
-        self.left   = margin
-        self.right  = margin
        
 class PDFWriter(QObject):
-    def __init__(self, margins=PDFMargins()):
+    def __init__(self, log, popts=PageOptions()):
        if QApplication.instance() is None:
            QApplication([])
        QObject.__init__(self)
        
-        self.logger = logging.getLogger('oeb2pdf')
+        self.logger = log
        
        self.loop = QEventLoop()
        self.view = QWebView()
@ -43,15 +35,14 @@ class PDFWriter(QObject):
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory('_any2pdf_parts')
-        self.margins = margins
+        self.popts = popts

-    def dump(self, oebpath, path):
+    def dump(self, spine, out_stream):
        self._delete_tmpdir()
        
-        opf = OPF(oebpath, os.path.dirname(oebpath))
-        self.render_queue = [SpineItem(i.path) for i in opf.spine]
+        self.render_queue = spine[:]
        self.combine_queue = []
-        self.path = path
+        self.out_stream = out_stream
        
        QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
        self.loop.exec_()
@ -78,7 +69,9 @@ class PDFWriter(QObject):
            self.logger.debug('\tRendering item as %s' % item_path)
        
            printer = QPrinter(QPrinter.HighResolution)
-            printer.setPageMargins(self.margins.left, self.margins.top, self.margins.right, self.margins.bottom, QPrinter.Inch)
+            printer.setPageMargins(self.popts.margin_left, self.popts.margin_top, self.popts.margin_right, self.popts.margin_bottom, self.popts.unit)
+            printer.setPaperSize(self.popts.paper_size)
+            printer.setOrientation(self.popts.orientation)
            printer.setOutputFormat(QPrinter.PdfFormat)
            printer.setOutputFileName(item_path)
            self.view.print_(printer)
@ -98,75 +91,7 @@ class PDFWriter(QObject):
                inputPDF = PdfFileReader(file(item, 'rb'))
                for page in inputPDF.pages:
                    outPDF.addPage(page)
-            outputStream = file(self.path, 'wb')
-            outPDF.write(outputStream)
-            outputStream.close()
+            outPDF.write(self.out_stream)
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
-
-
-def config(defaults=None):
-    desc = _('Options to control the conversion to PDF')
-    if defaults is None:
-        c = Config('pdf', desc)
-    else:
-        c = StringConfig(defaults, desc)
-        
-    pdf = c.add_group('PDF', _('PDF options.'))
-            
-    pdf('margin_top', ['--margin_top'], default=1,
-         help=_('The top margin around the document in inches.'))
-    pdf('margin_bottom', ['--margin_bottom'], default=1,
-         help=_('The bottom margin around the document in inches.'))
-    pdf('margin_left', ['--margin_left'], default=1,
-         help=_('The left margin around the document in inches.'))
-    pdf('margin_right', ['--margin_right'], default=1,
-         help=_('The right margin around the document in inches.'))
-    
-    return c
-
-def option_parser():
-    c = config()
-    parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
-    parser.add_option(
-        '-o', '--output', default=None, 
-        help=_('Output file. Default is derived from input filename.'))
-    parser.add_option(
-        '-v', '--verbose', default=0, action='count',
-        help=_('Useful for debugging.'))        
-    return parser
-
-def oeb2pdf(opts, inpath):
-    logger = LoggingInterface(logging.getLogger('oeb2pdf'))
-    logger.setup_cli_handler(opts.verbose)
-    
-    outpath = opts.output
-    if outpath is None:
-        outpath = os.path.basename(inpath)
-        outpath = os.path.splitext(outpath)[0] + '.pdf'
-
-    margins = PDFMargins()
-    margins.top = opts.margin_top
-    margins.bottom = opts.margin_bottom
-    margins.left = opts.margin_left
-    margins.right = opts.margin_right
-
-    writer = PDFWriter(margins)
-    writer.dump(inpath, outpath)
-    run_plugins_on_postprocess(outpath, 'pdf')
-    logger.log_info(_('Output written to ') + outpath)
-    
-def main(argv=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(argv[1:])
-    if len(args) != 1:
-        parser.print_help()
-        return 1
-    inpath = args[0]
-    retval = oeb2pdf(opts, inpath)
-    return retval
-
-if __name__ == '__main__':
-    sys.exit(main())
-    
--- a/src/calibre/ebooks/txt/__init__.py
+++ b/src/calibre/ebooks/txt/__init__.py
@ -0,0 +1,9 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2008, John Schember john@nachtimwald.com'
+__docformat__ = 'restructuredtext en'
+
+'''
+Used for txt output
+'''
+
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin, \
+    OptionRecommendation
+from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
+from calibre.ebooks.metadata import authors_to_string
+
+class TXTOutput(OutputFormatPlugin):
+    '''
+    Output plugin that converts an OEB book to plain text.
+
+    Title and author can be written at the top of the file and the newline
+    convention of the output is selectable.
+    '''
+
+    name = 'TXT Output'
+    author = 'John Schember'
+    file_type = 'txt'
+
+    # The list of newline types is interpolated AFTER translation so that
+    # the string given to _() is the constant message id.
+    options = set([
+                    OptionRecommendation(name='newline', recommended_value='system',
+                        level=OptionRecommendation.LOW, long_switch='newline',
+                        short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
+                        help=_('Type of newline to use. Options are %s. Default is \'system\'. '
+                            'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
+                            'For Mac OS X use \'unix\'. \'system\' will default to the newline '
+                            'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
+                    OptionRecommendation(name='prepend_author', recommended_value='true',
+                        level=OptionRecommendation.LOW, long_switch='prepend_author',
+                        choices=['true', 'false'],
+                        help=_('Write the author to the beginning of the file. '
+                            'Default is \'true\'. Use \'false\' to disable.')),
+                    OptionRecommendation(name='prepend_title', recommended_value='true',
+                        choices=['true', 'false'],
+                        level=OptionRecommendation.LOW, long_switch='prepend_title',
+                        help=_('Write the title to the beginning of the file. '
+                            'Default is \'true\'. Use \'false\' to disable.'))
+                 ])
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Write the text of ``oeb_book`` to ``output_path``.
+
+        :param oeb_book: Book whose ``spine`` is converted; its metadata is
+                         the fallback for title and author.
+        :param output_path: Either a file system path or an object with a
+                            ``write`` method. A path is created (including
+                            missing parent directories) and closed here; a
+                            stream is left open for the caller.
+        :param input_plugin: Not used by this plugin.
+        :param opts: Conversion options (newline, prepend_author,
+                     prepend_title, authors, title).
+        :param log: Logger handed to the text writer.
+        '''
+        metadata = TxtMetadata()
+        if opts.prepend_author.lower() == 'true':
+            metadata.author = opts.authors if opts.authors else authors_to_string(oeb_book.metadata.authors)
+        if opts.prepend_title.lower() == 'true':
+            metadata.title = opts.title if opts.title else oeb_book.metadata.title
+
+        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
+        txt = writer.dump(oeb_book.spine, metadata)
+
+        # The stream is binary, so encode explicitly. Writing unicode
+        # directly falls back to the ASCII codec and raises
+        # UnicodeEncodeError on the first non-ASCII character.
+        if isinstance(txt, unicode):
+            txt = txt.encode('utf-8')
+
+        owns_stream = not hasattr(output_path, 'write')
+        if owns_stream:
+            out_dir = os.path.dirname(output_path)
+            if out_dir != '' and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+
+        try:
+            # Start from an empty stream, whatever it contained before.
+            out_stream.seek(0)
+            out_stream.truncate()
+            out_stream.write(txt)
+        finally:
+            # Only close what was opened here, also when writing fails.
+            if owns_stream:
+                out_stream.close()
--- a/src/calibre/ebooks/txt/writer.py
+++ b/src/calibre/ebooks/txt/writer.py
@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+'''
+Write content to TXT.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os, re, sys
+
+from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
+
+from BeautifulSoup import BeautifulSoup
+
+class TxtWriter(object):
+    '''
+    Turn the HTML documents of a book spine into one plain text string.
+
+    :param newline: Newline sequence used in the generated text.
+    :param log: Logger; stored on the instance.
+    '''
+
+    def __init__(self, newline, log):
+        self.newline = newline
+        self.log = log
+
+    def dump(self, spine, metadata):
+        '''
+        Convert every spine item and return the concatenated text.
+
+        :param spine: Iterable of items that can be passed to ``open`` and
+                      carry an ``encoding`` attribute.
+        :param metadata: Object with ``title`` and ``author``; non-empty
+                         values are written in upper case at the top.
+        :return: The text, ending in two blank lines.
+        '''
+        parts = []
+        for item in spine:
+            with open(item, 'r') as itemf:
+                content = itemf.read().decode(item.encoding)
+            # Convert newlines to unix style \n for processing. These
+            # are changed to the specified type at the end of the chain.
+            content = self.unix_newlines(content)
+            content = self.strip_html(content)
+            content = self.replace_html_symbols(content)
+            content = self.cleanup_text(content)
+            content = self.specified_newlines(content)
+            parts.append(content)
+        out = u''.join(parts)
+
+        # Prepend metadata: author first, so the title ends up above it.
+        if metadata.author not in (None, ''):
+            out = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + out
+        if metadata.title not in (None, ''):
+            out = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + out
+
+        # Put two blank lines at end of file. This is independent of the
+        # metadata, so it must not be nested under the title check.
+        end = out[-3 * len(self.newline):]
+        for i in range(3 - end.count(self.newline)):
+            out += self.newline
+
+        return out
+
+    def strip_html(self, html):
+        '''
+        Return the text of all ``body`` elements of ``html`` with the markup
+        removed. Headings are replaced by markers that ``cleanup_text``
+        later expands to newlines; block level end tags become blank lines.
+        '''
+        stripped = u''
+
+        for dom_tree in BeautifulSoup(html).findAll('body'):
+            text = unicode(dom_tree)
+
+            # Remove unnecessary tags together with their content. Matching
+            # is non-greedy and spans lines so that a multi line script is
+            # removed and text between two scripts is kept.
+            for tag in ['script', 'style']:
+                text = re.sub(r'(?imsu)<[ ]*%s[ ]*.*?>.*?</[ ]*%s[ ]*>' % (tag, tag), '', text)
+            text = re.sub(r'(?s)<!--.*?-->', '', text)
+            text = re.sub(r'<\?.*?\?>', '', text)
+            text = re.sub(r'<@.*?@>', '', text)
+            text = re.sub(r'<%.*?%>', '', text)
+
+            # Headings usually indicate Chapters.
+            # We are going to use a marker to insert the proper number of
+            # newline characters at the end of cleanup_text because
+            # cleanup_text removes excessive (more than 2) newlines.
+            for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
+                text = re.sub(r'(?imu)<[ ]*%s[ ]*.*?>' % tag, '-vzxedxy-', text)
+                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '-vlgzxey-', text)
+
+            # Separate content with space. (The tag name has to be
+            # interpolated, otherwise the pattern looks for a literal %s.)
+            for tag in ['td']:
+                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, ' ', text)
+
+            # Separate content with empty line.
+            for tag in ['p', 'div', 'pre', 'li', 'table', 'tr']:
+                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '\n\n', text)
+
+            for tag in ['hr', 'br']:
+                text = re.sub(r'(?imu)<[ ]*%s[ ]*/*?>' % tag, '\n\n', text)
+
+            # Remove any tags that do not need special processing.
+            text = re.sub(r'<.*?>', '', text)
+
+            stripped = stripped + text
+
+        return stripped
+
+    def replace_html_symbols(self, content):
+        '''Replace every code listed in HTML_SYMBOLS by its symbol.'''
+        for symbol in HTML_SYMBOLS:
+            for code in HTML_SYMBOLS[symbol]:
+                content = content.replace(code, symbol)
+        return content
+
+    def cleanup_text(self, text):
+        '''
+        Normalize white space of unix-newline text and expand the heading
+        markers written by ``strip_html``.
+        '''
+        # Replace bad characters.
+        # NOTE(review): this also drops every genuine U+00C2; presumably a
+        # workaround for wrongly decoded UTF-8 input - confirm.
+        text = text.replace(u'\xc2', '')
+        text = text.replace(u'\xa0', ' ')
+
+        # Replace tabs, vertical tabs and form feeds with single space.
+        # This needs a regular expression; str.replace looked for the
+        # literal two character sequences tab-plus etc. and never matched.
+        text = re.sub(r'[\t\v\f]+', ' ', text)
+
+        # Single line paragraph: a lone newline between two non-newline
+        # characters becomes a space. One pass is enough because a replaced
+        # newline never has another newline as neighbour.
+        text = re.sub(r'(?<=[^\n])\n(?=[^\n])', ' ', text)
+
+        # Remove multiple spaces.
+        text = re.sub(r'[ ]+', ' ', text)
+
+        # Remove excessive newlines.
+        text = re.sub(r'\n[ ]+\n', '\n\n', text)
+        text = re.sub(r'\n{3,}', '\n\n', text)
+
+        # Replace markers with the proper characters.
+        text = text.replace('-vzxedxy-', '\n\n\n\n\n')
+        text = text.replace('-vlgzxey-', '\n\n\n')
+
+        # Replace spaces at the beginning and end of lines
+        text = re.sub(r'(?imu)^[ ]+', '', text)
+        text = re.sub(r'(?imu)[ ]+$', '', text)
+
+        return text
+
+    def unix_newlines(self, text):
+        '''Convert Windows and old Mac newlines to \\n.'''
+        text = text.replace('\r\n', '\n')
+        text = text.replace('\r', '\n')
+
+        return text
+
+    def specified_newlines(self, text):
+        '''Convert \\n to the newline sequence this writer was created with.'''
+        if self.newline == '\n':
+            return text
+
+        return text.replace('\n', self.newline)
+
+
+class TxtNewlines(object):
+    '''
+    Resolve the name of a newline convention to its character sequence.
+
+    The result is available as the ``newline`` attribute. The lookup is
+    case insensitive and unknown names resolve to the newline of the
+    running operating system.
+    '''
+    NEWLINE_TYPES = {
+                        'system'  : os.linesep,
+                        'unix'    : '\n',
+                        'old_mac' : '\r',
+                        'windows' : '\r\n'
+                     }
+
+    def __init__(self, newline_type):
+        wanted = newline_type.lower()
+        if wanted in self.NEWLINE_TYPES:
+            self.newline = self.NEWLINE_TYPES[wanted]
+        else:
+            self.newline = os.linesep
+
+
+class TxtMetadata(object):
+    '''Title and author to write at the top of a text file; both start as None.'''
+
+    def __init__(self):
+        self.title = self.author = None
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@ -709,6 +709,9 @@ class BooksView(TableView):

    def close(self):
        self._model.close()
+        
+    def set_editable(self, editable):
+        self._model.set_editable(editable)

    def set_editable(self, editable):
        self._model.set_editable(editable)
@ -1002,6 +1005,10 @@ class DeviceBooksModel(BooksModel):
                self.sort(col, self.sorted_on[1])
            done = True
        return done
+        
+    def set_editable(self, editable):
+        self.editable = editable
+        

    def set_editable(self, editable):
        self.editable = editable
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -39,10 +39,9 @@ entry_points = {
             'calibre-fontconfig = calibre.utils.fontconfig:main',
             'calibre-parallel   = calibre.parallel:main',
             'calibre-customize  = calibre.customize.ui:main',
-             'pdftrim            = calibre.ebooks.pdf.pdftrim:main' ,
+             'pdfmanipulate      = calibre.ebooks.pdf.manipulate:main',
             'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
             'calibre-smtp = calibre.utils.smtp:main',
-
        ],
        'gui_scripts'    : [
            __appname__+' = calibre.gui2.main:main',
@ -548,6 +547,3 @@ main = post_install
 if __name__ == '__main__':
    post_install()

-
-
-