PDF manipulation via the pdfmanipulate command. TXT and PDF output.

This commit is contained in:
Kovid Goyal 2009-04-01 14:07:43 -07:00
commit b2e8618354
19 changed files with 1322 additions and 176 deletions

View File

@ -159,6 +159,16 @@ class ODTMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype):
    """Return the metadata the ODT reader extracts from ``stream``.

    ``ftype`` is accepted for the plugin interface but is not used here.
    """
    # The parser module is imported at call time, not at module import time.
    from calibre.ebooks.metadata.odt import get_metadata as read_odt_metadata
    return read_odt_metadata(stream)
class TXTMetadataReader(MetadataReaderPlugin):
    """Metadata reader plugin registered for the ``txt`` file type."""

    name = 'Read TXT metadata'
    file_types = set(['txt'])
    description = _('Read metadata from %s files') % 'TXT'

    def get_metadata(self, stream, ftype):
        """Return the metadata the TXT reader extracts from ``stream``.

        ``ftype`` is accepted for the plugin interface but is not used here.
        """
        # The parser module is imported at call time, not at module import
        # time.
        from calibre.ebooks.metadata.txt import get_metadata as read_txt_metadata
        return read_txt_metadata(stream)
class LRXMetadataReader(MetadataReaderPlugin):
@ -256,9 +266,11 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.mobi.input import MOBIInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.customize.profiles import input_profiles, output_profiles
# Conversion plugins. The first assignment is immediately replaced by the
# second one, which additionally registers TXTOutput and PDFOutput.
plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput, TXTOutput, PDFOutput]
# Append every class found in the current namespace whose name ends with
# 'MetadataReader'.
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -196,7 +196,7 @@ OptionRecommendation(name='language',
self.input_fmt = input_fmt
self.output_fmt = output_fmt
# Build set of all possible options. Two options are equal iff their
# Build set of all possible options. Two options are equal if their
# names are the same.
self.input_options = self.input_plugin.options.union(
self.input_plugin.common_options)

View File

@ -0,0 +1,310 @@
# -*- coding: utf-8 -*-
'''
Mapping of non-ASCII symbols to their corresponding HTML entity number and name
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
# http://www.w3schools.com/tags/ref_symbols.asp
# Maps a single character to its HTML entity spellings: the numeric character
# reference first and, where the table has one, the named entity second.
#
# Every non-ASCII key is written as an escape sequence so the table does not
# depend on how this file is encoded or transported; a key that degrades to
# the empty string would silently collapse many entries into one.
HTML_SYMBOLS = {
    # Math Symbols
    u'\u2200' : ['&#8704;', '&forall;'], # for all
    u'\u2202' : ['&#8706;', '&part;'], # part
    u'\u2203' : ['&#8707;', '&exists;'], # exists
    u'\u2205' : ['&#8709;', '&empty;'], # empty
    u'\u2207' : ['&#8711;', '&nabla;'], # nabla
    u'\u2208' : ['&#8712;', '&isin;'], # isin
    u'\u2209' : ['&#8713;', '&notin;'], # notin
    u'\u220b' : ['&#8715;', '&ni;'], # ni
    u'\u220f' : ['&#8719;', '&prod;'], # prod
    u'\u2211' : ['&#8721;', '&sum;'], # sum
    u'\u2212' : ['&#8722;', '&minus;'], # minus
    u'\u2217' : ['&#8727;', '&lowast;'], # lowast
    u'\u221a' : ['&#8730;', '&radic;'], # square root
    u'\u221d' : ['&#8733;', '&prop;'], # proportional to
    u'\u221e' : ['&#8734;', '&infin;'], # infinity
    u'\u2220' : ['&#8736;', '&ang;'], # angle
    u'\u2227' : ['&#8743;', '&and;'], # and
    u'\u2228' : ['&#8744;', '&or;'], # or
    u'\u2229' : ['&#8745;', '&cap;'], # cap
    u'\u222a' : ['&#8746;', '&cup;'], # cup
    u'\u222b' : ['&#8747;', '&int;'], # integral
    u'\u2234' : ['&#8756;', '&there4;'], # therefore
    u'\u223c' : ['&#8764;', '&sim;'], # similar to
    u'\u2245' : ['&#8773;', '&cong;'], # approximately equal
    u'\u2248' : ['&#8776;', '&asymp;'], # almost equal
    u'\u2260' : ['&#8800;', '&ne;'], # not equal
    u'\u2261' : ['&#8801;', '&equiv;'], # equivalent
    u'\u2264' : ['&#8804;', '&le;'], # less or equal
    u'\u2265' : ['&#8805;', '&ge;'], # greater or equal
    u'\u2282' : ['&#8834;', '&sub;'], # subset of
    u'\u2283' : ['&#8835;', '&sup;'], # superset of
    u'\u2284' : ['&#8836;', '&nsub;'], # not subset of
    u'\u2286' : ['&#8838;', '&sube;'], # subset or equal
    u'\u2287' : ['&#8839;', '&supe;'], # superset or equal
    u'\u2295' : ['&#8853;', '&oplus;'], # circled plus
    u'\u2297' : ['&#8855;', '&otimes;'], # circled times
    u'\u22a5' : ['&#8869;', '&perp;'], # perpendicular
    u'\u22c5' : ['&#8901;', '&sdot;'], # dot operator
    # Greek Letters
    u'\u0391' : ['&#913;', '&Alpha;'], # Alpha
    u'\u0392' : ['&#914;', '&Beta;'], # Beta
    u'\u0393' : ['&#915;', '&Gamma;'], # Gamma
    u'\u0394' : ['&#916;', '&Delta;'], # Delta
    u'\u0395' : ['&#917;', '&Epsilon;'], # Epsilon
    u'\u0396' : ['&#918;', '&Zeta;'], # Zeta
    u'\u0397' : ['&#919;', '&Eta;'], # Eta
    u'\u0398' : ['&#920;', '&Theta;'], # Theta
    u'\u0399' : ['&#921;', '&Iota;'], # Iota
    u'\u039a' : ['&#922;', '&Kappa;'], # Kappa
    u'\u039b' : ['&#923;', '&Lambda;'], # Lambda
    u'\u039c' : ['&#924;', '&Mu;'], # Mu
    u'\u039d' : ['&#925;', '&Nu;'], # Nu
    u'\u039e' : ['&#926;', '&Xi;'], # Xi
    u'\u039f' : ['&#927;', '&Omicron;'], # Omicron
    u'\u03a0' : ['&#928;', '&Pi;'], # Pi
    u'\u03a1' : ['&#929;', '&Rho;'], # Rho
    u'\u03a3' : ['&#931;', '&Sigma;'], # Sigma
    u'\u03a4' : ['&#932;', '&Tau;'], # Tau
    u'\u03a5' : ['&#933;', '&Upsilon;'], # Upsilon
    u'\u03a6' : ['&#934;', '&Phi;'], # Phi
    u'\u03a7' : ['&#935;', '&Chi;'], # Chi
    u'\u03a8' : ['&#936;', '&Psi;'], # Psi
    u'\u03c9' : ['&#969;', '&omega;'], # omega
    u'\u03d1' : ['&#977;', '&thetasym;'], # theta symbol
    u'\u03d2' : ['&#978;', '&upsih;'], # upsilon symbol
    u'\u03d6' : ['&#982;', '&piv;'], # pi symbol
    # Other
    u'\u0152' : ['&#338;', '&OElig;'], # capital ligature OE
    u'\u0153' : ['&#339;', '&oelig;'], # small ligature oe
    u'\u0160' : ['&#352;', '&Scaron;'], # capital S with caron
    u'\u0161' : ['&#353;', '&scaron;'], # small S with caron
    u'\u0178' : ['&#376;', '&Yuml;'], # capital Y with diaeresis
    u'\u0192' : ['&#402;', '&fnof;'], # f with hook
    u'\u02c6' : ['&#710;', '&circ;'], # modifier letter circumflex accent
    u'\u02dc' : ['&#732;', '&tilde;'], # small tilde
    u'\u2013' : ['&#8211;', '&ndash;'], # en dash
    u'\u2014' : ['&#8212;', '&mdash;'], # em dash
    u'\u2018' : ['&#8216;', '&lsquo;'], # left single quotation mark
    u'\u2019' : ['&#8217;', '&rsquo;'], # right single quotation mark
    u'\u201a' : ['&#8218;', '&sbquo;'], # single low-9 quotation mark
    u'\u201c' : ['&#8220;', '&ldquo;'], # left double quotation mark
    u'\u201d' : ['&#8221;', '&rdquo;'], # right double quotation mark
    u'\u201e' : ['&#8222;', '&bdquo;'], # double low-9 quotation mark
    u'\u2020' : ['&#8224;', '&dagger;'], # dagger
    u'\u2021' : ['&#8225;', '&Dagger;'], # double dagger
    u'\u2022' : ['&#8226;', '&bull;'], # bullet
    u'\u2026' : ['&#8230;', '&hellip;'], # horizontal ellipsis
    u'\u2030' : ['&#8240;', '&permil;'], # per mille
    u'\u2032' : ['&#8242;', '&prime;'], # minutes
    u'\u2033' : ['&#8243;', '&Prime;'], # seconds
    u'\u2039' : ['&#8249;', '&lsaquo;'], # single left angle quotation
    u'\u203a' : ['&#8250;', '&rsaquo;'], # single right angle quotation
    u'\u203e' : ['&#8254;', '&oline;'], # overline
    u'\u20ac' : ['&#8364;', '&euro;'], # euro
    u'\u2122' : ['&#8482;', '&trade;'], # trademark
    u'\u2190' : ['&#8592;', '&larr;'], # left arrow
    u'\u2191' : ['&#8593;', '&uarr;'], # up arrow
    u'\u2192' : ['&#8594;', '&rarr;'], # right arrow
    u'\u2193' : ['&#8595;', '&darr;'], # down arrow
    u'\u2194' : ['&#8596;', '&harr;'], # left right arrow
    u'\u21b5' : ['&#8629;', '&crarr;'], # carriage return arrow
    u'\u2308' : ['&#8968;', '&lceil;'], # left ceiling
    u'\u2309' : ['&#8969;', '&rceil;'], # right ceiling
    u'\u230a' : ['&#8970;', '&lfloor;'], # left floor
    u'\u230b' : ['&#8971;', '&rfloor;'], # right floor
    u'\u25ca' : ['&#9674;', '&loz;'], # lozenge
    u'\u2660' : ['&#9824;', '&spades;'], # spade
    u'\u2663' : ['&#9827;', '&clubs;'], # club
    u'\u2665' : ['&#9829;', '&hearts;'], # heart
    u'\u2666' : ['&#9830;', '&diams;'], # diamond
    # Extra http://www.ascii.cl/htmlcodes.htm
    u' ' : ['&#32;'], # space
    u'!' : ['&#33;'], # exclamation point
    u'#' : ['&#35;'], # number sign
    u'$' : ['&#36;'], # dollar sign
    u'%' : ['&#37;'], # percent sign
    u'\'' : ['&#39;'], # single quote
    u'(' : ['&#40;'], # opening parenthesis
    u')' : ['&#41;'], # closing parenthesis
    u'*' : ['&#42;'], # asterisk
    u'+' : ['&#43;'], # plus sign
    u',' : ['&#44;'], # comma
    u'-' : ['&#45;'], # minus sign - hyphen
    u'.' : ['&#46;'], # period
    u'/' : ['&#47;'], # slash
    u'0' : ['&#48;'], # zero
    u'1' : ['&#49;'], # one
    u'2' : ['&#50;'], # two
    u'3' : ['&#51;'], # three
    u'4' : ['&#52;'], # four
    u'5' : ['&#53;'], # five
    u'6' : ['&#54;'], # six
    u'7' : ['&#55;'], # seven
    u'8' : ['&#56;'], # eight
    u'9' : ['&#57;'], # nine
    u':' : ['&#58;'], # colon
    u';' : ['&#59;'], # semicolon
    u'=' : ['&#61;'], # equal sign
    u'?' : ['&#63;'], # question mark
    u'@' : ['&#64;'], # at symbol
    u'A' : ['&#65;'],
    u'B' : ['&#66;'],
    u'C' : ['&#67;'],
    u'D' : ['&#68;'],
    u'E' : ['&#69;'],
    u'F' : ['&#70;'],
    u'G' : ['&#71;'],
    u'H' : ['&#72;'],
    u'I' : ['&#73;'],
    u'J' : ['&#74;'],
    u'K' : ['&#75;'],
    u'L' : ['&#76;'],
    u'M' : ['&#77;'],
    u'N' : ['&#78;'],
    u'O' : ['&#79;'],
    u'P' : ['&#80;'],
    u'Q' : ['&#81;'],
    u'R' : ['&#82;'],
    u'S' : ['&#83;'],
    u'T' : ['&#84;'],
    u'U' : ['&#85;'],
    u'V' : ['&#86;'],
    u'W' : ['&#87;'],
    u'X' : ['&#88;'],
    u'Y' : ['&#89;'],
    u'Z' : ['&#90;'],
    u'[' : ['&#91;'], # opening bracket
    u'\\' : ['&#92;'], # backslash
    u']' : ['&#93;'], # closing bracket
    u'^' : ['&#94;'], # caret - circumflex
    u'_' : ['&#95;'], # underscore
    u'`' : ['&#96;'], # grave accent
    u'a' : ['&#97;'],
    u'b' : ['&#98;'],
    u'c' : ['&#99;'],
    u'd' : ['&#100;'],
    u'e' : ['&#101;'],
    u'f' : ['&#102;'],
    u'g' : ['&#103;'],
    u'h' : ['&#104;'],
    u'i' : ['&#105;'],
    u'j' : ['&#106;'],
    u'k' : ['&#107;'],
    u'l' : ['&#108;'],
    u'm' : ['&#109;'],
    u'n' : ['&#110;'],
    u'o' : ['&#111;'],
    u'p' : ['&#112;'],
    u'q' : ['&#113;'],
    u'r' : ['&#114;'],
    u's' : ['&#115;'],
    u't' : ['&#116;'],
    u'u' : ['&#117;'],
    u'v' : ['&#118;'],
    u'w' : ['&#119;'],
    u'x' : ['&#120;'],
    u'y' : ['&#121;'],
    u'z' : ['&#122;'],
    u'{' : ['&#123;'], # opening brace
    u'|' : ['&#124;'], # vertical bar
    u'}' : ['&#125;'], # closing brace
    u'~' : ['&#126;'], # equivalency sign - tilde
    u'<' : ['&#60;', '&lt;'], # less than sign
    u'>' : ['&#62;', '&gt;'], # greater than sign
    u'\xa1' : ['&#161;', '&iexcl;'], # inverted exclamation mark
    u'\xa2' : ['&#162;', '&cent;'], # cent sign
    u'\xa3' : ['&#163;', '&pound;'], # pound sign
    u'\xa4' : ['&#164;', '&curren;'], # currency sign
    u'\xa5' : ['&#165;', '&yen;'], # yen sign
    u'\xa6' : ['&#166;', '&brvbar;'], # broken vertical bar
    u'\xa7' : ['&#167;', '&sect;'], # section sign
    u'\xa8' : ['&#168;', '&uml;'], # spacing diaeresis - umlaut
    u'\xa9' : ['&#169;', '&copy;'], # copyright sign
    u'\xaa' : ['&#170;', '&ordf;'], # feminine ordinal indicator
    u'\xab' : ['&#171;', '&laquo;'], # left double angle quotes
    u'\xac' : ['&#172;', '&not;'], # not sign
    u'\xae' : ['&#174;', '&reg;'], # registered trade mark sign
    u'\xaf' : ['&#175;', '&macr;'], # spacing macron - overline
    u'\xb0' : ['&#176;', '&deg;'], # degree sign
    u'\xb1' : ['&#177;', '&plusmn;'], # plus-or-minus sign
    u'\xb2' : ['&#178;', '&sup2;'], # superscript two - squared
    u'\xb3' : ['&#179;', '&sup3;'], # superscript three - cubed
    u'\xb4' : ['&#180;', '&acute;'], # acute accent - spacing acute
    u'\xb5' : ['&#181;', '&micro;'], # micro sign
    u'\xb6' : ['&#182;', '&para;'], # pilcrow sign - paragraph sign
    u'\xb7' : ['&#183;', '&middot;'], # middle dot - Georgian comma
    u'\xb8' : ['&#184;', '&cedil;'], # spacing cedilla
    u'\xb9' : ['&#185;', '&sup1;'], # superscript one
    u'\xba' : ['&#186;', '&ordm;'], # masculine ordinal indicator
    u'\xbb' : ['&#187;', '&raquo;'], # right double angle quotes
    u'\xbc' : ['&#188;', '&frac14;'], # fraction one quarter
    u'\xbd' : ['&#189;', '&frac12;'], # fraction one half
    u'\xbe' : ['&#190;', '&frac34;'], # fraction three quarters
    u'\xbf' : ['&#191;', '&iquest;'], # inverted question mark
    u'\xc0' : ['&#192;', '&Agrave;'], # latin capital letter A with grave
    u'\xc1' : ['&#193;', '&Aacute;'], # latin capital letter A with acute
    u'\xc2' : ['&#194;', '&Acirc;'], # latin capital letter A with circumflex
    u'\xc3' : ['&#195;', '&Atilde;'], # latin capital letter A with tilde
    u'\xc4' : ['&#196;', '&Auml;'], # latin capital letter A with diaeresis
    u'\xc5' : ['&#197;', '&Aring;'], # latin capital letter A with ring above
    u'\xc6' : ['&#198;', '&AElig;'], # latin capital letter AE
    u'\xc7' : ['&#199;', '&Ccedil;'], # latin capital letter C with cedilla
    u'\xc8' : ['&#200;', '&Egrave;'], # latin capital letter E with grave
    u'\xc9' : ['&#201;', '&Eacute;'], # latin capital letter E with acute
    u'\xca' : ['&#202;', '&Ecirc;'], # latin capital letter E with circumflex
    u'\xcb' : ['&#203;', '&Euml;'], # latin capital letter E with diaeresis
    u'\xcc' : ['&#204;', '&Igrave;'], # latin capital letter I with grave
    u'\xcd' : ['&#205;', '&Iacute;'], # latin capital letter I with acute
    u'\xce' : ['&#206;', '&Icirc;'], # latin capital letter I with circumflex
    u'\xcf' : ['&#207;', '&Iuml;'], # latin capital letter I with diaeresis
    u'\xd0' : ['&#208;', '&ETH;'], # latin capital letter ETH
    u'\xd1' : ['&#209;', '&Ntilde;'], # latin capital letter N with tilde
    u'\xd2' : ['&#210;', '&Ograve;'], # latin capital letter O with grave
    u'\xd3' : ['&#211;', '&Oacute;'], # latin capital letter O with acute
    u'\xd4' : ['&#212;', '&Ocirc;'], # latin capital letter O with circumflex
    u'\xd5' : ['&#213;', '&Otilde;'], # latin capital letter O with tilde
    u'\xd6' : ['&#214;', '&Ouml;'], # latin capital letter O with diaeresis
    u'\xd7' : ['&#215;', '&times;'], # multiplication sign
    u'\xd8' : ['&#216;', '&Oslash;'], # latin capital letter O with slash
    u'\xd9' : ['&#217;', '&Ugrave;'], # latin capital letter U with grave
    u'\xda' : ['&#218;', '&Uacute;'], # latin capital letter U with acute
    u'\xdb' : ['&#219;', '&Ucirc;'], # latin capital letter U with circumflex
    u'\xdc' : ['&#220;', '&Uuml;'], # latin capital letter U with diaeresis
    u'\xdd' : ['&#221;', '&Yacute;'], # latin capital letter Y with acute
    u'\xde' : ['&#222;', '&THORN;'], # latin capital letter THORN
    u'\xdf' : ['&#223;', '&szlig;'], # latin small letter sharp s - ess-zed
    u'\xe0' : ['&#224;', '&agrave;'], # latin small letter a with grave
    u'\xe1' : ['&#225;', '&aacute;'], # latin small letter a with acute
    u'\xe2' : ['&#226;', '&acirc;'], # latin small letter a with circumflex
    u'\xe3' : ['&#227;', '&atilde;'], # latin small letter a with tilde
    u'\xe4' : ['&#228;', '&auml;'], # latin small letter a with diaeresis
    u'\xe5' : ['&#229;', '&aring;'], # latin small letter a with ring above
    u'\xe6' : ['&#230;', '&aelig;'], # latin small letter ae
    u'\xe7' : ['&#231;', '&ccedil;'], # latin small letter c with cedilla
    u'\xe8' : ['&#232;', '&egrave;'], # latin small letter e with grave
    u'\xe9' : ['&#233;', '&eacute;'], # latin small letter e with acute
    u'\xea' : ['&#234;', '&ecirc;'], # latin small letter e with circumflex
    u'\xeb' : ['&#235;', '&euml;'], # latin small letter e with diaeresis
    u'\xec' : ['&#236;', '&igrave;'], # latin small letter i with grave
    u'\xed' : ['&#237;', '&iacute;'], # latin small letter i with acute
    u'\xee' : ['&#238;', '&icirc;'], # latin small letter i with circumflex
    u'\xef' : ['&#239;', '&iuml;'], # latin small letter i with diaeresis
    u'\xf0' : ['&#240;', '&eth;'], # latin small letter eth
    u'\xf1' : ['&#241;', '&ntilde;'], # latin small letter n with tilde
    u'\xf2' : ['&#242;', '&ograve;'], # latin small letter o with grave
    u'\xf3' : ['&#243;', '&oacute;'], # latin small letter o with acute
    u'\xf4' : ['&#244;', '&ocirc;'], # latin small letter o with circumflex
    u'\xf5' : ['&#245;', '&otilde;'], # latin small letter o with tilde
    u'\xf6' : ['&#246;', '&ouml;'], # latin small letter o with diaeresis
    u'\xf7' : ['&#247;', '&divide;'], # division sign
    u'\xf8' : ['&#248;', '&oslash;'], # latin small letter o with slash
    u'\xf9' : ['&#249;', '&ugrave;'], # latin small letter u with grave
    u'\xfa' : ['&#250;', '&uacute;'], # latin small letter u with acute
    u'\xfb' : ['&#251;', '&ucirc;'], # latin small letter u with circumflex
    u'\xfc' : ['&#252;', '&uuml;'], # latin small letter u with diaeresis
    u'\xfd' : ['&#253;', '&yacute;'], # latin small letter y with acute
    u'\xfe' : ['&#254;', '&thorn;'], # latin small letter thorn
    u'\xff' : ['&#255;', '&yuml;'], # latin small letter y with diaeresis
}

View File

@ -0,0 +1,30 @@
'''Read meta information from TXT files'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
import re
from calibre.ebooks.metadata import MetaInformation
def get_metadata(stream, extract_cover=True):
    """Return the metadata found at the start of a TXT stream.

    Only the first four lines of ``stream`` are examined. They are
    recognised as a header when they consist of a title line, two empty
    lines and a line of comma separated author names. When the header is
    not recognised the title and the single author stay at the translated
    'Unknown'.

    :param stream: seekable file-like object; it is rewound before reading.
    :param extract_cover: accepted but not used by this function.
    :return: a ``MetaInformation`` object.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # readline() returns an empty string at end of file, so short files
    # simply yield fewer than four lines.
    header_lines = []
    for line_number in range(4):
        line = stream.readline()
        if line == '':
            break
        header_lines.append(line)
    mdata = ''.join(header_lines)

    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        # The greedy groups keep trailing blanks, and splitting 'A, B' on ','
        # leaves a leading blank on every author after the first one.
        title = mo.group('title').strip()
        authors = [name.strip() for name in mo.group('author').split(',')]
        authors = [name for name in authors if name]
        if title:
            mi.title = title
        if authors:
            mi.authors = authors
    return mi

View File

@ -1,69 +0,0 @@
'''
Convert any ebook format to PDF.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
'and Marshall T. Vandegrift <llasram@gmail.com>' \
'and John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import sys, os, glob, logging
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
from calibre.ebooks.epub import config as common_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.pdf.writer import oeb2pdf, config as pdf_config
def config(defaults=None):
    """Return the common conversion config extended with the PDF writer options.

    The 'profile' option is removed from the common config before the PDF
    writer options are merged in.
    """
    merged = common_config(defaults=defaults, name='pdf')
    merged.remove_opt('profile')
    merged.update(pdf_config(defaults=defaults))
    return merged
def option_parser(usage=USAGE):
    """Return the option parser, with 'PDF' and the format list filled into ``usage``."""
    usage_text = usage % ('PDF', formats())
    return config().option_parser(usage=usage_text)
def any2pdf(opts, path, notification=None):
    """Convert the ebook at ``path`` to PDF by way of an unpacked OEB tree.

    ``opts`` is modified in place: ``output``, ``profile``,
    ``dont_split_on_page_breaks`` and ``base_font_size2`` are all written
    to. ``notification`` is accepted but not used.

    Raises ``ValueError`` when ``path`` has no file extension.
    """
    ext = os.path.splitext(path)[1]
    if not ext:
        raise ValueError('Unknown file type: '+path)
    ext = ext.lower()[1:]
    # Default output: the input's base name with a .pdf extension.
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.pdf'
    opts.output = os.path.abspath(opts.output)
    # Remembered because opts.output is pointed at a dummy path below.
    orig_output = opts.output
    with TemporaryDirectory('_any2pdf') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
        opts.output = os.path.join(tdir, 'dummy.epub')
        opts.profile = 'None'
        opts.dont_split_on_page_breaks = True
        # base_font_size2 is zeroed only for the any2epub call and restored
        # right after it.
        orig_bfs = opts.base_font_size2
        opts.base_font_size2 = 0
        any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir)
        opts.base_font_size2 = orig_bfs
        # The first .opf file found in the extraction directory is used.
        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
        opts.output = orig_output
        logging.getLogger('html2epub').info(_('Creating PDF file from EPUB...'))
        oeb2pdf(opts, opf)
def main(args=sys.argv):
    """Command line entry point: convert the file named by the first argument.

    Returns 1 after printing the help text when no input file is given;
    otherwise returns nothing once the conversion has run.
    """
    parser = option_parser()
    opts, positional = parser.parse_args(args)
    if len(positional) >= 2:
        any2pdf(opts, positional[1])
        return None
    parser.print_help()
    print('No input file specified.')
    return 1


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,90 @@
'''
Print information about PDF files.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, re, sys, time
from calibre.utils.config import Config, StringConfig
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
    """Return the config object for this command.

    A ``StringConfig`` built from ``defaults`` when they are given,
    otherwise the ``Config`` named 'manipulatepdf'.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
def option_parser(name):
    """Return the option parser for this command.

    :param name: command name substituted for ``%%name`` in the usage text.
    """
    c = config()
    # Translate the untouched template first and substitute the command name
    # afterwards: substituting first changes the string handed to _(), so it
    # can no longer match the template as it appears in the source.
    usage = _('''\
%prog %%name [options] file.pdf ...
Get info about a PDF.
''').replace('%%name', name)
    return c.option_parser(usage=usage)
def print_info(pdf_path):
    """Print a summary of the PDF at ``pdf_path`` to standard output.

    Title, author, subject, creator and producer come from the document
    information of the PDF. The two dates are the file system's change and
    modification times of the file, formatted in GMT. File size and PDF
    version are printed on a best-effort basis.
    """
    date_format = '%a %b %d %H:%M:%S %Y'
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        doc_info = pdf.documentInfo
        # The value is formatted in *after* translation: formatting inside
        # _() would hand the already filled-in string to the lookup.
        print(_('Title: %s') % doc_info.title)
        print(_('Author: %s') % doc_info.author)
        print(_('Subject: %s') % doc_info.subject)
        print(_('Creator: %s') % doc_info.creator)
        print(_('Producer: %s') % doc_info.producer)
        print(_('Creation Date: %s') % time.strftime(date_format, time.gmtime(os.path.getctime(pdf_path))))
        print(_('Modification Date: %s') % time.strftime(date_format, time.gmtime(os.path.getmtime(pdf_path))))
        print(_('Pages: %s') % pdf.numPages)
        print(_('Encrypted: %s') % pdf.isEncrypted)
        # The remaining two lines are optional extras; a failure here must
        # not abort the report, but interrupts are no longer swallowed.
        try:
            print(_('File Size: %s bytes') % os.path.getsize(pdf_path))
        except Exception:
            pass
        try:
            # The version is parsed from the first line of the file.
            pdf_file.seek(0)
            vline = pdf_file.readline()
            mo = re.search(r'(?iu)^%...-(?P<version>\d+\.\d+)', vline)
            if mo is not None:
                print(_('PDF Version: %s') % mo.group('version'))
        except Exception:
            pass
def verify_files(files):
    """Return the paths in ``files`` that could not be opened as a PDF.

    A path is reported when opening the file or constructing a
    ``PdfFileReader`` on it raises an exception. The input order is kept.
    """
    invalid = []
    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                PdfFileReader(pdf_file)
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not recorded as "invalid file".
        except Exception:
            invalid.append(pdf_path)
    return invalid
def main(args=sys.argv, name=''):
    """Entry point of the info command.

    Prints the information of every PDF named on the command line and
    returns 0, or prints an error and returns 2 when no file is given or a
    file cannot be read.
    """
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    # Drop the program name; everything left is a PDF path.
    pdf_paths = args[1:]

    if not pdf_paths:
        print('Error: No PDF sepecified.\n')
        print(parser.get_usage())
        return 2

    unreadable = verify_files(pdf_paths)
    if unreadable:
        for path in unreadable:
            print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % path)
        return 2

    for path in pdf_paths:
        print_info(path)
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,69 @@
'''
Command line interface to run pdf manipulation commands.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import string, sys
from calibre.utils.config import Config, StringConfig
from calibre.ebooks.pdf import info, merge, reverse, split, trim
# Maps the command name given on the command line to the module that
# implements it; main() calls that module's main(args, command).
COMMANDS = {
    'info' : info,
    'merge' : merge,
    'reverse' : reverse,
    'split' : split,
    'trim' : trim,
}
def config(defaults=None):
    """Return the config object for the pdf manipulation front end.

    A ``StringConfig`` built from ``defaults`` when they are given,
    otherwise the ``Config`` named 'manipulatepdf'.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
def option_parser():
    """Return the option parser of the front end.

    The usage text lists the sorted names of all known commands.
    """
    c = config()
    # Translate the untouched template first and fill in the command list
    # afterwards: substituting first changes the string handed to _(), so it
    # can no longer match the template as it appears in the source.
    usage = _('''\
%prog command ...
command can be one of the following:
[%%commands]
Use %prog command --help to get more information about a specific command
Manipulate a PDF.
''').replace('%%commands', ', '.join(sorted(COMMANDS)))
    return c.option_parser(usage=usage)
def main(args=sys.argv):
    """Dispatch to the command named by the first command line argument.

    :param args: argument list, program name first. It is not modified.
    :return: the exit code of the selected command, or 2 when the command
        is missing or unknown.
    """
    parser = option_parser()

    if len(args) < 2:
        print('Error: No command sepecified.\n')
        print(parser.get_usage())
        return 2

    command = args[1].lower().strip()

    if command in COMMANDS:
        # Hand the command a copy without the command name. Deleting the
        # entry in place would alter the caller's list, which is sys.argv
        # itself when the default argument is used.
        command_args = args[:1] + args[2:]
        return COMMANDS[command].main(command_args, command)

    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print(parser.get_usage())
    return 2


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,91 @@
'''
Merge PDF files into a single PDF document.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, sys
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import Config, StringConfig
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
    """Return the config object of the merge command, with its ``output`` option.

    A ``StringConfig`` built from ``defaults`` when they are given,
    otherwise the ``Config`` named 'mergepdf'.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('mergepdf', description)
    conf.add_opt('output', ['-o', '--output'], default='merged.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
def option_parser(name):
    """Return the option parser for the merge command.

    :param name: command name substituted for ``%%name`` in the usage text.
    """
    c = config()
    # Translate the untouched template first and substitute the command name
    # afterwards: substituting first changes the string handed to _(), so it
    # can no longer match the template as it appears in the source.
    usage = _('''\
%prog %%name [options] file1.pdf file2.pdf ...
Merges individual PDFs. Metadata will be used from the first PDF specified.
''').replace('%%name', name)
    return c.option_parser(usage=usage)
def merge_files(in_paths, out_path, metadata=None):
    """Write the pages of all PDFs in ``in_paths``, in order, to ``out_path``.

    :param in_paths: paths of the PDF files to merge.
    :param out_path: path of the PDF file to create.
    :param metadata: object providing ``title`` and ``authors`` for the
        merged document; the translated 'Unknown' is used for both when it
        is None.
    """
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The inputs are kept open until the output has been written, in case
    # page data is still read from them at write time, and are then closed
    # even when an error occurs.
    opened_files = []
    try:
        for pdf_path in in_paths:
            pdf_file = open(os.path.abspath(pdf_path), 'rb')
            opened_files.append(pdf_file)
            for page in PdfFileReader(pdf_file).pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        for pdf_file in opened_files:
            pdf_file.close()
def verify_files(files):
    """Return the paths in ``files`` that cannot be used as merge input.

    A path is reported when the file cannot be opened or read as a PDF,
    when the PDF is encrypted, or when it has no pages. The input order is
    kept.
    """
    invalid = []
    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                pdf = PdfFileReader(pdf_file)
                if pdf.isEncrypted or pdf.numPages <= 0:
                    raise Exception
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not recorded as "invalid file".
        except Exception:
            invalid.append(pdf_path)
    return invalid
def main(args=sys.argv, name=''):
    """Entry point of the merge command.

    Merges the PDFs named on the command line into ``opts.output`` using
    the metadata of the first one and returns 0, or prints an error and
    returns 2 when fewer than two files are given or a file cannot be read.
    """
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    # Drop the program name; everything left is a PDF path.
    pdf_paths = args[1:]

    if len(pdf_paths) < 2:
        print('Error: Two or more PDF files are required.\n\n')
        print(parser.get_usage())
        return 2

    unreadable = verify_files(pdf_paths)
    if unreadable:
        for path in unreadable:
            print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % path)
        return 2

    mi = metadata_from_formats([pdf_paths[0]])
    merge_files(pdf_paths, opts.output, mi)
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Convert OEB ebook format to PDF.
'''
#unit, papersize, orientation, custom_size, profile
import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.pdf.writer import PDFWriter
from calibre.ebooks.pdf.pageoptions import UNITS, unit, PAPER_SIZES, \
paper_size, ORIENTATIONS, orientation, PageOptions
class PDFOutput(OutputFormatPlugin):
    """Output format plugin that writes the spine of an OEB book as a PDF."""

    name = 'PDF Output'
    author = 'John Schember'
    file_type = 'pdf'

    # In the help texts the list of choices is formatted in *after*
    # translation; formatting inside _() would hand the already filled-in
    # string to the translation lookup.
    options = set([
        OptionRecommendation(name='margin_top', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_top',
            help=_('The top margin around the document.')),
        OptionRecommendation(name='margin_bottom', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_bottom',
            help=_('The bottom margin around the document.')),
        OptionRecommendation(name='margin_left', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_left',
            help=_('The left margin around the document.')),
        OptionRecommendation(name='margin_right', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_right',
            help=_('The right margin around the document.')),
        OptionRecommendation(name='unit', recommended_value='inch',
            level=OptionRecommendation.LOW, short_switch='u',
            long_switch='unit', choices=UNITS.keys(),
            help=_('The unit of measure. Default is inch. Choices '
            'are %s') % UNITS.keys()),
        OptionRecommendation(name='paper_size', recommended_value='letter',
            level=OptionRecommendation.LOW,
            long_switch='paper_size', choices=PAPER_SIZES.keys(),
            help=_('The size of the paper. Default is letter. Choices '
            'are %s') % PAPER_SIZES.keys()),
        OptionRecommendation(name='orientation', recommended_value='portrait',
            level=OptionRecommendation.LOW,
            long_switch='orientation', choices=ORIENTATIONS.keys(),
            help=_('The orientation of the page. Default is portrait. Choices '
            'are %s') % ORIENTATIONS.keys()),
    ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write ``oeb_book.spine`` as a PDF to ``output_path``.

        :param oeb_book: book whose ``spine`` is handed to the PDF writer.
        :param output_path: a file system path, or an object with a
            ``write`` attribute that is used as the output stream directly.
        :param input_plugin: not used by this method.
        :param opts: provides the margin, unit, paper size and orientation
            options declared in ``options``.
        :param log: passed on to the PDF writer.
        """
        popts = PageOptions()

        popts.set_margin_top(opts.margin_top)
        popts.set_margin_bottom(opts.margin_bottom)
        popts.set_margin_left(opts.margin_left)
        popts.set_margin_right(opts.margin_right)

        popts.unit = unit(opts.unit)
        popts.paper_size = paper_size(opts.paper_size)
        popts.orientation = orientation(opts.orientation)

        writer = PDFWriter(log, popts)

        # Only a stream opened here is closed here; a stream supplied by
        # the caller is left open.
        close = False
        if not hasattr(output_path, 'write'):
            close = True
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            # Start from an empty stream.
            out_stream.seek(0)
            out_stream.truncate()
            writer.dump(oeb_book.spine, out_stream)
        finally:
            # Close our own file even when writing fails.
            if close:
                out_stream.close()

View File

@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QPrinter
# Maps the unit names accepted on the command line to QPrinter unit
# constants.
UNITS = {
    'millimeter' : QPrinter.Millimeter,
    'point' : QPrinter.Point,
    'inch' : QPrinter.Inch,
    'pica' : QPrinter.Pica,
    'didot' : QPrinter.Didot,
    'cicero' : QPrinter.Cicero,
    'devicepixel' : QPrinter.DevicePixel,
}
def unit(unit):
    """Return the QPrinter unit registered under the name ``unit``.

    Names that are not in ``UNITS`` give ``QPrinter.Inch``.
    """
    if unit in UNITS:
        return UNITS[unit]
    return QPrinter.Inch
# Maps lower-case paper size names to QPrinter paper size constants. The
# trailing comments give the dimensions of each size. The custom size is
# deliberately left out (commented out at the end of the table).
PAPER_SIZES = {
    'a0' : QPrinter.A0, # 841 x 1189 mm
    'a1' : QPrinter.A1, # 594 x 841 mm
    'a2' : QPrinter.A2, # 420 x 594 mm
    'a3' : QPrinter.A3, # 297 x 420 mm
    'a4' : QPrinter.A4, # 210 x 297 mm, 8.26 x 11.69 inches
    'a5' : QPrinter.A5, # 148 x 210 mm
    'a6' : QPrinter.A6, # 105 x 148 mm
    'a7' : QPrinter.A7, # 74 x 105 mm
    'a8' : QPrinter.A8, # 52 x 74 mm
    'a9' : QPrinter.A9, # 37 x 52 mm
    'b0' : QPrinter.B0, # 1030 x 1456 mm
    'b1' : QPrinter.B1, # 728 x 1030 mm
    'b2' : QPrinter.B2, # 515 x 728 mm
    'b3' : QPrinter.B3, # 364 x 515 mm
    'b4' : QPrinter.B4, # 257 x 364 mm
    'b5' : QPrinter.B5, # 182 x 257 mm, 7.17 x 10.13 inches
    'b6' : QPrinter.B6, # 128 x 182 mm
    'b7' : QPrinter.B7, # 91 x 128 mm
    'b8' : QPrinter.B8, # 64 x 91 mm
    'b9' : QPrinter.B9, # 45 x 64 mm
    'b10' : QPrinter.B10, # 32 x 45 mm
    'c5e' : QPrinter.C5E, # 163 x 229 mm
    'comm10e' : QPrinter.Comm10E, # 105 x 241 mm, U.S. Common 10 Envelope
    'dle' : QPrinter.DLE, # 110 x 220 mm
    'executive' : QPrinter.Executive, # 7.5 x 10 inches, 191 x 254 mm
    'folio' : QPrinter.Folio, # 210 x 330 mm
    'ledger' : QPrinter.Ledger, # 432 x 279 mm
    'legal' : QPrinter.Legal, # 8.5 x 14 inches, 216 x 356 mm
    'letter' : QPrinter.Letter, # 8.5 x 11 inches, 216 x 279 mm
    'tabloid' : QPrinter.Tabloid, # 279 x 432 mm
    #'custom' : QPrinter.Custom, # Unknown, or a user defined size.
}
def paper_size(size):
    """Return the QPrinter paper size registered under the name ``size``.

    Names that are not in ``PAPER_SIZES`` give ``QPrinter.Letter``.
    """
    if size in PAPER_SIZES:
        return PAPER_SIZES[size]
    return QPrinter.Letter
# Maps the orientation names accepted on the command line to QPrinter
# orientation constants.
ORIENTATIONS = {
    'portrait' : QPrinter.Portrait,
    'landscape' : QPrinter.Landscape,
}
def orientation(orientation):
    """Return the QPrinter orientation registered under the name ``orientation``.

    Names that are not in ``ORIENTATIONS`` give ``QPrinter.Portrait``.
    """
    if orientation in ORIENTATIONS:
        return ORIENTATIONS[orientation]
    return QPrinter.Portrait
class PageOptions(object):
    """Page layout settings: four margins, unit, paper size and orientation.

    The class attributes are the defaults; the ``set_margin_*`` methods
    store a value on the instance.
    """

    # Margins are whole numbers, expressed in ``unit``.
    margin_top = 1
    margin_bottom = 1
    margin_left = 1
    margin_right = 1

    unit = QPrinter.Inch
    paper_size = QPrinter.Letter
    orientation = QPrinter.Portrait

    @staticmethod
    def _parse_margin(size):
        """Return ``size`` converted with ``int()``, or 1 if that fails."""
        try:
            return int(size)
        # Only conversion failures select the fallback; anything else,
        # such as KeyboardInterrupt, is allowed to propagate.
        except (TypeError, ValueError, OverflowError):
            return 1

    def set_margin_top(self, size):
        """Set the top margin from ``size``; unparsable values give 1."""
        self.margin_top = self._parse_margin(size)

    def set_margin_bottom(self, size):
        """Set the bottom margin from ``size``; unparsable values give 1."""
        self.margin_bottom = self._parse_margin(size)

    def set_margin_left(self, size):
        """Set the left margin from ``size``; unparsable values give 1."""
        self.margin_left = self._parse_margin(size)

    def set_margin_right(self, size):
        """Set the right margin from ``size``; unparsable values give 1."""
        self.margin_right = self._parse_margin(size)

View File

@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
'''
Reverse content of PDF.
'''
import os, sys
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import Config, StringConfig
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
    """Return the config object of the reverse command, with its ``output`` option.

    A ``StringConfig`` built from ``defaults`` when they are given,
    otherwise the ``Config`` named 'reversepdf'.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('reversepdf', description)
    conf.add_opt('output', ['-o', '--output'], default='reversed.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
def option_parser(name):
    """Return the option parser for the reverse command.

    :param name: command name substituted for ``%%name`` in the usage text.
    """
    c = config()
    # Translate the untouched template first and substitute the command name
    # afterwards: substituting first changes the string handed to _(), so it
    # can no longer match the template as it appears in the source.
    usage = _('''\
%prog %%name [options] file1.pdf
Reverse PDF.
''').replace('%%name', name)
    return c.option_parser(usage=usage)
def reverse(pdf_path, out_path, metadata=None):
    """Write the pages of the PDF at ``pdf_path`` to ``out_path`` in reverse order.

    :param pdf_path: path of the PDF file to read.
    :param out_path: path of the PDF file to create.
    :param metadata: object providing ``title`` and ``authors`` for the new
        document; the translated 'Unknown' is used for both when it is None.
    """
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input stays open until the output has been written, in case page
    # data is still read from it at write time, and is closed afterwards
    # even when an error occurs.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
def valid_pdf(pdf_path):
    """Return True if the pdf is valid.

    The file is rejected when it cannot be opened or read as a PDF, when
    it is encrypted, or when it has no pages.
    """
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            if pdf.isEncrypted or pdf.numPages <= 0:
                raise Exception
    # Exception rather than a bare except, so that KeyboardInterrupt and
    # SystemExit are not reported as "invalid file".
    except Exception:
        return False
    return True
def main(args=sys.argv, name=''):
    """Entry point of the reverse command.

    Reverses the first PDF named on the command line into ``opts.output``
    and returns 0, or prints an error and returns 2 when no file is given
    or the file cannot be read.
    """
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    # Drop the program name; the first remaining argument is the PDF path.
    pdf_paths = args[1:]

    if not pdf_paths:
        print('Error: A PDF file is required.\n\n')
        print(parser.get_usage())
        return 2

    source = pdf_paths[0]
    if not valid_pdf(source):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % source)
        return 2

    mi = metadata_from_formats([source])
    reverse(source, opts.output, mi)
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,186 @@
'''
Split PDF file into multiple PDF documents.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, sys, re
from calibre.ebooks.metadata.meta import metadata_from_formats
from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import Config, StringConfig
from pyPdf import PdfFileWriter, PdfFileReader
def config(defaults=None):
    """Return the config object of the split command, with its ``output`` option.

    A ``StringConfig`` built from ``defaults`` when they are given,
    otherwise the ``Config`` named 'splitpdf'.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('splitpdf', description)
    conf.add_opt('output', ['-o', '--output'], default='split.pdf',
        help=_('Path to output file. By default a file is created in the current directory. \
The file name will be the base name for the output.'))
    return conf
def option_parser(name):
    '''
    Create the command line option parser for the split command.

    :param name: Sub-command name substituted for the `%%name` placeholder
        in the usage message.
    :return: The parser produced by `config().option_parser`.
    '''
    # Translate the template first and substitute the command name
    # afterwards. Substituting before calling _() changes the message id, so
    # the lookup could never match the string marked for translation.
    usage = _('''\
%prog %%name [options] file.pdf page_to_split_on ...
%prog %%name [options] file.pdf page_range_to_split_on ...
Ex.
%prog %%name file.pdf 6
%prog %%name file.pdf 6-12
%prog %%name file.pdf 6-12 8 10 9-20
Split a PDF.
''').replace('%%name', name)
    return config().option_parser(usage=usage)
def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
    '''
    Write one output PDF per entry in `pages` and `page_ranges`.

    :param in_path: Path of the PDF to split.
    :param pages: Page indexes (ints). For each one a file containing that
        page through the last page of the document is written; the file
        name suffix is the index plus one.
    :param page_ranges: `[start, end]` pairs of page indexes (ints). For
        each one a file containing pages start through end is written; the
        suffix is 'start-end' with both values plus one.
    :param out_name: Base name of the output files.
    :param metadata: Passed through to `write_pdf`.
    '''
    # The reader is handed to write_pdf, so the input has to stay open for
    # the whole loop; the with block closes it afterwards (it used to leak).
    with open(os.path.abspath(in_path), 'rb') as in_file:
        pdf = PdfFileReader(in_file)
        last_page = pdf.numPages - 1

        for index in pages:
            write_pdf(pdf, out_name, '%s' % (index + 1), index, last_page, metadata)
        for start, end in page_ranges:
            write_pdf(pdf, out_name, '%s-%s' % (start + 1, end + 1), start, end, metadata)
def write_pdf(pdf, name, suffix, start, end, metadata=None):
    '''
    Write pages `start` through `end` (inclusive) of `pdf` to the file
    '<name><suffix>.pdf'.

    :param pdf: Reader object providing `getPage(page_num)`.
    :param name: Base name of the output file.
    :param suffix: Text appended to `name` before the '.pdf' extension.
    :param start: Index of the first page to copy.
    :param end: Index of the last page to copy.
    :param metadata: Optional object providing `title` and `authors`. When it
        is None the title and author of the output are both set to the
        translated string 'Unknown'.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)
    for page_num in range(start, end + 1):
        out_pdf.addPage(pdf.getPage(page_num))

    with open('%s%s.pdf' % (name, suffix), 'wb') as out_file:
        out_pdf.write(out_file)
def split_args(args):
    '''
    Sort the positional command line arguments into categories.

    :param args: Iterable of argument strings.
    :return: Tuple `(pdf, pages, page_ranges, bad)` where

        * `pdf` is the first argument ending in '.pdf' (case insensitive),
          or '' if there is none,
        * `pages` is a list of page number strings,
        * `page_ranges` is a list of `[start, end]` string pairs,
        * `bad` is a sorted list, without duplicates, of every argument
          that fits no category, including any additional '.pdf' argument.

    Page numbers are returned as the strings that were matched; a range
    whose start and end strings are equal is reported as a single page.
    '''
    pdf = ''
    pages = []
    page_ranges = []
    bad = set()

    for arg in args:
        arg = arg.strip()

        # The pdf input
        if re.search(r'(?iu)^.*?\.pdf[ ]*$', arg) is not None:
            if pdf == '':
                pdf = arg
            else:
                bad.add(arg)
            continue

        # Single indexes
        if re.search(r'^[ ]*\d+[ ]*$', arg) is not None:
            pages.append(arg)
            continue

        # Index ranges
        mo = re.search(r'^[ ]*(?P<start>\d+)[ ]*-[ ]*(?P<end>\d+)[ ]*$', arg)
        if mo is None:
            bad.add(arg)
            continue

        start = mo.group('start')
        end = mo.group('end')
        # A range such as 8-8 is really a single index
        if start == end:
            pages.append(start)
        else:
            page_ranges.append([start, end])

    return pdf, pages, page_ranges, sorted(bad)
def clean_page_list(pdf_path, pages, page_ranges):
    '''
    Convert the page numbers given on the command line into sorted lists of
    page indexes (page number minus one) without duplicates.

    Page numbers greater than the number of pages in the PDF are set to the
    last page and page numbers smaller than 1 are set to the first page. A
    range that lies completely past the end of the document is reported as
    the single last page.

    :param pdf_path: Path of the PDF, read only to get its page count.
    :param pages: Page numbers (strings or ints).
    :param page_ranges: `(start, end)` pairs of page numbers.
    :return: Tuple `(pages, page_ranges)`: a sorted list of ints and a
        sorted list of `[low, high]` int pairs.
    '''
    # Only the page count is needed; read it while the file is open and let
    # the with block close the handle again (it used to be leaked).
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        total_pages = PdfFileReader(pdf_file).numPages

    def clamp(page_number):
        # Without the lower bound a page number of 0 becomes index -1, which
        # silently addresses the last page of the document.
        return max(1, min(int(page_number), total_pages))

    single_pages = set()
    ranges = []

    for index in pages:
        single_pages.add(clamp(index) - 1)

    for start, end in page_ranges:
        start = int(start)
        end = int(end)
        if start > total_pages and end > total_pages:
            single_pages.add(total_pages - 1)
            continue
        page_range = sorted([clamp(start) - 1, clamp(end) - 1])
        if page_range not in ranges:
            ranges.append(page_range)

    return sorted(single_pages), sorted(ranges)
def valid_pdf(pdf_path):
    '''
    Return True if `pdf_path` can be opened and parsed as a PDF that is not
    encrypted and reports at least one page, False otherwise.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            if pdf.isEncrypted or pdf.numPages <= 0:
                return False
    except Exception:
        # Any failure to open or parse the file means it is not usable.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return False
    return True
def main(args=sys.argv, name=''):
    '''
    Command line entry point: split a PDF at the given pages and ranges.

    :param args: Argument list. The first positional argument left after
        option parsing is skipped; the rest are classified by `split_args`.
    :param name: Sub-command name substituted into the usage message.
    :return: 0 on success, 2 when the arguments are incomplete or unknown or
        the PDF cannot be read.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)

    pdf, pages, page_ranges, unknown = split_args(args[1:])

    # Both a PDF and at least one page or range are needed. The previous
    # test (`pdf == '' and (pages == [] or page_ranges == [])`) let a PDF
    # without any split point through, which then did nothing and reported
    # success.
    if pdf == '' or (not pages and not page_ranges):
        print('Error: PDF and where to split is required.\n\n')
        print(parser.get_usage())
        return 2

    if unknown:
        for arg in unknown:
            print('Error: Unknown argument `%s`' % arg)
        print(parser.get_usage())
        return 2

    if not valid_pdf(pdf):
        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    pages, page_ranges = clean_page_list(pdf, pages, page_ranges)

    mi = metadata_from_formats([pdf])

    # Only the base name of the output option is used; split_pdf appends the
    # page suffix and the .pdf extension itself.
    split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi)

    return 0

if __name__ == '__main__':
    sys.exit(main())

View File

@ -16,8 +16,6 @@ def config(defaults=None):
c = Config('trimpdf', desc)
else:
c = StringConfig(defaults, desc)
c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
help=_('Path to output file. By default a file is created in the current directory.'))
c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
@ -33,16 +31,16 @@ def config(defaults=None):
return c
def option_parser():
def option_parser(name):
c = config()
return c.option_parser(usage=_('''\
%prog [options] file.pdf
%prog %%name [options] file.pdf
Crops a pdf.
'''))
'''.replace('%%name', name)))
def main(args=sys.argv):
parser = option_parser()
def main(args=sys.argv, name=''):
parser = option_parser(name)
opts, args = parser.parse_args(args)
try:
source = os.path.abspath(args[1])

View File

@ -1,19 +1,18 @@
'''
Write content to PDF.
'''
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, logging, shutil, sys
'''
Write content to PDF.
'''
import os, shutil, sys
from calibre import LoggingInterface
from calibre.ebooks.epub.iterator import SpineItem
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.customize.ui import run_plugins_on_postprocess
from calibre.utils.config import Config, StringConfig
from calibre.ebooks.pdf.pageoptions import PageOptions
from PyQt4 import QtCore
from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, \
@ -21,21 +20,14 @@ from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter,
from PyQt4.QtWebKit import QWebView
from pyPdf import PdfFileWriter, PdfFileReader
class PDFMargins:
def __init__(self, margin=1):
self.top = margin
self.bottom = margin
self.left = margin
self.right = margin
class PDFWriter(QObject):
def __init__(self, margins=PDFMargins()):
def __init__(self, log, popts=PageOptions()):
if QApplication.instance() is None:
QApplication([])
QObject.__init__(self)
self.logger = logging.getLogger('oeb2pdf')
self.logger = log
self.loop = QEventLoop()
self.view = QWebView()
@ -43,15 +35,14 @@ class PDFWriter(QObject):
self.render_queue = []
self.combine_queue = []
self.tmp_path = PersistentTemporaryDirectory('_any2pdf_parts')
self.margins = margins
self.popts = popts
def dump(self, oebpath, path):
def dump(self, spine, out_stream):
self._delete_tmpdir()
opf = OPF(oebpath, os.path.dirname(oebpath))
self.render_queue = [SpineItem(i.path) for i in opf.spine]
self.render_queue = spine[:]
self.combine_queue = []
self.path = path
self.out_stream = out_stream
QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
self.loop.exec_()
@ -78,7 +69,9 @@ class PDFWriter(QObject):
self.logger.debug('\tRendering item as %s' % item_path)
printer = QPrinter(QPrinter.HighResolution)
printer.setPageMargins(self.margins.left, self.margins.top, self.margins.right, self.margins.bottom, QPrinter.Inch)
printer.setPageMargins(self.popts.margin_left, self.popts.margin_top, self.popts.margin_right, self.popts.margin_bottom, self.popts.unit)
printer.setPaperSize(self.popts.paper_size)
printer.setOrientation(self.popts.orientation)
printer.setOutputFormat(QPrinter.PdfFormat)
printer.setOutputFileName(item_path)
self.view.print_(printer)
@ -98,75 +91,7 @@ class PDFWriter(QObject):
inputPDF = PdfFileReader(file(item, 'rb'))
for page in inputPDF.pages:
outPDF.addPage(page)
outputStream = file(self.path, 'wb')
outPDF.write(outputStream)
outputStream.close()
outPDF.write(self.out_stream)
finally:
self._delete_tmpdir()
self.loop.exit(0)
def config(defaults=None):
desc = _('Options to control the conversion to PDF')
if defaults is None:
c = Config('pdf', desc)
else:
c = StringConfig(defaults, desc)
pdf = c.add_group('PDF', _('PDF options.'))
pdf('margin_top', ['--margin_top'], default=1,
help=_('The top margin around the document in inches.'))
pdf('margin_bottom', ['--margin_bottom'], default=1,
help=_('The bottom margin around the document in inches.'))
pdf('margin_left', ['--margin_left'], default=1,
help=_('The left margin around the document in inches.'))
pdf('margin_right', ['--margin_right'], default=1,
help=_('The right margin around the document in inches.'))
return c
def option_parser():
c = config()
parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
parser.add_option(
'-o', '--output', default=None,
help=_('Output file. Default is derived from input filename.'))
parser.add_option(
'-v', '--verbose', default=0, action='count',
help=_('Useful for debugging.'))
return parser
def oeb2pdf(opts, inpath):
logger = LoggingInterface(logging.getLogger('oeb2pdf'))
logger.setup_cli_handler(opts.verbose)
outpath = opts.output
if outpath is None:
outpath = os.path.basename(inpath)
outpath = os.path.splitext(outpath)[0] + '.pdf'
margins = PDFMargins()
margins.top = opts.margin_top
margins.bottom = opts.margin_bottom
margins.left = opts.margin_left
margins.right = opts.margin_right
writer = PDFWriter(margins)
writer.dump(inpath, outpath)
run_plugins_on_postprocess(outpath, 'pdf')
logger.log_info(_('Output written to ') + outpath)
def main(argv=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(argv[1:])
if len(args) != 1:
parser.print_help()
return 1
inpath = args[0]
retval = oeb2pdf(opts, inpath)
return retval
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, John Schember john@nachtimwald.com'
__docformat__ = 'restructuredtext en'
'''
Used for txt output
'''

View File

@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
from calibre.ebooks.metadata import authors_to_string
class TXTOutput(OutputFormatPlugin):
    '''
    Output plugin that writes a book as a single plain text file.

    The text is produced by `TxtWriter` from the spine of the book and is
    written UTF-8 encoded.
    '''

    name = 'TXT Output'
    author = 'John Schember'
    file_type = 'txt'

    options = set([
        # The list of valid values is interpolated after translation so the
        # message id looked up by _() is the literal that was marked.
        OptionRecommendation(name='newline', recommended_value='system',
            level=OptionRecommendation.LOW, long_switch='newline',
            short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
            help=_('Type of newline to use. Options are %s. Default is \'system\'. '
                'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
                'For Mac OS X use \'unix\'. \'system\' will default to the newline '
                'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
        OptionRecommendation(name='prepend_author', recommended_value='true',
            level=OptionRecommendation.LOW, long_switch='prepend_author',
            choices=['true', 'false'],
            help=_('Write the author to the beginning of the file. '
                'Default is \'true\'. Use \'false\' to disable.')),
        OptionRecommendation(name='prepend_title', recommended_value='true',
            choices=['true', 'false'],
            level=OptionRecommendation.LOW, long_switch='prepend_title',
            help=_('Write the title to the beginning of the file. '
                'Default is \'true\'. Use \'false\' to disable.'))
    ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Convert `oeb_book` to text and write it to `output_path`.

        :param oeb_book: Book to convert; its `spine` and `metadata` are
            read.
        :param output_path: Either a path, which is created (including
            missing parent directories) and closed again here, or an object
            with a `write` method, which is truncated, written to and left
            open.
        :param input_plugin: Not used.
        :param opts: Conversion options; `newline`, `prepend_author`,
            `prepend_title`, `authors` and `title` are read.
        :param log: Log object handed to `TxtWriter`.
        '''
        metadata = TxtMetadata()
        if opts.prepend_author.lower() == 'true':
            metadata.author = opts.authors if opts.authors else authors_to_string(oeb_book.metadata.authors)
        if opts.prepend_title.lower() == 'true':
            metadata.title = opts.title if opts.title else oeb_book.metadata.title

        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
        # dump() returns unicode text. Encode it explicitly: writing unicode
        # to a binary stream falls back to the interpreter default encoding
        # and fails for text that encoding cannot represent.
        data = writer.dump(oeb_book.spine, metadata).encode('utf-8')

        opened_here = not hasattr(output_path, 'write')
        if opened_here:
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            out_stream.write(data)
        finally:
            # Only close what was opened here, and do so even if writing
            # fails; a stream supplied by the caller stays open.
            if opened_here:
                out_stream.close()

View File

@ -0,0 +1,158 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Write content to TXT.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, re, sys
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
from BeautifulSoup import BeautifulSoup
class TxtWriter(object):
    '''
    Convert the HTML documents of a book spine into one plain text string.

    :param newline: Newline string used in the generated text.
    :param log: Log object; stored as `self.log`.
    '''

    def __init__(self, newline, log):
        self.newline = newline
        self.log = log

    def dump(self, spine, metadata):
        '''
        Return the text of all items in `spine` as one unicode string.

        :param spine: Iterable of items that can be passed to `open` and that
            have an `encoding` attribute used to decode the file content.
        :param metadata: Object with `title` and `author` attributes. Each
            one that is neither None nor '' is written in upper case,
            followed by three newlines, at the start of the text; the title
            comes before the author.
        '''
        parts = []
        for item in spine:
            with open(item, 'r') as itemf:
                content = itemf.read().decode(item.encoding)
            # Convert newlines to unix style \n for processing. These
            # will be changed to the specified type later in the process.
            content = self.unix_newlines(content)
            content = self.strip_html(content)
            content = self.replace_html_symbols(content)
            content = self.cleanup_text(content)
            content = self.specified_newlines(content)
            parts.append(content)
        out = u''.join(parts)

        # Prepend metadata. The author is added first so that the title,
        # added afterwards, ends up on top.
        if metadata.author is not None and metadata.author != '':
            out = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + out
        if metadata.title is not None and metadata.title != '':
            out = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + out

        # Put two blank lines at end of file: top up with newlines until
        # three of them are found in the tail of the text.
        end = out[-3 * len(self.newline):]
        out += self.newline * (3 - end.count(self.newline))

        return out

    def strip_html(self, html):
        '''
        Return the content of every `body` element in `html` with the markup
        removed.

        Block level closing tags and line breaks become empty lines and
        headings are wrapped in markers that `cleanup_text` turns into extra
        blank lines.
        '''
        stripped = u''

        for dom_tree in BeautifulSoup(html).findAll('body'):
            text = unicode(dom_tree)

            # Remove unnecessary tags together with their content. The
            # matches are non-greedy and may span lines, so a multi-line
            # script is removed and text between two scripts is kept.
            for tag in ['script', 'style']:
                text = re.sub(r'(?imsu)<[ ]*%s[ ]*.*?>.*?</[ ]*%s[ ]*>' % (tag, tag), '', text)
            # Comments can also span lines and there can be several on one
            # line.
            text = re.sub(r'(?s)<!--.*?-->', '', text)
            text = re.sub(r'<\?.*?\?>', '', text)
            text = re.sub(r'<@.*?@>', '', text)
            text = re.sub(r'<%.*?%>', '', text)

            # Headings usually indicate Chapters.
            # We are going to use a marker to insert the proper number of
            # newline characters at the end of cleanup_text because
            # cleanup_text removes excessive (more than 2) newlines.
            for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                text = re.sub(r'(?imu)<[ ]*%s[ ]*.*?>' % tag, '-vzxedxy-', text)
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '-vlgzxey-', text)

            # Separate content with space. The tag name has to be
            # substituted into the pattern, otherwise it contains a literal
            # '%s' and never matches a closing td.
            for tag in ['td']:
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, ' ', text)

            # Separate content with empty line.
            for tag in ['p', 'div', 'pre', 'li', 'table', 'tr']:
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '\n\n', text)

            for tag in ['hr', 'br']:
                text = re.sub(r'(?imu)<[ ]*%s[ ]*/*?>' % tag, '\n\n', text)

            # Remove any tags that do not need special processing.
            text = re.sub(r'<.*?>', '', text)

            stripped = stripped + text

        return stripped

    def replace_html_symbols(self, content):
        '''
        Replace every code listed in HTML_SYMBOLS with the symbol it is
        listed under and return the result.
        '''
        for symbol in HTML_SYMBOLS:
            for code in HTML_SYMBOLS[symbol]:
                content = content.replace(code, symbol)
        return content

    def cleanup_text(self, text):
        '''
        Normalise whitespace in `text`, which must use \\n newlines.

        Runs of tabs, vertical tabs and form feeds and runs of spaces become
        one space, a single newline inside a paragraph becomes a space, three
        or more newlines become two, the heading markers written by
        `strip_html` become blank lines and spaces at the start and end of
        lines are removed.
        '''
        # Replace bad characters.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')

        # Replace tabs, vertical tabs and form feeds with single space.
        # This has to be a regular expression substitution: str.replace
        # looked for the literal two character text '\t+'.
        text = re.sub(u'[\t\v\f]+', ' ', text)

        # Single line paragraph: a newline with a character other than a
        # newline on both sides is a line wrap, not a paragraph break. One
        # lookaround substitution replaces the former search loop, which
        # rebuilt the whole text for every wrap.
        text = re.sub(r'(?<=[^\n])\n(?=[^\n])', ' ', text)

        # Remove multiple spaces.
        text = re.sub('[ ]+', ' ', text)

        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)
        text = re.sub('\n{3,}', '\n\n', text)

        # Replace markers with the proper characters. This is done after the
        # newlines were reduced so the extra blank lines survive.
        text = text.replace('-vzxedxy-', '\n\n\n\n\n')
        text = text.replace('-vlgzxey-', '\n\n\n')

        # Remove spaces at the beginning and end of lines
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        return text

    def unix_newlines(self, text):
        '''Return `text` with \\r\\n and \\r newlines converted to \\n.'''
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        return text

    def specified_newlines(self, text):
        '''Return `text` with every \\n replaced by `self.newline`.'''
        if self.newline == '\n':
            return text
        return text.replace('\n', self.newline)
class TxtNewlines(object):
    '''
    Look up the newline string for a named newline type.

    After construction `newline` holds the string for the requested type.
    The name is matched case insensitively and an unknown name falls back to
    `os.linesep`.
    '''

    NEWLINE_TYPES = {
        'system': os.linesep,
        'unix': '\n',
        'old_mac': '\r',
        'windows': '\r\n',
    }

    def __init__(self, newline_type):
        requested = newline_type.lower()
        try:
            self.newline = self.NEWLINE_TYPES[requested]
        except KeyError:
            self.newline = os.linesep
class TxtMetadata(object):
    '''
    Container for the metadata written at the top of the text output.

    Both `title` and `author` start out as None.
    '''

    def __init__(self):
        self.title = self.author = None

View File

@ -709,6 +709,9 @@ class BooksView(TableView):
def close(self):
    '''Close the model backing this view.'''
    self._model.close()
def set_editable(self, editable):
    '''Pass the `editable` flag on to the model backing this view.'''
    self._model.set_editable(editable)
def set_editable(self, editable):
    '''Pass the `editable` flag on to the model backing this view.'''
    self._model.set_editable(editable)
@ -1002,6 +1005,10 @@ class DeviceBooksModel(BooksModel):
self.sort(col, self.sorted_on[1])
done = True
return done
def set_editable(self, editable):
    '''Store `editable` as the `editable` attribute of this model.'''
    self.editable = editable
def set_editable(self, editable):
    '''Store `editable` as the `editable` attribute of this model.'''
    self.editable = editable

View File

@ -39,10 +39,9 @@ entry_points = {
'calibre-fontconfig = calibre.utils.fontconfig:main',
'calibre-parallel = calibre.parallel:main',
'calibre-customize = calibre.customize.ui:main',
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
'pdfmanipulate = calibre.ebooks.pdf.manipulate:main',
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
'calibre-smtp = calibre.utils.smtp:main',
],
'gui_scripts' : [
__appname__+' = calibre.gui2.main:main',
@ -548,6 +547,3 @@ main = post_install
if __name__ == '__main__':
post_install()