# Metadata reader plugin for plain text (TXT) files.  It only declares the
# handled extension and forwards the stream to the TXT metadata module.
class TXTMetadataReader(MetadataReaderPlugin):

    name        = 'Read TXT metadata'
    file_types  = set(['txt'])
    description = _('Read metadata from %s files') % 'TXT'

    def get_metadata(self, stream, ftype):
        # ftype is not used here; the stream alone is passed on.
        # The import is kept local to the call, like the sibling ODT reader.
        from calibre.ebooks.metadata import txt
        return txt.get_metadata(stream)
self.input_options = self.input_plugin.options.union( self.input_plugin.common_options) diff --git a/src/calibre/ebooks/htmlsymbols.py b/src/calibre/ebooks/htmlsymbols.py new file mode 100644 index 0000000000..fa10873845 --- /dev/null +++ b/src/calibre/ebooks/htmlsymbols.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +''' +Maping of non-acii symbols and their corresponding html entity number and name +''' +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' + +# http://www.w3schools.com/tags/ref_symbols.asp +HTML_SYMBOLS = { + # Math Symbols + u'∀' : ['∀', '∀'], # for all + u'∂' : ['∂', '∂'], # part + u'∃' : ['∃', '&exists;'], # exists + u'∅' : ['∅', '∅'], # empty + u'∇' : ['∇', '∇'], # nabla + u'∈' : ['∈', '∈'], # isin + u'∉' : ['∉', '∉'], # notin + u'∋' : ['∋', '∋'], # ni + u'∏' : ['∏', '∏'], # prod + u'∑' : ['∑', '∑'], # sum + u'−' : ['−', '−'], # minus + u'∗' : ['∗', '∗'], # lowast + u'√' : ['√', '√'], # square root + u'∝' : ['∝', '∝'], # proportional to + u'∞' : ['∞', '∞'], # infinity + u'∠' : ['∠', '∠'], # angle + u'∧' : ['∧', '∧'], # and + u'∨' : ['∨', '∨'], # or + u'∩' : ['∩', '∩'], # cap + u'∪' : ['∪', '∪'], # cup + u'∫' : ['∫', '∫'], # integral + u'∴' : ['∴', '∴'], # therefore + u'∼' : ['∼', '∼'], # simular to + u'≅' : ['≅', '≅'], # approximately equal + u'≈' : ['≈', '≈'], # almost equal + u'≠' : ['≠', '≠'], # not equal + u'≡' : ['≡', '≡'], # equivalent + u'≤' : ['≤', '≤'], # less or equal + u'≥' : ['≥', '≥'], # greater or equal + u'⊂' : ['⊂', '⊂'], # subset of + u'⊃' : ['⊃', '⊃'], # superset of + u'⊄' : ['⊄', '⊄'], # not subset of + u'⊆' : ['⊆', '⊆'], # subset or equal + u'⊇' : ['⊇', '⊇'], # superset or equal + u'⊕' : ['⊕', '⊕'], # circled plus + u'⊗' : ['⊗', '⊗'], # cirled times + u'⊥' : ['⊥', '⊥'], # perpendicular + u'⋅' : ['⋅', '⋅'], # dot operator + # Greek Letters + u'Α' : ['Α', 'Α'], # Alpha + u'Β' : ['Β', 'Β'], # Beta + u'Γ' : ['Γ', 'Γ'], # Gamma + u'Δ' : ['Δ', 'Δ'], # Delta + u'Ε' : ['Ε', 'Ε'], # Epsilon + u'Ζ' : ['Ζ', 'Ζ'], # Zeta + 
u'Η' : ['Η', 'Η'], # Eta + u'Θ' : ['Θ', 'Θ'], # Theta + u'Ι' : ['Ι', 'Ι'], # Iota + u'Κ' : ['Κ', 'Κ'], # Kappa + u'Λ' : ['Λ', 'Λ'], # Lambda + u'Μ' : ['Μ', 'Μ'], # Mu + u'Ν' : ['Ν', 'Ν'], # Nu + u'Ξ' : ['Ξ', 'Ξ'], # Xi + u'Ο' : ['Ο', 'Ο'], # Omicron + u'Π' : ['Π', 'Π'], # Pi + u'Ρ' : ['Ρ', 'Ρ'], # Rho + u'Σ' : ['Σ', 'Σ'], # Sigma + u'Τ' : ['Τ', 'Τ'], # Tau + u'Υ' : ['Υ', 'Υ'], # Upsilon + u'Φ' : ['Φ', 'Φ'], # Phi + u'Χ' : ['Χ', 'Χ'], # Chi + u'Ψ' : ['Ψ', 'Ψ'], # Psi + u'ω' : ['ω', 'ω'], # omega + u'ϑ' : ['ϑ', 'ϑ'], # theta symbol + u'ϒ' : ['ϒ', 'ϒ'], # upsilon symbol + u'ϖ' : ['ϖ', 'ϖ'], # pi symbol + # Other + u'Œ' : ['Œ', 'Œ'], # capital ligature OE + u'œ' : ['œ', 'œ'], # small ligature oe + u'Š' : ['Š', 'Š'], # capital S with caron + u'š' : ['š', 'š'], # small S with caron + u'Ÿ' : ['Ÿ', 'Ÿ'], # capital Y with diaeres + u'ƒ' : ['ƒ', 'ƒ'], # f with hook + u'ˆ' : ['ˆ', 'ˆ'], # modifier letter circumflex accent + u'˜' : ['˜', '˜'], # small tilde + u'–' : ['–', '–'], # en dash + u'—' : ['—', '—'], # em dash + u'‘' : ['‘', '‘'], # left single quotation mark + u'’' : ['’', '’'], # right single quotation mark + u'‚' : ['‚', '‚'], # single low-9 quotation mark + u'“' : ['“', '“'], # left double quotation mark + u'”' : ['”', '”'], # right double quotation mark + u'„' : ['„', '„'], # double low-9 quotation mark + u'†' : ['†', '†'], # dagger + u'‡' : ['‡', '‡'], # double dagger + u'•' : ['•', '•'], # bullet + u'…' : ['…', '…'], # horizontal ellipsis + u'‰' : ['‰', '‰'], # per mille + u'′' : ['′', '′'], # minutes + u'″' : ['″', '″'], # seconds + u'‹' : ['‹', '‹'], # single left angle quotation + u'›' : ['›', '›'], # single right angle quotation + u'‾' : ['‾', '‾'], # overline + u'€' : ['€', '€'], # euro + u'™' : ['™', '™'], # trademark + u'←' : ['←', '←'], # left arrow + u'↑' : ['↑', '↑'], # up arrow + u'→' : ['→', '→'], # right arrow + u'↓' : ['↓', '↓'], # down arrow + u'↔' : ['↔', '↔'], # left right arrow + u'↵' : ['↵', '↵'], # carriage return arrow + u'⌈' : ['⌈', '⌈'], # 
left ceiling + u'⌉' : ['⌉', '⌉'], # right ceiling + u'⌊' : ['⌊', '⌊'], # left floor + u'⌋' : ['⌋', '⌋'], # right floor + u'◊' : ['◊', '◊'], # lozenge + u'♠' : ['♠', '♠'], # spade + u'♣' : ['♣', '♣'], # club + u'♥' : ['♥', '♥'], # heart + u'♦' : ['♦', '♦'], # diamond + # Extra http://www.ascii.cl/htmlcodes.htm + u' ' : [' '], # space + u'!' : ['!'], # exclamation point + u'#' : ['#'], # number sign + u'$' : ['$'], # dollar sign + u'%' : ['%'], # percent sign + u'\'' : ['''], # single quote + u'(' : ['('], # opening parenthesis + u')' : [')'], # closing parenthesis + u'*' : ['*'], # asterisk + u'+' : ['+'], # plus sign + u',' : [','], # comma + u'-' : ['-'], # minus sign - hyphen + u'.' : ['.'], # period + u'/' : ['/'], # slash + u'0' : ['0'], # zero + u'1' : ['1'], # one + u'2' : ['2'], # two + u'3' : ['3'], # three + u'4' : ['4'], # four + u'5' : ['5'], # five + u'6' : ['6'], # six + u'7' : ['7'], # seven + u'8' : ['8'], # eight + u'9' : ['9'], # nine + u':' : [':'], # colon + u';' : [';'], # semicolon + u'=' : ['='], # equal sign + u'?' 
: ['?'], # question mark + u'@' : ['@'], # at symbol + u'A' : ['A'], # + u'B' : ['B'], # + u'C' : ['C'], # + u'D' : ['D'], # + u'E' : ['E'], # + u'F' : ['F'], # + u'G' : ['G'], # + u'H' : ['H'], # + u'I' : ['I'], # + u'J' : ['J'], # + u'K' : ['K'], # + u'L' : ['L'], # + u'M' : ['M'], # + u'N' : ['N'], # + u'O' : ['O'], # + u'P' : ['P'], # + u'Q' : ['Q'], # + u'R' : ['R'], # + u'S' : ['S'], # + u'T' : ['T'], # + u'U' : ['U'], # + u'V' : ['V'], # + u'W' : ['W'], # + u'X' : ['X'], # + u'Y' : ['Y'], # + u'Z' : ['Z'], # + u'[' : ['['], # opening bracket + u'\\' : ['\'], # backslash + u']' : [']'], # closing bracket + u'^' : ['^'], # caret - circumflex + u'_' : ['_'], # underscore + u'`' : ['`'], # grave accent + u'a' : ['a'], # + u'b' : ['b'], # + u'c' : ['c'], # + u'd' : ['d'], # + u'e' : ['e'], # + u'f' : ['f'], # + u'g' : ['g'], # + u'h' : ['h'], # + u'i' : ['i'], # + u'j' : ['j'], # + u'k' : ['k'], # + u'l' : ['l'], # + u'm' : ['m'], # + u'n' : ['n'], # + u'o' : ['o'], # + u'p' : ['p'], # + u'q' : ['q'], # + u'r' : ['r'], # + u's' : ['s'], # + u't' : ['t'], # + u'u' : ['u'], # + u'v' : ['v'], # + u'w' : ['w'], # + u'x' : ['x'], # + u'y' : ['y'], # + u'z' : ['z'], # + u'{' : ['{'], # opening brace + u'|' : ['|'], # vertical bar + u'}' : ['}'], # closing brace + u'~' : ['~'], # equivalency sign - tilde + u'<' : ['<', '<'], # less than sign + u'>' : ['>', '>'], # greater than sign + u'¡' : ['¡', '¡'], # inverted exclamation mark + u'¢' : ['¢', '¢'], # cent sign + u'£' : ['£', '£'], # pound sign + u'¤' : ['¤', '¤'], # currency sign + u'¥' : ['¥', '¥'], # yen sign + u'¦' : ['¦', '¦'], # broken vertical bar + u'§' : ['§', '§'], # section sign + u'¨' : ['¨', '¨'], # spacing diaeresis - umlaut + u'©' : ['©', '©'], # copyright sign + u'ª' : ['ª', 'ª'], # feminine ordinal indicator + u'«' : ['«', '«'], # left double angle quotes + u'¬' : ['¬', '¬'], # not sign + u'®' : ['®', '®'], # registered trade mark sign + u'¯' : ['¯', '¯'], # spacing macron - overline + u'°' : ['°', 
'°'], # degree sign + u'±' : ['±', '±'], # plus-or-minus sign + u'²' : ['²', '²'], # superscript two - squared + u'³' : ['³', '³'], # superscript three - cubed + u'´' : ['´', '´'], # acute accent - spacing acute + u'µ' : ['µ', 'µ'], # micro sign + u'¶' : ['¶', '¶'], # pilcrow sign - paragraph sign + u'·' : ['·', '·'], # middle dot - Georgian comma + u'¸' : ['¸', '¸'], # spacing cedilla + u'¹' : ['¹', '¹'], # superscript one + u'º' : ['º', 'º'], # masculine ordinal indicator + u'»' : ['»', '»'], # right double angle quotes + u'¼' : ['¼', '¼'], # fraction one quarter + u'½' : ['½', '½'], # fraction one half + u'¾' : ['¾', '¾'], # fraction three quarters + u'¿' : ['¿', '¿'], # inverted question mark + u'À' : ['À', 'À'], # latin capital letter A with grave + u'Á' : ['Á', 'Á'], # latin capital letter A with acute + u'Â' : ['Â', 'Â'], # latin capital letter A with circumflex + u'Ã' : ['Ã', 'Ã'], # latin capital letter A with tilde + u'Ä' : ['Ä', 'Ä'], # latin capital letter A with diaeresis + u'Å' : ['Å', 'Å'], # latin capital letter A with ring above + u'Æ' : ['Æ', 'Æ'], # latin capital letter AE + u'Ç' : ['Ç', 'Ç'], # latin capital letter C with cedilla + u'È' : ['È', 'È'], # latin capital letter E with grave + u'É' : ['É', 'É'], # latin capital letter E with acute + u'Ê' : ['Ê', 'Ê'], # latin capital letter E with circumflex + u'Ë' : ['Ë', 'Ë'], # latin capital letter E with diaeresis + u'Ì' : ['Ì', 'Ì'], # latin capital letter I with grave + u'Í' : ['Í', 'Í'], # latin capital letter I with acute + u'Î' : ['Î', 'Î'], # latin capital letter I with circumflex + u'Ï' : ['Ï', 'Ï'], # latin capital letter I with diaeresis + u'Ð' : ['Ð', 'Ð'], # latin capital letter ETH + u'Ñ' : ['Ñ', 'Ñ'], # latin capital letter N with tilde + u'Ò' : ['Ò', 'Ò'], # latin capital letter O with grave + u'Ó' : ['Ó', 'Ó'], # latin capital letter O with acute + u'Ô' : ['Ô', 'Ô'], # latin capital letter O with circumflex + u'Õ' : ['Õ', 'Õ'], # latin capital letter O with tilde + u'Ö' : ['Ö', 
'Ö'], # latin capital letter O with diaeresis + u'×' : ['×', '×'], # multiplication sign + u'Ø' : ['Ø', 'Ø'], # latin capital letter O with slash + u'Ù' : ['Ù', 'Ù'], # latin capital letter U with grave + u'Ú' : ['Ú', 'Ú'], # latin capital letter U with acute + u'Û' : ['Û', 'Û'], # latin capital letter U with circumflex + u'Ü' : ['Ü', 'Ü'], # latin capital letter U with diaeresis + u'Ý' : ['Ý', 'Ý'], # latin capital letter Y with acute + u'Þ' : ['Þ', 'Þ'], # latin capital letter THORN + u'ß' : ['ß', 'ß'], # latin small letter sharp s - ess-zed + u'à' : ['à', 'à'], # latin small letter a with grave + u'á' : ['á', 'á'], # latin small letter a with acute + u'â' : ['â', 'â'], # latin small letter a with circumflex + u'ã' : ['ã', 'ã'], # latin small letter a with tilde + u'ä' : ['ä', 'ä'], # latin small letter a with diaeresis + u'å' : ['å', 'å'], # latin small letter a with ring above + u'æ' : ['æ', 'æ'], # latin small letter ae + u'ç' : ['ç', 'ç'], # latin small letter c with cedilla + u'è' : ['è', 'è'], # latin small letter e with grave + u'é' : ['é', 'é'], # latin small letter e with acute + u'ê' : ['ê', 'ê'], # latin small letter e with circumflex + u'ë' : ['ë', 'ë'], # latin small letter e with diaeresis + u'ì' : ['ì', 'ì'], # latin small letter i with grave + u'í' : ['í', 'í'], # latin small letter i with acute + u'î' : ['î', 'î'], # latin small letter i with circumflex + u'ï' : ['ï', 'ï'], # latin small letter i with diaeresis + u'ð' : ['ð', 'ð'], # latin small letter eth + u'ñ' : ['ñ', 'ñ'], # latin small letter n with tilde + u'ò' : ['ò', 'ò'], # latin small letter o with grave + u'ó' : ['ó', 'ó'], # latin small letter o with acute + u'ô' : ['ô', 'ô'], # latin small letter o with circumflex + u'õ' : ['õ', 'õ'], # latin small letter o with tilde + u'ö' : ['ö', 'ö'], # latin small letter o with diaeresis + u'÷' : ['÷', '÷'], # division sign + u'ø' : ['ø', 'ø'], # latin small letter o with slash + u'ù' : ['ù', 'ù'], # latin small letter u with grave + u'ú' : 
def get_metadata(stream, extract_cover=True):
    '''
    Return the metadata found at the top of a TXT file as a MetaInformation
    object.

    Up to four lines are read from the start of ``stream``.  When they have
    the layout::

        <title>
        <blank line>
        <blank line>
        <comma separated authors>

    the title and the list of authors are taken from them.  Otherwise the
    title and authors keep the ``Unknown`` defaults.

    :param stream: Seekable file-like object; it is rewound before reading.
    :param extract_cover: Accepted but not used by this reader.
    :return: The populated MetaInformation object.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Only the first four lines can hold the title/author header.
    lines = []
    for x in range(4):
        line = stream.readline()
        if not line:
            break
        lines.append(line)
    mdata = ''.join(lines)

    # The title group must be named: it is read back with group('title').
    mo = re.search(r'(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})'
                   r'[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        # The greedy ``.+`` groups swallow surrounding blanks, so trim them
        # here; empty author entries (e.g. a trailing comma) are dropped.
        mi.title = mo.group('title').strip()
        authors = [a.strip() for a in mo.group('author').split(',')]
        mi.authors = [a for a in authors if a]

    return mi
Vandegrift <llasram@gmail.com>' \ - 'and John Schember <john@nachtimwald.com>' -__docformat__ = 'restructuredtext en' - -import sys, os, glob, logging - -from calibre.ebooks.epub.from_any import any2epub, formats, USAGE -from calibre.ebooks.epub import config as common_config -from calibre.ptempfile import TemporaryDirectory -from calibre.ebooks.pdf.writer import oeb2pdf, config as pdf_config - -def config(defaults=None): - c = common_config(defaults=defaults, name='pdf') - c.remove_opt('profile') - pdfc = pdf_config(defaults=defaults) - c.update(pdfc) - return c - -def option_parser(usage=USAGE): - usage = usage % ('PDF', formats()) - parser = config().option_parser(usage=usage) - return parser - -def any2pdf(opts, path, notification=None): - ext = os.path.splitext(path)[1] - if not ext: - raise ValueError('Unknown file type: '+path) - ext = ext.lower()[1:] - - if opts.output is None: - opts.output = os.path.splitext(os.path.basename(path))[0]+'.pdf' - - opts.output = os.path.abspath(opts.output) - orig_output = opts.output - - with TemporaryDirectory('_any2pdf') as tdir: - oebdir = os.path.join(tdir, 'oeb') - os.mkdir(oebdir) - opts.output = os.path.join(tdir, 'dummy.epub') - opts.profile = 'None' - opts.dont_split_on_page_breaks = True - orig_bfs = opts.base_font_size2 - opts.base_font_size2 = 0 - any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir) - opts.base_font_size2 = orig_bfs - opf = glob.glob(os.path.join(oebdir, '*.opf'))[0] - opts.output = orig_output - logging.getLogger('html2epub').info(_('Creating PDF file from EPUB...')) - oeb2pdf(opts, opf) - -def main(args=sys.argv): - parser = option_parser() - opts, args = parser.parse_args(args) - if len(args) < 2: - parser.print_help() - print 'No input file specified.' 
def print_info(pdf_path):
    '''
    Print a summary of the PDF file at ``pdf_path`` to standard output.

    The title, author, subject, creator and producer come from the document
    information of the PDF.  The creation and modification dates are the
    ctime and mtime of the file on disk.  The file size and the PDF version
    (parsed from the first line of the file) are best effort: they are
    skipped when they cannot be determined.

    :param pdf_path: Path to the PDF file to describe.
    '''
    date_format = '%a %b %d %H:%M:%S %Y'

    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        doc_info = pdf.documentInfo

        # The template is translated first and the value interpolated
        # afterwards, so the lookup is done on the constant message.
        print(_('Title: %s') % doc_info.title)
        print(_('Author: %s') % doc_info.author)
        print(_('Subject: %s') % doc_info.subject)
        print(_('Creator: %s') % doc_info.creator)
        print(_('Producer: %s') % doc_info.producer)
        print(_('Creation Date: %s') % time.strftime(date_format,
            time.gmtime(os.path.getctime(pdf_path))))
        print(_('Modification Date: %s') % time.strftime(date_format,
            time.gmtime(os.path.getmtime(pdf_path))))
        print(_('Pages: %s') % pdf.numPages)
        print(_('Encrypted: %s') % pdf.isEncrypted)

        # Best effort: a failure here must not abort the report, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            print(_('File Size: %s bytes') % os.path.getsize(pdf_path))
        except Exception:
            pass

        # Best effort: the version is read from the header line of the file.
        try:
            pdf_file.seek(0)
            vline = pdf_file.readline()
            mo = re.search(r'(?iu)^%...-(?P<version>\d+\.\d+)', vline)
            if mo is not None:
                print(_('PDF Version: %s') % mo.group('version'))
        except Exception:
            pass
def main(args=sys.argv, name=''):
    '''
    Entry point of the ``info`` command: print information about each PDF
    given on the command line.

    :param args: Argument list; the first item is skipped after option
                 parsing, the remaining items are paths to PDF files.
    :param name: Name of the command, inserted into the usage message.
    :return: 0 on success, 2 when no file was given or a file could not be
             read as a PDF.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]

    if not args:
        print('Error: No PDF specified.\n')
        print(parser.get_usage())
        return 2

    # Refuse to print anything when any of the inputs is unreadable.
    bad_pdfs = verify_files(args)
    if bad_pdfs:
        for pdf in bad_pdfs:
            print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    for pdf in args:
        print_info(pdf)

    return 0
def main(args=sys.argv):
    '''
    Dispatch to the PDF manipulation command named by the first argument.

    :param args: Argument list; ``args[1]`` is the command name, looked up
                 case-insensitively in COMMANDS.  The list is not modified.
    :return: The return value of the selected command's ``main``, or 2 when
             the command is missing or unknown.
    '''
    parser = option_parser()

    if len(args) < 2:
        print('Error: No command specified.\n')
        print(parser.get_usage())
        return 2

    command = args[1].lower().strip()

    if command in COMMANDS:
        # Hand the command a copy of the arguments without the command
        # word instead of deleting it from the caller's list (which is
        # sys.argv by default).
        return COMMANDS[command].main(args[:1] + args[2:], command)

    # NOTE(review): the arguments are presumably parsed here so that
    # standard switches are still honoured for an unknown command.
    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print(parser.get_usage())
    return 2
def merge_files(in_paths, out_path, metadata=None):
    '''
    Concatenate the pages of several PDF files into one PDF.

    :param in_paths: Paths of the PDF files to merge, in output order.
    :param out_path: Path of the merged PDF that is written.
    :param metadata: Object providing ``title`` and ``authors`` for the
                     merged document; both default to ``Unknown`` when
                     ``None``.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The inputs are kept open until the merged file has been written
    # (the original never closed them; presumably the added pages still
    # read from their source stream -- TODO confirm) and are always
    # closed afterwards.
    in_files = []
    try:
        for pdf_path in in_paths:
            in_file = open(os.path.abspath(pdf_path), 'rb')
            in_files.append(in_file)
            for page in PdfFileReader(in_file).pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        for in_file in in_files:
            in_file.close()
class PDFOutput(OutputFormatPlugin):
    '''
    Output plugin that writes the spine of an OEB book to a PDF file.
    '''

    name = 'PDF Output'
    author = 'John Schember'
    file_type = 'pdf'

    # The help templates are translated first and the list of choices is
    # interpolated afterwards, so the lookup uses the constant message.
    options = set([
        OptionRecommendation(name='margin_top', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_top',
            help=_('The top margin around the document.')),
        OptionRecommendation(name='margin_bottom', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_bottom',
            help=_('The bottom margin around the document.')),
        OptionRecommendation(name='margin_left', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_left',
            help=_('The left margin around the document.')),
        OptionRecommendation(name='margin_right', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_right',
            help=_('The right margin around the document.')),

        OptionRecommendation(name='unit', recommended_value='inch',
            level=OptionRecommendation.LOW, short_switch='u',
            long_switch='unit', choices=UNITS.keys(),
            help=_('The unit of measure. Default is inch. Choices '
            'are %s') % UNITS.keys()),
        OptionRecommendation(name='paper_size', recommended_value='letter',
            level=OptionRecommendation.LOW,
            long_switch='paper_size', choices=PAPER_SIZES.keys(),
            help=_('The size of the paper. Default is letter. Choices '
            'are %s') % PAPER_SIZES.keys()),
        OptionRecommendation(name='orientation', recommended_value='portrait',
            level=OptionRecommendation.LOW,
            long_switch='orientation', choices=ORIENTATIONS.keys(),
            help=_('The orientation of the page. Default is portrait. Choices '
            'are %s') % ORIENTATIONS.keys()),
        ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` as a PDF to ``output_path``.

        :param oeb_book: Book whose ``spine`` is handed to the PDF writer.
        :param output_path: Path of the file to create, or an object with a
                            ``write`` method that is used as the stream.
        :param input_plugin: Not used by this plugin.
        :param opts: Conversion options; the margin, unit, paper size and
                     orientation values are read from it.
        :param log: Logger passed on to the PDF writer.
        '''
        popts = PageOptions()

        popts.set_margin_top(opts.margin_top)
        popts.set_margin_bottom(opts.margin_bottom)
        popts.set_margin_left(opts.margin_left)
        popts.set_margin_right(opts.margin_right)

        popts.unit = unit(opts.unit)
        popts.paper_size = paper_size(opts.paper_size)
        popts.orientation = orientation(opts.orientation)

        writer = PDFWriter(log, popts)

        # Only a stream opened here is closed here; a stream passed in by
        # the caller is left open.
        close = not hasattr(output_path, 'write')
        if close:
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            writer.dump(oeb_book.spine, out_stream)
        finally:
            # Close the file even when the writer fails.
            if close:
                out_stream.close()
class PageOptions(object):
    '''
    Page layout settings: the four margins, the unit they are measured in,
    the paper size and the page orientation.
    '''

    # Class level defaults, used until a value is set on the instance.
    margin_top = 1
    margin_bottom = 1
    margin_left = 1
    margin_right = 1
    unit = QPrinter.Inch
    paper_size = QPrinter.Letter
    orientation = QPrinter.Portrait

    @staticmethod
    def _parse_margin(size):
        # Convert a margin to an int; anything int() rejects becomes 1.
        try:
            return int(size)
        except (TypeError, ValueError, OverflowError):
            return 1

    def set_margin_top(self, size):
        '''Set the top margin from ``size``; 1 is used when it is not an integer.'''
        self.margin_top = self._parse_margin(size)

    def set_margin_bottom(self, size):
        '''Set the bottom margin from ``size``; 1 is used when it is not an integer.'''
        self.margin_bottom = self._parse_margin(size)

    def set_margin_left(self, size):
        '''Set the left margin from ``size``; 1 is used when it is not an integer.'''
        self.margin_left = self._parse_margin(size)

    def set_margin_right(self, size):
        '''Set the right margin from ``size``; 1 is used when it is not an integer.'''
        self.margin_right = self._parse_margin(size)
def reverse(pdf_path, out_path, metadata=None):
    '''
    Write a copy of a PDF with its pages in reverse order.

    :param pdf_path: Path of the PDF file to read.
    :param out_path: Path of the reversed PDF that is written.
    :param metadata: Object providing ``title`` and ``authors`` for the new
                     document; both default to ``Unknown`` when ``None``.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input stays open until the output has been written (the original
    # never closed it; presumably the added pages still read from the
    # source stream -- TODO confirm) and is closed afterwards in any case.
    with open(os.path.abspath(pdf_path), 'rb') as in_file:
        pdf = PdfFileReader(in_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
def valid_pdf(pdf_path):
    '''
    Return True if the file at ``pdf_path`` can be used as input.

    A file is usable when it can be opened and read as a PDF, is not
    encrypted and has at least one page.  Any error while opening or
    reading the file makes it invalid.

    :param pdf_path: Path of the file to check.
    :return: True when the file is usable, False otherwise.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not (pdf.isEncrypted or pdf.numPages <= 0)
    except Exception:
        # Unreadable or malformed file.  KeyboardInterrupt and SystemExit
        # are deliberately not caught.
        return False
# Classifiers for the positional command line arguments, compiled once.
_PDF_ARG_RE = re.compile(r'(?iu)^.*?\.pdf[ ]*$')
_PAGE_ARG_RE = re.compile(r'^[ ]*\d+[ ]*$')
_PAGE_RANGE_ARG_RE = re.compile(
    r'^[ ]*(?P<start>\d+)[ ]*-[ ]*(?P<end>\d+)[ ]*$')


def split_args(args):
    '''
    Sort the positional command line arguments into the input PDF, single
    page numbers, page ranges and unrecognised arguments.

    :param args: Iterable of argument strings.
    :return: Tuple `(pdf, pages, page_ranges, bad)`:

        * `pdf` is the first argument ending in `.pdf` (any case), or ''
          if there is none.
        * `pages` is a list of page numbers, kept as strings.
        * `page_ranges` is a list of `[start, end]` pairs, kept as strings.
        * `bad` is a sorted list, without duplicates, of arguments that
          could not be classified, including any `.pdf` argument after the
          first one.
    '''
    pdf = ''
    pages = []
    page_ranges = []
    bad = set()

    for arg in args:
        arg = arg.strip()

        # Only one input file is accepted; later ones are reported as bad.
        if _PDF_ARG_RE.search(arg) is not None:
            if pdf == '':
                pdf = arg
            else:
                bad.add(arg)
            continue

        if _PAGE_ARG_RE.search(arg) is not None:
            pages.append(arg)
            continue

        mo = _PAGE_RANGE_ARG_RE.search(arg)
        if mo is None:
            bad.add(arg)
            continue

        start = mo.group('start')
        end = mo.group('end')
        # A range whose ends are the same page is really a single index.
        # Compare numerically so that e.g. '08-8' is recognised as well.
        if int(start) == int(end):
            pages.append(start)
        else:
            page_ranges.append([start, end])

    return pdf, pages, page_ranges, sorted(bad)
def main(args=sys.argv, name=''):
    '''
    Command line entry point: split a PDF at the given pages and page ranges.

    :param args: Full argument list; the first entry is skipped when the
        positional arguments are classified.
    :param name: Sub-command name substituted into the usage text.
    :return: 0 on success, 2 when the arguments or the input file cannot be
        used.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)

    pdf, pages, page_ranges, unknown = split_args(args[1:])

    # An input file AND at least one page or page range are required. The
    # previous test only failed when the file was missing together with one
    # of the two lists, so e.g. a file without any split point was accepted.
    if pdf == '' or (pages == [] and page_ranges == []):
        # Single-argument print(...) behaves like the print statement under
        # Python 2 as well.
        print('Error: PDF and where to split is required.\n\n')
        print(parser.get_usage())
        return 2

    if unknown != []:
        for arg in unknown:
            print('Error: Unknown argument `%s`' % arg)
        print(parser.get_usage())
        return 2

    if not valid_pdf(pdf):
        print('Error: Could not read file `%s`. Is it a vaild PDF file or is it encrypted/DRMed?.' % pdf)
        return 2

    pages, page_ranges = clean_page_list(pdf, pages, page_ranges)

    mi = metadata_from_formats([pdf])

    # opts.output only supplies the base name; the page suffix and the
    # '.pdf' extension are appended when each part is written.
    split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi)

    return 0
-''' +# -*- coding: utf-8 -*- from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, John Schember <john@nachtimwald.com>' +__docformat__ = 'restructuredtext en' -import os, logging, shutil, sys +''' +Write content to PDF. +''' + +import os, shutil, sys -from calibre import LoggingInterface -from calibre.ebooks.epub.iterator import SpineItem -from calibre.ebooks.metadata.opf2 import OPF from calibre.ptempfile import PersistentTemporaryDirectory -from calibre.customize.ui import run_plugins_on_postprocess -from calibre.utils.config import Config, StringConfig +from calibre.ebooks.pdf.pageoptions import PageOptions from PyQt4 import QtCore from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, \ @@ -21,21 +20,14 @@ from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, from PyQt4.QtWebKit import QWebView from pyPdf import PdfFileWriter, PdfFileReader - -class PDFMargins: - def __init__(self, margin=1): - self.top = margin - self.bottom = margin - self.left = margin - self.right = margin class PDFWriter(QObject): - def __init__(self, margins=PDFMargins()): + def __init__(self, log, popts=PageOptions()): if QApplication.instance() is None: QApplication([]) QObject.__init__(self) - self.logger = logging.getLogger('oeb2pdf') + self.logger = log self.loop = QEventLoop() self.view = QWebView() @@ -43,15 +35,14 @@ class PDFWriter(QObject): self.render_queue = [] self.combine_queue = [] self.tmp_path = PersistentTemporaryDirectory('_any2pdf_parts') - self.margins = margins + self.popts = popts - def dump(self, oebpath, path): + def dump(self, spine, out_stream): self._delete_tmpdir() - opf = OPF(oebpath, os.path.dirname(oebpath)) - self.render_queue = [SpineItem(i.path) for i in opf.spine] + self.render_queue = spine[:] self.combine_queue = [] - self.path = path + self.out_stream = out_stream QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection) self.loop.exec_() @@ -78,7 +69,9 @@ 
class PDFWriter(QObject): self.logger.debug('\tRendering item as %s' % item_path) printer = QPrinter(QPrinter.HighResolution) - printer.setPageMargins(self.margins.left, self.margins.top, self.margins.right, self.margins.bottom, QPrinter.Inch) + printer.setPageMargins(self.popts.margin_left, self.popts.margin_top, self.popts.margin_right, self.popts.margin_bottom, self.popts.unit) + printer.setPaperSize(self.popts.paper_size) + printer.setOrientation(self.popts.orientation) printer.setOutputFormat(QPrinter.PdfFormat) printer.setOutputFileName(item_path) self.view.print_(printer) @@ -98,75 +91,7 @@ class PDFWriter(QObject): inputPDF = PdfFileReader(file(item, 'rb')) for page in inputPDF.pages: outPDF.addPage(page) - outputStream = file(self.path, 'wb') - outPDF.write(outputStream) - outputStream.close() + outPDF.write(self.out_stream) finally: self._delete_tmpdir() self.loop.exit(0) - - -def config(defaults=None): - desc = _('Options to control the conversion to PDF') - if defaults is None: - c = Config('pdf', desc) - else: - c = StringConfig(defaults, desc) - - pdf = c.add_group('PDF', _('PDF options.')) - - pdf('margin_top', ['--margin_top'], default=1, - help=_('The top margin around the document in inches.')) - pdf('margin_bottom', ['--margin_bottom'], default=1, - help=_('The bottom margin around the document in inches.')) - pdf('margin_left', ['--margin_left'], default=1, - help=_('The left margin around the document in inches.')) - pdf('margin_right', ['--margin_right'], default=1, - help=_('The right margin around the document in inches.')) - - return c - -def option_parser(): - c = config() - parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf') - parser.add_option( - '-o', '--output', default=None, - help=_('Output file. 
Default is derived from input filename.')) - parser.add_option( - '-v', '--verbose', default=0, action='count', - help=_('Useful for debugging.')) - return parser - -def oeb2pdf(opts, inpath): - logger = LoggingInterface(logging.getLogger('oeb2pdf')) - logger.setup_cli_handler(opts.verbose) - - outpath = opts.output - if outpath is None: - outpath = os.path.basename(inpath) - outpath = os.path.splitext(outpath)[0] + '.pdf' - - margins = PDFMargins() - margins.top = opts.margin_top - margins.bottom = opts.margin_bottom - margins.left = opts.margin_left - margins.right = opts.margin_right - - writer = PDFWriter(margins) - writer.dump(inpath, outpath) - run_plugins_on_postprocess(outpath, 'pdf') - logger.log_info(_('Output written to ') + outpath) - -def main(argv=sys.argv): - parser = option_parser() - opts, args = parser.parse_args(argv[1:]) - if len(args) != 1: - parser.print_help() - return 1 - inpath = args[0] - retval = oeb2pdf(opts, inpath) - return retval - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/src/calibre/ebooks/txt/__init__.py b/src/calibre/ebooks/txt/__init__.py new file mode 100644 index 0000000000..dfdbbdb5e2 --- /dev/null +++ b/src/calibre/ebooks/txt/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '2008, John Schember john@nachtimwald.com' +__docformat__ = 'restructuredtext en' + +''' +Used for txt output +''' + diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py new file mode 100644 index 0000000000..7d44172b3f --- /dev/null +++ b/src/calibre/ebooks/txt/output.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +__license__ = 'GPL 3' +__copyright__ = '2009, John Schember <john@nachtimwald.com>' +__docformat__ = 'restructuredtext en' + +import os + +from calibre.customize.conversion import OutputFormatPlugin, \ + OptionRecommendation +from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata +from calibre.ebooks.metadata import authors_to_string + 
+class TXTOutput(OutputFormatPlugin): + + name = 'TXT Output' + author = 'John Schember' + file_type = 'txt' + + options = set([ + OptionRecommendation(name='newline', recommended_value='system', + level=OptionRecommendation.LOW, long_switch='newline', + short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(), + help=_('Type of newline to use. Options are %s. Default is \'system\'. ' + 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' + 'For Mac OS X use \'unix\'. \'system\' will default to the newline ' + 'type used by this OS.' % sorted(TxtNewlines.NEWLINE_TYPES.keys()))), + OptionRecommendation(name='prepend_author', recommended_value='true', + level=OptionRecommendation.LOW, long_switch='prepend_author', + choices=['true', 'false'], + help=_('Write the author to the beginning of the file. ' + 'Default is \'true\'. Use \'false\' to disable.')), + OptionRecommendation(name='prepend_title', recommended_value='true', + choices=['true', 'false'], + level=OptionRecommendation.LOW, long_switch='prepend_title', + help=_('Write the title to the beginning of the file. ' + 'Default is \'true\'. 
class TxtWriter(object):
    '''
    Convert the HTML items of a spine into one plain text string.

    :param newline: Newline sequence used in the generated text.
    :param log: Logger object; it is stored on the instance.
    '''

    def __init__(self, newline, log):
        self.newline = newline
        self.log = log

    def dump(self, spine, metadata):
        '''
        Return the text of every item in `spine`, preceded by the upper-cased
        title and author from `metadata` when they are set.

        :param spine: Iterable of items; each is passed to `open` and must
            provide an `encoding` attribute used to decode its content.
        :param metadata: Object with `title` and `author` attributes; empty
            or None values are not written.
        '''
        parts = []
        for item in spine:
            with open(item, 'r') as itemf:
                content = itemf.read().decode(item.encoding)
            # Convert newlines to unix style \n for processing. These
            # will be changed to the specified type later in the process.
            content = self.unix_newlines(content)
            content = self.strip_html(content)
            content = self.replace_html_symbols(content)
            content = self.cleanup_text(content)
            content = self.specified_newlines(content)
            parts.append(content)
        out = u''.join(parts)

        # Prepend metadata. The author is added first so that the title,
        # added afterwards, ends up above it.
        if metadata.author:
            out = (u'%s%s' % (metadata.author.upper(), self.newline * 3)) + out
        if metadata.title:
            out = (u'%s%s' % (metadata.title.upper(), self.newline * 3)) + out

        # Make sure the text ends with three newlines (two blank lines).
        end = out[-3 * len(self.newline):]
        out += self.newline * max(0, 3 - end.count(self.newline))

        return out

    def strip_html(self, html):
        '''
        Return the content of every `body` element in `html` with the markup
        removed. Block level tags become blank lines and headings are
        replaced by markers that `cleanup_text` later expands.
        '''
        stripped = []

        for dom_tree in BeautifulSoup(html).findAll('body'):
            text = unicode(dom_tree)

            # Remove unnecessary tags together with their content. The
            # matches are non-greedy and may span lines so that text between
            # two such elements is kept and multi-line blocks are removed.
            for tag in ['script', 'style']:
                text = re.sub(r'(?imsu)<[ ]*%s[ ]*.*?>.*?</[ ]*%s[ ]*>' % (tag, tag), '', text)
            text = re.sub(r'(?s)<!--.*?-->', '', text)
            text = re.sub(r'(?s)<\?.*?\?>', '', text)
            text = re.sub(r'(?s)<@.*?@>', '', text)
            text = re.sub(r'(?s)<%.*?%>', '', text)

            # Headings usually indicate Chapters.
            # We are going to use a marker to insert the proper number of
            # newline characters at the end of cleanup_text because
            # cleanup_text removes excessive (more than 2) newlines.
            for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                text = re.sub(r'(?imu)<[ ]*%s[ ]*.*?>' % tag, '-vzxedxy-', text)
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '-vlgzxey-', text)

            # Separate content with space. The tag name has to be
            # substituted into the pattern, otherwise nothing ever matches.
            for tag in ['td']:
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, ' ', text)

            # Separate content with empty line.
            for tag in ['p', 'div', 'pre', 'li', 'table', 'tr']:
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '\n\n', text)

            for tag in ['hr', 'br']:
                text = re.sub(r'(?imu)<[ ]*%s[ ]*/*?>' % tag, '\n\n', text)

            # Remove any tags that do not need special processing,
            # including tags whose attributes are wrapped over several lines.
            text = re.sub(r'<[^>]*>', '', text)

            stripped.append(text)

        return u''.join(stripped)

    def replace_html_symbols(self, content):
        '''
        Replace every entity listed in HTML_SYMBOLS with the character it
        stands for.
        '''
        for symbol, codes in HTML_SYMBOLS.items():
            for code in codes:
                content = content.replace(code, symbol)
        return content

    def cleanup_text(self, text):
        '''
        Normalise the whitespace of `text`, which must use \\n newlines.

        Runs of tabs, vertical tabs and form feeds become a single space,
        single newlines inside a paragraph become spaces, repeated spaces and
        excessive newlines are collapsed and the heading markers written by
        `strip_html` are expanded.
        '''
        # Replace bad characters.
        # NOTE(review): u'\xc2' is presumably dropped as an encoding
        # artifact; this also removes a legitimate capital A circumflex.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')

        # Replace tabs, vertical tabs and form feeds with single space.
        # This needs a regular expression: str.replace('\t+', ' ') only
        # matches a tab literally followed by a plus sign.
        text = re.sub(r'[\t\v\f]+', ' ', text)

        # Single line paragraph: a newline with text directly on both sides
        # is a soft line break. The lookarounds leave the neighbours
        # unconsumed, so consecutive lines are joined in one pass.
        text = re.sub(r'(?<=[^\n])\n(?=[^\n])', ' ', text)

        # Remove multiple spaces.
        text = re.sub('[ ]+', ' ', text)

        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)
        text = re.sub('\n{3,}', '\n\n', text)

        # Replace markers with the proper characters.
        text = text.replace('-vzxedxy-', '\n\n\n\n\n')
        text = text.replace('-vlgzxey-', '\n\n\n')

        # Remove spaces at the beginning and end of lines.
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        return text

    def unix_newlines(self, text):
        '''
        Return `text` with \\r\\n and \\r newlines converted to \\n.
        '''
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')

        return text

    def specified_newlines(self, text):
        '''
        Return `text` with every \\n replaced by the configured newline.
        '''
        if self.newline == '\n':
            return text

        return text.replace('\n', self.newline)
class BooksView(TableView): def close(self): self._model.close() + + def set_editable(self, editable): + self._model.set_editable(editable) def set_editable(self, editable): self._model.set_editable(editable) @@ -1002,6 +1005,10 @@ class DeviceBooksModel(BooksModel): self.sort(col, self.sorted_on[1]) done = True return done + + def set_editable(self, editable): + self.editable = editable + def set_editable(self, editable): self.editable = editable diff --git a/src/calibre/linux.py b/src/calibre/linux.py index aeba2ccfc1..b680ecc304 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -39,10 +39,9 @@ entry_points = { 'calibre-fontconfig = calibre.utils.fontconfig:main', 'calibre-parallel = calibre.parallel:main', 'calibre-customize = calibre.customize.ui:main', - 'pdftrim = calibre.ebooks.pdf.pdftrim:main' , + 'pdfmanipulate = calibre.ebooks.pdf.manipulate:main', 'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main', 'calibre-smtp = calibre.utils.smtp:main', - ], 'gui_scripts' : [ __appname__+' = calibre.gui2.main:main', @@ -548,6 +547,3 @@ main = post_install if __name__ == '__main__': post_install() - - -