mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PDF manipulation via the pdfmanipulate command. TXT and PDF output.
This commit is contained in:
commit
b2e8618354
@ -159,6 +159,16 @@ class ODTMetadataReader(MetadataReaderPlugin):
|
||||
def get_metadata(self, stream, ftype):
|
||||
from calibre.ebooks.metadata.odt import get_metadata
|
||||
return get_metadata(stream)
|
||||
|
||||
class TXTMetadataReader(MetadataReaderPlugin):
    '''Metadata reader plugin registered for files of type ``txt``.'''

    name        = 'Read TXT metadata'
    file_types  = set(['txt'])
    description = _('Read metadata from %s files') % 'TXT'

    def get_metadata(self, stream, ftype):
        '''
        Return the metadata read from ``stream``.

        The work is delegated to ``calibre.ebooks.metadata.txt.get_metadata``;
        ``ftype`` is accepted for the plugin interface but is not used here.
        '''
        # Imported lazily, under an alias so it does not read like a
        # recursive call to this method.
        from calibre.ebooks.metadata.txt import get_metadata as read_txt_metadata
        return read_txt_metadata(stream)
|
||||
|
||||
class LRXMetadataReader(MetadataReaderPlugin):
|
||||
|
||||
@ -256,9 +266,11 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
|
||||
from calibre.ebooks.epub.input import EPUBInput
|
||||
from calibre.ebooks.mobi.input import MOBIInput
|
||||
from calibre.ebooks.oeb.output import OEBOutput
|
||||
from calibre.ebooks.txt.output import TXTOutput
|
||||
from calibre.ebooks.pdf.output import PDFOutput
|
||||
from calibre.customize.profiles import input_profiles, output_profiles
|
||||
|
||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
|
||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput, TXTOutput, PDFOutput]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
|
@ -196,7 +196,7 @@ OptionRecommendation(name='language',
|
||||
self.input_fmt = input_fmt
|
||||
self.output_fmt = output_fmt
|
||||
|
||||
# Build set of all possible options. Two options are equal iff their
|
||||
# Build set of all possible options. Two options are equal if their
|
||||
# names are the same.
|
||||
self.input_options = self.input_plugin.options.union(
|
||||
self.input_plugin.common_options)
|
||||
|
310
src/calibre/ebooks/htmlsymbols.py
Normal file
310
src/calibre/ebooks/htmlsymbols.py
Normal file
@ -0,0 +1,310 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''
|
||||
Mapping of non-ASCII symbols to their corresponding HTML entity number and name
|
||||
'''
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
|
||||
# http://www.w3schools.com/tags/ref_symbols.asp
|
||||
HTML_SYMBOLS = {
|
||||
# Math Symbols
|
||||
u'∀' : ['∀', '∀'], # for all
|
||||
u'∂' : ['∂', '∂'], # part
|
||||
u'∃' : ['∃', '&exists;'], # exists
|
||||
u'∅' : ['∅', '∅'], # empty
|
||||
u'∇' : ['∇', '∇'], # nabla
|
||||
u'∈' : ['∈', '∈'], # isin
|
||||
u'∉' : ['∉', '∉'], # notin
|
||||
u'∋' : ['∋', '∋'], # ni
|
||||
u'∏' : ['∏', '∏'], # prod
|
||||
u'∑' : ['∑', '∑'], # sum
|
||||
u'−' : ['−', '−'], # minus
|
||||
u'∗' : ['∗', '∗'], # lowast
|
||||
u'√' : ['√', '√'], # square root
|
||||
u'∝' : ['∝', '∝'], # proportional to
|
||||
u'∞' : ['∞', '∞'], # infinity
|
||||
u'∠' : ['∠', '∠'], # angle
|
||||
u'∧' : ['∧', '∧'], # and
|
||||
u'∨' : ['∨', '∨'], # or
|
||||
u'∩' : ['∩', '∩'], # cap
|
||||
u'∪' : ['∪', '∪'], # cup
|
||||
u'∫' : ['∫', '∫'], # integral
|
||||
u'∴' : ['∴', '∴'], # therefore
|
||||
u'∼' : ['∼', '∼'], # similar to
|
||||
u'≅' : ['≅', '≅'], # approximately equal
|
||||
u'≈' : ['≈', '≈'], # almost equal
|
||||
u'≠' : ['≠', '≠'], # not equal
|
||||
u'≡' : ['≡', '≡'], # equivalent
|
||||
u'≤' : ['≤', '≤'], # less or equal
|
||||
u'≥' : ['≥', '≥'], # greater or equal
|
||||
u'⊂' : ['⊂', '⊂'], # subset of
|
||||
u'⊃' : ['⊃', '⊃'], # superset of
|
||||
u'⊄' : ['⊄', '⊄'], # not subset of
|
||||
u'⊆' : ['⊆', '⊆'], # subset or equal
|
||||
u'⊇' : ['⊇', '⊇'], # superset or equal
|
||||
u'⊕' : ['⊕', '⊕'], # circled plus
|
||||
u'⊗' : ['⊗', '⊗'], # circled times
|
||||
u'⊥' : ['⊥', '⊥'], # perpendicular
|
||||
u'⋅' : ['⋅', '⋅'], # dot operator
|
||||
# Greek Letters
|
||||
u'Α' : ['Α', 'Α'], # Alpha
|
||||
u'Β' : ['Β', 'Β'], # Beta
|
||||
u'Γ' : ['Γ', 'Γ'], # Gamma
|
||||
u'Δ' : ['Δ', 'Δ'], # Delta
|
||||
u'Ε' : ['Ε', 'Ε'], # Epsilon
|
||||
u'Ζ' : ['Ζ', 'Ζ'], # Zeta
|
||||
u'Η' : ['Η', 'Η'], # Eta
|
||||
u'Θ' : ['Θ', 'Θ'], # Theta
|
||||
u'Ι' : ['Ι', 'Ι'], # Iota
|
||||
u'Κ' : ['Κ', 'Κ'], # Kappa
|
||||
u'Λ' : ['Λ', 'Λ'], # Lambda
|
||||
u'Μ' : ['Μ', 'Μ'], # Mu
|
||||
u'Ν' : ['Ν', 'Ν'], # Nu
|
||||
u'Ξ' : ['Ξ', 'Ξ'], # Xi
|
||||
u'Ο' : ['Ο', 'Ο'], # Omicron
|
||||
u'Π' : ['Π', 'Π'], # Pi
|
||||
u'Ρ' : ['Ρ', 'Ρ'], # Rho
|
||||
u'Σ' : ['Σ', 'Σ'], # Sigma
|
||||
u'Τ' : ['Τ', 'Τ'], # Tau
|
||||
u'Υ' : ['Υ', 'Υ'], # Upsilon
|
||||
u'Φ' : ['Φ', 'Φ'], # Phi
|
||||
u'Χ' : ['Χ', 'Χ'], # Chi
|
||||
u'Ψ' : ['Ψ', 'Ψ'], # Psi
|
||||
u'ω' : ['ω', 'ω'], # omega
|
||||
u'ϑ' : ['ϑ', 'ϑ'], # theta symbol
|
||||
u'ϒ' : ['ϒ', 'ϒ'], # upsilon symbol
|
||||
u'ϖ' : ['ϖ', 'ϖ'], # pi symbol
|
||||
# Other
|
||||
u'Œ' : ['Œ', 'Œ'], # capital ligature OE
|
||||
u'œ' : ['œ', 'œ'], # small ligature oe
|
||||
u'Š' : ['Š', 'Š'], # capital S with caron
|
||||
u'š' : ['š', 'š'], # small S with caron
|
||||
u'Ÿ' : ['Ÿ', 'Ÿ'], # capital Y with diaeres
|
||||
u'ƒ' : ['ƒ', 'ƒ'], # f with hook
|
||||
u'ˆ' : ['ˆ', 'ˆ'], # modifier letter circumflex accent
|
||||
u'˜' : ['˜', '˜'], # small tilde
|
||||
u'–' : ['–', '–'], # en dash
|
||||
u'—' : ['—', '—'], # em dash
|
||||
u'‘' : ['‘', '‘'], # left single quotation mark
|
||||
u'’' : ['’', '’'], # right single quotation mark
|
||||
u'‚' : ['‚', '‚'], # single low-9 quotation mark
|
||||
u'“' : ['“', '“'], # left double quotation mark
|
||||
u'”' : ['”', '”'], # right double quotation mark
|
||||
u'„' : ['„', '„'], # double low-9 quotation mark
|
||||
u'†' : ['†', '†'], # dagger
|
||||
u'‡' : ['‡', '‡'], # double dagger
|
||||
u'•' : ['•', '•'], # bullet
|
||||
u'…' : ['…', '…'], # horizontal ellipsis
|
||||
u'‰' : ['‰', '‰'], # per mille
|
||||
u'′' : ['′', '′'], # minutes
|
||||
u'″' : ['″', '″'], # seconds
|
||||
u'‹' : ['‹', '‹'], # single left angle quotation
|
||||
u'›' : ['›', '›'], # single right angle quotation
|
||||
u'‾' : ['‾', '‾'], # overline
|
||||
u'€' : ['€', '€'], # euro
|
||||
u'™' : ['™', '™'], # trademark
|
||||
u'←' : ['←', '←'], # left arrow
|
||||
u'↑' : ['↑', '↑'], # up arrow
|
||||
u'→' : ['→', '→'], # right arrow
|
||||
u'↓' : ['↓', '↓'], # down arrow
|
||||
u'↔' : ['↔', '↔'], # left right arrow
|
||||
u'↵' : ['↵', '↵'], # carriage return arrow
|
||||
u'⌈' : ['⌈', '⌈'], # left ceiling
|
||||
u'⌉' : ['⌉', '⌉'], # right ceiling
|
||||
u'⌊' : ['⌊', '⌊'], # left floor
|
||||
u'⌋' : ['⌋', '⌋'], # right floor
|
||||
u'◊' : ['◊', '◊'], # lozenge
|
||||
u'♠' : ['♠', '♠'], # spade
|
||||
u'♣' : ['♣', '♣'], # club
|
||||
u'♥' : ['♥', '♥'], # heart
|
||||
u'♦' : ['♦', '♦'], # diamond
|
||||
# Extra http://www.ascii.cl/htmlcodes.htm
|
||||
u' ' : [' '], # space
|
||||
u'!' : ['!'], # exclamation point
|
||||
u'#' : ['#'], # number sign
|
||||
u'$' : ['$'], # dollar sign
|
||||
u'%' : ['%'], # percent sign
|
||||
u'\'' : ['''], # single quote
|
||||
u'(' : ['('], # opening parenthesis
|
||||
u')' : [')'], # closing parenthesis
|
||||
u'*' : ['*'], # asterisk
|
||||
u'+' : ['+'], # plus sign
|
||||
u',' : [','], # comma
|
||||
u'-' : ['-'], # minus sign - hyphen
|
||||
u'.' : ['.'], # period
|
||||
u'/' : ['/'], # slash
|
||||
u'0' : ['0'], # zero
|
||||
u'1' : ['1'], # one
|
||||
u'2' : ['2'], # two
|
||||
u'3' : ['3'], # three
|
||||
u'4' : ['4'], # four
|
||||
u'5' : ['5'], # five
|
||||
u'6' : ['6'], # six
|
||||
u'7' : ['7'], # seven
|
||||
u'8' : ['8'], # eight
|
||||
u'9' : ['9'], # nine
|
||||
u':' : [':'], # colon
|
||||
u';' : [';'], # semicolon
|
||||
u'=' : ['='], # equal sign
|
||||
u'?' : ['?'], # question mark
|
||||
u'@' : ['@'], # at symbol
|
||||
u'A' : ['A'], #
|
||||
u'B' : ['B'], #
|
||||
u'C' : ['C'], #
|
||||
u'D' : ['D'], #
|
||||
u'E' : ['E'], #
|
||||
u'F' : ['F'], #
|
||||
u'G' : ['G'], #
|
||||
u'H' : ['H'], #
|
||||
u'I' : ['I'], #
|
||||
u'J' : ['J'], #
|
||||
u'K' : ['K'], #
|
||||
u'L' : ['L'], #
|
||||
u'M' : ['M'], #
|
||||
u'N' : ['N'], #
|
||||
u'O' : ['O'], #
|
||||
u'P' : ['P'], #
|
||||
u'Q' : ['Q'], #
|
||||
u'R' : ['R'], #
|
||||
u'S' : ['S'], #
|
||||
u'T' : ['T'], #
|
||||
u'U' : ['U'], #
|
||||
u'V' : ['V'], #
|
||||
u'W' : ['W'], #
|
||||
u'X' : ['X'], #
|
||||
u'Y' : ['Y'], #
|
||||
u'Z' : ['Z'], #
|
||||
u'[' : ['['], # opening bracket
|
||||
u'\\' : ['\'], # backslash
|
||||
u']' : [']'], # closing bracket
|
||||
u'^' : ['^'], # caret - circumflex
|
||||
u'_' : ['_'], # underscore
|
||||
u'`' : ['`'], # grave accent
|
||||
u'a' : ['a'], #
|
||||
u'b' : ['b'], #
|
||||
u'c' : ['c'], #
|
||||
u'd' : ['d'], #
|
||||
u'e' : ['e'], #
|
||||
u'f' : ['f'], #
|
||||
u'g' : ['g'], #
|
||||
u'h' : ['h'], #
|
||||
u'i' : ['i'], #
|
||||
u'j' : ['j'], #
|
||||
u'k' : ['k'], #
|
||||
u'l' : ['l'], #
|
||||
u'm' : ['m'], #
|
||||
u'n' : ['n'], #
|
||||
u'o' : ['o'], #
|
||||
u'p' : ['p'], #
|
||||
u'q' : ['q'], #
|
||||
u'r' : ['r'], #
|
||||
u's' : ['s'], #
|
||||
u't' : ['t'], #
|
||||
u'u' : ['u'], #
|
||||
u'v' : ['v'], #
|
||||
u'w' : ['w'], #
|
||||
u'x' : ['x'], #
|
||||
u'y' : ['y'], #
|
||||
u'z' : ['z'], #
|
||||
u'{' : ['{'], # opening brace
|
||||
u'|' : ['|'], # vertical bar
|
||||
u'}' : ['}'], # closing brace
|
||||
u'~' : ['~'], # equivalency sign - tilde
|
||||
u'<' : ['<', '<'], # less than sign
|
||||
u'>' : ['>', '>'], # greater than sign
|
||||
u'¡' : ['¡', '¡'], # inverted exclamation mark
|
||||
u'¢' : ['¢', '¢'], # cent sign
|
||||
u'£' : ['£', '£'], # pound sign
|
||||
u'¤' : ['¤', '¤'], # currency sign
|
||||
u'¥' : ['¥', '¥'], # yen sign
|
||||
u'¦' : ['¦', '¦'], # broken vertical bar
|
||||
u'§' : ['§', '§'], # section sign
|
||||
u'¨' : ['¨', '¨'], # spacing diaeresis - umlaut
|
||||
u'©' : ['©', '©'], # copyright sign
|
||||
u'ª' : ['ª', 'ª'], # feminine ordinal indicator
|
||||
u'«' : ['«', '«'], # left double angle quotes
|
||||
u'¬' : ['¬', '¬'], # not sign
|
||||
u'®' : ['®', '®'], # registered trade mark sign
|
||||
u'¯' : ['¯', '¯'], # spacing macron - overline
|
||||
u'°' : ['°', '°'], # degree sign
|
||||
u'±' : ['±', '±'], # plus-or-minus sign
|
||||
u'²' : ['²', '²'], # superscript two - squared
|
||||
u'³' : ['³', '³'], # superscript three - cubed
|
||||
u'´' : ['´', '´'], # acute accent - spacing acute
|
||||
u'µ' : ['µ', 'µ'], # micro sign
|
||||
u'¶' : ['¶', '¶'], # pilcrow sign - paragraph sign
|
||||
u'·' : ['·', '·'], # middle dot - Georgian comma
|
||||
u'¸' : ['¸', '¸'], # spacing cedilla
|
||||
u'¹' : ['¹', '¹'], # superscript one
|
||||
u'º' : ['º', 'º'], # masculine ordinal indicator
|
||||
u'»' : ['»', '»'], # right double angle quotes
|
||||
u'¼' : ['¼', '¼'], # fraction one quarter
|
||||
u'½' : ['½', '½'], # fraction one half
|
||||
u'¾' : ['¾', '¾'], # fraction three quarters
|
||||
u'¿' : ['¿', '¿'], # inverted question mark
|
||||
u'À' : ['À', 'À'], # latin capital letter A with grave
|
||||
u'Á' : ['Á', 'Á'], # latin capital letter A with acute
|
||||
u'Â' : ['Â', 'Â'], # latin capital letter A with circumflex
|
||||
u'Ã' : ['Ã', 'Ã'], # latin capital letter A with tilde
|
||||
u'Ä' : ['Ä', 'Ä'], # latin capital letter A with diaeresis
|
||||
u'Å' : ['Å', 'Å'], # latin capital letter A with ring above
|
||||
u'Æ' : ['Æ', 'Æ'], # latin capital letter AE
|
||||
u'Ç' : ['Ç', 'Ç'], # latin capital letter C with cedilla
|
||||
u'È' : ['È', 'È'], # latin capital letter E with grave
|
||||
u'É' : ['É', 'É'], # latin capital letter E with acute
|
||||
u'Ê' : ['Ê', 'Ê'], # latin capital letter E with circumflex
|
||||
u'Ë' : ['Ë', 'Ë'], # latin capital letter E with diaeresis
|
||||
u'Ì' : ['Ì', 'Ì'], # latin capital letter I with grave
|
||||
u'Í' : ['Í', 'Í'], # latin capital letter I with acute
|
||||
u'Î' : ['Î', 'Î'], # latin capital letter I with circumflex
|
||||
u'Ï' : ['Ï', 'Ï'], # latin capital letter I with diaeresis
|
||||
u'Ð' : ['Ð', 'Ð'], # latin capital letter ETH
|
||||
u'Ñ' : ['Ñ', 'Ñ'], # latin capital letter N with tilde
|
||||
u'Ò' : ['Ò', 'Ò'], # latin capital letter O with grave
|
||||
u'Ó' : ['Ó', 'Ó'], # latin capital letter O with acute
|
||||
u'Ô' : ['Ô', 'Ô'], # latin capital letter O with circumflex
|
||||
u'Õ' : ['Õ', 'Õ'], # latin capital letter O with tilde
|
||||
u'Ö' : ['Ö', 'Ö'], # latin capital letter O with diaeresis
|
||||
u'×' : ['×', '×'], # multiplication sign
|
||||
u'Ø' : ['Ø', 'Ø'], # latin capital letter O with slash
|
||||
u'Ù' : ['Ù', 'Ù'], # latin capital letter U with grave
|
||||
u'Ú' : ['Ú', 'Ú'], # latin capital letter U with acute
|
||||
u'Û' : ['Û', 'Û'], # latin capital letter U with circumflex
|
||||
u'Ü' : ['Ü', 'Ü'], # latin capital letter U with diaeresis
|
||||
u'Ý' : ['Ý', 'Ý'], # latin capital letter Y with acute
|
||||
u'Þ' : ['Þ', 'Þ'], # latin capital letter THORN
|
||||
u'ß' : ['ß', 'ß'], # latin small letter sharp s - ess-zed
|
||||
u'à' : ['à', 'à'], # latin small letter a with grave
|
||||
u'á' : ['á', 'á'], # latin small letter a with acute
|
||||
u'â' : ['â', 'â'], # latin small letter a with circumflex
|
||||
u'ã' : ['ã', 'ã'], # latin small letter a with tilde
|
||||
u'ä' : ['ä', 'ä'], # latin small letter a with diaeresis
|
||||
u'å' : ['å', 'å'], # latin small letter a with ring above
|
||||
u'æ' : ['æ', 'æ'], # latin small letter ae
|
||||
u'ç' : ['ç', 'ç'], # latin small letter c with cedilla
|
||||
u'è' : ['è', 'è'], # latin small letter e with grave
|
||||
u'é' : ['é', 'é'], # latin small letter e with acute
|
||||
u'ê' : ['ê', 'ê'], # latin small letter e with circumflex
|
||||
u'ë' : ['ë', 'ë'], # latin small letter e with diaeresis
|
||||
u'ì' : ['ì', 'ì'], # latin small letter i with grave
|
||||
u'í' : ['í', 'í'], # latin small letter i with acute
|
||||
u'î' : ['î', 'î'], # latin small letter i with circumflex
|
||||
u'ï' : ['ï', 'ï'], # latin small letter i with diaeresis
|
||||
u'ð' : ['ð', 'ð'], # latin small letter eth
|
||||
u'ñ' : ['ñ', 'ñ'], # latin small letter n with tilde
|
||||
u'ò' : ['ò', 'ò'], # latin small letter o with grave
|
||||
u'ó' : ['ó', 'ó'], # latin small letter o with acute
|
||||
u'ô' : ['ô', 'ô'], # latin small letter o with circumflex
|
||||
u'õ' : ['õ', 'õ'], # latin small letter o with tilde
|
||||
u'ö' : ['ö', 'ö'], # latin small letter o with diaeresis
|
||||
u'÷' : ['÷', '÷'], # division sign
|
||||
u'ø' : ['ø', 'ø'], # latin small letter o with slash
|
||||
u'ù' : ['ù', 'ù'], # latin small letter u with grave
|
||||
u'ú' : ['ú', 'ú'], # latin small letter u with acute
|
||||
u'û' : ['û', 'û'], # latin small letter u with circumflex
|
||||
u'ü' : ['ü', 'ü'], # latin small letter u with diaeresis
|
||||
u'ý' : ['ý', 'ý'], # latin small letter y with acute
|
||||
u'þ' : ['þ', 'þ'], # latin small letter thorn
|
||||
u'ÿ' : ['ÿ', 'ÿ'], # latin small letter y with diaeresis
|
||||
}
|
||||
|
30
src/calibre/ebooks/metadata/txt.py
Normal file
30
src/calibre/ebooks/metadata/txt.py
Normal file
@ -0,0 +1,30 @@
|
||||
'''Read meta information from TXT files'''
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
|
||||
import re
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
def get_metadata(stream, extract_cover=True):
    """
    Return metadata read from the head of a TXT file as a MetaInformation.

    Only the first four lines of ``stream`` are inspected. They must look
    like a title line, two blank lines and then an author line holding a
    comma separated list of names. If they do not match, the title and
    authors stay at the translated 'Unknown' defaults.

    ``extract_cover`` is accepted for signature compatibility and is unused.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Collect at most four lines; readline() returns an empty value at EOF.
    head = []
    for _unused in range(4):
        line = stream.readline()
        if not line:
            break
        head.append(line)
    mdata = ''.join(head)

    mo = re.search('(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        # The greedy ``.+`` groups swallow trailing blanks, and splitting
        # "A, B" on ',' leaves a leading space on every name but the first,
        # so normalise whitespace here.
        title = mo.group('title').strip()
        if title:
            mi.title = title
        authors = [name.strip() for name in mo.group('author').split(',')]
        authors = [name for name in authors if name]
        if authors:
            mi.authors = authors

    return mi
|
@ -1,69 +0,0 @@
|
||||
'''
|
||||
Convert any ebook format to PDF.
|
||||
'''
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
|
||||
'and Marshall T. Vandegrift <llasram@gmail.com>' \
|
||||
'and John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os, glob, logging
|
||||
|
||||
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
|
||||
from calibre.ebooks.epub import config as common_config
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.pdf.writer import oeb2pdf, config as pdf_config
|
||||
|
||||
def config(defaults=None):
    """
    Build the option set for PDF conversion.

    Starts from the common epub config (named 'pdf'), drops its 'profile'
    option and then merges in the options from the PDF writer config.
    """
    combined = common_config(defaults=defaults, name='pdf')
    combined.remove_opt('profile')
    combined.update(pdf_config(defaults=defaults))
    return combined
|
||||
|
||||
def option_parser(usage=USAGE):
    """
    Return the option parser built from config().

    ``usage`` is a template with two ``%s`` slots; they are filled with
    'PDF' and the value of formats().
    """
    filled_usage = usage % ('PDF', formats())
    return config().option_parser(usage=filled_usage)
|
||||
|
||||
def any2pdf(opts, path, notification=None):
    """
    Convert the ebook at ``path`` to PDF, going through an exploded OEB.

    ``opts.output`` defaults to the input's base name with a '.pdf'
    extension, in the current directory. ``opts`` is modified temporarily
    while any2epub runs; ``output`` and ``base_font_size2`` are put back even
    if that step fails. ``notification`` is accepted but unused.

    Raises ValueError when ``path`` has no file extension.
    """
    ext = os.path.splitext(path)[1]
    if not ext:
        raise ValueError('Unknown file type: '+path)
    ext = ext.lower()[1:]

    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.pdf'

    opts.output = os.path.abspath(opts.output)
    orig_output = opts.output

    with TemporaryDirectory('_any2pdf') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
        orig_bfs = opts.base_font_size2
        opts.output = os.path.join(tdir, 'dummy.epub')
        opts.profile = 'None'
        opts.dont_split_on_page_breaks = True
        opts.base_font_size2 = 0
        try:
            any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir)
        finally:
            # Never leave the caller's options pointing at the temp dir.
            opts.base_font_size2 = orig_bfs
            opts.output = orig_output
        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
        logging.getLogger('html2epub').info(_('Creating PDF file from EPUB...'))
        oeb2pdf(opts, opf)
|
||||
|
||||
def main(args=sys.argv):
    """
    Command line entry point: convert the file named by the first argument.

    Returns 1 after printing help when no input file was given; otherwise
    runs any2pdf and returns None.
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) >= 2:
        any2pdf(opts, args[1])
        return
    parser.print_help()
    print('No input file specified.')
    return 1
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
90
src/calibre/ebooks/pdf/info.py
Normal file
90
src/calibre/ebooks/pdf/info.py
Normal file
@ -0,0 +1,90 @@
|
||||
'''
|
||||
Print information about PDF files.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, sys, time
|
||||
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
|
||||
def config(defaults=None):
    """
    Return the configuration object for the info command.

    A Config named 'manipulatepdf' is returned when ``defaults`` is None,
    otherwise a StringConfig built from ``defaults``.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
|
||||
|
||||
def option_parser(name):
    """
    Return the option parser for the info command.

    ``name`` is the sub-command name; it replaces the ``%%name`` marker in
    the usage text.
    """
    c = config()
    # Translate the constant template first and substitute afterwards, so
    # the string looked up by _() does not vary with ``name``.
    usage = _('''\
%prog %%name [options] file.pdf ...

Get info about a PDF.
''').replace('%%name', name)
    return c.option_parser(usage=usage)
|
||||
|
||||
def print_info(pdf_path):
    """
    Print document information for the PDF at ``pdf_path`` to stdout.

    Title, author, subject, creator and producer come from the PDF's
    document info. The creation and modification dates are the file's
    ctime and mtime rendered in GMT, not values stored inside the PDF.
    File size and PDF version are best effort and are skipped on failure.
    """
    time_fmt = '%a %b %d %H:%M:%S %Y'

    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        doc_info = pdf.documentInfo
        # Format AFTER translating: _() must see the constant template,
        # otherwise the lookup string changes with every value.
        print(_('Title: %s') % doc_info.title)
        print(_('Author: %s') % doc_info.author)
        print(_('Subject: %s') % doc_info.subject)
        print(_('Creator: %s') % doc_info.creator)
        print(_('Producer: %s') % doc_info.producer)
        print(_('Creation Date: %s') % time.strftime(time_fmt, time.gmtime(os.path.getctime(pdf_path))))
        print(_('Modification Date: %s') % time.strftime(time_fmt, time.gmtime(os.path.getmtime(pdf_path))))
        print(_('Pages: %s') % pdf.numPages)
        print(_('Encrypted: %s') % pdf.isEncrypted)
        try:
            print(_('File Size: %s bytes') % os.path.getsize(pdf_path))
        except OSError:
            # Best effort: the size is optional information.
            pass
        try:
            pdf_file.seek(0)
            vline = pdf_file.readline()
            mo = re.search(r'(?iu)^%...-(?P<version>\d+\.\d+)', vline)
            if mo is not None:
                print(_('PDF Version: %s') % mo.group('version'))
        except Exception:
            # Best effort: a missing or unreadable header line only means
            # the version is not reported.
            pass
|
||||
|
||||
def verify_files(files):
    """
    Return the subset of ``files`` that could not be opened as a PDF.

    A path is reported when opening it, or constructing a PdfFileReader on
    it, raises. The order of the returned list follows ``files``.
    """
    invalid = []

    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                PdfFileReader(pdf_file)
        except Exception:
            # Any failure marks the file unusable, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare except did.
            invalid.append(pdf_path)
    return invalid
|
||||
|
||||
def main(args=sys.argv, name=''):
    """
    Entry point of the info command.

    ``args`` is an argv style list (program name first); ``name`` is the
    sub-command name shown in the usage text. Returns 0 on success and 2
    when no file was given or a file could not be read.
    """
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]

    if not args:
        print('Error: No PDF specified.\n')
        print(parser.get_usage())
        return 2

    bad_pdfs = verify_files(args)
    if bad_pdfs:
        for pdf in bad_pdfs:
            print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    for pdf in args:
        print_info(pdf)

    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
69
src/calibre/ebooks/pdf/manipulate.py
Normal file
69
src/calibre/ebooks/pdf/manipulate.py
Normal file
@ -0,0 +1,69 @@
|
||||
'''
|
||||
Command line interface to run pdf manipulation commands.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import string, sys
|
||||
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.ebooks.pdf import info, merge, reverse, split, trim
|
||||
|
||||
COMMANDS = {
|
||||
'info' : info,
|
||||
'merge' : merge,
|
||||
'reverse' : reverse,
|
||||
'split' : split,
|
||||
'trim' : trim,
|
||||
}
|
||||
|
||||
def config(defaults=None):
    """
    Return the configuration object for the pdfmanipulate front end.

    A Config named 'manipulatepdf' is returned when ``defaults`` is None,
    otherwise a StringConfig built from ``defaults``.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
|
||||
|
||||
def option_parser():
    """
    Return the top level option parser.

    The usage text lists the keys of COMMANDS in sorted order.
    """
    c = config()
    # str.join replaces the deprecated string.join(); the template is
    # translated before the command list is substituted into it.
    command_names = ', '.join(sorted(COMMANDS))
    usage = _('''\

%prog command ...

command can be one of the following:
[%%commands]

Use %prog command --help to get more information about a specific command

Manipulate a PDF.
''').replace('%%commands', command_names)
    return c.option_parser(usage=usage)
|
||||
|
||||
def main(args=sys.argv):
    """
    Dispatch to the sub-command named by ``args[1]``.

    The sub-command's ``main`` receives the argument list with the command
    name removed, plus the command name itself, and its return value is
    passed through. Returns 2 when the command is missing or unknown.
    """
    parser = option_parser()

    if len(args) < 2:
        print('Error: No command specified.\n')
        print(parser.get_usage())
        return 2

    command = args[1].lower().strip()

    if command in COMMANDS:
        # Build a new list instead of ``del args[1]``: the default value is
        # sys.argv itself and must not be modified in place.
        sub_args = list(args[:1]) + list(args[2:])
        return COMMANDS[command].main(sub_args, command)

    # Parsing still runs for an unknown command (kept from the original).
    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print(parser.get_usage())
    return 2
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
91
src/calibre/ebooks/pdf/merge.py
Normal file
91
src/calibre/ebooks/pdf/merge.py
Normal file
@ -0,0 +1,91 @@
|
||||
'''
|
||||
Merge PDF files into a single PDF document.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, sys
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    """
    Return the configuration object for the merge command.

    Uses a Config named 'mergepdf' when ``defaults`` is None, otherwise a
    StringConfig built from ``defaults``. Either way an ``output`` option
    (-o/--output, default 'merged.pdf') is added before returning.
    """
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('mergepdf', description)
    conf.add_opt('output', ['-o', '--output'], default='merged.pdf',
                 help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
|
||||
|
||||
def option_parser(name):
    """
    Return the option parser for the merge command.

    ``name`` is the sub-command name; it replaces the ``%%name`` marker in
    the usage text.
    """
    c = config()
    # Translate the constant template first and substitute afterwards, so
    # the string looked up by _() does not vary with ``name``.
    usage = _('''\
%prog %%name [options] file1.pdf file2.pdf ...

Merges individual PDFs. Metadata will be used from the first PDF specified.
''').replace('%%name', name)
    return c.option_parser(usage=usage)
|
||||
|
||||
def merge_files(in_paths, out_path, metadata=None):
    """
    Concatenate the pages of every PDF in ``in_paths`` into ``out_path``.

    ``metadata`` supplies the title and authors of the result; when it is
    None both are set to the translated 'Unknown'.
    """
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    opened = []
    try:
        for pdf_path in in_paths:
            pdf_file = open(os.path.abspath(pdf_path), 'rb')
            opened.append(pdf_file)
            pdf = PdfFileReader(pdf_file)
            for page in pdf.pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        # NOTE(review): inputs are kept open until after write() on the
        # assumption that pyPdf reads page data lazily - confirm. They are
        # closed here instead of being left to the garbage collector.
        for pdf_file in opened:
            pdf_file.close()
|
||||
|
||||
def verify_files(files):
    """
    Return the subset of ``files`` that cannot be used as merge input.

    A path is rejected when it cannot be opened or parsed, when the PDF is
    encrypted, or when it reports no pages. Order follows ``files``.
    """
    invalid = []

    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                pdf = PdfFileReader(pdf_file)
                usable = not pdf.isEncrypted and pdf.numPages > 0
        except Exception:
            # Unreadable or unparsable; KeyboardInterrupt and SystemExit
            # are no longer swallowed as a bare except did.
            usable = False
        if not usable:
            invalid.append(pdf_path)
    return invalid
|
||||
|
||||
def main(args=sys.argv, name=''):
    """
    Entry point of the merge command.

    ``args`` is an argv style list (program name first); ``name`` is the
    sub-command name shown in the usage text. Metadata for the merged file
    is taken from the first input. Returns 0 on success and 2 when fewer
    than two files were given or a file could not be read.
    """
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]

    if len(args) < 2:
        print('Error: Two or more PDF files are required.\n\n')
        print(parser.get_usage())
        return 2

    bad_pdfs = verify_files(args)
    if bad_pdfs:
        for pdf in bad_pdfs:
            print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    mi = metadata_from_formats([args[0]])

    merge_files(args, opts.output, mi)

    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
85
src/calibre/ebooks/pdf/output.py
Normal file
85
src/calibre/ebooks/pdf/output.py
Normal file
@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Convert OEB ebook format to PDF.
|
||||
'''
|
||||
|
||||
#unit, papersize, orientation, custom_size, profile
|
||||
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.pdf.writer import PDFWriter
|
||||
from calibre.ebooks.pdf.pageoptions import UNITS, unit, PAPER_SIZES, \
|
||||
paper_size, ORIENTATIONS, orientation, PageOptions
|
||||
|
||||
class PDFOutput(OutputFormatPlugin):
    '''
    Output plugin that writes the spine of an OEB book to a PDF file.

    Declares options for the four page margins, the unit of measure, the
    paper size and the page orientation.
    '''

    name = 'PDF Output'
    author = 'John Schember'
    file_type = 'pdf'

    # Help texts are translated first and formatted afterwards, so the
    # string looked up by _() is the constant template.
    options = set([
                    OptionRecommendation(name='margin_top', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_top',
                        help=_('The top margin around the document.')),
                    OptionRecommendation(name='margin_bottom', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_bottom',
                        help=_('The bottom margin around the document.')),
                    OptionRecommendation(name='margin_left', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_left',
                        help=_('The left margin around the document.')),
                    OptionRecommendation(name='margin_right', recommended_value='1',
                        level=OptionRecommendation.LOW, long_switch='margin_right',
                        help=_('The right margin around the document.')),

                    OptionRecommendation(name='unit', recommended_value='inch',
                        level=OptionRecommendation.LOW, short_switch='u',
                        long_switch='unit', choices=UNITS.keys(),
                        help=_('The unit of measure. Default is inch. Choices '
                        'are %s') % UNITS.keys()),
                    OptionRecommendation(name='paper_size', recommended_value='letter',
                        level=OptionRecommendation.LOW,
                        long_switch='paper_size', choices=PAPER_SIZES.keys(),
                        help=_('The size of the paper. Default is letter. Choices '
                        'are %s') % PAPER_SIZES.keys()),
                    OptionRecommendation(name='orientation', recommended_value='portrait',
                        level=OptionRecommendation.LOW,
                        long_switch='orientation', choices=ORIENTATIONS.keys(),
                        help=_('The orientation of the page. Default is portrait. Choices '
                        'are %s') % ORIENTATIONS.keys()),
                 ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book.spine`` as a PDF to ``output_path``.

        ``output_path`` is either a filesystem path or an object with a
        ``write`` attribute. For a path, missing parent directories are
        created and the file is opened and closed here. A stream passed in
        is rewound and truncated but left open for the caller.
        ``input_plugin`` is not used.
        '''
        popts = PageOptions()

        popts.set_margin_top(opts.margin_top)
        popts.set_margin_bottom(opts.margin_bottom)
        popts.set_margin_left(opts.margin_left)
        popts.set_margin_right(opts.margin_right)

        popts.unit = unit(opts.unit)
        popts.paper_size = paper_size(opts.paper_size)
        popts.orientation = orientation(opts.orientation)

        writer = PDFWriter(log, popts)

        close = False
        if not hasattr(output_path, 'write'):
            close = True
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            writer.dump(oeb_book.spine, out_stream)
        finally:
            # Only close what was opened here, and do so even when the
            # writer fails.
            if close:
                out_stream.close()
|
98
src/calibre/ebooks/pdf/pageoptions.py
Normal file
98
src/calibre/ebooks/pdf/pageoptions.py
Normal file
@ -0,0 +1,98 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from PyQt4.Qt import QPrinter
|
||||
|
||||
UNITS = {
|
||||
'millimeter' : QPrinter.Millimeter,
|
||||
'point' : QPrinter.Point,
|
||||
'inch' : QPrinter.Inch,
|
||||
'pica' : QPrinter.Pica,
|
||||
'didot' : QPrinter.Didot,
|
||||
'cicero' : QPrinter.Cicero,
|
||||
'devicepixel' : QPrinter.DevicePixel,
|
||||
}
|
||||
|
||||
def unit(unit):
    """Return the UNITS entry for ``unit``, or QPrinter.Inch if there is none."""
    fallback = QPrinter.Inch
    if unit in UNITS:
        return UNITS[unit]
    return fallback
|
||||
|
||||
# Paper size names mapped to the corresponding QPrinter paper size constants.
PAPER_SIZES = {
    'a0': QPrinter.A0,                # 841 x 1189 mm
    'a1': QPrinter.A1,                # 594 x 841 mm
    'a2': QPrinter.A2,                # 420 x 594 mm
    'a3': QPrinter.A3,                # 297 x 420 mm
    'a4': QPrinter.A4,                # 210 x 297 mm, 8.26 x 11.69 inches
    'a5': QPrinter.A5,                # 148 x 210 mm
    'a6': QPrinter.A6,                # 105 x 148 mm
    'a7': QPrinter.A7,                # 74 x 105 mm
    'a8': QPrinter.A8,                # 52 x 74 mm
    'a9': QPrinter.A9,                # 37 x 52 mm
    'b0': QPrinter.B0,                # 1030 x 1456 mm
    'b1': QPrinter.B1,                # 728 x 1030 mm
    'b2': QPrinter.B2,                # 515 x 728 mm
    'b3': QPrinter.B3,                # 364 x 515 mm
    'b4': QPrinter.B4,                # 257 x 364 mm
    'b5': QPrinter.B5,                # 182 x 257 mm, 7.17 x 10.13 inches
    'b6': QPrinter.B6,                # 128 x 182 mm
    'b7': QPrinter.B7,                # 91 x 128 mm
    'b8': QPrinter.B8,                # 64 x 91 mm
    'b9': QPrinter.B9,                # 45 x 64 mm
    'b10': QPrinter.B10,              # 32 x 45 mm
    'c5e': QPrinter.C5E,              # 163 x 229 mm
    'comm10e': QPrinter.Comm10E,      # 105 x 241 mm, U.S. Common 10 Envelope
    'dle': QPrinter.DLE,              # 110 x 220 mm
    'executive': QPrinter.Executive,  # 7.5 x 10 inches, 191 x 254 mm
    'folio': QPrinter.Folio,          # 210 x 330 mm
    'ledger': QPrinter.Ledger,        # 432 x 279 mm
    'legal': QPrinter.Legal,          # 8.5 x 14 inches, 216 x 356 mm
    'letter': QPrinter.Letter,        # 8.5 x 11 inches, 216 x 279 mm
    'tabloid': QPrinter.Tabloid,      # 279 x 432 mm
    # QPrinter.Custom (unknown, or a user defined size) is left out of
    # this table.
}


def paper_size(size):
    '''
    Look up the QPrinter paper size constant for a paper size name.

    :param size: A key of ``PAPER_SIZES`` (for example ``'a4'``).
    :return: The matching QPrinter constant, or ``QPrinter.Letter`` when the
             name is not a key of ``PAPER_SIZES``.
    '''
    try:
        return PAPER_SIZES[size]
    except KeyError:
        return QPrinter.Letter
|
||||
|
||||
# Orientation names mapped to the corresponding QPrinter constants.
ORIENTATIONS = {
    'portrait': QPrinter.Portrait,
    'landscape': QPrinter.Landscape,
}


def orientation(orientation):
    '''
    Look up the QPrinter orientation constant for an orientation name.

    :param orientation: A key of ``ORIENTATIONS``.
    :return: The matching QPrinter constant, or ``QPrinter.Portrait`` when
             the name is not a key of ``ORIENTATIONS``.
    '''
    try:
        return ORIENTATIONS[orientation]
    except KeyError:
        return QPrinter.Portrait
|
||||
|
||||
|
||||
class PageOptions(object):
    '''
    Container for the page layout settings: margins, unit of measurement,
    paper size and orientation.

    The margin setters accept anything that can be converted to a number
    and fall back to the default margin when the value cannot be converted.
    '''

    # Margin used when no (or an unusable) value is supplied.
    DEFAULT_MARGIN = 1

    margin_top = 1
    margin_bottom = 1
    margin_left = 1
    margin_right = 1
    # QPrinter unit constant the margins are expressed in.
    unit = QPrinter.Inch
    paper_size = QPrinter.Letter
    orientation = QPrinter.Portrait

    def _to_margin(self, size):
        '''
        Convert ``size`` to a number, returning the default margin on failure.
        '''
        # float rather than int: int() would truncate a fractional margin
        # such as 0.5 to 0 and reject the string '0.5' altogether.
        try:
            return float(size)
        except (TypeError, ValueError):
            return self.DEFAULT_MARGIN

    def set_margin_top(self, size):
        '''Set the top margin from ``size`` (number or numeric string).'''
        self.margin_top = self._to_margin(size)

    def set_margin_bottom(self, size):
        '''Set the bottom margin from ``size`` (number or numeric string).'''
        self.margin_bottom = self._to_margin(size)

    def set_margin_left(self, size):
        '''Set the left margin from ``size`` (number or numeric string).'''
        self.margin_left = self._to_margin(size)

    def set_margin_right(self, size):
        '''Set the right margin from ``size`` (number or numeric string).'''
        self.margin_right = self._to_margin(size)
|
88
src/calibre/ebooks/pdf/reverse.py
Normal file
88
src/calibre/ebooks/pdf/reverse.py
Normal file
@ -0,0 +1,88 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Reverse content of PDF.
|
||||
'''
|
||||
|
||||
import os, sys
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    '''
    Build the configuration object holding the options of the reverse command.

    :param defaults: When None a ``Config`` named ``'reversepdf'`` is created,
                     otherwise a ``StringConfig`` is built from this value.
    :return: The configuration object with the ``output`` option added.
    '''
    description = _('Options to control the transformation of pdf')
    conf = Config('reversepdf', description) if defaults is None \
            else StringConfig(defaults, description)
    conf.add_opt('output', ['-o', '--output'], default='reversed.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
|
||||
|
||||
def option_parser(name):
    '''
    Create the command line parser for the reverse command.

    :param name: Name of the sub-command; substituted for ``%%name`` in the
                 usage text.
    :return: The option parser produced by ``config().option_parser``.
    '''
    usage = _('''\
%prog %%name [options] file1.pdf

Reverse PDF.
''')
    # Substitute the command name only after the translation lookup, so the
    # string handed to _() is the constant literal rather than a per-name
    # variant that can never match a catalog entry.
    return config().option_parser(usage=usage.replace('%%name', name))
|
||||
|
||||
def reverse(pdf_path, out_path, metadata=None):
    '''
    Write a copy of a PDF whose pages are in reverse order.

    :param pdf_path: Path of the PDF to read.
    :param out_path: Path of the PDF to create.
    :param metadata: Object with ``title`` and ``authors`` attributes used
                     for the output document; when None both title and author
                     are set to the translated string 'Unknown'.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input file is closed deterministically. It is kept open until the
    # output has been written because the page objects handed to the writer
    # come from the reader (presumably read on demand -- keep the nesting).
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
|
||||
|
||||
def valid_pdf(pdf_path):
    '''
    Return True if the pdf is valid.

    A file counts as valid when it can be opened and parsed by
    ``PdfFileReader``, is not encrypted and has at least one page.

    :param pdf_path: Path of the file to check.
    :return: True or False; errors while opening or parsing yield False.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not pdf.isEncrypted and pdf.numPages > 0
    except Exception:
        # Any failure to open or parse means "not usable". KeyboardInterrupt
        # and SystemExit are deliberately left to propagate.
        return False
|
||||
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Command line entry point of the reverse command.

    :param args: Argument list; the first element is skipped after option
                 parsing and the next one is taken as the PDF to reverse.
    :param name: Sub-command name shown in the usage text.
    :return: 0 on success, 2 when no PDF was given or it could not be read.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]

    if len(args) < 1:
        print('Error: A PDF file is required.\n\n')
        print(parser.get_usage())
        return 2

    pdf_path = args[0]
    if not valid_pdf(pdf_path):
        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf_path)
        return 2

    mi = metadata_from_formats([pdf_path])
    reverse(pdf_path, opts.output, mi)

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
186
src/calibre/ebooks/pdf/split.py
Normal file
186
src/calibre/ebooks/pdf/split.py
Normal file
@ -0,0 +1,186 @@
|
||||
'''
|
||||
Split PDF file into multiple PDF documents.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, sys, re
|
||||
|
||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
def config(defaults=None):
    '''
    Build the configuration object holding the options of the split command.

    :param defaults: When None a ``Config`` named ``'splitpdf'`` is created,
                     otherwise a ``StringConfig`` is built from this value.
    :return: The configuration object with the ``output`` option added.
    '''
    desc = _('Options to control the transformation of pdf')
    if defaults is None:
        c = Config('splitpdf', desc)
    else:
        c = StringConfig(defaults, desc)
    # Adjacent literals instead of a backslash continuation inside the
    # string, so source indentation can never leak into the help text.
    c.add_opt('output', ['-o', '--output'], default='split.pdf',
        help=_('Path to output file. By default a file is created in the current directory. '
               'The file name will be the base name for the output.'))
    return c
|
||||
|
||||
def option_parser(name):
    '''
    Create the command line parser for the split command.

    :param name: Name of the sub-command; substituted for ``%%name`` in the
                 usage text.
    :return: The option parser produced by ``config().option_parser``.
    '''
    usage = _('''\

%prog %%name [options] file.pdf page_to_split_on ...
%prog %%name [options] file.pdf page_range_to_split_on ...

Ex.

%prog %%name file.pdf 6
%prog %%name file.pdf 6-12
%prog %%name file.pdf 6-12 8 10 9-20

Split a PDF.
''')
    # Substitute the command name only after the translation lookup, so the
    # string handed to _() is the constant literal rather than a per-name
    # variant that can never match a catalog entry.
    return config().option_parser(usage=usage.replace('%%name', name))
|
||||
|
||||
def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
    '''
    Write one output PDF per split point and per page range.

    :param in_path: Path of the PDF to split.
    :param pages: List of zero based page indexes. For each one a file
                  holding that page up to the last page is written.
    :param page_ranges: List of ``[start, end]`` pairs of zero based,
                        inclusive page indexes; one file is written per pair.
    :param out_name: Base name of the output files; a one based page number
                     (or ``start-end``) and ``.pdf`` are appended.
    :param metadata: Passed through to ``write_pdf``.
    '''
    # Keep the input open for as long as pages are being copied out of it,
    # and make sure it is closed afterwards.
    with open(os.path.abspath(in_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        last_page = pdf.numPages - 1

        for index in pages:
            write_pdf(pdf, out_name, '%s' % (index + 1), index, last_page, metadata)

        for start, end in page_ranges:
            write_pdf(pdf, out_name, '%s-%s' % (start + 1, end + 1), start, end, metadata)
|
||||
|
||||
def write_pdf(pdf, name, suffix, start, end, metadata=None):
    '''
    Copy a run of pages from an open PDF reader into a new PDF file.

    :param pdf: Reader object providing ``getPage(index)``.
    :param name: Base name of the output file.
    :param suffix: Text appended to ``name``; the file written is
                   ``name + suffix + '.pdf'``.
    :param start: Zero based index of the first page to copy.
    :param end: Zero based index of the last page to copy (inclusive).
    :param metadata: Object with ``title`` and ``authors`` attributes used
                     for the output document; when None both title and author
                     are set to the translated string 'Unknown'.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)
    for page_num in range(start, end + 1):
        out_pdf.addPage(pdf.getPage(page_num))

    with open('%s%s.pdf' % (name, suffix), 'wb') as out_file:
        out_pdf.write(out_file)
|
||||
|
||||
def split_args(args):
    '''
    Sort the positional command line arguments into their categories.

    :param args: Iterable of argument strings.
    :return: Tuple ``(pdf, pages, page_ranges, bad)``:

             * ``pdf``: first argument ending in ``.pdf`` (case insensitive),
               or ``''`` when there is none.
             * ``pages``: list of page numbers, as strings.
             * ``page_ranges``: list of ``[start, end]`` string pairs.
             * ``bad``: sorted list, without duplicates, of the arguments
               that were not understood (including any additional pdf).
    '''
    pdf_re = re.compile(r'(?iu)^.*?\.pdf[ ]*$')
    page_re = re.compile(r'^[ ]*\d+[ ]*$')
    range_re = re.compile(r'^[ ]*(?P<start>\d+)[ ]*-[ ]*(?P<end>\d+)[ ]*$')

    pdf = ''
    pages = []
    page_ranges = []
    bad = []

    for arg in args:
        arg = arg.strip()

        # The pdf input: only the first one is accepted.
        if pdf_re.search(arg) is not None:
            if pdf == '':
                pdf = arg
            else:
                bad.append(arg)
            continue

        # A single page number.
        if page_re.search(arg) is not None:
            pages.append(arg)
            continue

        # A page range.
        mo = range_re.search(arg)
        if mo is None:
            bad.append(arg)
            continue

        start = mo.group('start')
        end = mo.group('end')
        # A range whose two ends are identical is really a single page.
        if start == end:
            pages.append(start)
        else:
            page_ranges.append([start, end])

    return pdf, pages, page_ranges, sorted(set(bad))
|
||||
|
||||
def clean_page_list(pdf_path, pages, page_ranges):
    '''
    Normalise the requested pages and page ranges against a PDF.

    Duplicates are removed, page numbers are clamped to the pages that exist
    in the PDF and everything is converted to zero based ints.

    :param pdf_path: Path of the PDF the page numbers refer to.
    :param pages: Iterable of one based page numbers (ints or digit strings).
    :param page_ranges: Iterable of ``(start, end)`` pairs of one based page
                        numbers (ints or digit strings).
    :return: Tuple ``(pages, page_ranges)``: a sorted list of zero based page
             indexes and a sorted list of ``[low, high]`` zero based pairs.
    '''
    # numPages is read while the file is still open; the handle is closed as
    # soon as the count is known.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        total_pages = PdfFileReader(pdf_file).numPages

    def clamp(page):
        # Keep the one based page number inside 1..total_pages. Without the
        # lower bound a request for page 0 would become index -1.
        return min(max(page, 1), total_pages)

    single_pages = set()
    ranges = []

    for page in pages:
        single_pages.add(clamp(int(page)) - 1)

    for start, end in page_ranges:
        start = int(start)
        end = int(end)

        # A range lying completely behind the last page collapses to the
        # last page.
        if start > total_pages and end > total_pages:
            single_pages.add(total_pages - 1)
            continue

        page_range = sorted([clamp(start) - 1, clamp(end) - 1])
        if page_range not in ranges:
            ranges.append(page_range)

    return sorted(single_pages), sorted(ranges)
|
||||
|
||||
def valid_pdf(pdf_path):
    '''
    Return True if the pdf is valid.

    A file counts as valid when it can be opened and parsed by
    ``PdfFileReader``, is not encrypted and has at least one page.

    :param pdf_path: Path of the file to check.
    :return: True or False; errors while opening or parsing yield False.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not pdf.isEncrypted and pdf.numPages > 0
    except Exception:
        # Any failure to open or parse means "not usable". KeyboardInterrupt
        # and SystemExit are deliberately left to propagate.
        return False
|
||||
|
||||
def main(args=sys.argv, name=''):
    '''
    Command line entry point of the split command.

    :param args: Argument list; the first element is skipped after option
                 parsing, the rest is classified by ``split_args``.
    :param name: Sub-command name shown in the usage text.
    :return: 0 on success, 2 on any usage or input error.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)

    pdf, pages, page_ranges, unknown = split_args(args[1:])

    # Both a PDF and at least one split point (page or range) are needed.
    if pdf == '' or (pages == [] and page_ranges == []):
        print('Error: PDF and where to split is required.\n\n')
        print(parser.get_usage())
        return 2

    if unknown != []:
        for arg in unknown:
            print('Error: Unknown argument `%s`' % arg)
        print(parser.get_usage())
        return 2

    if not valid_pdf(pdf):
        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    pages, page_ranges = clean_page_list(pdf, pages, page_ranges)

    mi = metadata_from_formats([pdf])

    split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi)

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||
|
@ -16,8 +16,6 @@ def config(defaults=None):
|
||||
c = Config('trimpdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
|
||||
help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
|
||||
c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
|
||||
help=_('Path to output file. By default a file is created in the current directory.'))
|
||||
c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
|
||||
@ -33,16 +31,16 @@ def config(defaults=None):
|
||||
return c
|
||||
|
||||
|
||||
def option_parser():
|
||||
def option_parser(name):
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog [options] file.pdf
|
||||
%prog %%name [options] file.pdf
|
||||
|
||||
Crops a pdf.
|
||||
'''))
|
||||
'''.replace('%%name', name)))
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
def main(args=sys.argv, name=''):
|
||||
parser = option_parser(name)
|
||||
opts, args = parser.parse_args(args)
|
||||
try:
|
||||
source = os.path.abspath(args[1])
|
@ -1,19 +1,18 @@
|
||||
'''
|
||||
Write content to PDF.
|
||||
'''
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, logging, shutil, sys
|
||||
'''
|
||||
Write content to PDF.
|
||||
'''
|
||||
|
||||
import os, shutil, sys
|
||||
|
||||
from calibre import LoggingInterface
|
||||
from calibre.ebooks.epub.iterator import SpineItem
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.customize.ui import run_plugins_on_postprocess
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.ebooks.pdf.pageoptions import PageOptions
|
||||
|
||||
from PyQt4 import QtCore
|
||||
from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, \
|
||||
@ -21,21 +20,14 @@ from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter,
|
||||
from PyQt4.QtWebKit import QWebView
|
||||
|
||||
from pyPdf import PdfFileWriter, PdfFileReader
|
||||
|
||||
class PDFMargins:
|
||||
def __init__(self, margin=1):
|
||||
self.top = margin
|
||||
self.bottom = margin
|
||||
self.left = margin
|
||||
self.right = margin
|
||||
|
||||
class PDFWriter(QObject):
|
||||
def __init__(self, margins=PDFMargins()):
|
||||
def __init__(self, log, popts=PageOptions()):
|
||||
if QApplication.instance() is None:
|
||||
QApplication([])
|
||||
QObject.__init__(self)
|
||||
|
||||
self.logger = logging.getLogger('oeb2pdf')
|
||||
self.logger = log
|
||||
|
||||
self.loop = QEventLoop()
|
||||
self.view = QWebView()
|
||||
@ -43,15 +35,14 @@ class PDFWriter(QObject):
|
||||
self.render_queue = []
|
||||
self.combine_queue = []
|
||||
self.tmp_path = PersistentTemporaryDirectory('_any2pdf_parts')
|
||||
self.margins = margins
|
||||
self.popts = popts
|
||||
|
||||
def dump(self, oebpath, path):
|
||||
def dump(self, spine, out_stream):
|
||||
self._delete_tmpdir()
|
||||
|
||||
opf = OPF(oebpath, os.path.dirname(oebpath))
|
||||
self.render_queue = [SpineItem(i.path) for i in opf.spine]
|
||||
self.render_queue = spine[:]
|
||||
self.combine_queue = []
|
||||
self.path = path
|
||||
self.out_stream = out_stream
|
||||
|
||||
QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
|
||||
self.loop.exec_()
|
||||
@ -78,7 +69,9 @@ class PDFWriter(QObject):
|
||||
self.logger.debug('\tRendering item as %s' % item_path)
|
||||
|
||||
printer = QPrinter(QPrinter.HighResolution)
|
||||
printer.setPageMargins(self.margins.left, self.margins.top, self.margins.right, self.margins.bottom, QPrinter.Inch)
|
||||
printer.setPageMargins(self.popts.margin_left, self.popts.margin_top, self.popts.margin_right, self.popts.margin_bottom, self.popts.unit)
|
||||
printer.setPaperSize(self.popts.paper_size)
|
||||
printer.setOrientation(self.popts.orientation)
|
||||
printer.setOutputFormat(QPrinter.PdfFormat)
|
||||
printer.setOutputFileName(item_path)
|
||||
self.view.print_(printer)
|
||||
@ -98,75 +91,7 @@ class PDFWriter(QObject):
|
||||
inputPDF = PdfFileReader(file(item, 'rb'))
|
||||
for page in inputPDF.pages:
|
||||
outPDF.addPage(page)
|
||||
outputStream = file(self.path, 'wb')
|
||||
outPDF.write(outputStream)
|
||||
outputStream.close()
|
||||
outPDF.write(self.out_stream)
|
||||
finally:
|
||||
self._delete_tmpdir()
|
||||
self.loop.exit(0)
|
||||
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the conversion to PDF')
|
||||
if defaults is None:
|
||||
c = Config('pdf', desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
|
||||
pdf = c.add_group('PDF', _('PDF options.'))
|
||||
|
||||
pdf('margin_top', ['--margin_top'], default=1,
|
||||
help=_('The top margin around the document in inches.'))
|
||||
pdf('margin_bottom', ['--margin_bottom'], default=1,
|
||||
help=_('The bottom margin around the document in inches.'))
|
||||
pdf('margin_left', ['--margin_left'], default=1,
|
||||
help=_('The left margin around the document in inches.'))
|
||||
pdf('margin_right', ['--margin_right'], default=1,
|
||||
help=_('The right margin around the document in inches.'))
|
||||
|
||||
return c
|
||||
|
||||
def option_parser():
|
||||
c = config()
|
||||
parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
|
||||
parser.add_option(
|
||||
'-o', '--output', default=None,
|
||||
help=_('Output file. Default is derived from input filename.'))
|
||||
parser.add_option(
|
||||
'-v', '--verbose', default=0, action='count',
|
||||
help=_('Useful for debugging.'))
|
||||
return parser
|
||||
|
||||
def oeb2pdf(opts, inpath):
|
||||
logger = LoggingInterface(logging.getLogger('oeb2pdf'))
|
||||
logger.setup_cli_handler(opts.verbose)
|
||||
|
||||
outpath = opts.output
|
||||
if outpath is None:
|
||||
outpath = os.path.basename(inpath)
|
||||
outpath = os.path.splitext(outpath)[0] + '.pdf'
|
||||
|
||||
margins = PDFMargins()
|
||||
margins.top = opts.margin_top
|
||||
margins.bottom = opts.margin_bottom
|
||||
margins.left = opts.margin_left
|
||||
margins.right = opts.margin_right
|
||||
|
||||
writer = PDFWriter(margins)
|
||||
writer.dump(inpath, outpath)
|
||||
run_plugins_on_postprocess(outpath, 'pdf')
|
||||
logger.log_info(_('Output written to ') + outpath)
|
||||
|
||||
def main(argv=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(argv[1:])
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
return 1
|
||||
inpath = args[0]
|
||||
retval = oeb2pdf(opts, inpath)
|
||||
return retval
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
|
9
src/calibre/ebooks/txt/__init__.py
Normal file
9
src/calibre/ebooks/txt/__init__.py
Normal file
@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, John Schember john@nachtimwald.com'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
Used for txt output
|
||||
'''
|
||||
|
63
src/calibre/ebooks/txt/output.py
Normal file
63
src/calibre/ebooks/txt/output.py
Normal file
@ -0,0 +1,63 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||
OptionRecommendation
|
||||
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
|
||||
class TXTOutput(OutputFormatPlugin):
    '''
    Output plugin that writes a book as a single plain text file.
    '''

    name = 'TXT Output'
    author = 'John Schember'
    file_type = 'txt'

    options = set([
        # The list of choices is formatted in after the translation lookup so
        # that _() receives the constant literal.
        OptionRecommendation(name='newline', recommended_value='system',
            level=OptionRecommendation.LOW, long_switch='newline',
            short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
            help=_('Type of newline to use. Options are %s. Default is \'system\'. '
                'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
                'For Mac OS X use \'unix\'. \'system\' will default to the newline '
                'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
        OptionRecommendation(name='prepend_author', recommended_value='true',
            level=OptionRecommendation.LOW, long_switch='prepend_author',
            choices=['true', 'false'],
            help=_('Write the author to the beginning of the file. '
                'Default is \'true\'. Use \'false\' to disable.')),
        OptionRecommendation(name='prepend_title', recommended_value='true',
            choices=['true', 'false'],
            level=OptionRecommendation.LOW, long_switch='prepend_title',
            help=_('Write the title to the beginning of the file. '
                'Default is \'true\'. Use \'false\' to disable.'))
    ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Render ``oeb_book`` as text and write it to ``output_path``.

        :param oeb_book: Book object; its ``spine`` and ``metadata`` are read.
        :param output_path: Either a path or an object with a ``write``
                            method. A path is opened (creating its directory
                            when missing) and closed again by this method; a
                            stream is left open.
        :param input_plugin: Not used by this method.
        :param opts: Options; ``prepend_author``, ``prepend_title``,
                     ``authors``, ``title`` and ``newline`` are read.
        :param log: Logger handed to the ``TxtWriter``.
        '''
        metadata = TxtMetadata()
        if opts.prepend_author.lower() == 'true':
            metadata.author = opts.authors if opts.authors else authors_to_string(oeb_book.metadata.authors)
        if opts.prepend_title.lower() == 'true':
            metadata.title = opts.title if opts.title else oeb_book.metadata.title

        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
        txt = writer.dump(oeb_book.spine, metadata)
        # The output is binary: encode explicitly instead of relying on the
        # implicit ASCII conversion, which fails on any non-ASCII character.
        txt = txt.encode('utf-8')

        if hasattr(output_path, 'write'):
            close = False
            out_stream = output_path
        else:
            close = True
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')

        try:
            out_stream.seek(0)
            out_stream.truncate()
            out_stream.write(txt)
        finally:
            # Only close what was opened here, even when writing fails.
            if close:
                out_stream.close()
|
158
src/calibre/ebooks/txt/writer.py
Normal file
158
src/calibre/ebooks/txt/writer.py
Normal file
@ -0,0 +1,158 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import with_statement
|
||||
'''
|
||||
Write content to TXT.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, sys
|
||||
|
||||
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
||||
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
class TxtWriter(object):
    '''
    Turn the HTML files of a book spine into one plain text string.

    :param newline: Newline sequence used in the generated text.
    :param log: Logger object; stored on the instance as ``log``.
    '''

    def __init__(self, newline, log):
        self.newline = newline
        self.log = log

    def dump(self, spine, metadata):
        '''
        Convert every item of ``spine`` to text and join the results.

        :param spine: Iterable of items that can be passed to ``open`` and
                      that carry an ``encoding`` attribute.
        :param metadata: Object with ``title`` and ``author`` attributes; the
                         ones that are neither None nor empty are written,
                         upper cased, in front of the text.
        :return: The complete text, ending in three newlines.
        '''
        parts = []
        for item in spine:
            with open(item, 'r') as itemf:
                content = itemf.read().decode(item.encoding)
            # Convert newlines to unix style \n for processing. These
            # will be changed to the specified type later in the process.
            content = self.unix_newlines(content)
            content = self.strip_html(content)
            content = self.replace_html_symbols(content)
            content = self.cleanup_text(content)
            content = self.specified_newlines(content)
            parts.append(content)
        out = u''.join(parts)

        # Prepend metadata. The title is prepended last, so it ends up above
        # the author.
        if metadata.author is not None and metadata.author != '':
            out = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + out
        if metadata.title is not None and metadata.title != '':
            out = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + out

        # Put two blank lines at end of file: top the text up to three
        # trailing newlines.
        end = out[-3 * len(self.newline):]
        out += self.newline * (3 - end.count(self.newline))

        return out

    def strip_html(self, html):
        '''
        Reduce the ``body`` elements of an HTML document to text.

        Block level closing tags become blank lines, headings are replaced by
        markers that ``cleanup_text`` later expands, all other tags are
        dropped.

        :param html: HTML source with unix newlines.
        :return: Unicode string with the markup removed.
        '''
        stripped = u''

        for dom_tree in BeautifulSoup(html).findAll('body'):
            text = unicode(dom_tree)

            # Remove unnecessary tags together with their content. Matching
            # is non greedy and spans lines, so multi line scripts are
            # removed and text between two scripts is kept.
            for tag in ['script', 'style']:
                text = re.sub(r'(?isu)<[ ]*%s[ ]*.*?>.*?</[ ]*%s[ ]*>' % (tag, tag), '', text)
            text = re.sub(r'(?s)<!--.*?-->', '', text)
            text = re.sub(r'<\?.*?\?>', '', text)
            text = re.sub(r'<@.*?@>', '', text)
            text = re.sub(r'<%.*?%>', '', text)

            # Headings usually indicate Chapters.
            # Markers are used here and only replaced by the proper number
            # of newlines at the end of cleanup_text, because cleanup_text
            # removes excessive newlines (more than 2).
            for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                text = re.sub(r'(?imu)<[ ]*%s[ ]*.*?>' % tag, '-vzxedxy-', text)
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '-vlgzxey-', text)

            # Separate content with space.
            for tag in ['td']:
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, ' ', text)

            # Separate content with empty line.
            for tag in ['p', 'div', 'pre', 'li', 'table', 'tr']:
                text = re.sub(r'(?imu)</[ ]*%s[ ]*>' % tag, '\n\n', text)

            for tag in ['hr', 'br']:
                text = re.sub(r'(?imu)<[ ]*%s[ ]*/*?>' % tag, '\n\n', text)

            # Remove any tags that do not need special processing.
            text = re.sub(r'<.*?>', '', text)

            stripped = stripped + text

        return stripped

    def replace_html_symbols(self, content):
        '''
        Replace every code listed in ``HTML_SYMBOLS`` by its symbol.

        :param content: Text to process.
        :return: The text with the replacements applied.
        '''
        for symbol in HTML_SYMBOLS:
            for code in HTML_SYMBOLS[symbol]:
                content = content.replace(code, symbol)
        return content

    def cleanup_text(self, text):
        '''
        Normalise the whitespace of text produced by ``strip_html``.

        :param text: Text with unix newlines.
        :return: Text with single newlines joined into paragraphs, runs of
                 spaces and blank lines collapsed, heading markers expanded
                 and spaces at line starts and ends removed.
        '''
        # Replace bad characters.
        # NOTE(review): this also removes every legitimate U+00C2 character;
        # presumably aimed at mis-decoded UTF-8 -- confirm before changing.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')

        # Replace tabs, vertical tabs and form feeds with single space.
        text = re.sub(r'[\t\v\f]+', ' ', text)

        # Single line paragraph: a lone newline between two characters is
        # only a line wrap and becomes a space.
        text = re.sub(r'(?<=.)\n(?=.)', ' ', text)

        # Remove multiple spaces.
        text = re.sub(r'[ ]+', ' ', text)

        # Remove excessive newlines.
        text = re.sub(r'\n[ ]+\n', '\n\n', text)
        text = re.sub(r'\n{3,}', '\n\n', text)

        # Replace markers with the proper characters.
        text = text.replace('-vzxedxy-', '\n\n\n\n\n')
        text = text.replace('-vlgzxey-', '\n\n\n')

        # Remove spaces at the beginning and end of lines.
        text = re.sub(r'(?imu)^[ ]+', '', text)
        text = re.sub(r'(?imu)[ ]+$', '', text)

        return text

    def unix_newlines(self, text):
        '''
        Convert ``\\r\\n`` and ``\\r`` newlines to ``\\n``.
        '''
        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')

        return text

    def specified_newlines(self, text):
        '''
        Convert ``\\n`` newlines to the newline sequence of this writer.
        '''
        if self.newline == '\n':
            return text

        return text.replace('\n', self.newline)
|
||||
|
||||
|
||||
class TxtNewlines(object):
    '''
    Resolve the name of a newline style to the actual newline sequence.

    After construction the sequence is available as ``newline``.
    '''

    # Names of the supported newline styles and the sequences they stand for.
    NEWLINE_TYPES = {
        'system' : os.linesep,
        'unix' : '\n',
        'old_mac' : '\r',
        'windows' : '\r\n'
    }

    def __init__(self, newline_type):
        '''
        :param newline_type: Name of a newline style, matched case
                             insensitively; unknown names select
                             ``os.linesep``.
        '''
        style = newline_type.lower()
        if style in self.NEWLINE_TYPES:
            self.newline = self.NEWLINE_TYPES[style]
        else:
            self.newline = os.linesep
|
||||
|
||||
|
||||
class TxtMetadata(object):
    '''
    Holder for the metadata written in front of the text: ``title`` and
    ``author``, both None until set.
    '''

    def __init__(self):
        self.title = self.author = None
|
@ -709,6 +709,9 @@ class BooksView(TableView):
|
||||
|
||||
def close(self):
|
||||
self._model.close()
|
||||
|
||||
def set_editable(self, editable):
|
||||
self._model.set_editable(editable)
|
||||
|
||||
def set_editable(self, editable):
|
||||
self._model.set_editable(editable)
|
||||
@ -1002,6 +1005,10 @@ class DeviceBooksModel(BooksModel):
|
||||
self.sort(col, self.sorted_on[1])
|
||||
done = True
|
||||
return done
|
||||
|
||||
def set_editable(self, editable):
|
||||
self.editable = editable
|
||||
|
||||
|
||||
def set_editable(self, editable):
|
||||
self.editable = editable
|
||||
|
@ -39,10 +39,9 @@ entry_points = {
|
||||
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
||||
'calibre-parallel = calibre.parallel:main',
|
||||
'calibre-customize = calibre.customize.ui:main',
|
||||
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
|
||||
'pdfmanipulate = calibre.ebooks.pdf.manipulate:main',
|
||||
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
|
||||
'calibre-smtp = calibre.utils.smtp:main',
|
||||
|
||||
],
|
||||
'gui_scripts' : [
|
||||
__appname__+' = calibre.gui2.main:main',
|
||||
@ -548,6 +547,3 @@ main = post_install
|
||||
if __name__ == '__main__':
|
||||
post_install()
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user