mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PDF manipulation via the pdfmanipulate command. TXT and PDF output.
This commit is contained in:
commit
b2e8618354
@ -160,6 +160,16 @@ class ODTMetadataReader(MetadataReaderPlugin):
|
|||||||
from calibre.ebooks.metadata.odt import get_metadata
|
from calibre.ebooks.metadata.odt import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class TXTMetadataReader(MetadataReaderPlugin):
    '''
    Metadata reader plugin registered for files of type ``txt``.

    The reading itself is delegated to ``calibre.ebooks.metadata.txt``.
    '''

    name = 'Read TXT metadata'
    file_types = set(['txt'])
    description = _('Read metadata from %s files') % 'TXT'

    def get_metadata(self, stream, ftype):
        '''Return the metadata read from ``stream``; ``ftype`` is not used.'''
        # Imported inside the method rather than at module level.
        from calibre.ebooks.metadata import txt
        return txt.get_metadata(stream)
|
||||||
|
|
||||||
class LRXMetadataReader(MetadataReaderPlugin):
|
class LRXMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read LRX metadata'
|
name = 'Read LRX metadata'
|
||||||
@ -256,9 +266,11 @@ class MOBIMetadataWriter(MetadataWriterPlugin):
|
|||||||
from calibre.ebooks.epub.input import EPUBInput
|
from calibre.ebooks.epub.input import EPUBInput
|
||||||
from calibre.ebooks.mobi.input import MOBIInput
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
|
from calibre.ebooks.pdf.output import PDFOutput
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput]
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput, TXTOutput, PDFOutput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -196,7 +196,7 @@ OptionRecommendation(name='language',
|
|||||||
self.input_fmt = input_fmt
|
self.input_fmt = input_fmt
|
||||||
self.output_fmt = output_fmt
|
self.output_fmt = output_fmt
|
||||||
|
|
||||||
# Build set of all possible options. Two options are equal iff their
|
# Build set of all possible options. Two options are equal if and only if their
|
||||||
# names are the same.
|
# names are the same.
|
||||||
self.input_options = self.input_plugin.options.union(
|
self.input_options = self.input_plugin.options.union(
|
||||||
self.input_plugin.common_options)
|
self.input_plugin.common_options)
|
||||||
|
310
src/calibre/ebooks/htmlsymbols.py
Normal file
310
src/calibre/ebooks/htmlsymbols.py
Normal file
@ -0,0 +1,310 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
'''
|
||||||
|
Mapping of non-ASCII symbols and their corresponding HTML entity number and name
|
||||||
|
'''
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
|
||||||
|
# http://www.w3schools.com/tags/ref_symbols.asp
|
||||||
|
HTML_SYMBOLS = {
|
||||||
|
# Math Symbols
|
||||||
|
u'∀' : ['∀', '∀'], # for all
|
||||||
|
u'∂' : ['∂', '∂'], # part
|
||||||
|
u'∃' : ['∃', '&exists;'], # exists
|
||||||
|
u'∅' : ['∅', '∅'], # empty
|
||||||
|
u'∇' : ['∇', '∇'], # nabla
|
||||||
|
u'∈' : ['∈', '∈'], # isin
|
||||||
|
u'∉' : ['∉', '∉'], # notin
|
||||||
|
u'∋' : ['∋', '∋'], # ni
|
||||||
|
u'∏' : ['∏', '∏'], # prod
|
||||||
|
u'∑' : ['∑', '∑'], # sum
|
||||||
|
u'−' : ['−', '−'], # minus
|
||||||
|
u'∗' : ['∗', '∗'], # lowast
|
||||||
|
u'√' : ['√', '√'], # square root
|
||||||
|
u'∝' : ['∝', '∝'], # proportional to
|
||||||
|
u'∞' : ['∞', '∞'], # infinity
|
||||||
|
u'∠' : ['∠', '∠'], # angle
|
||||||
|
u'∧' : ['∧', '∧'], # and
|
||||||
|
u'∨' : ['∨', '∨'], # or
|
||||||
|
u'∩' : ['∩', '∩'], # cap
|
||||||
|
u'∪' : ['∪', '∪'], # cup
|
||||||
|
u'∫' : ['∫', '∫'], # integral
|
||||||
|
u'∴' : ['∴', '∴'], # therefore
|
||||||
|
u'∼' : ['∼', '∼'], # simular to
|
||||||
|
u'≅' : ['≅', '≅'], # approximately equal
|
||||||
|
u'≈' : ['≈', '≈'], # almost equal
|
||||||
|
u'≠' : ['≠', '≠'], # not equal
|
||||||
|
u'≡' : ['≡', '≡'], # equivalent
|
||||||
|
u'≤' : ['≤', '≤'], # less or equal
|
||||||
|
u'≥' : ['≥', '≥'], # greater or equal
|
||||||
|
u'⊂' : ['⊂', '⊂'], # subset of
|
||||||
|
u'⊃' : ['⊃', '⊃'], # superset of
|
||||||
|
u'⊄' : ['⊄', '⊄'], # not subset of
|
||||||
|
u'⊆' : ['⊆', '⊆'], # subset or equal
|
||||||
|
u'⊇' : ['⊇', '⊇'], # superset or equal
|
||||||
|
u'⊕' : ['⊕', '⊕'], # circled plus
|
||||||
|
u'⊗' : ['⊗', '⊗'], # cirled times
|
||||||
|
u'⊥' : ['⊥', '⊥'], # perpendicular
|
||||||
|
u'⋅' : ['⋅', '⋅'], # dot operator
|
||||||
|
# Greek Letters
|
||||||
|
u'Α' : ['Α', 'Α'], # Alpha
|
||||||
|
u'Β' : ['Β', 'Β'], # Beta
|
||||||
|
u'Γ' : ['Γ', 'Γ'], # Gamma
|
||||||
|
u'Δ' : ['Δ', 'Δ'], # Delta
|
||||||
|
u'Ε' : ['Ε', 'Ε'], # Epsilon
|
||||||
|
u'Ζ' : ['Ζ', 'Ζ'], # Zeta
|
||||||
|
u'Η' : ['Η', 'Η'], # Eta
|
||||||
|
u'Θ' : ['Θ', 'Θ'], # Theta
|
||||||
|
u'Ι' : ['Ι', 'Ι'], # Iota
|
||||||
|
u'Κ' : ['Κ', 'Κ'], # Kappa
|
||||||
|
u'Λ' : ['Λ', 'Λ'], # Lambda
|
||||||
|
u'Μ' : ['Μ', 'Μ'], # Mu
|
||||||
|
u'Ν' : ['Ν', 'Ν'], # Nu
|
||||||
|
u'Ξ' : ['Ξ', 'Ξ'], # Xi
|
||||||
|
u'Ο' : ['Ο', 'Ο'], # Omicron
|
||||||
|
u'Π' : ['Π', 'Π'], # Pi
|
||||||
|
u'Ρ' : ['Ρ', 'Ρ'], # Rho
|
||||||
|
u'Σ' : ['Σ', 'Σ'], # Sigma
|
||||||
|
u'Τ' : ['Τ', 'Τ'], # Tau
|
||||||
|
u'Υ' : ['Υ', 'Υ'], # Upsilon
|
||||||
|
u'Φ' : ['Φ', 'Φ'], # Phi
|
||||||
|
u'Χ' : ['Χ', 'Χ'], # Chi
|
||||||
|
u'Ψ' : ['Ψ', 'Ψ'], # Psi
|
||||||
|
u'ω' : ['ω', 'ω'], # omega
|
||||||
|
u'ϑ' : ['ϑ', 'ϑ'], # theta symbol
|
||||||
|
u'ϒ' : ['ϒ', 'ϒ'], # upsilon symbol
|
||||||
|
u'ϖ' : ['ϖ', 'ϖ'], # pi symbol
|
||||||
|
# Other
|
||||||
|
u'Œ' : ['Œ', 'Œ'], # capital ligature OE
|
||||||
|
u'œ' : ['œ', 'œ'], # small ligature oe
|
||||||
|
u'Š' : ['Š', 'Š'], # capital S with caron
|
||||||
|
u'š' : ['š', 'š'], # small S with caron
|
||||||
|
u'Ÿ' : ['Ÿ', 'Ÿ'], # capital Y with diaeres
|
||||||
|
u'ƒ' : ['ƒ', 'ƒ'], # f with hook
|
||||||
|
u'ˆ' : ['ˆ', 'ˆ'], # modifier letter circumflex accent
|
||||||
|
u'˜' : ['˜', '˜'], # small tilde
|
||||||
|
u'–' : ['–', '–'], # en dash
|
||||||
|
u'—' : ['—', '—'], # em dash
|
||||||
|
u'‘' : ['‘', '‘'], # left single quotation mark
|
||||||
|
u'’' : ['’', '’'], # right single quotation mark
|
||||||
|
u'‚' : ['‚', '‚'], # single low-9 quotation mark
|
||||||
|
u'“' : ['“', '“'], # left double quotation mark
|
||||||
|
u'”' : ['”', '”'], # right double quotation mark
|
||||||
|
u'„' : ['„', '„'], # double low-9 quotation mark
|
||||||
|
u'†' : ['†', '†'], # dagger
|
||||||
|
u'‡' : ['‡', '‡'], # double dagger
|
||||||
|
u'•' : ['•', '•'], # bullet
|
||||||
|
u'…' : ['…', '…'], # horizontal ellipsis
|
||||||
|
u'‰' : ['‰', '‰'], # per mille
|
||||||
|
u'′' : ['′', '′'], # minutes
|
||||||
|
u'″' : ['″', '″'], # seconds
|
||||||
|
u'‹' : ['‹', '‹'], # single left angle quotation
|
||||||
|
u'›' : ['›', '›'], # single right angle quotation
|
||||||
|
u'‾' : ['‾', '‾'], # overline
|
||||||
|
u'€' : ['€', '€'], # euro
|
||||||
|
u'™' : ['™', '™'], # trademark
|
||||||
|
u'←' : ['←', '←'], # left arrow
|
||||||
|
u'↑' : ['↑', '↑'], # up arrow
|
||||||
|
u'→' : ['→', '→'], # right arrow
|
||||||
|
u'↓' : ['↓', '↓'], # down arrow
|
||||||
|
u'↔' : ['↔', '↔'], # left right arrow
|
||||||
|
u'↵' : ['↵', '↵'], # carriage return arrow
|
||||||
|
u'⌈' : ['⌈', '⌈'], # left ceiling
|
||||||
|
u'⌉' : ['⌉', '⌉'], # right ceiling
|
||||||
|
u'⌊' : ['⌊', '⌊'], # left floor
|
||||||
|
u'⌋' : ['⌋', '⌋'], # right floor
|
||||||
|
u'◊' : ['◊', '◊'], # lozenge
|
||||||
|
u'♠' : ['♠', '♠'], # spade
|
||||||
|
u'♣' : ['♣', '♣'], # club
|
||||||
|
u'♥' : ['♥', '♥'], # heart
|
||||||
|
u'♦' : ['♦', '♦'], # diamond
|
||||||
|
# Extra http://www.ascii.cl/htmlcodes.htm
|
||||||
|
u' ' : [' '], # space
|
||||||
|
u'!' : ['!'], # exclamation point
|
||||||
|
u'#' : ['#'], # number sign
|
||||||
|
u'$' : ['$'], # dollar sign
|
||||||
|
u'%' : ['%'], # percent sign
|
||||||
|
u'\'' : ['''], # single quote
|
||||||
|
u'(' : ['('], # opening parenthesis
|
||||||
|
u')' : [')'], # closing parenthesis
|
||||||
|
u'*' : ['*'], # asterisk
|
||||||
|
u'+' : ['+'], # plus sign
|
||||||
|
u',' : [','], # comma
|
||||||
|
u'-' : ['-'], # minus sign - hyphen
|
||||||
|
u'.' : ['.'], # period
|
||||||
|
u'/' : ['/'], # slash
|
||||||
|
u'0' : ['0'], # zero
|
||||||
|
u'1' : ['1'], # one
|
||||||
|
u'2' : ['2'], # two
|
||||||
|
u'3' : ['3'], # three
|
||||||
|
u'4' : ['4'], # four
|
||||||
|
u'5' : ['5'], # five
|
||||||
|
u'6' : ['6'], # six
|
||||||
|
u'7' : ['7'], # seven
|
||||||
|
u'8' : ['8'], # eight
|
||||||
|
u'9' : ['9'], # nine
|
||||||
|
u':' : [':'], # colon
|
||||||
|
u';' : [';'], # semicolon
|
||||||
|
u'=' : ['='], # equal sign
|
||||||
|
u'?' : ['?'], # question mark
|
||||||
|
u'@' : ['@'], # at symbol
|
||||||
|
u'A' : ['A'], #
|
||||||
|
u'B' : ['B'], #
|
||||||
|
u'C' : ['C'], #
|
||||||
|
u'D' : ['D'], #
|
||||||
|
u'E' : ['E'], #
|
||||||
|
u'F' : ['F'], #
|
||||||
|
u'G' : ['G'], #
|
||||||
|
u'H' : ['H'], #
|
||||||
|
u'I' : ['I'], #
|
||||||
|
u'J' : ['J'], #
|
||||||
|
u'K' : ['K'], #
|
||||||
|
u'L' : ['L'], #
|
||||||
|
u'M' : ['M'], #
|
||||||
|
u'N' : ['N'], #
|
||||||
|
u'O' : ['O'], #
|
||||||
|
u'P' : ['P'], #
|
||||||
|
u'Q' : ['Q'], #
|
||||||
|
u'R' : ['R'], #
|
||||||
|
u'S' : ['S'], #
|
||||||
|
u'T' : ['T'], #
|
||||||
|
u'U' : ['U'], #
|
||||||
|
u'V' : ['V'], #
|
||||||
|
u'W' : ['W'], #
|
||||||
|
u'X' : ['X'], #
|
||||||
|
u'Y' : ['Y'], #
|
||||||
|
u'Z' : ['Z'], #
|
||||||
|
u'[' : ['['], # opening bracket
|
||||||
|
u'\\' : ['\'], # backslash
|
||||||
|
u']' : [']'], # closing bracket
|
||||||
|
u'^' : ['^'], # caret - circumflex
|
||||||
|
u'_' : ['_'], # underscore
|
||||||
|
u'`' : ['`'], # grave accent
|
||||||
|
u'a' : ['a'], #
|
||||||
|
u'b' : ['b'], #
|
||||||
|
u'c' : ['c'], #
|
||||||
|
u'd' : ['d'], #
|
||||||
|
u'e' : ['e'], #
|
||||||
|
u'f' : ['f'], #
|
||||||
|
u'g' : ['g'], #
|
||||||
|
u'h' : ['h'], #
|
||||||
|
u'i' : ['i'], #
|
||||||
|
u'j' : ['j'], #
|
||||||
|
u'k' : ['k'], #
|
||||||
|
u'l' : ['l'], #
|
||||||
|
u'm' : ['m'], #
|
||||||
|
u'n' : ['n'], #
|
||||||
|
u'o' : ['o'], #
|
||||||
|
u'p' : ['p'], #
|
||||||
|
u'q' : ['q'], #
|
||||||
|
u'r' : ['r'], #
|
||||||
|
u's' : ['s'], #
|
||||||
|
u't' : ['t'], #
|
||||||
|
u'u' : ['u'], #
|
||||||
|
u'v' : ['v'], #
|
||||||
|
u'w' : ['w'], #
|
||||||
|
u'x' : ['x'], #
|
||||||
|
u'y' : ['y'], #
|
||||||
|
u'z' : ['z'], #
|
||||||
|
u'{' : ['{'], # opening brace
|
||||||
|
u'|' : ['|'], # vertical bar
|
||||||
|
u'}' : ['}'], # closing brace
|
||||||
|
u'~' : ['~'], # equivalency sign - tilde
|
||||||
|
u'<' : ['<', '<'], # less than sign
|
||||||
|
u'>' : ['>', '>'], # greater than sign
|
||||||
|
u'¡' : ['¡', '¡'], # inverted exclamation mark
|
||||||
|
u'¢' : ['¢', '¢'], # cent sign
|
||||||
|
u'£' : ['£', '£'], # pound sign
|
||||||
|
u'¤' : ['¤', '¤'], # currency sign
|
||||||
|
u'¥' : ['¥', '¥'], # yen sign
|
||||||
|
u'¦' : ['¦', '¦'], # broken vertical bar
|
||||||
|
u'§' : ['§', '§'], # section sign
|
||||||
|
u'¨' : ['¨', '¨'], # spacing diaeresis - umlaut
|
||||||
|
u'©' : ['©', '©'], # copyright sign
|
||||||
|
u'ª' : ['ª', 'ª'], # feminine ordinal indicator
|
||||||
|
u'«' : ['«', '«'], # left double angle quotes
|
||||||
|
u'¬' : ['¬', '¬'], # not sign
|
||||||
|
u'®' : ['®', '®'], # registered trade mark sign
|
||||||
|
u'¯' : ['¯', '¯'], # spacing macron - overline
|
||||||
|
u'°' : ['°', '°'], # degree sign
|
||||||
|
u'±' : ['±', '±'], # plus-or-minus sign
|
||||||
|
u'²' : ['²', '²'], # superscript two - squared
|
||||||
|
u'³' : ['³', '³'], # superscript three - cubed
|
||||||
|
u'´' : ['´', '´'], # acute accent - spacing acute
|
||||||
|
u'µ' : ['µ', 'µ'], # micro sign
|
||||||
|
u'¶' : ['¶', '¶'], # pilcrow sign - paragraph sign
|
||||||
|
u'·' : ['·', '·'], # middle dot - Georgian comma
|
||||||
|
u'¸' : ['¸', '¸'], # spacing cedilla
|
||||||
|
u'¹' : ['¹', '¹'], # superscript one
|
||||||
|
u'º' : ['º', 'º'], # masculine ordinal indicator
|
||||||
|
u'»' : ['»', '»'], # right double angle quotes
|
||||||
|
u'¼' : ['¼', '¼'], # fraction one quarter
|
||||||
|
u'½' : ['½', '½'], # fraction one half
|
||||||
|
u'¾' : ['¾', '¾'], # fraction three quarters
|
||||||
|
u'¿' : ['¿', '¿'], # inverted question mark
|
||||||
|
u'À' : ['À', 'À'], # latin capital letter A with grave
|
||||||
|
u'Á' : ['Á', 'Á'], # latin capital letter A with acute
|
||||||
|
u'Â' : ['Â', 'Â'], # latin capital letter A with circumflex
|
||||||
|
u'Ã' : ['Ã', 'Ã'], # latin capital letter A with tilde
|
||||||
|
u'Ä' : ['Ä', 'Ä'], # latin capital letter A with diaeresis
|
||||||
|
u'Å' : ['Å', 'Å'], # latin capital letter A with ring above
|
||||||
|
u'Æ' : ['Æ', 'Æ'], # latin capital letter AE
|
||||||
|
u'Ç' : ['Ç', 'Ç'], # latin capital letter C with cedilla
|
||||||
|
u'È' : ['È', 'È'], # latin capital letter E with grave
|
||||||
|
u'É' : ['É', 'É'], # latin capital letter E with acute
|
||||||
|
u'Ê' : ['Ê', 'Ê'], # latin capital letter E with circumflex
|
||||||
|
u'Ë' : ['Ë', 'Ë'], # latin capital letter E with diaeresis
|
||||||
|
u'Ì' : ['Ì', 'Ì'], # latin capital letter I with grave
|
||||||
|
u'Í' : ['Í', 'Í'], # latin capital letter I with acute
|
||||||
|
u'Î' : ['Î', 'Î'], # latin capital letter I with circumflex
|
||||||
|
u'Ï' : ['Ï', 'Ï'], # latin capital letter I with diaeresis
|
||||||
|
u'Ð' : ['Ð', 'Ð'], # latin capital letter ETH
|
||||||
|
u'Ñ' : ['Ñ', 'Ñ'], # latin capital letter N with tilde
|
||||||
|
u'Ò' : ['Ò', 'Ò'], # latin capital letter O with grave
|
||||||
|
u'Ó' : ['Ó', 'Ó'], # latin capital letter O with acute
|
||||||
|
u'Ô' : ['Ô', 'Ô'], # latin capital letter O with circumflex
|
||||||
|
u'Õ' : ['Õ', 'Õ'], # latin capital letter O with tilde
|
||||||
|
u'Ö' : ['Ö', 'Ö'], # latin capital letter O with diaeresis
|
||||||
|
u'×' : ['×', '×'], # multiplication sign
|
||||||
|
u'Ø' : ['Ø', 'Ø'], # latin capital letter O with slash
|
||||||
|
u'Ù' : ['Ù', 'Ù'], # latin capital letter U with grave
|
||||||
|
u'Ú' : ['Ú', 'Ú'], # latin capital letter U with acute
|
||||||
|
u'Û' : ['Û', 'Û'], # latin capital letter U with circumflex
|
||||||
|
u'Ü' : ['Ü', 'Ü'], # latin capital letter U with diaeresis
|
||||||
|
u'Ý' : ['Ý', 'Ý'], # latin capital letter Y with acute
|
||||||
|
u'Þ' : ['Þ', 'Þ'], # latin capital letter THORN
|
||||||
|
u'ß' : ['ß', 'ß'], # latin small letter sharp s - ess-zed
|
||||||
|
u'à' : ['à', 'à'], # latin small letter a with grave
|
||||||
|
u'á' : ['á', 'á'], # latin small letter a with acute
|
||||||
|
u'â' : ['â', 'â'], # latin small letter a with circumflex
|
||||||
|
u'ã' : ['ã', 'ã'], # latin small letter a with tilde
|
||||||
|
u'ä' : ['ä', 'ä'], # latin small letter a with diaeresis
|
||||||
|
u'å' : ['å', 'å'], # latin small letter a with ring above
|
||||||
|
u'æ' : ['æ', 'æ'], # latin small letter ae
|
||||||
|
u'ç' : ['ç', 'ç'], # latin small letter c with cedilla
|
||||||
|
u'è' : ['è', 'è'], # latin small letter e with grave
|
||||||
|
u'é' : ['é', 'é'], # latin small letter e with acute
|
||||||
|
u'ê' : ['ê', 'ê'], # latin small letter e with circumflex
|
||||||
|
u'ë' : ['ë', 'ë'], # latin small letter e with diaeresis
|
||||||
|
u'ì' : ['ì', 'ì'], # latin small letter i with grave
|
||||||
|
u'í' : ['í', 'í'], # latin small letter i with acute
|
||||||
|
u'î' : ['î', 'î'], # latin small letter i with circumflex
|
||||||
|
u'ï' : ['ï', 'ï'], # latin small letter i with diaeresis
|
||||||
|
u'ð' : ['ð', 'ð'], # latin small letter eth
|
||||||
|
u'ñ' : ['ñ', 'ñ'], # latin small letter n with tilde
|
||||||
|
u'ò' : ['ò', 'ò'], # latin small letter o with grave
|
||||||
|
u'ó' : ['ó', 'ó'], # latin small letter o with acute
|
||||||
|
u'ô' : ['ô', 'ô'], # latin small letter o with circumflex
|
||||||
|
u'õ' : ['õ', 'õ'], # latin small letter o with tilde
|
||||||
|
u'ö' : ['ö', 'ö'], # latin small letter o with diaeresis
|
||||||
|
u'÷' : ['÷', '÷'], # division sign
|
||||||
|
u'ø' : ['ø', 'ø'], # latin small letter o with slash
|
||||||
|
u'ù' : ['ù', 'ù'], # latin small letter u with grave
|
||||||
|
u'ú' : ['ú', 'ú'], # latin small letter u with acute
|
||||||
|
u'û' : ['û', 'û'], # latin small letter u with circumflex
|
||||||
|
u'ü' : ['ü', 'ü'], # latin small letter u with diaeresis
|
||||||
|
u'ý' : ['ý', 'ý'], # latin small letter y with acute
|
||||||
|
u'þ' : ['þ', 'þ'], # latin small letter thorn
|
||||||
|
u'ÿ' : ['ÿ', 'ÿ'], # latin small letter y with diaeresis
|
||||||
|
}
|
||||||
|
|
30
src/calibre/ebooks/metadata/txt.py
Normal file
30
src/calibre/ebooks/metadata/txt.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
'''Read meta information from TXT files'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata read from the head of a TXT file as a MetaInformation object.

    Only the first four lines of ``stream`` are examined. They must hold a
    title line, two empty lines and then a line of comma separated author
    names. When the lines do not have that layout, title and authors both
    keep the translated default 'Unknown'.

    :param stream: Seekable file-like object; it is rewound before reading.
    :param extract_cover: Not used by this reader.
    :return: The MetaInformation object.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Collect at most four lines; an empty read means end of file.
    head = []
    for _lineno in range(4):
        line = stream.readline()
        if not line:
            break
        head.append(line)
    mdata = ''.join(head)

    mo = re.search(r'(?u)^[ ]*(?P<title>.+)[ ]*(\n{3}|(\r\n){3}|\r{3})[ ]*(?P<author>.+)[ ]*(\n|\r\n|\r)$', mdata)
    if mo is not None:
        # The greedy ``.+`` groups swallow trailing blanks (and a trailing
        # '\r' on CRLF files), so strip what was captured before using it.
        title = mo.group('title').strip()
        if title:
            mi.title = title
        authors = [name.strip() for name in mo.group('author').split(',')]
        authors = [name for name in authors if name]
        if authors:
            mi.authors = authors

    return mi
|
@ -1,69 +0,0 @@
|
|||||||
'''
|
|
||||||
Convert any ebook format to PDF.
|
|
||||||
'''
|
|
||||||
|
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net ' \
|
|
||||||
'and Marshall T. Vandegrift <llasram@gmail.com>' \
|
|
||||||
'and John Schember <john@nachtimwald.com>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
import sys, os, glob, logging
|
|
||||||
|
|
||||||
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
|
|
||||||
from calibre.ebooks.epub import config as common_config
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
|
||||||
from calibre.ebooks.pdf.writer import oeb2pdf, config as pdf_config
|
|
||||||
|
|
||||||
def config(defaults=None):
|
|
||||||
c = common_config(defaults=defaults, name='pdf')
|
|
||||||
c.remove_opt('profile')
|
|
||||||
pdfc = pdf_config(defaults=defaults)
|
|
||||||
c.update(pdfc)
|
|
||||||
return c
|
|
||||||
|
|
||||||
def option_parser(usage=USAGE):
|
|
||||||
usage = usage % ('PDF', formats())
|
|
||||||
parser = config().option_parser(usage=usage)
|
|
||||||
return parser
|
|
||||||
|
|
||||||
def any2pdf(opts, path, notification=None):
|
|
||||||
ext = os.path.splitext(path)[1]
|
|
||||||
if not ext:
|
|
||||||
raise ValueError('Unknown file type: '+path)
|
|
||||||
ext = ext.lower()[1:]
|
|
||||||
|
|
||||||
if opts.output is None:
|
|
||||||
opts.output = os.path.splitext(os.path.basename(path))[0]+'.pdf'
|
|
||||||
|
|
||||||
opts.output = os.path.abspath(opts.output)
|
|
||||||
orig_output = opts.output
|
|
||||||
|
|
||||||
with TemporaryDirectory('_any2pdf') as tdir:
|
|
||||||
oebdir = os.path.join(tdir, 'oeb')
|
|
||||||
os.mkdir(oebdir)
|
|
||||||
opts.output = os.path.join(tdir, 'dummy.epub')
|
|
||||||
opts.profile = 'None'
|
|
||||||
opts.dont_split_on_page_breaks = True
|
|
||||||
orig_bfs = opts.base_font_size2
|
|
||||||
opts.base_font_size2 = 0
|
|
||||||
any2epub(opts, path, create_epub=False, oeb_cover=True, extract_to=oebdir)
|
|
||||||
opts.base_font_size2 = orig_bfs
|
|
||||||
opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
|
|
||||||
opts.output = orig_output
|
|
||||||
logging.getLogger('html2epub').info(_('Creating PDF file from EPUB...'))
|
|
||||||
oeb2pdf(opts, opf)
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
|
||||||
parser = option_parser()
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
if len(args) < 2:
|
|
||||||
parser.print_help()
|
|
||||||
print 'No input file specified.'
|
|
||||||
return 1
|
|
||||||
any2pdf(opts, args[1])
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
90
src/calibre/ebooks/pdf/info.py
Normal file
90
src/calibre/ebooks/pdf/info.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
'''
|
||||||
|
Print information about PDF documents.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, re, sys, time
|
||||||
|
|
||||||
|
from calibre.utils.config import Config, StringConfig
|
||||||
|
|
||||||
|
from pyPdf import PdfFileWriter, PdfFileReader
|
||||||
|
|
||||||
|
|
||||||
|
def config(defaults=None):
    '''
    Build the configuration object for this command.

    Without ``defaults`` a Config named 'manipulatepdf' is returned,
    otherwise a StringConfig built from ``defaults``. No options are added.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
|
||||||
|
|
||||||
|
def option_parser(name):
    '''
    Return the option parser for the info command.

    :param name: Command name substituted for ``%%name`` in the usage text.
    '''
    # Look the constant template up first and substitute the name afterwards.
    # Substituting first changes the string handed to ``_`` on every call, so
    # (assuming ``_`` is a gettext-style lookup) a catalog entry for the
    # template could never match.
    usage = _('''\
%prog %%name [options] file.pdf ...

Get info about a PDF.
''')
    return config().option_parser(usage=usage.replace('%%name', name))
|
||||||
|
|
||||||
|
def print_info(pdf_path):
    '''
    Print a summary of the PDF at ``pdf_path`` to standard output.

    Title, author, subject, creator and producer come from the document
    info read by PdfFileReader. The creation and modification dates are the
    file system times of the file (os.path.getctime / os.path.getmtime)
    formatted with time.gmtime. File size and PDF version are best effort
    and are skipped when they cannot be determined.

    :param pdf_path: Path of the PDF file to describe.
    '''
    date_format = '%a %b %d %H:%M:%S %Y'

    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        doc_info = pdf.documentInfo

        # The label is translated first and the value inserted afterwards;
        # formatting inside _() would look up a string that contains the
        # value and so could never be found in a message catalog.
        print(_('Title: %s') % doc_info.title)
        print(_('Author: %s') % doc_info.author)
        print(_('Subject: %s') % doc_info.subject)
        print(_('Creator: %s') % doc_info.creator)
        print(_('Producer: %s') % doc_info.producer)
        print(_('Creation Date: %s') % time.strftime(date_format, time.gmtime(os.path.getctime(pdf_path))))
        print(_('Modification Date: %s') % time.strftime(date_format, time.gmtime(os.path.getmtime(pdf_path))))
        print(_('Pages: %s') % pdf.numPages)
        print(_('Encrypted: %s') % pdf.isEncrypted)

        try:
            print(_('File Size: %s bytes') % os.path.getsize(pdf_path))
        except EnvironmentError:
            # Best effort: leave the size out when it cannot be read.
            pass

        try:
            # The version is sniffed from the first line of the file.
            pdf_file.seek(0)
            vline = pdf_file.readline()
            mo = re.search(r'(?iu)^%...-(?P<version>\d+\.\d+)', vline)
            if mo is not None:
                print(_('PDF Version: %s') % mo.group('version'))
        except Exception:
            # Best effort: leave the version out when it cannot be read.
            pass
|
||||||
|
|
||||||
|
def verify_files(files):
    '''
    Return the entries of ``files`` that cannot be opened and read as PDF.

    :param files: Iterable of paths to check.
    :return: List of the failing paths, in input order.
    '''
    invalid = []

    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                # Only constructing the reader matters; the result is unused.
                PdfFileReader(pdf_file)
        except Exception:
            # Any failure to open or parse marks the file as invalid, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            invalid.append(pdf_path)
    return invalid
|
||||||
|
|
||||||
|
def main(args=sys.argv, name=''):
    '''
    Entry point of the info command: print information about each PDF given.

    :param args: Argument list; the first element is skipped as the program name.
    :param name: Command name shown in the usage text.
    :return: 0 on success, 2 when no file is given or a file cannot be read.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]

    if not args:
        print('Error: No PDF specified.\n')
        print(parser.get_usage())
        return 2

    # Refuse to print anything if even one of the files is unreadable.
    bad_pdfs = verify_files(args)
    if bad_pdfs:
        for pdf in bad_pdfs:
            print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    for pdf in args:
        print_info(pdf)

    return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
||||||
|
|
69
src/calibre/ebooks/pdf/manipulate.py
Normal file
69
src/calibre/ebooks/pdf/manipulate.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
'''
|
||||||
|
Command line interface to run pdf manipulation commands.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import string, sys
|
||||||
|
|
||||||
|
from calibre.utils.config import Config, StringConfig
|
||||||
|
from calibre.ebooks.pdf import info, merge, reverse, split, trim
|
||||||
|
|
||||||
|
# Maps each sub-command name accepted on the command line to the module that
# implements it; main() dispatches to that module's own main().
COMMANDS = {
    'info' : info,
    'merge' : merge,
    'reverse' : reverse,
    'split' : split,
    'trim' : trim,
}
|
||||||
|
|
||||||
|
def config(defaults=None):
    '''
    Build the configuration object for the top-level command.

    Without ``defaults`` a Config named 'manipulatepdf' is returned,
    otherwise a StringConfig built from ``defaults``. No options are added.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        return StringConfig(defaults, description)
    return Config('manipulatepdf', description)
|
||||||
|
|
||||||
|
def option_parser():
    '''
    Return the option parser of the top-level command.

    The usage text lists the keys of COMMANDS in sorted order.
    '''
    # Look the constant template up first and fill in the command list
    # afterwards, so that the string handed to ``_`` does not depend on the
    # list (assuming ``_`` is a gettext-style lookup).
    usage = _('''\

%prog command ...

command can be one of the following:
[%%commands]

Use %prog command --help to get more information about a specific command

Manipulate a PDF.
''')
    # str.join replaces the deprecated string.join function.
    commands = ', '.join(sorted(COMMANDS))
    return config().option_parser(usage=usage.replace('%%commands', commands))
|
||||||
|
|
||||||
|
def main(args=sys.argv):
    '''
    Entry point: dispatch to the sub-command named by ``args[1]``.

    :param args: Argument list; ``args[0]`` is the program name and
        ``args[1]`` the sub-command. The list is not modified.
    :return: The sub-command's return value, or 2 when the command is
        missing or unknown.
    '''
    parser = option_parser()

    if len(args) < 2:
        print('Error: No command specified.\n')
        print(parser.get_usage())
        return 2

    command = args[1].lower().strip()

    if command in COMMANDS:
        # Hand the sub-command everything except its own name. A new list is
        # built so the caller's list (sys.argv by default) is left untouched.
        sub_args = args[:1] + args[2:]
        return COMMANDS[command].main(sub_args, command)

    # Parsing here lets the parser act on any options before the error is
    # reported.
    parser.parse_args(args)
    print('Unknown command %s.\n' % command)
    print(parser.get_usage())
    return 2
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
||||||
|
|
91
src/calibre/ebooks/pdf/merge.py
Normal file
91
src/calibre/ebooks/pdf/merge.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
'''
|
||||||
|
Merge PDF files into a single PDF document.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, sys
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||||
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
|
from calibre.utils.config import Config, StringConfig
|
||||||
|
|
||||||
|
from pyPdf import PdfFileWriter, PdfFileReader
|
||||||
|
|
||||||
|
def config(defaults=None):
    '''
    Build the configuration object for the merge command.

    Without ``defaults`` a Config named 'mergepdf' is used, otherwise a
    StringConfig built from ``defaults``. In both cases the ``output``
    option (-o / --output, default 'merged.pdf') is added.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('mergepdf', description)
    conf.add_opt('output', ['-o', '--output'], default='merged.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
|
||||||
|
|
||||||
|
def option_parser(name):
    '''
    Return the option parser for the merge command.

    :param name: Command name substituted for ``%%name`` in the usage text.
    '''
    # Look the constant template up first and substitute the name afterwards,
    # so that the string handed to ``_`` does not depend on ``name``
    # (assuming ``_`` is a gettext-style lookup).
    usage = _('''\
%prog %%name [options] file1.pdf file2.pdf ...

Merges individual PDFs. Metadata will be used from the first PDF specified.
''')
    return config().option_parser(usage=usage.replace('%%name', name))
|
||||||
|
|
||||||
|
def merge_files(in_paths, out_path, metadata=None):
    '''
    Write the pages of all PDFs in ``in_paths``, in order, to ``out_path``.

    :param in_paths: Paths of the PDF files to merge.
    :param out_path: Path of the merged PDF to write.
    :param metadata: Object with ``title`` and ``authors`` attributes used
        for the output's title and author; when None both are the
        translated string 'Unknown'.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input files stay open until the output has been written and are
    # then closed explicitly instead of being left to the garbage collector.
    # NOTE(review): presumably pyPdf still reads from the input streams
    # while writing - confirm before closing them any earlier.
    in_files = []
    try:
        for pdf_path in in_paths:
            in_file = open(os.path.abspath(pdf_path), 'rb')
            in_files.append(in_file)
            for page in PdfFileReader(in_file).pages:
                out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
    finally:
        for in_file in in_files:
            in_file.close()
|
||||||
|
|
||||||
|
def verify_files(files):
    '''
    Return the entries of ``files`` that cannot be used as merge input.

    A file is rejected when it cannot be opened or parsed, when it is
    encrypted, or when it has no pages.

    :param files: Iterable of paths to check.
    :return: List of the rejected paths, in input order.
    '''
    invalid = []

    for pdf_path in files:
        try:
            with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
                pdf = PdfFileReader(pdf_file)
                usable = not pdf.isEncrypted and pdf.numPages > 0
        except Exception:
            # Any failure to open or parse marks the file as invalid, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            usable = False
        if not usable:
            invalid.append(pdf_path)
    return invalid
|
||||||
|
|
||||||
|
def main(args=sys.argv, name=''):
    '''
    Entry point of the merge command: merge the given PDFs into one file.

    The metadata of the first input file is passed on to the merged output.

    :param args: Argument list; the first element is skipped as the program name.
    :param name: Command name shown in the usage text.
    :return: 0 on success, 2 when fewer than two files are given or a file
        cannot be used.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    args = args[1:]

    if len(args) < 2:
        print('Error: Two or more PDF files are required.\n\n')
        print(parser.get_usage())
        return 2

    # Refuse to merge if even one of the inputs is unusable.
    bad_pdfs = verify_files(args)
    if bad_pdfs:
        for pdf in bad_pdfs:
            print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    mi = metadata_from_formats([args[0]])

    merge_files(args, opts.output, mi)

    return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
||||||
|
|
85
src/calibre/ebooks/pdf/output.py
Normal file
85
src/calibre/ebooks/pdf/output.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Convert OEB ebook format to PDF.
|
||||||
|
'''
|
||||||
|
|
||||||
|
#unit, papersize, orientation, custom_size, profile
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||||
|
OptionRecommendation
|
||||||
|
from calibre.ebooks.pdf.writer import PDFWriter
|
||||||
|
from calibre.ebooks.pdf.pageoptions import UNITS, unit, PAPER_SIZES, \
|
||||||
|
paper_size, ORIENTATIONS, orientation, PageOptions
|
||||||
|
|
||||||
|
class PDFOutput(OutputFormatPlugin):
    '''
    Output plugin for files of type ``pdf``.

    The options cover the four page margins, the unit of measure, the paper
    size and the page orientation.
    '''

    name = 'PDF Output'
    author = 'John Schember'
    file_type = 'pdf'

    # In the help texts that list the choices, the text is translated first
    # and the choices inserted afterwards; formatting inside _() would look
    # up a string that already contains the list.
    options = set([
        OptionRecommendation(name='margin_top', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_top',
            help=_('The top margin around the document.')),
        OptionRecommendation(name='margin_bottom', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_bottom',
            help=_('The bottom margin around the document.')),
        OptionRecommendation(name='margin_left', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_left',
            help=_('The left margin around the document.')),
        OptionRecommendation(name='margin_right', recommended_value='1',
            level=OptionRecommendation.LOW, long_switch='margin_right',
            help=_('The right margin around the document.')),

        OptionRecommendation(name='unit', recommended_value='inch',
            level=OptionRecommendation.LOW, short_switch='u',
            long_switch='unit', choices=UNITS.keys(),
            help=_('The unit of measure. Default is inch. Choices '
            'are %s') % UNITS.keys()),
        OptionRecommendation(name='paper_size', recommended_value='letter',
            level=OptionRecommendation.LOW,
            long_switch='paper_size', choices=PAPER_SIZES.keys(),
            help=_('The size of the paper. Default is letter. Choices '
            'are %s') % PAPER_SIZES.keys()),
        OptionRecommendation(name='orientation', recommended_value='portrait',
            level=OptionRecommendation.LOW,
            long_switch='orientation', choices=ORIENTATIONS.keys(),
            help=_('The orientation of the page. Default is portrait. Choices '
            'are %s') % ORIENTATIONS.keys()),
        ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write the spine of ``oeb_book`` as PDF to ``output_path``.

        :param oeb_book: Book whose ``spine`` is handed to the PDF writer.
        :param output_path: Either a path, in which case missing parent
            directories are created and the file is opened and closed here,
            or an object with a ``write`` attribute, which is written to and
            left open.
        :param input_plugin: Not used.
        :param opts: Supplies the margin, unit, paper size and orientation
            values.
        :param log: Passed to the PDF writer.
        '''
        popts = PageOptions()

        popts.set_margin_top(opts.margin_top)
        popts.set_margin_bottom(opts.margin_bottom)
        popts.set_margin_left(opts.margin_left)
        popts.set_margin_right(opts.margin_right)

        popts.unit = unit(opts.unit)
        popts.paper_size = paper_size(opts.paper_size)
        popts.orientation = orientation(opts.orientation)

        writer = PDFWriter(log, popts)

        # Only a stream opened here is closed here.
        close = not hasattr(output_path, 'write')
        if close:
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            writer.dump(oeb_book.spine, out_stream)
        finally:
            # Close the file even if writing fails.
            if close:
                out_stream.close()
|
98
src/calibre/ebooks/pdf/pageoptions.py
Normal file
98
src/calibre/ebooks/pdf/pageoptions.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from PyQt4.Qt import QPrinter
|
||||||
|
|
||||||
|
# Maps unit names (the values offered as choices for the PDF output 'unit'
# option) to the corresponding QPrinter unit constants.
UNITS = {
         'millimeter' : QPrinter.Millimeter,
         'point' : QPrinter.Point,
         'inch' : QPrinter.Inch,
         'pica' : QPrinter.Pica,
         'didot' : QPrinter.Didot,
         'cicero' : QPrinter.Cicero,
         'devicepixel' : QPrinter.DevicePixel,
        }
|
||||||
|
|
||||||
|
def unit(unit):
    '''
    Look up the QPrinter unit constant for a unit name.

    Names that are not keys of UNITS resolve to QPrinter.Inch.
    '''
    fallback = QPrinter.Inch
    if unit in UNITS:
        return UNITS[unit]
    return fallback
|
||||||
|
|
||||||
|
# Maps paper size names (the values offered as choices for the PDF output
# 'paper_size' option) to the corresponding QPrinter paper size constants.
# The trailing comments give the nominal dimensions of each size.
PAPER_SIZES = {
               'a0' : QPrinter.A0, # 841 x 1189 mm
               'a1' : QPrinter.A1, # 594 x 841 mm
               'a2' : QPrinter.A2, # 420 x 594 mm
               'a3' : QPrinter.A3, # 297 x 420 mm
               'a4' : QPrinter.A4, # 210 x 297 mm, 8.26 x 11.69 inches
               'a5' : QPrinter.A5, # 148 x 210 mm
               'a6' : QPrinter.A6, # 105 x 148 mm
               'a7' : QPrinter.A7, # 74 x 105 mm
               'a8' : QPrinter.A8, # 52 x 74 mm
               'a9' : QPrinter.A9, # 37 x 52 mm
               'b0' : QPrinter.B0, # 1030 x 1456 mm
               'b1' : QPrinter.B1, # 728 x 1030 mm
               'b2' : QPrinter.B2, # 515 x 728 mm
               'b3' : QPrinter.B3, # 364 x 515 mm
               'b4' : QPrinter.B4, # 257 x 364 mm
               'b5' : QPrinter.B5, # 182 x 257 mm, 7.17 x 10.13 inches
               'b6' : QPrinter.B6, # 128 x 182 mm
               'b7' : QPrinter.B7, # 91 x 128 mm
               'b8' : QPrinter.B8, # 64 x 91 mm
               'b9' : QPrinter.B9, # 45 x 64 mm
               'b10' : QPrinter.B10, # 32 x 45 mm
               'c5e' : QPrinter.C5E, # 163 x 229 mm
               'comm10e' : QPrinter.Comm10E, # 105 x 241 mm, U.S. Common 10 Envelope
               'dle' : QPrinter.DLE, # 110 x 220 mm
               'executive' : QPrinter.Executive, # 7.5 x 10 inches, 191 x 254 mm
               'folio' : QPrinter.Folio, # 210 x 330 mm
               'ledger' : QPrinter.Ledger, # 432 x 279 mm
               'legal' : QPrinter.Legal, # 8.5 x 14 inches, 216 x 356 mm
               'letter' : QPrinter.Letter, # 8.5 x 11 inches, 216 x 279 mm
               'tabloid' : QPrinter.Tabloid, # 279 x 432 mm
               #'custom' : QPrinter.Custom, # Unknown, or a user defined size.
              }
|
||||||
|
|
||||||
|
def paper_size(size):
    '''
    Look up the QPrinter paper size constant for a paper size name.

    Names that are not keys of PAPER_SIZES resolve to QPrinter.Letter.
    '''
    fallback = QPrinter.Letter
    if size in PAPER_SIZES:
        return PAPER_SIZES[size]
    return fallback
|
||||||
|
|
||||||
|
# Maps orientation names (the values offered as choices for the PDF output
# 'orientation' option) to the corresponding QPrinter orientation constants.
ORIENTATIONS = {
                'portrait' : QPrinter.Portrait,
                'landscape' : QPrinter.Landscape,
               }
|
||||||
|
|
||||||
|
def orientation(orientation):
    '''
    Look up the QPrinter orientation constant for an orientation name.

    Names that are not keys of ORIENTATIONS resolve to QPrinter.Portrait.
    '''
    fallback = QPrinter.Portrait
    if orientation in ORIENTATIONS:
        return ORIENTATIONS[orientation]
    return fallback
|
||||||
|
|
||||||
|
|
||||||
|
class PageOptions(object):
    '''
    Container for the page layout settings used when printing to PDF.

    The margins are plain numbers; ``unit``, ``paper_size`` and
    ``orientation`` hold QPrinter constants.
    '''

    # Defaults: a margin of 1 on every side, inches, portrait Letter paper.
    margin_top = 1
    margin_bottom = 1
    margin_left = 1
    margin_right = 1
    unit = QPrinter.Inch
    paper_size = QPrinter.Letter
    orientation = QPrinter.Portrait

    def _parse_margin(self, size):
        '''
        Convert ``size`` to a number, falling back to the default margin of
        1 when it cannot be interpreted as a finite number.

        Fractional values such as '0.5' are kept; the previous int()
        conversion rejected them and silently substituted 1.
        '''
        try:
            value = float(size)
        except (TypeError, ValueError):
            return 1
        # value - value is 0 for every finite number and NaN for both NaN
        # and the infinities, which makes this a portable finiteness test.
        if value - value != 0:
            return 1
        return value

    def set_margin_top(self, size):
        '''Set the top margin from a number or numeric string.'''
        self.margin_top = self._parse_margin(size)

    def set_margin_bottom(self, size):
        '''Set the bottom margin from a number or numeric string.'''
        self.margin_bottom = self._parse_margin(size)

    def set_margin_left(self, size):
        '''Set the left margin from a number or numeric string.'''
        self.margin_left = self._parse_margin(size)

    def set_margin_right(self, size):
        '''Set the right margin from a number or numeric string.'''
        self.margin_right = self._parse_margin(size)
|
88
src/calibre/ebooks/pdf/reverse.py
Normal file
88
src/calibre/ebooks/pdf/reverse.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Reverse content of PDF.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os, sys
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||||
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
|
from calibre.utils.config import Config, StringConfig
|
||||||
|
|
||||||
|
from pyPdf import PdfFileWriter, PdfFileReader
|
||||||
|
|
||||||
|
def config(defaults=None):
    '''
    Create the configuration object for the reverse command.

    :param defaults: When None a ``Config`` named 'reversepdf' is created,
                     otherwise a ``StringConfig`` built from ``defaults``.
    :return: The configuration object with the ``output`` option added.
    '''
    description = _('Options to control the transformation of pdf')
    if defaults is not None:
        conf = StringConfig(defaults, description)
    else:
        conf = Config('reversepdf', description)
    conf.add_opt('output', ['-o', '--output'], default='reversed.pdf',
        help=_('Path to output file. By default a file is created in the current directory.'))
    return conf
|
||||||
|
|
||||||
|
def option_parser(name):
    '''
    Build the command line option parser for the reverse command.

    :param name: Text substituted for the ``%%name`` placeholder in the
                 usage message.
    :return: The parser returned by ``config().option_parser``.
    '''
    # Translate the constant template first and substitute the name
    # afterwards. Substituting before the call to _() hands gettext a
    # string that differs from the one in the message catalog whenever
    # name is not empty.
    usage = _('''\
%prog %%name [options] file1.pdf

Reverse PDF.
''')
    c = config()
    return c.option_parser(usage=usage.replace('%%name', name))
|
||||||
|
|
||||||
|
def reverse(pdf_path, out_path, metadata=None):
    '''
    Write a copy of a PDF with its pages in reverse order.

    :param pdf_path: Path of the PDF to read.
    :param out_path: Path of the PDF to create.
    :param metadata: Object providing ``title`` and ``authors``. When None
                     the title and author are set to the translated
                     'Unknown'.
    '''
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)

    # The input stays open until the output has been written and is then
    # closed deterministically instead of being left to the garbage
    # collector. NOTE(review): this assumes the page objects may still read
    # from the input stream while the output is written -- keep the nesting.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for page in reversed(pdf.pages):
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
|
||||||
|
|
||||||
|
# Return True if the pdf is valid.
|
||||||
|
def valid_pdf(pdf_path):
    '''
    Report whether ``pdf_path`` names a PDF that can be processed.

    :return: True when the file can be opened and parsed, is not encrypted
             and has at least one page; False otherwise.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not (pdf.isEncrypted or pdf.numPages <= 0)
    except Exception:
        # Any failure to open or parse the file means it is unusable.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=sys.argv, name=''):
    '''
    Command line entry point: reverse the page order of a PDF.

    :param args: Argument list including the program name in position 0.
    :param name: Command name shown in the usage message.
    :return: 0 on success, 2 when no usable PDF was given.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)
    # Drop the program name; what is left are the positional arguments.
    args = args[1:]

    if len(args) < 1:
        print('Error: A PDF file is required.\n\n')
        print(parser.get_usage())
        return 2

    if not valid_pdf(args[0]):
        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % args[0])
        return 2

    mi = metadata_from_formats([args[0]])

    reverse(args[0], opts.output, mi)

    return 0
|
||||||
|
|
||||||
|
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())
|
186
src/calibre/ebooks/pdf/split.py
Normal file
186
src/calibre/ebooks/pdf/split.py
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
'''
|
||||||
|
Split PDF file into multiple PDF documents.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, sys, re
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||||
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
|
from calibre.utils.config import Config, StringConfig
|
||||||
|
|
||||||
|
from pyPdf import PdfFileWriter, PdfFileReader
|
||||||
|
|
||||||
|
def config(defaults=None):
    '''
    Create the configuration object for the split command.

    :param defaults: When None a ``Config`` named 'splitpdf' is created,
                     otherwise a ``StringConfig`` built from ``defaults``.
    :return: The configuration object with the ``output`` option added.
    '''
    desc = _('Options to control the transformation of pdf')
    if defaults is None:
        c = Config('splitpdf', desc)
    else:
        c = StringConfig(defaults, desc)
    # The help text is built from adjacent literals. A backslash
    # continuation inside a single literal would make any indentation of
    # the continuation line part of the message.
    c.add_opt('output', ['-o', '--output'], default='split.pdf',
        help=_('Path to output file. By default a file is created in the current directory. '
               'The file name will be the base name for the output.'))
    return c
|
||||||
|
|
||||||
|
def option_parser(name):
    '''
    Build the command line option parser for the split command.

    :param name: Text substituted for the ``%%name`` placeholder in the
                 usage message.
    :return: The parser returned by ``config().option_parser``.
    '''
    # Translate the constant template first and substitute the name
    # afterwards. Substituting before the call to _() hands gettext a
    # string that differs from the one in the message catalog whenever
    # name is not empty.
    usage = _('''\

%prog %%name [options] file.pdf page_to_split_on ...
%prog %%name [options] file.pdf page_range_to_split_on ...

Ex.

%prog %%name file.pdf 6
%prog %%name file.pdf 6-12
%prog %%name file.pdf 6-12 8 10 9-20

Split a PDF.
''')
    c = config()
    return c.option_parser(usage=usage.replace('%%name', name))
|
||||||
|
|
||||||
|
def split_pdf(in_path, pages, page_ranges, out_name, metadata=None):
    '''
    Write one output PDF per requested page and page range.

    :param in_path: Path of the PDF to split.
    :param pages: Zero based page indexes. For each one a file containing
                  that page up to the last page of the input is written.
    :param page_ranges: Sequences whose first two items are the zero based
                        first and last page index of a range. For each one
                        a file containing exactly that range is written.
    :param out_name: Base name of the output files; write_pdf appends the
                     one based page number(s) and '.pdf'.
    :param metadata: Passed through to write_pdf.
    '''
    # Keep the input open only for as long as pages are copied from it.
    with open(os.path.abspath(in_path), 'rb') as in_file:
        pdf = PdfFileReader(in_file)
        last_page = pdf.numPages - 1

        for start in pages:
            write_pdf(pdf, out_name, '%s' % (start + 1), start, last_page, metadata)

        for page_range in page_ranges:
            start, end = page_range[0], page_range[1]
            write_pdf(pdf, out_name, '%s-%s' % (start + 1, end + 1), start, end, metadata)
|
||||||
|
|
||||||
|
def write_pdf(pdf, name, suffix, start, end, metadata=None):
    '''
    Copy a run of pages from an open PDF into a new file.

    :param pdf: Reader object providing ``getPage(index)``.
    :param name: Base name of the output file.
    :param suffix: Text appended to ``name``; the file written is
                   ``name + suffix + '.pdf'``.
    :param start: Zero based index of the first page to copy.
    :param end: Zero based index of the last page to copy (inclusive).
    :param metadata: Object providing ``title`` and ``authors``. When None
                     the title and author are set to the translated
                     'Unknown'.
    '''
    # Identity test: an object with a custom __eq__ must not be mistaken
    # for missing metadata.
    if metadata is None:
        title = _('Unknown')
        author = _('Unknown')
    else:
        title = metadata.title
        author = authors_to_string(metadata.authors)

    out_pdf = PdfFileWriter(title=title, author=author)
    for page_num in range(start, end + 1):
        out_pdf.addPage(pdf.getPage(page_num))
    with open('%s%s.pdf' % (name, suffix), 'wb') as out_file:
        out_pdf.write(out_file)
|
||||||
|
|
||||||
|
def split_args(args):
    '''
    Sort command line arguments into the input PDF, single pages and page
    ranges.

    :param args: Iterable of argument strings (without the program name).
    :return: Tuple ``(pdf, pages, page_ranges, bad)`` where ``pdf`` is the
             first argument ending in '.pdf' (case insensitive) or '' when
             there is none, ``pages`` is a list of page number strings,
             ``page_ranges`` is a list of ``[start, end]`` string pairs and
             ``bad`` is the sorted list of unique arguments that were not
             understood, including any additional PDF names.
    '''
    # Raw string patterns, compiled once instead of once per argument.
    pdf_re = re.compile(r'(?iu)^.*?\.pdf[ ]*$')
    page_re = re.compile(r'^[ ]*\d+[ ]*$')
    range_re = re.compile(r'^[ ]*(?P<start>\d+)[ ]*-[ ]*(?P<end>\d+)[ ]*$')

    pdf = ''
    pages = []
    page_ranges = []
    bad = []

    for arg in args:
        arg = arg.strip()

        # Find the pdf input
        if pdf_re.search(arg) is not None:
            if pdf == '':
                pdf = arg
            else:
                bad.append(arg)
            continue

        # Find single indexes
        if page_re.search(arg) is not None:
            pages.append(arg)
            continue

        # Find index ranges
        mo = range_re.search(arg)
        if mo is None:
            bad.append(arg)
            continue

        start = mo.group('start')
        end = mo.group('end')
        # check to see if the range is really a single index
        if start == end:
            pages.append(start)
        else:
            page_ranges.append([start, end])

    bad = sorted(set(bad))

    return pdf, pages, page_ranges, bad
|
||||||
|
|
||||||
|
# Remove duplicates from pages and page_ranges.
|
||||||
|
# Set pages higher than the total number of pages in the pdf to the last page.
|
||||||
|
# Return pages and page_ranges as lists of ints.
|
||||||
|
def clean_page_list(pdf_path, pages, page_ranges):
    '''
    Normalise the requested pages and page ranges against a PDF.

    Page numbers are one based on input and zero based on output. Numbers
    above the page count are moved to the last page and numbers below 1
    are moved to the first page (previously 0 became index -1, which
    selects the last page). Duplicates are removed and both lists are
    sorted.

    :param pdf_path: Path of the PDF whose page count is used.
    :param pages: Iterable of page numbers (ints or numeric strings).
    :param page_ranges: Iterable of ``(start, end)`` page number pairs.
    :return: Tuple ``(pages, page_ranges)`` of a list of ints and a list
             of ``[first, last]`` int pairs.
    '''
    # Only the page count is needed, so the file is closed straight away.
    with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
        total_pages = PdfFileReader(pdf_file).numPages

    def to_index(page_number):
        # Clamp into 1..total_pages, then convert to a zero based index.
        return max(1, min(page_number, total_pages)) - 1

    sorted_pages = []
    sorted_ranges = []

    for index in pages:
        sorted_pages.append(to_index(int(index)))

    for start, end in page_ranges:
        start = int(start)
        end = int(end)

        # A range lying entirely behind the last page collapses to the
        # last page.
        if start > total_pages and end > total_pages:
            sorted_pages.append(total_pages - 1)
            continue

        page_range = sorted([to_index(start), to_index(end)])
        if page_range not in sorted_ranges:
            sorted_ranges.append(page_range)

    # Remove duplicates and sort
    pages = sorted(set(sorted_pages))
    page_ranges = sorted(sorted_ranges)

    return pages, page_ranges
|
||||||
|
|
||||||
|
# Return True if the pdf is valid.
|
||||||
|
def valid_pdf(pdf_path):
    '''
    Report whether ``pdf_path`` names a PDF that can be processed.

    :return: True when the file can be opened and parsed, is not encrypted
             and has at least one page; False otherwise.
    '''
    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
            return not (pdf.isEncrypted or pdf.numPages <= 0)
    except Exception:
        # Any failure to open or parse the file means it is unusable.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
|
||||||
|
|
||||||
|
def main(args=sys.argv, name=''):
    '''
    Command line entry point: split a PDF at the given pages and ranges.

    :param args: Argument list including the program name in position 0.
    :param name: Command name shown in the usage message.
    :return: 0 on success, 2 when the arguments are unusable.
    '''
    parser = option_parser(name)
    opts, args = parser.parse_args(args)

    pdf, pages, page_ranges, unknown = split_args(args[1:])

    # Both a PDF and at least one page or page range are required. The
    # previous test only fired when the PDF was missing *and* one of the
    # two lists was empty, so incomplete command lines slipped through.
    if pdf == '' or (pages == [] and page_ranges == []):
        print('Error: PDF and where to split is required.\n\n')
        print(parser.get_usage())
        return 2

    if unknown != []:
        for arg in unknown:
            print('Error: Unknown argument `%s`' % arg)
        print(parser.get_usage())
        return 2

    if not valid_pdf(pdf):
        print('Error: Could not read file `%s`. Is it a valid PDF file or is it encrypted/DRMed?' % pdf)
        return 2

    pages, page_ranges = clean_page_list(pdf, pages, page_ranges)

    mi = metadata_from_formats([pdf])

    # Only the part of the output name before the extension is used; the
    # page numbers and '.pdf' are appended per output file.
    split_pdf(pdf, pages, page_ranges, os.path.splitext(opts.output)[0], mi)

    return 0
|
||||||
|
|
||||||
|
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())
|
||||||
|
|
@ -16,8 +16,6 @@ def config(defaults=None):
|
|||||||
c = Config('trimpdf', desc)
|
c = Config('trimpdf', desc)
|
||||||
else:
|
else:
|
||||||
c = StringConfig(defaults, desc)
|
c = StringConfig(defaults, desc)
|
||||||
c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
|
|
||||||
help=_('Be verbose, useful for debugging. Can be specified multiple times for greater verbosity.'))
|
|
||||||
c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
|
c.add_opt('output', ['-o', '--output'],default='cropped.pdf',
|
||||||
help=_('Path to output file. By default a file is created in the current directory.'))
|
help=_('Path to output file. By default a file is created in the current directory.'))
|
||||||
c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
|
c.add_opt('bottom_left_x', [ '-x', '--leftx'], default=default_crop,
|
||||||
@ -33,16 +31,16 @@ def config(defaults=None):
|
|||||||
return c
|
return c
|
||||||
|
|
||||||
|
|
||||||
def option_parser():
|
def option_parser(name):
|
||||||
c = config()
|
c = config()
|
||||||
return c.option_parser(usage=_('''\
|
return c.option_parser(usage=_('''\
|
||||||
%prog [options] file.pdf
|
%prog %%name [options] file.pdf
|
||||||
|
|
||||||
Crops a pdf.
|
Crops a pdf.
|
||||||
'''))
|
'''.replace('%%name', name)))
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv, name=''):
|
||||||
parser = option_parser()
|
parser = option_parser(name)
|
||||||
opts, args = parser.parse_args(args)
|
opts, args = parser.parse_args(args)
|
||||||
try:
|
try:
|
||||||
source = os.path.abspath(args[1])
|
source = os.path.abspath(args[1])
|
@ -1,19 +1,18 @@
|
|||||||
'''
|
# -*- coding: utf-8 -*-
|
||||||
Write content to PDF.
|
|
||||||
'''
|
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, logging, shutil, sys
|
'''
|
||||||
|
Write content to PDF.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os, shutil, sys
|
||||||
|
|
||||||
from calibre import LoggingInterface
|
|
||||||
from calibre.ebooks.epub.iterator import SpineItem
|
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre.customize.ui import run_plugins_on_postprocess
|
from calibre.ebooks.pdf.pageoptions import PageOptions
|
||||||
from calibre.utils.config import Config, StringConfig
|
|
||||||
|
|
||||||
from PyQt4 import QtCore
|
from PyQt4 import QtCore
|
||||||
from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, \
|
from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, QPrinter, \
|
||||||
@ -22,20 +21,13 @@ from PyQt4.QtWebKit import QWebView
|
|||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
from pyPdf import PdfFileWriter, PdfFileReader
|
||||||
|
|
||||||
class PDFMargins:
|
|
||||||
def __init__(self, margin=1):
|
|
||||||
self.top = margin
|
|
||||||
self.bottom = margin
|
|
||||||
self.left = margin
|
|
||||||
self.right = margin
|
|
||||||
|
|
||||||
class PDFWriter(QObject):
|
class PDFWriter(QObject):
|
||||||
def __init__(self, margins=PDFMargins()):
|
def __init__(self, log, popts=PageOptions()):
|
||||||
if QApplication.instance() is None:
|
if QApplication.instance() is None:
|
||||||
QApplication([])
|
QApplication([])
|
||||||
QObject.__init__(self)
|
QObject.__init__(self)
|
||||||
|
|
||||||
self.logger = logging.getLogger('oeb2pdf')
|
self.logger = log
|
||||||
|
|
||||||
self.loop = QEventLoop()
|
self.loop = QEventLoop()
|
||||||
self.view = QWebView()
|
self.view = QWebView()
|
||||||
@ -43,15 +35,14 @@ class PDFWriter(QObject):
|
|||||||
self.render_queue = []
|
self.render_queue = []
|
||||||
self.combine_queue = []
|
self.combine_queue = []
|
||||||
self.tmp_path = PersistentTemporaryDirectory('_any2pdf_parts')
|
self.tmp_path = PersistentTemporaryDirectory('_any2pdf_parts')
|
||||||
self.margins = margins
|
self.popts = popts
|
||||||
|
|
||||||
def dump(self, oebpath, path):
|
def dump(self, spine, out_stream):
|
||||||
self._delete_tmpdir()
|
self._delete_tmpdir()
|
||||||
|
|
||||||
opf = OPF(oebpath, os.path.dirname(oebpath))
|
self.render_queue = spine[:]
|
||||||
self.render_queue = [SpineItem(i.path) for i in opf.spine]
|
|
||||||
self.combine_queue = []
|
self.combine_queue = []
|
||||||
self.path = path
|
self.out_stream = out_stream
|
||||||
|
|
||||||
QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
|
QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
|
||||||
self.loop.exec_()
|
self.loop.exec_()
|
||||||
@ -78,7 +69,9 @@ class PDFWriter(QObject):
|
|||||||
self.logger.debug('\tRendering item as %s' % item_path)
|
self.logger.debug('\tRendering item as %s' % item_path)
|
||||||
|
|
||||||
printer = QPrinter(QPrinter.HighResolution)
|
printer = QPrinter(QPrinter.HighResolution)
|
||||||
printer.setPageMargins(self.margins.left, self.margins.top, self.margins.right, self.margins.bottom, QPrinter.Inch)
|
printer.setPageMargins(self.popts.margin_left, self.popts.margin_top, self.popts.margin_right, self.popts.margin_bottom, self.popts.unit)
|
||||||
|
printer.setPaperSize(self.popts.paper_size)
|
||||||
|
printer.setOrientation(self.popts.orientation)
|
||||||
printer.setOutputFormat(QPrinter.PdfFormat)
|
printer.setOutputFormat(QPrinter.PdfFormat)
|
||||||
printer.setOutputFileName(item_path)
|
printer.setOutputFileName(item_path)
|
||||||
self.view.print_(printer)
|
self.view.print_(printer)
|
||||||
@ -98,75 +91,7 @@ class PDFWriter(QObject):
|
|||||||
inputPDF = PdfFileReader(file(item, 'rb'))
|
inputPDF = PdfFileReader(file(item, 'rb'))
|
||||||
for page in inputPDF.pages:
|
for page in inputPDF.pages:
|
||||||
outPDF.addPage(page)
|
outPDF.addPage(page)
|
||||||
outputStream = file(self.path, 'wb')
|
outPDF.write(self.out_stream)
|
||||||
outPDF.write(outputStream)
|
|
||||||
outputStream.close()
|
|
||||||
finally:
|
finally:
|
||||||
self._delete_tmpdir()
|
self._delete_tmpdir()
|
||||||
self.loop.exit(0)
|
self.loop.exit(0)
|
||||||
|
|
||||||
|
|
||||||
def config(defaults=None):
|
|
||||||
desc = _('Options to control the conversion to PDF')
|
|
||||||
if defaults is None:
|
|
||||||
c = Config('pdf', desc)
|
|
||||||
else:
|
|
||||||
c = StringConfig(defaults, desc)
|
|
||||||
|
|
||||||
pdf = c.add_group('PDF', _('PDF options.'))
|
|
||||||
|
|
||||||
pdf('margin_top', ['--margin_top'], default=1,
|
|
||||||
help=_('The top margin around the document in inches.'))
|
|
||||||
pdf('margin_bottom', ['--margin_bottom'], default=1,
|
|
||||||
help=_('The bottom margin around the document in inches.'))
|
|
||||||
pdf('margin_left', ['--margin_left'], default=1,
|
|
||||||
help=_('The left margin around the document in inches.'))
|
|
||||||
pdf('margin_right', ['--margin_right'], default=1,
|
|
||||||
help=_('The right margin around the document in inches.'))
|
|
||||||
|
|
||||||
return c
|
|
||||||
|
|
||||||
def option_parser():
|
|
||||||
c = config()
|
|
||||||
parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
|
|
||||||
parser.add_option(
|
|
||||||
'-o', '--output', default=None,
|
|
||||||
help=_('Output file. Default is derived from input filename.'))
|
|
||||||
parser.add_option(
|
|
||||||
'-v', '--verbose', default=0, action='count',
|
|
||||||
help=_('Useful for debugging.'))
|
|
||||||
return parser
|
|
||||||
|
|
||||||
def oeb2pdf(opts, inpath):
|
|
||||||
logger = LoggingInterface(logging.getLogger('oeb2pdf'))
|
|
||||||
logger.setup_cli_handler(opts.verbose)
|
|
||||||
|
|
||||||
outpath = opts.output
|
|
||||||
if outpath is None:
|
|
||||||
outpath = os.path.basename(inpath)
|
|
||||||
outpath = os.path.splitext(outpath)[0] + '.pdf'
|
|
||||||
|
|
||||||
margins = PDFMargins()
|
|
||||||
margins.top = opts.margin_top
|
|
||||||
margins.bottom = opts.margin_bottom
|
|
||||||
margins.left = opts.margin_left
|
|
||||||
margins.right = opts.margin_right
|
|
||||||
|
|
||||||
writer = PDFWriter(margins)
|
|
||||||
writer.dump(inpath, outpath)
|
|
||||||
run_plugins_on_postprocess(outpath, 'pdf')
|
|
||||||
logger.log_info(_('Output written to ') + outpath)
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
|
||||||
parser = option_parser()
|
|
||||||
opts, args = parser.parse_args(argv[1:])
|
|
||||||
if len(args) != 1:
|
|
||||||
parser.print_help()
|
|
||||||
return 1
|
|
||||||
inpath = args[0]
|
|
||||||
retval = oeb2pdf(opts, inpath)
|
|
||||||
return retval
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
||||||
|
|
||||||
|
9
src/calibre/ebooks/txt/__init__.py
Normal file
9
src/calibre/ebooks/txt/__init__.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, John Schember john@nachtimwald.com'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Used for txt output
|
||||||
|
'''
|
||||||
|
|
63
src/calibre/ebooks/txt/output.py
Normal file
63
src/calibre/ebooks/txt/output.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||||
|
OptionRecommendation
|
||||||
|
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
|
||||||
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
|
|
||||||
|
class TXTOutput(OutputFormatPlugin):
    '''
    Output plugin that writes the spine of an OEB book as plain text.

    The text is produced by TxtWriter; author and title can optionally be
    written at the top of the file.
    '''

    name = 'TXT Output'
    author = 'John Schember'
    file_type = 'txt'

    # NOTE: the list of newline types is interpolated *after* the call to
    # _() so that the translation lookup is done on the constant template
    # instead of on the already formatted string.
    options = set([
                    OptionRecommendation(name='newline', recommended_value='system',
                        level=OptionRecommendation.LOW, long_switch='newline',
                        short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
                        help=_('Type of newline to use. Options are %s. Default is \'system\'. '
                            'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
                            'For Mac OS X use \'unix\'. \'system\' will default to the newline '
                            'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
                    OptionRecommendation(name='prepend_author', recommended_value='true',
                        level=OptionRecommendation.LOW, long_switch='prepend_author',
                        choices=['true', 'false'],
                        help=_('Write the author to the beginning of the file. '
                            'Default is \'true\'. Use \'false\' to disable.')),
                    OptionRecommendation(name='prepend_title', recommended_value='true',
                        choices=['true', 'false'],
                        level=OptionRecommendation.LOW, long_switch='prepend_title',
                        help=_('Write the title to the beginning of the file. '
                            'Default is \'true\'. Use \'false\' to disable.'))
                 ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Convert ``oeb_book.spine`` to text and write it out.

        :param oeb_book: Book providing ``spine`` and ``metadata``.
        :param output_path: Either a file system path or an object with a
                            ``write`` method. A path is opened (creating its
                            directory when missing) and closed here; a stream
                            is left open for the caller.
        :param input_plugin: Not used by this plugin.
        :param opts: Conversion options; prepend_author, prepend_title,
                     authors, title and newline are read.
        :param log: Logger handed to TxtWriter.
        '''
        metadata = TxtMetadata()
        # Values given in the options take precedence over the book's own
        # metadata.
        if opts.prepend_author.lower() == 'true':
            metadata.author = opts.authors if opts.authors else authors_to_string(oeb_book.metadata.authors)
        if opts.prepend_title.lower() == 'true':
            metadata.title = opts.title if opts.title else oeb_book.metadata.title

        writer = TxtWriter(TxtNewlines(opts.newline).newline, log)
        txt = writer.dump(oeb_book.spine, metadata)

        # The stream is binary, so encode explicitly. Writing unicode
        # directly relies on the implicit ASCII codec and fails on the
        # first non-ASCII character.
        if isinstance(txt, unicode):
            txt = txt.encode('utf-8')

        close = False
        if not hasattr(output_path, 'write'):
            close = True
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        # Make sure a stream opened here is closed even when writing fails.
        try:
            out_stream.seek(0)
            out_stream.truncate()
            out_stream.write(txt)
        finally:
            if close:
                out_stream.close()
|
158
src/calibre/ebooks/txt/writer.py
Normal file
158
src/calibre/ebooks/txt/writer.py
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
'''
|
||||||
|
Write content to TXT.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, re, sys
|
||||||
|
|
||||||
|
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
||||||
|
|
||||||
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
class TxtWriter(object):
|
||||||
|
def __init__(self, newline, log):
|
||||||
|
self.newline = newline
|
||||||
|
self.log = log
|
||||||
|
|
||||||
|
def dump(self, spine, metadata):
|
||||||
|
out = u''
|
||||||
|
for item in spine:
|
||||||
|
with open(item, 'r') as itemf:
|
||||||
|
content = itemf.read().decode(item.encoding)
|
||||||
|
# Convert newlines to unix style \n for processing. These
|
||||||
|
# will be changed to the specified type later in the process.
|
||||||
|
content = self.unix_newlines(content)
|
||||||
|
content = self.strip_html(content)
|
||||||
|
content = self.replace_html_symbols(content)
|
||||||
|
content = self.cleanup_text(content)
|
||||||
|
content = self.specified_newlines(content)
|
||||||
|
out += content
|
||||||
|
|
||||||
|
# Prepend metadata
|
||||||
|
if metadata.author != None and metadata.author != '':
|
||||||
|
out = (u'%s%s%s%s' % (metadata.author.upper(), self.newline, self.newline, self.newline)) + out
|
||||||
|
if metadata.title != None and metadata.title != '':
|
||||||
|
out = (u'%s%s%s%s' % (metadata.title.upper(), self.newline, self.newline, self.newline)) + out
|
||||||
|
|
||||||
|
# Put two blank lines at end of file
|
||||||
|
end = out[-3 * len(self.newline):]
|
||||||
|
for i in range(3 - end.count(self.newline)):
|
||||||
|
out += self.newline
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
|
def strip_html(self, html):
|
||||||
|
stripped = u''
|
||||||
|
|
||||||
|
for dom_tree in BeautifulSoup(html).findAll('body'):
|
||||||
|
text = unicode(dom_tree)
|
||||||
|
|
||||||
|
# Remove unnecessary tags
|
||||||
|
for tag in ['script', 'style']:
|
||||||
|
text = re.sub('(?imu)<[ ]*%s[ ]*.*?>(.*)</[ ]*%s[ ]*>' % (tag, tag), '', text)
|
||||||
|
text = re.sub('<!--.*-->', '', text)
|
||||||
|
text = re.sub('<\?.*?\?>', '', text)
|
||||||
|
text = re.sub('<@.*?@>', '', text)
|
||||||
|
text = re.sub('<%.*?%>', '', text)
|
||||||
|
|
||||||
|
# Headings usually indicate Chapters.
|
||||||
|
# We are going to use a marker to insert the proper number of
|
||||||
|
# newline characters at the end of cleanup_text because cleanup_text
|
||||||
|
# remove excessive (more than 2 newlines).
|
||||||
|
for tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
|
||||||
|
text = re.sub('(?imu)<[ ]*%s[ ]*.*?>' % tag, '-vzxedxy-', text)
|
||||||
|
text = re.sub('(?imu)</[ ]*%s[ ]*>' % tag, '-vlgzxey-', text)
|
||||||
|
|
||||||
|
# Separate content with space.
|
||||||
|
for tag in ['td']:
|
||||||
|
text = re.sub('(?imu)</[ ]*%s[ ]*>', ' ', text)
|
||||||
|
|
||||||
|
# Separate content with empty line.
|
||||||
|
for tag in ['p', 'div', 'pre', 'li', 'table', 'tr']:
|
||||||
|
text = re.sub('(?imu)</[ ]*%s[ ]*>' % tag, '\n\n', text)
|
||||||
|
|
||||||
|
for tag in ['hr', 'br']:
|
||||||
|
text = re.sub('(?imu)<[ ]*%s[ ]*/*?>' % tag, '\n\n', text)
|
||||||
|
|
||||||
|
# Remove any tags that do not need special processing.
|
||||||
|
text = re.sub('<.*?>', '', text)
|
||||||
|
|
||||||
|
stripped = stripped + text
|
||||||
|
|
||||||
|
return stripped
|
||||||
|
|
||||||
|
def replace_html_symbols(self, content):
|
||||||
|
for symbol in HTML_SYMBOLS:
|
||||||
|
for code in HTML_SYMBOLS[symbol]:
|
||||||
|
content = content.replace(code, symbol)
|
||||||
|
return content
|
||||||
|
|
||||||
|
def cleanup_text(self, text):
|
||||||
|
# Replace bad characters.
|
||||||
|
text = text.replace(u'\xc2', '')
|
||||||
|
text = text.replace(u'\xa0', ' ')
|
||||||
|
|
||||||
|
# Replace tabs, vertical tags and form feeds with single space.
|
||||||
|
text = text.replace('\t+', ' ')
|
||||||
|
text = text.replace('\v+', ' ')
|
||||||
|
text = text.replace('\f+', ' ')
|
||||||
|
|
||||||
|
# Single line paragraph.
|
||||||
|
r = re.compile('.\n.')
|
||||||
|
while True:
|
||||||
|
mo = r.search(text)
|
||||||
|
if mo == None:
|
||||||
|
break
|
||||||
|
text = '%s %s' % (text[:mo.start()+1], text[mo.end()-1:])
|
||||||
|
|
||||||
|
# Remove multiple spaces.
|
||||||
|
text = re.sub('[ ]+', ' ', text)
|
||||||
|
|
||||||
|
# Remove excessive newlines.
|
||||||
|
text = re.sub('\n[ ]+\n', '\n\n', text)
|
||||||
|
text = re.sub('\n{3,}', '\n\n', text)
|
||||||
|
|
||||||
|
# Replace markers with the proper characters.
|
||||||
|
text = text.replace('-vzxedxy-', '\n\n\n\n\n')
|
||||||
|
text = text.replace('-vlgzxey-', '\n\n\n')
|
||||||
|
|
||||||
|
# Replace spaces at the beginning and end of lines
|
||||||
|
text = re.sub('(?imu)^[ ]+', '', text)
|
||||||
|
text = re.sub('(?imu)[ ]+$', '', text)
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
|
def unix_newlines(self, text):
    """Return *text* with every line ending converted to '\\n'.

    Windows pairs are rewritten first so that the following pass over
    bare carriage returns does not turn one '\\r\\n' into two newlines.
    """
    return text.replace('\r\n', '\n').replace('\r', '\n')
|
||||||
|
|
||||||
|
def specified_newlines(self, text):
    """Return *text* with '\\n' replaced by the configured ``self.newline``.

    The text is returned unchanged when the configured newline is
    already '\\n'.
    """
    target = self.newline
    return text if target == '\n' else text.replace('\n', target)
|
||||||
|
|
||||||
|
|
||||||
|
class TxtNewlines(object):
    """Resolve a newline style name to the character sequence it denotes.

    The resolved sequence is stored in ``self.newline``. Names are matched
    case-insensitively; an unrecognised name falls back to ``os.linesep``.
    """

    # Supported style names and the line terminator each one stands for.
    NEWLINE_TYPES = dict(
        system=os.linesep,
        unix='\n',
        old_mac='\r',
        windows='\r\n',
    )

    def __init__(self, newline_type):
        style = newline_type.lower()
        try:
            self.newline = self.NEWLINE_TYPES[style]
        except KeyError:
            self.newline = os.linesep
|
||||||
|
|
||||||
|
|
||||||
|
class TxtMetadata(object):
    """Plain holder for TXT metadata; both fields start out unset."""

    def __init__(self):
        # Title and author are both initialised to None.
        self.title = self.author = None
|
@ -713,6 +713,9 @@ class BooksView(TableView):
|
|||||||
def set_editable(self, editable):
|
def set_editable(self, editable):
|
||||||
self._model.set_editable(editable)
|
self._model.set_editable(editable)
|
||||||
|
|
||||||
|
def set_editable(self, editable):
|
||||||
|
self._model.set_editable(editable)
|
||||||
|
|
||||||
def connect_to_search_box(self, sb):
|
def connect_to_search_box(self, sb):
|
||||||
QObject.connect(sb, SIGNAL('search(PyQt_PyObject, PyQt_PyObject)'),
|
QObject.connect(sb, SIGNAL('search(PyQt_PyObject, PyQt_PyObject)'),
|
||||||
self._model.search)
|
self._model.search)
|
||||||
@ -1007,6 +1010,10 @@ class DeviceBooksModel(BooksModel):
|
|||||||
self.editable = editable
|
self.editable = editable
|
||||||
|
|
||||||
|
|
||||||
|
def set_editable(self, editable):
|
||||||
|
self.editable = editable
|
||||||
|
|
||||||
|
|
||||||
class SearchBox(QLineEdit):
|
class SearchBox(QLineEdit):
|
||||||
|
|
||||||
INTERVAL = 1000 #: Time to wait before emitting search signal
|
INTERVAL = 1000 #: Time to wait before emitting search signal
|
||||||
|
@ -39,10 +39,9 @@ entry_points = {
|
|||||||
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
'calibre-fontconfig = calibre.utils.fontconfig:main',
|
||||||
'calibre-parallel = calibre.parallel:main',
|
'calibre-parallel = calibre.parallel:main',
|
||||||
'calibre-customize = calibre.customize.ui:main',
|
'calibre-customize = calibre.customize.ui:main',
|
||||||
'pdftrim = calibre.ebooks.pdf.pdftrim:main' ,
|
'pdfmanipulate = calibre.ebooks.pdf.manipulate:main',
|
||||||
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
|
'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
|
||||||
'calibre-smtp = calibre.utils.smtp:main',
|
'calibre-smtp = calibre.utils.smtp:main',
|
||||||
|
|
||||||
],
|
],
|
||||||
'gui_scripts' : [
|
'gui_scripts' : [
|
||||||
__appname__+' = calibre.gui2.main:main',
|
__appname__+' = calibre.gui2.main:main',
|
||||||
@ -548,6 +547,3 @@ main = post_install
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
post_install()
|
post_install()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user