This commit is contained in:
Kovid Goyal 2007-05-18 06:38:23 +00:00
parent 366207763d
commit aeb69e9139
24 changed files with 0 additions and 8031 deletions

View File

@ -1,21 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Provides a command-line interface to the SONY Reader PRS-500.
For usage information run the script.
"""
__docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -1,325 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Provides a command-line and optional graphical interface to the SONY Reader PRS-500.
For usage information run the script.
"""
import StringIO, sys, time, os
from optparse import OptionParser
from libprs500 import __version__ as VERSION
from libprs500.prs500 import PRS500
from libprs500.cli.terminfo import TerminalController
from libprs500.errors import ArgumentError, DeviceError, DeviceLocked
MINIMUM_COL_WIDTH = 12 #: Minimum width of columns in ls output
def human_readable(size):
    """
    Convert a size in bytes into a short human readable form.

    Sizes below 1024 are returned unchanged (as a string, without suffix).
    Larger sizes are scaled by the appropriate power of 1024 and suffixed
    with K, M, G or T. Scaled values are truncated (not rounded) to one
    decimal place, e.g. 2000 bytes gives C{"1.9K"}.

    @param size: size in bytes
    @return: the formatted size as a string
    """
    # All divisors other than 1 are floats so that the result keeps its
    # fractional part regardless of the division semantics in use.
    if size < 1024:
        divisor, suffix = 1, ""
    elif size < 1024 ** 2:
        divisor, suffix = 1024., "K"
    elif size < 1024 ** 3:
        divisor, suffix = 1024. ** 2, "M"
    elif size < 1024 ** 4:
        divisor, suffix = 1024. ** 3, "G"
    else:
        # Anything from one tebibyte upwards is reported in T
        divisor, suffix = 1024. ** 4, "T"
    if divisor == 1:
        text = str(size)
    else:
        text = str(size / divisor)
    point = text.find(".")
    if point > -1:
        # Keep exactly one digit after the decimal point
        text = text[:point + 2]
    return text + suffix
class FileFormatter(object):
    """
    Wraps a file entry and exposes its attributes formatted for ls style output.
    """

    def __init__(self, file, term):
        """
        @param file: object providing the attributes C{is_dir}, C{is_readonly},
                     C{size}, C{ctime}, C{wtime}, C{name} and C{path}
        @param term: object providing C{BLUE}, C{GREEN} and C{NORMAL} control
                     strings, or a false value to disable coloring
        """
        self.term = term
        self.is_dir = file.is_dir
        self.is_readonly = file.is_readonly
        self.size = file.size
        self.ctime = file.ctime
        self.wtime = file.wtime
        self.name = file.name
        self.path = file.path

    @property
    def mode_string(self):
        """ The mode string for this file. There are only two modes read-only and read-write """
        mode, x = "-", "-"
        if self.is_dir:
            mode, x = "d", "x"
        if self.is_readonly:
            mode += "r-"+x+"r-"+x+"r-"+x
        else:
            mode += "rw"+x+"rw"+x+"rw"+x
        return mode

    @property
    def isdir_name(self):
        """ Return self.name + '/' if self is a directory """
        name = self.name
        if self.is_dir:
            name += '/'
        return name

    @property
    def name_in_color(self):
        """ The name in ANSI text. Directories are blue, ebooks are green """
        cname = self.name
        blue, green, normal = "", "", ""
        if self.term:
            blue, green, normal = self.term.BLUE, self.term.GREEN, self.term.NORMAL
        if self.is_dir:
            cname = blue + self.name + normal
        else:
            # Everything from the last "." onwards is treated as the extension
            ext = self.name[self.name.rfind("."):]
            if ext in (".pdf", ".rtf", ".lrf", ".lrx", ".txt"):
                cname = green + self.name + normal
        return cname

    @property
    def human_readable_size(self):
        """ File size in human readable form """
        return human_readable(self.size)

    @property
    def modification_time(self):
        """ Last modified time in the Linux ls -l format """
        return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.wtime))

    @property
    def creation_time(self):
        """ Creation time in the Linux ls -l format """
        return time.strftime("%Y-%m-%d %H:%M", time.localtime(self.ctime))
def info(dev):
info = dev.get_device_information()
print "Device name: ", info[0]
print "Device version: ", info[1]
print "Software version:", info[2]
print "Mime type: ", info[3]
def ls(dev, path, term, recurse=False, color=False, human_readable_size=False, ll=False, cols=0):
    """
    Return a listing of C{path} on the device, formatted like the output of ls.

    @param dev: device object; only C{dev.list(path, recurse)} is called. Each
                item of its result is indexed as C{(directory name, files)}.
    @param path: path to list. A single trailing slash is removed.
    @param term: passed on to L{FileFormatter}, which reads the color control
                 strings from it
    @param recurse: passed on to C{dev.list}; when true each directory name is
                    printed as a header before its contents
    @param color: if true names are emitted via C{FileFormatter.name_in_color}
    @param human_readable_size: if true sizes in the long listing are abbreviated
    @param ll: if true produce a long listing (mode, size, modification time, name)
    @param cols: width of the terminal in characters. If it is too small for
                 any multi column layout, one name is printed per line.
    @return: the listing as a string terminated by a single newline
    """
    def col_split(l, cols): # split list l into columns
        rows = len(l) // cols
        if len(l) % cols:
            rows += 1
        return [l[i::rows] for i in range(rows)]

    def row_widths(table): # Calculate widths for each column in the row-wise table
        widths = [0] * len(table[0])
        for row in table:
            for c, item in enumerate(row):
                if len(item) > widths[c]:
                    widths[c] = len(item)
        return widths

    output = StringIO.StringIO()
    if path.endswith("/"): path = path[:-1]
    dirs = dev.list(path, recurse)
    for dir in dirs:
        if recurse: print >>output, dir[0] + ":"
        lsoutput, lscoloutput = [], []
        files = dir[1]
        maxlen = 0
        if ll: # Calculate column width for size column
            for file in files:
                size = len(str(file.size))
                if human_readable_size:
                    file = FileFormatter(file, term)
                    size = len(file.human_readable_size)
                if size > maxlen: maxlen = size
        for file in files:
            file = FileFormatter(file, term)
            name = file.name if ll else file.isdir_name
            # lsoutput holds the plain names (used to measure widths),
            # lscoloutput the possibly colored names that are actually printed
            lsoutput.append(name)
            if color: name = file.name_in_color
            lscoloutput.append(name)
            if ll:
                size = str(file.size)
                if human_readable_size: size = file.human_readable_size
                print >>output, file.mode_string, ("%"+str(maxlen)+"s")%size, file.modification_time, name
        if not ll and len(lsoutput) > 0:
            # Try successively wider columns until every name fits
            trytable = []
            for colwidth in range(MINIMUM_COL_WIDTH, cols):
                trycols = int(cols/colwidth)
                trytable = col_split(lsoutput, trycols)
                longest = max([len(item) for row in trytable for item in row])
                if longest <= colwidth - 1:
                    break
            if not trytable:
                # cols is too small (e.g. the default of 0) for the loop above
                # to have tried any layout: fall back to a single column
                trytable = col_split(lsoutput, 1)
            rowwidths = row_widths(trytable)
            trytablecol = col_split(lscoloutput, len(trytable[0]))
            for r in range(len(trytable)):
                for c in range(len(trytable[r])):
                    # Pad based on the plain name, color sequences have no width
                    padding = rowwidths[c] - len(trytable[r][c])
                    print >>output, trytablecol[r][c], "".ljust(padding),
                print >>output
        print >>output
    listing = output.getvalue().rstrip()+ "\n"
    output.close()
    return listing
def main():
term = TerminalController()
cols = term.COLS
if not cols: # On windows terminal width is unknown
cols = 80
parser = OptionParser(usage="usage: %prog [options] command args\n\ncommand is one of: info, books, df, ls, cp, mkdir, touch, cat, rm\n\n"+
"For help on a particular command: %prog command", version="libprs500 version: " + VERSION)
parser.add_option("--log-packets", help="print out packet stream to stdout. "+\
"The numbers in the left column are byte offsets that allow the packet size to be read off easily.",
dest="log_packets", action="store_true", default=False)
parser.add_option("--unlock", help="Unlock device with KEY. For e.g. --unlock=1234", \
dest='key', default='-1')
parser.remove_option("-h")
parser.disable_interspersed_args() # Allow unrecognized options
options, args = parser.parse_args()
if len(args) < 1:
parser.print_help()
return 1
command = args[0]
args = args[1:]
dev = PRS500(key=options.key, log_packets=options.log_packets)
try:
if command == "df":
total = dev.total_space(end_session=False)
free = dev.free_space()
where = ("Memory", "Stick", "Card")
print "Filesystem\tSize \tUsed \tAvail \tUse%"
for i in range(3):
print "%-10s\t%s\t%s\t%s\t%s"%(where[i], human_readable(total[i]), human_readable(total[i]-free[i]), human_readable(free[i]),\
str(0 if total[i]==0 else int(100*(total[i]-free[i])/(total[i]*1.)))+"%")
elif command == "books":
print "Books in main memory:"
for book in dev.books():
print book
print "\nBooks on storage card:"
for book in dev.books(oncard=True): print book
elif command == "mkdir":
parser = OptionParser(usage="usage: %prog mkdir [options] path\nCreate a directory on the device\n\npath must begin with /,a:/ or b:/")
if len(args) != 1:
parser.print_help()
sys.exit(1)
dev.mkdir(args[0])
elif command == "ls":
parser = OptionParser(usage="usage: %prog ls [options] path\nList files on the device\n\npath must begin with /,a:/ or b:/")
parser.add_option("--color", help="show ls output in color", dest="color", action="store_true", default=False)
parser.add_option("-l", help="In addition to the name of each file, print the file type, permissions, and timestamp (the modification time, in the local timezone). Times are local.", dest="ll", action="store_true", default=False)
parser.add_option("-R", help="Recursively list subdirectories encountered. /dev and /proc are omitted", dest="recurse", action="store_true", default=False)
parser.remove_option("-h")
parser.add_option("-h", "--human-readable", help="show sizes in human readable format", dest="hrs", action="store_true", default=False)
options, args = parser.parse_args(args)
if len(args) != 1:
parser.print_help()
return 1
print ls(dev, args[0], term, color=options.color, recurse=options.recurse, ll=options.ll, human_readable_size=options.hrs, cols=cols),
elif command == "info":
info(dev)
elif command == "cp":
usage="usage: %prog cp [options] source destination\nCopy files to/from the device\n\n"+\
"One of source or destination must be a path on the device. \n\nDevice paths have the form\n"+\
"prs500:mountpoint/my/path\n"+\
"where mountpoint is one of /, a: or b:\n\n"+\
"source must point to a file for which you have read permissions\n"+\
"destination must point to a file or directory for which you have write permissions"
parser = OptionParser(usage=usage)
options, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
return 1
if args[0].startswith("prs500:"):
outfile = args[1]
path = args[0][7:]
if path.endswith("/"): path = path[:-1]
if os.path.isdir(outfile):
outfile = os.path.join(outfile, path[path.rfind("/")+1:])
try:
outfile = open(outfile, "wb")
except IOError, e:
print >> sys.stderr, e
parser.print_help()
return 1
dev.get_file(path, outfile)
outfile.close()
elif args[1].startswith("prs500:"):
try:
infile = open(args[0], "rb")
except IOError, e:
print >> sys.stderr, e
parser.print_help()
return 1
dev.put_file(infile, args[1][7:])
infile.close()
else:
parser.print_help()
return 1
elif command == "cat":
outfile = sys.stdout
parser = OptionParser(usage="usage: %prog cat path\nShow file on the device\n\npath should point to a file on the device and must begin with /,a:/ or b:/")
options, args = parser.parse_args(args)
if len(args) != 1:
parser.print_help()
return 1
if args[0].endswith("/"): path = args[0][:-1]
else: path = args[0]
outfile = sys.stdout
dev.get_file(path, outfile)
elif command == "rm":
parser = OptionParser(usage="usage: %prog rm path\nDelete files from the device\n\npath should point to a file or empty directory on the device "+\
"and must begin with /,a:/ or b:/\n\n"+\
"rm will DELETE the file. Be very CAREFUL")
options, args = parser.parse_args(args)
if len(args) != 1:
parser.print_help()
return 1
dev.rm(args[0])
elif command == "touch":
parser = OptionParser(usage="usage: %prog touch path\nCreate an empty file on the device\n\npath should point to a file on the device and must begin with /,a:/ or b:/\n\n"+
"Unfortunately, I cant figure out how to update file times on the device, so if path already exists, touch does nothing" )
options, args = parser.parse_args(args)
if len(args) != 1:
parser.print_help()
return 1
dev.touch(args[0])
else:
parser.print_help()
if dev.handle: dev.close()
return 1
except DeviceLocked:
print >> sys.stderr, "The device is locked. Use the --unlock option"
except (ArgumentError, DeviceError), e:
print >>sys.stderr, e
return 1
return 0
if __name__ == '__main__':
    # Propagate the status returned by main() as the process exit code
    sys.exit(main())

View File

@ -1,208 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import sys, re
""" Get information about the terminal we are running in """
class TerminalController:
    """
    A class that can be used to portably generate formatted output to
    a terminal.

    `TerminalController` defines a set of instance variables whose
    values are initialized to the control sequence necessary to
    perform a given action. These can be simply included in normal
    output to the terminal:

        >>> term = TerminalController()
        >>> print 'This is '+term.GREEN+'green'+term.NORMAL

    Alternatively, the `render()` method can used, which replaces
    '${action}' with the string required to perform 'action':

        >>> term = TerminalController()
        >>> print term.render('This is ${GREEN}green${NORMAL}')

    If the terminal doesn't support a given action, then the value of
    the corresponding instance variable will be set to ''. As a
    result, the above code will still work on terminals that do not
    support color, except that their output will not be colored.
    Also, this means that you can test whether the terminal supports a
    given action by simply testing the truth value of the
    corresponding instance variable:

        >>> term = TerminalController()
        >>> if term.CLEAR_SCREEN:
        ...     print 'This terminal supports clearing the screen.'

    Finally, if the width and height of the terminal are known, then
    they will be stored in the `COLS` and `LINES` attributes.
    """
    # Cursor movement:
    BOL = ''             #: Move the cursor to the beginning of the line
    UP = ''              #: Move the cursor up one line
    DOWN = ''            #: Move the cursor down one line
    LEFT = ''            #: Move the cursor left one char
    RIGHT = ''           #: Move the cursor right one char

    # Deletion:
    CLEAR_SCREEN = ''    #: Clear the screen and move to home position
    CLEAR_EOL = ''       #: Clear to the end of the line.
    CLEAR_BOL = ''       #: Clear to the beginning of the line.
    CLEAR_EOS = ''       #: Clear to the end of the screen

    # Output modes:
    BOLD = ''            #: Turn on bold mode
    BLINK = ''           #: Turn on blink mode
    DIM = ''             #: Turn on half-bright mode
    REVERSE = ''         #: Turn on reverse-video mode
    UNDERLINE = ''       #: Turn on underline mode
    NORMAL = ''          #: Turn off all modes

    # Cursor display:
    HIDE_CURSOR = ''     #: Make the cursor invisible
    SHOW_CURSOR = ''     #: Make the cursor visible

    # Terminal size:
    COLS = None          #: Width of the terminal (None for unknown)
    LINES = None         #: Height of the terminal (None for unknown)

    # Foreground colors:
    BLACK = BLUE = GREEN = CYAN = RED = MAGENTA = YELLOW = WHITE = ''

    # Background colors:
    BG_BLACK = BG_BLUE = BG_GREEN = BG_CYAN = ''
    BG_RED = BG_MAGENTA = BG_YELLOW = BG_WHITE = ''

    # Pairs of attribute name=terminfo capability name
    _STRING_CAPABILITIES = """
    BOL=cr UP=cuu1 DOWN=cud1 LEFT=cub1 RIGHT=cuf1
    CLEAR_SCREEN=clear CLEAR_EOL=el CLEAR_BOL=el1 CLEAR_EOS=ed BOLD=bold
    BLINK=blink DIM=dim REVERSE=rev UNDERLINE=smul NORMAL=sgr0
    HIDE_CURSOR=civis SHOW_CURSOR=cnorm""".split()
    # Color names in the index order used with the setf/setb capabilities
    _COLORS = """BLACK BLUE GREEN CYAN RED MAGENTA YELLOW WHITE""".split()
    # Color names in the index order used with the setaf/setab capabilities
    _ANSICOLORS = "BLACK RED GREEN YELLOW BLUE MAGENTA CYAN WHITE".split()

    def __init__(self, term_stream=sys.stdout):
        """
        Create a `TerminalController` and initialize its attributes
        with appropriate values for the current terminal.
        `term_stream` is the stream that will be used for terminal
        output; if this stream is not a tty, then the terminal is
        assumed to be a dumb terminal (i.e., have no capabilities).
        """
        # Curses isn't available on all platforms
        try:
            import curses
        except ImportError:
            return

        # If the stream isn't a tty, then assume it has no capabilities.
        if not term_stream.isatty():
            return

        # Check the terminal type. If we fail, then assume that the
        # terminal has no capabilities.
        try:
            curses.setupterm()
        except Exception:
            return

        # Look up numeric capabilities. tigetnum() reports a missing
        # capability with a negative number, which must not be mistaken
        # for a size, so in that case the attribute stays None.
        cols = curses.tigetnum('cols')
        if cols > 0:
            self.COLS = cols
        lines = curses.tigetnum('lines')
        if lines > 0:
            self.LINES = lines

        # Look up string capabilities.
        for capability in self._STRING_CAPABILITIES:
            (attrib, cap_name) = capability.split('=')
            setattr(self, attrib, self._tigetstr(cap_name) or '')

        # Colors. The ANSI variants are looked up last, so they take
        # precedence when the terminal provides both.
        set_fg = self._tigetstr('setf')
        if set_fg:
            for i, color in enumerate(self._COLORS):
                setattr(self, color, curses.tparm(set_fg, i) or '')
        set_fg_ansi = self._tigetstr('setaf')
        if set_fg_ansi:
            for i, color in enumerate(self._ANSICOLORS):
                setattr(self, color, curses.tparm(set_fg_ansi, i) or '')
        set_bg = self._tigetstr('setb')
        if set_bg:
            for i, color in enumerate(self._COLORS):
                setattr(self, 'BG_'+color, curses.tparm(set_bg, i) or '')
        set_bg_ansi = self._tigetstr('setab')
        if set_bg_ansi:
            for i, color in enumerate(self._ANSICOLORS):
                setattr(self, 'BG_'+color, curses.tparm(set_bg_ansi, i) or '')

    def _tigetstr(self, cap_name):
        """ Return the string capability `cap_name` with delays removed, or '' """
        # String capabilities can include "delays" of the form "$<2>".
        # For any modern terminal, we should be able to just ignore
        # these, so strip them out.
        import curses
        cap = curses.tigetstr(cap_name) or ''
        return re.sub(r'\$<\d+>[/*]?', '', cap)

    def render(self, template):
        """
        Replace each $-substitutions in the given template string with
        the corresponding terminal control string (if it's defined) or
        '' (if it's not).
        """
        return re.sub(r'\$\$|\${\w+}', self._render_sub, template)

    def _render_sub(self, match):
        """ Return the replacement for a single match found by `render()` """
        s = match.group()
        if s == '$$':
            return s
        # Unknown names render as '' as promised by render()
        return getattr(self, s[2:-1], '')
#######################################################################
# Example use case: progress bar
#######################################################################
class ProgressBar:
    """
    A 3-line progress bar, which looks like::

                                Header
        20% [===========----------------------------------]
                           progress message

    The progress bar is colored, if the terminal supports color
    output; and adjusts to the width of the terminal.
    """
    BAR = '%3d%% ${GREEN}[${BOLD}%s%s${NORMAL}${GREEN}]${NORMAL}\n'
    HEADER = '${BOLD}${CYAN}%s${NORMAL}\n\n'

    def __init__(self, term, header):
        """
        Draw the header and an empty bar on stdout.

        `term` must provide non-empty `CLEAR_EOL`, `UP` and `BOL`
        control strings, a `COLS` attribute and a `render()` method;
        otherwise a ValueError is raised. `header` is the text centered
        above the bar.
        """
        self.term = term
        if not (self.term.CLEAR_EOL and self.term.UP and self.term.BOL):
            raise ValueError("Terminal isn't capable enough -- you "
                             "should use a simpler progress display.")
        self.width = self.term.COLS or 75
        self.bar = term.render(self.BAR)
        self.header = self.term.render(self.HEADER % header.center(self.width))
        self.cleared = 1 #: true if we haven't drawn the bar yet.
        self.update(0, '')

    def update(self, percent, message):
        """
        Redraw the bar.

        `percent` is the fraction completed, between 0 and 1; values
        outside of this range are clamped so the bar never grows beyond
        its width. `message` is centered on the line below the bar.
        """
        percent = min(max(percent, 0), 1)
        if self.cleared:
            sys.stdout.write(self.header)
            self.cleared = 0
        # 10 characters of the width are kept for the percentage and brackets
        n = int((self.width-10)*percent)
        sys.stdout.write(
            self.term.BOL + self.term.UP + self.term.CLEAR_EOL +
            (self.bar % (100*percent, '='*n, '-'*(self.width-10-n))) +
            self.term.CLEAR_EOL + message.center(self.width))

    def clear(self):
        """ Erase the bar, if it is currently drawn """
        if not self.cleared:
            sys.stdout.write(self.term.BOL + self.term.CLEAR_EOL +
                             self.term.UP + self.term.CLEAR_EOL +
                             self.term.UP + self.term.CLEAR_EOL)
            self.cleared = 1

View File

@ -1,94 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
This package contains logic to read and write LRF files. The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
At the time of writing, this package only supports reading and writing LRF meta information. See L{meta}.
"""
from optparse import OptionParser, OptionValueError
from libprs500.lrf.pylrs.pylrs import Book as _Book
from libprs500.lrf.pylrs.pylrs import TextBlock, Header, PutObj, Paragraph, TextStyle
from libprs500 import __version__ as VERSION
__docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
class PRS500_PROFILE(object):
    """ Geometry constants describing the PRS-500 as a conversion target """
    # NOTE(review): sizes are presumably in pixels -- confirm against pylrs
    screen_width, screen_height = 600, 800
    page_width, page_height = 575, 747
    dpi = 166
def profile_from_string(option, opt_str, value, parser):
    """
    optparse callback that stores the profile class named by C{value}.

    @param option: the option being processed; its C{dest} names the attribute to set
    @param opt_str: the option string seen on the command line (unused)
    @param value: name of the requested profile
    @param parser: the parser, whose C{values} object receives the profile
    @raise OptionValueError: if C{value} does not name an implemented profile
    """
    if value != 'prs500':
        raise OptionValueError('Profile: '+value+' is not implemented')
    setattr(parser.values, option.dest, PRS500_PROFILE)
class ConversionError(Exception):
    """ Exception type for failures while converting to LRF """
def option_parser(usage):
    """
    Build the option parser shared by the LRF conversion tools.

    @param usage: usage string passed on to C{OptionParser}
    @return: an C{OptionParser} with the metadata, output, profile and
             debug options defined
    """
    parser = OptionParser(usage=usage, version='libprs500 '+VERSION,
                          epilog='Created by Kovid Goyal')

    # (option strings, keyword arguments) in the order they appear in the help
    metadata_options = (
        (('--header',),
         dict(action='store_true', default=False, dest='header',
              help='Add a header to all the pages with title and author.')),
        (("-t", "--title"),
         dict(action="store", type="string", dest="title",
              help="Set the title. Default: filename.")),
        (("-a", "--author"),
         dict(action="store", type="string", dest="author",
              help="Set the author. Default: %default", default='Unknown')),
        (("--freetext",),
         dict(action="store", type="string", dest="freetext",
              help="Set the comments.", default=' ')),
        (("--category",),
         dict(action="store", type="string", dest="category",
              help="Set the category", default=' ')),
        (('--title-sort',),
         dict(action='store', default='', dest='title_sort',
              help='Sort key for the title')),
        (('--author-sort',),
         dict(action='store', default='', dest='author_sort',
              help='Sort key for the author')),
        (('--publisher',),
         dict(action='store', default='Unknown', dest='publisher',
              help='Publisher')),
    )
    metadata = parser.add_option_group('METADATA OPTIONS')
    for flags, spec in metadata_options:
        metadata.add_option(*flags, **spec)

    profiles = ['prs500']
    profile_help = ('Profile of the target device for which this LRF is '
                    'being generated. Default: ' + profiles[0] +
                    '\nSupported profiles: ' + ', '.join(profiles))
    parser.add_option('-o', '--output', action='store', default=None,
                      help='Output file name. Default is derived from input filename')
    parser.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile',
                      type='choice', choices=profiles, action='callback',
                      callback=profile_from_string, help=profile_help)

    debug_options = (
        (('--verbose',),
         dict(dest='verbose', action='store_true', default=False,
              help='Be verbose while processing')),
        (('--lrs',),
         dict(action='store_true', dest='lrs',
              help='Convert to LRS', default=False)),
    )
    debug = parser.add_option_group('DEBUG OPTIONS')
    for flags, spec in debug_options:
        debug.add_option(*flags, **spec)
    return parser
def Book(font_delta=0, header=None, profile=PRS500_PROFILE, **settings):
    """
    Create a pylrs Book with the default text and page styles of this package.

    @param font_delta: added to the default font size in steps of 20
    @param header: content appended to the text block of the page header.
                   If false, no header is added.
    @param profile: object whose C{page_width} and C{page_height} set the text area
    @param settings: passed on unchanged to the pylrs Book constructor
    @return: the object returned by the pylrs Book constructor
    """
    ps = dict(textwidth=profile.page_width,
              textheight=profile.page_height)
    if header:
        hdr = Header()
        hb = TextBlock(textStyle=TextStyle(align='foot', fontsize=60))
        hb.append(header)
        hdr.PutObj(hb)
        ps['headheight'] = 30
        # The page style refers to the Header object, not to the raw content
        ps['header'] = hdr
        ps['topmargin'] = 10
    return _Book(textstyledefault=dict(fontsize=100+font_delta*20,
                                       parindent=80, linespace=12),
                 pagestyledefault=ps, **settings)

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
This package contains code to convert HTML ebooks to LRF ebooks.
"""
__docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

File diff suppressed because it is too large Load Diff

View File

@ -1,89 +0,0 @@
<html>
<head>
<style type='text/css'>
.toc { page-break-after: always; text-indent: 0em; }
</style>
</head>
<h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
<p>
This file contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf,</span> the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit <span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
</p>
<br/>
<h2><a name='toc'>Table of Contents</a></h2>
<ul style='page-break-after:always'>
<li><a href='#lists'>Demonstration of Lists</a></li>
<li><a href='#text'>Text formatting and ruled lines</a></li>
<li><a href='#images'>Inline images</a></li>
<li><a href='#recursive'>Recursive link following</a></li>
<li><a href='demo_ext.html'>The HTML used to create this file</a></li>
</ul>
<h2><a name='lists'>Lists</a></h2>
<p><h3>Unordered lists</h3>
<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>
</p>
<p><h3>Ordered lists</h3>
<ol>
<li>Item 1</li>
<li>Item 2</li>
</ol>
</p>
<br/>
<p>
Note that nested lists are not supported.
</p>
<p class='toc'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
<h2><a name='text'>Text formatting</a></h2>
<p>
A simple <i>paragraph</i> of <b>formatted
<i>text</i></b> with a ruled line following it.
</p>
<hr/>
<p> A
<span style='font-style:italic'>similar</span>
paragraph, but now using
<span style='font-weight:bold'>CSS</span>
to perform the text formatting.</p>
<hr/>
<center>A centered phrase</center>
<span style='text-align:right'>A right aligned phrase</span>
A normal phrase
<hr />
<p> A paragraph containing a <em>&lt;blockquote&gt;</em>
<blockquote>This is blockquoted text. It is rendered in a separate block with margins.</blockquote>The above text should be distinct from the rest of the paragraph.
</p>
<hr/>
<p style='text-indent:30em'>A very indented paragraph</p>
<p style='text-indent:0em'>An unindented paragraph</p>
<p>A default indented paragraph</p>
<p class='toc'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
<h2><a name='images'>Inline images</a></h2>
<p>
Here I demonstrate the use of inline images in the midst of text. Here is a small image <img src='small.jpg' /> embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block <img style="text-align:center" src='medium.jpg' /> and finally we have a large image which won't fit on this page. Try changing sizes from S to M to L and see how the images behave. <img align='center' src='large.jpg' />
</p>
<p class='toc'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
<h2><a name='recursive'>Recursive link following</a></h2>
<p>
<span style='font:monospace'>html2lrf</span> follows links in HTML files that point to other files, recursively. Thus it can be used to convert a whole tree of HTML files into a single LRF file.
</p>
<p class='toc'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

View File

@ -1,628 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
This module presents an easy to use interface for getting and setting
meta information in LRF files.
Just create an L{LRFMetaFile} object and use its properties
to get and set meta information. For example:
>>> lrf = LRFMetaFile("mybook.lrf")
>>> print lrf.title, lrf.author
>>> lrf.category = "History"
"""
import struct, zlib, sys
from shutil import copyfileobj
from cStringIO import StringIO
import xml.dom.minidom as dom
from functools import wraps
from libprs500.prstypes import field
from libprs500.metadata import MetaInformation
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
DWORD = "<I" #: Unsigned integer little endian encoded in 4 bytes
QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes
class versioned_field(field):
    """
    A L{field} that is only usable when L{enabled} returns true.
    """

    def __init__(self, vfield, version, start=0, fmt=WORD):
        """
        @param vfield: value that is compared against C{version}
        @param version: the field is enabled only if C{vfield > version}
        @param start: passed on to L{field}
        @param fmt: passed on to L{field}
        """
        field.__init__(self, start=start, fmt=fmt)
        self.vfield = vfield
        self.version = version

    def enabled(self):
        """ Return whether this field may be read and written """
        # NOTE(review): compares the stored vfield object itself with
        # version -- confirm that callers pass a comparable value
        return self.vfield > self.version

    def __get__(self, obj, typ=None):
        """ Return the value of the field, or None if it is disabled """
        if not self.enabled():
            return None
        return field.__get__(self, obj, typ=typ)

    def __set__(self, obj, val):
        """ Set the field; raises L{LRFException} if it is disabled """
        if self.enabled():
            field.__set__(self, obj, val)
        else:
            raise LRFException("Trying to set disabled field")
class LRFException(Exception):
    """ Exception type raised by the field descriptors in this module """
class fixed_stringfield(object):
    """ A field storing a fixed length string. """

    def __init__(self, length=8, start=0):
        """
        @param length: Size of this string
        @param start: The byte at which this field is stored in the buffer
        """
        self._length = length
        self._start = start

    def __get__(self, obj, typ=None):
        """ Return the string read via C{obj.unpack} """
        length = str(self._length)
        return obj.unpack(start=self._start, fmt="<"+length+"s")[0]

    def __set__(self, obj, val):
        """
        Store C{val} via C{obj.pack}. Values that are not strings are
        converted with C{str} first.

        @raise LRFException: if the string does not have exactly the length
                             of this field
        """
        if not isinstance(val, str):
            val = str(val)
        if len(val) != self._length:
            raise LRFException("Trying to set fixed_stringfield with a " + \
                               "string of incorrect length")
        obj.pack(val, start=self._start, fmt="<"+str(len(val))+"s")

    def __repr__(self):
        return "A string of length " + str(self._length) + \
               " starting at byte " + str(self._start)
class xml_attr_field(object):
    """
    Descriptor that gets and sets an attribute of an XML element stored in
    the C{info} document of the owning object.
    """

    def __init__(self, tag_name, attr, parent='BookInfo'):
        """
        @param tag_name: The XML tag whose attribute we operate on
        @param attr: The name of the attribute
        @param parent: The tagname of the parent element of C{tag_name}
        """
        self.tag_name = tag_name
        self.parent = parent
        self.attr= attr

    def _locate(self, document):
        """ Return the last C{tag_name} element directly below C{parent}, or None """
        found = None
        for node in document.getElementsByTagName(self.tag_name):
            if node.parentNode.nodeName == self.parent:
                found = node
        return found

    def __get__(self, obj, typ=None):
        """ Return the data in this field or '' if the field is empty """
        target = self._locate(dom.parseString(obj.info))
        if target is not None and target.hasAttribute(self.attr):
            return target.getAttribute(self.attr)
        return ''

    def __set__(self, obj, val):
        """ Set the attribute, if the element exists, and store the document back """
        if val == None:
            val = ""
        document = dom.parseString(obj.info)
        target = self._locate(document)
        if target is not None:
            target.setAttribute(self.attr, val)
        # The document is re-serialized even if no element was found
        obj.info = document.toxml(encoding='utf-16')

    def __repr__(self):
        return "XML Attr Field: " + self.tag_name + " in " + self.parent

    def __str__(self):
        return self.tag_name+'.'+self.attr
class xml_field(object):
    """
    Descriptor that gets and sets XML based meta information from an LRF file.
    Works for simple XML fields of the form <tagname>data</tagname>
    """

    def __init__(self, tag_name, parent="BookInfo"):
        """
        @param tag_name: The XML tag whose data we operate on
        @param parent: The tagname of the parent element of C{tag_name}
        """
        self.tag_name = tag_name
        self.parent = parent

    def __get__(self, obj, typ=None):
        """ Return the data in this field or '' if the field is empty """
        document = dom.parseString(obj.info)
        match = None
        # The last matching element wins
        for node in document.getElementsByTagName(self.tag_name):
            if node.parentNode.nodeName == self.parent:
                match = node
        if match is None:
            return ""
        match.normalize()
        if not match.hasChildNodes():
            return ""
        return match.firstChild.data.strip()

    def __set__(self, obj, val):
        """
        Replace the content of the field with C{val}, creating the element
        below the first C{parent} element if it does not exist yet, and
        store the document back into C{obj.info}.
        """
        if val == None:
            val = ""
        document = dom.parseString(obj.info)
        if not val:
            val = u''
        if type(val).__name__ != 'unicode':
            val = unicode(val, 'utf-8')

        target = None
        for node in document.getElementsByTagName(self.tag_name):
            if node.parentNode.nodeName == self.parent:
                target = node

        if target is None:
            # No usable element: append a new one holding an empty text node
            target = document.createElement(self.tag_name)
            target.appendChild(dom.Text())
            holder = document.getElementsByTagName(self.parent)[0]
            holder.appendChild(target)
        else:
            # Drop the existing content and start from an empty text node
            target.normalize()
            while target.hasChildNodes():
                target.removeChild(target.lastChild)
            target.appendChild(dom.Text())

        target.firstChild.data = val
        obj.info = document.toxml(encoding='utf-16')

    def __str__(self):
        return self.tag_name

    def __repr__(self):
        return "XML Field: " + self.tag_name + " in " + self.parent
def insert_into_file(fileobj, data, start, end):
    """
    Replace the part of C{fileobj} between C{start} and C{end} with C{data}.

    Everything from C{end} onwards is preserved and moved so that it
    directly follows the inserted data. If end == start nothing is
    overwritten. Do not use this function to append data to a file.

    @param fileobj: file like object
    @param data: data to be inserted into fileobj
    @param start: The position at which to start inserting data
    @param end: The position in fileobj of data that must not be overwritten
    @return: C{start + len(data) - end}
    """
    # Save everything that has to survive the insertion
    fileobj.seek(end)
    tail = fileobj.read()

    # Write the new data and cut the file off right behind it
    fileobj.seek(start)
    fileobj.write(data)
    fileobj.flush()
    fileobj.truncate()
    delta = fileobj.tell() - end # < 0 if len(data) < end-start

    # Put the saved part back behind the new data
    if tail:
        fileobj.write(tail)
    fileobj.flush()
    return delta
def get_metadata(stream):
    """
    Return basic meta-data about the LRF file in C{stream} as a
    L{MetaInformation} object.

    Title, author, category, classification and publisher are set to
    C{None} if they are empty or contain a placeholder text.
    """
    lrf = LRFMetaFile(stream)
    mi = MetaInformation(lrf.title.strip(), lrf.author.strip())
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip()
    mi.classification = lrf.classification.strip()
    mi.publisher = lrf.publisher.strip()

    def is_placeholder(value, markers):
        """ True if C{value} is empty or contains one of C{markers} """
        if not value:
            return True
        lowered = value.lower()
        for marker in markers:
            if marker in lowered:
                return True
        return False

    # attribute name -> texts (lower case) that mark the value as unset
    checks = (
        ('title', ('unknown',)),
        ('author', ('unknown',)),
        ('category', ('unknown',)),
        ('classification', ('unknown',)),
        ('publisher', ('unknown', 'some publisher')),
    )
    for name, markers in checks:
        if is_placeholder(getattr(mi, name), markers):
            setattr(mi, name, None)
    return mi
class LRFMetaFile(object):
    """ Has properties to read and write all Meta information in a LRF file. """
    #: The first 6 bytes of all valid LRF files
    LRF_HEADER = 'LRF'.encode('utf-16le')

    # Fixed position header fields
    lrf_header = fixed_stringfield(length=6, start=0x0)
    version = field(fmt=WORD, start=0x8)
    xor_key = field(fmt=WORD, start=0xa)
    root_object_id = field(fmt=DWORD, start=0xc)
    number_of_objets = field(fmt=QWORD, start=0x10)
    object_index_offset = field(fmt=QWORD, start=0x18)
    binding = field(fmt=BYTE, start=0x24)
    dpi = field(fmt=WORD, start=0x26)
    width = field(fmt=WORD, start=0x2a)
    height = field(fmt=WORD, start=0x2c)
    color_depth = field(fmt=BYTE, start=0x2e)
    toc_object_id = field(fmt=DWORD, start=0x44)
    toc_object_offset = field(fmt=DWORD, start=0x48)
    compressed_info_size = field(fmt=WORD, start=0x4c)
    thumbnail_type = versioned_field(version, 800, fmt=WORD, start=0x4e)
    thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
    uncompressed_info_size = versioned_field(compressed_info_size, 0, \
                                             fmt=DWORD, start=0x54)

    # Fields stored in the XML meta information block
    title = xml_field("Title", parent="BookInfo")
    title_reading = xml_attr_field("Title", 'reading', parent="BookInfo")
    author = xml_field("Author", parent="BookInfo")
    author_reading = xml_attr_field("Author", 'reading', parent="BookInfo")
    # 16 characters. First two chars should be FB for personal use ebooks.
    book_id = xml_field("BookID", parent="BookInfo")
    publisher = xml_field("Publisher", parent="BookInfo")
    label = xml_field("Label", parent="BookInfo")
    category = xml_field("Category", parent="BookInfo")
    classification = xml_field("Classification", parent="BookInfo")
    free_text = xml_field("FreeText", parent="BookInfo")
    # Should use ISO 639 language codes
    language = xml_field("Language", parent="DocInfo")
    creator = xml_field("Creator", parent="DocInfo")
    # Format is %Y-%m-%d
    creation_date = xml_field("CreationDate", parent="DocInfo")
    producer = xml_field("Producer", parent="DocInfo")
    page = xml_field("Page", parent="DocInfo")

    def safe(func):
        """
        Decorator that ensures that function calls leave the pos
        in the underlying file unchanged
        """
        @wraps(func)
        def restore_pos(*args, **kwargs):
            obj = args[0]
            pos = obj._file.tell()
            res = func(*args, **kwargs)
            # Only go back if the file is still at least pos bytes long,
            # otherwise stay at its end
            obj._file.seek(0, 2)
            if obj._file.tell() >= pos:
                obj._file.seek(pos)
            return res
        return restore_pos

    def safe_property(func):
        """
        Decorator that ensures that read or writing a property leaves
        the position in the underlying file unchanged

        C{func} must return a dictionary of keyword arguments for
        C{property}; its C{fget} and C{fset} entries are wrapped.
        """
        def decorator(f):
            def restore_pos(*args, **kwargs):
                obj = args[0]
                pos = obj._file.tell()
                res = f(*args, **kwargs)
                obj._file.seek(0, 2)
                if obj._file.tell() >= pos:
                    obj._file.seek(pos)
                return res
            return restore_pos
        locals_ = func()
        for accessor in ("fget", "fset"):
            if accessor in locals_:
                locals_[accessor] = decorator(locals_[accessor])
        return property(**locals_)

    @safe_property
    def info():
        doc = \
        """
        Document meta information in raw XML format as a byte string encoded in
        utf-16.
        To set use raw XML in a byte string encoded in utf-16.
        """
        def fget(self):
            if self.compressed_info_size == 0:
                raise LRFException("This document has no meta info")
            # The stored size includes 4 bytes that are not part of the
            # compressed stream
            size = self.compressed_info_size - 4
            self._file.seek(self.info_start)
            try:
                src = zlib.decompress(self._file.read(size))
                if len(src) != self.uncompressed_info_size:
                    raise LRFException("Decompression of document meta info "
                                       "yielded unexpected results")
                candidate = unicode(src, 'utf-16')
                # LRF files produced with makelrf dont have a correctly
                # encoded metadata block.
                # Decoding using latin1 is the most useful for me since I
                # occasionally read french books.
                if u"Info" not in candidate:
                    candidate = unicode(src, 'latin1', errors='ignore')
                    if candidate[-1:] == '\0':
                        candidate = candidate[:-1]
                    candidate = dom.parseString(candidate.encode('utf-8')).\
                                    toxml(encoding='utf-16')
                else:
                    candidate = candidate.encode('utf-16')
                return candidate.strip()
            except zlib.error:
                raise LRFException("Unable to decompress document meta information")

        def fset(self, info):
            self.uncompressed_info_size = len(info)
            stream = zlib.compress(info)
            orig_size = self.compressed_info_size
            self.compressed_info_size = len(stream) + 4
            delta = insert_into_file(self._file, stream, self.info_start, \
                                     self.info_start + orig_size - 4)
            # Everything behind the meta information has moved by delta
            self.toc_object_offset += delta
            self.object_index_offset += delta
            self.update_object_offsets(delta)

        return { "fget":fget, "fset":fset, "doc":doc }

    @safe_property
    def thumbnail_pos():
        doc = """ The position of the thumbnail in the LRF file """
        def fget(self):
            return self.info_start + self.compressed_info_size-4
        return { "fget":fget, "doc":doc }

    @classmethod
    def _detect_thumbnail_type(cls, slice):
        """
        Guess the image format from the start of the image data. GIF is
        assumed if no other format is recognized.

        @param slice: The first 16 bytes of the thumbnail
        @return: The code to be stored in L{thumbnail_type}
        """
        ttype = 0x14 # GIF
        # Later matches take precedence over earlier ones
        if "PNG" in slice:
            ttype = 0x12
        if "BM" in slice:
            ttype = 0x13
        if "JFIF" in slice:
            ttype = 0x11
        return ttype

    @safe_property
    def thumbnail():
        doc = \
        """
        The thumbnail.
        Represented as a string.
        The string you would get from the file read function.
        """
        def fget(self):
            size = self.thumbnail_size
            if size:
                self._file.seek(self.thumbnail_pos)
                return self._file.read(size)

        def fset(self, data):
            if self.version <= 800:
                raise LRFException("Cannot store thumbnails in LRF files "
                                   "of version <= 800")
            slice = data[0:16]
            orig_size = self.thumbnail_size
            self.thumbnail_size = len(data)
            delta = insert_into_file(self._file, data, self.thumbnail_pos, \
                                     self.thumbnail_pos + orig_size)
            # Everything behind the thumbnail has moved by delta
            self.toc_object_offset += delta
            self.object_index_offset += delta
            self.thumbnail_type = self._detect_thumbnail_type(slice)
            self.update_object_offsets(delta)

        return { "fget":fget, "fset":fset, "doc":doc }

    def __init__(self, file):
        """
        @param file: A file object opened in the r+b mode
        @raise LRFException: If the file does not start with L{LRF_HEADER}
        """
        file.seek(0, 2)
        self.size = file.tell()
        self._file = file
        if self.lrf_header != LRFMetaFile.LRF_HEADER:
            # Not every file like object has a name
            name = getattr(file, 'name', repr(file))
            raise LRFException(name + \
                " has an invalid LRF header. Are you sure it is an LRF file?")
        # Byte at which the compressed meta information starts
        self.info_start = 0x58 if self.version > 800 else 0x53

    @safe
    def update_object_offsets(self, delta):
        """ Run through the LRF Object index changing the offset by C{delta}. """
        self._file.seek(self.object_index_offset)
        # Each pass skips 4 bytes, rewrites the 4 byte offset that follows
        # and then moves 12 bytes on (unpack and pack restore the position).
        # The loop normally ends when unpack fails with struct.error
        # because fewer than 4 bytes are left in the file.
        while True:
            try:
                self._file.read(4)
            except EOFError:
                break
            pos = self._file.tell()
            try:
                offset = self.unpack(fmt=DWORD, start=pos)[0] + delta
            except struct.error:
                break
            if offset >= (2**8)**4:
                # New offset is larger than a DWORD, so leave
                # offset unchanged. I'm assuming offset is an offset from
                # the previous object, otherwise this would impose a ~ 4GB limit
                # on LRF files.
                offset -= delta
            self.pack(offset, fmt=DWORD, start=pos)
            try:
                self._file.read(12)
            except EOFError:
                break
        self._file.flush()

    @safe
    def unpack(self, fmt=DWORD, start=0):
        """
        Return decoded data from file.

        @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
        @param start: Position in file from which to decode
        @return: The tuple returned by C{struct.unpack}
        """
        self._file.seek(start)
        return struct.unpack(fmt, self._file.read(struct.calcsize(fmt)))

    @safe
    def pack(self, *args, **kwargs):
        """
        Encode C{args} and write them to file.
        C{kwargs} must contain the keywords C{fmt} and C{start}

        @param args: The values to pack
        @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
        @param start: Position in file at which to write encoded data
        """
        encoded = struct.pack(kwargs["fmt"], *args)
        self._file.seek(kwargs["start"])
        self._file.write(encoded)
        self._file.flush()

    def thumbail_extension(self):
        """
        Return the extension for the thumbnail image type as specified
        by L{self.thumbnail_type}. If the LRF file was created by buggy
        software, the extension may be incorrect. See L{self.fix_thumbnail_type}.
        """
        extensions = { 0x11:"jpeg", 0x12:"png", 0x13:"bmp" }
        return extensions.get(self.thumbnail_type, "gif")

    def fix_thumbnail_type(self):
        """
        Attempt to guess the thumbnail image format and set
        L{self.thumbnail_type} accordingly. Does nothing if the file has
        no thumbnail.
        """
        data = self.thumbnail
        if not data:
            return
        self.thumbnail_type = self._detect_thumbnail_type(data[0:16])

    def seek(self, *args):
        """ See L{file.seek} """
        return self._file.seek(*args)

    def tell(self):
        """ See L{file.tell} """
        return self._file.tell()

    def read(self):
        """ See L{file.read} """
        return self._file.read()

    def write(self, val):
        """ See L{file.write} """
        self._file.write(val)
def parse_options(argv=None, cli=True):
    """
    Parse the command line of the LRF meta information tool.

    @param argv: The arguments to parse, without the program name.
                 Defaults to C{sys.argv[1:]}
    @param cli: If True the usage message is printed when the arguments
                do not name exactly one file
    @return: The tuple C{(options, args, parser)}
    @raise LRFException: If the arguments do not name exactly one file
    """
    from optparse import OptionParser
    from libprs500 import __version__ as VERSION
    if not argv:
        argv = sys.argv[1:]
    parser = OptionParser(usage = \
"""%prog [options] mybook.lrf
Show/edit the metadata in an LRF file.
WARNING: Based on reverse engineering the LRF format.
Making changes may render your LRF file unreadable.
""", version=VERSION)
    parser.add_option("-t", "--title", action="store", type="string", \
                    dest="title", help="Set the book title")
    parser.add_option('--title-sort', action='store', type='string', default=None,
                      dest='title_reading', help='Set sort key for the title')
    parser.add_option("-a", "--author", action="store", type="string", \
                    dest="author", help="Set the author")
    parser.add_option('--author-sort', action='store', type='string', default=None,
                      dest='author_reading', help='Set sort key for the author')
    parser.add_option("-c", "--category", action="store", type="string", \
                    dest="category", help="The category this book belongs"+\
                    " to. E.g.: History")
    parser.add_option("--thumbnail", action="store", type="string", \
                    dest="thumbnail", help="Path to a graphic that will be"+\
                    " set as this files' thumbnail")
    parser.add_option("--comment", action="store", type="string", \
                    dest="comment", help="Path to a txt file containing the "+\
                    "comment to be stored in the lrf file.")
    parser.add_option("--get-thumbnail", action="store_true", \
                    dest="get_thumbnail", default=False, \
                    help="Extract thumbnail from LRF file")
    parser.add_option("-p", "--page", action="store", type="string", \
                    dest="page", help="Don't know what this is for")
    # Parse the arguments we were given, not whatever is in sys.argv
    options, args = parser.parse_args(argv)
    if len(args) != 1:
        if cli:
            parser.print_help()
        raise LRFException('no filename specified')
    return options, args, parser
def main():
    """
    Command line entry point: apply the changes requested on the command
    line to the LRF file and print its XML based meta information.
    """
    import os.path
    try:
        options, args, parser = parse_options()
    except LRFException:
        # Raised when the command line does not name exactly one file.
        # Exits requested by the option parser itself (--help, --version,
        # invalid options) keep their own exit status.
        sys.exit(1)
    stream = open(args[0], "r+b")
    try:
        lrf = LRFMetaFile(stream)
        if options.title:
            lrf.title = options.title
        if options.title_reading is not None:
            lrf.title_reading = options.title_reading
        if options.author_reading is not None:
            lrf.author_reading = options.author_reading
        if options.author:
            lrf.author = options.author
        if options.category:
            lrf.category = options.category
        if options.page:
            lrf.page = options.page
        if options.thumbnail:
            path = os.path.expanduser(os.path.expandvars(options.thumbnail))
            f = open(path, "rb")
            try:
                lrf.thumbnail = f.read()
            finally:
                f.close()
        if options.comment:
            path = os.path.expanduser(os.path.expandvars(options.comment))
            f = open(path)
            try:
                lrf.free_text = f.read()
            finally:
                f.close()
        td = "None"
        if options.get_thumbnail:
            t = lrf.thumbnail
            if t:
                td = os.path.basename(args[0])+"_thumbnail_."+lrf.thumbail_extension()
                # The thumbnail is image data, so it must not go through
                # the line ending translation of text mode
                f = open(td, "wb")
                try:
                    f.write(t)
                finally:
                    f.close()
        # Print all XML based fields sorted by attribute name
        for name, descriptor in sorted(LRFMetaFile.__dict__.items()):
            if isinstance(descriptor, (xml_field, xml_attr_field)):
                value = getattr(lrf, name).encode('utf-8')
                sys.stdout.write(str(descriptor) + ": " + value + "\n")
        if options.get_thumbnail:
            sys.stdout.write("Thumbnail: " + td + "\n")
    finally:
        stream.close()
# Run the command line interface only when this file is executed as a script
if __name__ == '__main__':
    main()

View File

@ -1,5 +0,0 @@
"""
This package contains code to generate ebooks in the SONY LRS/F format. It was
originally developed by Mike Higgins and has been extended and modified by Kovid
Goyal.
"""

View File

@ -1,79 +0,0 @@
""" elements.py -- replacements and helpers for ElementTree """
class ElementWriter(object):
    """
    Writes an element and its children as XML text.

    The element must offer C{tag}, C{text}, C{tail}, C{items()} and
    iteration over its children. Attributes are written sorted by name.
    """

    def __init__(self, e, header=False, sourceEncoding="ascii", spaceBeforeClose=True):
        self.e = e
        self.header = header
        self.sourceEncoding = sourceEncoding
        self.spaceBeforeClose = spaceBeforeClose

    def _encodeCdata(self, rawText):
        """ Decode byte strings and escape the characters &, < and >. """
        if type(rawText) is str:
            rawText = rawText.decode(self.sourceEncoding)
        escaped = rawText
        # & has to come first, it is part of the other replacements
        for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            escaped = escaped.replace(char, entity)
        return escaped

    def _writeAttribute(self, f, name, value):
        """ Write C{ name="value"} with the value escaped. """
        if not isinstance(value, basestring):
            value = unicode(value)
        f.write(u' %s="' % unicode(name))
        f.write(self._encodeCdata(value).replace('"', '&quot;'))
        f.write(u'"')

    def _writeText(self, f, rawText):
        f.write(self._encodeCdata(rawText))

    def _write(self, f, e):
        """ Write element C{e}, its children and its tail to C{f}. """
        f.write(u'<' + unicode(e.tag))
        for name, value in sorted(e.items()):
            self._writeAttribute(f, name, value)

        isEmpty = e.text is None and len(e) == 0
        if isEmpty:
            if self.spaceBeforeClose:
                f.write(' ')
            f.write(u'/>')
        else:
            f.write(u'>')
            if e.text:
                self._writeText(f, e.text)
            for child in e:
                self._write(f, child)
            f.write(u'</%s>' % e.tag)

        if e.tail is not None:
            self._writeText(f, e.tail)

    def toString(self):
        """ Return the XML text as a single unicode string. """
        pieces = []

        class collector:
            pass

        # Anything with a write attribute can act as the output file
        collector.write = pieces.append
        self.write(collector)
        return u''.join(pieces)

    def write(self, f):
        """ Write the XML text (preceded by the XML header if requested) to C{f}. """
        if self.header:
            f.write(u'<?xml version="1.0" encoding="UTF-16"?>\n')
        self._write(f, self.e)

View File

@ -1,777 +0,0 @@
"""
pylrf.py -- very low level interface to create lrf files. See pylrs for
higher level interface that can use this module to render books to lrf.
"""
import struct
import zlib
import StringIO
import codecs
import os
from pylrfopt import tagListOptimizer
# Version of this module, see the change history in the comment below
PYLRF_VERSION = "1.0"
#
# Acknowledgement:
# This software would not have been possible without the pioneering
# efforts of the author of lrf2lrs.py, Igor Skochinsky.
#
# Copyright (c) 2007 Mike Higgins (Falstaff)
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
# Change History:
#
# V1.0 06 Feb 2007
# Initial Release.
#
# Current limitations and bugs:
# Never "scrambles" any streams (even if asked to). This does not seem
# to hurt anything.
#
# Not based on any official documentation, so many assumptions had to be made.
#
# Can be used to create lrf files that can lock up an eBook reader.
# This is your only warning.
#
# Unsupported objects: Canvas, Window, PopUpWindow, Sound, Import,
# SoundStream, ObjectInfo
#
# The only button type supported is JumpButton.
#
# Unsupported tags: SoundStop, Wait, pos on BlockSpace (and those used by
# unsupported objects).
#
# Tags supporting Japanese text and Asian layout have not been tested.
#
# Tested on Python 2.4 and 2.5, Windows XP and Sony PRS-500.
#
# Commented even less than pylrs, but not very useful when called directly,
# anyway.
#
def writeByte(f, byte):
    """Write byte to f as one unsigned byte."""
    packed = struct.pack("<B", byte)
    f.write(packed)
def writeWord(f, word):
    """Write int(word) to f as a little endian unsigned 16 bit value."""
    value = int(word)
    f.write(struct.pack("<H", value))
def writeSignedWord(f, sword):
    """Write int(sword) to f as a little endian signed 16 bit value."""
    value = int(sword)
    f.write(struct.pack("<h", value))
def writeWords(f, *words):
    """Write all words to f as little endian unsigned 16 bit values."""
    fmt = "<%dH" % len(words)
    f.write(struct.pack(fmt, *words))
def writeDWord(f, dword):
    """Write int(dword) to f as a little endian unsigned 32 bit value."""
    value = int(dword)
    f.write(struct.pack("<I", value))
def writeDWords(f, *dwords):
    """Write all dwords to f as little endian unsigned 32 bit values."""
    fmt = "<%dI" % len(dwords)
    f.write(struct.pack(fmt, *dwords))
def writeQWord(f, qword):
    """Write qword to f as a little endian unsigned 64 bit value."""
    packed = struct.pack("<Q", qword)
    f.write(packed)
def writeZeros(f, nZeros):
    """Write nZeros zero bytes to f."""
    padding = "\x00" * nZeros
    f.write(padding)
def writeString(f, str):
    """Write str to f unchanged."""
    data = str
    f.write(data)
def writeIdList(f, idList):
    """Write the number of ids as a word followed by each id as a dword."""
    count = len(idList)
    writeWord(f, count)
    writeDWords(f, *idList)
def writeColor(f, color):
    """
    Write a color to f as a big endian unsigned 32 bit value.

    color is a string holding an integer literal, e.g. "0x00FF0000";
    the base is taken from its prefix.
    """
    # TODO: allow color names, web format
    value = int(color, 0)
    packed = struct.pack(">I", value)
    f.write(packed)
def writeLineWidth(f, width):
    """Write int(width) divided by 5 (rounded down) to f as a word."""
    units = int(width) // 5
    writeWord(f, units)
def writeUnicode(f, string, encoding):
    """
    Write string to f as UTF-16-LE text preceded by its size in bytes
    (as a word). Byte strings are decoded using encoding first.
    """
    if isinstance(string, str):
        string = string.decode(encoding)
    encoded = string.encode("utf-16-le")
    writeWord(f, len(encoded))
    writeString(f, encoded)
def writeRaw(f, string, encoding):
    """
    Write string to f as UTF-16-LE text without a size prefix.
    Byte strings are decoded using encoding first.
    """
    if isinstance(string, str):
        string = string.decode(encoding)
    encoded = string.encode("utf-16-le")
    writeString(f, encoded)
def writeRubyAA(f, rubyAA):
    """
    Write the pair (ralign, radjust) to f as a single word.

    ralign is "start" or "center", radjust is "line-edge" or "none";
    any other value raises KeyError.
    """
    ralign, radjust = rubyAA
    adjustBits = {"line-edge":0x10, "none":0}[radjust]
    alignBits = {"start":1, "center":2}[ralign]
    writeWord(f, alignBits | adjustBits)
def writeBgImage(f, bgInfo):
    """
    Write the pair (imode, iid) to f: the mode as a word and the id as a
    dword. imode is one of "pfix", "fix", "tile" and "centering".
    """
    imode, iid = bgInfo
    modeCode = {"pfix": 0, "fix":1, "tile":2, "centering":3}[imode]
    writeWord(f, modeCode)
    writeDWord(f, iid)
def writeEmpDots(f, dotsInfo, encoding):
    """
    Write the triple (refDotsFont, dotsFontName, dotsCode) to f: the font
    reference as a dword, the font name as a fontfacename tag and the
    code (a string holding an integer literal) as a word.
    """
    refDotsFont, dotsFontName, dotsCode = dotsInfo
    writeDWord(f, refDotsFont)
    fontTag = LrfTag("fontfacename", dotsFontName)
    fontTag.write(f, encoding)
    writeWord(f, int(dotsCode, 0))
def writeRuledLine(f, lineInfo):
    """
    Write the tuple (lineLength, lineType, lineWidth, lineColor) to f.
    lineType must be a key of LINE_TYPE_ENCODING.
    """
    lineLength, lineType, lineWidth, lineColor = lineInfo
    typeCode = LINE_TYPE_ENCODING[lineType]
    for word in (lineLength, typeCode, lineWidth):
        writeWord(f, word)
    writeColor(f, lineColor)
# The text "LRF" with a zero byte after every character, followed by two
# more zero bytes
LRF_SIGNATURE = "L\x00R\x00F\x00\x00\x00"
#XOR_KEY = 48
XOR_KEY = 65024 # that's what lrf2lrs says -- not used, anyway...
LRF_VERSION = 1000 # is 999 for librie? lrf2lrs uses 1000
# Codes for the image types, keyed by the upper case name of the type
IMAGE_TYPE_ENCODING = dict(GIF=0x14, PNG=0x12, BMP=0x13, JPEG=0x11, JPG=0x11)
# Type code for every object name accepted by LrfObject. Some codes can be
# reached through two names (e.g. PageAtr and PageStyle).
OBJECT_TYPE_ENCODING = dict(
PageTree = 0x01,
Page = 0x02,
Header = 0x03,
Footer = 0x04,
PageAtr = 0x05, PageStyle=0x05,
Block = 0x06,
BlockAtr = 0x07, BlockStyle=0x07,
MiniPage = 0x08,
TextBlock = 0x0A, Text=0x0A,
TextAtr = 0x0B, TextStyle=0x0B,
ImageBlock = 0x0C, Image=0x0C,
Canvas = 0x0D,
ESound = 0x0E,
ImageStream = 0x11,
Import = 0x12,
Button = 0x13,
Window = 0x14,
PopUpWindow = 0x15,
Sound = 0x16,
SoundStream = 0x17,
Font = 0x19,
ObjectInfo = 0x1A,
BookAtr = 0x1C, BookStyle=0x1C,
SimpleTextBlock = 0x1D,
TOC=0x1E
)
# Codes for the line types, used by writeRuledLine and by several entries
# of TAG_INFO
LINE_TYPE_ENCODING = {
'none':0, 'solid':0x10, 'dashed':0x20, 'double':0x30, 'dotted':0x40
}
# Codes for the binding direction
# NOTE(review): presumably Lr is left-to-right and Rl right-to-left -- confirm
BINDING_DIRECTION_ENCODING = dict(Lr=1, Rl=16)
# Description of every tag known to LrfTag, keyed by tag name.
# Each value is a tuple. Its first item is the tag code, which LrfTag.write
# writes as a word unless it is 0. The remaining items describe how the
# parameter of the tag is written and are processed in order:
#   - a dict translates the parameter into the value to be written
#   - a string is a struct format used to pack the parameter (a tuple
#     parameter supplies one value per format field)
#   - anything else is called as a writer function with the file and the
#     parameter (writeUnicode, writeRaw and writeEmpDots also get the
#     encoding)
# Tags whose tuple has only the code take no parameter.
TAG_INFO = dict(
rawtext = (0, writeRaw),
ObjectStart = (0xF500, "<IH"),
ObjectEnd = (0xF501,),
# InfoLink (0xF502)
Link = (0xF503, "<I"),
StreamSize = (0xF504, writeDWord),
StreamData = (0xF505, writeString),
StreamEnd = (0xF506,),
oddheaderid = (0xF507, writeDWord),
evenheaderid = (0xF508, writeDWord),
oddfooterid = (0xF509, writeDWord),
evenfooterid = (0xF50A, writeDWord),
ObjectList = (0xF50B, writeIdList),
fontsize = (0xF511, writeSignedWord),
fontwidth = (0xF512, writeSignedWord),
fontescapement = (0xF513, writeSignedWord),
fontorientation = (0xF514, writeSignedWord),
fontweight = (0xF515, writeWord),
fontfacename = (0xF516, writeUnicode),
textcolor = (0xF517, writeColor),
textbgcolor = (0xF518, writeColor),
wordspace = (0xF519, writeSignedWord),
letterspace = (0xF51A, writeSignedWord),
baselineskip = (0xF51B, writeSignedWord),
linespace = (0xF51C, writeSignedWord),
parindent = (0xF51D, writeSignedWord),
parskip = (0xF51E, writeSignedWord),
# F51F, F520
topmargin = (0xF521, writeWord),
headheight = (0xF522, writeWord),
headsep = (0xF523, writeWord),
oddsidemargin = (0xF524, writeWord),
textheight = (0xF525, writeWord),
textwidth = (0xF526, writeWord),
canvaswidth = (0xF551, writeWord),
canvasheight = (0xF552, writeWord),
footspace = (0xF527, writeWord),
footheight = (0xF528, writeWord),
bgimage = (0xF529, writeBgImage),
setemptyview = (0xF52A, {'show':1, 'empty':0}, writeWord),
pageposition = (0xF52B, {'any':0,'upper':1, 'lower':2}, writeWord),
evensidemargin = (0xF52C, writeWord),
framemode = (0xF52E,
{'None':0, 'curve':2, 'square':1}, writeWord),
blockwidth = (0xF531, writeWord),
blockheight = (0xF532, writeWord),
blockrule = (0xF533, {"horz-fixed":0x14, "horz-adjustable":0x12,
"vert-fixed":0x41, "vert-adjustable":0x21,
"block-fixed":0x44, "block-adjustable":0x22},
writeWord),
bgcolor = (0xF534, writeColor),
layout = (0xF535, {'TbRl':0x41, 'LrTb':0x34}, writeWord),
framewidth = (0xF536, writeWord),
framecolor = (0xF537, writeColor),
topskip = (0xF538, writeWord),
sidemargin = (0xF539, writeWord),
footskip = (0xF53A, writeWord),
align = (0xF53C, {'head':1, 'center':4, 'foot':8}, writeWord),
column = (0xF53D, writeWord),
columnsep = (0xF53E, writeSignedWord),
minipagewidth = (0xF541, writeWord),
minipageheight = (0xF542, writeWord),
yspace = (0xF546, writeWord),
xspace = (0xF547, writeWord),
PutObj = (0xF549, "<HHI"),
ImageRect = (0xF54A, "<HHHH"),
ImageSize = (0xF54B, "<HH"),
RefObjId = (0xF54C, "<I"),
PageDiv = (0xF54E, "<HIHI"),
StreamFlags = (0xF554, writeWord),
Comment = (0xF555, writeUnicode),
FontFilename = (0xF559, writeUnicode),
PageList = (0xF55C, writeIdList),
FontFacename = (0xF55D, writeUnicode),
buttonflags = (0xF561, writeWord),
PushButtonStart = (0xF566,),
PushButtonEnd = (0xF567,),
buttonactions = (0xF56A,),
endbuttonactions= (0xF56B,),
jumpto = (0xF56C, "<II"),
RuledLine = (0xF573, writeRuledLine),
rubyaa = (0xF575, writeRubyAA),
rubyoverhang = (0xF576, {'none':0, 'auto':1}, writeWord),
empdotsposition = (0xF577, {'before':1, 'after':2}, writeWord),
empdots = (0xF578, writeEmpDots),
emplineposition = (0xF579, {'before':1, 'after':2}, writeWord),
emplinetype = (0xF57A, LINE_TYPE_ENCODING, writeWord),
ChildPageTree = (0xF57B, "<I"),
ParentPageTree = (0xF57C, "<I"),
Italic = (0xF581,),
ItalicEnd = (0xF582,),
pstart = (0xF5A1, writeDWord), # what goes in the dword?
pend = (0xF5A2,),
CharButton = (0xF5A7, writeDWord),
CharButtonEnd = (0xF5A8,),
Rubi = (0xF5A9,),
RubiEnd = (0xF5AA,),
Oyamoji = (0xF5AB,),
OyamojiEnd = (0xF5AC,),
Rubimoji = (0xF5AD,),
RubimojiEnd = (0xF5AE,),
Yoko = (0xF5B1,),
YokoEnd = (0xF5B2,),
Tate = (0xF5B3,),
TateEnd = (0xF5B4,),
Nekase = (0xF5B5,),
NekaseEnd = (0xF5B6,),
Sup = (0xF5B7,),
SupEnd = (0xF5B8,),
Sub = (0xF5B9,),
SubEnd = (0xF5BA,),
NoBR = (0xF5BB,),
NoBREnd = (0xF5BC,),
EmpDots = (0xF5BD,),
EmpDotsEnd = (0xF5BE,),
EmpLine = (0xF5C1,),
EmpLineEnd = (0xF5C2,),
DrawChar = (0xF5C3, '<H'),
DrawCharEnd = (0xF5C4,),
Box = (0xF5C6, LINE_TYPE_ENCODING, writeWord),
BoxEnd = (0xF5C7,),
Space = (0xF5CA, writeSignedWord),
textstring = (0xF5CC, writeUnicode), # when is this used?
Plot = (0xF5D1, "<HHII"),
CR = (0xF5D2,),
RegisterFont = (0xF5D8, writeDWord),
setwaitprop = (0xF5DA, {'replay':1, 'noreplay':2}, writeWord),
charspace = (0xF5DD, writeSignedWord),
textlinewidth = (0xF5F1, writeLineWidth),
linecolor = (0xF5F2, writeColor)
)
class LrfError(Exception):
    """Error raised by this module, e.g. for unknown tag or object names."""
class ObjectTableEntry(object):
    """The id, offset and size of one object."""

    def __init__(self, objId, offset, size):
        self.objId, self.offset, self.size = objId, offset, size

    def write(self, f):
        """Write id, offset, size and a trailing zero to f as dwords."""
        row = (self.objId, self.offset, self.size, 0)
        writeDWords(f, *row)
class LrfTag(object):
    """
    A tag with at most one parameter. The tag code and the way the
    parameter is written are looked up in TAG_INFO.
    """

    def __init__(self, name, *parameters):
        if name not in TAG_INFO:
            raise LrfError("tag name %s not recognized" % name)
        tagInfo = TAG_INFO[name]
        self.name = name
        self.type = tagInfo[0]
        self.format = tagInfo[1:]

        if len(parameters) > 1:
            raise LrfError("only one parameter allowed on tag %s" % name)
        self.parameter = None
        if parameters:
            self.parameter = parameters[0]

    def write(self, lrf, encoding=None):
        """
        Write the tag code (unless it is 0) and the parameter to lrf.
        Nothing but the code is written if the tag has no parameter.
        Tags containing text raise LrfError if no encoding is given.
        """
        if self.type != 0:
            writeWord(lrf, self.type)

        value = self.parameter
        if value is None:
            return

        #print " Writing tag", self.name
        for step in self.format:
            if isinstance(step, dict):
                # Translate the parameter for the steps that follow
                value = step[value]
                continue
            if isinstance(step, str):
                # A struct format, tuples supply one value per field
                if isinstance(value, tuple):
                    packed = struct.pack(step, *value)
                else:
                    packed = struct.pack(step, value)
                writeString(lrf, packed)
                continue
            # Everything else is a writer function
            if step in (writeUnicode, writeRaw, writeEmpDots):
                if encoding is None:
                    raise LrfError("Tag requires encoding")
                step(lrf, value, encoding)
            else:
                step(lrf, value)
# Stream flags, see LrfStreamBase.getStreamTags
STREAM_SCRAMBLED = 0x200
STREAM_COMPRESSED = 0x100
# The compressed bit plus a marker that switches off the check whether
# compression is worthwhile; the marker itself is not written to the file
STREAM_FORCE_COMPRESSED = 0x8100
# Flags of the stream of a table of contents object
STREAM_TOC = 0x0051
class LrfStreamBase(object):
    """Stream data together with the flags describing how it is stored."""

    def __init__(self, streamFlags, streamData=None):
        self.streamFlags = streamFlags
        self.streamData = streamData

    def setStreamData(self, streamData):
        self.streamData = streamData

    def getStreamTags(self, optimize=False):
        """
        Return the list of tags that store the stream:
        StreamFlags, StreamSize, StreamData and StreamEnd.

        If the compressed flag is set the data is compressed and prefixed
        with its uncompressed size. With optimize the data is stored
        uncompressed instead if compression does not make it smaller,
        unless the flags force compression.
        """
        # if flags & 0x200, stream is scrambled
        # if flags & 0x100, stream is compressed
        flags = self.streamFlags
        payload = self.streamData

        # implement scramble? I never scramble anything...

        forced = (flags & STREAM_FORCE_COMPRESSED) == STREAM_FORCE_COMPRESSED
        if forced:
            optimize = False

        if (flags & STREAM_COMPRESSED) == STREAM_COMPRESSED:
            rawSize = len(payload)
            compressed = zlib.compress(payload)
            if optimize and rawSize <= len(compressed) + 4:
                # Not worth it, store the data as it is
                flags &= ~STREAM_COMPRESSED
            else:
                payload = struct.pack("<I", rawSize) + compressed

        tags = []
        tags.append(LrfTag("StreamFlags", flags & 0x01FF))
        tags.append(LrfTag("StreamSize", len(payload)))
        tags.append(LrfTag("StreamData", payload))
        tags.append(LrfTag("StreamEnd"))
        return tags
class LrfTagStream(LrfStreamBase):
    """A stream whose data is made up of the tags added to it."""

    def __init__(self, streamFlags, streamTags=None):
        LrfStreamBase.__init__(self, streamFlags)
        # Work on a copy so that the list of the caller is left alone
        if streamTags is not None:
            self.tags = streamTags[:]
        else:
            self.tags = []

    def appendLrfTag(self, tag):
        self.tags.append(tag)

    def getStreamTags(self, encoding,
            optimizeTags=False, optimizeCompression=False):
        """
        Write all tags into the stream data and return the tags that
        store the resulting stream.
        """
        if optimizeTags:
            tagListOptimizer(self.tags)

        buf = StringIO.StringIO()
        for tag in self.tags:
            tag.write(buf, encoding)
        self.streamData = buf.getvalue()
        buf.close()

        return LrfStreamBase.getStreamTags(self, optimize=optimizeCompression)
class LrfFileStream(LrfStreamBase):
    """A stream whose data is the content of a file."""

    def __init__(self, streamFlags, filename):
        """
        streamFlags -- the flags of the stream
        filename    -- the file to read the stream data from (read in
                       binary mode when the object is created)
        """
        LrfStreamBase.__init__(self, streamFlags)
        f = open(filename, "rb")
        # Close the file even if reading it fails
        try:
            self.streamData = f.read()
        finally:
            f.close()
class LrfObject(object):
    """
    An object made up of a list of tags. When written, the tags are
    enclosed in an ObjectStart and an ObjectEnd tag.
    """

    def __init__(self, name, objId):
        """
        name  -- one of the keys of OBJECT_TYPE_ENCODING
        objId -- the id of the object, must be greater than 0

        Raises LrfError if the id or the name is invalid.
        """
        if objId <= 0:
            raise LrfError("invalid objId for " + name)
        self.name = name
        self.objId = objId
        self.tags = []
        try:
            self.type = OBJECT_TYPE_ENCODING[name]
        except KeyError:
            raise LrfError("object name %s not recognized" % name)

    def __str__(self):
        return 'LRFObject: ' + self.name + ", " + str(self.objId)

    def appendLrfTag(self, tag):
        self.tags.append(tag)

    def appendLrfTags(self, tagList):
        self.tags.extend(tagList)

    # deprecated old name
    append = appendLrfTag

    def appendTagDict(self, tagDict, genClass=None):
        """
        Append a tag for every entry of tagDict.

        Most entries become one tag each. The entries for ruby, background
        image and emphasis dots are combined into one tag per group;
        members of a group that are missing get a default value.
        """
        #
        # This code does not really belong here, I think. But it
        # belongs somewhere, so here it is.
        #
        compositeNames = ["bgimagemode", "bgimageid",
                          "rubyalign", "rubyadjust",
                          "empdotscode", "empdotsfontname", "refempdotsfont"]
        composites = {}
        for name, value in tagDict.items():
            if name not in compositeNames:
                self.append(LrfTag(name, value))
            else:
                composites[name] = value

        if "rubyalign" in composites or "rubyadjust" in composites:
            # The defaults have to be values writeRubyAA can encode:
            # "start"/"center" for the alignment and "line-edge"/"none"
            # for the adjustment
            ralign = composites.get("rubyalign", "start")
            radjust = composites.get("rubyadjust", "none")
            self.append(LrfTag("rubyaa", (ralign, radjust)))

        if "bgimagemode" in composites or "bgimageid" in composites:
            imode = composites.get("bgimagemode", "fix")
            iid = composites.get("bgimageid", 0)
            # for some reason, page style uses 0 for "fix"
            # we call this pfix to differentiate it
            if genClass == "PageStyle" and imode == "fix":
                imode = "pfix"
            self.append(LrfTag("bgimage", (imode, iid)))

        if "empdotscode" in composites or "empdotsfontname" in composites or \
                "refempdotsfont" in composites:
            dotscode = composites.get("empdotscode", "0x002E")
            dotsfontname = composites.get("empdotsfontname",
                                          "Dutch801 Rm BT Roman")
            refdotsfont = composites.get("refempdotsfont", 0)
            self.append(LrfTag("empdots", (refdotsfont, dotsfontname,
                                           dotscode)))

    def write(self, lrf, encoding=None):
        """Write the object with all its tags to lrf."""
        #print "Writing object", self.name
        LrfTag("ObjectStart", (self.objId, self.type)).write(lrf)
        for tag in self.tags:
            tag.write(lrf, encoding)
        LrfTag("ObjectEnd").write(lrf)
class LrfToc(LrfObject):
    """
    Table of contents. Format of toc is:
    [ (pageid, objid, string)...]
    """

    def __init__(self, objId, toc, se):
        """
        objId -- the id of the table of contents object
        toc   -- the list of entries, see the description of the class
        se    -- the encoding of labels that are byte strings
        """
        LrfObject.__init__(self, "TOC", objId)
        streamData = self._makeTocStream(toc, se)
        self._makeStreamTags(streamData)

    def _makeStreamTags(self, streamData):
        stream = LrfStreamBase(STREAM_TOC, streamData)
        self.tags.extend(stream.getStreamTags())

    def _encodedLabelSize(self, label, se):
        """Return the number of bytes writeUnicode writes for the text of label."""
        if isinstance(label, str):
            label = label.decode(se)
        return len(label.encode("utf-16-le"))

    def _makeTocStream(self, toc, se):
        """
        Return the stream data: the number of entries, the offset of every
        entry and the entries themselves (page id, object id and label).

        Raises LrfError if an entry has an invalid page or object id.
        """
        stream = StringIO.StringIO()
        nEntries = len(toc)

        writeDWord(stream, nEntries)

        lastOffset = 0
        writeDWord(stream, lastOffset)
        for i in range(nEntries - 1):
            pageId, objId, label = toc[i]
            # Two dwords, the size word and the label as it is actually
            # written. The size of the label has to be taken from the
            # encoded text: the number of characters times two is wrong
            # for byte strings in a multi byte encoding and for
            # characters that need a surrogate pair.
            entryLen = 4 + 4 + 2 + self._encodedLabelSize(label, se)
            lastOffset += entryLen
            writeDWord(stream, lastOffset)

        for entry in toc:
            pageId, objId, label = entry
            if pageId <= 0:
                raise LrfError("page id invalid in toc: " + label)
            if objId <= 0:
                raise LrfError("textblock id invalid in toc: " + label)

            writeDWord(stream, pageId)
            writeDWord(stream, objId)
            writeUnicode(stream, label, se)

        streamData = stream.getvalue()
        stream.close()
        return streamData
class LrfWriter(object):
    """
    Collects LRF objects and document-level settings and writes them out
    as a complete LRF file: header, objects, then the object table.
    """

    def __init__(self, sourceEncoding):
        """
        @param sourceEncoding: Encoding used to decode byte strings (see
                               L{toUnicode}) and passed to every object
                               when it is written.
        """
        self.sourceEncoding = sourceEncoding

        # The following flags are just to have a place to remember these
        # values.  The flags must still be passed to the appropriate classes
        # in order to have them work.
        self.saveStreamTags = False # used only in testing -- hogs memory

        # highly experimental -- set to True at your own risk
        self.optimizeTags = False
        self.optimizeCompression = False
        # End of placeholders

        self.rootObjId = 0
        self.rootObj = None
        self.binding = 1 # 1=front to back, 16=back to front
        self.dpi = 1600
        self.width = 600
        self.height = 800
        self.colorDepth = 24
        self.tocObjId = 0
        # Initialised so that getPageTreeId() cannot fail with an
        # AttributeError when it is called before setPageTreeId().
        self.pageTreeId = 0
        self.docInfoXml = ""
        self.thumbnailEncoding = "JPEG"
        self.thumbnailData = ""
        self.objects = []
        self.objectTable = []

    def getSourceEncoding(self):
        """Return the encoding given to the constructor."""
        return self.sourceEncoding

    def toUnicode(self, string):
        """
        Decode C{string} with the source encoding if it is a byte string;
        any other value is returned unchanged.
        """
        if isinstance(string, str):
            string = string.decode(self.sourceEncoding)
        return string

    def getDocInfoXml(self):
        """Return the document info XML that will be stored in the header."""
        return self.docInfoXml

    def setPageTreeId(self, objId):
        """Remember the id of the page tree object."""
        self.pageTreeId = objId

    def getPageTreeId(self):
        """Return the page tree object id (0 if none has been set)."""
        return self.pageTreeId

    def setRootObject(self, obj):
        """
        Register C{obj} as the root object of the file.

        @raise LrfError: If a root object has already been set.
        """
        if self.rootObjId != 0:
            raise LrfError("root object already set")
        self.rootObjId = obj.objId
        self.rootObj = obj

    def registerFontId(self, id):
        """
        Append a "RegisterFont" tag for C{id} to the root object.

        @raise LrfError: If no root object has been set yet.
        """
        if self.rootObj is None:
            raise LrfError("can't register font -- no root object")
        self.rootObj.append(LrfTag("RegisterFont", id))

    def setTocObject(self, obj):
        """
        Remember the id of the table of contents object.

        @raise LrfError: If a toc object has already been set.
        """
        if self.tocObjId != 0:
            raise LrfError("toc object already set")
        self.tocObjId = obj.objId

    def setThumbnailFile(self, filename, encoding=None):
        """
        Load the thumbnail image from C{filename}.

        @param encoding: Image type; when None it is taken from the file
                         extension. It is upper-cased and must be a key of
                         C{IMAGE_TYPE_ENCODING}.
        @raise LrfError: If the image type is unknown. In that case the
                         previously stored thumbnail is left untouched.
        """
        f = open(filename, "rb")
        try:
            data = f.read()
        finally:
            # close the handle even if the read fails
            f.close()

        if encoding is None:
            encoding = os.path.splitext(filename)[1][1:]
        encoding = encoding.upper()
        if encoding not in IMAGE_TYPE_ENCODING:
            raise LrfError("unknown image type: " + encoding)

        # Only commit once everything is validated so that data and
        # encoding can never get out of sync.
        self.thumbnailData = data
        self.thumbnailEncoding = encoding

    def append(self, obj):
        """Add C{obj} to the list of objects to be written."""
        self.objects.append(obj)

    def addLrfObject(self, objId):
        """Placeholder; does nothing."""
        pass

    def writeFile(self, lrf):
        """
        Write the complete file to the seekable stream C{lrf}.

        @raise LrfError: If no root object has been set, or if a toc object
                         was set but is not among the written objects.
        """
        if self.rootObjId == 0:
            raise LrfError("no root object has been set")
        self.writeHeader(lrf)
        self.writeObjects(lrf)
        self.updateObjectTableOffset(lrf)
        self.updateTocObjectOffset(lrf)
        self.writeObjectTable(lrf)

    def writeHeader(self, lrf):
        """
        Write the file header, the compressed document info and the
        thumbnail. The object table offset and the toc object offset are
        written as 0 here and patched in after the objects are written.
        """
        writeString(lrf, LRF_SIGNATURE)
        writeWord(lrf, LRF_VERSION)
        writeWord(lrf, XOR_KEY)
        writeDWord(lrf, self.rootObjId)
        writeQWord(lrf, len(self.objects))
        writeQWord(lrf, 0) # 0x18 objectTableOffset -- will be updated
        writeZeros(lrf, 4) # 0x20 unknown
        writeWord(lrf, self.binding)
        writeDWord(lrf, self.dpi)
        writeWords(lrf, self.width, self.height, self.colorDepth)
        writeZeros(lrf, 20) # 0x30 unknown
        writeDWord(lrf, self.tocObjId)
        writeDWord(lrf, 0) # 0x48 tocObjectOffset -- will be updated

        # The document info is stored as BOM-prefixed UTF-16-LE, zlib
        # compressed; the uncompressed length is written ahead of it.
        docInfoXml = codecs.BOM_LE + self.docInfoXml.encode("utf-16-le")
        compDocInfo = zlib.compress(docInfoXml)
        writeWord(lrf, len(compDocInfo) + 4)
        writeWord(lrf, IMAGE_TYPE_ENCODING[self.thumbnailEncoding])
        writeDWord(lrf, len(self.thumbnailData))
        writeDWord(lrf, len(docInfoXml))
        writeString(lrf, compDocInfo)
        writeString(lrf, self.thumbnailData)

    def writeObjects(self, lrf):
        """
        Write every registered object and rebuild the object table with
        the id, start offset and size of each one.
        """
        # also appends object entries to the object table
        self.objectTable = []
        for obj in self.objects:
            objStart = lrf.tell()
            obj.write(lrf, self.sourceEncoding)
            objEnd = lrf.tell()
            self.objectTable.append(
                ObjectTableEntry(obj.objId, objStart, objEnd - objStart))

    def updateObjectTableOffset(self, lrf):
        """Patch the current position into the header as the object table offset."""
        # update the offset of the object table
        tableOffset = lrf.tell()
        lrf.seek(0x18, 0)
        writeQWord(lrf, tableOffset)
        lrf.seek(0, 2) # back to the end of the file

    def updateTocObjectOffset(self, lrf):
        """
        Patch the offset of the toc object into the header. Does nothing
        when no toc object was set.

        @raise LrfError: If the toc object id is not in the object table.
        """
        if self.tocObjId == 0:
            return
        for entry in self.objectTable:
            if entry.objId == self.tocObjId:
                lrf.seek(0x48, 0)
                writeDWord(lrf, entry.offset)
                lrf.seek(0, 2) # back to the end of the file
                break
        else:
            raise LrfError("toc object not in object table")

    def writeObjectTable(self, lrf):
        """Write every object table entry at the current position."""
        for tableEntry in self.objectTable:
            tableEntry.write(lrf)

View File

@ -1,43 +0,0 @@
def _optimize(tagList, tagName, conversion):
# copy the tag of interest plus any text
newTagList = []
for tag in tagList:
if tag.name == tagName or tag.name == "rawtext":
newTagList.append(tag)
# now, eliminate any duplicates (leaving the last one)
for i, newTag in enumerate(newTagList[:-1]):
if newTag.name == tagName and newTagList[i+1].name == tagName:
tagList.remove(newTag)
# eliminate redundant settings to same value across text strings
newTagList = []
for tag in tagList:
if tag.name == tagName:
newTagList.append(tag)
for i, newTag in enumerate(newTagList[:-1]):
value = conversion(newTag.parameter)
nextValue = conversion(newTagList[i+1].parameter)
if value == nextValue:
tagList.remove(newTagList[i+1])
# eliminate any setting that don't have text after them
while len(tagList) > 0 and tagList[-1].name == tagName:
del tagList[-1]
def tagListOptimizer(tagList):
    """
    Strip redundant or unnecessary tags from C{tagList}, in place.

    Text settings that are changed again before any text is output are
    dropped. For example,
        fontsize=100, fontsize=200, text, fontsize=100, fontsize=200
    becomes
        fontsize=200 text

    @return: The number of tags that were removed.
    """
    sizeBefore = len(tagList)
    for setting in ("fontsize", "fontweight"):
        _optimize(tagList, setting, int)
    return sizeBefore - len(tagList)

File diff suppressed because it is too large Load Diff

View File

@ -1,30 +0,0 @@
#!/usr/bin/python
#Read text streams from LRF files. Usage ./stream.py <myfile.lrf> <offset to beginning of stream object in hex>
import array, sys, struct, zlib
def descrambleBuf(buf, l, xorKey):
    """
    XOR the first C{l} bytes of C{buf} with C{xorKey}.

    @param buf: Byte string to descramble.
    @param l: Number of leading bytes to XOR; the rest is copied unchanged.
              An IndexError is raised if it exceeds C{len(buf)}.
    @param xorKey: Value XORed into each byte. The result must still fit in
                   an unsigned byte, otherwise the array raises OverflowError.
    @return: The descrambled data as a byte string.
    """
    a = array.array('B', buf)
    for i in range(l):
        a[i] ^= xorKey
    # array.tostring() was removed in Python 3.9; prefer tobytes() where it
    # exists and fall back to tostring() on old interpreters.
    toBytes = getattr(a, 'tobytes', None) or a.tostring
    return toBytes()
if __name__ == '__main__':
    # argv[1] is the LRF file, argv[2] the hexadecimal offset of the stream
    # object inside it.
    f = open(sys.argv[1], 'rb')
    # A little-endian 16 bit value at file offset 0x0a is used below to
    # derive the descrambling key.
    f.seek(0x0a)
    xorkey = struct.unpack('<H', f.read(2))[0]
    # The fields read below start 0x10 bytes after the given object offset.
    f.seek(int(sys.argv[2], 16) + 0x10)
    flags = struct.unpack('<H', f.read(2))[0]
    f.read(2)   # skip two bytes
    l = struct.unpack('<I', f.read(4))[0]   # length of the raw stream data
    f.read(2)   # skip two bytes
    raw = f.read(l)
    key = (l % xorkey) + 0x0f
    # Flag bit 0x200: the data is XOR scrambled.
    descrambled = descrambleBuf(raw, l, key) if (flags & 0x200) else raw
    # Flag bit 0x100: the data is zlib compressed after its first four bytes.
    stream = zlib.decompress(descrambled[4:]) if (flags & 0x100) else descrambled
    print stream

View File

@ -1,14 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

View File

@ -1,109 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Convert .txt files to .lrf
"""
import os, sys
from libprs500.lrf import ConversionError, option_parser
from libprs500.lrf import Book
from libprs500.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
from libprs500 import filename_to_utf8
from libprs500 import iswindows
def parse_options(argv=None, cli=True):
    """
    CLI for txt -> lrf conversions

    @param argv: Command line arguments without the program name. When
                 empty or None, C{sys.argv[1:]} is used.
    @param cli: If True, the help text is printed before an error about
                the arguments is raised.
    @return: The tuple C{(options, args, parser)}.
    @raise ConversionError: If not exactly one filename was given.
    """
    if not argv:
        argv = sys.argv[1:]
    parser = option_parser(
        "usage: %prog [options] mybook.txt\n"
        "%prog converts mybook.txt to mybook.lrf\n"
        )
    defenc = 'cp1252' if iswindows else 'utf8'
    enchelp = 'Set the encoding used to decode ' + \
              'the text in mybook.txt. Default encoding is %default'
    parser.add_option('-e', '--encoding', action='store', type='string',
                      dest='encoding', help=enchelp, default=defenc)
    # Parse the arguments that were actually handed in; calling parse_args()
    # without them silently falls back to sys.argv and ignores C{argv}.
    options, args = parser.parse_args(argv)
    if len(args) != 1:
        if cli:
            parser.print_help()
        raise ConversionError('no filename specified')
    if options.title is None:
        # default the title to the file name without its extension
        options.title = filename_to_utf8(
            os.path.splitext(os.path.basename(args[0]))[0])
    return options, args, parser
def main():
    """
    Command line entry point: convert the text file named on the command
    line and print the path of the generated file.
    """
    try:
        options, args, parser = parse_options()
        src = os.path.abspath(os.path.expanduser(args[0]))
    except:
        # Any failure while parsing the command line ends the program with
        # exit status 1 (parse_options has already printed the help text
        # when the filename is missing).
        sys.exit(1)
    # convert_txt() runs outside the try block, so its errors propagate.
    print 'Output written to ', convert_txt(src, options)
def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.

    Consecutive non-empty lines are joined into one paragraph; an empty
    line ends the current paragraph.

    @param path: Path of the text file to convert.
    @param options: Object with the following attributes:
                    C{author}, C{author_sort}, C{title}, C{title_sort},
                    C{publisher}, C{category}, C{freetext}, C{header},
                    C{profile}, C{output}, C{lrs} and C{encoding} (the
                    assumed encoding of the text in C{path}.)
    @return: Absolute path of the file that was written.
    @raise ConversionError: If rendering fails with a UnicodeDecodeError.
    """
    import fileinput
    header = None
    if options.header:
        header = Paragraph()
        header.append(Bold(options.title))
        header.append(' by ')
        header.append(Italic(options.author))
    title = (options.title, options.title_sort)
    author = (options.author, options.author_sort)
    book = Book(header=header, title=title, author=author,
                publisher=options.publisher,
                sourceencoding=options.encoding, freetext=options.freetext,
                category=options.category,
                booksetting=BookSetting(
                    dpi=10*options.profile.dpi,
                    screenheight=options.profile.screen_height,
                    screenwidth=options.profile.screen_width))
    text = ''
    pg = book.create_page()
    block = book.create_text_block()
    pg.append(block)
    book.append(pg)
    for line in fileinput.input(path):
        line = line.strip()
        if line:
            text = text.rstrip() + ' ' + line
        else:
            block.Paragraph(text)
            text = ''
    if text:
        # The file did not end with an empty line: without this the last
        # paragraph would be silently dropped.
        block.Paragraph(text)
    basename = os.path.basename(path)
    oname = options.output
    if not oname:
        # default: input name with the extension of the chosen output format
        oname = os.path.splitext(basename)[0]+('.lrs' if options.lrs else '.lrf')
    oname = os.path.abspath(os.path.expanduser(oname))
    try:
        if options.lrs:
            book.renderLrs(oname)
        else:
            book.renderLrf(oname)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' + \
                              options.encoding +'. Specify the '+ \
                              'correct encoding with the -e option.')
    return os.path.abspath(oname)
# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
    main()

View File

@ -1,42 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Provides metadata editing support for PDF and RTF files. For LRF metadata, use
the L{libprs500.lrf.meta} module.
"""
__docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
class MetaInformation(object):
    """
    Simple container for the metadata of a document.

    C{title} and C{author} are set by the constructor; C{comments},
    C{category}, C{classification} and C{publisher} start out as None.
    """

    def __init__(self, title, author):
        self.title = title
        self.author = author
        self.comments = None
        self.category = None
        self.classification = None
        self.publisher = None

    def __str__(self):
        """Return title, author, category and comments, one per line."""
        ans = ''
        ans += 'Title : ' + str(self.title) + '\n'
        ans += 'Author : ' + str(self.author) + '\n'
        ans += 'Category: ' + str(self.category) + '\n'
        ans += 'Comments: ' + str(self.comments) + '\n'
        return ans.strip()

    def __nonzero__(self):
        """True if any of title, author, comments or category is set."""
        # Must return a real bool: handing back the string itself makes
        # bool(obj) fail with "__nonzero__ should return bool or int".
        return bool(self.title or self.author or self.comments
                    or self.category)

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__

View File

@ -1,26 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from libprs500.metadata.rtf import get_metadata as rtf_metadata
from libprs500.lrf.meta import get_metadata as lrf_metadata
from libprs500.metadata import MetaInformation
def get_metadata(stream, stream_type='lrf'):
    """
    Read the metadata of C{stream} with the reader for C{stream_type}.

    @param stream: Passed unchanged to the selected reader.
    @param stream_type: 'rtf' or 'lrf'. For any other value an empty
                        L{MetaInformation} object is returned.
    """
    if stream_type not in ('rtf', 'lrf'):
        return MetaInformation(None, None)
    reader = rtf_metadata if stream_type == 'rtf' else lrf_metadata
    return reader(stream)

View File

@ -1,49 +0,0 @@
#!/usr/bin/perl
# Read/Write PDF meta data
# Based on pdf-meta from http://www.osresearch.net/wiki/index.php/Pdf-meta
use warnings;
use strict;
use PDF::API2;
use Getopt::Long;
use Data::Dumper;
# Metadata fields to write. Creator and CreationDate always have a value,
# the other fields are only set when given on the command line.
my %new_info = (Creator => 'libprs500.metadata', CreationDate => scalar( localtime ),);
# Each option stores its value directly into %new_info.
GetOptions(
"c|creator=s" => \$new_info{Creator},
"d|date=s" => \$new_info{CreationDate},
"p|producer=s" => \$new_info{Producer},
"a|author=s" => \$new_info{Author},
"s|subject=s" => \$new_info{Subject},
"k|keywords=s" => \$new_info{Keywords},
"t|title=s" => \$new_info{Title},
) or die "Usage: (no help yet!)\n";
# Every remaining argument is treated as a PDF file to process.
for my $file (@ARGV)
{
# Warn and move on to the next file if this one cannot be opened.
my $pdf = PDF::API2->open( $file )
or warn "Unable to open $file: $!\n"
and next;
# Print the metadata currently stored in the file as: key = """value"""
my %info = $pdf->info;
for my $key (keys %info)
{
print $key.' = """'.$info{$key}.'"""'."\n";
}
print "\n";
# Overlay the new values; fields that were not given stay as they are.
for my $key (keys %new_info)
{
my $new_value = $new_info{$key};
next unless defined $new_value;
$info{$key} = $new_value;
}
# The file is saved back in place on every run, because Creator and
# CreationDate are always defined.
$pdf->info( %info );
$pdf->saveas( $file );
}

View File

@ -1,101 +0,0 @@
## Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Read metadata from RTF files.
"""
import re, cStringIO
from libprs500.metadata import MetaInformation
# Each pattern lazily skips from "{\info" to the first occurrence of its
# keyword group and captures the text up to the first closing brace that is
# not preceded by a backslash. DOTALL lets the match span several lines.
# Note that the comment pattern reads the "\subject" keyword.
title_pat = re.compile(r'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(r'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
comment_pat = re.compile(r'\{\\info.*?\{\\subject(.*?)(?<!\\)\}', re.DOTALL)
category_pat = re.compile(r'\{\\info.*?\{\\category(.*?)(?<!\\)\}', re.DOTALL)
def get_document_info(stream):
    r"""
    Extract the \info block from an RTF file.

    The file is scanned in blocks for the text C{{\info}. Scanning stops
    without a result at the end of the file or at the first block that
    contains C{\sect}.

    @param stream: File like object pointing to the RTF file.
    @return: The info block as a string and the position in the file at
             which it starts, or C{(None, 0)} if there is no info block.
             If the file ends before the block is closed, the part that
             could be read is returned.
    """
    block_size = 4096
    stream.seek(0)
    found, block = False, ""
    while not found:
        # Keep the tail of the previous block so that a marker straddling
        # two reads is still found.
        prefix = block[-6:]
        block = prefix + stream.read(block_size)
        if len(block) == len(prefix):
            break   # nothing more to read
        idx = block.find(r'{\info')
        if idx >= 0:
            found = True
            # block always ends at the current file position, so its start
            # is tell() - len(block). Using block_size here is wrong
            # whenever the last read returned fewer bytes than requested.
            stream.seek(stream.tell() - len(block) + idx)
        elif block.find(r'\sect') > -1:
            break
    if not found:
        return None, 0

    pieces, count = [], 0
    pos = stream.tell()
    while True:
        ch = stream.read(1)
        if not ch:
            # End of file inside the block: stop instead of looping forever.
            break
        if ch == '\\':
            # Copy the escaped character verbatim so that an escaped brace
            # is not counted.
            pieces.append(ch + stream.read(1))
            continue
        if ch == '{':
            count += 1
        elif ch == '}':
            count -= 1
        pieces.append(ch)
        if count == 0:
            break
    return ''.join(pieces), pos
def get_metadata(stream):
    """
    Return metadata as a L{MetaInformation} object

    @param stream: Seekable file like object pointing to the RTF file.
    @raise Exception: If the file does not start with the RTF signature.
    """
    stream.seek(0)
    if stream.read(5) != r'{\rtf':
        raise Exception('Not a valid RTF file')
    block = get_document_info(stream)[0]
    if not block:
        return MetaInformation(None, None)
    title, author, comment, category = None, None, None, None
    title_match = title_pat.search(block)
    if title_match:
        title = title_match.group(1).strip()
    author_match = author_pat.search(block)
    if author_match:
        author = author_match.group(1).strip()
    comment_match = comment_pat.search(block)
    if comment_match:
        # The subject belongs in the comments; storing it in the title
        # would overwrite the real title and leave the comments empty.
        comment = comment_match.group(1).strip()
    category_match = category_pat.search(block)
    if category_match:
        category = category_match.group(1).strip()
    mi = MetaInformation(title, author)
    mi.comments = comment
    mi.category = category
    return mi
def main():
    """
    Command line entry point: print the metadata of the RTF file given as
    the only argument. Prints a usage message to stderr and exits with
    status 1 if the argument count is wrong.
    """
    import sys
    if len(sys.argv) != 2:
        print >> sys.stderr, "Usage:", sys.argv[0], " mybook.rtf"
        sys.exit(1)
    print get_metadata(open(sys.argv[1]))
# Run the metadata dump only when this file is executed as a script.
if __name__ == '__main__':
    main()