Rationalize CLI across lrf utilities.

This commit is contained in:
Kovid Goyal 2007-07-14 19:46:11 +00:00
parent 605ff18508
commit d272eb508e
7 changed files with 176 additions and 276 deletions

View File

@ -115,6 +115,40 @@ def option_parser(usage):
help='''Top margin of page. Default is %default px.''')
page.add_option('--bottom-margin', default=0, dest='bottom_margin', type='int',
help='''Bottom margin of page. Default is %default px.''')
link = parser.add_option_group('LINK PROCESSING OPTIONS')
link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
dest='link_levels',
help=r'''The maximum number of levels to recursively process '''
'''links. A value of 0 means thats links are not followed. '''
'''A negative value means that <a> tags are ignored.''')
link.add_option('--link-exclude', dest='link_exclude', default='$',
help='''A regular expression. <a> tags whoose href '''
'''matches will be ignored. Defaults to %default''')
chapter = parser.add_option_group('CHAPTER OPTIONS')
chapter.add_option('--disable-chapter-detection', action='store_false',
default=True, dest='chapter_detection',
help='''Prevent html2lrf from automatically inserting page breaks'''
''' before what it thinks are chapters.''')
chapter.add_option('--chapter-regex', dest='chapter_regex',
default='chapter|book|appendix',
help='''The regular expression used to detect chapter titles.'''
''' It is searched for in heading tags. Defaults to %default''')
chapter.add_option('--page-break-before', dest='page_break', default='h[12]',
help='''If html2lrf does not find any page breaks in the '''
'''html file and cannot detect chapter headings, it will '''
'''automatically insert page-breaks before the tags whose '''
'''names match this regular expression. Defaults to %default. '''
'''You can disable it by setting the regexp to "$". '''
'''The purpose of this option is to try to ensure that '''
'''there are no really long pages as this degrades the page '''
'''turn performance of the LRF. Thus this option is ignored '''
'''if the current page has only a few elements.''')
chapter.add_option('--force-page-break-before', dest='force_page_break',
default='$', help='Like --page-break-before, but page breaks are forced.')
prepro = parser.add_option_group('PREPROCESSING OPTIONS')
prepro.add_option('--baen', action='store_true', default=False, dest='baen',
help='''Preprocess Baen HTML files to improve generated LRF.''')
fonts = parser.add_option_group('FONT FAMILIES',
'''Specify trutype font families for serif, sans-serif and monospace fonts. '''

View File

@ -35,7 +35,7 @@ except ImportError:
from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \
Comment, Tag, NavigableString, Declaration, ProcessingInstruction
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
TextBlock, ImageBlock, JumpButton, CharButton, Bold, Space, \
TextBlock, ImageBlock, JumpButton, CharButton, Bold,\
Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
LrsError
from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
@ -1368,74 +1368,28 @@ def try_opf(path, options):
print >>sys.stderr, 'Failed to process opf file', err
pass
def option_parser(parser=None, usage='''Usage: %prog [options] mybook.[html|rar|zip]\n\n'''
                  '''%prog converts mybook.html to mybook.lrf'''):
    """
    Return the option parser used for html -> lrf conversions.

    :param parser: An already constructed parser to reuse. When it is false
                   a new one is created with lrf_option_parser(usage).
    :param usage:  Usage text given to lrf_option_parser when a new parser
                   has to be created.
    :return: The parser that was passed in, or the newly created one.

    Both call styles found in this file are supported: option_parser() and
    option_parser(parser).
    """
    # NOTE(review): the link/chapter/preprocessing option groups are not added
    # here any more. This change adds exactly those groups to the shared
    # option_parser(usage) in libprs500.ebooks.lrf, so registering them a
    # second time on the same parser would clash with the existing options.
    # Confirm that lrf_option_parser is that shared function.
    if not parser:
        parser = lrf_option_parser(usage)
    return parser
def parse_options(argv=None, cli=True, parser=None):
    """
    CLI for html -> lrf conversions

    :param argv:   Command line arguments without the program name. When
                   None, sys.argv[1:] is read at call time.
    :param cli:    If True, print the help text before raising when the
                   number of positional arguments is wrong.
    :param parser: Optional parser that is handed on to option_parser().
    :return: The tuple (options, args, parser).
    :raises ConversionError: If not exactly one positional argument is given.
    """
    # A default of sys.argv[1:] in the signature is evaluated only once, when
    # the function is defined, so callers that edit sys.argv before calling
    # would be parsed against the stale copy. Resolve it at call time instead.
    if argv is None:
        argv = sys.argv[1:]
    parser = option_parser(parser)
    options, args = parser.parse_args(args=argv)
    if len(args) != 1:
        if cli:
            parser.print_help()
        raise ConversionError('no filename specified')
    if options.output:
        options.output = os.path.abspath(os.path.expanduser(options.output))
    return options, args, parser
def main(args=sys.argv):
    """
    Entry point of the html -> lrf command line tool.

    :param args: Full argument vector, program name included, so exactly one
                 positional argument (the source file) must follow it.
    :return: 0 on success, 1 when the arguments are unusable or setting up
             the options raised an exception.
    """
    try:
        parser = option_parser()
        options, args = parser.parse_args(args)
        if options.output:
            options.output = os.path.abspath(os.path.expanduser(options.output))
        if len(args) != 2:
            parser.print_help()
            return 1
        src = args[1]
        if options.verbose:
            import warnings
            warnings.defaultaction = 'error'
    except Exception:
        # sys.exc_info() yields the active exception without relying on the
        # version specific "except X, e" / "except X as e" spelling.
        err = sys.exc_info()[1]
        sys.stderr.write('%s\n' % (err,))
        return 1
    # The conversion itself runs outside the try block, so its errors are
    # not swallowed by the handler above.
    process_file(src, options)
    return 0
def console_query(dirpath, candidate, docs):
if len(docs) == 1:
@ -1500,4 +1454,4 @@ def get_path(path, query=console_query):
if __name__ == '__main__':
    # Run the tool exactly once and hand its return value to the shell as
    # the exit status.
    sys.exit(main())

View File

@ -15,29 +15,19 @@
import os, sys, shutil, glob
from tempfile import mkdtemp
from subprocess import Popen, PIPE
from libprs500.ebooks.lrf import option_parser
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks import ConversionError
from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500 import isosx
CLIT = 'clit'
if isosx and hasattr(sys, 'frameworks_dir'):
CLIT = os.path.join(sys.frameworks_dir, CLIT)
def option_parser():
    """Return the option parser for lit -> lrf conversions."""
    return lrf_option_parser(
        '''Usage: %prog [options] mybook.lit\n\n'''
        '''%prog converts mybook.lit to mybook.lrf'''
    )


def parse_options(cli=True):
    """
    CLI for lit -> lrf conversions

    Kept so that code written against the earlier interface still works; it
    parses sys.argv with the parser returned by option_parser().

    :param cli: If True, print the help text before raising when the number
                of positional arguments is wrong.
    :return: The tuple (options, args, parser).
    :raises ConversionError: If not exactly one positional argument is given.
    """
    parser = option_parser()
    options, args = parser.parse_args()
    if len(args) != 1:
        if cli:
            parser.print_help()
        raise ConversionError('no filename specified')
    return options, args, parser
def generate_html(pathtolit):
if not os.access(pathtolit, os.R_OK):
@ -52,56 +42,40 @@ def generate_html(pathtolit):
raise ConversionError, err
return tdir
def main(args=sys.argv):
    """
    Entry point of the lit -> lrf command line tool.

    The lit file is unpacked into a temporary directory by generate_html(),
    one of the resulting html files is chosen as the root document and then
    converted with process_file(). The temporary directory is always removed.

    :param args: Full argument vector, program name included.
    :return: 0 on success, 1 if no lit file was specified.
    :raises ConversionError: If the unpacked directory contains no html file.
    """
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        sys.stdout.write('\nNo lit file specified\n')
        return 1
    lit = os.path.abspath(os.path.expanduser(args[1]))
    tdir = generate_html(lit)
    try:
        # Prefer a table-of-contents like file; the first pattern that
        # matches anything wins and its first match is used.
        htmlfile = None
        for pattern in ('*toc*.htm*', '*top*.htm*', '*contents*.htm*'):
            l = glob.glob(os.path.join(tdir, pattern))
            if l:
                htmlfile = l[0]
                break
        if htmlfile is None:
            l = glob.glob(os.path.join(tdir, '*.htm*'))
            if not l:
                raise ConversionError('Conversion of lit to html failed. Cannot find html file.')
            # Fall back to the largest html file. max() still returns a file
            # when every candidate is empty, where a "size > 0" scan would
            # leave the choice unset.
            htmlfile = max(l, key=os.path.getsize)
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.basename(os.path.splitext(args[1])[0]) + ext
        process_file(htmlfile, options)
    finally:
        shutil.rmtree(tdir)
    return 0
if __name__ == '__main__':
    # Run the tool exactly once and hand its return value to the shell as
    # the exit status.
    sys.exit(main())

View File

@ -559,19 +559,16 @@ class LRFMetaFile(object):
self._file.write(val)
def parse_options(argv=None, cli=True):
def option_parser():
from optparse import OptionParser
from libprs500 import __version__ as VERSION
if not argv:
argv = sys.argv[1:]
from libprs500 import __appname__, __version__
parser = OptionParser(usage = \
"""%prog [options] mybook.lrf
Show/edit the metadata in an LRF file.
WARNING: Based on reverse engineering the LRF format.
Making changes may render your LRF file unreadable.
""", version=VERSION)
'''%prog [options] mybook.lrf\n\n'''
'''Show/edit the metadata in an LRF file.\n\n'''
'''WARNING: Based on reverse engineering the LRF format.\n'''
'''Making changes may render your LRF file unreadable.''',
version=__appname__+' '+__version__,
epilog='Created by Kovid Goyal')
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help="Set the book title")
parser.add_option('--title-sort', action='store', type='string', default=None,
@ -594,20 +591,20 @@ def parse_options(argv=None, cli=True):
help="Extract thumbnail from LRF file")
parser.add_option("-p", "--page", action="store", type="string", \
dest="page", help="Don't know what this is for")
options, args = parser.parse_args(args=argv)
if len(args) != 1:
if cli:
parser.print_help()
raise LRFException, 'no filename specified'
return options, args, parser
return parser
def main():
def main(args=sys.argv):
import os.path
try:
options, args, parser = parse_options()
except:
sys.exit(1)
lrf = LRFMetaFile(open(args[0], "r+b"))
parser = option_parser()
options, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print
print 'No lrf file specified'
return 1
lrf = LRFMetaFile(open(args[1], "r+b"))
if options.title:
lrf.title = options.title
if options.title_reading != None:
@ -646,4 +643,4 @@ def main():
print "Thumbnail:", td
if __name__ == '__main__':
    # Run the tool exactly once and hand its return value to the shell as
    # the exit status.
    sys.exit(main())

View File

@ -14,9 +14,8 @@
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import os, sys, tempfile, subprocess, shutil
from libprs500.ebooks.lrf import option_parser
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks.metadata.meta import get_metadata
from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500.ebooks import ConversionError
from libprs500 import isosx
@ -25,20 +24,11 @@ UNRTF = 'unrtf'
if isosx and hasattr(sys, 'frameworks_dir'):
UNRTF = os.path.join(sys.frameworks_dir, UNRTF)
def option_parser():
    """Return the option parser for rtf -> lrf conversions."""
    return lrf_option_parser(
        '''Usage: %prog [options] mybook.rtf\n\n'''
        '''%prog converts mybook.rtf to mybook.lrf'''
    )


def parse_options(cli=True):
    """
    CLI for rtf -> lrf conversions

    Kept so that code written against the earlier interface still works; it
    parses sys.argv with the parser returned by option_parser().

    :param cli: If True, print the help text before raising when the number
                of positional arguments is wrong.
    :return: The tuple (options, args, parser).
    :raises ConversionError: If not exactly one positional argument is given.
    """
    parser = option_parser()
    options, args = parser.parse_args()
    if len(args) != 1:
        if cli:
            parser.print_help()
        raise ConversionError('no filename specified')
    return options, args, parser
def generate_html(rtfpath):
tdir = tempfile.mkdtemp(prefix='rtf2lrf_')
@ -61,58 +51,41 @@ def generate_html(rtfpath):
finally:
os.chdir(cwd)
def main(args=sys.argv):
    """
    Entry point of the rtf -> lrf command line tool.

    Metadata is read from the rtf file, the file is converted to html by
    generate_html() and the html is converted with process_file(). The
    directory containing the generated html is always removed afterwards.

    :param args: Full argument vector, program name included.
    :return: 0 on success, 1 if no rtf file was specified.
    """
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        sys.stdout.write('\nNo rtf file specified\n')
        return 1
    rtf = os.path.abspath(os.path.expanduser(args[1]))
    f = open(rtf, 'rb')
    try:
        mi = get_metadata(f, 'rtf')
    finally:
        # Close the handle even if reading the metadata fails.
        f.close()
    html = generate_html(rtf)
    tdir = os.path.dirname(html)
    try:
        if not options.output:
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.basename(os.path.splitext(args[1])[0]) + ext
        # Fill in metadata the user did not supply from the rtf file. The
        # values are stored on the options object directly: the command line
        # has already been parsed, so appending flags to sys.argv here would
        # have no effect on the conversion.
        fallbacks = (
            ('title', mi.title),
            ('author', mi.author),
            ('category', mi.category),
            ('freetext', mi.comments),
        )
        for dest, value in fallbacks:
            current = getattr(options, dest)
            if (not current or current == 'Unknown') and value:
                setattr(options, dest, value)
        process_file(html, options)
    finally:
        shutil.rmtree(tdir)
    return 0
if __name__ == '__main__':
    # Run the tool exactly once and hand its return value to the shell as
    # the exit status.
    sys.exit(main())

View File

@ -19,33 +19,21 @@ import os, sys, codecs
from libprs500 import iswindows
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.lrf import option_parser
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks import ConversionError
from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500.ebooks.markdown import markdown
def option_parser():
    """
    Return the option parser for txt -> lrf conversions.

    On top of the options provided by lrf_option_parser it adds
    -e/--encoding, whose default is cp1252 on Windows and utf8 elsewhere.
    """
    parser = lrf_option_parser('''Usage: %prog [options] mybook.txt\n\n'''
                               '''%prog converts mybook.txt to mybook.lrf''')
    defenc = 'cp1252' if iswindows else 'utf8'
    enchelp = 'Set the encoding used to decode ' + \
              'the text in mybook.txt. Default encoding is %default'
    parser.add_option('-e', '--encoding', action='store', type='string',
                      dest='encoding', help=enchelp, default=defenc)
    return parser


def parse_options(argv=None, cli=True):
    """
    CLI for txt -> lrf conversions

    Kept so that code written against the earlier interface still works.

    :param argv: Command line arguments without the program name. When
                 empty or None, sys.argv[1:] is used.
    :param cli:  If True, print the help text before raising when the number
                 of positional arguments is wrong.
    :return: The tuple (options, args, parser).
    :raises ConversionError: If not exactly one positional argument is given.
    """
    if not argv:
        argv = sys.argv[1:]
    parser = option_parser()
    options, args = parser.parse_args(args=argv)
    if len(args) != 1:
        if cli:
            parser.print_help()
        raise ConversionError('no filename specified')
    return options, args, parser
def generate_html(txtfile, encoding):
'''
@ -78,43 +66,22 @@ def generate_html(txtfile, encoding):
codecs.open(p.name, 'wb', enc).write(html)
return p
def main(args=sys.argv):
    """
    Entry point of the txt -> lrf command line tool.

    The text file is turned into html by generate_html() and that html is
    converted with process_file().

    :param args: Full argument vector, program name included.
    :return: 0 on success, 1 if no txt file was specified.
    """
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        sys.stdout.write('\nNo txt file specified\n')
        return 1
    txt = os.path.abspath(os.path.expanduser(args[1]))
    htmlfile = generate_html(txt, options.encoding)
    # Page breaks are always forced before h2 tags for text input; this
    # replaces whatever --force-page-break-before value was parsed.
    options.force_page_break = 'h2'
    if not options.output:
        ext = '.lrs' if options.lrs else '.lrf'
        options.output = os.path.basename(os.path.splitext(args[1])[0]) + ext
    process_file(htmlfile.name, options)
    return 0
if __name__ == '__main__':
    # Run the tool exactly once and hand its return value to the shell as
    # the exit status.
    sys.exit(main())

View File

@ -16,8 +16,8 @@
import sys, os
from subprocess import check_call
def options(parse_options):
options, args, parser = parse_options(['dummy'], cli=False)
def options(option_parser):
parser = option_parser()
options = parser.option_list
for group in parser.option_groups:
options += group.option_list
@ -70,15 +70,16 @@ def setup_completion():
try:
print 'Setting up bash completion...',
sys.stdout.flush()
from libprs500.ebooks.lrf.html.convert_from import parse_options as htmlop
from libprs500.ebooks.lrf.txt.convert_from import parse_options as txtop
from libprs500.ebooks.lrf.meta import parse_options as metaop
from libprs500.ebooks.lrf.html.convert_from import option_parser as htmlop
from libprs500.ebooks.lrf.txt.convert_from import option_parser as txtop
from libprs500.ebooks.lrf.meta import option_parser as metaop
f = open('/etc/bash_completion.d/libprs500', 'wb')
f.write('# libprs500 Bash Shell Completion\n')
f.write(opts_and_exts('html2lrf', htmlop,
['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'php']))
f.write(opts_and_exts('txt2lrf', txtop, ['txt']))
f.write(opts_and_exts('lit2lrf', txtop, ['lit']))
f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
f.write('''
_prs500_ls()