Consolidate LRF command-line handling.

* Move the link, chapter and preprocessing options out of html2lrf and into
  the shared option_parser() in libprs500.ebooks.lrf.
* Replace each tool's parse_options() with an option_parser() factory, and
  give every main() an argv-style `args` parameter (default sys.argv) so the
  __main__ guard can pass its result to sys.exit().
* lit2lrf, rtf2lrf and txt2lrf now set the output name (and, for txt2lrf,
  force_page_break) on the parsed options instead of rewriting sys.argv.
* rtf2lrf: metadata read from the RTF file is copied onto the parsed options
  directly; appending to sys.argv after parse_args() had no effect.
* linux.py: bash completion is built from the option_parser() factories and
  gains an rtf2lrf entry.

diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py
index 1022dcf9ba..cddd29562d 100644
--- a/src/libprs500/ebooks/lrf/__init__.py
+++ b/src/libprs500/ebooks/lrf/__init__.py
@@ -115,6 +115,40 @@ def option_parser(usage):
                     help='''Top margin of page. Default is %default px.''')
     page.add_option('--bottom-margin', default=0, dest='bottom_margin', type='int',
                     help='''Bottom margin of page. Default is %default px.''')
+    link = parser.add_option_group('LINK PROCESSING OPTIONS')
+    link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
+                    dest='link_levels',
+                    help=r'''The maximum number of levels to recursively process '''
+                         '''links. A value of 0 means thats links are not followed. '''
+                         '''A negative value means that tags are ignored.''')
+    link.add_option('--link-exclude', dest='link_exclude', default='$',
+                    help='''A regular expression. tags whoose href '''
+                         '''matches will be ignored. Defaults to %default''')
+    chapter = parser.add_option_group('CHAPTER OPTIONS')
+    chapter.add_option('--disable-chapter-detection', action='store_false',
+                       default=True, dest='chapter_detection',
+                       help='''Prevent html2lrf from automatically inserting page breaks'''
+                            ''' before what it thinks are chapters.''')
+    chapter.add_option('--chapter-regex', dest='chapter_regex',
+                       default='chapter|book|appendix',
+                       help='''The regular expression used to detect chapter titles.'''
+                            ''' It is searched for in heading tags. Defaults to %default''')
+    chapter.add_option('--page-break-before', dest='page_break', default='h[12]',
+                       help='''If html2lrf does not find any page breaks in the '''
+                            '''html file and cannot detect chapter headings, it will '''
+                            '''automatically insert page-breaks before the tags whose '''
+                            '''names match this regular expression. Defaults to %default. '''
+                            '''You can disable it by setting the regexp to "$". '''
+                            '''The purpose of this option is to try to ensure that '''
+                            '''there are no really long pages as this degrades the page '''
+                            '''turn performance of the LRF. Thus this option is ignored '''
+                            '''if the current page has only a few elements.''')
+    chapter.add_option('--force-page-break-before', dest='force_page_break',
+                       default='$', help='Like --page-break-before, but page breaks are forced.')
+    prepro = parser.add_option_group('PREPROCESSING OPTIONS')
+    prepro.add_option('--baen', action='store_true', default=False, dest='baen',
+                      help='''Preprocess Baen HTML files to improve generated LRF.''')
+
 
     fonts = parser.add_option_group('FONT FAMILIES',
                     '''Specify trutype font families for serif, sans-serif and monospace fonts. '''
diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py
index 65473c5c76..1d471eccb6 100644
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@@ -35,7 +35,7 @@ except ImportError:
 from libprs500.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, \
                 Comment, Tag, NavigableString, Declaration, ProcessingInstruction
 from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, \
-                TextBlock, ImageBlock, JumpButton, CharButton, Bold, Space, \
+                TextBlock, ImageBlock, JumpButton, CharButton, Bold,\
                 Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas, DropCaps, \
                 LrsError
 from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
@@ -1368,74 +1368,28 @@ def try_opf(path, options):
         print >>sys.stderr, 'Failed to process opf file', err
         pass
 
-
-def option_parser(parser=None, usage='''Usage: %prog [options] mybook.[html|rar|zip]\n\n'''
-                  '''%prog converts mybook.html to mybook.lrf'''
-                  ):
-    if not parser:
-        parser = lrf_option_parser(usage)
-    link = parser.add_option_group('LINK PROCESSING OPTIONS')
-    link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
-                    dest='link_levels',
-                    help=r'''The maximum number of levels to recursively process '''
-                         '''links. A value of 0 means thats links are not followed. '''
-                         '''A negative value means that tags are ignored.''')
-    link.add_option('--link-exclude', dest='link_exclude', default='$',
-                    help='''A regular expression. tags whoose href '''
-                         '''matches will be ignored. Defaults to %default''')
-    chapter = parser.add_option_group('CHAPTER OPTIONS')
-    chapter.add_option('--disable-chapter-detection', action='store_false',
-                       default=True, dest='chapter_detection',
-                       help='''Prevent html2lrf from automatically inserting page breaks'''
-                            ''' before what it thinks are chapters.''')
-    chapter.add_option('--chapter-regex', dest='chapter_regex',
-                       default='chapter|book|appendix',
-                       help='''The regular expression used to detect chapter titles.'''
-                            ''' It is searched for in heading tags. Defaults to %default''')
-    chapter.add_option('--page-break-before', dest='page_break', default='h[12]',
-                       help='''If html2lrf does not find any page breaks in the '''
-                            '''html file and cannot detect chapter headings, it will '''
-                            '''automatically insert page-breaks before the tags whose '''
-                            '''names match this regular expression. Defaults to %default. '''
-                            '''You can disable it by setting the regexp to "$". '''
-                            '''The purpose of this option is to try to ensure that '''
-                            '''there are no really long pages as this degrades the page '''
-                            '''turn performance of the LRF. Thus this option is ignored '''
-                            '''if the current page has only a few elements.''')
-    chapter.add_option('--force-page-break-before', dest='force_page_break',
-                       default='$', help='Like --page-break-before, but page breaks are forced.')
-    prepro = parser.add_option_group('PREPROCESSING OPTIONS')
-    prepro.add_option('--baen', action='store_true', default=False, dest='baen',
-                      help='''Preprocess Baen HTML files to improve generated LRF.''')
-    return parser
+def option_parser():
+    return lrf_option_parser('''Usage: %prog [options] mybook.[html|rar|zip]\n\n'''
+                             '''%prog converts mybook.html to mybook.lrf''')
 
-
-
-def parse_options(argv=sys.argv[1:], cli=True, parser=None):
-    """ CLI for html -> lrf conversions """
-    parser = option_parser(parser)
-    options, args = parser.parse_args(args=argv)
-
-    if len(args) != 1:
-        if cli:
-            parser.print_help()
-        raise ConversionError, 'no filename specified'
-    if options.output:
-        options.output = os.path.abspath(os.path.expanduser(options.output))
-    return options, args, parser
-
-
-def main():
+def main(args=sys.argv):
     try:
-        options, args, parser = parse_options()
-        src = args[0]
+        parser = option_parser()
+        options, args = parser.parse_args(args)
+        if options.output:
+            options.output = os.path.abspath(os.path.expanduser(options.output))
+        if len(args) != 2:
+            parser.print_help()
+            return 1
+        src = args[1]
         if options.verbose:
             import warnings
             warnings.defaultaction = 'error'
     except Exception, err:
         print >> sys.stderr, err
-        sys.exit(1)
+        return 1
     process_file(src, options)
+    return 0
 
 def console_query(dirpath, candidate, docs):
     if len(docs) == 1:
@@ -1500,4 +1454,4 @@ def get_path(path, query=console_query):
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    sys.exit(main())
\ No newline at end of file
diff --git a/src/libprs500/ebooks/lrf/lit/convert_from.py b/src/libprs500/ebooks/lrf/lit/convert_from.py
index 9c057cf14f..9c63b6e9e5 100644
--- a/src/libprs500/ebooks/lrf/lit/convert_from.py
+++ b/src/libprs500/ebooks/lrf/lit/convert_from.py
@@ -15,29 +15,19 @@
 import os, sys, shutil, glob
 from tempfile import mkdtemp
 from subprocess import Popen, PIPE
-from libprs500.ebooks.lrf import option_parser
+from libprs500.ebooks.lrf import option_parser as lrf_option_parser
 from libprs500.ebooks import ConversionError
-from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
 from libprs500.ebooks.lrf.html.convert_from import process_file
 from libprs500 import isosx
 CLIT = 'clit'
 if isosx and hasattr(sys, 'frameworks_dir'):
     CLIT = os.path.join(sys.frameworks_dir, CLIT)
 
-def parse_options(cli=True):
-    """ CLI for lit -> lrf conversions """
-    parser = option_parser(
-        """usage: %prog [options] mybook.lit
-        
-        %prog converts mybook.lit to mybook.lrf
-        """
+def option_parser():
+    return lrf_option_parser(
+        '''Usage: %prog [options] mybook.lit\n\n'''
+        '''%prog converts mybook.lit to mybook.lrf'''
         )
-    options, args = parser.parse_args()
-    if len(args) != 1:
-        if cli:
-            parser.print_help()
-        raise ConversionError, 'no filename specified'
-    return options, args, parser
 
 def generate_html(pathtolit):
     if not os.access(pathtolit, os.R_OK):
@@ -52,56 +42,40 @@ def generate_html(pathtolit):
         raise ConversionError, err
     return tdir
 
-def main():
+def main(args=sys.argv):
+    parser = option_parser()
+    options, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print
+        print 'No lit file specified'
+        return 1
+    lit = os.path.abspath(os.path.expanduser(args[1]))
+    tdir = generate_html(lit)
     try:
-        options, args, parser = parse_options()
-        lit = os.path.abspath(os.path.expanduser(args[0]))
-        tdir = generate_html(lit)
-        try:
-            l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
+        l = glob.glob(os.path.join(tdir, '*toc*.htm*'))
+        if not l:
+            l = glob.glob(os.path.join(tdir, '*top*.htm*'))
+        if not l:
+            l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
+        if not l:
+            l = glob.glob(os.path.join(tdir, '*.htm*'))
             if not l:
-                l = glob.glob(os.path.join(tdir, '*top*.htm*'))
-            if not l:
-                l = glob.glob(os.path.join(tdir, '*contents*.htm*'))
-            if not l:
-                l = glob.glob(os.path.join(tdir, '*.htm*'))
-                if not l:
-                    raise ConversionError, 'Conversion of lit to html failed. Cannot find html file.'
-                maxsize, htmlfile = 0, None
-                for c in l:
-                    sz = os.path.getsize(c)
-                    if sz > maxsize:
-                        maxsize, htmlfile = sz, c
-            else:
-                htmlfile = l[0]
-            for i in range(1, len(sys.argv)):
-                if sys.argv[i] == args[0]:
-                    sys.argv.remove(sys.argv[i])
-                    break
-            sys.argv.append(htmlfile)
-            o_spec = False
-            for arg in sys.argv[1:]:
-                arg = arg.lstrip()
-                if arg.startswith('-o') or arg.startswith('--output'):
-                    o_spec = True
-                    break
-            ext = '.lrf'
-            for arg in sys.argv[1:]:
-                if arg.strip() == '--lrs':
-                    ext = '.lrs'
-                    break
-            if not o_spec:
-                sys.argv.append('-o')
-                sys.argv.append(os.path.splitext(os.path.basename(lit))[0]+ext)
-            options, args, parser = html_parse_options(parser=parser)
-            process_file(htmlfile, options)
-        finally:
-            shutil.rmtree(tdir)
-    except ConversionError, err:
-        print >>sys.stderr, err
-        sys.exit(1)
+                raise ConversionError, 'Conversion of lit to html failed. Cannot find html file.'
+            maxsize, htmlfile = 0, None
+            for c in l:
+                sz = os.path.getsize(c)
+                if sz > maxsize:
+                    maxsize, htmlfile = sz, c
+        else:
+            htmlfile = l[0]
+        if not options.output:
+            ext = '.lrs' if options.lrs else '.lrf'
+            options.output = os.path.basename(os.path.splitext(args[1])[0]) + ext
+        process_file(htmlfile, options)
+    finally:
+        shutil.rmtree(tdir)
+
 
 if __name__ == '__main__':
-    main()
-
-    
\ No newline at end of file
+    sys.exit(main())
diff --git a/src/libprs500/ebooks/lrf/meta.py b/src/libprs500/ebooks/lrf/meta.py
index 256726209b..dc757e51a3 100644
--- a/src/libprs500/ebooks/lrf/meta.py
+++ b/src/libprs500/ebooks/lrf/meta.py
@@ -559,19 +559,16 @@ class LRFMetaFile(object):
         self._file.write(val)
 
 
-def parse_options(argv=None, cli=True):
+def option_parser():
     from optparse import OptionParser
-    from libprs500 import __version__ as VERSION
-    if not argv:
-        argv = sys.argv[1:]
+    from libprs500 import __appname__, __version__
     parser = OptionParser(usage = \
-"""%prog [options] mybook.lrf
-
-Show/edit the metadata in an LRF file.
-
-WARNING: Based on reverse engineering the LRF format.
-Making changes may render your LRF file unreadable. 
-""", version=VERSION)
+'''%prog [options] mybook.lrf\n\n'''
+'''Show/edit the metadata in an LRF file.\n\n'''
+'''WARNING: Based on reverse engineering the LRF format.\n'''
+'''Making changes may render your LRF file unreadable.''',
+    version=__appname__+' '+__version__,
+    epilog='Created by Kovid Goyal')
     parser.add_option("-t", "--title", action="store", type="string", \
                       dest="title", help="Set the book title")
     parser.add_option('--title-sort', action='store', type='string', default=None,
@@ -594,20 +591,20 @@ def parse_options(argv=None, cli=True):
                       help="Extract thumbnail from LRF file")
     parser.add_option("-p", "--page", action="store", type="string", \
                       dest="page", help="Don't know what this is for")
-    options, args = parser.parse_args(args=argv)
-    if len(args) != 1:
-        if cli:
-            parser.print_help()
-        raise LRFException, 'no filename specified'
-    return options, args, parser
+
+    return parser
 
-def main():
+
+def main(args=sys.argv):
     import os.path
-    try:
-        options, args, parser = parse_options()
-    except:
-        sys.exit(1)
-    lrf = LRFMetaFile(open(args[0], "r+b"))
+    parser = option_parser()
+    options, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print
+        print 'No lrf file specified'
+        return 1
+    lrf = LRFMetaFile(open(args[1], "r+b"))
     if options.title:
         lrf.title = options.title
     if options.title_reading != None:
@@ -646,4 +643,4 @@ def main():
         print "Thumbnail:", td
 
 if __name__ == '__main__':
-    main()
+    sys.exit(main())
diff --git a/src/libprs500/ebooks/lrf/rtf/convert_from.py b/src/libprs500/ebooks/lrf/rtf/convert_from.py
index 414210d51f..ccf11093bd 100644
--- a/src/libprs500/ebooks/lrf/rtf/convert_from.py
+++ b/src/libprs500/ebooks/lrf/rtf/convert_from.py
@@ -14,9 +14,8 @@
 ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 import os, sys, tempfile, subprocess, shutil
 
-from libprs500.ebooks.lrf import option_parser
+from libprs500.ebooks.lrf import option_parser as lrf_option_parser
 from libprs500.ebooks.metadata.meta import get_metadata
-from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
 from libprs500.ebooks.lrf.html.convert_from import process_file
 from libprs500.ebooks import ConversionError
 from libprs500 import isosx
@@ -25,20 +24,11 @@ UNRTF = 'unrtf'
 if isosx and hasattr(sys, 'frameworks_dir'):
     UNRTF = os.path.join(sys.frameworks_dir, UNRTF)
 
-def parse_options(cli=True):
-    """ CLI for rtf -> lrf conversions """
-    parser = option_parser(
-        """usage: %prog [options] mybook.rtf
-        
-        %prog converts mybook.rtf to mybook.lrf
-        """
+def option_parser():
+    return lrf_option_parser(
+        '''Usage: %prog [options] mybook.rtf\n\n'''
+        '''%prog converts mybook.rtf to mybook.lrf'''
         )
-    options, args = parser.parse_args()
-    if len(args) != 1:
-        if cli:
-            parser.print_help()
-        raise ConversionError, 'no filename specified'
-    return options, args, parser
 
 def generate_html(rtfpath):
     tdir = tempfile.mkdtemp(prefix='rtf2lrf_')
@@ -61,58 +51,41 @@ def generate_html(rtfpath):
     finally:
         os.chdir(cwd)
 
-def main():
+def main(args=sys.argv):
+    parser = option_parser()
+    options, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print
+        print 'No rtf file specified'
+        return 1
+    rtf = os.path.abspath(os.path.expanduser(args[1]))
+    f = open(rtf, 'rb')
+    mi = get_metadata(f, 'rtf')
+    f.close()
+    html = generate_html(rtf)
+    tdir = os.path.dirname(html)
     try:
-        options, args, parser = parse_options()
-        rtf = os.path.abspath(os.path.expanduser(args[0]))
-        f = open(rtf, 'rb')
-        mi = get_metadata(f, 'rtf')
-        f.close()
-        html = generate_html(rtf)
-        tdir = os.path.dirname(html)
-        try:
-            for i in range(len(sys.argv)):
-                if sys.argv[i] == args[0]:
-                    sys.argv[i] = html
-            o_spec = False
-            for arg in sys.argv[1:]:
-                arg = arg.lstrip()
-                if arg.startswith('-o') or arg.startswith('--output'):
-                    o_spec = True
-                    break
-            ext = '.lrf'
-            for arg in sys.argv[1:]:
-                if arg.strip() == '--lrs':
-                    ext = '.lrs'
-                    break
-            if not o_spec:
-                sys.argv.append('-o')
-                sys.argv.append(os.path.splitext(os.path.basename(rtf))[0]+ext)
-            
-            if (not options.title or options.title == 'Unknown') and mi.title:
-                sys.argv.append('-t')
-                sys.argv.append('"'+mi.title+'"')
-            if (not options.author or options.author == 'Unknown') and mi.author:
-                sys.argv.append('-a')
-                sys.argv.append('"'+mi.author+'"')
-            if (not options.category or options.category == 'Unknown') and mi.category:
-                sys.argv.append('--category')
-                sys.argv.append('"'+mi.category+'"')
-            if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
-                sys.argv.append('--comment')
-                sys.argv.append('"'+mi.comments+'"')
-            options, args, parser = html_parse_options(parser=parser)
-            process_file(html, options)
-        finally:
-            #try:
-            shutil.rmtree(tdir)
-            #except: # Windows can raise an error if some file is still being used
-            #    pass
-    except ConversionError, err:
-        print >>sys.stderr, err
-        sys.exit(1)
+        if not options.output:
+            ext = '.lrs' if options.lrs else '.lrf'
+            options.output = os.path.basename(os.path.splitext(args[1])[0]) + ext
+        # The options were already parsed above, so appending to sys.argv
+        # here would have no effect. Copy the metadata read from the RTF
+        # file straight onto the options object for every field the user
+        # left unset or as 'Unknown' (unquoted: no argv parsing is involved).
+        if (not options.title or options.title == 'Unknown') and mi.title:
+            options.title = mi.title
+        if (not options.author or options.author == 'Unknown') and mi.author:
+            options.author = mi.author
+        if (not options.category or options.category == 'Unknown') and mi.category:
+            options.category = mi.category
+        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
+            options.freetext = mi.comments
+        process_file(html, options)
+    finally:
+        shutil.rmtree(tdir)
 
 
 
 if __name__ == '__main__':
-    main()
+    sys.exit(main())
\ No newline at end of file
diff --git a/src/libprs500/ebooks/lrf/txt/convert_from.py b/src/libprs500/ebooks/lrf/txt/convert_from.py
index 9734f4af7f..a395f4c03f 100644
--- a/src/libprs500/ebooks/lrf/txt/convert_from.py
+++ b/src/libprs500/ebooks/lrf/txt/convert_from.py
@@ -19,33 +19,21 @@
 import os, sys, codecs
 from libprs500 import iswindows
 from libprs500.ptempfile import PersistentTemporaryFile
-from libprs500.ebooks.lrf import option_parser
+from libprs500.ebooks.lrf import option_parser as lrf_option_parser
 from libprs500.ebooks import ConversionError
-from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
 from libprs500.ebooks.lrf.html.convert_from import process_file
 from libprs500.ebooks.markdown import markdown
 
-def parse_options(argv=None, cli=True):
-    """ CLI for txt -> lrf conversions """
-    if not argv:
-        argv = sys.argv[1:]
-    parser = option_parser(
-        """usage: %prog [options] mybook.txt
-        
-        %prog converts mybook.txt to mybook.lrf
-        """
-        )
+def option_parser():
+    parser = lrf_option_parser('''Usage: %prog [options] mybook.txt\n\n'''
+                               '''%prog converts mybook.txt to mybook.lrf''')
     defenc = 'cp1252' if iswindows else 'utf8'
     enchelp = 'Set the encoding used to decode ' + \
               'the text in mybook.txt. Default encoding is %default'
     parser.add_option('-e', '--encoding', action='store', type='string', \
                       dest='encoding', help=enchelp, default=defenc)
-    options, args = parser.parse_args(args=argv)
-    if len(args) != 1:
-        if cli:
-            parser.print_help()
-        raise ConversionError, 'no filename specified'
-    return options, args, parser
+    return parser
+
 
 def generate_html(txtfile, encoding):
     '''
@@ -78,43 +66,22 @@ def generate_html(txtfile, encoding):
     codecs.open(p.name, 'wb', enc).write(html)
     return p
 
-def main():
-    try:
-        options, args, parser = parse_options()
-        txt = os.path.abspath(os.path.expanduser(args[0]))
-        p = generate_html(txt, options.encoding)
-        for i in range(1, len(sys.argv)):
-            if sys.argv[i] == args[0]:
-                sys.argv.remove(sys.argv[i])
-                break
-        sys.argv.append(p.name)
-        sys.argv.append('--force-page-break-before')
-        sys.argv.append('h2')
-        o_spec = False
-        for arg in sys.argv[1:]:
-            arg = arg.lstrip()
-            if arg.startswith('-o') or arg.startswith('--output'):
-                o_spec = True
-                break
-        ext = '.lrf'
-        for arg in sys.argv[1:]:
-            if arg.strip() == '--lrs':
-                ext = '.lrs'
-                break
-        if not o_spec:
-            sys.argv.append('-o')
-            sys.argv.append(os.path.splitext(os.path.basename(txt))[0]+ext)
-        options, args, parser = html_parse_options(parser=parser)
-        src = args[0]
-        if options.verbose:
-            import warnings
-            warnings.defaultaction = 'error'
-    except Exception, err:
-        print >> sys.stderr, err
-        sys.exit(1)
-    process_file(src, options)
-
+def main(args=sys.argv):
+    parser = option_parser()
+    options, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print
+        print 'No txt file specified'
+        return 1
+    txt = os.path.abspath(os.path.expanduser(args[1]))
+    htmlfile = generate_html(txt, options.encoding)
+    options.force_page_break = 'h2'
+    if not options.output:
+        ext = '.lrs' if options.lrs else '.lrf'
+        options.output = os.path.basename(os.path.splitext(args[1])[0]) + ext
+    process_file(htmlfile.name, options)
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    sys.exit(main())
\ No newline at end of file
diff --git a/src/libprs500/linux.py b/src/libprs500/linux.py
index 193440f699..a4c961b435 100644
--- a/src/libprs500/linux.py
+++ b/src/libprs500/linux.py
@@ -16,8 +16,8 @@
 import sys, os
 from subprocess import check_call
 
-def options(parse_options):
-    options, args, parser = parse_options(['dummy'], cli=False)
+def options(option_parser):
+    parser = option_parser()
     options = parser.option_list
     for group in parser.option_groups:
         options += group.option_list
@@ -70,15 +70,16 @@ def setup_completion():
     try:
         print 'Setting up bash completion...',
         sys.stdout.flush()
-        from libprs500.ebooks.lrf.html.convert_from import parse_options as htmlop
-        from libprs500.ebooks.lrf.txt.convert_from import parse_options as txtop
-        from libprs500.ebooks.lrf.meta import parse_options as metaop
+        from libprs500.ebooks.lrf.html.convert_from import option_parser as htmlop
+        from libprs500.ebooks.lrf.txt.convert_from import option_parser as txtop
+        from libprs500.ebooks.lrf.meta import option_parser as metaop
         f = open('/etc/bash_completion.d/libprs500', 'wb')
         f.write('# libprs500 Bash Shell Completion\n')
         f.write(opts_and_exts('html2lrf', htmlop,
                               ['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'php']))
         f.write(opts_and_exts('txt2lrf', txtop, ['txt']))
-        f.write(opts_and_exts('lit2lrf', txtop, ['lit']))
+        f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
+        f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
         f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
         f.write('''
 _prs500_ls()