Working initial HTML->LRF converter with CSS support. Next on list is support for <style>, <link> and <img> tags.

2025-10-26 00:02:25 -04:00 · 2007-04-18 22:51:48 +00:00 · 2007-04-18 22:51:48 +00:00 · d69fad53f4
commit d69fad53f4
parent 15014f74fe
9 changed files with 191 additions and 431 deletions
--- a/.pydevproject
+++ b/.pydevproject
@ -5,5 +5,6 @@
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
 <path>/libprs500/src</path>
 <path>/libprs500/libprs500.lrf.txt</path>
 </pydev_pathproperty>
 </pydev_project>
--- a/setup.py
+++ b/setup.py
@ -33,7 +33,10 @@ if sys.argv[1] == 'py2exe':
    f.close()
    try:
        import py2exe
-        console = [{'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500'}]
+        console = [{
                    'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500',
                    'script' : 'src/libprs500/lrf/html/convert_from.py', 'dest_base':'html2lrf'
                  }]
        windows = [{'script' : 'src/libprs500/gui/main.py', 'dest_base':'prs500-gui',
                    'icon_resources':[(1,'icons/library.ico')]}]
        excludes = ["Tkconstants", "Tkinter", "tcl", "_imagingtk", 
@ -94,9 +97,8 @@ setup(
                             'prs500 = libprs500.cli.main:main', \
                             'lrf-meta = libprs500.lrf.meta:main', \
                             'rtf-meta = libprs500.metadata.rtf:main', \
                             'makelrf = libprs500.lrf.makelrf:main', \
                             'txt2lrf = libprs500.lrf.makelrf:txt', \
-                             'html2lrf = libprs500.lrf.makelrf:html',\
+                             'html2lrf = libprs500.lrf.html.convert_from:main',\
                           ], 
        'gui_scripts'    : [ 'prs500-gui = libprs500.gui.main:main']
      }, 
--- a/src/libprs500/lrf/BBeBook-0.2.jar
+++ b/src/libprs500/lrf/BBeBook-0.2.jar
--- a/src/libprs500/lrf/cover.jpg
+++ b/src/libprs500/lrf/cover.jpg
--- a/src/libprs500/lrf/html/convert_from.py
+++ b/src/libprs500/lrf/html/convert_from.py
@ -19,10 +19,10 @@ Code to convert HTML ebooks into LRF ebooks.
 """
 import os, re, sys
 from htmlentitydefs import name2codepoint
-
+from optparse import OptionParser
 from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString
-from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR
+from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR, Italic
 from libprs500.lrf.pylrs.pylrs import Span as _Span
 from libprs500.lrf import ConversionError
@ -40,7 +40,7 @@ class Span(_Span):
        (an int) if successful. Otherwise, returns None.
        Assumes: 1 pixel is 1/4 mm. One em is 10pts
        """
-        m = re.match("\s*([0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
+        m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
        if m is not None:
            unit = float(m.group(1))
            if m.group(2) == '%':
@ -160,6 +160,10 @@ class Span(_Span):
            src = pat.sub(repl, src)
        if not src:
            raise ConversionError('No point in adding an empty string')
        if 'font-style' in css.keys():
            fs = css.pop('font-style')
            if fs.lower() == 'italic':
                src = Italic(src)
        attrs = Span.translate_attrs(css)
        _Span.__init__(self, text=src, **attrs)
@ -227,6 +231,13 @@ class HTMLConvertor(object):
        """
        Return a dictionary of style properties applicable to Tag tag.
        """
        def merge_parent_css(prop, pcss):
            """ Copy the font related properties of pcss into prop (in place). """
            for name, value in pcss.items():
                # Only keys starting with 'font' (in any case) are inherited
                if name.lower().startswith('font'):
                    prop[name] = value
        prop = dict()        
        if tag.has_key("align"):
            prop["text-align"] = tag["align"]
@ -238,7 +249,7 @@ class HTMLConvertor(object):
                if self.css.has_key(classname):
                    prop.update(self.css[classname])
        if parent_css:
-            prop.update(parent_css)
+            merge_parent_css(prop, parent_css)
        if tag.has_key("style"):
            prop.update(self.parse_style_properties(tag["style"]))    
        return prop
@ -257,21 +268,51 @@ class HTMLConvertor(object):
        if self.current_page:
            self.book.append(self.current_page)
    def end_page(self):
        """
        Finish the current page. 
        The current paragraph is appended to the current text block, the block
        to the current page and the page to the book. Each of the three is 
        then replaced by a new, empty object.
        """
        self.current_block.append(self.current_para)
        self.current_para = Paragraph()
        self.current_page.append(self.current_block)
        self.current_block = TextBlock()
        self.book.append(self.current_page)
        self.current_page = Page()
    def parse_tag(self, tag, parent_css):
        def sanctify_css(css):
            """ 
            Make css safe for use in a Span Xylog tag.
            Removes, in place, every margin, indent, padding and border related 
            property as well as color, display, text-decoration and 
            letter-spacing. Property names are compared case insensitively.
            Returns the same dictionary that was passed in.
            """
            unsupported = ['color', 'display', 'text-decoration', 'letter-spacing']
            # Iterate over a snapshot of the keys, as css is modified in the loop
            for key in list(css.keys()):
                test = key.lower()
                if test.startswith('margin') or 'indent' in test or \
                   'padding' in test or 'border' in test or test in unsupported:
                    css.pop(key)
            return css
        def add_text(tag, css):
            """
            Append tag to the current paragraph as a Span styled with css.
            A ConversionError is not propagated. It is only reported on 
            stderr, and only when self.verbose is set.
            """
            try:
-                self.current_para.append(Span(tag, css))
+                self.current_para.append(Span(tag, sanctify_css(css)))
            except ConversionError, err:
                if self.verbose:
                    print >>sys.stderr, err
        def process_text_tag(tag, pcss):
            """
            Add the contents of tag to the book, honouring the 
            page-break-before and page-break-after properties found in pcss.
            Both properties are removed from pcss before it is handed on to 
            the children of tag. A value of 'avoid' never forces a page break.
            """
            if 'page-break-before' in pcss:
                if pcss.pop('page-break-before').lower() != 'avoid':
                    self.end_page()
            end_page = False
            if 'page-break-after' in pcss:
                # Same rule as for page-break-before: 'avoid' must not break
                end_page = pcss.pop('page-break-after').lower() != 'avoid'
            for c in tag.contents:
                if isinstance(c, Comment):
                    # Comments are not text, same as in the generic tag branch
                    continue
                if isinstance(c, NavigableString):
                    # Test and add the child, not the enclosing tag
                    add_text(c, pcss)
                else:
                    self.parse_tag(c, pcss)
            if end_page:
                self.end_page()
        try:
            tagname = tag.name.lower()
@ -280,8 +321,17 @@ class HTMLConvertor(object):
            return
        if tagname in ["title", "script", "meta"]:
            pass
        elif tagname in ['style', 'link']:
            # TODO: Append CSS to self.css
            pass
        elif tagname == 'p':
            css = self.tag_css(tag, parent_css=parent_css)
            indent = css.pop('text-indent', '')
            if indent:
                # TODO: If indent is different from current textblock's parindent
                # start a new TextBlock
                pass
            self.current_para.CR() # Put a paragraph end             
            self.current_block.append(self.current_para)
            self.current_para = Paragraph()
            process_text_tag(tag, css)
@ -302,13 +352,14 @@ class HTMLConvertor(object):
            self.current_para = Paragraph()
            self.current_page = Page()
        else:
            css = self.tag_css(tag, parent_css=parent_css)
            for c in tag.contents:
                if isinstance(c, Comment):
                    continue
                elif isinstance(c, Tag):
-                    self.parse_tag(c)
+                    self.parse_tag(c, css)
                elif isinstance(c, NavigableString):                    
-                    add_text(c, parent_css)
+                    add_text(c, css)
    def writeto(self, path):
        if path.lower().endswith('lrs'):
@ -327,8 +378,33 @@ def process_file(path, options):
        book = Book(title=options.title, author=options.author, \
                    sourceencoding='utf8')
        conv = HTMLConvertor(book, soup)
-        name = os.path.splitext(os.path.basename(path))[0]+'.lrs'
+        name = os.path.splitext(os.path.basename(path))[0]+'.lrf'
        os.chdir(cwd)
        conv.writeto(name)        
    finally:
        os.chdir(cwd)
 def main():
    """ 
    CLI for html -> lrf conversions.
    Expects exactly one positional argument, the source file. Prints the help 
    text and exits with status 1 otherwise. The title defaults to the name of 
    the source file without its extension.
    """
    parser = OptionParser(usage=\
        """usage: %prog [options] mybook.html
        %prog converts mybook.html to mybook.lrf
        """\
        )
    parser.add_option("-t", "--title", action="store", type="string", \
                    dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string", \
                    dest="author", help="Set the author", default='Unknown')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    src = args[0]
    if options.title is None:
        options.title = os.path.splitext(os.path.basename(src))[0]
    process_file(src, options)
 if __name__ == '__main__':
    main()
--- a/src/libprs500/lrf/libtidy.py
+++ b/src/libprs500/lrf/libtidy.py
@ -1,266 +0,0 @@
 ##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
 ##    This program is free software; you can redistribute it and/or modify
 ##    it under the terms of the GNU General Public License as published by
 ##    the Free Software Foundation; either version 2 of the License, or
 ##    (at your option) any later version.
 ##
 ##    This program is distributed in the hope that it will be useful,
 ##    but WITHOUT ANY WARRANTY; without even the implied warranty of
 ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ##    GNU General Public License for more details.
 ##
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 """
 Thin ctypes based wrapper around libtidy. Example usage:
 >>> from libtidy import parseString
 >>> print parseString('<h1>fowehfow</h2>', \
                       output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0)
 <?xml version="1.0" encoding="us-ascii"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title></title>
  </head>
  <body>
    <h1>
      fowehfow
    </h1>
  </body>
 </html>
 """
 import ctypes
 from cStringIO import StringIO
 import weakref
 class TidyLibError(Exception):
    """ Base class of the errors raised by this wrapper. Stores its argument as C{arg}. """
    def __init__(self, arg):
        self.arg=arg
 class InvalidOptionError(TidyLibError):
    """ Error for an option that Tidy does not know. C{arg} is shown in the message. """
    def __str__(self):
        return "%s was not a valid Tidy option." % self.arg
    # repr() and str() give the same text
    __repr__ = __str__
 class OptionArgError(TidyLibError):
    """ 
    Error for a missing or malformed option argument. 
    C{arg}, stored by the inherited constructor, is used verbatim as the message. 
    """
    def __str__(self):
        return self.arg
 # Search for libtidy under each of the known library names. The first name
 # that ctypes manages to load is used; a name that fails with OSError is skipped.
 thelib=None
 for libname in ('cygtidy-0-99-0', 'libtidy', 'libtidy.so', 'tidylib'):
    try:
        thelib = getattr(ctypes.cdll, libname)
        break
    except OSError:
        pass
 # None of the names could be loaded: importing this module fails
 if not thelib:
    raise OSError("Couldn't find libtidy, please make sure it is installed.")
 class Loader:
    """
    Trivial proxy for the loaded library that removes the need to spell out 
    the tidy prefix: tidy.Foo is looked up as tidyFoo first and, if that 
    fails, as plain Foo.
    """
    def __init__(self):
        self.lib = thelib
    def __getattr__(self, name):
        prefixed = "tidy" + name
        try:
            return getattr(self.lib, prefixed)
        # current ctypes uses ValueError, future will use AttributeError
        except (ValueError, AttributeError):
            return getattr(self.lib, name)
 _tidy=Loader()
 # define a callback to pass to Tidylib
 def _putByte(handle, c):
    """
    Lookup sink by handle and call its putByte method.
    @param handle: key of the sink in C{sinkfactory}
    @param c: the character to hand to the sink
    @return: always 0
    """
    sinkfactory[handle].putByte(c)
    return 0
 # C signature of the callback: int (*)(int, char)
 PUTBYTEFUNC = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_char)    
 # Module level reference to the wrapped callback, shared by all sinks
 putByte = PUTBYTEFUNC(_putByte)
 class _OutputSink(ctypes.Structure):
    # ctypes structure handed to libtidy as an output sink.
    # sinkData holds the integer handle of the owning sink (see SinkFactory.create),
    # putByte the callback of type PUTBYTEFUNC.
    _fields_ = [("sinkData", ctypes.c_int),
              ("putByte", PUTBYTEFUNC),
              ]
 class _Sink:
    """
    Collects what is written to it in a StringIO buffer. C{struct} is the 
    L{_OutputSink} whose putByte field is set to the module level callback.
    """
    def __init__(self):
        self._data = StringIO()
        self.struct = _OutputSink()
        self.struct.putByte = putByte
    def putByte(self, c):
        """ Append c to the collected data. """
        self._data.write(c)
    def __str__(self):
        """ Return everything collected so far. """
        return self._data.getvalue()
 class ReportItem:
    """
    One line of the Tidy report. 
    A line starting with 'line' is split into C{line}, C{col}, C{severity} 
    (first letter of the severity word) and C{message}. Any other line is 
    split into C{severity} and C{message}, with C{line} and C{col} set to None.
    The unparsed text is kept in C{err}.
    """
    def __init__(self, err):
        self.err = err
        if not err.startswith('line'):
            parts = err.split(' ', 1)
            self.severity = parts[0][0]
            self.message = parts[1]
            self.line = None
            self.col = None
        else:
            parts = err.split(' ', 6)
            self.severity = parts[5][0] # W or E
            self.line = int(parts[1])
            self.col = int(parts[3])
            self.message = parts[6]
        # TODO - parse emacs mode
    def __str__(self):
        names = {'W': 'Warning', 'E': 'Error', 'C': 'Config'}
        label = names.get(self.severity)
        if label is None:
            # Unknown severity: fall back to the unparsed text
            return self.err
        if self.line:
            return "line %d col %d - %s: %s" % (self.line, self.col,
                                                label, self.message)
        return "%s: %s" % (label, self.message)
    def __repr__(self):
        escaped = str(self).replace("'", "\\'")
        return "%s('%s')" % (self.__class__.__name__, escaped)
 class FactoryDict(dict):
    """I am a dict with a create method and no __setitem__.  This allows
    me to control my own keys.
    """
    def create(self):
        """Subclasses should implement me to generate a new item"""
    def _setitem(self, name, value):
        """Store value under name, bypassing the blocked __setitem__"""
        dict.__setitem__(self, name, value)
    def __setitem__(self, name, value):
        # Call form of raise: the "raise E, msg" statement form is legacy syntax
        raise TypeError("Use create() to get a new object")
 class SinkFactory(FactoryDict):
    """Mapping for lookup of sinks by handle"""
    def __init__(self):
        FactoryDict.__init__(self)
        # Handle that the next created sink will get
        self.lastsink = 0
    def create(self):
        """Create a sink, register it under the next free handle and return it"""
        handle = self.lastsink
        sink = _Sink()
        sink.struct.sinkData = handle
        FactoryDict._setitem(self, handle, sink)
        self.lastsink = handle + 1
        return sink
 sinkfactory = SinkFactory()
 class _Document(object):
    """
    A tidy document. Creating one creates the underlying libtidy document 
    (C{cdoc}) and installs a new sink from C{sinkfactory} as its error sink.
    """
    def __init__(self):
        self.cdoc = _tidy.Create()
        self.errsink = sinkfactory.create()
        _tidy.SetErrorSink(self.cdoc, ctypes.byref(self.errsink.struct))
    def write(self, stream):
        """ Write the string form of this document to stream. """
        stream.write(str(self))
    def get_errors(self):
        """ Return a list with one L{ReportItem} per non empty line in the error sink. """
        ret = []
        for line in str(self.errsink).split('\n'):
            line = line.strip(' \n\r')
            if line: ret.append(ReportItem(line))
        return ret
    errors=property(get_errors)
    def __str__(self):
        """ Return the document as saved by SaveString. """
        # First attempt with an 8192 byte buffer
        stlen = ctypes.c_int(8192)
        st = ctypes.c_buffer(stlen.value)
        rc = _tidy.SaveString(self.cdoc, st, ctypes.byref(stlen))
        if rc==-12: # buffer too small
            # Retry once with a buffer of stlen bytes; presumably libtidy has
            # stored the required size in stlen - TODO confirm
            st = ctypes.c_buffer(stlen.value)
            _tidy.SaveString(self.cdoc, st, ctypes.byref(stlen))
        return st.value
 errors = {'missing or malformed argument for option: ': OptionArgError,
          'unknown option: ': InvalidOptionError,
          }
 class DocumentFactory(FactoryDict):
    """
    Creates L{_Document} objects, applies options to them and loads content 
    into them. The factory maps a weak reference to each created document to 
    the document's C{cdoc}, so that C{cdoc} can be released when the document 
    is garbage collected.
    """
    def _setOptions(self, doc, **options):
        """
        Apply options to doc. Underscores in option names are replaced by 
        hyphens and a value of None is passed as the empty string.
        Raises the exception class registered in C{errors} when the last 
        reported message starts with one of its keys.
        """
        for k in options.keys():
            # this will flush out most argument type errors...
            if options[k] is None: options[k] = ''
            _tidy.OptParseValue(doc.cdoc, 
                                k.replace('_', '-'), 
                                str(options[k]))
            if doc.errors:
                match=filter(doc.errors[-1].message.startswith, errors.keys())
                if match:
                    raise errors[match[0]](doc.errors[-1].message)
    def load(self, doc, arg, loader):
        """ Call loader with the C{cdoc} of doc and arg, then run CleanAndRepair on it. """
        loader(doc.cdoc, arg)
        _tidy.CleanAndRepair(doc.cdoc)
    def loadFile(self, doc, filename):
        """ Load the file filename into doc using ParseFile. """
        self.load(doc, filename, _tidy.ParseFile)
    def loadString(self, doc, st):
        """ Load the string st into doc using ParseString. """
        self.load(doc, st, _tidy.ParseString)
    def _create(self, *args, **kwargs):
        """ Create a document, apply kwargs as options and register it for release. """
        doc = _Document()
        self._setOptions(doc, **kwargs)
        # releaseDoc is called with the weak reference once doc is collected
        ref = weakref.ref(doc, self.releaseDoc)
        FactoryDict._setitem(self, ref, doc.cdoc)
        return doc
    def parse(self, filename, *args, **kwargs):
        """
        Open and process filename as an HTML file, returning a
        processed document object.
        @param kwargs: named options to pass to TidyLib for processing
        the input file.
        @param filename: the name of a file to process
        @return: a document object
        """
        doc = self._create(**kwargs)
        self.loadFile(doc, filename)
        return doc
    def parseString(self, st, *args, **kwargs):
        """
        Use st as an HTML file, and process it, returning a
        document object.
        @param kwargs: named options to pass to TidyLib for processing
        the input file.
        @param st: the string to parse
        @return: a document object
        """
        doc = self._create(**kwargs)
        self.loadString(doc, st)
        return doc
    def releaseDoc(self, ref):
        """ Weak reference callback: release the C{cdoc} registered under ref. """
        # NOTE(review): the entry for ref is not removed from the mapping here
        _tidy.Release(self[ref])
 docfactory = DocumentFactory()
 parse = docfactory.parse
 parseString = docfactory.parseString
--- a/src/libprs500/lrf/makelrf.py
+++ b/src/libprs500/lrf/makelrf.py
@ -17,19 +17,14 @@ import shutil
 import sys
 import hashlib
 import re
 import time
 import pkg_resources
 import subprocess
 from tempfile import mkdtemp
 from optparse import OptionParser
 import xml.dom.minidom as dom
 from libprs500.lrf import ConversionError
 from libprs500.lrf.meta import LRFException, LRFMetaFile
 from libprs500.ptempfile import PersistentTemporaryFile
 _bbebook = 'BBeBook-0.2.jar'
 def generate_thumbnail(path):
    """ Generate a JPEG thumbnail of size ~ 128x128 (aspect ratio preserved)"""
    try:
@ -45,30 +40,6 @@ def generate_thumbnail(path):
    im.save(thumb.name)
    return thumb
 def create_xml(cfg):
    """
    Build the C{Info} XML document described by cfg and return it as a string.
    cfg must contain the keys C{File} and C{Output}. The other book fields 
    (Title, Author, BookID, ...) are written only when present in cfg.
    """
    optional_fields = ('Title', 'Author', 'BookID', 'Publisher', 'Label', \
                       'Category', 'Classification', 'Icon', 'Cover', 'FreeText')
    doc = dom.getDOMImplementation().createDocument(None, None, None)
    def add_field(parent, tag, value):
        # <tag>value</tag> as the last child of parent
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(value))
        parent.appendChild(node)
    root = doc.createElement('Info')
    root.setAttribute('version', '1.0')
    book_node = doc.createElement('BookInfo')
    doc_node  = doc.createElement('DocInfo')
    root.appendChild(book_node)
    root.appendChild(doc_node)
    add_field(book_node, 'File', cfg['File'])
    add_field(doc_node, 'Output', cfg['Output'])
    for name in optional_fields:
        if name in cfg:
            add_field(book_node, name, cfg[name])
    today = time.strftime('%Y-%m-%d', time.gmtime())
    add_field(doc_node, 'Language', 'en')
    add_field(doc_node, 'Creator', _bbebook)
    add_field(doc_node, 'CreationDate', today)
    doc.appendChild(root)
    return doc.toxml()
 def makelrf(author=None, title=None, \
            thumbnail=None, src=None, odir=".",\
@ -150,127 +121,3 @@ def makelrf(author=None, title=None, \
        if dirpath: 
            shutil.rmtree(dirpath, True)
 def txt():
    """ CLI for txt -> lrf conversions """
    parser = OptionParser(usage=\
        """usage: %prog [options] mybook.txt
        %prog converts mybook.txt to mybook.lrf
        """\
        )
    parser.add_option("-t", "--title", action="store", type="string", \
                    dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string", \
                    dest="author", help="Set the author", default='Unknown')
    defenc = 'cp1252'
    enchelp = 'Set the encoding used to decode ' + \
              'the text in mybook.txt. Default encoding is ' + defenc
    parser.add_option('-e', '--encoding', action='store', type='string', \
                      dest='encoding', help=enchelp, default=defenc)
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    src = args[0]
    if options.title == None:
        options.title = os.path.splitext(os.path.basename(src))[0]
    try:
        convert_txt(src, options)
    except ConversionError, err:
        print >>sys.stderr, err
        sys.exit(1)
 def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.
    Consecutive non empty lines form one paragraph and a blank line ends it. 
    The lrf file is written to the current directory under the name of the 
    source file with the extension .lrf.
    @param options: Object with the following attributes:
                    C{author}, C{title}, C{encoding} (the assumed encoding of 
                    the text in C{path}.)
    @return: The absolute path of the generated lrf file
    @raise ConversionError: If rendering fails with a UnicodeDecodeError
    """
    import fileinput
    from libprs500.lrf.pylrs.pylrs import Book
    book = Book(title=options.title, author=options.author, \
                sourceencoding=options.encoding)
    pending = []
    block = book.Page().TextBlock()
    for line in fileinput.input(path):
        line = line.strip()
        if line:
            pending.append(line)
        else:
            # Join with a space so that the last word of a line does not run
            # into the first word of the next one
            block.Paragraph(' '.join(pending))
            pending = []
    if pending:
        # The file did not end with a blank line: keep the last paragraph
        block.Paragraph(' '.join(pending))
    basename = os.path.basename(path)
    name = os.path.splitext(basename)[0]+'.lrf'
    try: 
        book.renderLrf(name)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' + \
                              options.encoding +'. Specify the '+ \
                              'correct encoding with the -e option.')
    return os.path.abspath(name)
 def html():
    """ 
    CLI for html -> lrf conversions.
    Expects exactly one positional argument, the source file. Prints the help 
    text and exits with status 1 otherwise. The title defaults to the name of 
    the source file without its extension.
    """
    parser = OptionParser(usage=\
        """usage: %prog [options] mybook.html
        %prog converts mybook.html to mybook.lrf
        """\
        )
    parser.add_option("-t", "--title", action="store", type="string", \
                    dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string", \
                    dest="author", help="Set the author", default='Unknown')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    src = args[0]
    if options.title is None:
        options.title = os.path.splitext(os.path.basename(src))[0]
    # NOTE(review): confirm that the module libprs500.lrf.html.convert exists
    from libprs500.lrf.html.convert import process_file
    process_file(src, options)
 def main(cargs=None):
    parser = OptionParser(usage=\
        """usage: %prog [options] mybook.[html|pdf|rar]
        %prog converts mybook to mybook.lrf
        If you specify a rar file you must have the unrar command line client
        installed. makelrf assumes the rar file is an archive containing the
        html file you want converted."""\
        )
    parser.add_option("-t", "--title", action="store", type="string", \
                    dest="title", help="Set the book title")
    parser.add_option("-a", "--author", action="store", type="string", \
                    dest="author", help="Set the author")
    parser.add_option('-r', '--rasterize', action='store_false', \
                    dest="rasterize", 
                    help="Convert pdfs into image files.")
    parser.add_option('-c', '--cover', action='store', dest='cover',\
                    help="Path to a graphic that will be set as the cover. "\
                    "If it is specified the thumbnail is automatically "\
                    "generated from it")
    parser.add_option("--thumbnail", action="store", type="string", \
                    dest="thumbnail", \
                    help="Path to a graphic that will be set as the thumbnail")
    if not cargs:
        cargs = sys.argv
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    src = args[0]
    root, ext = os.path.splitext(src)
    if ext not in ['.html', '.pdf', '.rar']:
        print >> sys.stderr, "Can only convert files ending in .html|.pdf|.rar"
        parser.print_help()
        sys.exit(1)
    name = makelrf(author=options.author, title=options.title, \
        thumbnail=options.thumbnail, src=src, cover=options.cover, \
        rasterize=options.rasterize)
    print "LRF generated:", name
--- a/src/libprs500/lrf/txt/init.py
+++ b/src/libprs500/lrf/txt/init.py
@ -0,0 +1,14 @@
 ##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
 ##    This program is free software; you can redistribute it and/or modify
 ##    it under the terms of the GNU General Public License as published by
 ##    the Free Software Foundation; either version 2 of the License, or
 ##    (at your option) any later version.
 ##
 ##    This program is distributed in the hope that it will be useful,
 ##    but WITHOUT ANY WARRANTY; without even the implied warranty of
 ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ##    GNU General Public License for more details.
 ##
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
--- a/src/libprs500/lrf/txt/convert_from.py
+++ b/src/libprs500/lrf/txt/convert_from.py
@ -0,0 +1,86 @@
 ##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
 ##    This program is free software; you can redistribute it and/or modify
 ##    it under the terms of the GNU General Public License as published by
 ##    the Free Software Foundation; either version 2 of the License, or
 ##    (at your option) any later version.
 ##
 ##    This program is distributed in the hope that it will be useful,
 ##    but WITHOUT ANY WARRANTY; without even the implied warranty of
 ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ##    GNU General Public License for more details.
 ##
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 """
 Convert .txt files to .lrf
 """
 import os, sys
 from optparse import OptionParser
 from libprs500.lrf import ConversionError
 def main():
    """ CLI for txt -> lrf conversions """
    parser = OptionParser(usage=\
        """usage: %prog [options] mybook.txt
        %prog converts mybook.txt to mybook.lrf
        """\
        )
    parser.add_option("-t", "--title", action="store", type="string", \
                    dest="title", help="Set the title")
    parser.add_option("-a", "--author", action="store", type="string", \
                    dest="author", help="Set the author", default='Unknown')
    defenc = 'cp1252'
    enchelp = 'Set the encoding used to decode ' + \
              'the text in mybook.txt. Default encoding is ' + defenc
    parser.add_option('-e', '--encoding', action='store', type='string', \
                      dest='encoding', help=enchelp, default=defenc)
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    src = args[0]
    if options.title == None:
        options.title = os.path.splitext(os.path.basename(src))[0]
    try:
        convert_txt(src, options)
    except ConversionError, err:
        print >>sys.stderr, err
        sys.exit(1)
 def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.
    Consecutive non empty lines form one paragraph and a blank line ends it. 
    The lrf file is written to the current directory under the name of the 
    source file with the extension .lrf.
    @param options: Object with the following attributes:
                    C{author}, C{title}, C{encoding} (the assumed encoding of 
                    the text in C{path}.)
    @return: The absolute path of the generated lrf file
    @raise ConversionError: If rendering fails with a UnicodeDecodeError
    """
    import fileinput
    from libprs500.lrf.pylrs.pylrs import Book
    book = Book(title=options.title, author=options.author, \
                sourceencoding=options.encoding)
    pending = []
    block = book.Page().TextBlock()
    for line in fileinput.input(path):
        line = line.strip()
        if line:
            pending.append(line)
        else:
            # Join with a space so that the last word of a line does not run
            # into the first word of the next one
            block.Paragraph(' '.join(pending))
            pending = []
    if pending:
        # The file did not end with a blank line: keep the last paragraph
        block.Paragraph(' '.join(pending))
    basename = os.path.basename(path)
    name = os.path.splitext(basename)[0]+'.lrf'
    try: 
        book.renderLrf(name)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' + \
                              options.encoding +'. Specify the '+ \
                              'correct encoding with the -e option.')
    return os.path.abspath(name)
 if __name__ == '__main__':
    main()