Working initial HTML->LRF converter with CSS support. Next on list is support for <style>, <link> and <img> tags.

2025-06-23 07:20:44 -04:00 · 2007-04-18 22:51:48 +00:00 · 2007-04-18 22:51:48 +00:00 · d69fad53f4
commit d69fad53f4
parent 15014f74fe
9 changed files with 191 additions and 431 deletions
--- a/.pydevproject
+++ b/.pydevproject
@ -5,5 +5,6 @@
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
 <path>/libprs500/src</path>
+<path>/libprs500/libprs500.lrf.txt</path>
 </pydev_pathproperty>
 </pydev_project>
--- a/setup.py
+++ b/setup.py
@ -33,7 +33,10 @@ if sys.argv[1] == 'py2exe':
    f.close()
    try:
        import py2exe
-        console = [{'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500'}]
+        # One dict per console executable. Repeating 'script'/'dest_base'
+        # inside a single dict literal keeps only the last value of each key,
+        # which would silently drop the prs500 entry.
+        console = [
+                    {'script' : 'src/libprs500/cli/main.py', 'dest_base':'prs500'},
+                    {'script' : 'src/libprs500/lrf/html/convert_from.py', 'dest_base':'html2lrf'},
+                  ]
        windows = [{'script' : 'src/libprs500/gui/main.py', 'dest_base':'prs500-gui',
                    'icon_resources':[(1,'icons/library.ico')]}]
        excludes = ["Tkconstants", "Tkinter", "tcl", "_imagingtk", 
@ -94,9 +97,8 @@ setup(
                             'prs500 = libprs500.cli.main:main', \
                             'lrf-meta = libprs500.lrf.meta:main', \
                             'rtf-meta = libprs500.metadata.rtf:main', \
-                             'makelrf = libprs500.lrf.makelrf:main', \
                             'txt2lrf = libprs500.lrf.makelrf:txt', \
-                             'html2lrf = libprs500.lrf.makelrf:html',\
+                             'html2lrf = libprs500.lrf.html.convert_from:main',\
                           ], 
        'gui_scripts'    : [ 'prs500-gui = libprs500.gui.main:main']
      }, 
--- a/src/libprs500/lrf/BBeBook-0.2.jar
+++ b/src/libprs500/lrf/BBeBook-0.2.jar
--- a/src/libprs500/lrf/cover.jpg
+++ b/src/libprs500/lrf/cover.jpg
--- a/src/libprs500/lrf/html/convert_from.py
+++ b/src/libprs500/lrf/html/convert_from.py
@ -19,10 +19,10 @@ Code to convert HTML ebooks into LRF ebooks.
 """
 import os, re, sys
 from htmlentitydefs import name2codepoint
-
+from optparse import OptionParser

 from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, NavigableString
-from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR
+from libprs500.lrf.pylrs.pylrs import Book, Page, Paragraph, TextBlock, CR, Italic
 from libprs500.lrf.pylrs.pylrs import Span as _Span
 from libprs500.lrf import ConversionError

@ -40,7 +40,7 @@ class Span(_Span):
        (an int) if successful. Otherwise, returns None.
        Assumes: 1 pixel is 1/4 mm. One em is 10pts
        """
-        m = re.match("\s*([0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
+        m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
        if m is not None:
            unit = float(m.group(1))
            if m.group(2) == '%':
@ -160,6 +160,10 @@ class Span(_Span):
            src = pat.sub(repl, src)
        if not src:
            raise ConversionError('No point in adding an empty string')
+        if 'font-style' in css.keys():
+            fs = css.pop('font-style')
+            if fs.lower() == 'italic':
+                src = Italic(src)
        attrs = Span.translate_attrs(css)
        _Span.__init__(self, text=src, **attrs)
        
@ -227,6 +231,13 @@ class HTMLConvertor(object):
        """
        Return a dictionary of style properties applicable to Tag tag.
        """
+        def merge_parent_css(prop, pcss):
+            """
+            Copy into C{prop} only those entries of C{pcss} whose property
+            name starts with 'font' (compared case-insensitively). Entries
+            already present in C{prop} under the same key are overwritten.
+            """
+            inherited = [(name, value) for name, value in pcss.items()
+                         if name.lower().startswith('font')]
+            prop.update(inherited)
+            
        prop = dict()        
        if tag.has_key("align"):
            prop["text-align"] = tag["align"]
@ -238,7 +249,7 @@ class HTMLConvertor(object):
                if self.css.has_key(classname):
                    prop.update(self.css[classname])
        if parent_css:
-            prop.update(parent_css)
+            merge_parent_css(prop, parent_css)
        if tag.has_key("style"):
            prop.update(self.parse_style_properties(tag["style"]))    
        return prop
@ -257,21 +268,51 @@ class HTMLConvertor(object):
        if self.current_page:
            self.book.append(self.current_page)
            
+    def end_page(self):
+        """
+        Close the page that is currently being built.
+
+        The current paragraph is appended to the current text block, the
+        block to the current page and the page to the book. After each
+        append the corresponding attribute is replaced by a new, empty
+        object, so conversion continues on a fresh page.
+        """
+        self.current_block.append(self.current_para)
+        self.current_para = Paragraph()
+        self.current_page.append(self.current_block)
+        self.current_block = TextBlock()
+        # Appended unconditionally: no check is made here for an empty page.
+        self.book.append(self.current_page)
+        self.current_page = Page()
+        
        
    def parse_tag(self, tag, parent_css):
+        def sanctify_css(css):
+            """
+            Strip from C{css}, in place, every property that must not be
+            handed on to a Span: margins, indents, padding, borders, color,
+            display, text-decoration and letter-spacing. Property names are
+            compared case-insensitively.
+            @return: the same (now filtered) dictionary
+            """
+            banned_names = ['color', 'display', 'text-decoration', 'letter-spacing']
+
+            def unsafe(name):
+                name = name.lower()
+                if name.startswith('margin'):
+                    return True
+                for fragment in ('indent', 'padding', 'border'):
+                    if fragment in name:
+                        return True
+                return name in banned_names
+
+            # Collect the offending keys first so the dict is never resized
+            # while it is being walked.
+            for doomed in [key for key in css.keys() if unsafe(key)]:
+                del css[doomed]
+            return css
+                    
         def add_text(tag, css):
+            """
+            Append C{tag} to the current paragraph as a Span styled with C{css}.
+            C{css} is passed through sanctify_css first, which filters the
+            dictionary in place. A ConversionError raised while building the
+            Span is printed to stderr when self.verbose is set and is
+            otherwise dropped silently.
+            """
             try:
-                self.current_para.append(Span(tag, css))
+                self.current_para.append(Span(tag, sanctify_css(css)))
             except ConversionError, err:
                 if self.verbose:
                     print >>sys.stderr, err
        
+        
+                    
        def process_text_tag(tag, pcss):
+            if 'page-break-before' in pcss.keys():
+                if pcss['page-break-before'].lower() != 'avoid':
+                    self.end_page()
+                pcss.pop('page-break-before')
+            end_page = False
+            if 'page-break-after' in pcss.keys():
+                end_page = True
+                pcss.pop('page-break-after')
            for c in tag.contents:
                if isinstance(tag, NavigableString):
                    add_text(tag, pcss)
                else:
                    self.parse_tag(c, pcss)
+            if end_page:
+                self.end_page()
            
        try:
            tagname = tag.name.lower()
@ -280,8 +321,17 @@ class HTMLConvertor(object):
            return
        if tagname in ["title", "script", "meta"]:
            pass
+        elif tagname in ['style', 'link']:
+            # TODO: Append CSS to self.css
+            pass
        elif tagname == 'p':
            css = self.tag_css(tag, parent_css=parent_css)
+            indent = css.pop('text-indent', '')
+            if indent:
+                # TODO: If indent is different from current textblock's parindent
+                # start a new TextBlock
+                pass
+            self.current_para.CR() # Put a paragraph end             
            self.current_block.append(self.current_para)
            self.current_para = Paragraph()
            process_text_tag(tag, css)
@ -302,13 +352,14 @@ class HTMLConvertor(object):
            self.current_para = Paragraph()
            self.current_page = Page()
        else:
+            css = self.tag_css(tag, parent_css=parent_css)
            for c in tag.contents:
                if isinstance(c, Comment):
                    continue
                elif isinstance(c, Tag):
-                    self.parse_tag(c)
+                    self.parse_tag(c, css)
                elif isinstance(c, NavigableString):                    
-                    add_text(c, parent_css)
+                    add_text(c, css)
                    
    def writeto(self, path):
        if path.lower().endswith('lrs'):
@ -327,8 +378,33 @@ def process_file(path, options):
        book = Book(title=options.title, author=options.author, \
                    sourceencoding='utf8')
        conv = HTMLConvertor(book, soup)
-        name = os.path.splitext(os.path.basename(path))[0]+'.lrs'
+        name = os.path.splitext(os.path.basename(path))[0]+'.lrf'
        os.chdir(cwd)
        conv.writeto(name)        
    finally:
        os.chdir(cwd)
+        
+def main():
+    """
+    CLI for html -> lrf conversions.
+
+    Expects exactly one positional argument, the HTML file to convert. The
+    title defaults to the base name of that file (without extension) and the
+    author to 'Unknown'. Exits with status 1 on a usage error or when the
+    conversion raises L{ConversionError}.
+    """
+    parser = OptionParser(usage=\
+        """usage: %prog [options] mybook.html
+        
+        %prog converts mybook.html to mybook.lrf
+        """\
+        )
+    parser.add_option("-t", "--title", action="store", type="string", \
+                    dest="title", help="Set the title")
+    parser.add_option("-a", "--author", action="store", type="string", \
+                    dest="author", help="Set the author", default='Unknown')
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.print_help()
+        sys.exit(1)
+    src = args[0]
+    if options.title is None:
+        options.title = os.path.splitext(os.path.basename(src))[0]
+    try:
+        process_file(src, options)
+    except ConversionError, err:
+        # Report conversion problems the same way the txt converter does
+        print >>sys.stderr, err
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
--- a/src/libprs500/lrf/libtidy.py
+++ b/src/libprs500/lrf/libtidy.py
@ -1,266 +0,0 @@
-##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
-##    This program is free software; you can redistribute it and/or modify
-##    it under the terms of the GNU General Public License as published by
-##    the Free Software Foundation; either version 2 of the License, or
-##    (at your option) any later version.
-##
-##    This program is distributed in the hope that it will be useful,
-##    but WITHOUT ANY WARRANTY; without even the implied warranty of
-##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-##    GNU General Public License for more details.
-##
-##    You should have received a copy of the GNU General Public License along
-##    with this program; if not, write to the Free Software Foundation, Inc.,
-##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-"""
-Thin ctypes based wrapper around libtidy. Example usage:
->>> from libtidy import parseString
->>> print parseString('<h1>fowehfow</h2>', \
-                       output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0)
-<?xml version="1.0" encoding="us-ascii"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
-    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-  <head>
-    <title></title>
-  </head>
-  <body>
-    <h1>
-      fowehfow
-    </h1>
-  </body>
-</html>
-"""
-
-import ctypes
-from cStringIO import StringIO
-import weakref
-
-class TidyLibError(Exception):
-    def __init__(self, arg):
-        self.arg=arg
-
-class InvalidOptionError(TidyLibError):
-    def __str__(self):
-        return "%s was not a valid Tidy option." % (self.arg)
-    __repr__=__str__
-
-class OptionArgError(TidyLibError):
-    def __init__(self, arg):
-        self.arg=arg
-    def __str__(self):
-        return self.arg
-
-# search the path for libtidy using the known names; 
-thelib=None
-for libname in ('cygtidy-0-99-0', 'libtidy', 'libtidy.so', 'tidylib'):
-    try:
-        thelib = getattr(ctypes.cdll, libname)
-        break
-    except OSError:
-        pass
-if not thelib:
-    raise OSError("Couldn't find libtidy, please make sure it is installed.")
-
-class Loader:
-    """
-    I am a trivial wrapper that eliminates the need for tidy.tidyFoo, 
-    so you can just access tidy.Foo
-    """
-    def __init__(self):
-        self.lib = thelib
-    def __getattr__(self, name):
-        try:
-            return getattr(self.lib, "tidy%s" % name)
-        # current ctypes uses ValueError, future will use AttributeError
-        except (ValueError, AttributeError):
-            return getattr(self.lib, name)
-
-_tidy=Loader()
-
-# define a callback to pass to Tidylib
-def _putByte(handle, c):
-    """Lookup sink by handle and call its putByte method"""
-    sinkfactory[handle].putByte(c)
-    return 0
-
-PUTBYTEFUNC = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, ctypes.c_char)    
-putByte = PUTBYTEFUNC(_putByte)
-
-class _OutputSink(ctypes.Structure):
-    _fields_ = [("sinkData", ctypes.c_int),
-              ("putByte", PUTBYTEFUNC),
-              ]
-
-class _Sink:
-    def __init__(self):
-        self._data = StringIO()
-        self.struct = _OutputSink()
-        self.struct.putByte = putByte
-        
-    def putByte(self, c):
-        self._data.write(c)
-        
-    def __str__(self):
-        return self._data.getvalue()
-
-class ReportItem:
-    def __init__(self, err):
-        self.err = err
-        if err.startswith('line'):
-            tokens = err.split(' ',6)
-            self.severity = tokens[5][0] # W or E
-            self.line = int(tokens[1])
-            self.col = int(tokens[3])
-            self.message = tokens[6]
-        else:
-            tokens = err.split(' ',1)
-            self.severity = tokens[0][0]
-            self.message = tokens[1]
-            self.line = None
-            self.col = None
-        # TODO - parse emacs mode
-    
-    def __str__(self):
-        severities = dict(W='Warning', E='Error', C='Config')
-        try:
-            if self.line:
-                return "line %d col %d - %s: %s" % (self.line, self.col,
-                                                    severities[self.severity],
-                                                    self.message)
-            
-            else:
-                return "%s: %s" % (severities[self.severity], self.message)
-        except KeyError:
-            return self.err
-
-    def __repr__(self):
-        return "%s('%s')" % (self.__class__.__name__,
-                             str(self).replace("'", "\\'"))
-        
-class FactoryDict(dict):
-    """I am a dict with a create method and no __setitem__.  This allows
-    me to control my own keys.
-    """
-    def create(self):
-        """Subclasses should implement me to generate a new item"""
-    
-    def _setitem(self, name, value):
-        dict.__setitem__(self, name, value)
-    
-    def __setitem__(self, name, value):
-        raise TypeError, "Use create() to get a new object"
-        
-
-class SinkFactory(FactoryDict):
-    """Mapping for lookup of sinks by handle"""
-    def __init__(self):
-        FactoryDict.__init__(self)
-        self.lastsink = 0
-    
-    def create(self):
-        sink = _Sink()
-        sink.struct.sinkData = self.lastsink
-        FactoryDict._setitem(self, self.lastsink, sink)
-        self.lastsink = self.lastsink+1
-        return sink
-
-sinkfactory = SinkFactory()
-
-class _Document(object):
-    def __init__(self):
-        self.cdoc = _tidy.Create()
-        self.errsink = sinkfactory.create()
-        _tidy.SetErrorSink(self.cdoc, ctypes.byref(self.errsink.struct))
-    
-    def write(self, stream):
-        stream.write(str(self))
-    
-    def get_errors(self):
-        ret = []
-        for line in str(self.errsink).split('\n'):
-            line = line.strip(' \n\r')
-            if line: ret.append(ReportItem(line))
-        return ret
-    
-    errors=property(get_errors)
-    
-    def __str__(self):
-        stlen = ctypes.c_int(8192)
-        st = ctypes.c_buffer(stlen.value)
-        rc = _tidy.SaveString(self.cdoc, st, ctypes.byref(stlen))
-        if rc==-12: # buffer too small
-            st = ctypes.c_buffer(stlen.value)
-            _tidy.SaveString(self.cdoc, st, ctypes.byref(stlen))
-        return st.value
-
-errors = {'missing or malformed argument for option: ': OptionArgError,
-          'unknown option: ': InvalidOptionError,
-          }
-
-
-class DocumentFactory(FactoryDict):
-    def _setOptions(self, doc, **options):
-        for k in options.keys():
-            
-            # this will flush out most argument type errors...
-            if options[k] is None: options[k] = ''
-            
-            _tidy.OptParseValue(doc.cdoc, 
-                                k.replace('_', '-'), 
-                                str(options[k]))
-            if doc.errors:
-                match=filter(doc.errors[-1].message.startswith, errors.keys())
-                if match:
-                    raise errors[match[0]](doc.errors[-1].message)
-    
-    def load(self, doc, arg, loader):
-        loader(doc.cdoc, arg)
-        _tidy.CleanAndRepair(doc.cdoc)
-    
-    def loadFile(self, doc, filename):
-        self.load(doc, filename, _tidy.ParseFile)
-    
-    def loadString(self, doc, st):
-        self.load(doc, st, _tidy.ParseString)
-    
-    def _create(self, *args, **kwargs):
-        doc = _Document()
-        self._setOptions(doc, **kwargs)
-        ref = weakref.ref(doc, self.releaseDoc)
-        FactoryDict._setitem(self, ref, doc.cdoc)
-        return doc
-    
-    def parse(self, filename, *args, **kwargs):
-        """
-        Open and process filename as an HTML file, returning a
-        processed document object.
-        @param kwargs: named options to pass to TidyLib for processing
-        the input file.
-        @param filename: the name of a file to process
-        @return: a document object
-        """
-        doc = self._create(**kwargs)
-        self.loadFile(doc, filename)
-        return doc
-    
-    def parseString(self, st, *args, **kwargs):
-        """
-        Use st as an HTML file, and process it, returning a
-        document object.
-        @param kwargs: named options to pass to TidyLib for processing
-        the input file.
-        @param st: the string to parse
-        @return: a document object
-        """
-        doc = self._create(**kwargs)
-        self.loadString(doc, st)
-        return doc
-    
-    def releaseDoc(self, ref):
-        _tidy.Release(self[ref])
-    
-docfactory = DocumentFactory()
-parse = docfactory.parse
-parseString = docfactory.parseString
--- a/src/libprs500/lrf/makelrf.py
+++ b/src/libprs500/lrf/makelrf.py
@ -17,19 +17,14 @@ import shutil
 import sys
 import hashlib
 import re
-import time
 import pkg_resources
 import subprocess
 from tempfile import mkdtemp
 from optparse import OptionParser
-import xml.dom.minidom as dom
-
 from libprs500.lrf import ConversionError
 from libprs500.lrf.meta import LRFException, LRFMetaFile
 from libprs500.ptempfile import PersistentTemporaryFile

-_bbebook = 'BBeBook-0.2.jar'
-
 def generate_thumbnail(path):
    """ Generate a JPEG thumbnail of size ~ 128x128 (aspect ratio preserved)"""
    try:
@ -45,30 +40,6 @@ def generate_thumbnail(path):
    im.save(thumb.name)
    return thumb
    
-def create_xml(cfg):
-    doc = dom.getDOMImplementation().createDocument(None, None, None)
-    def add_field(parent, tag, value):
-        elem = doc.createElement(tag)
-        elem.appendChild(doc.createTextNode(value))
-        parent.appendChild(elem)
-    
-    info = doc.createElement('Info')
-    info.setAttribute('version', '1.0')
-    book_info = doc.createElement('BookInfo')
-    doc_info  = doc.createElement('DocInfo')
-    info.appendChild(book_info)
-    info.appendChild(doc_info)
-    add_field(book_info, 'File', cfg['File'])
-    add_field(doc_info, 'Output', cfg['Output'])
-    for field in ['Title', 'Author', 'BookID', 'Publisher', 'Label', \
-                  'Category', 'Classification', 'Icon', 'Cover', 'FreeText']:
-        if cfg.has_key(field):
-            add_field(book_info, field, cfg[field])
-    add_field(doc_info, 'Language', 'en')
-    add_field(doc_info, 'Creator', _bbebook)
-    add_field(doc_info, 'CreationDate', time.strftime('%Y-%m-%d', time.gmtime()))
-    doc.appendChild(info)
-    return doc.toxml()

 def makelrf(author=None, title=None, \
            thumbnail=None, src=None, odir=".",\
@ -150,127 +121,3 @@ def makelrf(author=None, title=None, \
        if dirpath: 
            shutil.rmtree(dirpath, True)

-def txt():
-    """ CLI for txt -> lrf conversions """
-    parser = OptionParser(usage=\
-        """usage: %prog [options] mybook.txt
-        
-        %prog converts mybook.txt to mybook.lrf
-        """\
-        )
-    parser.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help="Set the title")
-    parser.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help="Set the author", default='Unknown')
-    defenc = 'cp1252'
-    enchelp = 'Set the encoding used to decode ' + \
-              'the text in mybook.txt. Default encoding is ' + defenc
-    parser.add_option('-e', '--encoding', action='store', type='string', \
-                      dest='encoding', help=enchelp, default=defenc)
-    options, args = parser.parse_args()
-    if len(args) != 1:
-        parser.print_help()
-        sys.exit(1)
-    src = args[0]
-    if options.title == None:
-        options.title = os.path.splitext(os.path.basename(src))[0]
-    try:
-        convert_txt(src, options)
-    except ConversionError, err:
-        print >>sys.stderr, err
-        sys.exit(1)
-        
-    
-def convert_txt(path, options):
-    """
-    Convert the text file at C{path} into an lrf file.
-    @param options: Object with the following attributes:
-                    C{author}, C{title}, C{encoding} (the assumed encoding of 
-                    the text in C{path}.)
-    """
-    import fileinput
-    from libprs500.lrf.pylrs.pylrs import Book
-    book = Book(title=options.title, author=options.author, \
-                sourceencoding=options.encoding)
-    buffer = ''
-    block = book.Page().TextBlock()
-    for line in fileinput.input(path):
-        line = line.strip()
-        if line:
-            buffer += line
-        else:
-            block.Paragraph(buffer)            
-            buffer = ''
-    basename = os.path.basename(path)
-    name = os.path.splitext(basename)[0]+'.lrf'
-    try: 
-        book.renderLrf(name)
-    except UnicodeDecodeError:
-        raise ConversionError(path + ' is not encoded in ' + \
-                              options.encoding +'. Specify the '+ \
-                              'correct encoding with the -e option.')
-    return os.path.abspath(name)
-    
-
-def html():
-    """ CLI for html -> lrf conversions """
-    parser = OptionParser(usage=\
-        """usage: %prog [options] mybook.txt
-        
-        %prog converts mybook.txt to mybook.lrf
-        """\
-        )
-    parser.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help="Set the title")
-    parser.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help="Set the author", default='Unknown')
-    options, args = parser.parse_args()
-    if len(args) != 1:
-        parser.print_help()
-        sys.exit(1)
-    src = args[0]
-    if options.title == None:
-        options.title = os.path.splitext(os.path.basename(src))[0]
-    from libprs500.lrf.html.convert import process_file
-    process_file(src, options)
-
-def main(cargs=None):
-    parser = OptionParser(usage=\
-        """usage: %prog [options] mybook.[html|pdf|rar]
-        
-        %prog converts mybook to mybook.lrf
-        If you specify a rar file you must have the unrar command line client
-        installed. makelrf assumes the rar file is an archive containing the
-        html file you want converted."""\
-        )
-    
-    parser.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help="Set the book title")
-    parser.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help="Set the author")
-    parser.add_option('-r', '--rasterize', action='store_false', \
-                    dest="rasterize", 
-                    help="Convert pdfs into image files.")
-    parser.add_option('-c', '--cover', action='store', dest='cover',\
-                    help="Path to a graphic that will be set as the cover. "\
-                    "If it is specified the thumbnail is automatically "\
-                    "generated from it")
-    parser.add_option("--thumbnail", action="store", type="string", \
-                    dest="thumbnail", \
-                    help="Path to a graphic that will be set as the thumbnail")
-    if not cargs:
-        cargs = sys.argv
-    options, args = parser.parse_args()
-    if len(args) != 1:
-        parser.print_help()
-        sys.exit(1)
-    src = args[0]
-    root, ext = os.path.splitext(src)
-    if ext not in ['.html', '.pdf', '.rar']:
-        print >> sys.stderr, "Can only convert files ending in .html|.pdf|.rar"
-        parser.print_help()
-        sys.exit(1)
-    name = makelrf(author=options.author, title=options.title, \
-        thumbnail=options.thumbnail, src=src, cover=options.cover, \
-        rasterize=options.rasterize)
-    print "LRF generated:", name
--- a/src/libprs500/lrf/txt/init.py
+++ b/src/libprs500/lrf/txt/init.py
@ -0,0 +1,14 @@
+##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
--- a/src/libprs500/lrf/txt/convert_from.py
+++ b/src/libprs500/lrf/txt/convert_from.py
@ -0,0 +1,86 @@
+##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""
+Convert .txt files to .lrf
+"""
+import os, sys
+from optparse import OptionParser
+
+from libprs500.lrf import ConversionError
+
+def main():
+    """
+    Command line entry point for txt -> lrf conversions.
+
+    Parses the command line, defaults the title to the base name of the
+    input file and hands the work to L{convert_txt}. Exits with status 1 on
+    a usage error or when the conversion raises L{ConversionError}.
+    """
+    default_encoding = 'cp1252'
+    usage = """usage: %prog [options] mybook.txt
+        
+        %prog converts mybook.txt to mybook.lrf
+        """
+    parser = OptionParser(usage=usage)
+    parser.add_option("-t", "--title", dest="title", action="store",
+                      type="string", help="Set the title")
+    parser.add_option("-a", "--author", dest="author", action="store",
+                      type="string", default='Unknown', help="Set the author")
+    parser.add_option('-e', '--encoding', dest='encoding', action='store',
+                      type='string', default=default_encoding,
+                      help='Set the encoding used to decode the text in '
+                           'mybook.txt. Default encoding is ' + default_encoding)
+    options, positional = parser.parse_args()
+    if len(positional) != 1:
+        parser.print_help()
+        sys.exit(1)
+    source = positional[0]
+    if options.title is None:
+        options.title = os.path.splitext(os.path.basename(source))[0]
+    try:
+        convert_txt(source, options)
+    except ConversionError, err:
+        print >>sys.stderr, err
+        sys.exit(1)
+        
+    
+def convert_txt(path, options):
+    """
+    Convert the text file at C{path} into an lrf file.
+
+    A blank line ends a paragraph. The lines that make up a paragraph are
+    stripped and joined with single spaces. The lrf file is written to the
+    current directory and named after the input file.
+    @param options: Object with the following attributes:
+                    C{author}, C{title}, C{encoding} (the assumed encoding of 
+                    the text in C{path}.)
+    @return: Absolute path of the generated lrf file
+    @raise ConversionError: If rendering fails with a UnicodeDecodeError,
+                            i.e. the text is not encoded in C{options.encoding}
+    """
+    import fileinput
+    from libprs500.lrf.pylrs.pylrs import Book
+    book = Book(title=options.title, author=options.author, \
+                sourceencoding=options.encoding)
+    block = book.Page().TextBlock()
+    pending = []
+    for line in fileinput.input(path):
+        line = line.strip()
+        if line:
+            pending.append(line)
+        else:
+            # Joining with a space keeps the last word of one line from
+            # being glued to the first word of the next
+            block.Paragraph(' '.join(pending))
+            pending = []
+    if pending:
+        # The file did not end with a blank line: emit the final paragraph
+        block.Paragraph(' '.join(pending))
+    basename = os.path.basename(path)
+    name = os.path.splitext(basename)[0]+'.lrf'
+    try: 
+        book.renderLrf(name)
+    except UnicodeDecodeError:
+        raise ConversionError(path + ' is not encoded in ' + \
+                              options.encoding +'. Specify the '+ \
+                              'correct encoding with the -e option.')
+    return os.path.abspath(name)
+    
+
+if __name__ == '__main__':
+    main()