Rationalized CLI of html2lrf

Fixed link handling to show text rather than href by default. Fine-tuned image handling. Added automatic page breaks if no page-break is found.
2025-07-09 03:04:10 -04:00 · 2007-05-12 21:21:21 +00:00 · 2007-05-12 21:21:21 +00:00 · a29bf8eea0
commit a29bf8eea0
parent 69f20f634d
5 changed files with 194 additions and 123 deletions
--- a/src/libprs500/init.py
+++ b/src/libprs500/init.py
@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to
 suit your distribution.
 """
-__version__   = "0.3.25"
+__version__   = "0.3.26"
 __docformat__ = "epytext"
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
--- a/src/libprs500/lrf/init.py
+++ b/src/libprs500/lrf/init.py
@ -17,7 +17,7 @@ This package contains logic to read and write LRF files. The LRF file format is
 At the time of writing, this package only supports reading and writing LRF meta information. See L{meta}.
 """
-from optparse import OptionParser
+from optparse import OptionParser, OptionValueError
 from libprs500.lrf.pylrs.pylrs import Book as _Book
 from libprs500.lrf.pylrs.pylrs import TextBlock, Header, PutObj, Paragraph, TextStyle
@ -26,31 +26,53 @@ from libprs500 import __version__ as VERSION
 __docformat__ = "epytext"
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 class PRS500_PROFILE(object):
    """
    Device profile holding the display geometry used when generating an LRF.

    The class is used directly as a namespace of constants (it is never
    instantiated in this module): it is the default value of the
    C{--profile} option and of the C{profile} argument of L{Book}.
    """
    # Full-screen dimensions
    screen_height = 800
    screen_width  = 600
    # Dimensions of the text area; L{Book} uses these as textwidth/textheight
    page_height   = 747
    page_width    = 575
    # Resolution of the target display
    dpi           = 166
 def profile_from_string(option, opt_str, value, parser):
    """
    C{optparse} callback that converts a profile name into a profile class.

    @param option: The option instance invoking the callback; its C{dest}
                   names the attribute that receives the profile.
    @param opt_str: The option string as seen on the command line (unused).
    @param value: The profile name supplied by the user.
    @param parser: The parser whose C{values} object is updated.
    @raise OptionValueError: If C{value} is not the name of a known profile.
    """
    # Only one profile is implemented so far; reject anything else up front.
    if value != 'prs500':
        raise OptionValueError('Profile: '+value+' is not implemented')
    setattr(parser.values, option.dest, PRS500_PROFILE)
 class ConversionError(Exception):
    """ Raised when a source document cannot be converted to LRF. """
 def option_parser(usage):
-    parser = OptionParser(usage=usage, version='libprs500 '+VERSION)
+    parser = OptionParser(usage=usage, version='libprs500 '+VERSION,
-    parser.add_option('--header', action='store_true', default=False, dest='header',
+                          epilog='html2lrf created by Kovid Goyal')
    metadata = parser.add_option_group('METADATA OPTIONS')
    metadata.add_option('--header', action='store_true', default=False, dest='header',
                      help='Add a header to all the pages with title and author.')
-    parser.add_option("-t", "--title", action="store", type="string", \
+    metadata.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help="Set the title")
+                    dest="title", help="Set the title. Default: filename.")
-    parser.add_option("-a", "--author", action="store", type="string", \
+    metadata.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help="Set the author", default='Unknown')
+                    dest="author", help="Set the author. Default: %default", default='Unknown')
-    parser.add_option("--freetext", action="store", type="string", \
+    metadata.add_option("--freetext", action="store", type="string", \
-                    dest="freetext", help="Set the comments in the metadata", default='  ')
+                    dest="freetext", help="Set the comments.", default='  ')
-    parser.add_option("--category", action="store", type="string", \
+    metadata.add_option("--category", action="store", type="string", \
-                    dest="category", help="Set the category", default='  ')
+                    dest="category", help="Set the category", default='  ')    
    metadata.add_option('--title-sort', action='store', default='', dest='title_sort',
                      help='Sort key for the title')
    metadata.add_option('--author-sort', action='store', default='', dest='author_sort',
                      help='Sort key for the author')
    profiles=['prs500']    
    parser.add_option('-o', '--output', action='store', default=None, \
                      help='Output file name. Default is derived from input filename')
-    parser.add_option('--title-sort', action='store', default='', dest='title_sort',
+    parser.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
-                      help='Sort key for the title')
+                      choices=profiles, action='callback', callback=profile_from_string,
-    parser.add_option('--author-sort', action='store', default='', dest='author_sort',
+                      help='''Profile of the target device for which this LRF is '''
-                      help='Sort key for the author')
+                      '''being generated. Default: ''' + profiles[0] + '''
                      Supported profiles: '''+', '.join(profiles))
    return parser
-def Book(font_delta=0, header=None, **settings):
+def Book(font_delta=0, header=None, profile=PRS500_PROFILE, **settings):
-    ps = dict(textwidth=575, textheight=747)
+    ps = dict(textwidth=profile.page_width, 
              textheight=profile.page_height)
    if header:
        hdr = Header()
        hb = TextBlock(textStyle=TextStyle(align='foot', fontsize=60))
@ -62,5 +84,4 @@ def Book(font_delta=0, header=None, **settings):
        ps['topmargin'] = 10
    return _Book(textstyledefault=dict(fontsize=100+font_delta*20, 
                                       parindent=80, linespace=12), \
-                 pagestyledefault=ps, \
+                 pagestyledefault=ps, **settings)
                  **settings)
--- a/src/libprs500/lrf/html/convert_from.py
+++ b/src/libprs500/lrf/html/convert_from.py
@ -39,7 +39,7 @@ from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBl
                                      Bold, Space, Plot, Image, BlockSpace,\
                                      RuledLine, BookSetting
 from libprs500.lrf.pylrs.pylrs import Span as _Span
-from libprs500.lrf import ConversionError, option_parser, Book
+from libprs500.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
 from libprs500 import extract, filename_to_utf8
 from libprs500.ptempfile import PersistentTemporaryFile
@ -158,7 +158,7 @@ class Span(_Span):
                ans = font_weight(val)                
                if ans:
                    t['fontweight'] = ans
-                    if int(ans) > 1400:                        
+                    if int(ans) > 140:                        
                        t['wordspace'] = '50'
            elif key.startswith("margin"):
                if key == "margin":
@ -214,8 +214,9 @@ class Span(_Span):
 class HTMLConverter(object):
-    SELECTOR_PAT  = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
+    SELECTOR_PAT   = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
-    IGNORED_TAGS  = (Comment, Declaration, ProcessingInstruction)
+    PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
    IGNORED_TAGS   = (Comment, Declaration, ProcessingInstruction)
    # Fix <a /> elements 
    MARKUP_MASSAGE   = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), 
                         lambda match: match.group(1)+"></a>")]
@ -234,12 +235,14 @@ class HTMLConverter(object):
    processed_files = {} #: Files that have been processed
-    def __init__(self, book, path, dpi=166, width=575, height=747, 
+    def __init__(self, book, path, 
                 font_delta=0, verbose=False, cover=None,
                 max_link_levels=sys.maxint, link_level=0,
                 is_root=True, baen=False, chapter_detection=True,
                 chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE),
-                 link_exclude=re.compile('$')):
+                 link_exclude=re.compile('$'), 
                 page_break=re.compile('h[12]', re.IGNORECASE),
                 profile=PRS500_PROFILE, hide_broken_links=False):
        '''
        Convert HTML file at C{path} and add it to C{book}. After creating
        the object, you must call L{self.process_links} on it to create the links and
@ -270,6 +273,11 @@ class HTMLConverter(object):
        @type chapter_detection: C{bool}
        @param chapter_regex: The compiled regular expression used to search for chapter titles
        @param link_exclude: Compiled regex. Matching hrefs are ignored.
        @param page_break: Compiled regex. Page breaks are inserted before matching
                           tags if no page-breaks are found and no chapter headings
                           are detected.
        @param profile: Defines the geometry of the display device
        @param hide_broken_links: Don't display broken links
        '''
        # Defaults for various formatting tags        
        self.css = dict(
@ -285,10 +293,8 @@ class HTMLConverter(object):
            small  = {'font-size'   :'small'},
            pre    = {'font-family' :'monospace' },
            center = {'text-align'  : 'center'}
-            )
+            )        
-        self.page_width = width   #: The width of the page
+        self.profile     = profile #: Defines the geometry of the display device
        self.page_height = height #: The height of the page
        self.dpi         = dpi    #: The DPI of the intended display device
        self.chapter_detection = chapter_detection #: Flag to toggle chapter detection
        self.chapter_regex = chapter_regex #: Regex used to search for chapter titles
        self.link_exclude = link_exclude #: Ignore matching hrefs
@ -298,6 +304,7 @@ class HTMLConverter(object):
        self.blockquote_style = book.create_block_style(sidemargin=60, 
                                                        topskip=20, footskip=20)
        self.unindented_style = book.create_text_style(parindent=0)
        self.page_break       = page_break #: Regex controlling forced page-break behavior
        self.text_styles      = []#: Keep track of already used textstyles
        self.block_styles     = []#: Keep track of already used blockstyles
        self.images  = {}         #: Images referenced in the HTML document
@ -311,7 +318,8 @@ class HTMLConverter(object):
        self.in_ol = False #: Flag indicating we're in an <ol> element
        self.book = book #: The Book object representing a BBeB book
        self.is_root = is_root           #: Are we converting the root HTML file
-        self.lstrip_toggle = False #; If true the next add_text call will do an lstrip
+        self.lstrip_toggle = False #: If true the next add_text call will do an lstrip
        self.hide_broken_links = hide_broken_links
        path = os.path.abspath(path)
        os.chdir(os.path.dirname(path))
        self.file_name = os.path.basename(path)
@ -331,7 +339,11 @@ class HTMLConverter(object):
        self.verbose = verbose        
        self.current_page = None
        self.current_para = None
-        self.current_style = {}        
+        self.current_style = {}
        self.page_break_found = False
        match = self.PAGE_BREAK_PAT.search(unicode(self.soup))
        if match and not re.match('avoid', match.group(1), re.IGNORECASE):
            self.page_break_found = True
        self.parse_file()
        HTMLConverter.processed_files[path] = self
        print 'done'
@ -440,7 +452,8 @@ class HTMLConverter(object):
    def get_text(self, tag):
            css = self.tag_css(tag)
-            if css.has_key('display') and css['display'].lower() == 'none':
+            if (css.has_key('display') and css['display'].lower() == 'none') or \
               (css.has_key('visibility') and css['visibility'].lower() == 'hidden'):
                return ''
            text = ''
            for c in tag.contents:
@ -485,22 +498,26 @@ class HTMLConverter(object):
                page.contents.remove(bs)
            return ans
-        cwd = os.getcwd()
+        cwd = os.getcwd()        
        for link in self.links:
            para, tag = link.para, link.tag
            text = self.get_text(tag)
            if self.hide_broken_links:
                    para.contents = []
                    para.append(_Span(text=text))
            purl = urlparse(link.tag['href'])
            if purl[1]: # Not a link to a file on the local filesystem
                continue
-            path, fragment = unquote(purl[2]), purl[5]
+            path, fragment = unquote(purl[2]), purl[5]            
            para, tag = link.para, link.tag
            if not path or os.path.basename(path) == self.file_name:
                if fragment in self.targets.keys():
                    tb = get_target_block(fragment, self.targets)
                    if self.is_root:
-                        self.book.addTocEntry(self.get_text(tag), tb)                 
+                        self.book.addTocEntry(text, tb)                 
                    sys.stdout.flush()
                    jb = JumpButton(tb)
                    self.book.append(jb)
-                    cb = CharButton(jb, text=self.get_text(tag))
+                    cb = CharButton(jb, text=text)
                    para.contents = []
                    para.append(cb)
            elif self.link_level < self.max_link_levels:
@ -515,15 +532,16 @@ class HTMLConverter(object):
                if not path in HTMLConverter.processed_files.keys():                    
                    try:                        
                        self.files[path] = HTMLConverter(self.book, path, 
-                                     width=self.page_width, height=self.page_height,
+                                     profile=self.profile,
                                     dpi=self.dpi,
                                     font_delta=self.font_delta, verbose=self.verbose,
                                     link_level=self.link_level+1,
                                     max_link_levels=self.max_link_levels,
                                     is_root = False, baen=self.baen,
                                     chapter_detection=self.chapter_detection,
                                     chapter_regex=self.chapter_regex,
-                                     link_exclude=self.link_exclude)
+                                     link_exclude=self.link_exclude,
                                     page_break=self.page_break,
                                     hide_broken_links=self.hide_broken_links)
                        HTMLConverter.processed_files[path] = self.files[path]
                    except Exception:
                        print >>sys.stderr, 'Unable to process', path
@ -540,10 +558,10 @@ class HTMLConverter(object):
                else:
                    tb = conv.top
                if self.is_root:
-                    self.book.addTocEntry(self.get_text(tag), tb)      
+                    self.book.addTocEntry(text, tb)      
                jb = JumpButton(tb)                
                self.book.append(jb)
-                cb = CharButton(jb, text=self.get_text(tag))
+                cb = CharButton(jb, text=text)
                para.contents = []
                para.append(cb)                
@ -574,10 +592,12 @@ class HTMLConverter(object):
    def add_image_page(self, path):
        if os.access(path, os.R_OK):
-            self.end_page()
+            self.end_page()            
            page = self.book.create_page(evensidemargin=0, oddsidemargin=0, 
-                                         topmargin=0, textwidth=self.page_width,
+                                         topmargin=0, textwidth=self.profile.screen_width,
-                                         textheight=self.page_height)
+                                         headheight=0, headsep=0, footspace=0,
                                         footheight=0,
                                         textheight=self.profile.screen_height)
            if not self.images.has_key(path):
                self.images[path] = ImageStream(path)
            page.append(ImageBlock(self.images[path]))
@ -651,11 +671,8 @@ class HTMLConverter(object):
               'padding' in test or 'border' in test or 'page-break' in test \
               or test.startswith('mso') or test.startswith('background')\
               or test.startswith('line') or test in ['color', 'display', \
-                           'letter-spacing',  
+                           'letter-spacing', 'font-variant']:
-                           'font-variant']:
+                css.pop(key)              
                css.pop(key)
                if self.verbose:
                    print 'Ignoring CSS key:', key
        return css
    def end_current_para(self):
@ -688,7 +705,8 @@ class HTMLConverter(object):
            return
        tag_css = self.tag_css(tag, parent_css=parent_css)
        try: # Skip element if its display attribute is set to none
-            if tag_css['display'].lower() == 'none':
+            if tag_css['display'].lower() == 'none' or \
               tag_css['visibility'].lower() == 'hidden':
                return
        except KeyError:
            pass
@ -701,7 +719,11 @@ class HTMLConverter(object):
           tag_css['page-break-after'].lower() != 'avoid':
            end_page = True
            tag_css.pop('page-break-after')
-            
+        if not self.page_break_found and self.page_break.match(tagname):
            if len(self.current_page.contents) > 3:
                self.end_page()
                if self.verbose:
                    print 'Forcing page break at', tagname
        if tagname in ["title", "script", "meta", 'del', 'frameset']:            
            pass
        elif tagname == 'a' and self.max_link_levels >= 0:
@ -744,12 +766,12 @@ class HTMLConverter(object):
                self.targets[tag['name']] = target
            elif tag.has_key('href') and not self.link_exclude.match(tag['href']):
                purl = urlparse(tag['href'])
-                path = purl[2]
+                path = unquote(purl[2])
                if path and os.path.splitext(path)[1][1:].lower() in \
                    ['png', 'jpg', 'bmp', 'jpeg']:
                    self.add_image_page(path)
                else:
-                    self.add_text('Link: '+tag['href'], tag_css)
+                    self.add_text('Link: ' + tag['href'], tag_css)
                    self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
        elif tagname == 'img':
            if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
@ -772,31 +794,32 @@ class HTMLConverter(object):
                    return pt.name
-                if height > self.page_height:
+                if height > self.profile.page_height:
-                    corrf = self.page_height/(1.*height)
+                    corrf = self.profile.page_height/(1.*height)
-                    width, height = floor(corrf*width), self.page_height-1                        
+                    width, height = floor(corrf*width), self.profile.page_height-1                        
-                    if width > self.page_width:
+                    if width > self.profile.page_width:
-                        corrf = (self.page_width)/(1.*width)
+                        corrf = (self.profile.page_width)/(1.*width)
-                        width, height = self.page_width-1, floor(corrf*height)
+                        width, height = self.profile.page_width-1, floor(corrf*height)
                    path = scale_image(width, height)
-                if width > self.page_width:
+                if width > self.profile.page_width:
-                    corrf = self.page_width/(1.*width)
+                    corrf = self.profile.page_width/(1.*width)
-                    width, height = self.page_width-1, floor(corrf*height)
+                    width, height = self.profile.page_width-1, floor(corrf*height)
-                    if height > self.page_height:
+                    if height > self.profile.page_height:
-                        corrf = (self.page_height)/(1.*height)
+                        corrf = (self.profile.page_height)/(1.*height)
-                        width, height = floor(corrf*width), self.page_height-1                        
+                        width, height = floor(corrf*width), self.profile.page_height-1                        
                    path = scale_image(width, height)
                width, height = int(width), int(height)
                if not self.images.has_key(path):
                    self.images[path] = ImageStream(path)
-                factor = 720./self.dpi
+                factor = 720./self.profile.dpi
-                if max(width, height) <= min(self.page_width, self.page_height)/5.:
+                if max(width, height) <= min(self.profile.page_width, 
                                             self.profile.page_height)/5.:
                    im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
                               xsize=width, ysize=height)                    
                    self.current_para.append(Plot(im, xsize=ceil(width*factor), 
                                                  ysize=ceil(height*factor)))
-                elif height <= self.page_height/1.5:
+                else:
                    pb = self.current_block
                    self.end_current_para()                    
                    self.process_alignment(tag_css)
@ -809,16 +832,7 @@ class HTMLConverter(object):
                    self.current_block = self.book.create_text_block(
                                                    textStyle=pb.textStyle,
                                                    blockStyle=pb.blockStyle)
-                    self.current_para = Paragraph()
+                    self.current_para = Paragraph()                
                else:
                    self.current_block.append(self.current_para)
                    self.current_page.append(self.current_block)
                    self.current_para = Paragraph()
                    self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle,
                                                         blockStyle=self.current_block.blockStyle)
                    im = ImageBlock(self.images[path], x1=width, y1=height, 
                                    xsize=width, ysize=height)
                    self.current_page.append(im)                        
            else:
                print >>sys.stderr, "Failed to process:", tag
        elif tagname in ['style', 'link']:
@ -835,14 +849,16 @@ class HTMLConverter(object):
                        ncss.update(self.parse_css(str(c)))
            elif tag.has_key('type') and tag['type'] == "text/css" \
                    and tag.has_key('href'):
-                url = tag['href']
+                purl = urlparse(tag['href'])
                path = unquote(purl[2])                
                try:
-                    if url.startswith('http://'):
+                    f = open(path, 'rb')
-                        f = urlopen(url)
+                    src = f.read()
                    else:
                        f = open(unquote(url))
                    ncss = self.parse_css(f.read())
                    f.close()
                    match = self.PAGE_BREAK_PAT.search(src) 
                    if match and not re.match('avoid', match.group(1), re.IGNORECASE):
                        self.page_break_found = True
                    ncss = self.parse_css(f.read())
                except IOError:
                    pass
            if ncss:
@ -917,6 +933,7 @@ class HTMLConverter(object):
                    if self.verbose:
                        print 'Detected chapter', src
                    self.end_page()
                    self.page_break_found = True
            self.end_current_para()
            self.lstrip_toggle = True
            if tag_css.has_key('text-indent'):
@ -953,7 +970,7 @@ class HTMLConverter(object):
            self.end_current_para()            
            self.current_block.append(CR())
            self.end_current_block()
-            self.current_page.RuledLine(linelength=self.page_width)
+            self.current_page.RuledLine(linelength=self.profile.page_width)
        else:            
            self.process_children(tag, tag_css)
@ -967,18 +984,21 @@ class HTMLConverter(object):
        for _file in self.scaled_images.values():   
            _file.__del__()
 def process_file(path, options):
    cwd = os.getcwd()
    dirpath = None
    try:
        dirpath, path = get_path(path)
-        cpath, tpath = options.cover, ''
+        cpath, tpath = '', '' 
-        if options.cover and os.access(options.cover, os.R_OK):            
+        if options.cover:
-            try:
+            options.cover = os.path.abspath(os.path.expanduser(options.cover))
            cpath = options.cover
            if os.access(options.cover, os.R_OK):        
                from libprs500.prs500 import PRS500                
                im = PILImage.open(os.path.join(cwd, cpath))
-                cim = im.resize((600, 800), PILImage.BICUBIC)
+                cim = im.resize((options.profile.screen_width, 
                                 options.profile.screen_height), 
                                PILImage.BICUBIC)
                cf = PersistentTemporaryFile(prefix="html2lrf_", suffix=".jpg")
                cf.close()                
                cim.save(cf.name)
@ -989,17 +1009,16 @@ def process_file(path, options):
                tf.close()
                tim.save(tf.name)
                tpath = tf.name
-            except ImportError:
+            else:
-                print >>sys.stderr, "WARNING: You don't have PIL installed. ",
+                raise ConversionError, 'Cannot read from: %s', (options.cover,)
                'Cover and thumbnails wont work'
                pass
        title = (options.title, options.title_sort)
        author = (options.author, options.author_sort)
        args = dict(font_delta=options.font_delta, title=title, \
                    author=author, sourceencoding='utf8',\
                    freetext=options.freetext, category=options.category,
-                    booksetting=BookSetting(dpi=10*options.dpi,screenheight=800,
+                    booksetting=BookSetting(dpi=10*options.profile.dpi,
-                                            screenwidth=600))
+                                            screenheight=options.profile.screen_height,
                                            screenwidth=options.profile.screen_width))
        if tpath:
            args['thumbnail'] = tpath
        header = None
@ -1011,13 +1030,16 @@ def process_file(path, options):
        book = Book(header=header, **args)
        le = re.compile(options.link_exclude) if options.link_exclude else \
             re.compile('$')
-        conv = HTMLConverter(book, path, dpi=options.dpi,
+        pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
             re.compile('$')
        conv = HTMLConverter(book, path, profile=options.profile,
                             font_delta=options.font_delta, 
                             cover=cpath, max_link_levels=options.link_levels,
-                             baen=options.baen, 
+                             verbose=options.verbose, baen=options.baen, 
                             chapter_detection=options.chapter_detection,
                             chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE),
-                             link_exclude=re.compile(le))
+                             link_exclude=re.compile(le), page_break=pb,
                             hide_broken_links=not options.show_broken_links)
        conv.process_links()
        oname = options.output
        if not oname:
@ -1033,47 +1055,73 @@ def process_file(path, options):
        if dirpath:
            shutil.rmtree(dirpath, True)
-def main():
+def parse_options(argv=None, cli=True):
    """ CLI for html -> lrf conversions """
    if not argv:
        argv = sys.argv[1:]
    parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip]
         %prog converts mybook.html to mybook.lrf""")
    parser.add_option('--cover', action='store', dest='cover', default=None, \
                      help='Path to file containing image to be used as cover')
    parser.add_option('--lrs', action='store_true', dest='lrs', \
                      help='Convert to LRS', default=False)
    parser.add_option('--font-delta', action='store', type='int', default=0, \
                      help="""Increase the font size by 2 * FONT_DELTA pts. 
                      If FONT_DELTA is negative, the font size is decreased.""",
                      dest='font_delta')
-    parser.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
+    link = parser.add_option_group('LINK PROCESSING OPTIONS')
    link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
                      dest='link_levels',
                      help=r'''The maximum number of levels to recursively process '''
                              '''links. A value of 0 means thats links are not followed. '''
                              '''A negative value means that <a> tags are ignored.''')
-    parser.add_option('--baen', action='store_true', default=False, dest='baen',
+    link.add_option('--link-exclude', dest='link_exclude', default='$',
-                      help='''Preprocess Baen HTML files to improve generated LRF.''')
+                      help='''A regular expression. <a> tags whoose href '''
-    parser.add_option('--dpi', action='store', type='int', default=166, dest='dpi',
+                      '''matches will be ignored. Defaults to %default''')
-                      help='''The DPI of the target device. Default is 166 for the
+    chapter = parser.add_option_group('CHAPTER OPTIONS')
-                              Sony PRS 500''')
+    chapter.add_option('--disable-chapter-detection', action='store_false', 
    parser.add_option('--disable-chapter-detection', action='store_false', 
                      default=True, dest='chapter_detection', 
                      help='''Prevent html2lrf from automatically inserting page breaks'''
                      '''before what it thinks are chapters.''')
-    parser.add_option('--chapter-regex', dest='chapter_regex', 
+    chapter.add_option('--chapter-regex', dest='chapter_regex', 
                      default='chapter|book|appendix',
                      help='''The regular expression used to detect chapter titles.'''
-                      '''It is searched for in heading tags. Default is chapter|book|appendix''') 
+                      '''It is searched for in heading tags. Defaults to %default''')     
-    parser.add_option('--link-exclude', dest='link_exclude', default='',
+    chapter.add_option('--page-break-before', dest='page_break', default='h[12]',
-                      help='''A regular expression. <a> tags whoose href '''
+                      help='''If html2lrf does not find any page breaks in the '''
-                      '''matches will be ignored''')
+                      '''html file and cannot detect chapter headings, it will '''
-    options, args = parser.parse_args()
+                      '''automatically insert page-breaks before the tags whose '''
                      '''names match this regular expression. Defaults to %default. '''
                      '''You can disable it by setting the regexp to "$". '''
                      '''The purpose of this option is to try to ensure that '''
                      '''there are no really long pages as this degrades the page '''
                      '''turn performance of the LRF. Thus this option is ignored '''
                      '''if the current page has only a few elements.''')
    prepro = parser.add_option_group('PREPROCESSING OPTIONS')
    prepro.add_option('--baen', action='store_true', default=False, dest='baen',
                      help='''Preprocess Baen HTML files to improve generated LRF.''')
    debug = parser.add_option_group('DEBUG OPTIONS')
    debug.add_option('--verbose', dest='verbose', action='store_true', default=False,
                      help='''Be verbose while processing''')
    debug.add_option('--lrs', action='store_true', dest='lrs', \
                      help='Convert to LRS', default=False)
    debug.add_option('--show-broken-links', dest='show_broken_links', action='store_true',
                    default=False, help='''Show the href of broken links in generated LRF''')   
    options, args = parser.parse_args(args=argv)
    if len(args) != 1:
-        parser.print_help()
+        if cli:
-        sys.exit(1)
+            parser.print_help()
-    src = args[0]
+        raise ConversionError, 'no filename specified'
    if options.title == None:
-        options.title = filename_to_utf8(os.path.splitext(os.path.basename(src))[0])
+        options.title = filename_to_utf8(os.path.splitext(os.path.basename(args[0]))[0])
    return options, args
 def main():
    """
    Command line entry point: parse C{sys.argv} and convert the named file.

    Exits with status 1 if the options cannot be parsed into exactly one
    input file. Exits requested by the option parser itself (for example
    for C{--help} or C{--version}) are allowed to propagate with their own
    status instead of being rewritten to 1.
    """
    try:
        options, args = parse_options()
        src = args[0]
    except Exception:
        # Deliberately not a bare except: SystemExit and KeyboardInterrupt
        # must pass through so that --help exits with 0 and Ctrl-C works.
        sys.exit(1)
    process_file(src, options)
 def console_query(dirpath, candidate, docs):
--- a/src/libprs500/lrf/html/demo/demo.html
+++ b/src/libprs500/lrf/html/demo/demo.html
@ -70,7 +70,7 @@
 <h2><a name='images'>Inline images</a></h2>
 <p>
- Here I demonstrate the use of inline images in the midst of text. Here is a  small image <img src='small.jpg' /> embedded in a sentence. Now we have a  slightly larger image that is automatically put in its own block  <img style="text-align:center" src='medium.jpg' /> and finally we have a large image which is  automatically placed on a page by itself and prevented from being  autoscaled when the user changes from S to M to L. Try changing sizes  and see how the different embedding styles behave.  <img src='large.jpg' />
+ Here I demonstrate the use of inline images in the midst of text. Here is a  small image <img src='small.jpg' /> embedded in a sentence. Now we have a  slightly larger image that is automatically put in its own block  <img style="text-align:center" src='medium.jpg' /> and finally we have a large image which wont fit on this page. Try changing sizes from S to M to L and see how the images behave.  <img src='large.jpg' />
 </p>
 <p class='toc'>
 <hr />
--- a/src/libprs500/lrf/txt/convert_from.py
+++ b/src/libprs500/lrf/txt/convert_from.py
@ -69,7 +69,9 @@ def convert_txt(path, options):
    book = Book(header=header, title=title, author=author, \
                sourceencoding=options.encoding, freetext=options.freetext, \
                category=options.category, booksetting=BookSetting
-                (dpi=10*options.dpi,screenheight=800, screenwidth=600))
+                (dpi=10*options.profile.dpi,
                 screenheight=options.profile.screen_height, 
                 screenwidth=options.profile.screen_height))
    buffer = ''
    pg = book.create_page()
    block = book.create_text_block()