From a29bf8eea0a6cdc9f8974f673dbe0c0dcab02937 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 12 May 2007 21:21:21 +0000 Subject: [PATCH] Rationalized CLI of html2lrf Fixed link handling to show text rather than href by default Fine tuned image handling Added automatic page breaks if page-break not found --- src/libprs500/__init__.py | 2 +- src/libprs500/lrf/__init__.py | 59 ++++-- src/libprs500/lrf/html/convert_from.py | 250 +++++++++++++++---------- src/libprs500/lrf/html/demo/demo.html | 2 +- src/libprs500/lrf/txt/convert_from.py | 4 +- 5 files changed, 194 insertions(+), 123 deletions(-) diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py index a06dc9626a..ebf9717551 100644 --- a/src/libprs500/__init__.py +++ b/src/libprs500/__init__.py @@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to suit your distribution. """ -__version__ = "0.3.25" +__version__ = "0.3.26" __docformat__ = "epytext" __author__ = "Kovid Goyal " diff --git a/src/libprs500/lrf/__init__.py b/src/libprs500/lrf/__init__.py index 61c9067757..0dbf9b0144 100644 --- a/src/libprs500/lrf/__init__.py +++ b/src/libprs500/lrf/__init__.py @@ -17,7 +17,7 @@ This package contains logic to read and write LRF files. The LRF file format is At the time fo writing, this package only supports reading and writing LRF meat information. See L{meta}. """ -from optparse import OptionParser +from optparse import OptionParser, OptionValueError from libprs500.lrf.pylrs.pylrs import Book as _Book from libprs500.lrf.pylrs.pylrs import TextBlock, Header, PutObj, Paragraph, TextStyle @@ -26,31 +26,53 @@ from libprs500 import __version__ as VERSION __docformat__ = "epytext" __author__ = "Kovid Goyal " +class PRS500_PROFILE(object): + screen_width = 600 + screen_height = 800 + page_width = 575 + page_height = 747 + dpi = 166 + +def profile_from_string(option, opt_str, value, parser): + if value == 'prs500': + setattr(parser.values, option.dest, PRS500_PROFILE) + else: + raise OptionValueError('Profile: '+value+' is not implemented') + class ConversionError(Exception): pass def option_parser(usage): - parser = OptionParser(usage=usage, version='libprs500 '+VERSION) - parser.add_option('--header', action='store_true', default=False, dest='header', + parser = OptionParser(usage=usage, version='libprs500 '+VERSION, + epilog='html2lrf created by Kovid Goyal') + metadata = parser.add_option_group('METADATA OPTIONS') + metadata.add_option('--header', action='store_true', default=False, dest='header', help='Add a header to all the pages with title and author.') - parser.add_option("-t", "--title", action="store", type="string", \ - dest="title", help="Set the title") - parser.add_option("-a", "--author", action="store", type="string", \ - dest="author", help="Set the author", default='Unknown') - parser.add_option("--freetext", action="store", type="string", \ - dest="freetext", help="Set the comments in the metadata", default=' ') - parser.add_option("--category", action="store", type="string", \ - dest="category", help="Set the category", default=' ') + metadata.add_option("-t", "--title", action="store", type="string", \ + dest="title", help="Set the title. Default: filename.") + metadata.add_option("-a", "--author", action="store", type="string", \ + dest="author", help="Set the author. Default: %default", default='Unknown') + metadata.add_option("--freetext", action="store", type="string", \ + dest="freetext", help="Set the comments.", default=' ') + metadata.add_option("--category", action="store", type="string", \ + dest="category", help="Set the category", default=' ') + metadata.add_option('--title-sort', action='store', default='', dest='title_sort', + help='Sort key for the title') + metadata.add_option('--author-sort', action='store', default='', dest='author_sort', + help='Sort key for the author') + profiles=['prs500'] parser.add_option('-o', '--output', action='store', default=None, \ help='Output file name. Default is derived from input filename') - parser.add_option('--title-sort', action='store', default='', dest='title_sort', - help='Sort key for the title') - parser.add_option('--author-sort', action='store', default='', dest='author_sort', - help='Sort key for the author') + parser.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice', + choices=profiles, action='callback', callback=profile_from_string, + help='''Profile of the target device for which this LRF is ''' + '''being generated. Default: ''' + profiles[0] + ''' + Supported profiles: '''+', '.join(profiles)) return parser -def Book(font_delta=0, header=None, **settings): - ps = dict(textwidth=575, textheight=747) +def Book(font_delta=0, header=None, profile=PRS500_PROFILE, **settings): + ps = dict(textwidth=profile.page_width, + textheight=profile.page_height) if header: hdr = Header() hb = TextBlock(textStyle=TextStyle(align='foot', fontsize=60)) @@ -62,5 +84,4 @@ def Book(font_delta=0, header=None, **settings): ps['topmargin'] = 10 return _Book(textstyledefault=dict(fontsize=100+font_delta*20, parindent=80, linespace=12), \ - pagestyledefault=ps, \ - **settings) \ No newline at end of file + pagestyledefault=ps, **settings) \ No newline at end of file diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py index 5ced7857bd..800e23401e 100644 --- a/src/libprs500/lrf/html/convert_from.py +++ b/src/libprs500/lrf/html/convert_from.py @@ -39,7 +39,7 @@ from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBl Bold, Space, Plot, Image, BlockSpace,\ RuledLine, BookSetting from libprs500.lrf.pylrs.pylrs import Span as _Span -from libprs500.lrf import ConversionError, option_parser, Book +from libprs500.lrf import ConversionError, option_parser, Book, PRS500_PROFILE from libprs500 import extract, filename_to_utf8 from libprs500.ptempfile import PersistentTemporaryFile @@ -158,7 +158,7 @@ class Span(_Span): ans = font_weight(val) if ans: t['fontweight'] = ans - if int(ans) > 1400: + if int(ans) > 140: t['wordspace'] = '50' elif key.startswith("margin"): if key == "margin": @@ -214,8 +214,9 @@ class Span(_Span): class HTMLConverter(object): - SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") - IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) + SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") + PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE) + IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction) # Fix elements MARKUP_MASSAGE = [(re.compile("(<\s*[aA]\s+.*\/)\s*>"), lambda match: match.group(1)+">")] @@ -234,12 +235,14 @@ class HTMLConverter(object): processed_files = {} #: Files that have been processed - def __init__(self, book, path, dpi=166, width=575, height=747, + def __init__(self, book, path, font_delta=0, verbose=False, cover=None, max_link_levels=sys.maxint, link_level=0, is_root=True, baen=False, chapter_detection=True, chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE), - link_exclude=re.compile('$')): + link_exclude=re.compile('$'), + page_break=re.compile('h[12]', re.IGNORECASE), + profile=PRS500_PROFILE, hide_broken_links=False): ''' Convert HTML file at C{path} and add it to C{book}. After creating the object, you must call L{self.process_links} on it to create the links and @@ -270,6 +273,11 @@ class HTMLConverter(object): @type chapter_detection: C{bool} @param chapter_regex: The compiled regular expression used to search for chapter titles @param link_exclude: Compiled regex. Matching hrefs are ignored. + @param page_break: Compiled regex. Page breaks are inserted before matching + tags if no page-breaks are found and no chapter headings + are detected. + @param profile: Defines the geometry of the display device + @param hide_broken_links: Don't display broken links ''' # Defaults for various formatting tags self.css = dict( @@ -285,10 +293,8 @@ class HTMLConverter(object): small = {'font-size' :'small'}, pre = {'font-family' :'monospace' }, center = {'text-align' : 'center'} - ) - self.page_width = width #: The width of the page - self.page_height = height #: The height of the page - self.dpi = dpi #: The DPI of the intended display device + ) + self.profile = profile #: Defines the geometry of the display device self.chapter_detection = chapter_detection #: Flag to toggle chapter detection self.chapter_regex = chapter_regex #: Regex used to search for chapter titles self.link_exclude = link_exclude #: Ignore matching hrefs @@ -298,6 +304,7 @@ class HTMLConverter(object): self.blockquote_style = book.create_block_style(sidemargin=60, topskip=20, footskip=20) self.unindented_style = book.create_text_style(parindent=0) + self.page_break = page_break #: Regex controlling forced page-break behavior self.text_styles = []#: Keep track of already used textstyles self.block_styles = []#: Keep track of already used blockstyles self.images = {} #: Images referenced in the HTML document @@ -311,7 +318,8 @@ class HTMLConverter(object): self.in_ol = False #: Flag indicating we're in an
    element self.book = book #: The Book object representing a BBeB book self.is_root = is_root #: Are we converting the root HTML file - self.lstrip_toggle = False #; If true the next add_text call will do an lstrip + self.lstrip_toggle = False #: If true the next add_text call will do an lstrip + self.hide_broken_links = hide_broken_links path = os.path.abspath(path) os.chdir(os.path.dirname(path)) self.file_name = os.path.basename(path) @@ -331,7 +339,11 @@ class HTMLConverter(object): self.verbose = verbose self.current_page = None self.current_para = None - self.current_style = {} + self.current_style = {} + self.page_break_found = False + match = self.PAGE_BREAK_PAT.search(unicode(self.soup)) + if match and not re.match('avoid', match.group(1), re.IGNORECASE): + self.page_break_found = True self.parse_file() HTMLConverter.processed_files[path] = self print 'done' @@ -440,7 +452,8 @@ class HTMLConverter(object): def get_text(self, tag): css = self.tag_css(tag) - if css.has_key('display') and css['display'].lower() == 'none': + if (css.has_key('display') and css['display'].lower() == 'none') or \ + (css.has_key('visibility') and css['visibility'].lower() == 'hidden'): return '' text = '' for c in tag.contents: @@ -485,22 +498,26 @@ class HTMLConverter(object): page.contents.remove(bs) return ans - cwd = os.getcwd() + cwd = os.getcwd() for link in self.links: + para, tag = link.para, link.tag + text = self.get_text(tag) + if self.hide_broken_links: + para.contents = [] + para.append(_Span(text=text)) purl = urlparse(link.tag['href']) if purl[1]: # Not a link to a file on the local filesystem continue - path, fragment = unquote(purl[2]), purl[5] - para, tag = link.para, link.tag + path, fragment = unquote(purl[2]), purl[5] if not path or os.path.basename(path) == self.file_name: if fragment in self.targets.keys(): tb = get_target_block(fragment, self.targets) if self.is_root: - self.book.addTocEntry(self.get_text(tag), tb) + self.book.addTocEntry(text, tb) sys.stdout.flush() jb = JumpButton(tb) self.book.append(jb) - cb = CharButton(jb, text=self.get_text(tag)) + cb = CharButton(jb, text=text) para.contents = [] para.append(cb) elif self.link_level < self.max_link_levels: @@ -515,15 +532,16 @@ class HTMLConverter(object): if not path in HTMLConverter.processed_files.keys(): try: self.files[path] = HTMLConverter(self.book, path, - width=self.page_width, height=self.page_height, - dpi=self.dpi, + profile=self.profile, font_delta=self.font_delta, verbose=self.verbose, link_level=self.link_level+1, max_link_levels=self.max_link_levels, is_root = False, baen=self.baen, chapter_detection=self.chapter_detection, chapter_regex=self.chapter_regex, - link_exclude=self.link_exclude) + link_exclude=self.link_exclude, + page_break=self.page_break, + hide_broken_links=self.hide_broken_links) HTMLConverter.processed_files[path] = self.files[path] except Exception: print >>sys.stderr, 'Unable to process', path @@ -540,10 +558,10 @@ class HTMLConverter(object): else: tb = conv.top if self.is_root: - self.book.addTocEntry(self.get_text(tag), tb) + self.book.addTocEntry(text, tb) jb = JumpButton(tb) self.book.append(jb) - cb = CharButton(jb, text=self.get_text(tag)) + cb = CharButton(jb, text=text) para.contents = [] para.append(cb) @@ -574,10 +592,12 @@ class HTMLConverter(object): def add_image_page(self, path): if os.access(path, os.R_OK): - self.end_page() + self.end_page() page = self.book.create_page(evensidemargin=0, oddsidemargin=0, - topmargin=0, textwidth=self.page_width, - textheight=self.page_height) + topmargin=0, textwidth=self.profile.screen_width, + headheight=0, headsep=0, footspace=0, + footheight=0, + textheight=self.profile.screen_height) if not self.images.has_key(path): self.images[path] = ImageStream(path) page.append(ImageBlock(self.images[path])) @@ -651,11 +671,8 @@ class HTMLConverter(object): 'padding' in test or 'border' in test or 'page-break' in test \ or test.startswith('mso') or test.startswith('background')\ or test.startswith('line') or test in ['color', 'display', \ - 'letter-spacing', - 'font-variant']: - css.pop(key) - if self.verbose: - print 'Ignoring CSS key:', key + 'letter-spacing', 'font-variant']: + css.pop(key) return css def end_current_para(self): @@ -688,7 +705,8 @@ class HTMLConverter(object): return tag_css = self.tag_css(tag, parent_css=parent_css) try: # Skip element if its display attribute is set to none - if tag_css['display'].lower() == 'none': + if tag_css['display'].lower() == 'none' or \ + tag_css['visibility'].lower() == 'hidden': return except KeyError: pass @@ -701,7 +719,11 @@ class HTMLConverter(object): tag_css['page-break-after'].lower() != 'avoid': end_page = True tag_css.pop('page-break-after') - + if not self.page_break_found and self.page_break.match(tagname): + if len(self.current_page.contents) > 3: + self.end_page() + if self.verbose: + print 'Forcing page break at', tagname if tagname in ["title", "script", "meta", 'del', 'frameset']: pass elif tagname == 'a' and self.max_link_levels >= 0: @@ -744,12 +766,12 @@ class HTMLConverter(object): self.targets[tag['name']] = target elif tag.has_key('href') and not self.link_exclude.match(tag['href']): purl = urlparse(tag['href']) - path = purl[2] + path = unquote(purl[2]) if path and os.path.splitext(path)[1][1:].lower() in \ ['png', 'jpg', 'bmp', 'jpeg']: self.add_image_page(path) else: - self.add_text('Link: '+tag['href'], tag_css) + self.add_text('Link: ' + tag['href'], tag_css) self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag)) elif tagname == 'img': if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK): @@ -772,31 +794,32 @@ class HTMLConverter(object): return pt.name - if height > self.page_height: - corrf = self.page_height/(1.*height) - width, height = floor(corrf*width), self.page_height-1 - if width > self.page_width: - corrf = (self.page_width)/(1.*width) - width, height = self.page_width-1, floor(corrf*height) + if height > self.profile.page_height: + corrf = self.profile.page_height/(1.*height) + width, height = floor(corrf*width), self.profile.page_height-1 + if width > self.profile.page_width: + corrf = (self.profile.page_width)/(1.*width) + width, height = self.profile.page_width-1, floor(corrf*height) path = scale_image(width, height) - if width > self.page_width: - corrf = self.page_width/(1.*width) - width, height = self.page_width-1, floor(corrf*height) - if height > self.page_height: - corrf = (self.page_height)/(1.*height) - width, height = floor(corrf*width), self.page_height-1 + if width > self.profile.page_width: + corrf = self.profile.page_width/(1.*width) + width, height = self.profile.page_width-1, floor(corrf*height) + if height > self.profile.page_height: + corrf = (self.profile.page_height)/(1.*height) + width, height = floor(corrf*width), self.profile.page_height-1 path = scale_image(width, height) width, height = int(width), int(height) if not self.images.has_key(path): self.images[path] = ImageStream(path) - factor = 720./self.dpi - if max(width, height) <= min(self.page_width, self.page_height)/5.: + factor = 720./self.profile.dpi + if max(width, height) <= min(self.profile.page_width, + self.profile.page_height)/5.: im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\ xsize=width, ysize=height) self.current_para.append(Plot(im, xsize=ceil(width*factor), ysize=ceil(height*factor))) - elif height <= self.page_height/1.5: + else: pb = self.current_block self.end_current_para() self.process_alignment(tag_css) @@ -809,16 +832,7 @@ class HTMLConverter(object): self.current_block = self.book.create_text_block( textStyle=pb.textStyle, blockStyle=pb.blockStyle) - self.current_para = Paragraph() - else: - self.current_block.append(self.current_para) - self.current_page.append(self.current_block) - self.current_para = Paragraph() - self.current_block = self.book.create_text_block(textStyle=self.current_block.textStyle, - blockStyle=self.current_block.blockStyle) - im = ImageBlock(self.images[path], x1=width, y1=height, - xsize=width, ysize=height) - self.current_page.append(im) + self.current_para = Paragraph() else: print >>sys.stderr, "Failed to process:", tag elif tagname in ['style', 'link']: @@ -835,14 +849,16 @@ class HTMLConverter(object): ncss.update(self.parse_css(str(c))) elif tag.has_key('type') and tag['type'] == "text/css" \ and tag.has_key('href'): - url = tag['href'] + purl = urlparse(tag['href']) + path = unquote(purl[2]) try: - if url.startswith('http://'): - f = urlopen(url) - else: - f = open(unquote(url)) - ncss = self.parse_css(f.read()) + f = open(path, 'rb') + src = f.read() f.close() + match = self.PAGE_BREAK_PAT.search(src) + if match and not re.match('avoid', match.group(1), re.IGNORECASE): + self.page_break_found = True + ncss = self.parse_css(f.read()) except IOError: pass if ncss: @@ -917,6 +933,7 @@ class HTMLConverter(object): if self.verbose: print 'Detected chapter', src self.end_page() + self.page_break_found = True self.end_current_para() self.lstrip_toggle = True if tag_css.has_key('text-indent'): @@ -953,7 +970,7 @@ class HTMLConverter(object): self.end_current_para() self.current_block.append(CR()) self.end_current_block() - self.current_page.RuledLine(linelength=self.page_width) + self.current_page.RuledLine(linelength=self.profile.page_width) else: self.process_children(tag, tag_css) @@ -967,18 +984,21 @@ class HTMLConverter(object): for _file in self.scaled_images.values(): _file.__del__() - def process_file(path, options): cwd = os.getcwd() dirpath = None try: dirpath, path = get_path(path) - cpath, tpath = options.cover, '' - if options.cover and os.access(options.cover, os.R_OK): - try: + cpath, tpath = '', '' + if options.cover: + options.cover = os.path.abspath(os.path.expanduser(options.cover)) + cpath = options.cover + if os.access(options.cover, os.R_OK): from libprs500.prs500 import PRS500 im = PILImage.open(os.path.join(cwd, cpath)) - cim = im.resize((600, 800), PILImage.BICUBIC) + cim = im.resize((options.profile.screen_width, + options.profile.screen_height), + PILImage.BICUBIC) cf = PersistentTemporaryFile(prefix="html2lrf_", suffix=".jpg") cf.close() cim.save(cf.name) @@ -989,17 +1009,16 @@ def process_file(path, options): tf.close() tim.save(tf.name) tpath = tf.name - except ImportError: - print >>sys.stderr, "WARNING: You don't have PIL installed. ", - 'Cover and thumbnails wont work' - pass + else: + raise ConversionError, 'Cannot read from: %s', (options.cover,) title = (options.title, options.title_sort) author = (options.author, options.author_sort) args = dict(font_delta=options.font_delta, title=title, \ author=author, sourceencoding='utf8',\ freetext=options.freetext, category=options.category, - booksetting=BookSetting(dpi=10*options.dpi,screenheight=800, - screenwidth=600)) + booksetting=BookSetting(dpi=10*options.profile.dpi, + screenheight=options.profile.screen_height, + screenwidth=options.profile.screen_width)) if tpath: args['thumbnail'] = tpath header = None @@ -1011,13 +1030,16 @@ def process_file(path, options): book = Book(header=header, **args) le = re.compile(options.link_exclude) if options.link_exclude else \ re.compile('$') - conv = HTMLConverter(book, path, dpi=options.dpi, + pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \ + re.compile('$') + conv = HTMLConverter(book, path, profile=options.profile, font_delta=options.font_delta, cover=cpath, max_link_levels=options.link_levels, - baen=options.baen, + verbose=options.verbose, baen=options.baen, chapter_detection=options.chapter_detection, chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE), - link_exclude=re.compile(le)) + link_exclude=re.compile(le), page_break=pb, + hide_broken_links=not options.show_broken_links) conv.process_links() oname = options.output if not oname: @@ -1033,47 +1055,73 @@ def process_file(path, options): if dirpath: shutil.rmtree(dirpath, True) -def main(): +def parse_options(argv=None, cli=True): """ CLI for html -> lrf conversions """ + if not argv: + argv = sys.argv[1:] parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip] %prog converts mybook.html to mybook.lrf""") parser.add_option('--cover', action='store', dest='cover', default=None, \ help='Path to file containing image to be used as cover') - parser.add_option('--lrs', action='store_true', dest='lrs', \ - help='Convert to LRS', default=False) parser.add_option('--font-delta', action='store', type='int', default=0, \ help="""Increase the font size by 2 * FONT_DELTA pts. If FONT_DELTA is negative, the font size is decreased.""", dest='font_delta') - parser.add_option('--link-levels', action='store', type='int', default=sys.maxint, \ + link = parser.add_option_group('LINK PROCESSING OPTIONS') + link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \ dest='link_levels', help=r'''The maximum number of levels to recursively process ''' '''links. A value of 0 means thats links are not followed. ''' '''A negative value means that tags are ignored.''') - parser.add_option('--baen', action='store_true', default=False, dest='baen', - help='''Preprocess Baen HTML files to improve generated LRF.''') - parser.add_option('--dpi', action='store', type='int', default=166, dest='dpi', - help='''The DPI of the target device. Default is 166 for the - Sony PRS 500''') - parser.add_option('--disable-chapter-detection', action='store_false', + link.add_option('--link-exclude', dest='link_exclude', default='$', + help='''A regular expression. tags whoose href ''' + '''matches will be ignored. Defaults to %default''') + chapter = parser.add_option_group('CHAPTER OPTIONS') + chapter.add_option('--disable-chapter-detection', action='store_false', default=True, dest='chapter_detection', help='''Prevent html2lrf from automatically inserting page breaks''' '''before what it thinks are chapters.''') - parser.add_option('--chapter-regex', dest='chapter_regex', + chapter.add_option('--chapter-regex', dest='chapter_regex', default='chapter|book|appendix', help='''The regular expression used to detect chapter titles.''' - '''It is searched for in heading tags. Default is chapter|book|appendix''') - parser.add_option('--link-exclude', dest='link_exclude', default='', - help='''A regular expression. tags whoose href ''' - '''matches will be ignored''') - options, args = parser.parse_args() + '''It is searched for in heading tags. Defaults to %default''') + chapter.add_option('--page-break-before', dest='page_break', default='h[12]', + help='''If html2lrf does not find any page breaks in the ''' + '''html file and cannot detect chapter headings, it will ''' + '''automatically insert page-breaks before the tags whose ''' + '''names match this regular expression. Defaults to %default. ''' + '''You can disable it by setting the regexp to "$". ''' + '''The purpose of this option is to try to ensure that ''' + '''there are no really long pages as this degrades the page ''' + '''turn performance of the LRF. Thus this option is ignored ''' + '''if the current page has only a few elements.''') + prepro = parser.add_option_group('PREPROCESSING OPTIONS') + prepro.add_option('--baen', action='store_true', default=False, dest='baen', + help='''Preprocess Baen HTML files to improve generated LRF.''') + debug = parser.add_option_group('DEBUG OPTIONS') + debug.add_option('--verbose', dest='verbose', action='store_true', default=False, + help='''Be verbose while processing''') + debug.add_option('--lrs', action='store_true', dest='lrs', \ + help='Convert to LRS', default=False) + debug.add_option('--show-broken-links', dest='show_broken_links', action='store_true', + default=False, help='''Show the href of broken links in generated LRF''') + options, args = parser.parse_args(args=argv) if len(args) != 1: - parser.print_help() - sys.exit(1) - src = args[0] + if cli: + parser.print_help() + raise ConversionError, 'no filename specified' if options.title == None: - options.title = filename_to_utf8(os.path.splitext(os.path.basename(src))[0]) + options.title = filename_to_utf8(os.path.splitext(os.path.basename(args[0]))[0]) + return options, args + + +def main(): + try: + options, args = parse_options() + src = args[0] + except: + sys.exit(1) process_file(src, options) def console_query(dirpath, candidate, docs): diff --git a/src/libprs500/lrf/html/demo/demo.html b/src/libprs500/lrf/html/demo/demo.html index 0de23a3e09..df8b910c22 100644 --- a/src/libprs500/lrf/html/demo/demo.html +++ b/src/libprs500/lrf/html/demo/demo.html @@ -70,7 +70,7 @@

    Inline images

    - Here I demonstrate the use of inline images in the midst of text. Here is a small image embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block and finally we have a large image which is automatically placed on a page by itself and prevented from being autoscaled when the user changes from S to M to L. Try changing sizes and see how the different embedding styles behave. + Here I demonstrate the use of inline images in the midst of text. Here is a small image embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block and finally we have a large image which wont fit on this page. Try changing sizes from S to M to L and see how the images behave.


    diff --git a/src/libprs500/lrf/txt/convert_from.py b/src/libprs500/lrf/txt/convert_from.py index 7c3c6aace7..34b3136122 100644 --- a/src/libprs500/lrf/txt/convert_from.py +++ b/src/libprs500/lrf/txt/convert_from.py @@ -69,7 +69,9 @@ def convert_txt(path, options): book = Book(header=header, title=title, author=author, \ sourceencoding=options.encoding, freetext=options.freetext, \ category=options.category, booksetting=BookSetting - (dpi=10*options.dpi,screenheight=800, screenwidth=600)) + (dpi=10*options.profile.dpi, + screenheight=options.profile.screen_height, + screenwidth=options.profile.screen_height)) buffer = '' pg = book.create_page() block = book.create_text_block()