From 99e11d3693afff0a8d954cfb9ed1a797a3fce436 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 20 Apr 2007 17:29:43 +0000 Subject: [PATCH] Improvedd font propoerty handling and fixed thumbnail generation --- src/libprs500/lrf/html/convert_from.py | 146 +++++++++++++++---------- 1 file changed, 88 insertions(+), 58 deletions(-) diff --git a/src/libprs500/lrf/html/convert_from.py b/src/libprs500/lrf/html/convert_from.py index ad0adc85e9..9e2867bfe3 100644 --- a/src/libprs500/lrf/html/convert_from.py +++ b/src/libprs500/lrf/html/convert_from.py @@ -17,6 +17,9 @@ """ Code to convert HTML ebooks into LRF ebooks. + +I am indebted to esperanc for the CSS->Xylog Style conversion routines +and to Falstaff for pylrs. """ import os, re, sys from htmlentitydefs import name2codepoint @@ -48,6 +51,7 @@ class Span(_Span): (an int) if successful. Otherwise, returns None. Assumes: 1 pixel is 1/4 mm. One em is 10pts """ + result = None m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val) if m is not None: unit = float(m.group(1)) @@ -66,12 +70,7 @@ class Span(_Span): elif m.group(2)== 'mm': result = int(unit * 4) elif m.group(2)== 'cm': - result = int(unit * 10 * 4) - else: - try: - result = int(val) - except ValueError: - return None + result = int(unit * 10 * 4) return result @staticmethod @@ -80,53 +79,78 @@ class Span(_Span): Receives a dictionary of html attributes and styles and returns approximate Xylog equivalents in a new dictionary """ + def font_weight(val): + ans = None + m = re.search("([0-9]+)", val) + if m: + ans = str(int(m.group(1))) + elif val.find("bold") >= 0 or val.find("strong") >= 0: + ans = "1000" + return ans + + def font_family(val): + ans = None + if max(val.find("courier"), val.find("mono"), val.find("fixed"), val.find("typewriter"))>=0: + ans = "Courier10 BT Roman" + elif max(val.find("arial"), val.find("helvetica"), val.find("verdana"), + val.find("trebuchet"), val.find("sans")) >= 0: + ans = "Swis721 BT Roman" + return ans + + def font_size(val): + ans = None + unit = Span.unit_convert(val, 14) + if unit: + # Assume a 10 pt font (14 pixels) has fontsize 100 + ans = int (unit / 14.0 * 100) + else: + if "xx-small" in val: + ans = 40 + elif "x-small" in val >= 0: + ans = 60 + elif "small" in val: + ans = 80 + elif "xx-large" in val: + ans = 180 + elif "x-large" in val >= 0: + ans = 140 + elif "large" in val >= 0: + ans = 120 + if ans is not None: + ans += font_delta * 20 + ans = str(ans) + return ans + t = dict() for key in d.keys(): - try: - val = d[key].lower() - except IndexError: - val = None - if key == "font-family": - if max(val.find("courier"), val.find("mono"), val.find("fixed"), val.find("typewriter"))>=0: - t["fontfacename"] = "Courier10 BT Roman" - elif max(val.find("arial"), val.find("helvetica"), val.find("verdana"), - val.find("trebuchet"), val.find("sans")) >= 0: - t["fontfacename"] = "Swis721 BT Roman" - else: - t["fontfacename"] = "Dutch801 Rm BT Roman" + val = d[key].lower() + if key == 'font': + val = val.split() + val.reverse() + for sval in val: + ans = font_family(sval) + if ans: + t['fontfacename'] = ans + else: + ans = font_size(sval) + if ans: + t['fontsize'] = ans + else: + ans = font_weight(sval) + if ans: + t['fontweight'] = ans + elif key in ['font-family', 'font-name']: + ans = font_family(val) + if ans: + t['fontfacename'] = ans elif key == "font-size": - unit = Span.unit_convert(val, 14) - if unit is not None: - # Assume a 10 pt font (14 pixels) has fontsize 100 - t["fontsize"] = str(int (unit / 14.0 * 100)) - else: - if val.find("xx-small") >= 0: - t["fontsize"] = "40" - elif val.find("x-small") >= 0: - t["fontsize"] = "60" - elif val.find("small") >= 0: - t["fontsize"] = "80" - elif val.find("xx-large") >= 0: - t["fontsize"] = "180" - elif val.find("x-large") >= 0: - t["fontsize"] = "140" - elif val.find("large") >= 0: - t["fontsize"] = "120" - else: - t["fontsize"] = "100" - fnsz = int(t['fontsize']) - fnsz += font_delta * 20 - t['fontsize'] = str(fnsz) - elif key == "font-weight": - m = re.match ("\s*([0-9]+)", val) - if m is not None: - #report (m.group(1)) - t["fontweight"] = str(int(int(m.group(1)))) - else: - if val.find("bold") >= 0 or val.find("strong") >= 0: - t["fontweight"] = "1000" - else: - t["fontweight"] = "400" + ans = font_size(val) + if ans: + t['fontsize'] = ans + elif key == 'font-weight': + ans = font_weight(val) + if ans: + t['fontweight'] = val elif key.startswith("margin"): if key == "margin": u = [] @@ -161,7 +185,9 @@ class Span(_Span): else: t["align"] = "head" else: - t[key] = d[key] + print >>sys.stderr, 'Unhandled/malformed CSS key:', key, d[key] + if 'small' in t.values(): + print d, 'font-size' in d.keys() return t def __init__(self, ns, css, font_delta=0): @@ -182,12 +208,13 @@ class Span(_Span): class HTMLConverter(object): selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") - # Defaults for various formatting tags + class Link(object): def __init__(self, para, tag): self.para = para self.tag = tag + # Defaults for various formatting tags css = dict( h1 = {"font-size":"xx-large", "font-weight":"bold"}, h2 = {"font-size":"x-large", "font-weight":"bold"}, @@ -407,6 +434,7 @@ class HTMLConverter(object): test = key.lower() if test.startswith('margin') or 'indent' in test or \ 'padding' in test or 'border' in test or 'page-break' in test \ + or test.startswith('mso') \ or test in ['color', 'display', 'text-decoration', \ 'letter-spacing', 'text-autospace', 'text-transform']: css.pop(key) @@ -494,7 +522,7 @@ class HTMLConverter(object): f = open(url) self.parse_css(f.read()) f.close() - elif tagname in ['p', 'div', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']: + elif tagname in ['p', 'div', 'ul', 'ol', 'tr', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']: # TODO: Implement ol indent = tag_css.pop('text-indent', '') if indent: @@ -529,8 +557,8 @@ def process_file(path, options): cwd = os.getcwd() try: path = os.path.abspath(path) - if options.cover and os.access(options.cover, os.R_OK): - cpath, tpath = options.cover, '' + cpath, tpath = options.cover, '' + if options.cover and os.access(options.cover, os.R_OK): try: from PIL import Image from libprs500.prs500 import PRS500 @@ -551,10 +579,12 @@ def process_file(path, options): print >>sys.stderr, "WARNING: You don't have PIL installed. ", 'Cover and thumbnails wont work' pass - book = Book(font_delta=options.font_delta, title=options.title, \ + args = dict(font_delta=options.font_delta, title=options.title, \ author=options.author, sourceencoding='utf8',\ - thumbnail=tpath, freetext=options.freetext, \ - category=options.category) + freetext=options.freetext, category=options.category) + if tpath: + args['thumbnail'] = tpath + book = Book(**args) conv = HTMLConverter(book, path, font_delta=options.font_delta, cover=cpath) conv.process_links() oname = options.output