mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improved font property handling and fixed thumbnail generation
This commit is contained in:
parent
54676835c7
commit
99e11d3693
@ -17,6 +17,9 @@
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
Code to convert HTML ebooks into LRF ebooks.
|
Code to convert HTML ebooks into LRF ebooks.
|
||||||
|
|
||||||
|
I am indebted to esperanc for the CSS->Xylog Style conversion routines
|
||||||
|
and to Falstaff for pylrs.
|
||||||
"""
|
"""
|
||||||
import os, re, sys
|
import os, re, sys
|
||||||
from htmlentitydefs import name2codepoint
|
from htmlentitydefs import name2codepoint
|
||||||
@ -48,6 +51,7 @@ class Span(_Span):
|
|||||||
(an int) if successful. Otherwise, returns None.
|
(an int) if successful. Otherwise, returns None.
|
||||||
Assumes: 1 pixel is 1/4 mm. One em is 10pts
|
Assumes: 1 pixel is 1/4 mm. One em is 10pts
|
||||||
"""
|
"""
|
||||||
|
result = None
|
||||||
m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
|
m = re.match("\s*(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)", val)
|
||||||
if m is not None:
|
if m is not None:
|
||||||
unit = float(m.group(1))
|
unit = float(m.group(1))
|
||||||
@ -67,11 +71,6 @@ class Span(_Span):
|
|||||||
result = int(unit * 4)
|
result = int(unit * 4)
|
||||||
elif m.group(2)== 'cm':
|
elif m.group(2)== 'cm':
|
||||||
result = int(unit * 10 * 4)
|
result = int(unit * 10 * 4)
|
||||||
else:
|
|
||||||
try:
|
|
||||||
result = int(val)
|
|
||||||
except ValueError:
|
|
||||||
return None
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -80,53 +79,78 @@ class Span(_Span):
|
|||||||
Receives a dictionary of html attributes and styles and returns
|
Receives a dictionary of html attributes and styles and returns
|
||||||
approximate Xylog equivalents in a new dictionary
|
approximate Xylog equivalents in a new dictionary
|
||||||
"""
|
"""
|
||||||
|
def font_weight(val):
    """
    Translate a CSS font-weight value into a weight string.

    Returns the first run of digits found in val, normalised through
    int() (so "0700" becomes "700"). If val contains no digits but
    contains "bold" or "strong", returns "1000". Otherwise returns None
    so that the caller can leave any existing weight untouched.
    """
    ans = None
    # search (not match) so a number anywhere in the value is picked up
    m = re.search(r"([0-9]+)", val)
    if m:
        ans = str(int(m.group(1)))
    elif "bold" in val or "strong" in val:
        ans = "1000"
    return ans
|
||||||
|
|
||||||
|
def font_family(val):
    """
    Map a CSS font-family value onto one of the known font face names.

    val is searched for substrings: monospace-like names yield
    "Courier10 BT Roman", sans-serif-like names yield "Swis721 BT Roman".
    The monospace check takes precedence. Returns None when nothing
    matches so that the caller can keep its current/default face.
    """
    monospace_hints = ("courier", "mono", "fixed", "typewriter")
    sans_hints = ("arial", "helvetica", "verdana", "trebuchet", "sans")
    ans = None
    if any(hint in val for hint in monospace_hints):
        ans = "Courier10 BT Roman"
    elif any(hint in val for hint in sans_hints):
        ans = "Swis721 BT Roman"
    return ans
|
||||||
|
|
||||||
|
def font_size(val):
    """
    Translate a CSS font-size value into a font size string.

    First tries Span.unit_convert(val, 14); a non-zero result is scaled
    so that 14 units correspond to a size of 100. Otherwise the CSS
    keywords xx-small ... xx-large are looked up by substring. Any
    resulting size is shifted by font_delta * 20 (font_delta comes from
    the enclosing scope) and returned as a string. Returns None when val
    is not understood, so the caller can leave the size untouched.
    """
    # Order matters: the longer keywords must be tested before the
    # shorter ones they contain ("xx-small" before "x-small" before
    # "small", and likewise for the "large" family).
    keyword_sizes = (
        ("xx-small", 40),
        ("x-small", 60),
        ("small", 80),
        ("xx-large", 180),
        ("x-large", 140),
        ("large", 120),
    )
    ans = None
    unit = Span.unit_convert(val, 14)
    if unit:
        # Assume a 10 pt font (14 pixels) has fontsize 100
        ans = int(unit / 14.0 * 100)
    else:
        for keyword, size in keyword_sizes:
            # Plain membership test; the former `kw in val >= 0` was a
            # chained comparison that also compared the string val to 0.
            if keyword in val:
                ans = size
                break
    if ans is not None:
        ans += font_delta * 20
        ans = str(ans)
    return ans
|
||||||
|
|
||||||
t = dict()
|
t = dict()
|
||||||
for key in d.keys():
|
for key in d.keys():
|
||||||
try:
|
val = d[key].lower()
|
||||||
val = d[key].lower()
|
if key == 'font':
|
||||||
except IndexError:
|
val = val.split()
|
||||||
val = None
|
val.reverse()
|
||||||
if key == "font-family":
|
for sval in val:
|
||||||
if max(val.find("courier"), val.find("mono"), val.find("fixed"), val.find("typewriter"))>=0:
|
ans = font_family(sval)
|
||||||
t["fontfacename"] = "Courier10 BT Roman"
|
if ans:
|
||||||
elif max(val.find("arial"), val.find("helvetica"), val.find("verdana"),
|
t['fontfacename'] = ans
|
||||||
val.find("trebuchet"), val.find("sans")) >= 0:
|
else:
|
||||||
t["fontfacename"] = "Swis721 BT Roman"
|
ans = font_size(sval)
|
||||||
else:
|
if ans:
|
||||||
t["fontfacename"] = "Dutch801 Rm BT Roman"
|
t['fontsize'] = ans
|
||||||
|
else:
|
||||||
|
ans = font_weight(sval)
|
||||||
|
if ans:
|
||||||
|
t['fontweight'] = ans
|
||||||
|
elif key in ['font-family', 'font-name']:
|
||||||
|
ans = font_family(val)
|
||||||
|
if ans:
|
||||||
|
t['fontfacename'] = ans
|
||||||
elif key == "font-size":
|
elif key == "font-size":
|
||||||
unit = Span.unit_convert(val, 14)
|
ans = font_size(val)
|
||||||
if unit is not None:
|
if ans:
|
||||||
# Assume a 10 pt font (14 pixels) has fontsize 100
|
t['fontsize'] = ans
|
||||||
t["fontsize"] = str(int (unit / 14.0 * 100))
|
elif key == 'font-weight':
|
||||||
else:
|
ans = font_weight(val)
|
||||||
if val.find("xx-small") >= 0:
|
if ans:
|
||||||
t["fontsize"] = "40"
|
t['fontweight'] = val
|
||||||
elif val.find("x-small") >= 0:
|
|
||||||
t["fontsize"] = "60"
|
|
||||||
elif val.find("small") >= 0:
|
|
||||||
t["fontsize"] = "80"
|
|
||||||
elif val.find("xx-large") >= 0:
|
|
||||||
t["fontsize"] = "180"
|
|
||||||
elif val.find("x-large") >= 0:
|
|
||||||
t["fontsize"] = "140"
|
|
||||||
elif val.find("large") >= 0:
|
|
||||||
t["fontsize"] = "120"
|
|
||||||
else:
|
|
||||||
t["fontsize"] = "100"
|
|
||||||
fnsz = int(t['fontsize'])
|
|
||||||
fnsz += font_delta * 20
|
|
||||||
t['fontsize'] = str(fnsz)
|
|
||||||
elif key == "font-weight":
|
|
||||||
m = re.match ("\s*([0-9]+)", val)
|
|
||||||
if m is not None:
|
|
||||||
#report (m.group(1))
|
|
||||||
t["fontweight"] = str(int(int(m.group(1))))
|
|
||||||
else:
|
|
||||||
if val.find("bold") >= 0 or val.find("strong") >= 0:
|
|
||||||
t["fontweight"] = "1000"
|
|
||||||
else:
|
|
||||||
t["fontweight"] = "400"
|
|
||||||
elif key.startswith("margin"):
|
elif key.startswith("margin"):
|
||||||
if key == "margin":
|
if key == "margin":
|
||||||
u = []
|
u = []
|
||||||
@ -161,7 +185,9 @@ class Span(_Span):
|
|||||||
else:
|
else:
|
||||||
t["align"] = "head"
|
t["align"] = "head"
|
||||||
else:
|
else:
|
||||||
t[key] = d[key]
|
print >>sys.stderr, 'Unhandled/malformed CSS key:', key, d[key]
|
||||||
|
if 'small' in t.values():
|
||||||
|
print d, 'font-size' in d.keys()
|
||||||
return t
|
return t
|
||||||
|
|
||||||
def __init__(self, ns, css, font_delta=0):
|
def __init__(self, ns, css, font_delta=0):
|
||||||
@ -182,12 +208,13 @@ class Span(_Span):
|
|||||||
|
|
||||||
class HTMLConverter(object):
|
class HTMLConverter(object):
|
||||||
selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
selector_pat = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
||||||
# Defaults for various formatting tags
|
|
||||||
class Link(object):
|
class Link(object):
|
||||||
def __init__(self, para, tag):
|
def __init__(self, para, tag):
|
||||||
self.para = para
|
self.para = para
|
||||||
self.tag = tag
|
self.tag = tag
|
||||||
|
|
||||||
|
# Defaults for various formatting tags
|
||||||
css = dict(
|
css = dict(
|
||||||
h1 = {"font-size":"xx-large", "font-weight":"bold"},
|
h1 = {"font-size":"xx-large", "font-weight":"bold"},
|
||||||
h2 = {"font-size":"x-large", "font-weight":"bold"},
|
h2 = {"font-size":"x-large", "font-weight":"bold"},
|
||||||
@ -407,6 +434,7 @@ class HTMLConverter(object):
|
|||||||
test = key.lower()
|
test = key.lower()
|
||||||
if test.startswith('margin') or 'indent' in test or \
|
if test.startswith('margin') or 'indent' in test or \
|
||||||
'padding' in test or 'border' in test or 'page-break' in test \
|
'padding' in test or 'border' in test or 'page-break' in test \
|
||||||
|
or test.startswith('mso') \
|
||||||
or test in ['color', 'display', 'text-decoration', \
|
or test in ['color', 'display', 'text-decoration', \
|
||||||
'letter-spacing', 'text-autospace', 'text-transform']:
|
'letter-spacing', 'text-autospace', 'text-transform']:
|
||||||
css.pop(key)
|
css.pop(key)
|
||||||
@ -494,7 +522,7 @@ class HTMLConverter(object):
|
|||||||
f = open(url)
|
f = open(url)
|
||||||
self.parse_css(f.read())
|
self.parse_css(f.read())
|
||||||
f.close()
|
f.close()
|
||||||
elif tagname in ['p', 'div', 'ul', 'ol', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
|
elif tagname in ['p', 'div', 'ul', 'ol', 'tr', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
|
||||||
# TODO: Implement ol
|
# TODO: Implement ol
|
||||||
indent = tag_css.pop('text-indent', '')
|
indent = tag_css.pop('text-indent', '')
|
||||||
if indent:
|
if indent:
|
||||||
@ -529,8 +557,8 @@ def process_file(path, options):
|
|||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
try:
|
try:
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
|
cpath, tpath = options.cover, ''
|
||||||
if options.cover and os.access(options.cover, os.R_OK):
|
if options.cover and os.access(options.cover, os.R_OK):
|
||||||
cpath, tpath = options.cover, ''
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from libprs500.prs500 import PRS500
|
from libprs500.prs500 import PRS500
|
||||||
@ -551,10 +579,12 @@ def process_file(path, options):
|
|||||||
print >>sys.stderr, "WARNING: You don't have PIL installed. ",
|
print >>sys.stderr, "WARNING: You don't have PIL installed. ",
|
||||||
'Cover and thumbnails wont work'
|
'Cover and thumbnails wont work'
|
||||||
pass
|
pass
|
||||||
book = Book(font_delta=options.font_delta, title=options.title, \
|
args = dict(font_delta=options.font_delta, title=options.title, \
|
||||||
author=options.author, sourceencoding='utf8',\
|
author=options.author, sourceencoding='utf8',\
|
||||||
thumbnail=tpath, freetext=options.freetext, \
|
freetext=options.freetext, category=options.category)
|
||||||
category=options.category)
|
if tpath:
|
||||||
|
args['thumbnail'] = tpath
|
||||||
|
book = Book(**args)
|
||||||
conv = HTMLConverter(book, path, font_delta=options.font_delta, cover=cpath)
|
conv = HTMLConverter(book, path, font_delta=options.font_delta, cover=cpath)
|
||||||
conv.process_links()
|
conv.process_links()
|
||||||
oname = options.output
|
oname = options.output
|
||||||
|
Loading…
x
Reference in New Issue
Block a user