Support the CSS color attribute and support reading the title and author from html0 files.

This commit is contained in:
Kovid Goyal 2007-10-06 05:41:57 +00:00
parent 6bed1e2372
commit 16d1518d19
3 changed files with 165 additions and 10 deletions

View File

@ -0,0 +1,125 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import re
# Maps lower-case CSS colour keywords to their '#RRGGBB' hexadecimal value.
# Keys must be lower case with no surrounding whitespace, because lookups are
# done with the lower-cased colour string taken from the document.
# Values follow the CSS3 extended colour keyword table.
NAME_MAP = {
    u'aliceblue': u'#F0F8FF',
    u'antiquewhite': u'#FAEBD7',
    u'aqua': u'#00FFFF',
    u'aquamarine': u'#7FFFD4',
    u'azure': u'#F0FFFF',
    u'beige': u'#F5F5DC',
    u'bisque': u'#FFE4C4',
    u'black': u'#000000',
    u'blanchedalmond': u'#FFEBCD',
    u'blue': u'#0000FF',
    u'blueviolet': u'#8A2BE2',
    u'brown': u'#A52A2A',
    u'burlywood': u'#DEB887',
    u'cadetblue': u'#5F9EA0',
    u'chartreuse': u'#7FFF00',
    u'chocolate': u'#D2691E',
    u'coral': u'#FF7F50',
    u'cornflowerblue': u'#6495ED',
    u'cornsilk': u'#FFF8DC',
    u'crimson': u'#DC143C',
    u'cyan': u'#00FFFF',
    u'darkblue': u'#00008B',
    u'darkcyan': u'#008B8B',
    u'darkgoldenrod': u'#B8860B',
    u'darkgray': u'#A9A9A9',
    u'darkgreen': u'#006400',
    u'darkgrey': u'#A9A9A9',
    u'darkkhaki': u'#BDB76B',
    u'darkmagenta': u'#8B008B',
    u'darkolivegreen': u'#556B2F',
    u'darkorange': u'#FF8C00',
    u'darkorchid': u'#9932CC',
    u'darkred': u'#8B0000',
    u'darksalmon': u'#E9967A',
    u'darkseagreen': u'#8FBC8F',
    u'darkslateblue': u'#483D8B',
    u'darkslategray': u'#2F4F4F',
    u'darkslategrey': u'#2F4F4F',
    u'darkturquoise': u'#00CED1',
    u'darkviolet': u'#9400D3',
    u'deeppink': u'#FF1493',
    u'deepskyblue': u'#00BFFF',
    u'dimgray': u'#696969',
    u'dimgrey': u'#696969',
    u'dodgerblue': u'#1E90FF',
    u'firebrick': u'#B22222',
    u'floralwhite': u'#FFFAF0',
    u'forestgreen': u'#228B22',
    u'fuchsia': u'#FF00FF',
    u'gainsboro': u'#DCDCDC',
    u'ghostwhite': u'#F8F8FF',
    u'gold': u'#FFD700',
    u'goldenrod': u'#DAA520',
    u'gray': u'#808080',
    u'green': u'#008000',
    u'greenyellow': u'#ADFF2F',
    u'grey': u'#808080',
    u'honeydew': u'#F0FFF0',
    u'hotpink': u'#FF69B4',
    u'indianred': u'#CD5C5C',
    u'indigo': u'#4B0082',
    u'ivory': u'#FFFFF0',
    u'khaki': u'#F0E68C',
    u'lavender': u'#E6E6FA',
    u'lavenderblush': u'#FFF0F5',
    u'lawngreen': u'#7CFC00',
    u'lemonchiffon': u'#FFFACD',
    u'lightblue': u'#ADD8E6',
    u'lightcoral': u'#F08080',
    u'lightcyan': u'#E0FFFF',
    u'lightgoldenrodyellow': u'#FAFAD2',
    u'lightgray': u'#D3D3D3',
    u'lightgreen': u'#90EE90',
    u'lightgrey': u'#D3D3D3',
    u'lightpink': u'#FFB6C1',
    u'lightsalmon': u'#FFA07A',
    u'lightseagreen': u'#20B2AA',
    u'lightskyblue': u'#87CEFA',
    u'lightslategray': u'#778899',
    u'lightslategrey': u'#778899',
    u'lightsteelblue': u'#B0C4DE',
    u'lightyellow': u'#FFFFE0',
    u'lime': u'#00FF00',
    u'limegreen': u'#32CD32',
    u'linen': u'#FAF0E6',
    u'magenta': u'#FF00FF',
    u'maroon': u'#800000',
    u'mediumaquamarine': u'#66CDAA',
    u'mediumblue': u'#0000CD',
    u'mediumorchid': u'#BA55D3',
    u'mediumpurple': u'#9370DB',
    u'mediumseagreen': u'#3CB371',
    u'mediumslateblue': u'#7B68EE',
    u'mediumspringgreen': u'#00FA9A',
    u'mediumturquoise': u'#48D1CC',
    u'mediumvioletred': u'#C71585',
    u'midnightblue': u'#191970',
    u'mintcream': u'#F5FFFA',
    u'mistyrose': u'#FFE4E1',
    u'moccasin': u'#FFE4B5',
    u'navajowhite': u'#FFDEAD',
    u'navy': u'#000080',
    u'oldlace': u'#FDF5E6',
    u'olive': u'#808000',
    u'olivedrab': u'#6B8E23',
    u'orange': u'#FFA500',
    u'orangered': u'#FF4500',
    u'orchid': u'#DA70D6',
    u'palegoldenrod': u'#EEE8AA',
    u'palegreen': u'#98FB98',
    u'paleturquoise': u'#AFEEEE',
    u'palevioletred': u'#DB7093',
    u'papayawhip': u'#FFEFD5',
    u'peachpuff': u'#FFDAB9',
    u'peru': u'#CD853F',
    u'pink': u'#FFC0CB',
    u'plum': u'#DDA0DD',
    u'powderblue': u'#B0E0E6',
    u'purple': u'#800080',
    u'red': u'#FF0000',
    u'rosybrown': u'#BC8F8F',
    u'royalblue': u'#4169E1',
    u'saddlebrown': u'#8B4513',
    u'salmon': u'#FA8072',
    u'sandybrown': u'#F4A460',
    u'seagreen': u'#2E8B57',
    u'seashell': u'#FFF5EE',
    u'sienna': u'#A0522D',
    u'silver': u'#C0C0C0',
    u'skyblue': u'#87CEEB',
    u'slateblue': u'#6A5ACD',
    u'slategray': u'#708090',
    u'slategrey': u'#708090',
    u'snow': u'#FFFAFA',
    u'springgreen': u'#00FF7F',
    u'steelblue': u'#4682B4',
    u'tan': u'#D2B48C',
    u'teal': u'#008080',
    u'thistle': u'#D8BFD8',
    u'tomato': u'#FF6347',
    u'turquoise': u'#40E0D0',
    u'violet': u'#EE82EE',
    u'wheat': u'#F5DEB3',
    u'white': u'#FFFFFF',
    u'whitesmoke': u'#F5F5F5',
    u'yellow': u'#FFFF00',
    u'yellowgreen': u'#9ACD32',
}
# Six-digit hexadecimal colour such as '#1e90ff'. Callers lower-case the text
# before matching, so only lower-case hex digits need to be accepted.
hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})')
# Three-digit shorthand such as '#f0a'. The lookahead stops it from matching
# the first three digits of a longer (malformed) hex run.
short_hex_pat = re.compile(r'#([0-9a-f])([0-9a-f])([0-9a-f])(?![0-9a-f])')
# Functional notation with integer channels, e.g. 'rgb(255, 0, 128)'.
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)


def lrs_color(html_color):
    """Convert an HTML/CSS colour specification to an LRS colour string.

    Accepted forms are '#rrggbb', the '#rgb' shorthand, 'rgb(r, g, b)' with
    integer channels, and the colour keywords listed in NAME_MAP. Matching is
    case-insensitive and ignores surrounding whitespace.

    @param html_color: the colour text taken from the document.
    @return: a string of the form '0x00rrggbb' (lower-case hex digits), or
             '0x00000000' when the colour cannot be interpreted.
    """
    hcol = html_color.strip().lower()

    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups())

    match = short_hex_pat.search(hcol)
    if match:
        # '#abc' is shorthand for '#aabbcc': double every digit.
        return '0x00' + ''.join(digit * 2 for digit in match.groups())

    match = rgb_pat.search(hcol)
    if match:
        # Clamp to one byte and zero-pad so every channel is exactly two
        # hex digits; otherwise rgb(255, 0, 0) would collapse to 'ff00'.
        channels = tuple(min(int(value), 255) for value in match.groups())
        return '0x00%02x%02x%02x' % channels

    named = NAME_MAP.get(hcol)
    if named is not None:
        # NAME_MAP stores '#RRGGBB'; convert it to the same output format
        # as every other branch instead of returning it verbatim.
        match = hex_pat.search(named.lower())
        if match:
            return '0x00' + ''.join(match.groups())

    return '0x00000000'

View File

@ -45,6 +45,7 @@ from libprs500 import filename_to_utf8, setup_cli_handlers, __appname__
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.metadata.opf import OPFReader
from libprs500.devices.interface import Device
from libprs500.ebooks.lrf.html.color_map import lrs_color
class HTMLConverter(object):
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
@ -96,14 +97,20 @@ class HTMLConverter(object):
# Fix Book Designer markup
# Each entry is a (compiled pattern, replacement callable) pair. The callable
# receives the regex match object and returns the replacement markup.
# NOTE(review): this listing comes from a diff view, so it appears to show
# both the superseded '</span>'-terminated BookTitle/BookAuthor entries and
# their '</h2>'-terminated replacements -- confirm against the real file.
BOOK_DESIGNER = [
# Create header tags
(re.compile('<h2.*?id=BookTitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h1 align="center">%s</h1>'%(match.group(1),)),
(re.compile('<h2.*?id=BookAuthor.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2 align="right">%s</h2>'%(match.group(1),)),
# These variants keep the id attribute on the generated heading and fall back
# to align="center" when group 2 is empty.
# NOTE(review): the lazy '.*?' in front of '(align=)*' can match nothing, so
# group 2 presumably only captures when 'align=' directly follows the id
# value -- verify that the alignment is ever picked up.
(re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile('<h2.*?id=BookAuthor.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
# <span id=title> becomes <h2> and <span id=subtitle> becomes <h3>.
(re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2>%s</h2>'%(match.group(1),)),
(re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h3>%s</h3>'%(match.group(1),)),
# Blank lines
# A <div> holding exactly four &nbsp; entities is replaced by an empty paragraph.
(re.compile('<div.*?>(&nbsp;){4}</div>', re.IGNORECASE),
lambda match : '<p></p>'),
# HR
# A bare <hr> is replaced by a span carrying page-break-after:always.
(re.compile('<hr>', re.IGNORECASE),
lambda match : '<span style="page-break-after:always"> </span>'),
]
def __hasattr__(self, attr):
@ -196,6 +203,8 @@ class HTMLConverter(object):
'content':re.compile('Baen', re.IGNORECASE)}))
def start_on_file(self, path, is_root=True, link_level=0):
self.css = HTMLConverter.CSS.copy()
self.pseudo_css = {}
path = os.path.abspath(path)
os.chdir(os.path.dirname(path))
self.file_name = os.path.basename(path)
@ -210,6 +219,8 @@ class HTMLConverter(object):
if self.pdftohtml:
nmassage.extend(HTMLConverter.PDFTOHTML)
#raw = unicode(raw, 'utf8', 'replace')
if self.book_designer:
nmassage.extend(HTMLConverter.BOOK_DESIGNER)
try:
soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.HTML_ENTITIES,
@ -225,6 +236,13 @@ class HTMLConverter(object):
self.baen = True
self.logger.info('Baen file detected. Re-parsing...')
return self.start_on_file(path, is_root=is_root, link_level=link_level)
if self.book_designer:
t = soup.find(id='BookTitle')
if t:
self.book.set_title(self.get_text(t))
a = soup.find(id='BookAuthor')
if a:
self.book.set_author(self.get_text(a))
self.logger.info('\tConverting to BBeB...')
sys.stdout.flush()
self.current_page = None
@ -234,8 +252,6 @@ class HTMLConverter(object):
match = self.PAGE_BREAK_PAT.search(unicode(soup))
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
self.page_break_found = True
self.css = HTMLConverter.CSS.copy()
self.pseudo_css = {}
self.target_prefix = path
self.links[path] = []
self.previous_text = '\n'
@ -278,7 +294,7 @@ class HTMLConverter(object):
Parses a style attribute. The code within a CSS selector block or in
the style attribute of an HTML element.
@return: A dictionary with one entry for each property where the key
is the property name and the value is the property value.
is the property name and the value is the property value.
"""
prop = dict()
for s in props.split(';'):
@ -301,7 +317,7 @@ class HTMLConverter(object):
# however we need to as we don't do alignment at a block level.
# float is removed by the process_alignment function.
if chk.startswith('font') or chk == 'text-align' or \
chk == 'float' or chk == 'white-space':
chk == 'float' or chk == 'white-space' or chk == 'color':
temp[key] = pcss[key]
prop.update(temp)
@ -656,7 +672,11 @@ class HTMLConverter(object):
unneeded.append(prop)
for prop in unneeded:
fp.pop(prop)
elem = Span(text=src, **fp) if (fp or force_span_use) else src
attrs = {}
if 'color' in css:
attrs['textcolor'] = lrs_color(css['color'])
attrs.update(fp)
elem = Span(text=src, **attrs) if (attrs or force_span_use) else src
if css.has_key('text-decoration'):
dec = css['text-decoration'].lower()
linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None
@ -1372,6 +1392,8 @@ class HTMLConverter(object):
elif tagname == 'font':
if tag.has_key('face'):
tag_css['font-family'] = tag['face']
if tag.has_key('color'):
tag_css['color'] = tag['color']
self.process_children(tag, tag_css, tag_pseudo_css)
elif tagname in ['br']:
self.line_break()

View File

@ -442,6 +442,14 @@ class Book(Delegator):
self.gc_count = 0
def set_title(self, title):
    """Replace the title text stored on the innermost delegate.

    The stored title is indexed like a pair: its first element is replaced
    by *title* and its second element is carried over unchanged.
    """
    # The object holding the title sits three delegate levels down.
    holder = self.delegates[0].delegates[0].delegates[0]
    previous = holder.title
    holder.title = (title, previous[1])
def set_author(self, author):
    """Replace the author text stored on the innermost delegate.

    The stored author is indexed like a pair: its first element is replaced
    by *author* and its second element is carried over unchanged.
    """
    # The object holding the author sits three delegate levels down.
    holder = self.delegates[0].delegates[0].delegates[0]
    previous = holder.author
    holder.author = (author, previous[1])
def create_text_style(self, **settings):
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
ans.update(settings)