Support CSS color attribute and support reading title, author from html0 files.

This commit is contained in:
Kovid Goyal 2007-10-06 05:41:57 +00:00
parent 6bed1e2372
commit 16d1518d19
3 changed files with 165 additions and 10 deletions

View File

@ -0,0 +1,125 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import re
# Maps lower-case CSS/HTML colour names to their '#RRGGBB' hex value.
# Keys must be lower-case with no surrounding whitespace, because lookups
# are done with the lower-cased colour string taken from the document.
NAME_MAP = {
    u'aliceblue': u'#F0F8FF',
    u'antiquewhite': u'#FAEBD7',
    u'aqua': u'#00FFFF',
    u'aquamarine': u'#7FFFD4',
    u'azure': u'#F0FFFF',
    u'beige': u'#F5F5DC',
    u'bisque': u'#FFE4C4',
    u'black': u'#000000',
    u'blanchedalmond': u'#FFEBCD',
    u'blue': u'#0000FF',
    u'brown': u'#A52A2A',
    u'burlywood': u'#DEB887',
    u'cadetblue': u'#5F9EA0',
    u'chartreuse': u'#7FFF00',
    u'chocolate': u'#D2691E',
    u'coral': u'#FF7F50',
    u'crimson': u'#DC143C',
    u'cyan': u'#00FFFF',
    u'darkblue': u'#00008B',
    u'darkgoldenrod': u'#B8860B',
    u'darkgreen': u'#006400',
    u'darkkhaki': u'#BDB76B',
    u'darkmagenta': u'#8B008B',
    u'darkolivegreen': u'#556B2F',
    u'darkorange': u'#FF8C00',
    u'darkorchid': u'#9932CC',
    u'darkred': u'#8B0000',
    u'darksalmon': u'#E9967A',
    u'darkslateblue': u'#483D8B',
    u'darkslategray': u'#2F4F4F',
    u'darkslategrey': u'#2F4F4F',
    u'darkviolet': u'#9400D3',
    u'deeppink': u'#FF1493',
    u'dodgerblue': u'#1E90FF',
    u'firebrick': u'#B22222',
    u'floralwhite': u'#FFFAF0',
    u'forestgreen': u'#228B22',
    u'fuchsia': u'#FF00FF',
    u'gainsboro': u'#DCDCDC',
    u'ghostwhite': u'#F8F8FF',
    u'gold': u'#FFD700',
    u'goldenrod': u'#DAA520',
    u'gray': u'#808080',
    u'green': u'#008000',
    u'grey': u'#808080',
    # These two keys previously carried a trailing space and never matched.
    u'indianred': u'#CD5C5C',
    u'indigo': u'#4B0082',
    u'khaki': u'#F0E68C',
    u'lavenderblush': u'#FFF0F5',
    u'lawngreen': u'#7CFC00',
    u'lightblue': u'#ADD8E6',
    u'lightcoral': u'#F08080',
    u'lightgoldenrodyellow': u'#FAFAD2',
    u'lightgray': u'#D3D3D3',
    u'lightgrey': u'#D3D3D3',
    u'lightskyblue': u'#87CEFA',
    u'lightslategray': u'#778899',
    u'lightslategrey': u'#778899',
    u'lightsteelblue': u'#B0C4DE',
    # Was '#87CEFA' (lightskyblue); CSS lime is pure green.
    u'lime': u'#00FF00',
    u'linen': u'#FAF0E6',
    u'magenta': u'#FF00FF',
    u'maroon': u'#800000',
    u'mediumaquamarine': u'#66CDAA',
    u'mediumblue': u'#0000CD',
    u'mediumorchid': u'#BA55D3',
    u'mediumpurple': u'#9370DB',
    u'mediumseagreen': u'#3CB371',
    u'mediumslateblue': u'#7B68EE',
    u'midnightblue': u'#191970',
    u'moccasin': u'#FFE4B5',
    u'navajowhite': u'#FFDEAD',
    u'navy': u'#000080',
    u'oldlace': u'#FDF5E6',
    u'olive': u'#808000',
    u'orange': u'#FFA500',
    u'orangered': u'#FF4500',
    u'orchid': u'#DA70D6',
    u'paleturquoise': u'#AFEEEE',
    u'papayawhip': u'#FFEFD5',
    u'peachpuff': u'#FFDAB9',
    u'powderblue': u'#B0E0E6',
    u'purple': u'#800080',
    u'red': u'#FF0000',
    u'rosybrown': u'#BC8F8F',
    u'royalblue': u'#4169E1',
    u'saddlebrown': u'#8B4513',
    # Was '#8B4513', a copy of saddlebrown.
    u'sandybrown': u'#F4A460',
    u'seashell': u'#FFF5EE',
    u'sienna': u'#A0522D',
    u'silver': u'#C0C0C0',
    u'skyblue': u'#87CEEB',
    u'slategray': u'#708090',
    u'slategrey': u'#708090',
    u'snow': u'#FFFAFA',
    u'springgreen': u'#00FF7F',
    u'teal': u'#008080',
    u'violet': u'#EE82EE',
    u'white': u'#FFFFFF',
    u'yellow': u'#FFFF00',
    u'yellowgreen': u'#9ACD32',
}
# All patterns are applied to the lower-cased colour string.
# Six-digit hex colour '#rrggbb'; must accept a-f, not only decimal digits.
hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})')
# Three-digit shorthand '#rgb', where every digit is doubled (#abc == #aabbcc).
short_hex_pat = re.compile(r'#([0-9a-f])([0-9a-f])([0-9a-f])(?![0-9a-f])')
# Functional notation 'rgb(r, g, b)' with decimal channel values.
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)


def lrs_color(html_color):
    """Convert an HTML/CSS colour specification to a '0x00RRGGBB' string.

    Accepted forms are '#rrggbb', '#rgb', 'rgb(r, g, b)' and any colour
    name present in NAME_MAP. Matching is case-insensitive and the hex
    digits in the result are upper-case.

    :param html_color: the colour string as found in the document.
    :return: a string of the form '0x00RRGGBB'; '0x00000000' when the
             input is empty or not recognised.
    """
    if not html_color:
        return '0x00000000'
    hcol = html_color.strip().lower()
    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups()).upper()
    match = short_hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join([digit * 2 for digit in match.groups()]).upper()
    match = rgb_pat.search(hcol)
    if match:
        # Clamp to 255 and zero-pad so each channel is exactly two hex digits.
        channels = [min(int(value), 255) for value in match.groups()]
        return '0x00%02X%02X%02X' % tuple(channels)
    if hcol in NAME_MAP:
        # NAME_MAP stores '#RRGGBB'; convert to the same form as the other branches.
        return NAME_MAP[hcol].replace('#', '0x00')
    return '0x00000000'

View File

@ -45,6 +45,7 @@ from libprs500 import filename_to_utf8, setup_cli_handlers, __appname__
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.metadata.opf import OPFReader
from libprs500.devices.interface import Device
from libprs500.ebooks.lrf.html.color_map import lrs_color
class HTMLConverter(object):
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
@ -96,14 +97,20 @@ class HTMLConverter(object):
# Fix Book Designer markup
# Each entry is a (compiled regex, replacement callable) pair; the callable
# receives the match object and returns the markup to substitute.
# NOTE(review): this list is appended to nmassage in start_on_file when
# self.book_designer is set; presumably the pairs are then applied in order
# to the raw HTML before parsing -- confirm against the parser call.
BOOK_DESIGNER = [
# Create header tags
# NOTE(review): the first two rules match an <h2 id=BookTitle/BookAuthor>
# terminated by </span>; the two rules after them match the same ids
# terminated by </h2>, keep the id attribute and reuse a captured align=
# value (defaulting to 'center'). They look like an older and a newer
# variant of the same rule shown together -- confirm which are live.
(re.compile('<h2.*?id=BookTitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h1 align="center">%s</h1>'%(match.group(1),)),
(re.compile('<h2.*?id=BookAuthor.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2 align="right">%s</h2>'%(match.group(1),)),
# group(2) is the word following align= (if any); group(3) is the heading body.
(re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
(re.compile('<h2.*?id=BookAuthor.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
# <span id=title> becomes <h2>, <span id=subtitle> becomes <h3>.
(re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h2>%s</h2>'%(match.group(1),)),
(re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
lambda match : '<h3>%s</h3>'%(match.group(1),)),
# Blank lines: a <div> holding exactly four &nbsp; entities becomes an empty paragraph
(re.compile('<div.*?>(&nbsp;){4}</div>', re.IGNORECASE),
lambda match : '<p></p>'),
# HR: replace <hr> with a span styled page-break-after:always
(re.compile('<hr>', re.IGNORECASE),
lambda match : '<span style="page-break-after:always"> </span>'),
]
def __hasattr__(self, attr):
@ -196,6 +203,8 @@ class HTMLConverter(object):
'content':re.compile('Baen', re.IGNORECASE)}))
def start_on_file(self, path, is_root=True, link_level=0):
self.css = HTMLConverter.CSS.copy()
self.pseudo_css = {}
path = os.path.abspath(path)
os.chdir(os.path.dirname(path))
self.file_name = os.path.basename(path)
@ -210,6 +219,8 @@ class HTMLConverter(object):
if self.pdftohtml:
nmassage.extend(HTMLConverter.PDFTOHTML)
#raw = unicode(raw, 'utf8', 'replace')
if self.book_designer:
nmassage.extend(HTMLConverter.BOOK_DESIGNER)
try:
soup = BeautifulSoup(raw,
convertEntities=BeautifulSoup.HTML_ENTITIES,
@ -225,6 +236,13 @@ class HTMLConverter(object):
self.baen = True
self.logger.info('Baen file detected. Re-parsing...')
return self.start_on_file(path, is_root=is_root, link_level=link_level)
if self.book_designer:
t = soup.find(id='BookTitle')
if t:
self.book.set_title(self.get_text(t))
a = soup.find(id='BookAuthor')
if a:
self.book.set_author(self.get_text(a))
self.logger.info('\tConverting to BBeB...')
sys.stdout.flush()
self.current_page = None
@ -234,8 +252,6 @@ class HTMLConverter(object):
match = self.PAGE_BREAK_PAT.search(unicode(soup))
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
self.page_break_found = True
self.css = HTMLConverter.CSS.copy()
self.pseudo_css = {}
self.target_prefix = path
self.links[path] = []
self.previous_text = '\n'
@ -301,7 +317,7 @@ class HTMLConverter(object):
# however we need to as we don't do alignment at a block level.
# float is removed by the process_alignment function.
if chk.startswith('font') or chk == 'text-align' or \
chk == 'float' or chk == 'white-space':
chk == 'float' or chk == 'white-space' or chk == 'color':
temp[key] = pcss[key]
prop.update(temp)
@ -656,7 +672,11 @@ class HTMLConverter(object):
unneeded.append(prop)
for prop in unneeded:
fp.pop(prop)
elem = Span(text=src, **fp) if (fp or force_span_use) else src
attrs = {}
if 'color' in css:
attrs['textcolor'] = lrs_color(css['color'])
attrs.update(fp)
elem = Span(text=src, **attrs) if (attrs or force_span_use) else src
if css.has_key('text-decoration'):
dec = css['text-decoration'].lower()
linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None
@ -1372,6 +1392,8 @@ class HTMLConverter(object):
elif tagname == 'font':
if tag.has_key('face'):
tag_css['font-family'] = tag['face']
if tag.has_key('color'):
tag_css['color'] = tag['color']
self.process_children(tag, tag_css, tag_pseudo_css)
elif tagname in ['br']:
self.line_break()

View File

@ -442,6 +442,14 @@ class Book(Delegator):
self.gc_count = 0
def set_title(self, title):
    """Replace the title text, keeping the second element of the old title.

    The title lives as a 2-tuple on the object reached through three levels
    of ``delegates[0]``; only the first element is replaced.
    """
    target = self.delegates[0].delegates[0].delegates[0]
    second = target.title[1]
    target.title = (title, second)
def set_author(self, author):
    """Replace the author text, keeping the second element of the old author.

    The author lives as a 2-tuple on the object reached through three levels
    of ``delegates[0]``; only the first element is replaced.
    """
    target = self.delegates[0].delegates[0].delegates[0]
    second = target.author[1]
    target.author = (author, second)
def create_text_style(self, **settings):
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
ans.update(settings)