From 16d1518d196613adc73eb18f93e81723dd385e13 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Oct 2007 05:41:57 +0000 Subject: [PATCH] Support CSS color attribute and support reading title,author from html0 files. --- src/libprs500/ebooks/lrf/html/color_map.py | 125 ++++++++++++++++++ src/libprs500/ebooks/lrf/html/convert_from.py | 42 ++++-- src/libprs500/ebooks/lrf/pylrs/pylrs.py | 8 ++ 3 files changed, 165 insertions(+), 10 deletions(-) create mode 100644 src/libprs500/ebooks/lrf/html/color_map.py diff --git a/src/libprs500/ebooks/lrf/html/color_map.py b/src/libprs500/ebooks/lrf/html/color_map.py new file mode 100644 index 0000000000..07660f30c7 --- /dev/null +++ b/src/libprs500/ebooks/lrf/html/color_map.py @@ -0,0 +1,125 @@ +## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License along +## with this program; if not, write to the Free Software Foundation, Inc., +## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
import re

# Maps lower-case HTML/CSS color keywords to their '#RRGGBB' value.
# The table is not the complete CSS keyword list; lrs_color() falls back to
# black for any name that is not present here.
NAME_MAP = {
    u'aliceblue': u'#F0F8FF',
    u'antiquewhite': u'#FAEBD7',
    u'aqua': u'#00FFFF',
    u'aquamarine': u'#7FFFD4',
    u'azure': u'#F0FFFF',
    u'beige': u'#F5F5DC',
    u'bisque': u'#FFE4C4',
    u'black': u'#000000',
    u'blanchedalmond': u'#FFEBCD',
    u'blue': u'#0000FF',
    u'brown': u'#A52A2A',
    u'burlywood': u'#DEB887',
    u'cadetblue': u'#5F9EA0',
    u'chartreuse': u'#7FFF00',
    u'chocolate': u'#D2691E',
    u'coral': u'#FF7F50',
    u'crimson': u'#DC143C',
    u'cyan': u'#00FFFF',
    u'darkblue': u'#00008B',
    u'darkgoldenrod': u'#B8860B',
    u'darkgreen': u'#006400',
    u'darkkhaki': u'#BDB76B',
    u'darkmagenta': u'#8B008B',
    u'darkolivegreen': u'#556B2F',
    u'darkorange': u'#FF8C00',
    u'darkorchid': u'#9932CC',
    u'darkred': u'#8B0000',
    u'darksalmon': u'#E9967A',
    u'darkslateblue': u'#483D8B',
    u'darkslategray': u'#2F4F4F',
    u'darkslategrey': u'#2F4F4F',
    u'darkviolet': u'#9400D3',
    u'deeppink': u'#FF1493',
    u'dodgerblue': u'#1E90FF',
    u'firebrick': u'#B22222',
    u'floralwhite': u'#FFFAF0',
    u'forestgreen': u'#228B22',
    u'fuchsia': u'#FF00FF',
    u'gainsboro': u'#DCDCDC',
    u'ghostwhite': u'#F8F8FF',
    u'gold': u'#FFD700',
    u'goldenrod': u'#DAA520',
    u'gray': u'#808080',
    u'green': u'#008000',
    u'grey': u'#808080',
    u'indianred': u'#CD5C5C',
    u'indigo': u'#4B0082',
    u'khaki': u'#F0E68C',
    u'lavenderblush': u'#FFF0F5',
    u'lawngreen': u'#7CFC00',
    u'lightblue': u'#ADD8E6',
    u'lightcoral': u'#F08080',
    u'lightgoldenrodyellow': u'#FAFAD2',
    u'lightgray': u'#D3D3D3',
    u'lightgrey': u'#D3D3D3',
    u'lightskyblue': u'#87CEFA',
    u'lightslategray': u'#778899',
    u'lightslategrey': u'#778899',
    u'lightsteelblue': u'#B0C4DE',
    u'lime': u'#00FF00',
    u'linen': u'#FAF0E6',
    u'magenta': u'#FF00FF',
    u'maroon': u'#800000',
    u'mediumaquamarine': u'#66CDAA',
    u'mediumblue': u'#0000CD',
    u'mediumorchid': u'#BA55D3',
    u'mediumpurple': u'#9370DB',
    u'mediumseagreen': u'#3CB371',
    u'mediumslateblue': u'#7B68EE',
    u'midnightblue': u'#191970',
    u'moccasin': u'#FFE4B5',
    u'navajowhite': u'#FFDEAD',
    u'navy': u'#000080',
    u'oldlace': u'#FDF5E6',
    u'olive': u'#808000',
    u'orange': u'#FFA500',
    u'orangered': u'#FF4500',
    u'orchid': u'#DA70D6',
    u'paleturquoise': u'#AFEEEE',
    u'papayawhip': u'#FFEFD5',
    u'peachpuff': u'#FFDAB9',
    u'powderblue': u'#B0E0E6',
    u'purple': u'#800080',
    u'red': u'#FF0000',
    u'rosybrown': u'#BC8F8F',
    u'royalblue': u'#4169E1',
    u'saddlebrown': u'#8B4513',
    u'sandybrown': u'#F4A460',
    u'seashell': u'#FFF5EE',
    u'sienna': u'#A0522D',
    u'silver': u'#C0C0C0',
    u'skyblue': u'#87CEEB',
    u'slategray': u'#708090',
    u'slategrey': u'#708090',
    u'snow': u'#FFFAFA',
    u'springgreen': u'#00FF7F',
    u'teal': u'#008080',
    u'violet': u'#EE82EE',
    u'white': u'#FFFFFF',
    u'yellow': u'#FFFF00',
    u'yellowgreen': u'#9ACD32',
    }

# '#RRGGBB': three groups of two *hexadecimal* digits (not just 0-9).
hex_pat = re.compile(r'#([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})')
# '#RGB' shorthand; the look-ahead rejects runs of four or five hex digits so
# that only a genuine three digit value is expanded.
_short_hex_pat = re.compile(
    r'#([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])(?![0-9a-fA-F])')
# 'rgb(r, g, b)' with non-negative integer channels.
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)',
                     re.IGNORECASE)


def lrs_color(html_color):
    """
    Convert an HTML/CSS color specification into the string form
    C{'0x00rrggbb'}: the literal prefix C{0x00} followed by two lower-case
    hex digits per channel.

    In this patch, convert_from.py calls this with the CSS C{color} value
    (or the C{color} attribute of a C{<font>} tag) and stores the result as
    the C{textcolor} attribute of a Span.

    Accepted inputs, case-insensitive and with surrounding whitespace
    ignored:
      - C{#RRGGBB}
      - C{#RGB} (each digit is doubled)
      - C{rgb(r, g, b)} with integer channels; values above 255 are
        clipped to 255
      - a color keyword present in L{NAME_MAP}

    @param html_color: The color specification as a string.
    @return: A string of the form C{'0x00rrggbb'}. Anything that cannot be
             interpreted yields C{'0x00000000'} (black).
    """
    hcol = html_color.strip().lower()
    # Resolve a keyword to its '#RRGGBB' value first so that named colors go
    # through exactly the same formatting as explicit hex colors.
    hcol = NAME_MAP.get(hcol, hcol).lower()

    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups())

    match = _short_hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(digit * 2 for digit in match.groups())

    match = rgb_pat.search(hcol)
    if match:
        # Zero-pad every channel to two digits so the result always has the
        # same width; clip out-of-range channels instead of overflowing.
        return '0x00' + ''.join('%02x' % min(int(channel), 255)
                                for channel in match.groups())

    return '0x00000000'

%s

'%(match.group(1),)), - (re.compile('(.*?)', re.IGNORECASE|re.DOTALL), - lambda match : '

%s

'%(match.group(1),)), + (re.compile('(.*?)', re.IGNORECASE|re.DOTALL), + lambda match : '

%s

'%(match.group(2) if match.group(2) else 'center', match.group(3))), + (re.compile('(.*?)', re.IGNORECASE|re.DOTALL), + lambda match : '

%s

'%(match.group(2) if match.group(2) else 'center', match.group(3))), (re.compile('(.*?)', re.IGNORECASE|re.DOTALL), lambda match : '

%s

'%(match.group(1),)), (re.compile('(.*?)', re.IGNORECASE|re.DOTALL), - lambda match : '

%s

'%(match.group(1),)), + lambda match : '

%s

'%(match.group(1),)), + # Blank lines + (re.compile('( ){4}', re.IGNORECASE), + lambda match : '

'), + # HR + (re.compile('
', re.IGNORECASE), + lambda match : ' '), ] def __hasattr__(self, attr): @@ -196,6 +203,8 @@ class HTMLConverter(object): 'content':re.compile('Baen', re.IGNORECASE)})) def start_on_file(self, path, is_root=True, link_level=0): + self.css = HTMLConverter.CSS.copy() + self.pseudo_css = {} path = os.path.abspath(path) os.chdir(os.path.dirname(path)) self.file_name = os.path.basename(path) @@ -210,6 +219,8 @@ class HTMLConverter(object): if self.pdftohtml: nmassage.extend(HTMLConverter.PDFTOHTML) #raw = unicode(raw, 'utf8', 'replace') + if self.book_designer: + nmassage.extend(HTMLConverter.BOOK_DESIGNER) try: soup = BeautifulSoup(raw, convertEntities=BeautifulSoup.HTML_ENTITIES, @@ -225,6 +236,13 @@ class HTMLConverter(object): self.baen = True self.logger.info('Baen file detected. Re-parsing...') return self.start_on_file(path, is_root=is_root, link_level=link_level) + if self.book_designer: + t = soup.find(id='BookTitle') + if t: + self.book.set_title(self.get_text(t)) + a = soup.find(id='BookAuthor') + if a: + self.book.set_author(self.get_text(a)) self.logger.info('\tConverting to BBeB...') sys.stdout.flush() self.current_page = None @@ -234,8 +252,6 @@ class HTMLConverter(object): match = self.PAGE_BREAK_PAT.search(unicode(soup)) if match and not re.match('avoid', match.group(1), re.IGNORECASE): self.page_break_found = True - self.css = HTMLConverter.CSS.copy() - self.pseudo_css = {} self.target_prefix = path self.links[path] = [] self.previous_text = '\n' @@ -278,7 +294,7 @@ class HTMLConverter(object): Parses a style attribute. The code within a CSS selector block or in the style attribute of an HTML element. @return: A dictionary with one entry for each property where the key - is the property name and the value is the property value. + is the property name and the value is the property value. """ prop = dict() for s in props.split(';'): @@ -301,7 +317,7 @@ class HTMLConverter(object): # however we need to as we don't do alignment at a block level. 
# float is removed by the process_alignment function. if chk.startswith('font') or chk == 'text-align' or \ - chk == 'float' or chk == 'white-space': + chk == 'float' or chk == 'white-space' or chk == 'color': temp[key] = pcss[key] prop.update(temp) @@ -656,7 +672,11 @@ class HTMLConverter(object): unneeded.append(prop) for prop in unneeded: fp.pop(prop) - elem = Span(text=src, **fp) if (fp or force_span_use) else src + attrs = {} + if 'color' in css: + attrs['textcolor'] = lrs_color(css['color']) + attrs.update(fp) + elem = Span(text=src, **attrs) if (attrs or force_span_use) else src if css.has_key('text-decoration'): dec = css['text-decoration'].lower() linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None @@ -1372,6 +1392,8 @@ class HTMLConverter(object): elif tagname == 'font': if tag.has_key('face'): tag_css['font-family'] = tag['face'] + if tag.has_key('color'): + tag_css['color'] = tag['color'] self.process_children(tag, tag_css, tag_pseudo_css) elif tagname in ['br']: self.line_break() diff --git a/src/libprs500/ebooks/lrf/pylrs/pylrs.py b/src/libprs500/ebooks/lrf/pylrs/pylrs.py index e3bd25b0aa..4a466e338c 100644 --- a/src/libprs500/ebooks/lrf/pylrs/pylrs.py +++ b/src/libprs500/ebooks/lrf/pylrs/pylrs.py @@ -442,6 +442,14 @@ class Book(Delegator): self.gc_count = 0 + def set_title(self, title): + ot = self.delegates[0].delegates[0].delegates[0].title + self.delegates[0].delegates[0].delegates[0].title = (title, ot[1]) + + def set_author(self, author): + ot = self.delegates[0].delegates[0].delegates[0].author + self.delegates[0].delegates[0].delegates[0].author = (author, ot[1]) + def create_text_style(self, **settings): ans = TextStyle(**self.defaultTextStyle.attrs.copy()) ans.update(settings)