Support the CSS color attribute and reading the title and author from html0 files.

2025-12-21 20:37:21 -05:00 · 2007-10-06 05:41:57 +00:00 · 2007-10-06 05:41:57 +00:00 · 16d1518d19
commit 16d1518d19
parent 6bed1e2372
3 changed files with 165 additions and 10 deletions
--- a/src/libprs500/ebooks/lrf/html/color_map.py
+++ b/src/libprs500/ebooks/lrf/html/color_map.py
@ -0,0 +1,125 @@
 ##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
 ##    This program is free software; you can redistribute it and/or modify
 ##    it under the terms of the GNU General Public License as published by
 ##    the Free Software Foundation; either version 2 of the License, or
 ##    (at your option) any later version.
 ##
 ##    This program is distributed in the hope that it will be useful,
 ##    but WITHOUT ANY WARRANTY; without even the implied warranty of
 ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ##    GNU General Public License for more details.
 ##
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 import re
 # Mapping of lower-case CSS colour keywords to their '#RRGGBB' values.
 # Keys must not contain surrounding whitespace: lookups are done with the
 # exact lower-cased keyword, so a padded key can never match.
 NAME_MAP = {
             u'aliceblue': u'#F0F8FF',
             u'antiquewhite': u'#FAEBD7',
             u'aqua': u'#00FFFF',
             u'aquamarine': u'#7FFFD4',
             u'azure': u'#F0FFFF',
             u'beige': u'#F5F5DC',
             u'bisque': u'#FFE4C4',
             u'black': u'#000000',
             u'blanchedalmond': u'#FFEBCD',
             u'blue': u'#0000FF',
             u'blueviolet': u'#8A2BE2',
             u'brown': u'#A52A2A',
             u'burlywood': u'#DEB887',
             u'cadetblue': u'#5F9EA0',
             u'chartreuse': u'#7FFF00',
             u'chocolate': u'#D2691E',
             u'coral': u'#FF7F50',
             u'cornflowerblue': u'#6495ED',
             u'cornsilk': u'#FFF8DC',
             u'crimson': u'#DC143C',
             u'cyan': u'#00FFFF',
             u'darkblue': u'#00008B',
             u'darkcyan': u'#008B8B',
             u'darkgoldenrod': u'#B8860B',
             u'darkgray': u'#A9A9A9',
             u'darkgreen': u'#006400',
             u'darkgrey': u'#A9A9A9',
             u'darkkhaki': u'#BDB76B',
             u'darkmagenta': u'#8B008B',
             u'darkolivegreen': u'#556B2F',
             u'darkorange': u'#FF8C00',
             u'darkorchid': u'#9932CC',
             u'darkred': u'#8B0000',
             u'darksalmon': u'#E9967A',
             u'darkseagreen': u'#8FBC8F',
             u'darkslateblue': u'#483D8B',
             u'darkslategray': u'#2F4F4F',
             u'darkslategrey': u'#2F4F4F',
             u'darkturquoise': u'#00CED1',
             u'darkviolet': u'#9400D3',
             u'deeppink': u'#FF1493',
             u'deepskyblue': u'#00BFFF',
             u'dimgray': u'#696969',
             u'dimgrey': u'#696969',
             u'dodgerblue': u'#1E90FF',
             u'firebrick': u'#B22222',
             u'floralwhite': u'#FFFAF0',
             u'forestgreen': u'#228B22',
             u'fuchsia': u'#FF00FF',
             u'gainsboro': u'#DCDCDC',
             u'ghostwhite': u'#F8F8FF',
             u'gold': u'#FFD700',
             u'goldenrod': u'#DAA520',
             u'gray': u'#808080',
             u'green': u'#008000',
             u'greenyellow': u'#ADFF2F',
             u'grey': u'#808080',
             u'honeydew': u'#F0FFF0',
             u'hotpink': u'#FF69B4',
             u'indianred': u'#CD5C5C',
             u'indigo': u'#4B0082',
             u'ivory': u'#FFFFF0',
             u'khaki': u'#F0E68C',
             u'lavender': u'#E6E6FA',
             u'lavenderblush': u'#FFF0F5',
             u'lawngreen': u'#7CFC00',
             u'lemonchiffon': u'#FFFACD',
             u'lightblue': u'#ADD8E6',
             u'lightcoral': u'#F08080',
             u'lightcyan': u'#E0FFFF',
             u'lightgoldenrodyellow': u'#FAFAD2',
             u'lightgray': u'#D3D3D3',
             u'lightgreen': u'#90EE90',
             u'lightgrey': u'#D3D3D3',
             u'lightpink': u'#FFB6C1',
             u'lightsalmon': u'#FFA07A',
             u'lightseagreen': u'#20B2AA',
             u'lightskyblue': u'#87CEFA',
             u'lightslategray': u'#778899',
             u'lightslategrey': u'#778899',
             u'lightsteelblue': u'#B0C4DE',
             u'lightyellow': u'#FFFFE0',
             u'lime': u'#00FF00',
             u'limegreen': u'#32CD32',
             u'linen': u'#FAF0E6',
             u'magenta': u'#FF00FF',
             u'maroon': u'#800000',
             u'mediumaquamarine': u'#66CDAA',
             u'mediumblue': u'#0000CD',
             u'mediumorchid': u'#BA55D3',
             u'mediumpurple': u'#9370DB',
             u'mediumseagreen': u'#3CB371',
             u'mediumslateblue': u'#7B68EE',
             u'mediumspringgreen': u'#00FA9A',
             u'mediumturquoise': u'#48D1CC',
             u'mediumvioletred': u'#C71585',
             u'midnightblue': u'#191970',
             u'mintcream': u'#F5FFFA',
             u'mistyrose': u'#FFE4E1',
             u'moccasin': u'#FFE4B5',
             u'navajowhite': u'#FFDEAD',
             u'navy': u'#000080',
             u'oldlace': u'#FDF5E6',
             u'olive': u'#808000',
             u'olivedrab': u'#6B8E23',
             u'orange': u'#FFA500',
             u'orangered': u'#FF4500',
             u'orchid': u'#DA70D6',
             u'palegoldenrod': u'#EEE8AA',
             u'palegreen': u'#98FB98',
             u'paleturquoise': u'#AFEEEE',
             u'palevioletred': u'#DB7093',
             u'papayawhip': u'#FFEFD5',
             u'peachpuff': u'#FFDAB9',
             u'peru': u'#CD853F',
             u'pink': u'#FFC0CB',
             u'plum': u'#DDA0DD',
             u'powderblue': u'#B0E0E6',
             u'purple': u'#800080',
             u'red': u'#FF0000',
             u'rosybrown': u'#BC8F8F',
             u'royalblue': u'#4169E1',
             u'saddlebrown': u'#8B4513',
             u'salmon': u'#FA8072',
             u'sandybrown': u'#F4A460',
             u'seagreen': u'#2E8B57',
             u'seashell': u'#FFF5EE',
             u'sienna': u'#A0522D',
             u'silver': u'#C0C0C0',
             u'skyblue': u'#87CEEB',
             u'slateblue': u'#6A5ACD',
             u'slategray': u'#708090',
             u'slategrey': u'#708090',
             u'snow': u'#FFFAFA',
             u'springgreen': u'#00FF7F',
             u'steelblue': u'#4682B4',
             u'tan': u'#D2B48C',
             u'teal': u'#008080',
             u'thistle': u'#D8BFD8',
             u'tomato': u'#FF6347',
             u'turquoise': u'#40E0D0',
             u'violet': u'#EE82EE',
             u'wheat': u'#F5DEB3',
             u'white': u'#FFFFFF',
             u'whitesmoke': u'#F5F5F5',
             u'yellow': u'#FFFF00',
             u'yellowgreen': u'#9ACD32',
            }
 # Six-digit hex colours such as "#1e90ff". The digit class must include a-f;
 # a plain \d would reject every colour that uses a hex letter.
 hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})', re.IGNORECASE)
 # Three-digit CSS shorthand such as "#f00". The lookahead rejects runs of
 # four or five hex digits, which are not valid colours.
 short_hex_pat = re.compile(r'#([0-9a-f])([0-9a-f])([0-9a-f])(?![0-9a-f])', re.IGNORECASE)
 # Functional notation with integer channels, e.g. "rgb(255, 0, 10)".
 rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
 def lrs_color(html_color):
     """
     Convert an HTML/CSS colour specification into a '0x00RRGGBB' string.

     Recognised forms, tried in this order: '#RRGGBB', '#RGB' shorthand,
     'rgb(r, g, b)' with integer channels, and the keywords in NAME_MAP.

     @param html_color: The colour as written in the HTML/CSS source.
     @return: A string of the form '0x00RRGGBB'. Unrecognised input yields
              '0x00000000'.
     """
     hcol = html_color.lower().strip()
     match = hex_pat.search(hcol)
     if match:
         return '0x00' + ''.join(match.groups())
     match = short_hex_pat.search(hcol)
     if match:
         # "#abc" is shorthand for "#aabbcc": double every digit.
         return '0x00' + ''.join(digit * 2 for digit in match.groups())
     match = rgb_pat.search(hcol)
     if match:
         # Clamp to one byte and zero-pad each channel so the result always
         # has exactly six hex digits (hex(n)[2:] dropped leading zeros).
         channels = [min(int(value), 255) for value in match.groups()]
         return '0x00%02x%02x%02x' % tuple(channels)
     if hcol in NAME_MAP:
         # NAME_MAP stores '#RRGGBB'; convert to the same form as above.
         return NAME_MAP[hcol].replace('#', '0x00')
     return '0x00000000'
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@ -45,6 +45,7 @@ from libprs500 import filename_to_utf8,  setup_cli_handlers, __appname__
 from libprs500.ptempfile import PersistentTemporaryFile
 from libprs500.ebooks.metadata.opf import OPFReader
 from libprs500.devices.interface import Device
 from libprs500.ebooks.lrf.html.color_map import lrs_color
 class HTMLConverter(object):
    SELECTOR_PAT   = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
@ -96,14 +97,20 @@ class HTMLConverter(object):
    # Fix Book Designer markup
    BOOK_DESIGNER = [
                     # Create header tags
-                     (re.compile('<h2.*?id=BookTitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+                     (re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
-                      lambda match : '<h1 align="center">%s</h1>'%(match.group(1),)),
+                      lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
-                     (re.compile('<h2.*?id=BookAuthor.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
+                     (re.compile('<h2.*?id=BookAuthor.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
-                      lambda match : '<h2 align="right">%s</h2>'%(match.group(1),)),
+                      lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
                     (re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                      lambda match : '<h2>%s</h2>'%(match.group(1),)),
                     (re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
-                      lambda match : '<h3>%s</h3>'%(match.group(1),)), 
+                      lambda match : '<h3>%s</h3>'%(match.group(1),)),
                     # Blank lines
                     (re.compile('<div.*?>(&nbsp;){4}</div>', re.IGNORECASE),
                      lambda match : '<p></p>'), 
                     # HR
                     (re.compile('<hr>', re.IGNORECASE),
                      lambda match : '<span style="page-break-after:always"> </span>'),
                     ]
    def __hasattr__(self, attr):
@ -196,6 +203,8 @@ class HTMLConverter(object):
                        'content':re.compile('Baen', re.IGNORECASE)}))
    def start_on_file(self, path, is_root=True, link_level=0):
        self.css = HTMLConverter.CSS.copy()
        self.pseudo_css = {}
        path = os.path.abspath(path)
        os.chdir(os.path.dirname(path))
        self.file_name = os.path.basename(path)
@ -210,6 +219,8 @@ class HTMLConverter(object):
        if self.pdftohtml:
            nmassage.extend(HTMLConverter.PDFTOHTML)
            #raw = unicode(raw, 'utf8', 'replace')
        if self.book_designer:
            nmassage.extend(HTMLConverter.BOOK_DESIGNER)
        try:
            soup = BeautifulSoup(raw, 
                         convertEntities=BeautifulSoup.HTML_ENTITIES,
@ -225,6 +236,13 @@ class HTMLConverter(object):
            self.baen = True
            self.logger.info('Baen file detected. Re-parsing...')
            return self.start_on_file(path, is_root=is_root, link_level=link_level)
        if self.book_designer:
            t = soup.find(id='BookTitle')
            if t:
                self.book.set_title(self.get_text(t))
            a = soup.find(id='BookAuthor')
            if a:
                self.book.set_author(self.get_text(a))
        self.logger.info('\tConverting to BBeB...')
        sys.stdout.flush()        
        self.current_page = None
@ -234,8 +252,6 @@ class HTMLConverter(object):
        match = self.PAGE_BREAK_PAT.search(unicode(soup))
        if match and not re.match('avoid', match.group(1), re.IGNORECASE):
            self.page_break_found = True
        self.css = HTMLConverter.CSS.copy()
        self.pseudo_css = {}
        self.target_prefix = path
        self.links[path] = []
        self.previous_text = '\n'
@ -278,7 +294,7 @@ class HTMLConverter(object):
        Parses a style attribute. The code within a CSS selector block or in
        the style attribute of an HTML element.
        @return: A dictionary with one entry for each property where the key 
-                 is the property name and the value is the property value.
+                is the property name and the value is the property value.
        """
        prop = dict()
        for s in props.split(';'):
@ -301,7 +317,7 @@ class HTMLConverter(object):
                # however we need to as we don't do alignment at a block level.
                # float is removed by the process_alignment function.
                if chk.startswith('font') or chk == 'text-align' or \
-                chk == 'float' or chk == 'white-space': 
+                chk == 'float' or chk == 'white-space' or chk == 'color':
                    temp[key] = pcss[key]
            prop.update(temp)
@ -656,7 +672,11 @@ class HTMLConverter(object):
                    unneeded.append(prop)
            for prop in unneeded:
                fp.pop(prop)
-            elem = Span(text=src, **fp) if (fp or force_span_use) else src
+            attrs = {}
            if 'color' in css:
                attrs['textcolor'] = lrs_color(css['color'])
            attrs.update(fp)
            elem = Span(text=src, **attrs) if (attrs or force_span_use) else src
            if css.has_key('text-decoration'):
                dec = css['text-decoration'].lower()
                linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None
@ -1372,6 +1392,8 @@ class HTMLConverter(object):
        elif tagname == 'font':
            if tag.has_key('face'):
                tag_css['font-family'] = tag['face']
            if tag.has_key('color'):
                tag_css['color'] = tag['color']
            self.process_children(tag, tag_css, tag_pseudo_css)
        elif tagname in ['br']:
            self.line_break()
--- a/src/libprs500/ebooks/lrf/pylrs/pylrs.py
+++ b/src/libprs500/ebooks/lrf/pylrs/pylrs.py
@ -442,6 +442,14 @@ class Book(Delegator):
        self.gc_count = 0
    def set_title(self, title):
        """
        Replace the title stored three delegate levels below this object.

        The stored value is indexable; its element at index 1 is carried
        over unchanged and only the element at index 0 is replaced.
        """
        holder = self.delegates[0].delegates[0].delegates[0]
        previous = holder.title
        holder.title = (title, previous[1])
    def set_author(self, author):
        """
        Replace the author stored three delegate levels below this object.

        The stored value is indexable; its element at index 1 is carried
        over unchanged and only the element at index 0 is replaced.
        """
        holder = self.delegates[0].delegates[0].delegates[0]
        previous = holder.author
        holder.author = (author, previous[1])
    def create_text_style(self, **settings):
        ans = TextStyle(**self.defaultTextStyle.attrs.copy())
        ans.update(settings)