mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Support CSS color attribute and support reading title,author from html0 files.
This commit is contained in:
parent
6bed1e2372
commit
16d1518d19
125
src/libprs500/ebooks/lrf/html/color_map.py
Normal file
125
src/libprs500/ebooks/lrf/html/color_map.py
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Mapping of CSS colour keywords (lower case, no surrounding whitespace) to
# their '#RRGGBB' values, following the CSS extended colour keyword table.
# Both the 'gray' and 'grey' spellings are provided wherever CSS defines them.
NAME_MAP = {
    u'aliceblue': u'#F0F8FF',
    u'antiquewhite': u'#FAEBD7',
    u'aqua': u'#00FFFF',
    u'aquamarine': u'#7FFFD4',
    u'azure': u'#F0FFFF',
    u'beige': u'#F5F5DC',
    u'bisque': u'#FFE4C4',
    u'black': u'#000000',
    u'blanchedalmond': u'#FFEBCD',
    u'blue': u'#0000FF',
    u'blueviolet': u'#8A2BE2',
    u'brown': u'#A52A2A',
    u'burlywood': u'#DEB887',
    u'cadetblue': u'#5F9EA0',
    u'chartreuse': u'#7FFF00',
    u'chocolate': u'#D2691E',
    u'coral': u'#FF7F50',
    u'cornflowerblue': u'#6495ED',
    u'cornsilk': u'#FFF8DC',
    u'crimson': u'#DC143C',
    u'cyan': u'#00FFFF',
    u'darkblue': u'#00008B',
    u'darkcyan': u'#008B8B',
    u'darkgoldenrod': u'#B8860B',
    u'darkgray': u'#A9A9A9',
    u'darkgreen': u'#006400',
    u'darkgrey': u'#A9A9A9',
    u'darkkhaki': u'#BDB76B',
    u'darkmagenta': u'#8B008B',
    u'darkolivegreen': u'#556B2F',
    u'darkorange': u'#FF8C00',
    u'darkorchid': u'#9932CC',
    u'darkred': u'#8B0000',
    u'darksalmon': u'#E9967A',
    u'darkseagreen': u'#8FBC8F',
    u'darkslateblue': u'#483D8B',
    u'darkslategray': u'#2F4F4F',
    u'darkslategrey': u'#2F4F4F',
    u'darkturquoise': u'#00CED1',
    u'darkviolet': u'#9400D3',
    u'deeppink': u'#FF1493',
    u'deepskyblue': u'#00BFFF',
    u'dimgray': u'#696969',
    u'dimgrey': u'#696969',
    u'dodgerblue': u'#1E90FF',
    u'firebrick': u'#B22222',
    u'floralwhite': u'#FFFAF0',
    u'forestgreen': u'#228B22',
    u'fuchsia': u'#FF00FF',
    u'gainsboro': u'#DCDCDC',
    u'ghostwhite': u'#F8F8FF',
    u'gold': u'#FFD700',
    u'goldenrod': u'#DAA520',
    u'gray': u'#808080',
    u'green': u'#008000',
    u'greenyellow': u'#ADFF2F',
    u'grey': u'#808080',
    u'honeydew': u'#F0FFF0',
    u'hotpink': u'#FF69B4',
    u'indianred': u'#CD5C5C',
    u'indigo': u'#4B0082',
    u'ivory': u'#FFFFF0',
    u'khaki': u'#F0E68C',
    u'lavender': u'#E6E6FA',
    u'lavenderblush': u'#FFF0F5',
    u'lawngreen': u'#7CFC00',
    u'lemonchiffon': u'#FFFACD',
    u'lightblue': u'#ADD8E6',
    u'lightcoral': u'#F08080',
    u'lightcyan': u'#E0FFFF',
    u'lightgoldenrodyellow': u'#FAFAD2',
    u'lightgray': u'#D3D3D3',
    u'lightgreen': u'#90EE90',
    u'lightgrey': u'#D3D3D3',
    u'lightpink': u'#FFB6C1',
    u'lightsalmon': u'#FFA07A',
    u'lightseagreen': u'#20B2AA',
    u'lightskyblue': u'#87CEFA',
    u'lightslategray': u'#778899',
    u'lightslategrey': u'#778899',
    u'lightsteelblue': u'#B0C4DE',
    u'lightyellow': u'#FFFFE0',
    u'lime': u'#00FF00',
    u'limegreen': u'#32CD32',
    u'linen': u'#FAF0E6',
    u'magenta': u'#FF00FF',
    u'maroon': u'#800000',
    u'mediumaquamarine': u'#66CDAA',
    u'mediumblue': u'#0000CD',
    u'mediumorchid': u'#BA55D3',
    u'mediumpurple': u'#9370DB',
    u'mediumseagreen': u'#3CB371',
    u'mediumslateblue': u'#7B68EE',
    u'mediumspringgreen': u'#00FA9A',
    u'mediumturquoise': u'#48D1CC',
    u'mediumvioletred': u'#C71585',
    u'midnightblue': u'#191970',
    u'mintcream': u'#F5FFFA',
    u'mistyrose': u'#FFE4E1',
    u'moccasin': u'#FFE4B5',
    u'navajowhite': u'#FFDEAD',
    u'navy': u'#000080',
    u'oldlace': u'#FDF5E6',
    u'olive': u'#808000',
    u'olivedrab': u'#6B8E23',
    u'orange': u'#FFA500',
    u'orangered': u'#FF4500',
    u'orchid': u'#DA70D6',
    u'palegoldenrod': u'#EEE8AA',
    u'palegreen': u'#98FB98',
    u'paleturquoise': u'#AFEEEE',
    u'palevioletred': u'#DB7093',
    u'papayawhip': u'#FFEFD5',
    u'peachpuff': u'#FFDAB9',
    u'peru': u'#CD853F',
    u'pink': u'#FFC0CB',
    u'plum': u'#DDA0DD',
    u'powderblue': u'#B0E0E6',
    u'purple': u'#800080',
    u'red': u'#FF0000',
    u'rosybrown': u'#BC8F8F',
    u'royalblue': u'#4169E1',
    u'saddlebrown': u'#8B4513',
    u'salmon': u'#FA8072',
    u'sandybrown': u'#F4A460',
    u'seagreen': u'#2E8B57',
    u'seashell': u'#FFF5EE',
    u'sienna': u'#A0522D',
    u'silver': u'#C0C0C0',
    u'skyblue': u'#87CEEB',
    u'slateblue': u'#6A5ACD',
    u'slategray': u'#708090',
    u'slategrey': u'#708090',
    u'snow': u'#FFFAFA',
    u'springgreen': u'#00FF7F',
    u'steelblue': u'#4682B4',
    u'tan': u'#D2B48C',
    u'teal': u'#008080',
    u'thistle': u'#D8BFD8',
    u'tomato': u'#FF6347',
    u'turquoise': u'#40E0D0',
    u'violet': u'#EE82EE',
    u'wheat': u'#F5DEB3',
    u'white': u'#FFFFFF',
    u'whitesmoke': u'#F5F5F5',
    u'yellow': u'#FFFF00',
    u'yellowgreen': u'#9ACD32',
}
|
||||||
|
|
||||||
|
# Six-digit hex colour (#rrggbb); one capture group per channel. Hex digits
# a-f must be accepted, not only decimal digits.
hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})', re.IGNORECASE)
# Three-digit shorthand (#rgb). The negative lookahead rejects runs of four or
# five hex digits, which are not valid colours.
short_hex_pat = re.compile(r'#([0-9a-f])([0-9a-f])([0-9a-f])(?![0-9a-f])', re.IGNORECASE)
# Functional notation with integer channels: rgb(r, g, b).
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)


def lrs_color(html_color):
    """
    Convert an HTML/CSS colour specification into an LRS colour string.

    Recognised forms are ``#rrggbb``, ``#rgb``, ``rgb(r, g, b)`` with integer
    channels, and the colour keywords listed in NAME_MAP. Matching is case
    insensitive and ignores surrounding whitespace.

    @param html_color: the colour specification, e.g. '#FFAA00' or 'red'
    @return: a string of the form '0x00rrggbb' (lower case hex digits).
             Unrecognised input yields '0x00000000'.
    """
    hcol = html_color.strip().lower()

    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups())

    match = short_hex_pat.search(hcol)
    if match:
        # CSS expands #rgb by doubling each digit: #fa0 -> #ffaa00
        return '0x00' + ''.join([digit * 2 for digit in match.groups()])

    match = rgb_pat.search(hcol)
    if match:
        # Clamp to 255 and zero-pad so each channel is exactly two hex digits.
        channels = [min(int(value), 255) for value in match.groups()]
        return '0x00' + ''.join(['%02x' % channel for channel in channels])

    if hcol in NAME_MAP:
        # NAME_MAP stores '#RRGGBB'; emit the same '0x00rrggbb' form as above.
        return '0x00' + NAME_MAP[hcol].lstrip('#').lower()

    return '0x00000000'
|
||||||
|
|
||||||
|
|
@ -45,6 +45,7 @@ from libprs500 import filename_to_utf8, setup_cli_handlers, __appname__
|
|||||||
from libprs500.ptempfile import PersistentTemporaryFile
|
from libprs500.ptempfile import PersistentTemporaryFile
|
||||||
from libprs500.ebooks.metadata.opf import OPFReader
|
from libprs500.ebooks.metadata.opf import OPFReader
|
||||||
from libprs500.devices.interface import Device
|
from libprs500.devices.interface import Device
|
||||||
|
from libprs500.ebooks.lrf.html.color_map import lrs_color
|
||||||
|
|
||||||
class HTMLConverter(object):
|
class HTMLConverter(object):
|
||||||
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
||||||
@ -96,14 +97,20 @@ class HTMLConverter(object):
|
|||||||
# Fix Book Designer markup
|
# Fix Book Designer markup
|
||||||
BOOK_DESIGNER = [
|
BOOK_DESIGNER = [
|
||||||
# Create header tags
|
# Create header tags
|
||||||
(re.compile('<h2.*?id=BookTitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h1 align="center">%s</h1>'%(match.group(1),)),
|
lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
||||||
(re.compile('<h2.*?id=BookAuthor.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<h2.*?id=BookAuthor.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h2 align="right">%s</h2>'%(match.group(1),)),
|
lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
|
||||||
(re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h2>%s</h2>'%(match.group(1),)),
|
lambda match : '<h2>%s</h2>'%(match.group(1),)),
|
||||||
(re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
(re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
|
||||||
lambda match : '<h3>%s</h3>'%(match.group(1),)),
|
lambda match : '<h3>%s</h3>'%(match.group(1),)),
|
||||||
|
# Blank lines
|
||||||
|
(re.compile('<div.*?>( ){4}</div>', re.IGNORECASE),
|
||||||
|
lambda match : '<p></p>'),
|
||||||
|
# HR
|
||||||
|
(re.compile('<hr>', re.IGNORECASE),
|
||||||
|
lambda match : '<span style="page-break-after:always"> </span>'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def __hasattr__(self, attr):
|
def __hasattr__(self, attr):
|
||||||
@ -196,6 +203,8 @@ class HTMLConverter(object):
|
|||||||
'content':re.compile('Baen', re.IGNORECASE)}))
|
'content':re.compile('Baen', re.IGNORECASE)}))
|
||||||
|
|
||||||
def start_on_file(self, path, is_root=True, link_level=0):
|
def start_on_file(self, path, is_root=True, link_level=0):
|
||||||
|
self.css = HTMLConverter.CSS.copy()
|
||||||
|
self.pseudo_css = {}
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
os.chdir(os.path.dirname(path))
|
os.chdir(os.path.dirname(path))
|
||||||
self.file_name = os.path.basename(path)
|
self.file_name = os.path.basename(path)
|
||||||
@ -210,6 +219,8 @@ class HTMLConverter(object):
|
|||||||
if self.pdftohtml:
|
if self.pdftohtml:
|
||||||
nmassage.extend(HTMLConverter.PDFTOHTML)
|
nmassage.extend(HTMLConverter.PDFTOHTML)
|
||||||
#raw = unicode(raw, 'utf8', 'replace')
|
#raw = unicode(raw, 'utf8', 'replace')
|
||||||
|
if self.book_designer:
|
||||||
|
nmassage.extend(HTMLConverter.BOOK_DESIGNER)
|
||||||
try:
|
try:
|
||||||
soup = BeautifulSoup(raw,
|
soup = BeautifulSoup(raw,
|
||||||
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||||
@ -225,6 +236,13 @@ class HTMLConverter(object):
|
|||||||
self.baen = True
|
self.baen = True
|
||||||
self.logger.info('Baen file detected. Re-parsing...')
|
self.logger.info('Baen file detected. Re-parsing...')
|
||||||
return self.start_on_file(path, is_root=is_root, link_level=link_level)
|
return self.start_on_file(path, is_root=is_root, link_level=link_level)
|
||||||
|
if self.book_designer:
|
||||||
|
t = soup.find(id='BookTitle')
|
||||||
|
if t:
|
||||||
|
self.book.set_title(self.get_text(t))
|
||||||
|
a = soup.find(id='BookAuthor')
|
||||||
|
if a:
|
||||||
|
self.book.set_author(self.get_text(a))
|
||||||
self.logger.info('\tConverting to BBeB...')
|
self.logger.info('\tConverting to BBeB...')
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
self.current_page = None
|
self.current_page = None
|
||||||
@ -234,8 +252,6 @@ class HTMLConverter(object):
|
|||||||
match = self.PAGE_BREAK_PAT.search(unicode(soup))
|
match = self.PAGE_BREAK_PAT.search(unicode(soup))
|
||||||
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
|
if match and not re.match('avoid', match.group(1), re.IGNORECASE):
|
||||||
self.page_break_found = True
|
self.page_break_found = True
|
||||||
self.css = HTMLConverter.CSS.copy()
|
|
||||||
self.pseudo_css = {}
|
|
||||||
self.target_prefix = path
|
self.target_prefix = path
|
||||||
self.links[path] = []
|
self.links[path] = []
|
||||||
self.previous_text = '\n'
|
self.previous_text = '\n'
|
||||||
@ -301,7 +317,7 @@ class HTMLConverter(object):
|
|||||||
# however we need to as we don't do alignment at a block level.
|
# however we need to as we don't do alignment at a block level.
|
||||||
# float is removed by the process_alignment function.
|
# float is removed by the process_alignment function.
|
||||||
if chk.startswith('font') or chk == 'text-align' or \
|
if chk.startswith('font') or chk == 'text-align' or \
|
||||||
chk == 'float' or chk == 'white-space':
|
chk == 'float' or chk == 'white-space' or chk == 'color':
|
||||||
temp[key] = pcss[key]
|
temp[key] = pcss[key]
|
||||||
prop.update(temp)
|
prop.update(temp)
|
||||||
|
|
||||||
@ -656,7 +672,11 @@ class HTMLConverter(object):
|
|||||||
unneeded.append(prop)
|
unneeded.append(prop)
|
||||||
for prop in unneeded:
|
for prop in unneeded:
|
||||||
fp.pop(prop)
|
fp.pop(prop)
|
||||||
elem = Span(text=src, **fp) if (fp or force_span_use) else src
|
attrs = {}
|
||||||
|
if 'color' in css:
|
||||||
|
attrs['textcolor'] = lrs_color(css['color'])
|
||||||
|
attrs.update(fp)
|
||||||
|
elem = Span(text=src, **attrs) if (attrs or force_span_use) else src
|
||||||
if css.has_key('text-decoration'):
|
if css.has_key('text-decoration'):
|
||||||
dec = css['text-decoration'].lower()
|
dec = css['text-decoration'].lower()
|
||||||
linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None
|
linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None
|
||||||
@ -1372,6 +1392,8 @@ class HTMLConverter(object):
|
|||||||
elif tagname == 'font':
|
elif tagname == 'font':
|
||||||
if tag.has_key('face'):
|
if tag.has_key('face'):
|
||||||
tag_css['font-family'] = tag['face']
|
tag_css['font-family'] = tag['face']
|
||||||
|
if tag.has_key('color'):
|
||||||
|
tag_css['color'] = tag['color']
|
||||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||||
elif tagname in ['br']:
|
elif tagname in ['br']:
|
||||||
self.line_break()
|
self.line_break()
|
||||||
|
@ -442,6 +442,14 @@ class Book(Delegator):
|
|||||||
self.gc_count = 0
|
self.gc_count = 0
|
||||||
|
|
||||||
|
|
||||||
|
def set_title(self, title):
    """
    Replace the first element of the stored title pair with C{title}.

    The second element of the existing pair is carried over unchanged
    (presumably the sort/reading form of the title -- confirm against the
    object that owns the C{title} attribute).
    """
    # The title is held three delegate levels below the book.
    holder = self.delegates[0].delegates[0].delegates[0]
    previous = holder.title
    holder.title = (title, previous[1])
|
||||||
|
|
||||||
|
def set_author(self, author):
    """
    Replace the first element of the stored author pair with C{author}.

    The second element of the existing pair is carried over unchanged
    (presumably the sort/reading form of the author -- confirm against the
    object that owns the C{author} attribute).
    """
    # The author is held three delegate levels below the book.
    holder = self.delegates[0].delegates[0].delegates[0]
    previous = holder.author
    holder.author = (author, previous[1])
|
||||||
|
|
||||||
def create_text_style(self, **settings):
|
def create_text_style(self, **settings):
|
||||||
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
|
ans = TextStyle(**self.defaultTextStyle.attrs.copy())
|
||||||
ans.update(settings)
|
ans.update(settings)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user