mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 10:37:00 -04:00 
			
		
		
		
	Support CSS color attribute and support reading title,author from html0 files.
This commit is contained in:
		
							parent
							
								
									6bed1e2372
								
							
						
					
					
						commit
						16d1518d19
					
				
							
								
								
									
										125
									
								
								src/libprs500/ebooks/lrf/html/color_map.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								src/libprs500/ebooks/lrf/html/color_map.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,125 @@ | |||||||
|  | ##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net | ||||||
|  | ##    This program is free software; you can redistribute it and/or modify | ||||||
|  | ##    it under the terms of the GNU General Public License as published by | ||||||
|  | ##    the Free Software Foundation; either version 2 of the License, or | ||||||
|  | ##    (at your option) any later version. | ||||||
|  | ## | ||||||
|  | ##    This program is distributed in the hope that it will be useful, | ||||||
|  | ##    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | ##    GNU General Public License for more details. | ||||||
|  | ## | ||||||
|  | ##    You should have received a copy of the GNU General Public License along | ||||||
|  | ##    with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  | 
 | ||||||
|  | import re | ||||||
|  | 
 | ||||||
# Maps each lowercase CSS colour keyword to its '#RRGGBB' value.
# Keys must be lowercase with no surrounding whitespace, because lookups are
# performed on a lowercased colour string.
# The table covers the full CSS3 extended colour keyword set, including both
# the "gray" and "grey" spellings.
NAME_MAP = {
    u'aliceblue': u'#F0F8FF',
    u'antiquewhite': u'#FAEBD7',
    u'aqua': u'#00FFFF',
    u'aquamarine': u'#7FFFD4',
    u'azure': u'#F0FFFF',
    u'beige': u'#F5F5DC',
    u'bisque': u'#FFE4C4',
    u'black': u'#000000',
    u'blanchedalmond': u'#FFEBCD',
    u'blue': u'#0000FF',
    u'blueviolet': u'#8A2BE2',
    u'brown': u'#A52A2A',
    u'burlywood': u'#DEB887',
    u'cadetblue': u'#5F9EA0',
    u'chartreuse': u'#7FFF00',
    u'chocolate': u'#D2691E',
    u'coral': u'#FF7F50',
    u'cornflowerblue': u'#6495ED',
    u'cornsilk': u'#FFF8DC',
    u'crimson': u'#DC143C',
    u'cyan': u'#00FFFF',
    u'darkblue': u'#00008B',
    u'darkcyan': u'#008B8B',
    u'darkgoldenrod': u'#B8860B',
    u'darkgray': u'#A9A9A9',
    u'darkgreen': u'#006400',
    u'darkgrey': u'#A9A9A9',
    u'darkkhaki': u'#BDB76B',
    u'darkmagenta': u'#8B008B',
    u'darkolivegreen': u'#556B2F',
    u'darkorange': u'#FF8C00',
    u'darkorchid': u'#9932CC',
    u'darkred': u'#8B0000',
    u'darksalmon': u'#E9967A',
    u'darkseagreen': u'#8FBC8F',
    u'darkslateblue': u'#483D8B',
    u'darkslategray': u'#2F4F4F',
    u'darkslategrey': u'#2F4F4F',
    u'darkturquoise': u'#00CED1',
    u'darkviolet': u'#9400D3',
    u'deeppink': u'#FF1493',
    u'deepskyblue': u'#00BFFF',
    u'dimgray': u'#696969',
    u'dimgrey': u'#696969',
    u'dodgerblue': u'#1E90FF',
    u'firebrick': u'#B22222',
    u'floralwhite': u'#FFFAF0',
    u'forestgreen': u'#228B22',
    u'fuchsia': u'#FF00FF',
    u'gainsboro': u'#DCDCDC',
    u'ghostwhite': u'#F8F8FF',
    u'gold': u'#FFD700',
    u'goldenrod': u'#DAA520',
    u'gray': u'#808080',
    u'green': u'#008000',
    u'greenyellow': u'#ADFF2F',
    u'grey': u'#808080',
    u'honeydew': u'#F0FFF0',
    u'hotpink': u'#FF69B4',
    u'indianred': u'#CD5C5C',
    u'indigo': u'#4B0082',
    u'ivory': u'#FFFFF0',
    u'khaki': u'#F0E68C',
    u'lavender': u'#E6E6FA',
    u'lavenderblush': u'#FFF0F5',
    u'lawngreen': u'#7CFC00',
    u'lemonchiffon': u'#FFFACD',
    u'lightblue': u'#ADD8E6',
    u'lightcoral': u'#F08080',
    u'lightcyan': u'#E0FFFF',
    u'lightgoldenrodyellow': u'#FAFAD2',
    u'lightgray': u'#D3D3D3',
    u'lightgreen': u'#90EE90',
    u'lightgrey': u'#D3D3D3',
    u'lightpink': u'#FFB6C1',
    u'lightsalmon': u'#FFA07A',
    u'lightseagreen': u'#20B2AA',
    u'lightskyblue': u'#87CEFA',
    u'lightslategray': u'#778899',
    u'lightslategrey': u'#778899',
    u'lightsteelblue': u'#B0C4DE',
    u'lightyellow': u'#FFFFE0',
    u'lime': u'#00FF00',
    u'limegreen': u'#32CD32',
    u'linen': u'#FAF0E6',
    u'magenta': u'#FF00FF',
    u'maroon': u'#800000',
    u'mediumaquamarine': u'#66CDAA',
    u'mediumblue': u'#0000CD',
    u'mediumorchid': u'#BA55D3',
    u'mediumpurple': u'#9370DB',
    u'mediumseagreen': u'#3CB371',
    u'mediumslateblue': u'#7B68EE',
    u'mediumspringgreen': u'#00FA9A',
    u'mediumturquoise': u'#48D1CC',
    u'mediumvioletred': u'#C71585',
    u'midnightblue': u'#191970',
    u'mintcream': u'#F5FFFA',
    u'mistyrose': u'#FFE4E1',
    u'moccasin': u'#FFE4B5',
    u'navajowhite': u'#FFDEAD',
    u'navy': u'#000080',
    u'oldlace': u'#FDF5E6',
    u'olive': u'#808000',
    u'olivedrab': u'#6B8E23',
    u'orange': u'#FFA500',
    u'orangered': u'#FF4500',
    u'orchid': u'#DA70D6',
    u'palegoldenrod': u'#EEE8AA',
    u'palegreen': u'#98FB98',
    u'paleturquoise': u'#AFEEEE',
    u'palevioletred': u'#DB7093',
    u'papayawhip': u'#FFEFD5',
    u'peachpuff': u'#FFDAB9',
    u'peru': u'#CD853F',
    u'pink': u'#FFC0CB',
    u'plum': u'#DDA0DD',
    u'powderblue': u'#B0E0E6',
    u'purple': u'#800080',
    u'red': u'#FF0000',
    u'rosybrown': u'#BC8F8F',
    u'royalblue': u'#4169E1',
    u'saddlebrown': u'#8B4513',
    u'salmon': u'#FA8072',
    u'sandybrown': u'#F4A460',
    u'seagreen': u'#2E8B57',
    u'seashell': u'#FFF5EE',
    u'sienna': u'#A0522D',
    u'silver': u'#C0C0C0',
    u'skyblue': u'#87CEEB',
    u'slateblue': u'#6A5ACD',
    u'slategray': u'#708090',
    u'slategrey': u'#708090',
    u'snow': u'#FFFAFA',
    u'springgreen': u'#00FF7F',
    u'steelblue': u'#4682B4',
    u'tan': u'#D2B48C',
    u'teal': u'#008080',
    u'thistle': u'#D8BFD8',
    u'tomato': u'#FF6347',
    u'turquoise': u'#40E0D0',
    u'violet': u'#EE82EE',
    u'wheat': u'#F5DEB3',
    u'white': u'#FFFFFF',
    u'whitesmoke': u'#F5F5F5',
    u'yellow': u'#FFFF00',
    u'yellowgreen': u'#9ACD32',
}
|  | 
 | ||||||
# Six-digit hexadecimal notation, e.g. "#ff8800". Hex digits a-f must be
# accepted as well as 0-9, otherwise most real-world colours are rejected.
hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})', re.IGNORECASE)
# Three-digit shorthand, e.g. "#f80"; the lookahead rejects 4- and 5-digit junk.
short_hex_pat = re.compile(r'#([0-9a-f])([0-9a-f])([0-9a-f])(?![0-9a-f])',
                           re.IGNORECASE)
# Functional notation with integer channels, e.g. "rgb(255, 136, 0)".
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)


def lrs_color(html_color):
    """Convert an HTML/CSS colour specification to a '0x00rrggbb' string.

    Recognised forms, tried in this order:
      * ``#RRGGBB`` six-digit hexadecimal
      * ``#RGB`` three-digit hexadecimal shorthand (each digit is doubled)
      * ``rgb(r, g, b)`` with integer channels (values above 255 are clamped)
      * a colour keyword present in ``NAME_MAP``

    @param html_color: The colour value as written in the HTML/CSS source.
    @return: A string of the form ``'0x00rrggbb'`` with lowercase hex digits.
             Unrecognised input yields ``'0x00000000'``.
    """
    hcol = html_color.strip().lower()

    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups())

    match = short_hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(digit * 2 for digit in match.groups())

    match = rgb_pat.search(hcol)
    if match:
        # Zero-pad every channel to two hex digits so the result always has
        # exactly six colour digits; clamp out-of-range channels to 255.
        channels = tuple(min(int(value), 255) for value in match.groups())
        return '0x00%02x%02x%02x' % channels

    named = NAME_MAP.get(hcol)
    if named is not None:
        # NAME_MAP stores '#RRGGBB'; convert to the same format the other
        # branches return.
        return '0x00' + named[1:].lower()

    return '0x00000000'
|  |      | ||||||
|  |      | ||||||
| @ -45,6 +45,7 @@ from libprs500 import filename_to_utf8,  setup_cli_handlers, __appname__ | |||||||
| from libprs500.ptempfile import PersistentTemporaryFile | from libprs500.ptempfile import PersistentTemporaryFile | ||||||
| from libprs500.ebooks.metadata.opf import OPFReader | from libprs500.ebooks.metadata.opf import OPFReader | ||||||
| from libprs500.devices.interface import Device | from libprs500.devices.interface import Device | ||||||
|  | from libprs500.ebooks.lrf.html.color_map import lrs_color | ||||||
|          |          | ||||||
| class HTMLConverter(object): | class HTMLConverter(object): | ||||||
|     SELECTOR_PAT   = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") |     SELECTOR_PAT   = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") | ||||||
| @ -96,14 +97,20 @@ class HTMLConverter(object): | |||||||
|     # Fix Book Designer markup |     # Fix Book Designer markup | ||||||
|     BOOK_DESIGNER = [ |     BOOK_DESIGNER = [ | ||||||
|                      # Create header tags |                      # Create header tags | ||||||
|                      (re.compile('<h2.*?id=BookTitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), |                      (re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL), | ||||||
|                       lambda match : '<h1 align="center">%s</h1>'%(match.group(1),)), |                       lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))), | ||||||
|                      (re.compile('<h2.*?id=BookAuthor.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), |                      (re.compile('<h2.*?id=BookAuthor.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL), | ||||||
|                       lambda match : '<h2 align="right">%s</h2>'%(match.group(1),)), |                       lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))), | ||||||
|                      (re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), |                      (re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), | ||||||
|                       lambda match : '<h2>%s</h2>'%(match.group(1),)), |                       lambda match : '<h2>%s</h2>'%(match.group(1),)), | ||||||
|                      (re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), |                      (re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), | ||||||
|                       lambda match : '<h3>%s</h3>'%(match.group(1),)),  |                       lambda match : '<h3>%s</h3>'%(match.group(1),)), | ||||||
|  |                      # Blank lines | ||||||
|  |                      (re.compile('<div.*?>( ){4}</div>', re.IGNORECASE), | ||||||
|  |                       lambda match : '<p></p>'),  | ||||||
|  |                      # HR | ||||||
|  |                      (re.compile('<hr>', re.IGNORECASE), | ||||||
|  |                       lambda match : '<span style="page-break-after:always"> </span>'), | ||||||
|                      ] |                      ] | ||||||
|      |      | ||||||
|     def __hasattr__(self, attr): |     def __hasattr__(self, attr): | ||||||
| @ -196,6 +203,8 @@ class HTMLConverter(object): | |||||||
|                         'content':re.compile('Baen', re.IGNORECASE)})) |                         'content':re.compile('Baen', re.IGNORECASE)})) | ||||||
|      |      | ||||||
|     def start_on_file(self, path, is_root=True, link_level=0): |     def start_on_file(self, path, is_root=True, link_level=0): | ||||||
|  |         self.css = HTMLConverter.CSS.copy() | ||||||
|  |         self.pseudo_css = {} | ||||||
|         path = os.path.abspath(path) |         path = os.path.abspath(path) | ||||||
|         os.chdir(os.path.dirname(path)) |         os.chdir(os.path.dirname(path)) | ||||||
|         self.file_name = os.path.basename(path) |         self.file_name = os.path.basename(path) | ||||||
| @ -210,6 +219,8 @@ class HTMLConverter(object): | |||||||
|         if self.pdftohtml: |         if self.pdftohtml: | ||||||
|             nmassage.extend(HTMLConverter.PDFTOHTML) |             nmassage.extend(HTMLConverter.PDFTOHTML) | ||||||
|             #raw = unicode(raw, 'utf8', 'replace') |             #raw = unicode(raw, 'utf8', 'replace') | ||||||
|  |         if self.book_designer: | ||||||
|  |             nmassage.extend(HTMLConverter.BOOK_DESIGNER) | ||||||
|         try: |         try: | ||||||
|             soup = BeautifulSoup(raw,  |             soup = BeautifulSoup(raw,  | ||||||
|                          convertEntities=BeautifulSoup.HTML_ENTITIES, |                          convertEntities=BeautifulSoup.HTML_ENTITIES, | ||||||
| @ -225,6 +236,13 @@ class HTMLConverter(object): | |||||||
|             self.baen = True |             self.baen = True | ||||||
|             self.logger.info('Baen file detected. Re-parsing...') |             self.logger.info('Baen file detected. Re-parsing...') | ||||||
|             return self.start_on_file(path, is_root=is_root, link_level=link_level) |             return self.start_on_file(path, is_root=is_root, link_level=link_level) | ||||||
|  |         if self.book_designer: | ||||||
|  |             t = soup.find(id='BookTitle') | ||||||
|  |             if t: | ||||||
|  |                 self.book.set_title(self.get_text(t)) | ||||||
|  |             a = soup.find(id='BookAuthor') | ||||||
|  |             if a: | ||||||
|  |                 self.book.set_author(self.get_text(a)) | ||||||
|         self.logger.info('\tConverting to BBeB...') |         self.logger.info('\tConverting to BBeB...') | ||||||
|         sys.stdout.flush()         |         sys.stdout.flush()         | ||||||
|         self.current_page = None |         self.current_page = None | ||||||
| @ -234,8 +252,6 @@ class HTMLConverter(object): | |||||||
|         match = self.PAGE_BREAK_PAT.search(unicode(soup)) |         match = self.PAGE_BREAK_PAT.search(unicode(soup)) | ||||||
|         if match and not re.match('avoid', match.group(1), re.IGNORECASE): |         if match and not re.match('avoid', match.group(1), re.IGNORECASE): | ||||||
|             self.page_break_found = True |             self.page_break_found = True | ||||||
|         self.css = HTMLConverter.CSS.copy() |  | ||||||
|         self.pseudo_css = {} |  | ||||||
|         self.target_prefix = path |         self.target_prefix = path | ||||||
|         self.links[path] = [] |         self.links[path] = [] | ||||||
|         self.previous_text = '\n' |         self.previous_text = '\n' | ||||||
| @ -278,7 +294,7 @@ class HTMLConverter(object): | |||||||
|         Parses a style attribute. The code within a CSS selector block or in |         Parses a style attribute. The code within a CSS selector block or in | ||||||
|         the style attribute of an HTML element. |         the style attribute of an HTML element. | ||||||
|         @return: A dictionary with one entry for each property where the key  |         @return: A dictionary with one entry for each property where the key  | ||||||
|                  is the property name and the value is the property value. |                 is the property name and the value is the property value. | ||||||
|         """ |         """ | ||||||
|         prop = dict() |         prop = dict() | ||||||
|         for s in props.split(';'): |         for s in props.split(';'): | ||||||
| @ -301,7 +317,7 @@ class HTMLConverter(object): | |||||||
|                 # however we need to as we don't do alignment at a block level. |                 # however we need to as we don't do alignment at a block level. | ||||||
|                 # float is removed by the process_alignment function. |                 # float is removed by the process_alignment function. | ||||||
|                 if chk.startswith('font') or chk == 'text-align' or \ |                 if chk.startswith('font') or chk == 'text-align' or \ | ||||||
|                 chk == 'float' or chk == 'white-space':  |                 chk == 'float' or chk == 'white-space' or chk == 'color': | ||||||
|                     temp[key] = pcss[key] |                     temp[key] = pcss[key] | ||||||
|             prop.update(temp) |             prop.update(temp) | ||||||
|              |              | ||||||
| @ -656,7 +672,11 @@ class HTMLConverter(object): | |||||||
|                     unneeded.append(prop) |                     unneeded.append(prop) | ||||||
|             for prop in unneeded: |             for prop in unneeded: | ||||||
|                 fp.pop(prop) |                 fp.pop(prop) | ||||||
|             elem = Span(text=src, **fp) if (fp or force_span_use) else src |             attrs = {} | ||||||
|  |             if 'color' in css: | ||||||
|  |                 attrs['textcolor'] = lrs_color(css['color']) | ||||||
|  |             attrs.update(fp) | ||||||
|  |             elem = Span(text=src, **attrs) if (attrs or force_span_use) else src | ||||||
|             if css.has_key('text-decoration'): |             if css.has_key('text-decoration'): | ||||||
|                 dec = css['text-decoration'].lower() |                 dec = css['text-decoration'].lower() | ||||||
|                 linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None |                 linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None | ||||||
| @ -1372,6 +1392,8 @@ class HTMLConverter(object): | |||||||
|         elif tagname == 'font': |         elif tagname == 'font': | ||||||
|             if tag.has_key('face'): |             if tag.has_key('face'): | ||||||
|                 tag_css['font-family'] = tag['face'] |                 tag_css['font-family'] = tag['face'] | ||||||
|  |             if tag.has_key('color'): | ||||||
|  |                 tag_css['color'] = tag['color'] | ||||||
|             self.process_children(tag, tag_css, tag_pseudo_css) |             self.process_children(tag, tag_css, tag_pseudo_css) | ||||||
|         elif tagname in ['br']: |         elif tagname in ['br']: | ||||||
|             self.line_break() |             self.line_break() | ||||||
|  | |||||||
| @ -442,6 +442,14 @@ class Book(Delegator): | |||||||
|         self.gc_count = 0 |         self.gc_count = 0 | ||||||
|          |          | ||||||
| 
 | 
 | ||||||
|  |     def set_title(self, title): | ||||||
|  |         ot = self.delegates[0].delegates[0].delegates[0].title | ||||||
|  |         self.delegates[0].delegates[0].delegates[0].title = (title, ot[1]) | ||||||
|  |          | ||||||
|  |     def set_author(self, author): | ||||||
|  |         ot = self.delegates[0].delegates[0].delegates[0].author | ||||||
|  |         self.delegates[0].delegates[0].delegates[0].author = (author, ot[1]) | ||||||
|  |      | ||||||
|     def create_text_style(self, **settings): |     def create_text_style(self, **settings): | ||||||
|         ans = TextStyle(**self.defaultTextStyle.attrs.copy()) |         ans = TextStyle(**self.defaultTextStyle.attrs.copy()) | ||||||
|         ans.update(settings) |         ans.update(settings) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user