mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-30 18:22:25 -04:00 
			
		
		
		
	Support the CSS color attribute and support reading the title and author from html0 files.
This commit is contained in:
		
							parent
							
								
									6bed1e2372
								
							
						
					
					
						commit
						16d1518d19
					
				
							
								
								
									
										125
									
								
								src/libprs500/ebooks/lrf/html/color_map.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								src/libprs500/ebooks/lrf/html/color_map.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,125 @@ | ||||
| ##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net | ||||
| ##    This program is free software; you can redistribute it and/or modify | ||||
| ##    it under the terms of the GNU General Public License as published by | ||||
| ##    the Free Software Foundation; either version 2 of the License, or | ||||
| ##    (at your option) any later version. | ||||
| ## | ||||
| ##    This program is distributed in the hope that it will be useful, | ||||
| ##    but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| ##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| ##    GNU General Public License for more details. | ||||
| ## | ||||
| ##    You should have received a copy of the GNU General Public License along | ||||
| ##    with this program; if not, write to the Free Software Foundation, Inc., | ||||
| ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
| 
 | ||||
| import re | ||||
| 
 | ||||
# Mapping of lower-case CSS colour keywords to their '#RRGGBB' values.
# Covers the full CSS3 extended keyword set (both "gray" and "grey"
# spellings). Keys must be lower-case with no surrounding whitespace,
# because lookups are performed on the lower-cased colour string.
NAME_MAP = {
    u'aliceblue': u'#F0F8FF',
    u'antiquewhite': u'#FAEBD7',
    u'aqua': u'#00FFFF',
    u'aquamarine': u'#7FFFD4',
    u'azure': u'#F0FFFF',
    u'beige': u'#F5F5DC',
    u'bisque': u'#FFE4C4',
    u'black': u'#000000',
    u'blanchedalmond': u'#FFEBCD',
    u'blue': u'#0000FF',
    u'blueviolet': u'#8A2BE2',
    u'brown': u'#A52A2A',
    u'burlywood': u'#DEB887',
    u'cadetblue': u'#5F9EA0',
    u'chartreuse': u'#7FFF00',
    u'chocolate': u'#D2691E',
    u'coral': u'#FF7F50',
    u'cornflowerblue': u'#6495ED',
    u'cornsilk': u'#FFF8DC',
    u'crimson': u'#DC143C',
    u'cyan': u'#00FFFF',
    u'darkblue': u'#00008B',
    u'darkcyan': u'#008B8B',
    u'darkgoldenrod': u'#B8860B',
    u'darkgray': u'#A9A9A9',
    u'darkgreen': u'#006400',
    u'darkgrey': u'#A9A9A9',
    u'darkkhaki': u'#BDB76B',
    u'darkmagenta': u'#8B008B',
    u'darkolivegreen': u'#556B2F',
    u'darkorange': u'#FF8C00',
    u'darkorchid': u'#9932CC',
    u'darkred': u'#8B0000',
    u'darksalmon': u'#E9967A',
    u'darkseagreen': u'#8FBC8F',
    u'darkslateblue': u'#483D8B',
    u'darkslategray': u'#2F4F4F',
    u'darkslategrey': u'#2F4F4F',
    u'darkturquoise': u'#00CED1',
    u'darkviolet': u'#9400D3',
    u'deeppink': u'#FF1493',
    u'deepskyblue': u'#00BFFF',
    u'dimgray': u'#696969',
    u'dimgrey': u'#696969',
    u'dodgerblue': u'#1E90FF',
    u'firebrick': u'#B22222',
    u'floralwhite': u'#FFFAF0',
    u'forestgreen': u'#228B22',
    u'fuchsia': u'#FF00FF',
    u'gainsboro': u'#DCDCDC',
    u'ghostwhite': u'#F8F8FF',
    u'gold': u'#FFD700',
    u'goldenrod': u'#DAA520',
    u'gray': u'#808080',
    u'green': u'#008000',
    u'greenyellow': u'#ADFF2F',
    u'grey': u'#808080',
    u'honeydew': u'#F0FFF0',
    u'hotpink': u'#FF69B4',
    u'indianred': u'#CD5C5C',
    u'indigo': u'#4B0082',
    u'ivory': u'#FFFFF0',
    u'khaki': u'#F0E68C',
    u'lavender': u'#E6E6FA',
    u'lavenderblush': u'#FFF0F5',
    u'lawngreen': u'#7CFC00',
    u'lemonchiffon': u'#FFFACD',
    u'lightblue': u'#ADD8E6',
    u'lightcoral': u'#F08080',
    u'lightcyan': u'#E0FFFF',
    u'lightgoldenrodyellow': u'#FAFAD2',
    u'lightgray': u'#D3D3D3',
    u'lightgreen': u'#90EE90',
    u'lightgrey': u'#D3D3D3',
    u'lightpink': u'#FFB6C1',
    u'lightsalmon': u'#FFA07A',
    u'lightseagreen': u'#20B2AA',
    u'lightskyblue': u'#87CEFA',
    u'lightslategray': u'#778899',
    u'lightslategrey': u'#778899',
    u'lightsteelblue': u'#B0C4DE',
    u'lightyellow': u'#FFFFE0',
    u'lime': u'#00FF00',
    u'limegreen': u'#32CD32',
    u'linen': u'#FAF0E6',
    u'magenta': u'#FF00FF',
    u'maroon': u'#800000',
    u'mediumaquamarine': u'#66CDAA',
    u'mediumblue': u'#0000CD',
    u'mediumorchid': u'#BA55D3',
    u'mediumpurple': u'#9370DB',
    u'mediumseagreen': u'#3CB371',
    u'mediumslateblue': u'#7B68EE',
    u'mediumspringgreen': u'#00FA9A',
    u'mediumturquoise': u'#48D1CC',
    u'mediumvioletred': u'#C71585',
    u'midnightblue': u'#191970',
    u'mintcream': u'#F5FFFA',
    u'mistyrose': u'#FFE4E1',
    u'moccasin': u'#FFE4B5',
    u'navajowhite': u'#FFDEAD',
    u'navy': u'#000080',
    u'oldlace': u'#FDF5E6',
    u'olive': u'#808000',
    u'olivedrab': u'#6B8E23',
    u'orange': u'#FFA500',
    u'orangered': u'#FF4500',
    u'orchid': u'#DA70D6',
    u'palegoldenrod': u'#EEE8AA',
    u'palegreen': u'#98FB98',
    u'paleturquoise': u'#AFEEEE',
    u'palevioletred': u'#DB7093',
    u'papayawhip': u'#FFEFD5',
    u'peachpuff': u'#FFDAB9',
    u'peru': u'#CD853F',
    u'pink': u'#FFC0CB',
    u'plum': u'#DDA0DD',
    u'powderblue': u'#B0E0E6',
    u'purple': u'#800080',
    u'red': u'#FF0000',
    u'rosybrown': u'#BC8F8F',
    u'royalblue': u'#4169E1',
    u'saddlebrown': u'#8B4513',
    u'salmon': u'#FA8072',
    u'sandybrown': u'#F4A460',
    u'seagreen': u'#2E8B57',
    u'seashell': u'#FFF5EE',
    u'sienna': u'#A0522D',
    u'silver': u'#C0C0C0',
    u'skyblue': u'#87CEEB',
    u'slateblue': u'#6A5ACD',
    u'slategray': u'#708090',
    u'slategrey': u'#708090',
    u'snow': u'#FFFAFA',
    u'springgreen': u'#00FF7F',
    u'steelblue': u'#4682B4',
    u'tan': u'#D2B48C',
    u'teal': u'#008080',
    u'thistle': u'#D8BFD8',
    u'tomato': u'#FF6347',
    u'turquoise': u'#40E0D0',
    u'violet': u'#EE82EE',
    u'wheat': u'#F5DEB3',
    u'white': u'#FFFFFF',
    u'whitesmoke': u'#F5F5F5',
    u'yellow': u'#FFFF00',
    u'yellowgreen': u'#9ACD32',
}
| 
 | ||||
# '#rrggbb' -- matched against the lower-cased input, hence a-f only.
hex_pat = re.compile(r'#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})')
# '#rgb' shorthand; the lookahead rejects 4- and 5-digit junk such as '#1234'.
short_hex_pat = re.compile(r'#([0-9a-f])([0-9a-f])([0-9a-f])(?![0-9a-f])')
# 'rgb(r, g, b)' with non-negative integer channels.
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)


def lrs_color(html_color):
    """
    Convert an HTML/CSS colour specification into a '0x00rrggbb' string.

    Recognised forms, tried in this order:
      * '#rrggbb' six digit hexadecimal
      * '#rgb' three digit hexadecimal shorthand (each digit is doubled)
      * 'rgb(r, g, b)' with integer channels; values above 255 are clamped
      * a colour keyword present in NAME_MAP

    Matching is case-insensitive and ignores surrounding whitespace.

    @param html_color: The colour specification (may be empty or None).
    @return: A string of the form '0x00rrggbb' with lower-case hex digits.
             '0x00000000' is returned when the colour is not recognised.
    """
    hcol = (html_color or '').strip().lower()

    match = hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(match.groups())

    match = short_hex_pat.search(hcol)
    if match:
        return '0x00' + ''.join(digit * 2 for digit in match.groups())

    match = rgb_pat.search(hcol)
    if match:
        # Zero-pad every channel so the result always has six hex digits.
        channels = tuple(min(int(value), 255) for value in match.groups())
        return '0x00%02x%02x%02x' % channels

    named = NAME_MAP.get(hcol)
    if named is not None:
        # NAME_MAP stores '#RRGGBB'; normalise to the same form as above.
        return '0x00' + named.lstrip('#').lower()

    return '0x00000000'
|      | ||||
|      | ||||
| @ -45,6 +45,7 @@ from libprs500 import filename_to_utf8,  setup_cli_handlers, __appname__ | ||||
| from libprs500.ptempfile import PersistentTemporaryFile | ||||
| from libprs500.ebooks.metadata.opf import OPFReader | ||||
| from libprs500.devices.interface import Device | ||||
| from libprs500.ebooks.lrf.html.color_map import lrs_color | ||||
|          | ||||
| class HTMLConverter(object): | ||||
|     SELECTOR_PAT   = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}") | ||||
| @ -96,14 +97,20 @@ class HTMLConverter(object): | ||||
|     # Fix Book Designer markup | ||||
|     BOOK_DESIGNER = [ | ||||
|                      # Create header tags | ||||
|                      (re.compile('<h2.*?id=BookTitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), | ||||
|                       lambda match : '<h1 align="center">%s</h1>'%(match.group(1),)), | ||||
|                      (re.compile('<h2.*?id=BookAuthor.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), | ||||
|                       lambda match : '<h2 align="right">%s</h2>'%(match.group(1),)), | ||||
|                      (re.compile('<h2.*?id=BookTitle.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL), | ||||
|                       lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))), | ||||
|                      (re.compile('<h2.*?id=BookAuthor.*?(align=)*(?(1)(\w+))*.*?>(.*?)</h2>', re.IGNORECASE|re.DOTALL), | ||||
|                       lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))), | ||||
|                      (re.compile('<span.*?id=title.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), | ||||
|                       lambda match : '<h2>%s</h2>'%(match.group(1),)), | ||||
|                      (re.compile('<span.*?id=subtitle.*?>(.*?)</span>', re.IGNORECASE|re.DOTALL), | ||||
|                       lambda match : '<h3>%s</h3>'%(match.group(1),)),  | ||||
|                       lambda match : '<h3>%s</h3>'%(match.group(1),)), | ||||
|                      # Blank lines | ||||
|                      (re.compile('<div.*?>( ){4}</div>', re.IGNORECASE), | ||||
|                       lambda match : '<p></p>'),  | ||||
|                      # HR | ||||
|                      (re.compile('<hr>', re.IGNORECASE), | ||||
|                       lambda match : '<span style="page-break-after:always"> </span>'), | ||||
|                      ] | ||||
|      | ||||
|     def __hasattr__(self, attr): | ||||
| @ -196,6 +203,8 @@ class HTMLConverter(object): | ||||
|                         'content':re.compile('Baen', re.IGNORECASE)})) | ||||
|      | ||||
|     def start_on_file(self, path, is_root=True, link_level=0): | ||||
|         self.css = HTMLConverter.CSS.copy() | ||||
|         self.pseudo_css = {} | ||||
|         path = os.path.abspath(path) | ||||
|         os.chdir(os.path.dirname(path)) | ||||
|         self.file_name = os.path.basename(path) | ||||
| @ -210,6 +219,8 @@ class HTMLConverter(object): | ||||
|         if self.pdftohtml: | ||||
|             nmassage.extend(HTMLConverter.PDFTOHTML) | ||||
|             #raw = unicode(raw, 'utf8', 'replace') | ||||
|         if self.book_designer: | ||||
|             nmassage.extend(HTMLConverter.BOOK_DESIGNER) | ||||
|         try: | ||||
|             soup = BeautifulSoup(raw,  | ||||
|                          convertEntities=BeautifulSoup.HTML_ENTITIES, | ||||
| @ -225,6 +236,13 @@ class HTMLConverter(object): | ||||
|             self.baen = True | ||||
|             self.logger.info('Baen file detected. Re-parsing...') | ||||
|             return self.start_on_file(path, is_root=is_root, link_level=link_level) | ||||
|         if self.book_designer: | ||||
|             t = soup.find(id='BookTitle') | ||||
|             if t: | ||||
|                 self.book.set_title(self.get_text(t)) | ||||
|             a = soup.find(id='BookAuthor') | ||||
|             if a: | ||||
|                 self.book.set_author(self.get_text(a)) | ||||
|         self.logger.info('\tConverting to BBeB...') | ||||
|         sys.stdout.flush()         | ||||
|         self.current_page = None | ||||
| @ -234,8 +252,6 @@ class HTMLConverter(object): | ||||
|         match = self.PAGE_BREAK_PAT.search(unicode(soup)) | ||||
|         if match and not re.match('avoid', match.group(1), re.IGNORECASE): | ||||
|             self.page_break_found = True | ||||
|         self.css = HTMLConverter.CSS.copy() | ||||
|         self.pseudo_css = {} | ||||
|         self.target_prefix = path | ||||
|         self.links[path] = [] | ||||
|         self.previous_text = '\n' | ||||
| @ -278,7 +294,7 @@ class HTMLConverter(object): | ||||
|         Parses a style attribute. The code within a CSS selector block or in | ||||
|         the style attribute of an HTML element. | ||||
|         @return: A dictionary with one entry for each property where the key  | ||||
|                  is the property name and the value is the property value. | ||||
|                 is the property name and the value is the property value. | ||||
|         """ | ||||
|         prop = dict() | ||||
|         for s in props.split(';'): | ||||
| @ -301,7 +317,7 @@ class HTMLConverter(object): | ||||
|                 # however we need to as we don't do alignment at a block level. | ||||
|                 # float is removed by the process_alignment function. | ||||
|                 if chk.startswith('font') or chk == 'text-align' or \ | ||||
|                 chk == 'float' or chk == 'white-space':  | ||||
|                 chk == 'float' or chk == 'white-space' or chk == 'color': | ||||
|                     temp[key] = pcss[key] | ||||
|             prop.update(temp) | ||||
|              | ||||
| @ -656,7 +672,11 @@ class HTMLConverter(object): | ||||
|                     unneeded.append(prop) | ||||
|             for prop in unneeded: | ||||
|                 fp.pop(prop) | ||||
|             elem = Span(text=src, **fp) if (fp or force_span_use) else src | ||||
|             attrs = {} | ||||
|             if 'color' in css: | ||||
|                 attrs['textcolor'] = lrs_color(css['color']) | ||||
|             attrs.update(fp) | ||||
|             elem = Span(text=src, **attrs) if (attrs or force_span_use) else src | ||||
|             if css.has_key('text-decoration'): | ||||
|                 dec = css['text-decoration'].lower() | ||||
|                 linepos = 'after' if dec == 'underline' else 'before' if dec == 'overline' else None | ||||
| @ -1372,6 +1392,8 @@ class HTMLConverter(object): | ||||
|         elif tagname == 'font': | ||||
|             if tag.has_key('face'): | ||||
|                 tag_css['font-family'] = tag['face'] | ||||
|             if tag.has_key('color'): | ||||
|                 tag_css['color'] = tag['color'] | ||||
|             self.process_children(tag, tag_css, tag_pseudo_css) | ||||
|         elif tagname in ['br']: | ||||
|             self.line_break() | ||||
|  | ||||
| @ -442,6 +442,14 @@ class Book(Delegator): | ||||
|         self.gc_count = 0 | ||||
|          | ||||
| 
 | ||||
|     def set_title(self, title): | ||||
|         ot = self.delegates[0].delegates[0].delegates[0].title | ||||
|         self.delegates[0].delegates[0].delegates[0].title = (title, ot[1]) | ||||
|          | ||||
|     def set_author(self, author): | ||||
|         ot = self.delegates[0].delegates[0].delegates[0].author | ||||
|         self.delegates[0].delegates[0].delegates[0].author = (author, ot[1]) | ||||
|      | ||||
|     def create_text_style(self, **settings): | ||||
|         ans = TextStyle(**self.defaultTextStyle.attrs.copy()) | ||||
|         ans.update(settings) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user