mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:...
This commit is contained in:
parent
6bd47906d4
commit
416f49f4c4
@ -63,4 +63,6 @@ def xml_to_unicode(raw, verbose=False):
|
||||
encoding = encoding.lower()
|
||||
if CHARSET_ALIASES.has_key(encoding):
|
||||
encoding = CHARSET_ALIASES[encoding]
|
||||
if encoding == 'ascii':
|
||||
encoding = 'utf-8'
|
||||
return raw.decode(encoding, 'ignore'), encoding
|
||||
|
@ -1,7 +1,8 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
import os, sys, logging, re, shutil
|
||||
import os, sys, logging, re, shutil, tempfile
|
||||
from lxml import html
|
||||
from lxml.etree import XPath
|
||||
get_text = XPath("//text()")
|
||||
@ -36,12 +37,30 @@ class HTMLProcessor(PreProcessor, LoggingInterface):
|
||||
|
||||
self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
|
||||
|
||||
if opts.verbose > 2:
|
||||
self.debug_tree('parsed')
|
||||
|
||||
self.extract_css()
|
||||
|
||||
if opts.verbose > 2:
|
||||
self.debug_tree('nocss')
|
||||
|
||||
self.collect_font_statistics()
|
||||
|
||||
self.split()
|
||||
|
||||
def debug_tree(self, name):
    '''
    Serialize the current tree (``self.root``) as UTF-8 encoded HTML into a
    file in the system temporary directory, so it can be inspected later.

    The file is named ``html2epub-<source file name>-<name>.html``.

    :param name: Label for the dump; it becomes the last component of the
                 dump's file name.
    '''
    dump_dir = tempfile.gettempdir()
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)

    source_name = os.path.basename(self.htmlfile.path)
    dump_path = os.path.join(dump_dir,
                             'html2epub-%s-%s.html' % (source_name, name))

    # Binary mode: html.tostring() is asked for an already-encoded string.
    with open(dump_path, 'wb') as dump:
        dump.write(html.tostring(self.root, encoding='utf-8'))
        self.log_debug(_('Written processed HTML to ')+dump.name)
|
||||
|
||||
def parse_html(self):
|
||||
''' Create lxml ElementTree from HTML '''
|
||||
self.log_info('\tParsing '+os.sep.join(self.htmlfile.path.split(os.sep)[-3:]))
|
||||
|
@ -11,7 +11,7 @@
|
||||
</head>
|
||||
<h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
|
||||
<p>
|
||||
This document contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf</span>, the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit<br/><span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
|
||||
This document contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf</span>, the HTML to LRF converter from <em>calibre.</em> To obtain calibre visit<br/><span style='font:sans-serif'>http://calibre.kovidgoyal.net</span>
|
||||
</p>
|
||||
<br/>
|
||||
<h2 id="toc">Table of Contents</h2>
|
||||
|
Loading…
x
Reference in New Issue
Block a user