mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
ereader writer working
This commit is contained in:
parent
6ee829ff79
commit
e7ec12575d
@ -39,7 +39,7 @@ PML_HTML_RULES = [
|
||||
(re.compile(r'\\k(?P<text>.+?)\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')),
|
||||
(re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')),
|
||||
(re.compile(r'\\U(?P<num>\d+)'), lambda match: '%s' % unichr(int(match.group('num'), 16))),
|
||||
(re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % match.group('name')),
|
||||
(re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % image_name(match.group('name')).strip('\x00')),
|
||||
(re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.+?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
|
||||
(re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')),
|
||||
(re.compile(r'\\-'), lambda match: ''),
|
||||
@ -83,7 +83,7 @@ HTML_PML_RULES = [
|
||||
(re.compile('<div.*?id="(?P<target>.+?).*?"></div>'), lambda match: '\\\\Q="%s"' % match.group('target')),
|
||||
(re.compile('<a.*?href="(?P<target>#.+?).*?">(?P<text>)</a>', re.DOTALL), lambda match: '\\q="%s"%s\\q' % (match.group('target'), match.group('text'))),
|
||||
#(re.compile('<img.*?src="images/(?P<name>.+?)".*?>'), lambda match: '\\m="%s"' % match.group('name')),
|
||||
(re.compile('<img.*?src="(?P<name>.+?)".*?>(.*?</img>)*'), lambda match: '\\m="%s"' % image_name(match.group('name'))),
|
||||
(re.compile('<img.*?src="(?P<name>.+?)".*?>(.*?</img>)*'), lambda match: '\\m="%s"' % image_name(match.group('name').strip('\x00'))),
|
||||
#(re.compile('&#(?P<num>\d\d\d\d);'), lambda match: '\\U%s' % int(match.group('num'))),
|
||||
(re.compile('&#(?P<num>\d\d\d);'), lambda match: '\\a%s' % match.group('num')),
|
||||
(re.compile('<small .*?>(?P<text>.+?)</small>', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')),
|
||||
|
@ -76,7 +76,7 @@ class Reader(FormatReader):
|
||||
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
||||
return 'empty', ''
|
||||
data = self.section_data(number)
|
||||
name = data[4:4+32].strip('\0')
|
||||
name = data[4:4+32].strip('\x00')
|
||||
img = data[62:]
|
||||
return name, img
|
||||
|
||||
@ -97,7 +97,7 @@ class Reader(FormatReader):
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
html = '<html><head><title></title></head><body>'
|
||||
html = u'<html><head><title></title></head><body>'
|
||||
|
||||
for i in range(1, self.header_record.num_text_pages + 1):
|
||||
self.log.debug('Extracting text page %i' % i)
|
||||
@ -112,7 +112,6 @@ class Reader(FormatReader):
|
||||
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
|
||||
html += '</dl>'
|
||||
|
||||
|
||||
if self.header_record.sidebar_rec > 0:
|
||||
html += '<br /><h1>%s</h1>' % _('Sidebar')
|
||||
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
|
||||
@ -127,7 +126,8 @@ class Reader(FormatReader):
|
||||
with CurrentDir(output_dir):
|
||||
with open('index.html', 'wb') as index:
|
||||
self.log.debug('Writing text to index.html')
|
||||
index.write(html.encode('utf-8'))
|
||||
index.write(html)
|
||||
# print html
|
||||
|
||||
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
||||
os.makedirs(os.path.join(output_dir, 'images/'))
|
||||
|
Loading…
x
Reference in New Issue
Block a user