Unfinished ereader writer work.

This commit is contained in:
John Schember 2009-04-23 19:09:13 -04:00
parent f96cd13f62
commit d871313ff0
7 changed files with 139 additions and 43 deletions

View File

@ -289,11 +289,12 @@ from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pdb.ereader.output import EREADEROutput
from calibre.customize.profiles import input_profiles, output_profiles from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
FB2Input, ODTInput, RTFInput] FB2Input, ODTInput, RTFInput, EREADEROutput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')] x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
from calibre.ebooks.metadata import authors_to_string
class EREADEROutput(OutputFormatPlugin):
    '''
    Output plugin registered for the eReader PDB format (file type erpdb).

    Work in progress: convert() currently only prints the PML rendering of
    a single spine item and does not write anything to output_path.
    '''

    name = 'eReader PDB Output'
    author = 'John Schember'
    file_type = 'erpdb'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Print the PML conversion of the fourth spine item of oeb_book.

        output_path, input_plugin, opts and log are accepted to satisfy the
        plugin interface but are not used yet.
        '''
        # Imported here rather than at module level, as in the original.
        from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml

        # NOTE(review): debugging aid with a hard-coded spine position;
        # presumably fails on books with fewer than four spine items.
        sample_item = oeb_book.spine[3]
        pml_text = html_to_pml(str(sample_item))
        print(pml_text)

View File

@ -13,49 +13,49 @@ import re
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
# Ordered list of (compiled pattern, replacement function) pairs that turn
# eReader PML markup into HTML. The rules are applied one after another to
# the whole text, so their order matters: paired codes are converted first,
# then every line is wrapped in a paragraph, then left-over unmatched codes
# are dropped and finally escaped backslashes are unescaped.
PML_HTML_RULES = [
    (re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
    (re.compile(r'\\x(?P<text>.+?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text')),
    (re.compile(r'\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', re.DOTALL), lambda match: '<h%i style="page-break-before: always;">%s</h%i>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)),
    # Non-greedy so that only the quoted title is removed, not everything
    # up to the last quote on the line.
    (re.compile(r'\\C\d=".+?"'), lambda match: ''), # This should be made to create a TOC entry
    (re.compile(r'\\c(?P<text>.+?)\\c', re.DOTALL), lambda match: '<div style="text-align: center; display: block; margin: auto;">%s</div>' % match.group('text')),
    (re.compile(r'\\r(?P<text>.+?)\\r', re.DOTALL), lambda match: '<div style="text-align: right; display: block;">%s</div>' % match.group('text')),
    (re.compile(r'\\i(?P<text>.+?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text')),
    (re.compile(r'\\u(?P<text>.+?)\\u', re.DOTALL), lambda match: '<div style="text-decoration: underline;">%s</div>' % match.group('text')),
    (re.compile(r'\\o(?P<text>.+?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text')),
    (re.compile(r'\\v(?P<text>.+?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text')),
    (re.compile(r'\\t(?P<text>.+?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text')),
    # The percent sign in the PML value is optional; the captured value is a
    # string, so it is formatted with %s.
    (re.compile(r'\\T="(?P<val>\d+)%?"(?P<text>.+?)$', re.MULTILINE), lambda match: '<div style="margin-left: %s%%;">%s</div>' % (match.group('val'), match.group('text'))),
    # A regex is not a format string: a single % matches a single %.
    (re.compile(r'\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
    (re.compile(r'\\n'), lambda match: ''),
    (re.compile(r'\\s'), lambda match: ''),
    (re.compile(r'\\b(?P<text>.+?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')), # \b is deprecated; \B should be used instead.
    (re.compile(r'\\l(?P<text>.+?)\\l', re.DOTALL), lambda match: '<big>%s</big>' % match.group('text')),
    (re.compile(r'\\B(?P<text>.+?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')),
    (re.compile(r'\\Sp(?P<text>.+?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text')),
    (re.compile(r'\\Sb(?P<text>.+?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text')),
    (re.compile(r'\\k(?P<text>.+?)\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')),
    # group() returns a string; convert it before formatting with %i.
    (re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))),
    # NOTE(review): the number is read as decimal here; confirm against the
    # PML specification whether \Uxxxx is meant to be hexadecimal.
    (re.compile(r'\\U(?P<num>\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))),
    (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % match.group('name')),
    # The link text sits between the target and the closing \q.
    (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>.+?)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
    (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')),
    (re.compile(r'\\-'), lambda match: ''),
    (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.+?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text'))),
    (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
    (re.compile(r'\\I'), lambda match: ''),

    # eReader files are one paragraph per line.
    # This forces the lines to wrap properly.
    (re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),

    # Remove unmatched pml codes.
    (re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),
    (re.compile(r'(?<=[^\\])\\X[0-4]'), lambda match: ''),
    (re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''),
    (re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''),

    # Replace \\ with \.
    (re.compile(r'\\\\'), lambda match: '\\'),
]
FOOTNOTE_HTML_RULES = [ FOOTNOTE_HTML_RULES = [
@ -66,6 +66,37 @@ SIDEBAR_HTML_RULES = [
(re.compile('<sidebar id="(?P<id>.+?)">(?P<text>.+?)</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>') (re.compile('<sidebar id="(?P<id>.+?)">(?P<text>.+?)</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>')
] ]
# Ordered list of (compiled pattern, replacement function) pairs that turn
# HTML into eReader PML markup. The rules are applied one after another to
# the whole text, so their order matters: backslashes are escaped first,
# specific tags are converted next, every remaining tag is stripped and
# runs of page breaks are collapsed last.
#
# Tag names are followed by a word boundary so that, for example, the <b>
# rule does not also swallow <big>, <br> or <body>, and attribute scans use
# [^>] so that they never run past the end of the opening tag.
HTML_PML_RULES = [
    # Escape literal backslashes so they are not read as PML codes.
    (re.compile(r'\\'), lambda match: '\\\\'),
    # PML is one paragraph per line: start every paragraph on its own line
    # and let the line break after a closing tag end it.
    (re.compile(r'(?<=[^\n])[ ]*<p\b[^>]*>'), lambda match: '\n<p>'),
    (re.compile(r'</p>(\r\n|\n)'), lambda match: '\n'),
    # Sidebar and footnote links must be handled before generic links.
    (re.compile(r'<a\b[^>]*?href="#sidebar-(?P<target>[^"]+)"[^>]*>(?P<text>.+?)</a>'), lambda match: '\\Sd="%s"%s\\Sd' % (match.group('target'), match.group('text'))),
    (re.compile(r'<a\b[^>]*?href="#footnote-(?P<target>[^"]+)"[^>]*>(?P<text>.+?)</a>'), lambda match: '\\Fn="%s"%s\\Fn' % (match.group('target'), match.group('text'))),
    # Empty div with an id is a link anchor.
    (re.compile(r'<div\b[^>]*?\sid="(?P<target>[^"]+)"[^>]*></div>'), lambda match: '\\Q="%s"' % match.group('target')),
    (re.compile(r'<a\b[^>]*?href="(?P<target>#[^"]+)"[^>]*>(?P<text>.+?)</a>', re.DOTALL), lambda match: '\\q="%s"%s\\q' % (match.group('target'), match.group('text'))),
    (re.compile(r'<img\b[^>]*?src="images/(?P<name>[^"]+)"[^>]*>'), lambda match: '\\m="%s"' % match.group('name')),
    # Numeric character references. The digits are kept at full width
    # (four for \U, three for \a) because the PML to HTML rules expect
    # exactly that many.
    # NOTE(review): the number is written as decimal here; confirm against
    # the PML specification whether \Uxxxx is meant to be hexadecimal.
    (re.compile(r'&#(?P<num>\d\d\d\d);'), lambda match: '\\U%04i' % int(match.group('num'))),
    (re.compile(r'&#(?P<num>\d\d\d);'), lambda match: '\\a%s' % match.group('num')),
    (re.compile(r'<small\b[^>]*>(?P<text>.+?)</small>', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')),
    (re.compile(r'<sub\b[^>]*>(?P<text>.+?)</sub>', re.DOTALL), lambda match: '\\Sb%s\\Sb' % match.group('text')),
    (re.compile(r'<sup\b[^>]*>(?P<text>.+?)</sup>', re.DOTALL), lambda match: '\\Sp%s\\Sp' % match.group('text')),
    (re.compile(r'<b\b[^>]*>(?P<text>.+?)</b>', re.DOTALL), lambda match: '\\B%s\\B' % match.group('text')),
    (re.compile(r'<big\b[^>]*>(?P<text>.+?)</big>', re.DOTALL), lambda match: '\\l%s\\l' % match.group('text')),
    # A regex is not a format string: a single % matches a single %.
    (re.compile(r'<hr\b[^>]*?width="(?P<val>\d+)%"[^>]*>'), lambda match: '\\w="%s%%"' % match.group('val')),
    # Indented block on a single line: keep the indent percentage.
    (re.compile(r'<div\b[^>]*?style[^>]*?margin-left:\s*(?P<val>\d+)%;?[^>]*>(?P<text>.+?)</div>'), lambda match: '\\T="%s%%"%s' % (match.group('val'), match.group('text'))),
    # Indented block spanning several lines: plain indent code.
    (re.compile(r'<div\b[^>]*?style[^>]*?margin-left:\s*\d{1,3}%;?[^>]*>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\t%s\\t' % match.group('text')),
    (re.compile(r'<!-- (?P<text>.+?) -->', re.DOTALL), lambda match: '\\v%s\\v' % match.group('text')),
    (re.compile(r'<del\b[^>]*>(?P<text>.+?)</del>', re.DOTALL), lambda match: '\\o%s\\o' % match.group('text')),
    (re.compile(r'<div\b[^>]*?style[^>]*?text-decoration: underline;[^>]*>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\u%s\\u' % match.group('text')),
    (re.compile(r'<i\b[^>]*>(?P<text>.+?)</i>', re.DOTALL), lambda match: '\\i%s\\i' % match.group('text')),
    (re.compile(r'<div\b[^>]*?style[^>]*?text-align: right;[^>]*>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\r%s\\r' % match.group('text')),
    (re.compile(r'<div\b[^>]*?style[^>]*?text-align: center;[^>]*>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\c%s\\c' % match.group('text')),
    # Inverse of the PML to HTML heading rule, which maps \Xn to <h(n+1)>:
    # <hN> therefore becomes \X(N-1).
    (re.compile(r'<h(?P<val>[1-5])\b[^>]*>(?P<text>.+?)</h(?P=val)>', re.DOTALL), lambda match: '\\X%i%s\\X%i' % (int(match.group('val')) - 1, match.group('text'), int(match.group('val')) - 1)),
    # Shadowed by the rule above for well-formed <h1> elements; kept in its
    # original position.
    (re.compile(r'<h1\b[^>]*>(?P<text>.+?)</h1>', re.DOTALL), lambda match: '\\x%s\\x' % match.group('text')),
    (re.compile(r'<br\b[^>]*>'), lambda match: '\\p'),
    # Drop every tag that has no PML equivalent.
    (re.compile(r'<.*?>'), lambda match: ''),
    # Collapse runs of page breaks into one.
    (re.compile(r'(\\p){2,}'), lambda match: r'\p'),
]
def pml_to_html(pml): def pml_to_html(pml):
html = pml html = pml
@ -95,3 +126,12 @@ def sidebar_to_html(sidebars):
html = pml_to_html(html) html = pml_to_html(html)
return html return html
def html_to_pml(html):
    '''
    Convert an HTML string to PML.

    Every (pattern, replacement) pair in HTML_PML_RULES is applied to the
    text in list order, each one working on the output of the previous one.
    The converted text is returned.
    '''
    pml = html
    for pattern, replacement in HTML_PML_RULES:
        pml = pattern.sub(replacement, pml)

    # TODO: replace symbols outside of cp1252 with \Uxxxx (not done yet).
    return pml

View File

@ -227,8 +227,3 @@ class Reader(FormatReader):
with open(name, 'wb') as imgf: with open(name, 'wb') as imgf:
imgf.write(img) imgf.write(img)
class EreaderMetadata(object):
def __init__(self, record):
pass

View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Write content to ereader pdb file.
'''
from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml
class Writer(object):
    '''
    Converts the spine of an OEB book into PML pages.
    '''

    def __init__(self, log):
        '''
        log is the logger handed in by the caller. It is only stored here;
        the book to convert is passed to dump().
        '''
        self.log = log

    def dump(self, oeb_book):
        '''
        Return a list holding the PML text of every item in oeb_book.spine,
        in spine order. An empty spine gives an empty list.
        '''
        # html_to_pml runs regular expressions over its argument, so each
        # spine item is turned into a string first, as the eReader output
        # plugin does. NOTE(review): confirm str() is the right
        # serialisation for spine items.
        return [html_to_pml(str(page)) for page in oeb_book.spine]

View File

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import os, struct import os, struct
class PdbHeader(object): class PdbHeaderReader(object):
def __init__(self, stream): def __init__(self, stream):
self.stream = stream self.stream = stream
@ -58,3 +58,20 @@ class PdbHeader(object):
end = self.section_offset(number + 1) end = self.section_offset(number + 1)
self.stream.seek(start) self.stream.seek(start)
return self.stream.read(end - start) return self.stream.read(end - start)
class PdbHeaderWriter(object):
    '''
    Holds the identity and title that go into a PDB file header.

    Building the header itself is not implemented yet.
    '''

    def __init__(self, identity, title):
        # The identity is cut to at most 8 characters.
        self.identity = identity[:8]
        # The title is NUL-padded and cut so it is always 32 characters.
        self.title = title.ljust(32, '\x00')[:32]

    def build_header(self, sections):
        '''
        Sections is a list of section offsets

        Raises NotImplementedError: the header layout has not been written
        yet, and failing loudly is clearer than returning an undefined name.
        '''
        raise NotImplementedError('Building a PDB header is not implemented yet.')

View File

@ -20,7 +20,7 @@ class PDBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
header = PdbHeader(stream) header = PdbHeaderReader(stream)
Reader = get_reader(header.ident) Reader = get_reader(header.ident)
if Reader is None: if Reader is None: