Pull from driver-dev

This commit is contained in:
Kovid Goyal 2009-04-23 22:33:14 -07:00
commit dc0cf57755
7 changed files with 156 additions and 42 deletions

View File

@ -290,11 +290,12 @@ from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pdb.ereader.output import EREADEROutput
from calibre.customize.profiles import input_profiles, output_profiles from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
FB2Input, ODTInput, RTFInput, EPUBOutput] FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')] x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin, \
OptionRecommendation
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
from calibre.ebooks.metadata import authors_to_string
class EREADEROutput(OutputFormatPlugin):
name = 'eReader PDB Output'
author = 'John Schember'
file_type = 'erpdb'
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml
# print html_to_pml('<p class="calibre1"> “A hundred kisses from the Princess,” said he, “or else let everyone keep his own!”</p>')
print html_to_pml(str(oeb_book.spine[3]))

View File

@ -13,49 +13,49 @@ import re
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
PML_HTML_RULES = [ PML_HTML_RULES = [
(re.compile('\\\\p'), lambda match: '<br /><br style="page-break-after: always;" />'), (re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
(re.compile('\\\\x(?P<text>.+?)\\\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text')), (re.compile(r'\\x(?P<text>.+?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text')),
(re.compile('\\\\X(?P<val>[0-4])(?P<text>.+?)\\\\X[0-4]', re.DOTALL), lambda match: '<h%i style="page-break-before: always;">%i</h%i>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)), (re.compile(r'\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', re.DOTALL), lambda match: '<h%i style="page-break-before: always;">%s</h%i>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)),
(re.compile('\\\\C\d=".+"'), lambda match: ''), # This should be made to create a TOC entry (re.compile(r'\\C\d=".+"'), lambda match: ''), # This should be made to create a TOC entry
(re.compile('\\\\c(?P<text>.+?)\\\\c', re.DOTALL), lambda match: '<div style="text-align: center; display: block; margin: auto;">%s</div>' % match.group('text')), (re.compile(r'\\c(?P<text>.+?)\\c', re.DOTALL), lambda match: '<div style="text-align: center; display: block; margin: auto;">%s</div>' % match.group('text')),
(re.compile('\\\\r(?P<text>.+?)\\\\r', re.DOTALL), lambda match: '<div style="text-align: right; display: block;">%s</div>' % match.group('text')), (re.compile(r'\\r(?P<text>.+?)\\r', re.DOTALL), lambda match: '<div style="text-align: right; display: block;">%s</div>' % match.group('text')),
(re.compile('\\\\i(?P<text>.+?)\\\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text')), (re.compile(r'\\i(?P<text>.+?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text')),
(re.compile('\\\\u(?P<text>.+?)\\\\u', re.DOTALL), lambda match: '<div style="text-decoration: underline;">%s</div>' % match.group('text')), (re.compile(r'\\u(?P<text>.+?)\\u', re.DOTALL), lambda match: '<div style="text-decoration: underline;">%s</div>' % match.group('text')),
(re.compile('\\\\o(?P<text>.+?)\\\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text')), (re.compile(r'\\o(?P<text>.+?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text')),
(re.compile('\\\\v(?P<text>.+?)\\\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text')), (re.compile(r'\\v(?P<text>.+?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text')),
(re.compile('\\\\t(?P<text>.+?)\\\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%">%s</div>' % match.group('text')), (re.compile(r'\\t(?P<text>.+?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text')),
(re.compile('\\\\T="(?P<val>\d+%*)"(?P<text>.+?)$', re.MULTILINE), lambda match: '<div style="margin-left: %i%">%s</div>' % (match.group('val'), match.group('text'))), (re.compile(r'\\T="(?P<val>\d+)%%*"(?P<text>.+?)$', re.MULTILINE), lambda match: '<div style="margin-left: %i%%;">%s</div>' % (match.group('val'), match.group('text'))),
(re.compile('\\\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')), (re.compile(r'\\w="(?P<val>\d+)%%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
(re.compile('\\\\n'), lambda match: ''), (re.compile(r'\\n'), lambda match: ''),
(re.compile('\\\\s'), lambda match: ''), (re.compile(r'\\s'), lambda match: ''),
(re.compile('\\\\b(?P<text>.+?)\\\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')), # \b is deprecated; \B should be used instead. (re.compile(r'\\b(?P<text>.+?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')), # \b is deprecated; \B should be used instead.
(re.compile('\\\\l(?P<text>.+?)\\\\l', re.DOTALL), lambda match: '<big>%s</big>' % match.group('text')), (re.compile(r'\\l(?P<text>.+?)\\l', re.DOTALL), lambda match: '<big>%s</big>' % match.group('text')),
(re.compile('\\\\B(?P<text>.+?)\\\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')), (re.compile(r'\\B(?P<text>.+?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')),
(re.compile('\\\\Sp(?P<text>.+?)\\\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text')), (re.compile(r'\\Sp(?P<text>.+?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text')),
(re.compile('\\\\Sb(?P<text>.+?)\\\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text')), (re.compile(r'\\Sb(?P<text>.+?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text')),
(re.compile('\\\\k(?P<text>.+?)\\\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')), (re.compile(r'\\k(?P<text>.+?)\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')),
(re.compile('\\\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')), (re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%i;' % match.group('num')),
(re.compile('\\\\U(?P<num>\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))), (re.compile(r'\\U(?P<num>\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))),
(re.compile('\\\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % match.group('name')), (re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % match.group('name')),
(re.compile('\\\\q="(?P<target>#.+?)"(?P<text>)\\\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))), (re.compile(r'\\q="(?P<target>#.+?)"(?P<text>)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
(re.compile('\\\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')), (re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')),
(re.compile('\\\\-'), lambda match: ''), (re.compile(r'\\-'), lambda match: ''),
(re.compile('\\\\Fn="(?P<target>.+?)"(?P<text>.+?)\\\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text'))), (re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.+?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text'))),
(re.compile('\\\\Sd="(?P<target>.+?)"(?P<text>.+?)\\\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))), (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
(re.compile('\\\\I'), lambda match: ''), (re.compile(r'\\I'), lambda match: ''),
# eReader files are one paragraph per line. # eReader files are one paragraph per line.
# This forces the lines to wrap properly. # This forces the lines to wrap properly.
(re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')), (re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),
# Remove unmatched plm codes. # Remove unmatched plm codes.
(re.compile('(?<=[^\\\\])\\\\[pxcriouvtblBk]'), lambda match: ''), (re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),
(re.compile('(?<=[^\\\\])\\\\X[0-4]'), lambda match: ''), (re.compile(r'(?<=[^\\])\\X[0-4]'), lambda match: ''),
(re.compile('(?<=[^\\\\])\\\\Sp'), lambda match: ''), (re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''),
(re.compile('(?<=[^\\\\])\\\\Sb'), lambda match: ''), (re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''),
# Replace \\ with \. # Replace \\ with \.
(re.compile('\\\\\\\\'), lambda match: '\\'), (re.compile(r'\\\\'), lambda match: '\\'),
] ]
FOOTNOTE_HTML_RULES = [ FOOTNOTE_HTML_RULES = [
@ -66,6 +66,37 @@ SIDEBAR_HTML_RULES = [
(re.compile('<sidebar id="(?P<id>.+?)">(?P<text>.+?)</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>') (re.compile('<sidebar id="(?P<id>.+?)">(?P<text>.+?)</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>')
] ]
HTML_PML_RULES = [
(re.compile(r'\\'), lambda match: '\\\\'),
(re.compile('(?<=[^\n])[ ]*<p.*?>'), lambda match: '\n<p>'),
(re.compile('</p>(^\n|\r\n)'), lambda match: '\n'),
(re.compile('<a.*?href="#sidebar-(?P<target>.+?).*?">(?P<text>.+?)</a>'), lambda match: '\\Sd="%s"%s\\Sd' % (match.group('target'), match.group('text'))),
(re.compile('<a.*?href="#footnote-(?P<target>.+?).*?">(?P<text>.+?)</a>'), lambda match: '\\Fn="%s"%s\\Fn' % (match.group('target'), match.group('text'))),
(re.compile('<div.*?id="(?P<target>.+?).*?"></div>'), lambda match: '\\\\Q="%s"' % match.group('target')),
(re.compile('<a.*?href="(?P<target>#.+?).*?">(?P<text>)</a>', re.DOTALL), lambda match: '\\q="%s"%s\\q' % (match.group('target'), match.group('text'))),
(re.compile('<img.*?src="images/(?P<name>.+?)".*?>'), lambda match: '\\m="%s"' % match.group('name')),
(re.compile('&#(?P<num>\d\d\d\d);'), lambda match: '\\U%i' % int(match.group('num'))),
(re.compile('&#(?P<num>\d\d\d);'), lambda match: '\\a%i' % match.group('num')),
(re.compile('<small.*?>(?P<text>.+?)</small>', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')),
(re.compile('<sub.*?>(?P<text>.+?)</sub>', re.DOTALL), lambda match: '\\Sb%s\\Sb' % match.group('text')),
(re.compile('<sup.*?>(?P<text>.+?)</sup>', re.DOTALL), lambda match: '\\Sp%s\\Sp' % match.group('text')),
(re.compile('<b.*?>(?P<text>.+?)</b>', re.DOTALL), lambda match: '\\B%s\\B' % match.group('text')),
(re.compile('<big.*?>(?P<text>.+?)</big>', re.DOTALL), lambda match: '\\l%s\\l' % match.group('text')),
(re.compile('<hr.*?width="(?P<val>\d+)%%".*?>'), lambda match: '\\w="%s%%"' % match.group('val')),
(re.compile('<div.*?style.*?margin-left: (?P<val>\d+)%%*;.*?>(?P<text>.+?)</div>', re.MULTILINE), lambda match: '\\T="%i%%"%s$' % (match.group('val'), match.group('text'))),
(re.compile('<div.*?style.*?margin-left: \d{1,3}%%;.*?>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\t%s\\t' % match.group('text')),
(re.compile('<!-- (?P<text>.+?) -->', re.DOTALL), lambda match: '\\v%s\\v' % match.group('text')),
(re.compile('<del.*?>(?P<text>.+?)</del>', re.DOTALL), lambda match: '\\o%s\\o' % match.group('text')),
(re.compile('<div.*?style.*?text-decoration: underline;.*?>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\u%s\\u' % match.group('text')),
(re.compile('<i.*?>(?P<text>.+?)</i>', re.DOTALL), lambda match: '\\\\i%s\\i' % match.group('text')),
(re.compile('<div.*?style.*?text-align: right;.*?>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\r%s\\r' % match.group('text')),
(re.compile('<div.*?style.*?text-align: center;.*?".*?>(?P<text>.+?)</div>', re.DOTALL), lambda match: '\\c%s\\c' % match.group('text')),
(re.compile('<h(?P<val>[0-4]).*?>(?P<text>.+?)</h[0-4]>', re.DOTALL), lambda match: '\\X%i%s\\X%i' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)),
(re.compile('<h1.*?>(?P<text>.+?)</h1>', re.DOTALL), lambda match: '\\x%s\\x' % match.group('text')),
(re.compile('<br.*?>'), lambda match: '\\p'),
(re.compile('<.*?>'), lambda match: ''),
(re.compile(r'(\\p){2,}'), lambda match: r'\p'),
]
def pml_to_html(pml): def pml_to_html(pml):
html = pml html = pml
@ -95,3 +126,12 @@ def sidebar_to_html(sidebars):
html = pml_to_html(html) html = pml_to_html(html)
return html return html
def html_to_pml(html):
pml = html
for rule in HTML_PML_RULES:
pml = rule[0].sub(rule[1], pml)
# Replace symbols outside of cp1512 wtih \Uxxxx
return pml

View File

@ -202,6 +202,10 @@ class Reader(FormatReader):
return os.path.join(output_dir, 'metadata.opf') return os.path.join(output_dir, 'metadata.opf')
def dump_pml(self): def dump_pml(self):
'''
This is primarily used for debugging and 3rd party tools to
get the plm markup that comprises the text in the file.
'''
pml = '' pml = ''
for i in range(1, self.header_record.num_text_pages + 1): for i in range(1, self.header_record.num_text_pages + 1):
@ -209,8 +213,17 @@ class Reader(FormatReader):
return pml return pml
def dump_images(self, output_dir):
'''
This is primarily used for debugging and 3rd party tools to
get the images in the file.
'''
if not os.path.exists(output_dir):
os.makedirs(output_dir)
class EreaderMetadata(object): with CurrentDir(output_dir):
for i in range(0, self.header_record.num_image_pages):
name, img = self.get_image(self.header_record.image_data_offset + i)
with open(name, 'wb') as imgf:
imgf.write(img)
def __init__(self, record):
pass

View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Write content to ereader pdb file.
'''
from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml
class Writer(object):
def __init__(self, log):
self.oeb_book = oeb_book
def dump(oeb_book):
pml_pages = []
for page in oeb_book.spine:
pml_pages.append(html_to_pml(page))

View File

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import os, struct import os, struct
class PdbHeader(object): class PdbHeaderReader(object):
def __init__(self, stream): def __init__(self, stream):
self.stream = stream self.stream = stream
@ -58,3 +58,20 @@ class PdbHeader(object):
end = self.section_offset(number + 1) end = self.section_offset(number + 1)
self.stream.seek(start) self.stream.seek(start)
return self.stream.read(end - start) return self.stream.read(end - start)
class PdbHeaderWriter(object):
def __init__(self, identity, title):
self.identity = identity[:8]
self.title = title.ljust(32, '\x00')[:32]
def build_header(self, sections)
'''
Sections is a list of section offsets
'''
return header

View File

@ -20,7 +20,7 @@ class PDBInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
header = PdbHeader(stream) header = PdbHeaderReader(stream)
Reader = get_reader(header.ident) Reader = get_reader(header.ident)
if Reader is None: if Reader is None: