mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Unfinished ereader writer work.
This commit is contained in:
parent
f96cd13f62
commit
d871313ff0
@ -289,11 +289,12 @@ from calibre.ebooks.html.input import HTMLInput
|
|||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
from calibre.ebooks.pdf.output import PDFOutput
|
from calibre.ebooks.pdf.output import PDFOutput
|
||||||
|
from calibre.ebooks.pdb.ereader.output import EREADEROutput
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
||||||
FB2Input, ODTInput, RTFInput]
|
FB2Input, ODTInput, RTFInput, EREADEROutput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
23
src/calibre/ebooks/pdb/ereader/output.py
Normal file
23
src/calibre/ebooks/pdb/ereader/output.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||||
|
OptionRecommendation
|
||||||
|
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
|
||||||
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
|
|
||||||
|
class EREADEROutput(OutputFormatPlugin):
    '''
    Output plugin for the eReader PDB (``erpdb``) file type.

    Work in progress: :meth:`convert` does not write anything to
    ``output_path`` yet. It only prints the PML generated from the book's
    spine so the HTML to PML conversion can be inspected.
    '''

    name        = 'eReader PDB Output'
    author      = 'John Schember'
    file_type   = 'erpdb'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Print the PML produced for every item in ``oeb_book.spine``.

        :param oeb_book: Book object exposing an iterable ``spine``.
        :param output_path: Currently unused (nothing is written yet).
        :param input_plugin: Currently unused.
        :param opts: Currently unused.
        :param log: Currently unused.
        '''
        from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml

        # Walk the whole spine instead of indexing a fixed position: a book
        # with fewer than four spine items made ``spine[3]`` raise IndexError.
        # The call form of print is valid on both Python 2 and Python 3.
        # TODO: hand the PML to the eReader writer once it is finished.
        for item in oeb_book.spine:
            print(html_to_pml(str(item)))
|
@ -13,49 +13,49 @@ import re
|
|||||||
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
||||||
|
|
||||||
PML_HTML_RULES = [
|
PML_HTML_RULES = [
|
||||||
(re.compile('\\\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
|
(re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
|
||||||
(re.compile('\\\\x(?P<text>.+?)\\\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text')),
|
(re.compile(r'\\x(?P<text>.+?)\\x', re.DOTALL), lambda match: '<h1 style="page-break-before: always;">%s</h1>' % match.group('text')),
|
||||||
(re.compile('\\\\X(?P<val>[0-4])(?P<text>.+?)\\\\X[0-4]', re.DOTALL), lambda match: '<h%i style="page-break-before: always;">%i</h%i>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)),
|
(re.compile(r'\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', re.DOTALL), lambda match: '<h%i style="page-break-before: always;">%s</h%i>' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)),
|
||||||
(re.compile('\\\\C\d=".+"'), lambda match: ''), # This should be made to create a TOC entry
|
(re.compile(r'\\C\d=".+"'), lambda match: ''), # This should be made to create a TOC entry
|
||||||
(re.compile('\\\\c(?P<text>.+?)\\\\c', re.DOTALL), lambda match: '<div style="text-align: center; display: block; margin: auto;">%s</div>' % match.group('text')),
|
(re.compile(r'\\c(?P<text>.+?)\\c', re.DOTALL), lambda match: '<div style="text-align: center; display: block; margin: auto;">%s</div>' % match.group('text')),
|
||||||
(re.compile('\\\\r(?P<text>.+?)\\\\r', re.DOTALL), lambda match: '<div style="text-align: right; display: block;">%s</div>' % match.group('text')),
|
(re.compile(r'\\r(?P<text>.+?)\\r', re.DOTALL), lambda match: '<div style="text-align: right; display: block;">%s</div>' % match.group('text')),
|
||||||
(re.compile('\\\\i(?P<text>.+?)\\\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text')),
|
(re.compile(r'\\i(?P<text>.+?)\\i', re.DOTALL), lambda match: '<i>%s</i>' % match.group('text')),
|
||||||
(re.compile('\\\\u(?P<text>.+?)\\\\u', re.DOTALL), lambda match: '<div style="text-decoration: underline;">%s</div>' % match.group('text')),
|
(re.compile(r'\\u(?P<text>.+?)\\u', re.DOTALL), lambda match: '<div style="text-decoration: underline;">%s</div>' % match.group('text')),
|
||||||
(re.compile('\\\\o(?P<text>.+?)\\\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text')),
|
(re.compile(r'\\o(?P<text>.+?)\\o', re.DOTALL), lambda match: '<del>%s</del>' % match.group('text')),
|
||||||
(re.compile('\\\\v(?P<text>.+?)\\\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text')),
|
(re.compile(r'\\v(?P<text>.+?)\\v', re.DOTALL), lambda match: '<!-- %s -->' % match.group('text')),
|
||||||
(re.compile('\\\\t(?P<text>.+?)\\\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%">%s</div>' % match.group('text')),
|
(re.compile(r'\\t(?P<text>.+?)\\t', re.DOTALL), lambda match: '<div style="margin-left: 5%%;">%s</div>' % match.group('text')),
|
||||||
(re.compile('\\\\T="(?P<val>\d+%*)"(?P<text>.+?)$', re.MULTILINE), lambda match: '<div style="margin-left: %i%">%s</div>' % (match.group('val'), match.group('text'))),
|
(re.compile(r'\\T="(?P<val>\d+)%%*"(?P<text>.+?)$', re.MULTILINE), lambda match: '<div style="margin-left: %i%%;">%s</div>' % (match.group('val'), match.group('text'))),
|
||||||
(re.compile('\\\\w="(?P<val>\d+)%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
|
(re.compile(r'\\w="(?P<val>\d+)%%"'), lambda match: '<hr width="%s%%" />' % match.group('val')),
|
||||||
(re.compile('\\\\n'), lambda match: ''),
|
(re.compile(r'\\n'), lambda match: ''),
|
||||||
(re.compile('\\\\s'), lambda match: ''),
|
(re.compile(r'\\s'), lambda match: ''),
|
||||||
(re.compile('\\\\b(?P<text>.+?)\\\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')), # \b is deprecated; \B should be used instead.
|
(re.compile(r'\\b(?P<text>.+?)\\b', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')), # \b is deprecated; \B should be used instead.
|
||||||
(re.compile('\\\\l(?P<text>.+?)\\\\l', re.DOTALL), lambda match: '<big>%s</big>' % match.group('text')),
|
(re.compile(r'\\l(?P<text>.+?)\\l', re.DOTALL), lambda match: '<big>%s</big>' % match.group('text')),
|
||||||
(re.compile('\\\\B(?P<text>.+?)\\\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')),
|
(re.compile(r'\\B(?P<text>.+?)\\B', re.DOTALL), lambda match: '<b>%s</b>' % match.group('text')),
|
||||||
(re.compile('\\\\Sp(?P<text>.+?)\\\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text')),
|
(re.compile(r'\\Sp(?P<text>.+?)\\Sp', re.DOTALL), lambda match: '<sup>%s</sup>' % match.group('text')),
|
||||||
(re.compile('\\\\Sb(?P<text>.+?)\\\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text')),
|
(re.compile(r'\\Sb(?P<text>.+?)\\Sb', re.DOTALL), lambda match: '<sub>%s</sub>' % match.group('text')),
|
||||||
(re.compile('\\\\k(?P<text>.+?)\\\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')),
|
(re.compile(r'\\k(?P<text>.+?)\\k', re.DOTALL), lambda match: '<small>%s</small>' % match.group('text')),
|
||||||
(re.compile('\\\\a(?P<num>\d\d\d)'), lambda match: '&#%s;' % match.group('num')),
|
(re.compile(r'\\a(?P<num>\d\d\d)'), lambda match: '&#%i;' % match.group('num')),
|
||||||
(re.compile('\\\\U(?P<num>\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))),
|
(re.compile(r'\\U(?P<num>\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))),
|
||||||
(re.compile('\\\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % match.group('name')),
|
(re.compile(r'\\m="(?P<name>.+?)"'), lambda match: '<img src="images/%s" />' % match.group('name')),
|
||||||
(re.compile('\\\\q="(?P<target>#.+?)"(?P<text>)\\\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
|
(re.compile(r'\\q="(?P<target>#.+?)"(?P<text>)\\q', re.DOTALL), lambda match: '<a href="%s">%s</a>' % (match.group('target'), match.group('text'))),
|
||||||
(re.compile('\\\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')),
|
(re.compile(r'\\Q="(?P<target>.+?)"'), lambda match: '<div id="%s"></div>' % match.group('target')),
|
||||||
(re.compile('\\\\-'), lambda match: ''),
|
(re.compile(r'\\-'), lambda match: ''),
|
||||||
(re.compile('\\\\Fn="(?P<target>.+?)"(?P<text>.+?)\\\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text'))),
|
(re.compile(r'\\Fn="(?P<target>.+?)"(?P<text>.+?)\\Fn'), lambda match: '<a href="#footnote-%s">%s</a>' % (match.group('target'), match.group('text'))),
|
||||||
(re.compile('\\\\Sd="(?P<target>.+?)"(?P<text>.+?)\\\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
|
(re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
|
||||||
(re.compile('\\\\I'), lambda match: ''),
|
(re.compile(r'\\I'), lambda match: ''),
|
||||||
|
|
||||||
# eReader files are one paragraph per line.
|
# eReader files are one paragraph per line.
|
||||||
# This forces the lines to wrap properly.
|
# This forces the lines to wrap properly.
|
||||||
(re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),
|
(re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),
|
||||||
|
|
||||||
# Remove unmatched PML codes.
|
# Remove unmatched PML codes.
|
||||||
(re.compile('(?<=[^\\\\])\\\\[pxcriouvtblBk]'), lambda match: ''),
|
(re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''),
|
||||||
(re.compile('(?<=[^\\\\])\\\\X[0-4]'), lambda match: ''),
|
(re.compile(r'(?<=[^\\])\\X[0-4]'), lambda match: ''),
|
||||||
(re.compile('(?<=[^\\\\])\\\\Sp'), lambda match: ''),
|
(re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''),
|
||||||
(re.compile('(?<=[^\\\\])\\\\Sb'), lambda match: ''),
|
(re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''),
|
||||||
|
|
||||||
# Replace \\ with \.
|
# Replace \\ with \.
|
||||||
(re.compile('\\\\\\\\'), lambda match: '\\'),
|
(re.compile(r'\\\\'), lambda match: '\\'),
|
||||||
]
|
]
|
||||||
|
|
||||||
FOOTNOTE_HTML_RULES = [
|
FOOTNOTE_HTML_RULES = [
|
||||||
@ -66,6 +66,37 @@ SIDEBAR_HTML_RULES = [
|
|||||||
(re.compile('<sidebar id="(?P<id>.+?)">(?P<text>.+?)</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>')
|
(re.compile('<sidebar id="(?P<id>.+?)">(?P<text>.+?)</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Ordered (compiled pattern, replacement callable) pairs that rewrite HTML
# markup as PML codes. The rules are applied one after another, so the order
# is significant: specific tags are converted before the catch-all tag
# stripper near the end.
#
# Conventions used below:
#   * ``<tag\b[^>]*>`` keeps a rule inside a single tag and stops ``<b`` from
#     also matching ``<br>``, ``<big>`` or ``<body>`` (likewise ``<i``/``<img>``
#     and ``<p``/``<pre>``).
#   * Regular expressions are not format strings, so a literal percent sign
#     is written as a single ``%`` in the patterns.
#   * Captured groups are strings; they are passed through ``int()`` before
#     being used with ``%i``.
HTML_PML_RULES = [
    # Escape literal backslashes first so the PML codes emitted by the rules
    # below are not escaped a second time.
    (re.compile(r'\\'), lambda match: '\\\\'),

    # One paragraph per line.
    (re.compile(r'(?<=[^\n])[ ]*<p\b[^>]*>'), lambda match: '\n<p>'),
    (re.compile(r'</p>(?:\r\n|\n)'), lambda match: '\n'),

    # Sidebar and footnote links, anchors and internal links.
    (re.compile(r'<a\b[^>]*?href="#sidebar-(?P<target>[^"]+)"[^>]*>(?P<text>.+?)</a>'),
        lambda match: '\\Sd="%s"%s\\Sd' % (match.group('target'), match.group('text'))),
    (re.compile(r'<a\b[^>]*?href="#footnote-(?P<target>[^"]+)"[^>]*>(?P<text>.+?)</a>'),
        lambda match: '\\Fn="%s"%s\\Fn' % (match.group('target'), match.group('text'))),
    (re.compile(r'<div\b[^>]*?id="(?P<target>[^"]+)"[^>]*></div>'),
        lambda match: '\\Q="%s"' % match.group('target')),
    (re.compile(r'<a\b[^>]*?href="(?P<target>#[^"]+)"[^>]*>(?P<text>.+?)</a>', re.DOTALL),
        lambda match: '\\q="%s"%s\\q' % (match.group('target'), match.group('text'))),

    # Images.
    (re.compile(r'<img\b[^>]*?src="images/(?P<name>[^"]+)"[^>]*>'),
        lambda match: '\\m="%s"' % match.group('name')),

    # Numeric character references. Zero padding keeps the fixed digit count.
    # NOTE(review): the number is emitted in decimal; confirm against the PML
    # reference whether the four digit code is meant to be hexadecimal.
    (re.compile(r'&#(?P<num>\d\d\d\d);'), lambda match: '\\U%04i' % int(match.group('num'))),
    (re.compile(r'&#(?P<num>\d\d\d);'), lambda match: '\\a%03i' % int(match.group('num'))),

    # Inline formatting.
    (re.compile(r'<small\b[^>]*>(?P<text>.+?)</small>', re.DOTALL),
        lambda match: '\\k%s\\k' % match.group('text')),
    (re.compile(r'<sub\b[^>]*>(?P<text>.+?)</sub>', re.DOTALL),
        lambda match: '\\Sb%s\\Sb' % match.group('text')),
    (re.compile(r'<sup\b[^>]*>(?P<text>.+?)</sup>', re.DOTALL),
        lambda match: '\\Sp%s\\Sp' % match.group('text')),
    (re.compile(r'<b\b[^>]*>(?P<text>.+?)</b>', re.DOTALL),
        lambda match: '\\B%s\\B' % match.group('text')),
    (re.compile(r'<big\b[^>]*>(?P<text>.+?)</big>', re.DOTALL),
        lambda match: '\\l%s\\l' % match.group('text')),

    # Horizontal rule with a percentage width.
    (re.compile(r'<hr\b[^>]*?width="(?P<val>\d+)%"[^>]*>'),
        lambda match: '\\w="%s%%"' % match.group('val')),

    # Indented blocks: single-line content keeps its percentage, anything
    # left over (multi-line content) falls back to the fixed indent code.
    # NOTE(review): the percentage indent presumably runs to the end of the
    # line in PML; confirm whether a newline should follow the text.
    (re.compile(r'<div\b[^>]*?style[^>]*?margin-left: (?P<val>\d+)%;[^>]*>(?P<text>.+?)</div>'),
        lambda match: '\\T="%i%%"%s' % (int(match.group('val')), match.group('text'))),
    (re.compile(r'<div\b[^>]*?style[^>]*?margin-left: \d{1,3}%;[^>]*>(?P<text>.+?)</div>', re.DOTALL),
        lambda match: '\\t%s\\t' % match.group('text')),

    # Comments, strike-through, underline, italics.
    (re.compile(r'<!-- (?P<text>.+?) -->', re.DOTALL),
        lambda match: '\\v%s\\v' % match.group('text')),
    (re.compile(r'<del\b[^>]*>(?P<text>.+?)</del>', re.DOTALL),
        lambda match: '\\o%s\\o' % match.group('text')),
    (re.compile(r'<div\b[^>]*?style[^>]*?text-decoration: underline;[^>]*>(?P<text>.+?)</div>', re.DOTALL),
        lambda match: '\\u%s\\u' % match.group('text')),
    (re.compile(r'<i\b[^>]*>(?P<text>.+?)</i>', re.DOTALL),
        lambda match: '\\i%s\\i' % match.group('text')),

    # Alignment.
    (re.compile(r'<div\b[^>]*?style[^>]*?text-align: right;[^>]*>(?P<text>.+?)</div>', re.DOTALL),
        lambda match: '\\r%s\\r' % match.group('text')),
    (re.compile(r'<div\b[^>]*?style[^>]*?text-align: center;[^>]*>(?P<text>.+?)</div>', re.DOTALL),
        lambda match: '\\c%s\\c' % match.group('text')),

    # Headings: <h1>..<h5> become chapter codes X0..X4 (HTML level minus one).
    (re.compile(r'<h(?P<val>[1-5])\b[^>]*>(?P<text>.+?)</h(?P=val)>', re.DOTALL),
        lambda match: '\\X%i%s\\X%i' % (int(match.group('val')) - 1, match.group('text'), int(match.group('val')) - 1)),
    # NOTE(review): shadowed by the rule above, which already consumes every
    # <h1>...</h1> pair; kept so the precedence can be revisited.
    (re.compile(r'<h1\b[^>]*>(?P<text>.+?)</h1>', re.DOTALL),
        lambda match: '\\x%s\\x' % match.group('text')),

    # Line breaks, then drop every tag that is still left.
    (re.compile(r'<br\b[^>]*>'), lambda match: '\\p'),
    (re.compile(r'<.*?>'), lambda match: ''),

    # Collapse runs of the break code into a single one.
    (re.compile(r'(\\p){2,}'), lambda match: '\\p'),
]
|
||||||
|
|
||||||
def pml_to_html(pml):
|
def pml_to_html(pml):
|
||||||
html = pml
|
html = pml
|
||||||
@ -95,3 +126,12 @@ def sidebar_to_html(sidebars):
|
|||||||
html = pml_to_html(html)
|
html = pml_to_html(html)
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
def html_to_pml(html):
    '''
    Convert an HTML string to PML.

    Every (pattern, replacement) pair in HTML_PML_RULES is applied in list
    order, each one working on the output of the previous substitution.
    '''
    pml = html
    for pattern, replacement in HTML_PML_RULES:
        pml = pattern.sub(replacement, pml)

    # TODO: Replace symbols outside of cp1252 (presumably what the original
    # "cp1512" note meant) with \Uxxxx

    return pml
|
||||||
|
@ -227,8 +227,3 @@ class Reader(FormatReader):
|
|||||||
with open(name, 'wb') as imgf:
|
with open(name, 'wb') as imgf:
|
||||||
imgf.write(img)
|
imgf.write(img)
|
||||||
|
|
||||||
|
|
||||||
class EreaderMetadata(object):
|
|
||||||
|
|
||||||
def __init__(self, record):
|
|
||||||
pass
|
|
||||||
|
20
src/calibre/ebooks/pdb/ereader/writer.py
Normal file
20
src/calibre/ebooks/pdb/ereader/writer.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import with_statement
|
||||||
|
'''
|
||||||
|
Write content to ereader pdb file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml
|
||||||
|
|
||||||
|
class Writer(object):
    '''
    Converts the spine of an OEB book to PML pages.

    Work in progress: the pages are only generated, nothing is written to an
    eReader PDB file yet.
    '''

    def __init__(self, log):
        '''
        :param log: Logger object kept on the instance for later use.
        '''
        # The constructor only receives ``log``; no book is available here,
        # so the book is passed to dump() instead of being stored.
        self.log = log

    def dump(self, oeb_book):
        '''
        Convert every item of ``oeb_book.spine`` to PML.

        :param oeb_book: Book object exposing an iterable ``spine``.
        :return: List with one PML string per spine item, in spine order.
        '''
        # NOTE(review): html_to_pml runs regular expressions over its input,
        # so spine items presumably have to be serialised to text first;
        # verify against the spine item type.
        return [html_to_pml(page) for page in oeb_book.spine]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import os, struct
|
import os, struct
|
||||||
|
|
||||||
class PdbHeader(object):
|
class PdbHeaderReader(object):
|
||||||
|
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
self.stream = stream
|
self.stream = stream
|
||||||
@ -58,3 +58,20 @@ class PdbHeader(object):
|
|||||||
end = self.section_offset(number + 1)
|
end = self.section_offset(number + 1)
|
||||||
self.stream.seek(start)
|
self.stream.seek(start)
|
||||||
return self.stream.read(end - start)
|
return self.stream.read(end - start)
|
||||||
|
|
||||||
|
|
||||||
|
class PdbHeaderWriter(object):
    '''
    Builds the header of a Palm database (PDB) file.
    '''

    def __init__(self, identity, title):
        '''
        :param identity: Database type and creator codes joined together;
                         only the first 8 characters are kept.
        :param title: Database name; padded with NUL characters and cut to
                      32 characters.
        '''
        self.identity = identity[:8]
        self.title = title.ljust(32, '\x00')[:32]

    @staticmethod
    def _to_bytes(value):
        '''Return value as a byte string (text is encoded as ASCII).'''
        if isinstance(value, bytes):
            return value
        return value.encode('ascii', 'replace')

    def build_header(self, sections):
        '''
        Sections is a list of section offsets

        The offsets are written unchanged into the record list, so the caller
        must already have accounted for the size of the header itself, which
        is ``78 + 8 * len(sections) + 2`` bytes.

        :param sections: Sequence of integer record offsets.
        :return: Byte string holding the 78 byte database header, one 8 byte
                 record entry per section and 2 trailing NUL bytes.
        '''
        import time

        # NOTE(review): the dates are written as the current Unix time;
        # confirm the epoch the target reader expects.
        now = int(time.time())
        count = len(sections)

        parts = [struct.pack(
            # name (31 bytes + NUL terminator), attributes, version,
            # creation / modification / backup dates, modification number,
            # app info offset, sort info offset, type + creator,
            # unique id seed, next record list id, number of records
            '>31sxHHIIIIII8sIIH',
            self._to_bytes(self.title), 0, 0, now, now, 0, 0, 0, 0,
            self._to_bytes(self.identity), count, 0, count)]

        # Record entry: 4 byte offset, 1 byte attributes (left at 0) and a
        # 3 byte unique id (the record index).
        for index, offset in enumerate(sections):
            parts.append(struct.pack('>II', offset, index & 0xFFFFFF))

        # Two byte gap between the record list and the first record.
        parts.append(struct.pack('>H', 0))

        header = parts[0][:0].join(parts)
        return header
|
||||||
|
@ -20,7 +20,7 @@ class PDBInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
header = PdbHeader(stream)
|
header = PdbHeaderReader(stream)
|
||||||
Reader = get_reader(header.ident)
|
Reader = get_reader(header.ident)
|
||||||
|
|
||||||
if Reader is None:
|
if Reader is None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user