diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index e0e9158f0e..945616a0ba 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -289,11 +289,12 @@ from calibre.ebooks.html.input import HTMLInput from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.pdf.output import PDFOutput +from calibre.ebooks.pdb.ereader.output import EREADEROutput from calibre.customize.profiles import input_profiles, output_profiles plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, - FB2Input, ODTInput, RTFInput] + FB2Input, ODTInput, RTFInput, EREADEROutput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ diff --git a/src/calibre/ebooks/pdb/ereader/output.py b/src/calibre/ebooks/pdb/ereader/output.py new file mode 100644 index 0000000000..034508b0da --- /dev/null +++ b/src/calibre/ebooks/pdb/ereader/output.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +__license__ = 'GPL 3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + +import os + +from calibre.customize.conversion import OutputFormatPlugin, \ + OptionRecommendation +from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata +from calibre.ebooks.metadata import authors_to_string + +class EREADEROutput(OutputFormatPlugin): + + name = 'eReader PDB Output' + author = 'John Schember' + file_type = 'erpdb' + + def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml + +# print html_to_pml('

“A hundred kisses from the Princess,” said he, “or else let everyone keep his own!”

') + print html_to_pml(str(oeb_book.spine[3])) diff --git a/src/calibre/ebooks/pdb/ereader/pmlconverter.py b/src/calibre/ebooks/pdb/ereader/pmlconverter.py index 250b74eb56..8ff30e9349 100644 --- a/src/calibre/ebooks/pdb/ereader/pmlconverter.py +++ b/src/calibre/ebooks/pdb/ereader/pmlconverter.py @@ -13,49 +13,49 @@ import re from calibre.ebooks.htmlsymbols import HTML_SYMBOLS PML_HTML_RULES = [ - (re.compile('\\\\p'), lambda match: '

'), - (re.compile('\\\\x(?P.+?)\\\\x', re.DOTALL), lambda match: '

%s

' % match.group('text')), - (re.compile('\\\\X(?P[0-4])(?P.+?)\\\\X[0-4]', re.DOTALL), lambda match: '%i' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)), - (re.compile('\\\\C\d=".+"'), lambda match: ''), # This should be made to create a TOC entry - (re.compile('\\\\c(?P.+?)\\\\c', re.DOTALL), lambda match: '
%s
' % match.group('text')), - (re.compile('\\\\r(?P.+?)\\\\r', re.DOTALL), lambda match: '
%s
' % match.group('text')), - (re.compile('\\\\i(?P.+?)\\\\i', re.DOTALL), lambda match: '%s' % match.group('text')), - (re.compile('\\\\u(?P.+?)\\\\u', re.DOTALL), lambda match: '
%s
' % match.group('text')), - (re.compile('\\\\o(?P.+?)\\\\o', re.DOTALL), lambda match: '%s' % match.group('text')), - (re.compile('\\\\v(?P.+?)\\\\v', re.DOTALL), lambda match: '' % match.group('text')), - (re.compile('\\\\t(?P.+?)\\\\t', re.DOTALL), lambda match: '
%s
' % match.group('text')), - (re.compile('\\\\T="(?P\d+%*)"(?P.+?)$', re.MULTILINE), lambda match: '
%s
' % (match.group('val'), match.group('text'))), - (re.compile('\\\\w="(?P\d+)%"'), lambda match: '
' % match.group('val')), - (re.compile('\\\\n'), lambda match: ''), - (re.compile('\\\\s'), lambda match: ''), - (re.compile('\\\\b(?P.+?)\\\\b', re.DOTALL), lambda match: '%s' % match.group('text')), # \b is deprecated; \B should be used instead. - (re.compile('\\\\l(?P.+?)\\\\l', re.DOTALL), lambda match: '%s' % match.group('text')), - (re.compile('\\\\B(?P.+?)\\\\B', re.DOTALL), lambda match: '%s' % match.group('text')), - (re.compile('\\\\Sp(?P.+?)\\\\Sp', re.DOTALL), lambda match: '%s' % match.group('text')), - (re.compile('\\\\Sb(?P.+?)\\\\Sb', re.DOTALL), lambda match: '%s' % match.group('text')), - (re.compile('\\\\k(?P.+?)\\\\k', re.DOTALL), lambda match: '%s' % match.group('text')), - (re.compile('\\\\a(?P\d\d\d)'), lambda match: '&#%s;' % match.group('num')), - (re.compile('\\\\U(?P\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))), - (re.compile('\\\\m="(?P.+?)"'), lambda match: '' % match.group('name')), - (re.compile('\\\\q="(?P#.+?)"(?P)\\\\q', re.DOTALL), lambda match: '%s' % (match.group('target'), match.group('text'))), - (re.compile('\\\\Q="(?P.+?)"'), lambda match: '
' % match.group('target')), - (re.compile('\\\\-'), lambda match: ''), - (re.compile('\\\\Fn="(?P.+?)"(?P.+?)\\\\Fn'), lambda match: '%s' % (match.group('target'), match.group('text'))), - (re.compile('\\\\Sd="(?P.+?)"(?P.+?)\\\\Sd'), lambda match: '%s' % (match.group('target'), match.group('text'))), - (re.compile('\\\\I'), lambda match: ''), + (re.compile(r'\\p'), lambda match: '

'), + (re.compile(r'\\x(?P.+?)\\x', re.DOTALL), lambda match: '

%s

' % match.group('text')), + (re.compile(r'\\X(?P[0-4])(?P.+?)\\X[0-4]', re.DOTALL), lambda match: '%s' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)), + (re.compile(r'\\C\d=".+"'), lambda match: ''), # This should be made to create a TOC entry + (re.compile(r'\\c(?P.+?)\\c', re.DOTALL), lambda match: '
%s
' % match.group('text')), + (re.compile(r'\\r(?P.+?)\\r', re.DOTALL), lambda match: '
%s
' % match.group('text')), + (re.compile(r'\\i(?P.+?)\\i', re.DOTALL), lambda match: '%s' % match.group('text')), + (re.compile(r'\\u(?P.+?)\\u', re.DOTALL), lambda match: '
%s
' % match.group('text')), + (re.compile(r'\\o(?P.+?)\\o', re.DOTALL), lambda match: '%s' % match.group('text')), + (re.compile(r'\\v(?P.+?)\\v', re.DOTALL), lambda match: '' % match.group('text')), + (re.compile(r'\\t(?P.+?)\\t', re.DOTALL), lambda match: '
%s
' % match.group('text')), + (re.compile(r'\\T="(?P\d+)%%*"(?P.+?)$', re.MULTILINE), lambda match: '
%s
' % (match.group('val'), match.group('text'))), + (re.compile(r'\\w="(?P\d+)%%"'), lambda match: '
' % match.group('val')), + (re.compile(r'\\n'), lambda match: ''), + (re.compile(r'\\s'), lambda match: ''), + (re.compile(r'\\b(?P.+?)\\b', re.DOTALL), lambda match: '%s' % match.group('text')), # \b is deprecated; \B should be used instead. + (re.compile(r'\\l(?P.+?)\\l', re.DOTALL), lambda match: '%s' % match.group('text')), + (re.compile(r'\\B(?P.+?)\\B', re.DOTALL), lambda match: '%s' % match.group('text')), + (re.compile(r'\\Sp(?P.+?)\\Sp', re.DOTALL), lambda match: '%s' % match.group('text')), + (re.compile(r'\\Sb(?P.+?)\\Sb', re.DOTALL), lambda match: '%s' % match.group('text')), + (re.compile(r'\\k(?P.+?)\\k', re.DOTALL), lambda match: '%s' % match.group('text')), + (re.compile(r'\\a(?P\d\d\d)'), lambda match: '&#%i;' % match.group('num')), + (re.compile(r'\\U(?P\d\d\d\d)'), lambda match: '&#%i;' % int(match.group('num'))), + (re.compile(r'\\m="(?P.+?)"'), lambda match: '' % match.group('name')), + (re.compile(r'\\q="(?P#.+?)"(?P)\\q', re.DOTALL), lambda match: '%s' % (match.group('target'), match.group('text'))), + (re.compile(r'\\Q="(?P.+?)"'), lambda match: '
' % match.group('target')), + (re.compile(r'\\-'), lambda match: ''), + (re.compile(r'\\Fn="(?P.+?)"(?P.+?)\\Fn'), lambda match: '%s' % (match.group('target'), match.group('text'))), + (re.compile(r'\\Sd="(?P.+?)"(?P.+?)\\Sd'), lambda match: '%s' % (match.group('target'), match.group('text'))), + (re.compile(r'\\I'), lambda match: ''), # eReader files are one paragraph per line. # This forces the lines to wrap properly. (re.compile('^(?P.+)$', re.MULTILINE), lambda match: '

%s

' % match.group('text')), # Remove unmatched plm codes. - (re.compile('(?<=[^\\\\])\\\\[pxcriouvtblBk]'), lambda match: ''), - (re.compile('(?<=[^\\\\])\\\\X[0-4]'), lambda match: ''), - (re.compile('(?<=[^\\\\])\\\\Sp'), lambda match: ''), - (re.compile('(?<=[^\\\\])\\\\Sb'), lambda match: ''), + (re.compile(r'(?<=[^\\])\\[pxcriouvtblBk]'), lambda match: ''), + (re.compile(r'(?<=[^\\])\\X[0-4]'), lambda match: ''), + (re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''), + (re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''), # Replace \\ with \. - (re.compile('\\\\\\\\'), lambda match: '\\'), + (re.compile(r'\\\\'), lambda match: '\\'), ] FOOTNOTE_HTML_RULES = [ @@ -66,6 +66,37 @@ SIDEBAR_HTML_RULES = [ (re.compile('(?P.+?)', re.DOTALL), lambda match: '') ] +HTML_PML_RULES = [ + (re.compile(r'\\'), lambda match: '\\\\'), + (re.compile('(?<=[^\n])[ ]*'), lambda match: '\n

'), + (re.compile('

(^\n|\r\n)'), lambda match: '\n'), + (re.compile('.+?).*?">(?P.+?)'), lambda match: '\\Sd="%s"%s\\Sd' % (match.group('target'), match.group('text'))), + (re.compile('.+?).*?">(?P.+?)'), lambda match: '\\Fn="%s"%s\\Fn' % (match.group('target'), match.group('text'))), + (re.compile('.+?).*?">'), lambda match: '\\\\Q="%s"' % match.group('target')), + (re.compile('#.+?).*?">(?P)', re.DOTALL), lambda match: '\\q="%s"%s\\q' % (match.group('target'), match.group('text'))), + (re.compile('.+?)".*?>'), lambda match: '\\m="%s"' % match.group('name')), + (re.compile('&#(?P\d\d\d\d);'), lambda match: '\\U%i' % int(match.group('num'))), + (re.compile('&#(?P\d\d\d);'), lambda match: '\\a%i' % match.group('num')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\k%s\\k' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\Sb%s\\Sb' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\Sp%s\\Sp' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\B%s\\B' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\l%s\\l' % match.group('text')), + (re.compile('\d+)%%".*?>'), lambda match: '\\w="%s%%"' % match.group('val')), + (re.compile('\d+)%%*;.*?>(?P.+?)', re.MULTILINE), lambda match: '\\T="%i%%"%s$' % (match.group('val'), match.group('text'))), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\t%s\\t' % match.group('text')), + (re.compile('', re.DOTALL), lambda match: '\\v%s\\v' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\o%s\\o' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\u%s\\u' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\\\i%s\\i' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\r%s\\r' % match.group('text')), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\c%s\\c' % match.group('text')), + (re.compile('[0-4]).*?>(?P.+?)', re.DOTALL), 
lambda match: '\\X%i%s\\X%i' % (int(match.group('val')) + 1, match.group('text'), int(match.group('val')) + 1)), + (re.compile('(?P.+?)', re.DOTALL), lambda match: '\\x%s\\x' % match.group('text')), + (re.compile(''), lambda match: '\\p'), + (re.compile('<.*?>'), lambda match: ''), + (re.compile(r'(\\p){2,}'), lambda match: r'\p'), +] def pml_to_html(pml): html = pml @@ -95,3 +126,12 @@ def sidebar_to_html(sidebars): html = pml_to_html(html) return html + +def html_to_pml(html): + pml = html + for rule in HTML_PML_RULES: + pml = rule[0].sub(rule[1], pml) + + # Replace symbols outside of cp1252 with \Uxxxx + + return pml diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index f9b58633a6..8a0abb970e 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -227,8 +227,3 @@ class Reader(FormatReader): with open(name, 'wb') as imgf: imgf.write(img) - -class EreaderMetadata(object): - - def __init__(self, record): - pass diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py new file mode 100644 index 0000000000..c9493d2915 --- /dev/null +++ b/src/calibre/ebooks/pdb/ereader/writer.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +from __future__ import with_statement +''' +Write content to ereader pdb file. 
+''' + +from calibre.ebooks.pdb.ereader.pmlconverter import html_to_pml + +class Writer(object): + + def __init__(self, log): + self.oeb_book = oeb_book + + def dump(oeb_book): + pml_pages = [] + for page in oeb_book.spine: + pml_pages.append(html_to_pml(page)) + + + \ No newline at end of file diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py index efa727dac9..5b47e48a16 100644 --- a/src/calibre/ebooks/pdb/header.py +++ b/src/calibre/ebooks/pdb/header.py @@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en' import os, struct -class PdbHeader(object): +class PdbHeaderReader(object): def __init__(self, stream): self.stream = stream @@ -58,3 +58,20 @@ class PdbHeader(object): end = self.section_offset(number + 1) self.stream.seek(start) return self.stream.read(end - start) + + +class PdbHeaderWriter(object): + + def __init__(self, identity, title): + self.identity = identity[:8] + self.title = title.ljust(32, '\x00')[:32] + + def build_header(self, sections) + ''' + Sections is a list of section offsets + ''' + + + + + return header diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py index 9d848b1c24..180e0814a6 100644 --- a/src/calibre/ebooks/pdb/input.py +++ b/src/calibre/ebooks/pdb/input.py @@ -20,7 +20,7 @@ class PDBInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): - header = PdbHeader(stream) + header = PdbHeaderReader(stream) Reader = get_reader(header.ident) if Reader is None: