mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Read support for the Haodoo PDB ebook format by Kan-Ru Chen. Fixes #976478 (Add Haodoo.net PDB format support)
This commit is contained in:
parent
0698e98ab8
commit
b9be0e215c
@ -289,7 +289,7 @@ class OPFMetadataReader(MetadataReaderPlugin):
|
|||||||
class PDBMetadataReader(MetadataReaderPlugin):
|
class PDBMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read PDB metadata'
|
name = 'Read PDB metadata'
|
||||||
file_types = set(['pdb'])
|
file_types = set(['pdb', 'updb'])
|
||||||
description = _('Read metadata from %s files') % 'PDB'
|
description = _('Read metadata from %s files') % 'PDB'
|
||||||
author = 'John Schember'
|
author = 'John Schember'
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class ParserError(ValueError):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
|
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
|
||||||
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||||
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
'epub', 'fb2', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
|
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
|
||||||
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md',
|
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'md',
|
||||||
|
@ -13,7 +13,7 @@ class PDBInput(InputFormatPlugin):
|
|||||||
name = 'PDB Input'
|
name = 'PDB Input'
|
||||||
author = 'John Schember'
|
author = 'John Schember'
|
||||||
description = 'Convert PDB to HTML'
|
description = 'Convert PDB to HTML'
|
||||||
file_types = set(['pdb'])
|
file_types = set(['pdb', 'updb'])
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
|
23
src/calibre/ebooks/metadata/haodoo.py
Normal file
23
src/calibre/ebooks/metadata/haodoo.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read meta information from Haodoo.net pdb files.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kan-Ru Chen <kanru@kanru.info>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
|
from calibre.ebooks.pdb.haodoo.reader import Reader
|
||||||
|
|
||||||
|
def get_metadata(stream, extract_cover=True):
    '''
    Read the metadata of a Haodoo.net PDB book from ``stream``.

    The stream is rewound to offset 0, its PDB header is parsed, and a
    Haodoo ``Reader`` built without a log or conversion options supplies
    the value of its ``get_metadata()`` call, which is returned unchanged.

    ``extract_cover`` is accepted but is not consulted by this function.
    '''
    stream.seek(0)
    haodoo = Reader(PdbHeaderReader(stream), stream, None, None)
    return haodoo.get_metadata()
|
@ -14,11 +14,14 @@ from calibre.ebooks.metadata import MetaInformation
|
|||||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
from calibre.ebooks.metadata.ereader import get_metadata as get_eReader
|
from calibre.ebooks.metadata.ereader import get_metadata as get_eReader
|
||||||
from calibre.ebooks.metadata.plucker import get_metadata as get_plucker
|
from calibre.ebooks.metadata.plucker import get_metadata as get_plucker
|
||||||
|
from calibre.ebooks.metadata.haodoo import get_metadata as get_Haodoo
|
||||||
|
|
||||||
MREADER = {
|
MREADER = {
|
||||||
'PNPdPPrs' : get_eReader,
|
'PNPdPPrs' : get_eReader,
|
||||||
'PNRdPPrs' : get_eReader,
|
'PNRdPPrs' : get_eReader,
|
||||||
'DataPlkr' : get_plucker,
|
'DataPlkr' : get_plucker,
|
||||||
|
'BOOKMTIT' : get_Haodoo,
|
||||||
|
'BOOKMTIU' : get_Haodoo,
|
||||||
}
|
}
|
||||||
|
|
||||||
from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
|
from calibre.ebooks.metadata.ereader import set_metadata as set_eReader
|
||||||
|
@ -16,6 +16,7 @@ def _import_readers():
|
|||||||
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
|
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
|
||||||
from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
|
from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
|
||||||
from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
|
from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
|
||||||
|
from calibre.ebooks.pdb.haodoo.reader import Reader as haodoo_reader
|
||||||
|
|
||||||
FORMAT_READERS = {
|
FORMAT_READERS = {
|
||||||
'PNPdPPrs': ereader_reader,
|
'PNPdPPrs': ereader_reader,
|
||||||
@ -24,6 +25,8 @@ def _import_readers():
|
|||||||
'TEXtREAd': palmdoc_reader,
|
'TEXtREAd': palmdoc_reader,
|
||||||
'.pdfADBE': pdf_reader,
|
'.pdfADBE': pdf_reader,
|
||||||
'DataPlkr': plucker_reader,
|
'DataPlkr': plucker_reader,
|
||||||
|
'BOOKMTIT': haodoo_reader,
|
||||||
|
'BOOKMTIU': haodoo_reader,
|
||||||
}
|
}
|
||||||
|
|
||||||
ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
|
ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
|
||||||
@ -47,6 +50,8 @@ IDENTITY_TO_NAME = {
|
|||||||
'TEXtREAd': 'PalmDOC',
|
'TEXtREAd': 'PalmDOC',
|
||||||
'.pdfADBE': 'Adobe Reader',
|
'.pdfADBE': 'Adobe Reader',
|
||||||
'DataPlkr': 'Plucker',
|
'DataPlkr': 'Plucker',
|
||||||
|
'BOOKMTIT': 'Haodoo.net',
|
||||||
|
'BOOKMTIU': 'Haodoo.net',
|
||||||
|
|
||||||
'BVokBDIC': 'BDicty',
|
'BVokBDIC': 'BDicty',
|
||||||
'DB99DBOS': 'DB (Database program)',
|
'DB99DBOS': 'DB (Database program)',
|
||||||
|
151
src/calibre/ebooks/pdb/haodoo/reader.py
Normal file
151
src/calibre/ebooks/pdb/haodoo/reader.py
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read content from Haodoo.net pdb file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kan-Ru Chen <kanru@kanru.info>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
import struct
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre import prepare_string_for_xml
|
||||||
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.txt.processor import opf_writer, HTML_TEMPLATE
|
||||||
|
|
||||||
|
BPDB_IDENT = b'BOOKMTIT'  # header identity of the legacy variant; Reader decodes such books as cp950
|
||||||
|
UPDB_IDENT = b'BOOKMTIU'  # header identity of the second (uPDB) variant; Reader falls back to utf_16_le for it
|
||||||
|
|
||||||
|
# Punctuation substitutions applied to every title and text line: mostly
# vertical-layout presentation forms mapped to their horizontal equivalents.
# NOTE(review): both square-bracket forms map to the pair "[]" and the last
# entry maps a space to itself -- presumably artifacts; confirm the intent.
punct_table = {
    u"︵": u"(",
    u"︶": u")",
    u"︷": u"{",
    u"︸": u"}",
    u"︹": u"〔",
    u"︺": u"〕",
    u"︻": u"【",
    u"︼": u"】",
    u"︗": u"〖",
    u"︘": u"〗",
    u"﹇": u"[]",
    u"﹈": u"[]",
    u"︽": u"《",
    u"︾": u"》",
    u"︿": u"〈",
    u"﹀": u"〉",
    u"﹁": u"「",
    u"﹂": u"」",
    u"﹃": u"『",
    u"﹄": u"』",
    u"|": u"—",
    u"︙": u"…",
    u"ⸯ": u"~",
    u"│": u"…",
    u"¦": u"…",
    u" ": u" ",
}

# punct_table re-keyed by code point, the form translate() expects. Built
# once at import so fix_punct() does not walk the dict on every call.
_PUNCT_TRANSLATION = dict((ord(key), value) for key, value in punct_table.items())


def fix_punct(line):
    '''
    Return ``line`` with every character listed in ``punct_table`` replaced
    by its mapped string.

    ``line`` must be text (already decoded). All keys are single characters
    and no replacement contains another key, so a single translate() pass
    gives the same result as replacing the keys one after another.
    '''
    return line.translate(_PUNCT_TRANSLATION)
|
||||||
|
|
||||||
|
class LegacyHeaderRecord(object):
    '''
    Parsed header record (section 0) of a legacy, cp950 encoded book.

    Attributes set by the constructor:

    * ``title`` -- book title, punctuation-normalised text
    * ``num_records`` -- value of the second field converted with int()
    * ``chapter_titles`` -- list of chapter titles, one per remaining field
    '''

    def __init__(self, raw):
        '''
        :param raw: bytes of the header record

        Raises IndexError when fewer than two fields are present and
        ValueError when the second field is not an integer.
        '''
        # A run of three ESC bytes is collapsed to one so that a single
        # split on ESC yields every field.
        fields = raw.lstrip().replace(b'\x1b\x1b\x1b', b'\x1b').split(b'\x1b')
        self.title = fix_punct(fields[0].decode('cp950', 'replace'))
        self.num_records = int(fields[1])
        # Built as a real list because the reader indexes into it (a lazy
        # map object is not subscriptable on Python 3). The NUL padding is
        # stripped with a text argument since the value is already decoded.
        self.chapter_titles = [
            fix_punct(field.decode('cp950', 'replace').rstrip(u'\x00'))
            for field in fields[2:]
        ]
|
||||||
|
|
||||||
|
class UnicodeHeaderRecord(object):
    '''
    Parsed header record (section 0) of a UTF-16-LE encoded book.

    Attributes set by the constructor:

    * ``title`` -- book title, punctuation-normalised text
    * ``num_records`` -- value of the second field converted with int()
    * ``chapter_titles`` -- list of chapter titles taken from the third
      field, which holds one title per CR LF terminated line
    '''

    def __init__(self, raw):
        '''
        :param raw: bytes of the header record

        Raises IndexError when fewer than three fields are present and
        ValueError when the second field is not an integer.
        '''
        # Same layout as the legacy record, but the ESC separator is a
        # two-byte UTF-16-LE code unit.
        fields = raw.lstrip().replace(b'\x1b\x00\x1b\x00\x1b\x00',
                                      b'\x1b\x00').split(b'\x1b\x00')
        self.title = fix_punct(fields[0].decode('utf_16_le', 'ignore'))
        # NOTE(review): the field is handed to int() undecoded -- presumably
        # it holds plain ASCII digits; confirm against real files.
        self.num_records = int(fields[1])
        # Built as a real list because the reader indexes into it (a lazy
        # map object is not subscriptable on Python 3). The NUL padding is
        # stripped with a text argument since the value is already decoded.
        self.chapter_titles = [
            fix_punct(entry.decode('utf_16_le', 'replace').rstrip(u'\x00'))
            for entry in fields[2].split(b'\r\x00\n\x00')
        ]
|
||||||
|
|
||||||
|
class Reader(FormatReader):
    '''
    Reader for Haodoo.net PDB books.

    Section 0 of the database is the header record (title, number of text
    records, chapter titles). Sections 1..num_records are decoded as text,
    one chapter each. Text is treated as cp950 when the header identity
    equals BPDB_IDENT and as utf_16_le otherwise.
    '''

    def __init__(self, header, stream, log, options):
        '''
        :param header: parsed PDB header; ``num_sections``, ``ident`` and
                       ``section_data(i)`` are used
        :param stream: seekable file object of the book; read again by
                       author()
        :param log: logger used by extract_content()
        :param options: accepted for interface compatibility, not used
        '''
        self.stream = stream
        self.log = log

        # Every section is pulled into memory up front.
        self.sections = [header.section_data(i)
                         for i in range(header.num_sections)]

        # The identity may arrive as text rather than bytes (the reader
        # registries key it with native string literals), so compare like
        # with like; otherwise a legacy book would be decoded as UTF-16.
        ident = header.ident
        if not isinstance(ident, bytes):
            ident = ident.encode('ascii', 'replace')

        if ident == BPDB_IDENT:
            self.header_record = LegacyHeaderRecord(self.section_data(0))
            self.encoding = 'cp950'
        else:
            self.header_record = UnicodeHeaderRecord(self.section_data(0))
            self.encoding = 'utf_16_le'

    def author(self):
        '''
        Return the author stored at the start of the file, or u'Unknown'.

        The first 35 bytes are only interpreted as the author when the
        signed byte at offset 35 equals 2.
        '''
        self.stream.seek(35)
        version = struct.unpack(b'>b', self.stream.read(1))[0]
        if version != 2:
            return u'Unknown'

        self.stream.seek(0)
        raw = self.stream.read(35)
        if self.encoding == 'utf_16_le':
            # 35 is odd: drop the dangling half code unit instead of letting
            # it decode to a replacement character.
            raw = raw[:len(raw) - (len(raw) % 2)]
        # Decode first, then strip the padding. Stripping NUL *bytes* first
        # would cut the high byte off a trailing ASCII/Latin-1 character in
        # UTF-16-LE and corrupt it.
        return raw.decode(self.encoding, 'replace').rstrip(u'\x00')

    def get_metadata(self):
        '''
        Return a MetaInformation object holding the title from the header
        record, the author from author() and the language u'zh-tw'.
        '''
        mi = MetaInformation(self.header_record.title,
                             [self.author()])
        mi.language = u'zh-tw'

        return mi

    def section_data(self, number):
        '''Return the raw bytes of section ``number``.'''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Return section ``number`` decoded with the book encoding, with
        trailing NUL characters removed.
        '''
        # The value is text after decode(), so the strip argument must be
        # text as well (bytes raise TypeError on Python 3).
        return self.section_data(number).decode(
            self.encoding, 'replace').rstrip(u'\x00')

    def _chapter_markup(self, title, text):
        # Render one chapter: an <h1> for the title plus one <p> per line.
        lines = []
        title_added = False
        for line in text.splitlines():
            line = fix_punct(line).strip()
            # The title is matched against the unescaped text; escaping is
            # applied only when the markup is emitted.
            if not title_added and title in line:
                # The first line containing the chapter title becomes the
                # heading. It is escaped like any other line and is not
                # nested inside <p>, which HTML does not allow.
                lines.append(u'<h1 class="chapter">%s</h1>\n'
                             % prepare_string_for_xml(line))
                title_added = True
            else:
                lines.append(u'<p>%s</p>' % prepare_string_for_xml(line))
        if not title_added:
            lines.insert(0, u'<h1 class="chapter">%s</h1>\n'
                         % prepare_string_for_xml(title))
        return u'\n'.join(lines)

    def extract_content(self, output_dir):
        '''
        Write the book as ``index.html`` plus ``metadata.opf`` into
        ``output_dir`` and return the path of the OPF file.

        Raises IndexError when the header record lists fewer chapter titles
        than text records.
        '''
        chapters = []

        self.log.info(u'Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug(u'\tDecompressing text section %i' % i)
            title = self.header_record.chapter_titles[i-1]
            chapters.append(
                self._chapter_markup(title, self.decompress_text(i)))
        # Chapters are concatenated without a separator.
        txt = u''.join(chapters)

        self.log.info(u'Converting text to OEB...')
        # The title goes into markup, so it is escaped like the body text.
        html = HTML_TEMPLATE % (
            prepare_string_for_xml(self.header_record.title), txt)
        with open(os.path.join(output_dir, u'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        mi = self.get_metadata()
        manifest = [(u'index.html', None)]
        spine = [u'index.html']
        opf_writer(output_dir, u'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, u'metadata.opf')
|
Loading…
x
Reference in New Issue
Block a user