From e690e7196e094787985dd148038d38a6d5e08163 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 24 Apr 2011 09:44:50 -0400 Subject: [PATCH] Plucker metadata reader. --- src/calibre/ebooks/metadata/pdb.py | 4 +- src/calibre/ebooks/metadata/plucker.py | 73 ++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 src/calibre/ebooks/metadata/plucker.py diff --git a/src/calibre/ebooks/metadata/pdb.py b/src/calibre/ebooks/metadata/pdb.py index ddf2b0c818..d01bb0ecdb 100644 --- a/src/calibre/ebooks/metadata/pdb.py +++ b/src/calibre/ebooks/metadata/pdb.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- ''' -Read meta information from eReader pdb files. +Read meta information from pdb files. ''' __license__ = 'GPL v3' @@ -13,10 +13,12 @@ import re from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.metadata.ereader import get_metadata as get_eReader +from calibre.ebooks.metadata.plucker import get_metadata as get_plucker MREADER = { 'PNPdPPrs' : get_eReader, 'PNRdPPrs' : get_eReader, + 'DataPlkr' : get_plucker, } from calibre.ebooks.metadata.ereader import set_metadata as set_eReader diff --git a/src/calibre/ebooks/metadata/plucker.py b/src/calibre/ebooks/metadata/plucker.py new file mode 100644 index 0000000000..991945f42b --- /dev/null +++ b/src/calibre/ebooks/metadata/plucker.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- + +from __future__ import (unicode_literals, division, absolute_import, print_function) + +''' +Read meta information from Plucker pdb files. 
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember '
__docformat__ = 'restructuredtext en'

import struct
from datetime import datetime

from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.pdb.header import PdbHeaderReader
from calibre.ebooks.pdb.plucker.reader import SectionHeader, DATATYPE_METADATA, \
    MIBNUM_TO_NAME


def _parse_metadata_records(section_data):
    '''
    Walk the sub-records of a Plucker metadata section.

    The section starts with a big-endian 16 bit record count. Each record
    is a 4 byte header (16 bit type code, 16 bit length counted in 2 byte
    words) followed by ``2 * length`` bytes of payload.

    :param section_data: Raw bytes of the metadata section with its leading
        8 bytes (the section header) already removed.
    :return: ``(encoding, title, author, pubdate)``. ``encoding`` defaults to
        ``'latin-1'``; ``title`` and ``author`` are undecoded byte strings or
        ``None``; ``pubdate`` is an integer timestamp or ``None`` when the
        section has no publication date record.
    '''
    encoding = 'latin-1'
    title = None
    author = None
    pubdate = None

    if len(section_data) < 2:
        return encoding, title, author, pubdate

    record_count, = struct.unpack('>H', section_data[0:2])
    offset = 2
    # NOTE: the loop variable must not be called ``_``; that name is used as
    # the translation function by get_metadata's module.
    for _record in range(record_count):
        header = section_data[offset:offset + 4]
        if len(header) < 4:
            # Truncated section: keep whatever has been read so far.
            break
        rec_type, length = struct.unpack('>HH', header)

        start = offset + 4
        end = start + 2 * length
        payload = section_data[start:end]
        if len(payload) < 2 * length:
            # Record claims more data than the section holds.
            break

        # CharSet
        if rec_type == 1:
            if len(payload) >= 2:
                val, = struct.unpack('>H', payload[:2])
                encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif rec_type == 4:
            author = payload
        # Title
        elif rec_type == 5:
            title = payload
        # Publication Date
        elif rec_type == 6:
            if len(payload) >= 4:
                pubdate, = struct.unpack('>I', payload[:4])

        # The next record starts after this record's header *and* payload.
        offset = end

    return encoding, title, author, pubdate


def get_metadata(stream, extract_cover=True):
    '''
    Read title, author and publication date from a Plucker pdb file.

    :param stream: Seekable file-like object positioned anywhere in the pdb
        file; it is rewound to the start before reading.
    :param extract_cover: Accepted for interface compatibility with the other
        metadata readers; this reader does not use it.
    :return: A ``MetaInformation`` object. Title and authors stay at
        ``Unknown`` when the file has no metadata section or the section does
        not carry them. ``pubdate`` is only set when the file contains a
        publication date record.
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    # Section 0 is skipped; look for the first section typed as metadata.
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            # Drop the 8 byte section header, keep only the records.
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding, title, author, pubdate = \
        _parse_metadata_records(section_data)

    # Strip NUL bytes with *bytes* arguments: mixing a byte string with text
    # literals would force an implicit ASCII decode and fail on any
    # non-ASCII title or author.
    if title:
        mi.title = title.replace(b'\0', b'').decode(default_encoding, 'replace')
    if author:
        author = author.replace(b'\0', b'').decode(default_encoding, 'replace')
        # NOTE(review): the constructor above is given a *list* of authors;
        # confirm that ``author`` is the attribute MetaInformation consumers
        # actually read (it may need to be the plural field).
        mi.author = author.split(',')
    if pubdate is not None:
        # Only report a date the file actually contains instead of
        # defaulting to the epoch.
        mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi