Refactoring.

This commit is contained in:
John Schember 2009-05-21 16:22:24 -04:00
parent 4be2cbb770
commit 24ca1a1134
14 changed files with 230 additions and 185 deletions

View File

@ -89,7 +89,7 @@ if __name__ == '__main__':
include_dirs=['src/calibre/utils/msdes']),
Extension('calibre.plugins.cPalmdoc',
sources=['src/calibre/ebooks/mobi/palmdoc.c']),
sources=['src/calibre/ebooks/compression/palmdoc.c']),
PyQtExtension('calibre.plugins.pictureflow',
['src/calibre/gui2/pictureflow/pictureflow.cpp',

View File

@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

View File

@ -25,15 +25,9 @@ TAG_MAP = {
'div' : 'p',
}
STYLE_MAP = {
'bold' : 'strong',
'bolder' : 'strong',
'italic' : 'emphasis',
}
STYLES = [
'font-weight',
'font-style',
('font-weight', {'bold' : 'strong', 'bolder' : 'strong'}),
('font-style', {'italic' : 'emphasis'}),
]
class FB2MLizer(object):
@ -107,8 +101,9 @@ class FB2MLizer(object):
fb2_text += '<%s>' % fb2_tag
tag_stack.append(fb2_tag)
# Processes style information
for s in STYLES:
style_tag = STYLE_MAP.get(style[s], None)
style_tag = s[1].get(style[s[0]], None)
if style_tag:
tag_count += 1
fb2_text += '<%s>' % style_tag

View File

@ -1,11 +1,17 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Read data from .mobi files
'''
import struct, os, cStringIO, re, functools, datetime, textwrap
import datetime
import functools
import os
import re
import struct
import textwrap
import cStringIO
try:
from PIL import Image as PILImage
@ -21,8 +27,8 @@ from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.huffcdic import HuffReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
from calibre.ebooks.metadata.toc import TOC
@ -40,8 +46,8 @@ class EXTHHeader(object):
while left > 0:
left -= 1
id, size = struct.unpack('>LL', raw[pos:pos+8])
content = raw[pos+8:pos+size]
id, size = struct.unpack('>LL', raw[pos:pos + 8])
content = raw[pos + 8:pos + size]
pos += size
if id >= 100 and id < 200:
self.process_metadata(id, content, codec)
@ -87,7 +93,7 @@ class EXTHHeader(object):
elif id == 106:
try:
self.mi.publish_date = datetime.datetime.strptime(
content, '%Y-%m-%d',).date()
content, '%Y-%m-%d', ).date()
except:
pass
elif id == 108:
@ -123,13 +129,13 @@ class BookHeader(object):
try:
self.codec = {
1252 : 'cp1252',
65001 : 'utf-8',
}[self.codepage]
1252: 'cp1252',
65001: 'utf-8',
}[self.codepage]
except (IndexError, KeyError):
self.codec = 'cp1252' if user_encoding is None else user_encoding
log.warn('Unknown codepage %d. Assuming %s'%(self.codepage,
self.codec))
log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
self.codec))
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
self.extra_flags = 0
else:
@ -147,14 +153,14 @@ class BookHeader(object):
self.language = main_language.get(langid, 'ENGLISH')
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
self.mobi_version = struct.unpack('>I', raw[0x68:0x6c])[0]
self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c+4])[0]
self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c + 4])[0]
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None
if not isinstance(self.title, unicode):
self.title = self.title.decode(self.codec, 'replace')
if self.exth_flag & 0x40:
self.exth = EXTHHeader(raw[16+self.length:], self.codec, self.title)
self.exth = EXTHHeader(raw[16 + self.length:], self.codec, self.title)
self.exth.mi.uid = self.unique_id
self.exth.mi.language = self.language
@ -182,7 +188,7 @@ class MetadataHeader(BookHeader):
return struct.unpack('>H', self.stream.read(2))[0]
def section_offset(self, number):
self.stream.seek(78+number*8)
self.stream.seek(78 + number * 8)
return struct.unpack('>LBBBB', self.stream.read(8))[0]
def header(self):
@ -242,15 +248,15 @@ class MobiReader(object):
self.name = self.header[:32].replace('\x00', '')
self.num_sections, = struct.unpack('>H', raw[76:78])
self.ident = self.header[0x3C:0x3C+8].upper()
self.ident = self.header[0x3C:0x3C + 8].upper()
if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
raise MobiError('Unknown book type: %s'%self.ident)
raise MobiError('Unknown book type: %s' % self.ident)
self.sections = []
self.section_headers = []
for i in range(self.num_sections):
offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78+i*8:78+i*8+8])
flags, val = a1, a2<<16 | a3<<8 | a4
offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78 + i * 8:78 + i * 8 + 8])
flags, val = a1, a2 << 16 | a3 << 8 | a4
self.section_headers.append((offset, flags, val))
def section(section_number):
@ -266,7 +272,7 @@ class MobiReader(object):
self.book_header = BookHeader(self.sections[0][0], self.ident,
user_encoding, self.log)
user_encoding, self.log)
self.name = self.name.decode(self.book_header.codec, 'replace')
def extract_content(self, output_dir, parse_cache):
@ -279,13 +285,13 @@ class MobiReader(object):
parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
self.add_anchors()
self.processed_html = self.processed_html.decode(self.book_header.codec,
'ignore')
'ignore')
for pat in ENCODING_PATS:
self.processed_html = pat.sub('', self.processed_html)
e2u = functools.partial(entity_to_unicode,
exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
self.processed_html = re.sub(r'&(\S+?);', e2u,
self.processed_html)
self.processed_html)
self.extract_images(processed_records, output_dir)
self.replace_page_breaks()
self.cleanup_html()
@ -295,7 +301,7 @@ class MobiReader(object):
if root.xpath('descendant::p/descendant::p'):
from lxml.html import soupparser
self.log.warning('Markup contains unclosed <p> tags, parsing using',
'BeatifulSoup')
'BeatifulSoup')
root = soupparser.fromstring(self.processed_html)
if root.tag != 'html':
self.log.warn('File does not have opening <html> tag')
@ -346,45 +352,45 @@ class MobiReader(object):
fname = self.name.encode('ascii', 'replace')
fname = re.sub(r'[\x08\x15\0]+', '', fname)
htmlfile = os.path.join(output_dir,
sanitize_file_name(fname)+'.html')
sanitize_file_name(fname) + '.html')
try:
for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'):
ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
except AttributeError:
pass
parse_cache[htmlfile] = root
self.htmlfile = htmlfile
ncx = cStringIO.StringIO()
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
opf.render(open(self.created_opf_path, 'wb'), ncx,
ncx_manifest_entry=ncx_manifest_entry)
ncx_manifest_entry=ncx_manifest_entry)
ncx = ncx.getvalue()
if ncx:
ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
open(ncx_path, 'wb').write(ncx)
with open('styles.css', 'wb') as s:
s.write(self.base_css_rules+'\n\n')
s.write(self.base_css_rules + '\n\n')
for cls, rule in self.tag_css_rules.items():
if isinstance(rule, unicode):
rule = rule.encode('utf-8')
s.write('.%s { %s }\n\n'%(cls, rule))
s.write('.%s { %s }\n\n' % (cls, rule))
if self.book_header.exth is not None or self.embedded_mi is not None:
self.log.debug('Creating OPF...')
ncx = cStringIO.StringIO()
opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx,
ncx_manifest_entry )
opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx,
ncx_manifest_entry)
ncx = ncx.getvalue()
if ncx:
open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx)
def read_embedded_metadata(self, root, elem, guide):
raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
raw = '<package>' + html.tostring(elem, encoding='utf-8') + '</package>'
stream = cStringIO.StringIO(raw)
opf = OPF(stream)
self.embedded_mi = MetaInformation(opf)
@ -394,7 +400,7 @@ class MobiReader(object):
href = ref.get('href', '')
if href.startswith('#'):
href = href[1:]
anchors = root.xpath('//*[@id="%s"]'%href)
anchors = root.xpath('//*[@id="%s"]' % href)
if anchors:
cpos = anchors[0]
reached = False
@ -412,27 +418,27 @@ class MobiReader(object):
self.log.debug('Cleaning up HTML...')
self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<')
def upshift_markup(self, root):
self.log.debug('Converting style information to CSS...')
size_map = {
'xx-small' : '0.5',
'x-small' : '1',
'small' : '2',
'medium' : '3',
'large' : '4',
'x-large' : '5',
'xx-large' : '6',
}
'xx-small': '0.5',
'x-small': '1',
'small': '2',
'medium': '3',
'large': '4',
'x-large': '5',
'xx-large': '6',
}
mobi_version = self.book_header.mobi_version
for i, tag in enumerate(root.iter(etree.Element)):
tag.attrib.pop('xmlns', '')
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content'):
tag.tag = 'div' if tag.tag == 'content' else 'span'
'state', 'city', 'street', 'address', 'content'):
tag.tag = 'div' if tag.tag == 'content' else 'span'
for key in tag.attrib.keys():
tag.attrib.pop(key)
continue
@ -450,7 +456,7 @@ class MobiReader(object):
if width:
styles.append('text-indent: %s' % width)
if width.startswith('-'):
styles.append('margin-left: %s'%(width[1:]))
styles.append('margin-left: %s' % (width[1:]))
if attrib.has_key('align'):
align = attrib.pop('align').strip()
if align:
@ -502,7 +508,7 @@ class MobiReader(object):
cls = sel
break
if cls is None:
ncls = 'calibre_%d'%i
ncls = 'calibre_%d' % i
self.tag_css_rules[ncls] = rule
cls = attrib.get('class', '')
cls = cls + (' ' if cls else '') + ncls
@ -514,17 +520,17 @@ class MobiReader(object):
mi = MetaInformation(self.book_header.title, [_('Unknown')])
opf = OPFCreator(os.path.dirname(htmlfile), mi)
if hasattr(self.book_header.exth, 'cover_offset'):
opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
opf.cover = 'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
elif mi.cover is not None:
opf.cover = mi.cover
else:
opf.cover = 'images/%05d.jpg'%1
opf.cover = 'images/%05d.jpg' % 1
if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
*opf.cover.split('/'))):
opf.cover = None
* opf.cover.split('/'))):
opf.cover = None
manifest = [(htmlfile, 'text/x-oeb1-document'),
(os.path.abspath('styles.css'), 'text/css')]
(os.path.abspath('styles.css'), 'text/css')]
bp = os.path.dirname(htmlfile)
for i in getattr(self, 'image_names', []):
manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
@ -541,7 +547,7 @@ class MobiReader(object):
ncx_manifest_entry = None
if toc:
ncx_manifest_entry = 'toc.ncx'
elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
tocobj = None
ent_pat = re.compile(r'&(\S+?);')
if elems:
@ -556,12 +562,12 @@ class MobiReader(object):
if href and re.match('\w+://', href) is None:
try:
text = u' '.join([t.strip() for t in \
x.xpath('descendant::text()')])
x.xpath('descendant::text()')])
except:
text = ''
text = ent_pat.sub(entity_to_unicode, text)
tocobj.add_item(toc.partition('#')[0], href[1:],
text)
text)
if reached and x.get('class', None) == 'mbp_pagebreak':
break
if tocobj is not None:
@ -599,17 +605,17 @@ class MobiReader(object):
def extract_text(self):
self.log.debug('Extracting text...')
text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
processed_records = list(range(0, self.book_header.records+1))
text_sections = [self.text_section(i) for i in range(1, self.book_header.records + 1)]
processed_records = list(range(0, self.book_header.records + 1))
self.mobi_html = ''
if self.book_header.compression_type == 'DH':
huffs = [self.sections[i][0] for i in
range(self.book_header.huff_offset,
self.book_header.huff_offset+self.book_header.huff_number)]
range(self.book_header.huff_offset,
self.book_header.huff_offset + self.book_header.huff_number)]
processed_records += list(range(self.book_header.huff_offset,
self.book_header.huff_offset+self.book_header.huff_number))
self.book_header.huff_offset + self.book_header.huff_number))
huff = HuffReader(huffs)
self.mobi_html = huff.decompress(text_sections)
@ -620,7 +626,7 @@ class MobiReader(object):
elif self.book_header.compression_type == '\x00\x01':
self.mobi_html = ''.join(text_sections)
else:
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
raise MobiError('Unknown compression algorithm: %s' % repr(self.book_header.compression_type))
if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
self.mobi_html = self.mobi_html.replace('\0', '')
@ -636,7 +642,7 @@ class MobiReader(object):
self.log.debug('Adding anchors...')
positions = set([])
link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
re.IGNORECASE)
re.IGNORECASE)
for match in link_pattern.finditer(self.mobi_html):
positions.add(int(match.group(1)))
pos = 0
@ -652,10 +658,10 @@ class MobiReader(object):
if r > -1 and (r < l or l == end or l == -1):
p = self.mobi_html.rfind('<', 0, end + 1)
if pos < end and p > -1 and \
not end_tag_re.match(self.mobi_html[p:r]) and \
not self.mobi_html[p:r+1].endswith('/>'):
anchor = ' filepos-id="filepos%d"'
end = r
not end_tag_re.match(self.mobi_html[p:r]) and \
not self.mobi_html[p:r + 1].endswith('/>'):
anchor = ' filepos-id="filepos%d"'
end = r
else:
end = r + 1
self.processed_html += self.mobi_html[pos:end] + (anchor % oend)
@ -673,7 +679,7 @@ class MobiReader(object):
start = getattr(self.book_header, 'first_image_index', -1)
if start > self.num_sections or start < 0:
# BAEN PRC files have bad headers
start=0
start = 0
for i in range(start, self.num_sections):
if i in processed_records:
continue
@ -687,7 +693,7 @@ class MobiReader(object):
except IOError:
continue
path = os.path.join(output_dir, '%05d.jpg'%image_index)
path = os.path.join(output_dir, '%05d.jpg' % image_index)
self.image_names.append(os.path.basename(path))
im.save(open(path, 'wb'), format='JPEG')

View File

@ -1,27 +1,32 @@
'''
Write content to Mobipocket books.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
from collections import defaultdict
from itertools import count
from itertools import izip
import random
import re
from struct import pack
import time
import random
from cStringIO import StringIO
import re
from itertools import izip, count
from collections import defaultdict
from urlparse import urldefrag
from PIL import Image
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import namespace, prefixname
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.mobi.palmdoc import compress_doc
from cStringIO import StringIO
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS
from calibre.ebooks.oeb.base import OEB_DOCS
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ebooks.oeb.base import XML_NS
from calibre.ebooks.oeb.base import namespace
from calibre.ebooks.oeb.base import prefixname
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.compression.palmdoc import compress_doc
# TODO:
# - Allow override CSS (?)
@ -174,7 +179,7 @@ class Serializer(object):
item = hrefs[path] if path else None
if item and item.spine_position is None:
return False
path = item.href if item else base.href
path = item.href if item else base.href
href = '#'.join((path, frag)) if frag else path
buffer.write('filepos=')
self.href_offsets[href].append(buffer.tell())
@ -211,8 +216,8 @@ class Serializer(object):
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
buffer = self.buffer
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) not in nsrmap:
return
or namespace(elem.tag) not in nsrmap:
return
tag = prefixname(elem.tag, nsrmap)
# Previous layers take care of @name
id = elem.attrib.pop('id', None)
@ -221,9 +226,9 @@ class Serializer(object):
offset = self.anchor_offset or buffer.tell()
self.id_offsets[href] = offset
if self.anchor_offset is not None and \
tag == 'a' and not elem.attrib and \
not len(elem) and not elem.text:
return
tag == 'a' and not elem.attrib and \
not len(elem) and not elem.text:
return
self.anchor_offset = buffer.tell()
buffer.write('<')
buffer.write(tag)
@ -286,8 +291,8 @@ class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def __init__(self, compression=PALMDOC, imagemax=None,
prefer_author_sort=False):
self._compression = compression or UNCOMPRESSED
prefer_author_sort=False):
self._compression = compression or UNCOMPRESSED
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
self._prefer_author_sort = prefer_author_sort
@ -297,7 +302,7 @@ class MobiWriter(object):
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
prefer_author_sort = opts.prefer_author_sort
return cls(compression=PALMDOC, imagemax=imagemax,
prefer_author_sort=prefer_author_sort)
prefer_author_sort=prefer_author_sort)
def __call__(self, oeb, path):
if hasattr(path, 'write'):
@ -305,7 +310,7 @@ class MobiWriter(object):
with open(path, 'w+b') as stream:
return self._dump_stream(oeb, stream)
def _write(self, *data):
def _write(self, * data):
for datum in data:
self._stream.write(datum)

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -10,14 +9,14 @@ class PDBError(Exception):
from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
FORMAT_READERS = {
'PNPdPPrs' : ereader_reader,
'PNRdPPrs' : ereader_reader,
'zTXTGPlm' : ztxt_reader,
'TEXtREAd' : palmdoc_reader,
'PNPdPPrs': ereader_reader,
'PNRdPPrs': ereader_reader,
'zTXTGPlm': ztxt_reader,
'TEXtREAd': palmdoc_reader,
}
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@ -25,41 +24,41 @@ from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
FORMAT_WRITERS = {
'doc' : palmdoc_writer,
'ztxt' : ztxt_writer,
'ereader' : ereader_writer,
'doc': palmdoc_writer,
'ztxt': ztxt_writer,
'ereader': ereader_writer,
}
IDENTITY_TO_NAME = {
'PNPdPPrs' : 'eReader',
'PNRdPPrs' : 'eReader',
'zTXTGPlm' : 'zTXT',
'TEXtREAd' : 'PalmDOC',
'PNPdPPrs': 'eReader',
'PNRdPPrs': 'eReader',
'zTXTGPlm': 'zTXT',
'TEXtREAd': 'PalmDOC',
'.pdfADBE' : 'Adobe Reader',
'BVokBDIC' : 'BDicty',
'DB99DBOS' : 'DB (Database program)',
'vIMGView' : 'FireViewer (ImageViewer)',
'PmDBPmDB' : 'HanDBase',
'InfoINDB' : 'InfoView',
'ToGoToGo' : 'iSilo',
'SDocSilX' : 'iSilo 3',
'JbDbJBas' : 'JFile',
'JfDbJFil' : 'JFile Pro',
'DATALSdb' : 'LIST',
'Mdb1Mdb1' : 'MobileDB',
'BOOKMOBI' : 'MobiPocket',
'DataPlkr' : 'Plucker',
'DataSprd' : 'QuickSheet',
'SM01SMem' : 'SuperMemo',
'TEXtTlDc' : 'TealDoc',
'InfoTlIf' : 'TealInfo',
'DataTlMl' : 'TealMeal',
'DataTlPt' : 'TealPaint',
'dataTDBP' : 'ThinkDB',
'TdatTide' : 'Tides',
'ToRaTRPW' : 'TomeRaider',
'BDOCWrdS' : 'WordSmith',
'.pdfADBE': 'Adobe Reader',
'BVokBDIC': 'BDicty',
'DB99DBOS': 'DB (Database program)',
'vIMGView': 'FireViewer (ImageViewer)',
'PmDBPmDB': 'HanDBase',
'InfoINDB': 'InfoView',
'ToGoToGo': 'iSilo',
'SDocSilX': 'iSilo 3',
'JbDbJBas': 'JFile',
'JfDbJFil': 'JFile Pro',
'DATALSdb': 'LIST',
'Mdb1Mdb1': 'MobileDB',
'BOOKMOBI': 'MobiPocket',
'DataPlkr': 'Plucker',
'DataSprd': 'QuickSheet',
'SM01SMem': 'SuperMemo',
'TEXtTlDc': 'TealDoc',
'InfoTlIf': 'TealInfo',
'DataTlMl': 'TealMeal',
'DataTlPt': 'TealPaint',
'dataTDBP': 'ThinkDB',
'TdatTide': 'Tides',
'ToRaTRPW': 'TomeRaider',
'BDOCWrdS': 'WordSmith',
}
def get_reader(identity):

View File

@ -8,16 +8,19 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, re, struct, zlib
import os
import re
import struct
import zlib
from calibre import CurrentDir
from calibre.ebooks import DRMError
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pml.pmlconverter import pml_to_html, \
footnote_sidebar_to_html
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pml.pmlconverter import footnote_sidebar_to_html
from calibre.ebooks.pml.pmlconverter import pml_to_html
class HeaderRecord(object):
'''
@ -32,7 +35,7 @@ class HeaderRecord(object):
self.non_text_offset, = struct.unpack('>H', raw[12:14])
self.has_metadata, = struct.unpack('>H', raw[24:26])
self.footnote_rec, = struct.unpack('>H', raw[28:30])
self.sidebar_rec, = struct.unpack('>H', raw[30:32])
self.sidebar_rec, = struct.unpack('>H', raw[30:32])
self.bookmark_offset, = struct.unpack('>H', raw[32:34])
self.image_data_offset, = struct.unpack('>H', raw[40:42])
self.metadata_offset, = struct.unpack('>H', raw[44:46])
@ -79,7 +82,7 @@ class Reader(FormatReader):
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
return 'empty', ''
data = self.section_data(number)
name = data[4:4+32].strip('\x00')
name = data[4:4 + 32].strip('\x00')
img = data[62:]
return name, img

View File

@ -8,9 +8,11 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import struct, zlib
import struct
import zlib
import Image, cStringIO
import Image
import cStringIO
from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.oeb.base import OEB_IMAGES

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Read the header data from a pdb file.
'''
@ -8,7 +7,9 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re, struct, time
import re
import struct
import time
class PdbHeaderReader(object):
@ -35,16 +36,16 @@ class PdbHeaderReader(object):
if number not in range(0, self.num_sections):
raise ValueError('Not a valid section number %i' % number)
self.stream.seek(78+number*8)
self.stream.seek(78 + number * 8)
offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', self.stream.read(8))[0]
flags, val = a1, a2<<16 | a3<<8 | a4
flags, val = a1, a2 << 16 | a3 << 8 | a4
return (offset, flags, val)
def section_offset(self, number):
if number not in range(0, self.num_sections):
raise ValueError('Not a valid section number %i' % number)
self.stream.seek(78+number*8)
self.stream.seek(78 + number * 8)
return struct.unpack('>LBBBB', self.stream.read(8))[0]
def section_data(self, number):

View File

@ -8,11 +8,13 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, struct, zlib
import os
import struct
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
from calibre.ebooks.txt.processor import opf_writer
from calibre.ebooks.txt.processor import txt_to_markdown
class HeaderRecord(object):
'''

View File

@ -10,10 +10,11 @@ __docformat__ = 'restructuredtext en'
import struct
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.pdb.header import PdbHeaderBuilder
from calibre.ebooks.txt.writer import TxtNewlines
from calibre.ebooks.txt.writer import TxtWriter
MAX_RECORD_SIZE = 4096
@ -40,7 +41,7 @@ class Writer(FormatWriter):
hb = PdbHeaderBuilder('TEXtREAd', title)
hb.build_header(section_lengths, out_stream)
for record in [header_record]+txt_records:
for record in [header_record] + txt_records:
out_stream.write(record)
def _generate_text(self, spine):
@ -51,7 +52,7 @@ class Writer(FormatWriter):
txt_records = []
for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
txt_records.append(txt[i * MAX_RECORD_SIZE : (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
txt_records.append(txt[i * MAX_RECORD_SIZE: (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
return txt_records, txt_length

View File

@ -8,7 +8,8 @@ __docformat__ = 'restructuredtext en'
Transform OEB content into PML markup
'''
import os, re
import os
import re
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
@ -40,6 +41,31 @@ STYLES = [
('text-align', {'right' : 'r', 'center' : 'c'}),
]
BLOCK_TAGS = [
'p',
]
BLOCK_STYLES = [
'block',
]
LINK_TAGS = [
'a',
]
SEPARATE_TAGS = [
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'p',
'div',
'li',
'tr',
]
class PMLMLizer(object):
def __init__(self, ignore_tables=False):
self.ignore_tables = ignore_tables
@ -104,7 +130,7 @@ class PMLMLizer(object):
tag_count = 0
# Are we in a paragraph block?
if tag == 'p' or style['display'] in ('block'):
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
if 'block' not in tag_stack:
tag_count += 1
tag_stack.append('block')
@ -136,7 +162,7 @@ class PMLMLizer(object):
# Special processing of tags that require an argument.
# Anchors links
if tag == 'a' and 'q' not in tag_stack:
if tag in LINK_TAGS and 'q' not in tag_stack:
href = elem.get('href')
if href and '://' not in href:
if '#' in href:
@ -168,7 +194,7 @@ class PMLMLizer(object):
for i in range(0, tag_count):
close_tag_list.insert(0, tag_stack.pop())
text += self.close_tags(close_tag_list)
if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'li', 'tr'):
if tag in SEPARATE_TAGS:
text += os.linesep + os.linesep
if 'block' not in tag_stack: