Refactoring.

2025-08-11 09:13:57 -04:00 · 2009-05-21 16:22:24 -04:00 · 2009-05-21 16:22:24 -04:00 · 24ca1a1134
commit 24ca1a1134
parent 4be2cbb770
14 changed files with 230 additions and 185 deletions
--- a/setup.py
+++ b/setup.py
@ -89,7 +89,7 @@ if __name__ == '__main__':
                             include_dirs=['src/calibre/utils/msdes']),
                    Extension('calibre.plugins.cPalmdoc',
-                        sources=['src/calibre/ebooks/mobi/palmdoc.c']),
+                        sources=['src/calibre/ebooks/compression/palmdoc.c']),
                    PyQtExtension('calibre.plugins.pictureflow',
                                  ['src/calibre/gui2/pictureflow/pictureflow.cpp',
--- a/src/calibre/ebooks/compression/__init__.py
+++ b/src/calibre/ebooks/compression/__init__.py
@ -0,0 +1,5 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
--- a/src/calibre/ebooks/compression/palmdoc.c
+++ b/src/calibre/ebooks/compression/palmdoc.c
--- a/src/calibre/ebooks/compression/palmdoc.py
+++ b/src/calibre/ebooks/compression/palmdoc.py
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -25,15 +25,9 @@ TAG_MAP = {
    'div' : 'p',
 }
 STYLE_MAP = {
    'bold'   : 'strong',
    'bolder' : 'strong',
    'italic' : 'emphasis',
 }
 STYLES = [
-    'font-weight',
+    ('font-weight', {'bold'   : 'strong', 'bolder' : 'strong'}),
-    'font-style',
+    ('font-style', {'italic' : 'emphasis'}),
 ]
 class FB2MLizer(object):
@ -107,8 +101,9 @@ class FB2MLizer(object):
                fb2_text += '<%s>' % fb2_tag
                tag_stack.append(fb2_tag)
            # Processes style information
            for s in STYLES:
-                style_tag = STYLE_MAP.get(style[s], None)
+                style_tag = s[1].get(style[s[0]], None)
                if style_tag:
                    tag_count += 1
                    fb2_text += '<%s>' % style_tag
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -1,11 +1,17 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Read data from .mobi files
 '''
-import struct, os, cStringIO, re, functools, datetime, textwrap
+import datetime
 import functools
 import os
 import re
 import struct
 import textwrap
 import cStringIO
 try:
    from PIL import Image as PILImage
@ -21,8 +27,8 @@ from calibre.ebooks import DRMError
 from calibre.ebooks.chardet import ENCODING_PATS
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
 from calibre.ebooks.mobi.palmdoc import decompress_doc
 from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
 from calibre.ebooks.metadata.toc import TOC
@ -40,8 +46,8 @@ class EXTHHeader(object):
        while left > 0:
            left -= 1
-            id, size = struct.unpack('>LL', raw[pos:pos+8])
+            id, size = struct.unpack('>LL', raw[pos:pos + 8])
-            content = raw[pos+8:pos+size]
+            content = raw[pos + 8:pos + size]
            pos += size
            if id >= 100 and id < 200:
                self.process_metadata(id, content, codec)
@ -87,7 +93,7 @@ class EXTHHeader(object):
        elif id == 106:
            try:
                self.mi.publish_date = datetime.datetime.strptime(
-                        content, '%Y-%m-%d',).date()
+                    content, '%Y-%m-%d', ).date()
            except:
                pass
        elif id == 108:
@ -123,13 +129,13 @@ class BookHeader(object):
            try:
                self.codec = {
-                          1252  : 'cp1252',
+                    1252: 'cp1252',
-                          65001 : 'utf-8',
+                    65001: 'utf-8',
-                          }[self.codepage]
+                    }[self.codepage]
            except (IndexError, KeyError):
                self.codec = 'cp1252' if user_encoding is None else user_encoding
-                log.warn('Unknown codepage %d. Assuming %s'%(self.codepage,
+                log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
-                                                            self.codec))
+                    self.codec))
            if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
                self.extra_flags = 0
            else:
@ -147,14 +153,14 @@ class BookHeader(object):
            self.language = main_language.get(langid, 'ENGLISH')
            self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
            self.mobi_version = struct.unpack('>I', raw[0x68:0x6c])[0]
-            self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c+4])[0]
+            self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c + 4])[0]
            self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
            self.exth = None
            if not isinstance(self.title, unicode):
                self.title = self.title.decode(self.codec, 'replace')
            if self.exth_flag & 0x40:
-                self.exth = EXTHHeader(raw[16+self.length:], self.codec, self.title)
+                self.exth = EXTHHeader(raw[16 + self.length:], self.codec, self.title)
                self.exth.mi.uid = self.unique_id
                self.exth.mi.language = self.language
@ -182,7 +188,7 @@ class MetadataHeader(BookHeader):
        return struct.unpack('>H', self.stream.read(2))[0]
    def section_offset(self, number):
-        self.stream.seek(78+number*8)
+        self.stream.seek(78 + number * 8)
        return struct.unpack('>LBBBB', self.stream.read(8))[0]
    def header(self):
@ -242,15 +248,15 @@ class MobiReader(object):
        self.name     = self.header[:32].replace('\x00', '')
        self.num_sections, = struct.unpack('>H', raw[76:78])
-        self.ident = self.header[0x3C:0x3C+8].upper()
+        self.ident = self.header[0x3C:0x3C + 8].upper()
        if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
-            raise MobiError('Unknown book type: %s'%self.ident)
+            raise MobiError('Unknown book type: %s' % self.ident)
        self.sections = []
        self.section_headers = []
        for i in range(self.num_sections):
-            offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78+i*8:78+i*8+8])
+            offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78 + i * 8:78 + i * 8 + 8])
-            flags, val = a1, a2<<16 | a3<<8 | a4
+            flags, val = a1, a2 << 16 | a3 << 8 | a4
            self.section_headers.append((offset, flags, val))
        def section(section_number):
@ -266,7 +272,7 @@ class MobiReader(object):
        self.book_header = BookHeader(self.sections[0][0], self.ident,
-                                      user_encoding, self.log)
+            user_encoding, self.log)
        self.name = self.name.decode(self.book_header.codec, 'replace')
    def extract_content(self, output_dir, parse_cache):
@ -279,13 +285,13 @@ class MobiReader(object):
            parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
        self.add_anchors()
        self.processed_html = self.processed_html.decode(self.book_header.codec,
-                                                          'ignore')
+            'ignore')
        for pat in ENCODING_PATS:
            self.processed_html = pat.sub('', self.processed_html)
        e2u = functools.partial(entity_to_unicode,
-                                exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
+            exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
        self.processed_html = re.sub(r'&(\S+?);', e2u,
-                                     self.processed_html)
+            self.processed_html)
        self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()
        self.cleanup_html()
@ -295,7 +301,7 @@ class MobiReader(object):
        if root.xpath('descendant::p/descendant::p'):
            from lxml.html import soupparser
            self.log.warning('Markup contains unclosed <p> tags, parsing using',
-                    'BeatifulSoup')
+                'BeatifulSoup')
            root = soupparser.fromstring(self.processed_html)
        if root.tag != 'html':
            self.log.warn('File does not have opening <html> tag')
@ -346,45 +352,45 @@ class MobiReader(object):
        fname = self.name.encode('ascii', 'replace')
        fname = re.sub(r'[\x08\x15\0]+', '', fname)
        htmlfile = os.path.join(output_dir,
-                                sanitize_file_name(fname)+'.html')
+            sanitize_file_name(fname) + '.html')
        try:
            for ref in guide.xpath('descendant::reference'):
                if ref.attrib.has_key('href'):
-                    ref.attrib['href'] = os.path.basename(htmlfile)+ref.attrib['href']
+                    ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
        except AttributeError:
            pass
        parse_cache[htmlfile] = root
        self.htmlfile = htmlfile
        ncx = cStringIO.StringIO()
        opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
-        self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
+        self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
        opf.render(open(self.created_opf_path, 'wb'), ncx,
-                ncx_manifest_entry=ncx_manifest_entry)
+            ncx_manifest_entry=ncx_manifest_entry)
        ncx = ncx.getvalue()
        if ncx:
            ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
            open(ncx_path, 'wb').write(ncx)
        with open('styles.css', 'wb') as s:
-            s.write(self.base_css_rules+'\n\n')
+            s.write(self.base_css_rules + '\n\n')
            for cls, rule in self.tag_css_rules.items():
                if isinstance(rule, unicode):
                    rule = rule.encode('utf-8')
-                s.write('.%s { %s }\n\n'%(cls, rule))
+                s.write('.%s { %s }\n\n' % (cls, rule))
        if self.book_header.exth is not None or self.embedded_mi is not None:
            self.log.debug('Creating OPF...')
            ncx = cStringIO.StringIO()
            opf, ncx_manifest_entry  = self.create_opf(htmlfile, guide, root)
-            opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx,
+            opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx,
-                    ncx_manifest_entry )
+                ncx_manifest_entry)
            ncx = ncx.getvalue()
            if ncx:
-                open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
+                open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx)
    def read_embedded_metadata(self, root, elem, guide):
-        raw = '<package>'+html.tostring(elem, encoding='utf-8')+'</package>'
+        raw = '<package>' + html.tostring(elem, encoding='utf-8') + '</package>'
        stream = cStringIO.StringIO(raw)
        opf = OPF(stream)
        self.embedded_mi = MetaInformation(opf)
@ -394,7 +400,7 @@ class MobiReader(object):
                    href = ref.get('href', '')
                    if href.startswith('#'):
                        href = href[1:]
-                    anchors = root.xpath('//*[@id="%s"]'%href)
+                    anchors = root.xpath('//*[@id="%s"]' % href)
                    if anchors:
                        cpos = anchors[0]
                        reached = False
@ -412,27 +418,27 @@ class MobiReader(object):
        self.log.debug('Cleaning up HTML...')
        self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
        if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
-            self.processed_html = '<html><p>'+self.processed_html.replace('\n\n', '<p>')+'</html>'
+            self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
        self.processed_html = self.processed_html.replace('\r\n', '\n')
        self.processed_html = self.processed_html.replace('> <', '>\n<')
    def upshift_markup(self, root):
        self.log.debug('Converting style information to CSS...')
        size_map = {
-                    'xx-small' : '0.5',
+            'xx-small': '0.5',
-                    'x-small'  : '1',
+            'x-small': '1',
-                    'small'    : '2',
+            'small': '2',
-                    'medium'   : '3',
+            'medium': '3',
-                    'large'    : '4',
+            'large': '4',
-                    'x-large'  : '5',
+            'x-large': '5',
-                    'xx-large' : '6',
+            'xx-large': '6',
-                    }
+            }
        mobi_version = self.book_header.mobi_version
        for i, tag in enumerate(root.iter(etree.Element)):
            tag.attrib.pop('xmlns', '')
            if tag.tag in ('country-region', 'place', 'placetype', 'placename',
-                           'state', 'city', 'street', 'address', 'content'):
+                'state', 'city', 'street', 'address', 'content'):
-                tag.tag = 'div' if tag.tag == 'content' else 'span'
+            tag.tag = 'div' if tag.tag == 'content' else 'span'
                for key in tag.attrib.keys():
                    tag.attrib.pop(key)
                continue
@ -450,7 +456,7 @@ class MobiReader(object):
                if width:
                    styles.append('text-indent: %s' % width)
                    if width.startswith('-'):
-                        styles.append('margin-left: %s'%(width[1:]))
+                        styles.append('margin-left: %s' % (width[1:]))
            if attrib.has_key('align'):
                align = attrib.pop('align').strip()
                if align:
@ -502,7 +508,7 @@ class MobiReader(object):
                        cls = sel
                        break
                if cls is None:
-                    ncls = 'calibre_%d'%i
+                    ncls = 'calibre_%d' % i
                    self.tag_css_rules[ncls] = rule
                cls = attrib.get('class', '')
                cls = cls + (' ' if cls else '') + ncls
@ -514,17 +520,17 @@ class MobiReader(object):
            mi = MetaInformation(self.book_header.title, [_('Unknown')])
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
-            opf.cover = 'images/%05d.jpg'%(self.book_header.exth.cover_offset+1)
+            opf.cover = 'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
        elif mi.cover is not None:
            opf.cover = mi.cover
        else:
-            opf.cover = 'images/%05d.jpg'%1
+            opf.cover = 'images/%05d.jpg' % 1
            if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
-                                               *opf.cover.split('/'))):
+                * opf.cover.split('/'))):
-                opf.cover = None
+            opf.cover = None
        manifest = [(htmlfile, 'text/x-oeb1-document'),
-                    (os.path.abspath('styles.css'), 'text/css')]
+            (os.path.abspath('styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
        for i in getattr(self, 'image_names', []):
            manifest.append((os.path.join(bp, 'images/', i), 'image/jpeg'))
@ -541,7 +547,7 @@ class MobiReader(object):
        ncx_manifest_entry = None
        if toc:
            ncx_manifest_entry = 'toc.ncx'
-            elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
+            elems = root.xpath('//*[@id="%s"]' % toc.partition('#')[-1])
            tocobj = None
            ent_pat = re.compile(r'&(\S+?);')
            if elems:
@ -556,12 +562,12 @@ class MobiReader(object):
                        if href and re.match('\w+://', href) is None:
                            try:
                                text = u' '.join([t.strip() for t in \
-                                                x.xpath('descendant::text()')])
+                                    x.xpath('descendant::text()')])
                            except:
                                text = ''
                            text = ent_pat.sub(entity_to_unicode, text)
                            tocobj.add_item(toc.partition('#')[0], href[1:],
-                                            text)
+                                text)
                    if reached and x.get('class', None) == 'mbp_pagebreak':
                        break
            if tocobj is not None:
@ -599,17 +605,17 @@ class MobiReader(object):
    def extract_text(self):
        self.log.debug('Extracting text...')
-        text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
+        text_sections = [self.text_section(i) for i in range(1, self.book_header.records + 1)]
-        processed_records = list(range(0, self.book_header.records+1))
+        processed_records = list(range(0, self.book_header.records + 1))
        self.mobi_html = ''
        if self.book_header.compression_type == 'DH':
            huffs = [self.sections[i][0] for i in
-                  range(self.book_header.huff_offset,
+                range(self.book_header.huff_offset,
-                        self.book_header.huff_offset+self.book_header.huff_number)]
+                    self.book_header.huff_offset + self.book_header.huff_number)]
            processed_records += list(range(self.book_header.huff_offset,
-                        self.book_header.huff_offset+self.book_header.huff_number))
+                self.book_header.huff_offset + self.book_header.huff_number))
            huff = HuffReader(huffs)
            self.mobi_html = huff.decompress(text_sections)
@ -620,7 +626,7 @@ class MobiReader(object):
        elif self.book_header.compression_type == '\x00\x01':
            self.mobi_html = ''.join(text_sections)
        else:
-            raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
+            raise MobiError('Unknown compression algorithm: %s' % repr(self.book_header.compression_type))
        if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
            self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
        self.mobi_html = self.mobi_html.replace('\0', '')
@ -636,7 +642,7 @@ class MobiReader(object):
        self.log.debug('Adding anchors...')
        positions = set([])
        link_pattern = re.compile(r'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
-                                  re.IGNORECASE)
+            re.IGNORECASE)
        for match in link_pattern.finditer(self.mobi_html):
            positions.add(int(match.group(1)))
        pos = 0
@ -652,10 +658,10 @@ class MobiReader(object):
            if r > -1 and (r < l or l == end or l == -1):
                p = self.mobi_html.rfind('<', 0, end + 1)
                if pos < end and p > -1 and \
-                   not end_tag_re.match(self.mobi_html[p:r]) and \
+                    not end_tag_re.match(self.mobi_html[p:r]) and \
-                   not self.mobi_html[p:r+1].endswith('/>'):
+                    not self.mobi_html[p:r + 1].endswith('/>'):
-                    anchor = ' filepos-id="filepos%d"'
+                        anchor = ' filepos-id="filepos%d"'
-                    end = r
+                        end = r
                else:
                    end = r + 1
            self.processed_html += self.mobi_html[pos:end] + (anchor % oend)
@ -673,7 +679,7 @@ class MobiReader(object):
        start = getattr(self.book_header, 'first_image_index', -1)
        if start > self.num_sections or start < 0:
            # BAEN PRC files have bad headers
-            start=0
+            start = 0
        for i in range(start, self.num_sections):
            if i in processed_records:
                continue
@ -687,7 +693,7 @@ class MobiReader(object):
            except IOError:
                continue
-            path = os.path.join(output_dir, '%05d.jpg'%image_index)
+            path = os.path.join(output_dir, '%05d.jpg' % image_index)
            self.image_names.append(os.path.basename(path))
            im.save(open(path, 'wb'), format='JPEG')
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -1,27 +1,32 @@
 '''
 Write content to Mobipocket books.
 '''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
 from collections import defaultdict
 from itertools import count
 from itertools import izip
 import random
 import re
 from struct import pack
 import time
 import random
 from cStringIO import StringIO
 import re
 from itertools import izip, count
 from collections import defaultdict
 from urlparse import urldefrag
 from PIL import Image
-from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
+from cStringIO import StringIO
    OEB_RASTER_IMAGES
 from calibre.ebooks.oeb.base import namespace, prefixname
 from calibre.ebooks.oeb.base import urlnormalize
 from calibre.ebooks.mobi.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.ebooks.mobi.mobiml import MBP_NS
 from calibre.ebooks.oeb.base import OEB_DOCS
 from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
 from calibre.ebooks.oeb.base import XHTML
 from calibre.ebooks.oeb.base import XHTML_NS
 from calibre.ebooks.oeb.base import XML_NS
 from calibre.ebooks.oeb.base import namespace
 from calibre.ebooks.oeb.base import prefixname
 from calibre.ebooks.oeb.base import urlnormalize
 from calibre.ebooks.compression.palmdoc import compress_doc
 # TODO:
 # - Allow override CSS (?)
@ -174,7 +179,7 @@ class Serializer(object):
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
-        path =  item.href if item else base.href
+        path = item.href if item else base.href
        href = '#'.join((path, frag)) if frag else path
        buffer.write('filepos=')
        self.href_offsets[href].append(buffer.tell())
@ -211,8 +216,8 @@ class Serializer(object):
    def serialize_elem(self, elem, item, nsrmap=NSRMAP):
        buffer = self.buffer
        if not isinstance(elem.tag, basestring) \
-           or namespace(elem.tag) not in nsrmap:
+            or namespace(elem.tag) not in nsrmap:
-            return
+                return
        tag = prefixname(elem.tag, nsrmap)
        # Previous layers take care of @name
        id = elem.attrib.pop('id', None)
@ -221,9 +226,9 @@ class Serializer(object):
            offset = self.anchor_offset or buffer.tell()
            self.id_offsets[href] = offset
        if self.anchor_offset is not None and \
-           tag == 'a' and not elem.attrib and \
+            tag == 'a' and not elem.attrib and \
-           not len(elem) and not elem.text:
+            not len(elem) and not elem.text:
-            return
+                return
        self.anchor_offset = buffer.tell()
        buffer.write('<')
        buffer.write(tag)
@ -286,8 +291,8 @@ class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
    def __init__(self, compression=PALMDOC, imagemax=None,
-                 prefer_author_sort=False):
+        prefer_author_sort=False):
-        self._compression = compression or UNCOMPRESSED
+    self._compression = compression or UNCOMPRESSED
        self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
        self._prefer_author_sort = prefer_author_sort
@ -297,7 +302,7 @@ class MobiWriter(object):
        imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
        prefer_author_sort = opts.prefer_author_sort
        return cls(compression=PALMDOC, imagemax=imagemax,
-                   prefer_author_sort=prefer_author_sort)
+            prefer_author_sort=prefer_author_sort)
    def __call__(self, oeb, path):
        if hasattr(path, 'write'):
@ -305,7 +310,7 @@ class MobiWriter(object):
        with open(path, 'w+b') as stream:
            return self._dump_stream(oeb, stream)
-    def _write(self, *data):
+    def _write(self, * data):
        for datum in data:
            self._stream.write(datum)
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -7,17 +6,17 @@ __docformat__ = 'restructuredtext en'
 class PDBError(Exception):
    pass
-    
+
 from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
 FORMAT_READERS = {
-    'PNPdPPrs' : ereader_reader,
+    'PNPdPPrs': ereader_reader,
-    'PNRdPPrs' : ereader_reader,
+    'PNRdPPrs': ereader_reader,
-    'zTXTGPlm' : ztxt_reader,
+    'zTXTGPlm': ztxt_reader,
-    'TEXtREAd' : palmdoc_reader,
+    'TEXtREAd': palmdoc_reader,
 }
 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@ -25,41 +24,41 @@ from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
 from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
 FORMAT_WRITERS = {
-    'doc'     : palmdoc_writer,
+    'doc': palmdoc_writer,
-    'ztxt'    : ztxt_writer,
+    'ztxt': ztxt_writer,
-    'ereader' : ereader_writer,
+    'ereader': ereader_writer,
 }
 IDENTITY_TO_NAME = {
-    'PNPdPPrs' : 'eReader',
+    'PNPdPPrs': 'eReader',
-    'PNRdPPrs' : 'eReader',
+    'PNRdPPrs': 'eReader',
-    'zTXTGPlm' : 'zTXT',
+    'zTXTGPlm': 'zTXT',
-    'TEXtREAd' : 'PalmDOC',
+    'TEXtREAd': 'PalmDOC',
-    
+
-    '.pdfADBE' : 'Adobe Reader',
+    '.pdfADBE': 'Adobe Reader',
-    'BVokBDIC' : 'BDicty',
+    'BVokBDIC': 'BDicty',
-    'DB99DBOS' : 'DB (Database program)',
+    'DB99DBOS': 'DB (Database program)',
-    'vIMGView' : 'FireViewer (ImageViewer)',
+    'vIMGView': 'FireViewer (ImageViewer)',
-    'PmDBPmDB' : 'HanDBase',
+    'PmDBPmDB': 'HanDBase',
-    'InfoINDB' : 'InfoView',
+    'InfoINDB': 'InfoView',
-    'ToGoToGo' : 'iSilo',
+    'ToGoToGo': 'iSilo',
-    'SDocSilX' : 'iSilo 3',
+    'SDocSilX': 'iSilo 3',
-    'JbDbJBas' : 'JFile',
+    'JbDbJBas': 'JFile',
-    'JfDbJFil' : 'JFile Pro',
+    'JfDbJFil': 'JFile Pro',
-    'DATALSdb' : 'LIST',
+    'DATALSdb': 'LIST',
-    'Mdb1Mdb1' : 'MobileDB',
+    'Mdb1Mdb1': 'MobileDB',
-    'BOOKMOBI' : 'MobiPocket',
+    'BOOKMOBI': 'MobiPocket',
-    'DataPlkr' : 'Plucker',
+    'DataPlkr': 'Plucker',
-    'DataSprd' : 'QuickSheet',
+    'DataSprd': 'QuickSheet',
-    'SM01SMem' : 'SuperMemo',
+    'SM01SMem': 'SuperMemo',
-    'TEXtTlDc' : 'TealDoc',
+    'TEXtTlDc': 'TealDoc',
-    'InfoTlIf' : 'TealInfo',
+    'InfoTlIf': 'TealInfo',
-    'DataTlMl' : 'TealMeal',
+    'DataTlMl': 'TealMeal',
-    'DataTlPt' : 'TealPaint',
+    'DataTlPt': 'TealPaint',
-    'dataTDBP' : 'ThinkDB',
+    'dataTDBP': 'ThinkDB',
-    'TdatTide' : 'Tides',
+    'TdatTide': 'Tides',
-    'ToRaTRPW' : 'TomeRaider',
+    'ToRaTRPW': 'TomeRaider',
-    'BDOCWrdS' : 'WordSmith',
+    'BDOCWrdS': 'WordSmith',
 }
 def get_reader(identity):
@ -67,10 +66,10 @@ def get_reader(identity):
    Returns None if no reader is found for the identity.
    '''
    return FORMAT_READERS.get(identity, None)
-        
+
 def get_writer(extension):
    '''
    Returns None if no writer is found for extension.
    '''
    return FORMAT_WRITERS.get(extension, None)
-    
+
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@ -8,16 +8,19 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import os, re, struct, zlib
+import os
 import re
 import struct
 import zlib
 from calibre import CurrentDir
 from calibre.ebooks import DRMError
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ereader import EreaderError
 from calibre.ebooks.pml.pmlconverter import pml_to_html, \
    footnote_sidebar_to_html
 from calibre.ebooks.mobi.palmdoc import decompress_doc
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.ereader import EreaderError
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pml.pmlconverter import footnote_sidebar_to_html
 from calibre.ebooks.pml.pmlconverter import pml_to_html
 class HeaderRecord(object):
    '''
@ -32,7 +35,7 @@ class HeaderRecord(object):
        self.non_text_offset, = struct.unpack('>H', raw[12:14])
        self.has_metadata, = struct.unpack('>H', raw[24:26])
        self.footnote_rec, = struct.unpack('>H', raw[28:30])
-        self.sidebar_rec, =  struct.unpack('>H', raw[30:32])
+        self.sidebar_rec, = struct.unpack('>H', raw[30:32])
        self.bookmark_offset, = struct.unpack('>H', raw[32:34])
        self.image_data_offset, = struct.unpack('>H', raw[40:42])
        self.metadata_offset, = struct.unpack('>H', raw[44:46])
@ -79,7 +82,7 @@ class Reader(FormatReader):
        if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
            return 'empty', ''
        data = self.section_data(number)
-        name = data[4:4+32].strip('\x00')
+        name = data[4:4 + 32].strip('\x00')
        img = data[62:]
        return name, img
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@ -8,9 +8,11 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import struct, zlib
+import struct
 import zlib
-import Image, cStringIO
+import Image
 import cStringIO
 from calibre.ebooks.pdb.formatwriter import FormatWriter
 from calibre.ebooks.oeb.base import OEB_IMAGES
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
 from __future__ import with_statement
 '''
 Read the header data from a pdb file.
 '''
@ -8,7 +7,9 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import re, struct, time
+import re
 import struct
 import time
 class PdbHeaderReader(object):
@ -35,16 +36,16 @@ class PdbHeaderReader(object):
        if number not in range(0, self.num_sections):
            raise ValueError('Not a valid section number %i' % number)
-        self.stream.seek(78+number*8)
+        self.stream.seek(78 + number * 8)
        offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', self.stream.read(8))[0]
-        flags, val = a1, a2<<16 | a3<<8 | a4
+        flags, val = a1, a2 << 16 | a3 << 8 | a4
        return (offset, flags, val)
    def section_offset(self, number):
        if number not in range(0, self.num_sections):
            raise ValueError('Not a valid section number %i' % number)
-        self.stream.seek(78+number*8)
+        self.stream.seek(78 + number * 8)
        return struct.unpack('>LBBBB', self.stream.read(8))[0]
    def section_data(self, number):
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@ -8,11 +8,13 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import os, struct, zlib
+import os
 import struct
 from calibre.ebooks.compression.palmdoc import decompress_doc
 from calibre.ebooks.pdb.formatreader import FormatReader
-from calibre.ebooks.mobi.palmdoc import decompress_doc
+from calibre.ebooks.txt.processor import opf_writer
-from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+from calibre.ebooks.txt.processor import txt_to_markdown
 class HeaderRecord(object):
    '''
@ -25,15 +27,15 @@ class HeaderRecord(object):
    def __init__(self, raw):
        '''
        Parse the fields needed from the raw header record.

        :param raw: Raw contents of the header record. Only ``raw[0:2]`` and
                    ``raw[8:10]`` are read here.
        '''
        # '>H' is a big-endian unsigned 16-bit integer. The trailing comma
        # unpacks the one-element tuple that struct.unpack returns.
        self.compression, = struct.unpack('>H', raw[0:2])
        self.num_records, = struct.unpack('>H', raw[8:10])
-        
+
-    
+
 class Reader(FormatReader):
-    
+
    def __init__(self, header, stream, log, encoding=None):
        self.stream = stream
        self.log = log
        self.encoding = encoding
-    
+
        self.sections = []
        for i in range(header.num_sections):
            self.sections.append(header.section_data(i))
@ -52,7 +54,7 @@ class Reader(FormatReader):
    def extract_content(self, output_dir):
        txt = ''
-        
+
        self.log.info('Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug('\tDecompressing text section %i' % i)
@ -62,12 +64,12 @@ class Reader(FormatReader):
        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))
-                        
+
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
-        
+
        return os.path.join(output_dir, 'metadata.opf')
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@ -10,10 +10,11 @@ __docformat__ = 'restructuredtext en'
 import struct
 from calibre.ebooks.compression.palmdoc import compress_doc
 from calibre.ebooks.pdb.formatwriter import FormatWriter
 from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines
 from calibre.ebooks.mobi.palmdoc import compress_doc
 from calibre.ebooks.pdb.header import PdbHeaderBuilder
 from calibre.ebooks.txt.writer import TxtNewlines
 from calibre.ebooks.txt.writer import TxtWriter
 MAX_RECORD_SIZE = 4096
@ -22,48 +23,48 @@ class Writer(FormatWriter):
    def __init__(self, opts, log):
        '''
        Keep the conversion options and the logger for later use.

        :param opts: Options object, stored as ``self.opts``.
        :param log: Logger, stored as ``self.log``.
        '''
        self.opts = opts
        self.log = log
-        
+
    def write_content(self, oeb_book, out_stream, metadata=None):
        title = self.opts.title if self.opts.title else oeb_book.metadata.title[0].value if oeb_book.metadata.title != [] else _('Unknown')
        txt_records, txt_length = self._generate_text(oeb_book.spine)
        header_record = self._header_record(txt_length, len(txt_records))
-        
+
        section_lengths = [len(header_record)]
        self.log.info('Compessing data...')
        for i in range(0, len(txt_records)):
            self.log.debug('\tCompressing record %i' % i)
            txt_records[i] = compress_doc(txt_records[i].encode('utf-8'))
            section_lengths.append(len(txt_records[i]))
-            
+
        out_stream.seek(0)
        hb = PdbHeaderBuilder('TEXtREAd', title)
        hb.build_header(section_lengths, out_stream)
-        
+
-        for record in [header_record]+txt_records:
+        for record in [header_record] + txt_records:
            out_stream.write(record)
-        
+
    def _generate_text(self, spine):
        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
        txt = txt_writer.dump(spine)
-        
+
        txt_length = len(txt)
-        
+
        txt_records = []
        for i in range(0, (len(txt) / MAX_RECORD_SIZE) + 1):
-            txt_records.append(txt[i * MAX_RECORD_SIZE : (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
+            txt_records.append(txt[i * MAX_RECORD_SIZE: (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])
-            
+
        return txt_records, txt_length
-        
+
    def _header_record(self, txt_length, record_count):
        record = ''
-        
+
        record += struct.pack('>H', 2)                  # [0:2],   Compression type: 2 = PalmDoc compression (1 = no compression).
        record += struct.pack('>H', 0)                  # [2:4],   Always 0.
        record += struct.pack('>L', txt_length)         # [4:8],   Uncompressed length of the entire text of the book.
        record += struct.pack('>H', record_count)       # [8:10],  Number of PDB records used for the text of the book.
        record += struct.pack('>H', MAX_RECORD_SIZE)    # [10:12], Maximum size of each record containing text, always 4096.
        record += struct.pack('>L', 0)                  # [12:16], Current reading position, as an offset into the uncompressed text.
-        
+
        return record
-        
+
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -8,7 +8,8 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into PML markup
 '''
-import os, re
+import os
 import re
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
@ -40,6 +41,31 @@ STYLES = [
    ('text-align', {'right' : 'r', 'center' : 'c'}),
 ]
 # Tags that start a paragraph block ('block' is pushed on the tag stack).
 BLOCK_TAGS = [
    'p',
 ]
 # Values of style['display'] that also start a paragraph block.
 BLOCK_STYLES = [
    'block',
 ]
 # Tags handled as anchor links (their 'href' attribute is inspected).
 LINK_TAGS = [
    'a',
 ]
 # Tags after which two line separators are appended to the output text.
 SEPARATE_TAGS = [
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'p',
    'div',
    'li',
    'tr',
 ]
 class PMLMLizer(object):
    def __init__(self, ignore_tables=False):
        self.ignore_tables = ignore_tables
@ -104,7 +130,7 @@ class PMLMLizer(object):
        tag_count = 0
        # Are we in a paragraph block?
-        if tag == 'p' or style['display'] in ('block'):
+        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
            if 'block' not in tag_stack:
                tag_count += 1
                tag_stack.append('block')
@ -136,7 +162,7 @@ class PMLMLizer(object):
            # Special processing of tags that require an argument.
            # Anchors links
-            if tag == 'a' and 'q' not in tag_stack:
+            if tag in LINK_TAGS and 'q' not in tag_stack:
                href = elem.get('href')
                if href and '://' not in href:
                    if '#' in href:
@ -168,7 +194,7 @@ class PMLMLizer(object):
        for i in range(0, tag_count):
            close_tag_list.insert(0, tag_stack.pop())
        text += self.close_tags(close_tag_list)
-        if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'li', 'tr'):
+        if tag in SEPARATE_TAGS:
            text += os.linesep + os.linesep
        if 'block' not in tag_stack: