Integrate patched pychm into calibre build process

2025-07-09 03:04:10 -04:00 · 2010-02-27 22:15:25 -07:00 · 2010-02-27 22:15:25 -07:00 · 2f437907a3
commit 2f437907a3
parent 95f655585b
16 changed files with 3377 additions and 235 deletions
--- a/setup/build_environment.py
+++ b/setup/build_environment.py
@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
 fc_lib = '/usr/lib'
 podofo_inc = '/usr/include/podofo'
 podofo_lib = '/usr/lib'
+# two separate lists, so that changing one can never affect the other
+chmlib_inc_dirs, chmlib_lib_dirs = [], []

 if iswindows:
    prefix  = r'C:\cygwin\home\kovid\sw'
@ -96,6 +97,10 @@ if iswindows:
    sw_lib_dir  = os.path.join(prefix, 'lib')
    fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
    fc_lib = sw_lib_dir
+    chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
+        'build', 'chmlib-0.40', 'src'))
+    chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
+        'build', 'chmlib-0.40', 'src', 'Release'))
    png_inc_dirs = [sw_inc_dir]
    png_lib_dirs = [sw_lib_dir]
    png_libs = ['png12']
--- a/setup/extensions.py
+++ b/setup/extensions.py
@ -12,12 +12,13 @@ from distutils import sysconfig
 from PyQt4.pyqtconfig import QtGuiModuleMakefile

 from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
-from setup.build_environment import fc_inc, fc_lib, \
+from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
        fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
        podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
        QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
        magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
-        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
+        magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
+        jpg_lib_dirs, chmlib_lib_dirs
 MT
 isunix = islinux or isosx or isfreebsd

@ -56,6 +57,22 @@ if iswindows:
    pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']

 extensions = [
+
+    Extension('chmlib',
+            ['calibre/utils/chm/swig_chm.c'],
+            libraries=['ChmLib' if iswindows else 'chm'],
+            inc_dirs=chmlib_inc_dirs,
+            lib_dirs=chmlib_lib_dirs,
+            cflags=["-DSWIG_COBJECT_TYPES"]),
+
+    Extension('chm_extra',
+            ['calibre/utils/chm/extra.c'],
+            libraries=['ChmLib' if iswindows else 'chm'],
+            inc_dirs=chmlib_inc_dirs,
+            lib_dirs=chmlib_lib_dirs,
+            cflags=["-D__PYTHON__"]),
+
+
    Extension('pdfreflow',
                reflow_sources,
                headers=reflow_headers,
--- a/setup/installer/linux/freeze.py
+++ b/setup/installer/linux/freeze.py
@ -42,6 +42,7 @@ class LinuxFreeze(Command):
                        '/usr/lib/liblcms.so.1',
                        '/tmp/calibre-mount-helper',
                        '/usr/lib/libunrar.so',
+                        '/usr/lib/libchm.so.0',
                        '/usr/lib/libsqlite3.so.0',
                        '/usr/lib/libsqlite3.so.0',
                        '/usr/lib/libmng.so.1',
--- a/setup/installer/osx/app/main.py
+++ b/setup/installer/osx/app/main.py
@ -459,7 +459,7 @@ class Py2App(object):

    @flush
    def add_misc_libraries(self):
-        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
+        for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
            info('\nAdding', x)
            x = 'lib%s.dylib'%x
            shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -57,7 +57,8 @@ if plugins is None:
        sys.path.insert(0, plugin_path)

        for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
-            'fontconfig', 'pdfreflow', 'progress_indicator'] + \
+            'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
+            'chm_extra'] + \
                    (['winutil'] if iswindows else []) + \
                    (['usbobserver'] if isosx else []):
            try:
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -103,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
            mi.cover_data = (ext.lower(), data)
        return mi

+class CHMMetadataReader(MetadataReaderPlugin):
+    '''Metadata reader plugin registered for the 'chm' file type.'''
+
+    name = 'Read CHM metadata'
+    file_types = set(['chm'])
+    description = _('Read metadata from %s files') % 'CHM'
+
+    def get_metadata(self, stream, ftype):
+        '''
+        Return whatever calibre.ebooks.metadata.chm.get_metadata() yields
+        for `stream`. `ftype` is accepted but not used.
+        '''
+        # imported at call time rather than when this module is loaded
+        from calibre.ebooks.metadata.chm import get_metadata as read_chm
+        return read_chm(stream)
+
+
 class EPUBMetadataReader(MetadataReaderPlugin):

    name        = 'Read EPUB metadata'
@ -384,6 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.tcr.input import TCRInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lrf.input import LRFInput
+from calibre.ebooks.chm.input import CHMInput

 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.fb2.output import FB2Output
@ -444,6 +456,7 @@ plugins += [
    TCRInput,
    TXTInput,
    LRFInput,
+    CHMInput,
 ]
 plugins += [
    EPUBOutput,
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -15,7 +15,7 @@ class ANDROID(USBMS):
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
-    FORMATS     = ['epub']
+    FORMATS     = ['epub', 'pdf']

    VENDOR_ID   = {
            0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@ -1,213 +1,18 @@
-from __future__ import with_statement
 ''' CHM File decoding support '''
 __license__ = 'GPL v3'
 __copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                 ' and Alex Bramley <a.bramley at gmail.com>.'

-import os, shutil, uuid, re
-from tempfile import mkdtemp
-from mimetypes import guess_type as guess_mimetype
+import os, uuid

-from BeautifulSoup import BeautifulSoup, NavigableString
 from lxml import html
-from pychm.chm import CHMFile
-from pychm.chmlib import (
-  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
-  chm_enumerate,
-)

-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from calibre.utils.config import OptionParser
-from calibre.ebooks.metadata.toc import TOC
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.ebooks.chm.reader import CHMReader, match_string
+from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename

-
-def match_string(s1, s2_already_lowered):
-    if s1 is not None and s2_already_lowered is not None:
-        if s1.lower()==s2_already_lowered:
-            return True
-    return False
-
-def check_all_prev_empty(tag):
-    if tag is None:
-        return True
-    if tag.__class__ == NavigableString and not check_empty(tag):
-        return False
-    return check_all_prev_empty(tag.previousSibling)
-
-def check_empty(s, rex = re.compile(r'\S')):
-    return rex.search(s) is None
-
-
-def option_parser():
-    parser = OptionParser(usage=_('%prog [options] mybook.chm'))
-    parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
-    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
-    parser.add_option("-t", "--title", action="store", type="string", \
-                    dest="title", help=_("Set the book title"))
-    parser.add_option('--title-sort', action='store', type='string', default=None,
-                      dest='title_sort', help=_('Set sort key for the title'))
-    parser.add_option("-a", "--author", action="store", type="string", \
-                    dest="author", help=_("Set the author"))
-    parser.add_option('--author-sort', action='store', type='string', default=None,
-                      dest='author_sort', help=_('Set sort key for the author'))
-    parser.add_option("-c", "--category", action="store", type="string", \
-                    dest="category", help=_("The category this book belongs"
-                    " to. E.g.: History"))
-    parser.add_option("--thumbnail", action="store", type="string", \
-                    dest="thumbnail", help=_("Path to a graphic that will be"
-                    " set as this files' thumbnail"))
-    parser.add_option("--comment", action="store", type="string", \
-                    dest="freetext", help=_("Path to a txt file containing a comment."))
-    parser.add_option("--get-thumbnail", action="store_true", \
-                    dest="get_thumbnail", default=False, \
-                    help=_("Extract thumbnail from LRF file"))
-    parser.add_option('--publisher', default=None, help=_('Set the publisher'))
-    parser.add_option('--classification', default=None, help=_('Set the book classification'))
-    parser.add_option('--creator', default=None, help=_('Set the book creator'))
-    parser.add_option('--producer', default=None, help=_('Set the book producer'))
-    parser.add_option('--get-cover', action='store_true', default=False,
-                      help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
-    parser.add_option('--bookid', action='store', type='string', default=None,
-                      dest='book_id', help=_('Set book ID'))
-    parser.add_option('--font-delta', action='store', type='int', default=0,
-                      dest='font_delta', help=_('Set font delta'))
-    return parser
-
-class CHMError(Exception):
-    pass
-
-class CHMReader(CHMFile):
-    def __init__(self, input, log):
-        CHMFile.__init__(self)
-        if not self.LoadCHM(input):
-            raise CHMError("Unable to open CHM file '%s'"%(input,))
-        self.log = log
-        self._sourcechm = input
-        self._contents = None
-        self._playorder = 0
-        self._metadata = False
-        self._extracted = False
-
-        # location of '.hhc' file, which is the CHM TOC.
-        self.root, ext = os.path.splitext(self.topics.lstrip('/'))
-        self.hhc_path = self.root + ".hhc"
-
-
-    def _parse_toc(self, ul, basedir=os.getcwdu()):
-        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
-        self._playorder += 1
-        for li in ul('li', recursive=False):
-            href = li.object('param', {'name': 'Local'})[0]['value']
-            if href.count('#'):
-                href, frag = href.split('#')
-            else:
-                frag = None
-            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
-            #print "========>", name
-            toc.add_item(href, frag, name, play_order=self._playorder)
-            self._playorder += 1
-            if li.ul:
-               child = self._parse_toc(li.ul)
-               child.parent = toc
-               toc.append(child)
-        #print toc
-        return toc
-
-
-    def GetFile(self, path):
-        # have to have abs paths for ResolveObject, but Contents() deliberately
-        # makes them relative. So we don't have to worry, re-add the leading /.
-        # note this path refers to the internal CHM structure
-        if path[0] != '/':
-            path = '/' + path
-        res, ui = self.ResolveObject(path)
-        if res != CHM_RESOLVE_SUCCESS:
-            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
-        size, data = self.RetrieveObject(ui)
-        if size == 0:
-            raise CHMError("'%s' is zero bytes in length!"%(path,))
-        return data
-
-    def ExtractFiles(self, output_dir=os.getcwdu()):
-        for path in self.Contents():
-            lpath = os.path.join(output_dir, path)
-            self._ensure_dir(lpath)
-            data = self.GetFile(path)
-            with open(lpath, 'wb') as f:
-                if guess_mimetype(path)[0] == ('text/html'):
-                    data = self._reformat(data)
-                f.write(data)
-        #subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
-        self._extracted = True
-
-    def _reformat(self, data):
-        try:
-            soup = BeautifulSoup(data)
-        except UnicodeEncodeError:
-            # hit some strange encoding problems...
-            print "Unable to parse html for cleaning, leaving it :("
-            return data
-        # nuke javascript...
-        [s.extract() for s in soup('script')]
-        # remove forward and back nav bars from the top/bottom of each page
-        # cos they really fuck with the flow of things and generally waste space
-        # since we can't use [a,b] syntax to select arbitrary items from a list
-        # we'll have to do this manually...
-        t = soup('table')
-        if t:
-            if (t[0].previousSibling is None
-              or t[0].previousSibling.previousSibling is None):
-                t[0].extract()
-            if (t[-1].nextSibling is None
-              or t[-1].nextSibling.nextSibling is None):
-                t[-1].extract()
-        # for some very odd reason each page's content appears to be in a table
-        # too. and this table has sub-tables for random asides... grr.
-
-        # remove br at top of page if present after nav bars removed
-        br = soup('br')
-        if br:
-            if check_all_prev_empty(br[0].previousSibling):
-                br[0].extract()
-
-        # some images seem to be broken in some chm's :/
-        for img in soup('img'):
-            try:
-                # some are supposedly "relative"... lies.
-                while img['src'].startswith('../'): img['src'] = img['src'][3:]
-                # some have ";<junk>" at the end.
-                img['src'] = img['src'].split(';')[0]
-            except KeyError:
-                # and some don't even have a src= ?!
-                pass
-        # now give back some pretty html.
-        return soup.prettify()
-
-    def Contents(self):
-        if self._contents is not None:
-            return self._contents
-        paths = []
-        def get_paths(chm, ui, ctx):
-            # skip directories
-            # note this path refers to the internal CHM structure
-            if ui.path[-1] != '/':
-                # and make paths relative
-                paths.append(ui.path.lstrip('/'))
-        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
-        self._contents = paths
-        return self._contents
-
-    def _ensure_dir(self, path):
-        dir = os.path.dirname(path)
-        if not os.path.isdir(dir):
-            os.makedirs(dir)
-
-    def extract_content(self, output_dir=os.getcwdu()):
-        self.ExtractFiles(output_dir=output_dir)
-
-
 class CHMInput(InputFormatPlugin):

    name        = 'CHM Input'
@ -215,11 +20,6 @@ class CHMInput(InputFormatPlugin):
    description = 'Convert CHM files to OEB'
    file_types  = set(['chm'])

-    options = set([
-        OptionRecommendation(name='dummy_option', recommended_value=False,
-            help=_('dummy option until real options are determined.')),
-    ])
-
    def _chmtohtml(self, output_dir, chm_path, no_images, log):
        log.debug('Opening CHM file')
        rdr = CHMReader(chm_path, log)
@ -230,37 +30,36 @@ class CHMInput(InputFormatPlugin):

    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.metadata.chm import get_metadata_
+        from calibre.customize.ui import plugin_for_input_format

        log.debug('Processing CHM...')
-        tdir = mkdtemp(prefix='chm2oeb_')
-        from calibre.customize.ui import plugin_for_input_format
-        html_input = plugin_for_input_format('html')
-        for opt in html_input.options:
-            setattr(options, opt.option.name, opt.recommended_value)
-        options.input_encoding = 'utf-8'
-        no_images = False #options.no_images
-        chm_name = stream.name
-        #chm_data = stream.read()
+        with TemporaryDirectory('chm2oeb') as tdir:
+            html_input = plugin_for_input_format('html')
+            for opt in html_input.options:
+                setattr(options, opt.option.name, opt.recommended_value)
+            options.input_encoding = 'utf-8'
+            no_images = False #options.no_images
+            chm_name = stream.name
+            #chm_data = stream.read()

-        #closing stream so CHM can be opened by external library
-        stream.close()
-        log.debug('tdir=%s' % tdir)
-        log.debug('stream.name=%s' % stream.name)
-        mainname = self._chmtohtml(tdir, chm_name, no_images, log)
-        mainpath = os.path.join(tdir, mainname)
+            #closing stream so CHM can be opened by external library
+            stream.close()
+            log.debug('tdir=%s' % tdir)
+            log.debug('stream.name=%s' % stream.name)
+            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
+            mainpath = os.path.join(tdir, mainname)

-        metadata = get_metadata_(tdir)
+            metadata = get_metadata_(tdir)

-        odi = options.debug_pipeline
-        options.debug_pipeline = None
-        # try a custom conversion:
-        #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
-        # try using html converter:
-        htmlpath = self._create_html_root(mainpath, log)
-        oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
-        options.debug_pipeline = odi
-        #log.debug('DEBUG: Not removing tempdir %s' % tdir)
-        shutil.rmtree(tdir)
+            odi = options.debug_pipeline
+            options.debug_pipeline = None
+            # try a custom conversion:
+            #oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
+            # try using html converter:
+            htmlpath = self._create_html_root(mainpath, log)
+            oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
+            options.debug_pipeline = odi
+            #log.debug('DEBUG: Not removing tempdir %s' % tdir)
        return oeb

    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@ -0,0 +1,207 @@
+from __future__ import with_statement
+''' CHM File decoding support '''
+__license__ = 'GPL v3'
+__copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
+                 ' and Alex Bramley <a.bramley at gmail.com>.'
+
+import os, re
+from mimetypes import guess_type as guess_mimetype
+
+from BeautifulSoup import BeautifulSoup, NavigableString
+
+from calibre.utils.chm.chm import CHMFile
+from calibre.utils.chm.chmlib import (
+  CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
+  chm_enumerate,
+)
+
+from calibre.utils.config import OptionParser
+from calibre.ebooks.metadata.toc import TOC
+
+
+def match_string(s1, s2_already_lowered):
+    '''
+    Return True if s1, once lower-cased, equals s2_already_lowered.
+
+    The second argument is compared as given, the caller is expected to
+    have lower-cased it. If either argument is None the result is False.
+    '''
+    if s1 is None or s2_already_lowered is None:
+        return False
+    return s1.lower() == s2_already_lowered
+
+def check_all_prev_empty(tag):
+    '''
+    Return True if neither `tag` nor any sibling before it holds text.
+
+    Walks backwards along previousSibling, starting with `tag` itself.
+    Only nodes whose class is exactly NavigableString are inspected; the
+    first one for which check_empty() is False makes the result False.
+    Passing None returns True.
+    '''
+    # Loop instead of recursing, so that a long run of siblings cannot
+    # exceed the interpreter's recursion limit.
+    while tag is not None:
+        if tag.__class__ == NavigableString and not check_empty(tag):
+            return False
+        tag = tag.previousSibling
+    return True
+
+def check_empty(s, rex = re.compile(r'\S')):
+    '''
+    Return True if `s` contains no non-whitespace character.
+
+    `rex` is the pattern used for the test; its default is compiled once,
+    when the function is defined.
+    '''
+    return not rex.search(s)
+
+
+def option_parser():
+    '''
+    Build and return the OptionParser whose usage line is
+    '%prog [options] mybook.chm'.
+
+    It accepts an output directory, a verbosity flag and a set of
+    metadata related options (title, author, category, ...).
+    '''
+    # NOTE(review): several help strings below still mention LRF, they
+    # look like they were copied from the LRF tools -- confirm before
+    # rewording, the strings are translatable.
+    opts = OptionParser(usage=_('%prog [options] mybook.chm'))
+    add = opts.add_option
+
+    # output control
+    add('--output-dir', '-d', default='.', dest='output',
+        help=_('Output directory. Defaults to current directory'))
+    add('--verbose', default=False, action='store_true', dest='verbose')
+
+    # title and author
+    add('-t', '--title', action='store', type='string', dest='title',
+        help=_('Set the book title'))
+    add('--title-sort', action='store', type='string', default=None,
+        dest='title_sort', help=_('Set sort key for the title'))
+    add('-a', '--author', action='store', type='string', dest='author',
+        help=_('Set the author'))
+    add('--author-sort', action='store', type='string', default=None,
+        dest='author_sort', help=_('Set sort key for the author'))
+
+    # other metadata
+    add('-c', '--category', action='store', type='string', dest='category',
+        help=_("The category this book belongs"
+               " to. E.g.: History"))
+    add('--thumbnail', action='store', type='string', dest='thumbnail',
+        help=_("Path to a graphic that will be"
+               " set as this files' thumbnail"))
+    add('--comment', action='store', type='string', dest='freetext',
+        help=_('Path to a txt file containing a comment.'))
+    add('--get-thumbnail', action='store_true', dest='get_thumbnail',
+        default=False, help=_('Extract thumbnail from LRF file'))
+    add('--publisher', default=None, help=_('Set the publisher'))
+    add('--classification', default=None,
+        help=_('Set the book classification'))
+    add('--creator', default=None, help=_('Set the book creator'))
+    add('--producer', default=None, help=_('Set the book producer'))
+    add('--get-cover', action='store_true', default=False,
+        help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
+    add('--bookid', action='store', type='string', default=None,
+        dest='book_id', help=_('Set book ID'))
+    add('--font-delta', action='store', type='int', default=0,
+        dest='font_delta', help=_('Set font delta'))
+    return opts
+
+class CHMError(Exception):
+    '''Raised when a CHM file, or an object inside it, cannot be read.'''
+
+class CHMReader(CHMFile):
+    '''
+    CHMFile subclass that lists and extracts the files stored in a CHM
+    archive. HTML files are cleaned up by _reformat() while extracting.
+    '''
+
+    def __init__(self, input, log):
+        '''
+        Open the CHM file at the path `input`; `log` is kept as self.log.
+
+        Raises CHMError if LoadCHM() reports failure.
+        '''
+        CHMFile.__init__(self)
+        if not self.LoadCHM(input):
+            raise CHMError("Unable to open CHM file '%s'"%(input,))
+        self.log = log
+        self._sourcechm = input
+        self._contents = None
+        self._playorder = 0
+        self._metadata = False
+        self._extracted = False
+
+        # location of '.hhc' file, which is the CHM TOC.
+        # NOTE(review): self.topics is presumably set by LoadCHM() -- confirm
+        self.root = os.path.splitext(self.topics.lstrip('/'))[0]
+        self.hhc_path = self.root + ".hhc"
+
+
+    def _parse_toc(self, ul, basedir=os.getcwdu()):
+        '''
+        Build a TOC tree from a parsed <ul> element.
+
+        Every direct <li> child contributes one item, taken from the value
+        of its 'Local' (target) and 'Name' (title) <param> elements. A
+        nested <ul> is parsed recursively and appended as a child TOC.
+        self._playorder is incremented for every TOC and item created.
+        '''
+        toc = TOC(play_order=self._playorder, base_path=basedir, text='')
+        self._playorder += 1
+        for li in ul('li', recursive=False):
+            href = li.object('param', {'name': 'Local'})[0]['value']
+            if '#' in href:
+                # split at the first '#' only: unpacking an unlimited split
+                # raises ValueError when the fragment contains another '#'
+                href, frag = href.split('#', 1)
+            else:
+                frag = None
+            # NOTE(review): _deentity is not defined in this class,
+            # presumably it is inherited from CHMFile -- confirm
+            name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
+            toc.add_item(href, frag, name, play_order=self._playorder)
+            self._playorder += 1
+            if li.ul:
+                child = self._parse_toc(li.ul)
+                child.parent = toc
+                toc.append(child)
+        return toc
+
+
+    def GetFile(self, path):
+        '''
+        Return the data of the object stored at `path` inside the CHM.
+
+        `path` refers to the internal CHM structure and may be given with
+        or without the leading '/'. Raises CHMError if the object cannot
+        be resolved or if it is zero bytes long.
+        '''
+        # have to have abs paths for ResolveObject, but Contents() deliberately
+        # makes them relative. So we don't have to worry, re-add the leading /.
+        # startswith() also copes with an empty path, unlike path[0]
+        if not path.startswith('/'):
+            path = '/' + path
+        res, ui = self.ResolveObject(path)
+        if res != CHM_RESOLVE_SUCCESS:
+            raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
+        size, data = self.RetrieveObject(ui)
+        if size == 0:
+            raise CHMError("'%s' is zero bytes in length!"%(path,))
+        return data
+
+    def ExtractFiles(self, output_dir=os.getcwdu()):
+        '''
+        Write every file listed by Contents() below `output_dir`.
+
+        Files whose guessed mimetype is text/html are passed through
+        _reformat() before being written. Raises CHMError if an entry
+        would end up outside of `output_dir`.
+        '''
+        # join() with '' leaves exactly one trailing separator, which keeps
+        # the prefix test below from matching sibling directories
+        root = os.path.join(os.path.abspath(output_dir), '')
+        for path in self.Contents():
+            lpath = os.path.join(output_dir, path)
+            # The paths come from the CHM file and cannot be trusted: refuse
+            # '..' components or absolute paths that escape output_dir
+            if not os.path.abspath(lpath).startswith(root):
+                raise CHMError("Refusing to extract '%s' outside of '%s'"%(path, output_dir))
+            self._ensure_dir(lpath)
+            data = self.GetFile(path)
+            # clean up before opening, so that a failure does not leave an
+            # empty file behind
+            if guess_mimetype(path)[0] == 'text/html':
+                data = self._reformat(data)
+            with open(lpath, 'wb') as f:
+                f.write(data)
+        self._extracted = True
+
+    def _reformat(self, data):
+        '''
+        Clean up the HTML in `data` and return it prettified.
+
+        Removes <script> elements, a first/last <table> that sits at the
+        very start/end of its parent (navigation bars), a leading <br>
+        and repairs the src attribute of <img> elements. If parsing fails
+        with UnicodeEncodeError, `data` is returned unchanged.
+        '''
+        try:
+            soup = BeautifulSoup(data)
+        except UnicodeEncodeError:
+            # hit some strange encoding problems...
+            print "Unable to parse html for cleaning, leaving it :("
+            return data
+        # nuke javascript...
+        for script in soup('script'):
+            script.extract()
+        # remove the forward and back nav bars from the top/bottom of each
+        # page, they break the flow of the text and waste space. A table
+        # counts as a nav bar if it has at most one sibling before (first
+        # table) or after (last table) it.
+        t = soup('table')
+        if t:
+            if (t[0].previousSibling is None
+              or t[0].previousSibling.previousSibling is None):
+                t[0].extract()
+            if (t[-1].nextSibling is None
+              or t[-1].nextSibling.nextSibling is None):
+                t[-1].extract()
+        # for some very odd reason each page's content appears to be in a table
+        # too. and this table has sub-tables for random asides... grr.
+
+        # remove br at top of page if present after nav bars removed
+        br = soup('br')
+        if br and check_all_prev_empty(br[0].previousSibling):
+            br[0].extract()
+
+        # some images seem to be broken in some chm's :/
+        for img in soup('img'):
+            try:
+                src = img['src']
+            except KeyError:
+                # and some don't even have a src= ?!
+                continue
+            # some are supposedly "relative"... lies.
+            while src.startswith('../'):
+                src = src[3:]
+            # some have ";<junk>" at the end.
+            img['src'] = src.split(';')[0]
+        # now give back some pretty html.
+        return soup.prettify()
+
+    def Contents(self):
+        '''
+        Return the list of file paths found by enumerating the CHM.
+
+        Entries ending in '/' (directories) are skipped and leading '/'
+        characters are stripped. The list is computed once and cached in
+        self._contents.
+        '''
+        if self._contents is not None:
+            return self._contents
+        paths = []
+        def get_paths(chm, ui, ctx):
+            # callback for chm_enumerate(): skip directories
+            # note this path refers to the internal CHM structure
+            if ui.path[-1] != '/':
+                # and make paths relative
+                paths.append(ui.path.lstrip('/'))
+        chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
+        self._contents = paths
+        return self._contents
+
+    def _ensure_dir(self, path):
+        '''Create the directory that contains `path` if it is missing.'''
+        dir = os.path.dirname(path)
+        if not os.path.isdir(dir):
+            os.makedirs(dir)
+
+    def extract_content(self, output_dir=os.getcwdu()):
+        '''Extract all files to `output_dir`, see ExtractFiles().'''
+        self.ExtractFiles(output_dir=output_dir)
+
+
+
--- a/src/calibre/utils/chm/__init__.py
+++ b/src/calibre/utils/chm/__init__.py
@ -0,0 +1,34 @@
+## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
+
+## pychm is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+
+## You should have received a copy of the GNU General Public
+## License along with this program; see the file COPYING.  If not,
+## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+## Boston, MA 02111-1307, USA
+
+## $Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $
+
+'''
+   chm - A package to manipulate CHM files
+
+   The chm package provides four modules: chm, chmlib, extra and
+   _chmlib. _chmlib and chmlib are very low level libraries generated
+   from  SWIG interface files, and are simple wrappers around the API
+   defined by the C library chmlib.
+   The extra module adds full-text search support.
+   the chm module provides some higher level classes to simplify
+   access to the CHM files information.
+'''
+__all__ = ["chm", "chmlib", "_chmlib", "extra"]
+__version__ = "0.8.4"
+__revision__ = "$Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $"
+
--- a/src/calibre/utils/chm/chm.py
+++ b/src/calibre/utils/chm/chm.py
@ -0,0 +1,512 @@
+## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
+
+## Based on code by:
+## Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
+
+## pychm is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+
+## You should have received a copy of the GNU General Public
+## License along with this program; see the file COPYING.  If not,
+## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+## Boston, MA 02111-1307, USA
+
+## $Id: chm.py,v 1.12 2006/08/07 12:31:51 rubensr Exp $
+
+'''
+   chm - A high-level front end for the chmlib python module.
+
+   The chm module provides high level access to the functionality
+   included in chmlib. It encapsulates functions in the CHMFile class, and
+   provides some additional features, such as the ability to obtain
+   the contents tree of a CHM archive.
+
+'''
+
+import array
+import string
+import sys
+
+import calibre.utils.chm.chmlib as chmlib
+from calibre.constants import plugins
+
+# The compiled full-text-search helper comes from calibre's plugin table,
+# which yields a (module, error message) pair.
+extra, extra_err = plugins['chm_extra']
+# Fail at import time when the extension could not be loaded, instead of
+# failing later on first use.
+if extra_err:
+    raise RuntimeError('Failed to load chm.extra: '+extra_err)
+
+# Maps a numeric charset identifier (the trailing comment on each row gives
+# its symbolic name) to the name of a Python codec, or to None when no codec
+# is listed for it. CHMFile.GetEncoding looks up the third comma-separated
+# field of the archive's encoding string in this table.
+charset_table = {
+    0   : 'iso8859_1',  # ANSI_CHARSET
+    238 : 'iso8859_2',  # EASTEUROPE_CHARSET
+    178 : 'iso8859_6',  # ARABIC_CHARSET
+    161 : 'iso8859_7',  # GREEK_CHARSET
+    177 : 'iso8859_8',  # HEBREW_CHARSET
+    162 : 'iso8859_9',  # TURKISH_CHARSET
+    222 : 'iso8859_11', # THAI_CHARSET - hmm not in python 2.2...
+    186 : 'iso8859_13', # BALTIC_CHARSET
+    204 : 'cp1251',     # RUSSIAN_CHARSET
+    255 : 'cp437',      # OEM_CHARSET
+    128 : 'cp932',      # SHIFTJIS_CHARSET
+    134 : 'cp936',      # GB2312_CHARSET
+    129 : 'cp949',      # HANGUL_CHARSET
+    136 : 'cp950',      # CHINESEBIG5_CHARSET
+    1   : None,         # DEFAULT_CHARSET
+    2   : None,         # SYMBOL_CHARSET
+    130 : None,         # JOHAB_CHARSET
+    163 : None,         # VIETNAMESE_CHARSET
+    77  : None,         # MAC_CHARSET
+}
+
+# Maps a numeric locale ID to a 3-tuple:
+#   (Python codec name or None, locale name, script/region group).
+# CHMFile.GetLCID returns the tuple for the archive's locale ID, or None
+# when the ID is not listed here.
+locale_table = {
+    0x0436 : ('iso8859_1', "Afrikaans", "Western Europe & US"),
+    0x041c : ('iso8859_2', "Albanian", "Central Europe"),
+    0x0401 : ('iso8859_6', "Arabic_Saudi_Arabia", "Arabic"),
+    0x0801 : ('iso8859_6', "Arabic_Iraq", "Arabic"),
+    0x0c01 : ('iso8859_6', "Arabic_Egypt", "Arabic"),
+    0x1001 : ('iso8859_6', "Arabic_Libya", "Arabic"),
+    0x1401 : ('iso8859_6', "Arabic_Algeria", "Arabic"),
+    0x1801 : ('iso8859_6', "Arabic_Morocco", "Arabic"),
+    0x1c01 : ('iso8859_6', "Arabic_Tunisia", "Arabic"),
+    0x2001 : ('iso8859_6', "Arabic_Oman", "Arabic"),
+    0x2401 : ('iso8859_6', "Arabic_Yemen", "Arabic"),
+    0x2801 : ('iso8859_6', "Arabic_Syria", "Arabic"),
+    0x2c01 : ('iso8859_6', "Arabic_Jordan", "Arabic"),
+    0x3001 : ('iso8859_6', "Arabic_Lebanon", "Arabic"),
+    0x3401 : ('iso8859_6', "Arabic_Kuwait", "Arabic"),
+    0x3801 : ('iso8859_6', "Arabic_UAE", "Arabic"),
+    0x3c01 : ('iso8859_6', "Arabic_Bahrain", "Arabic"),
+    0x4001 : ('iso8859_6', "Arabic_Qatar", "Arabic"),
+    0x042b : (None,        "Armenian","Armenian"),
+    0x042c : ('iso8859_9', "Azeri_Latin", "Turkish"),
+    0x082c : ('cp1251',    "Azeri_Cyrillic", "Cyrillic"),
+    0x042d : ('iso8859_1', "Basque", "Western Europe & US"),
+    0x0423 : ('cp1251',    "Belarusian", "Cyrillic"),
+    0x0402 : ('cp1251',    "Bulgarian", "Cyrillic"),
+    0x0403 : ('iso8859_1', "Catalan", "Western Europe & US"),
+    0x0404 : ('cp950',     "Chinese_Taiwan", "Traditional Chinese"),
+    0x0804 : ('cp936',     "Chinese_PRC", "Simplified Chinese"),
+    0x0c04 : ('cp950',     "Chinese_Hong_Kong", "Traditional Chinese"),
+    0x1004 : ('cp936',     "Chinese_Singapore", "Simplified Chinese"),
+    0x1404 : ('cp950',     "Chinese_Macau", "Traditional Chinese"),
+    0x041a : ('iso8859_2', "Croatian", "Central Europe"),
+    0x0405 : ('iso8859_2', "Czech", "Central Europe"),
+    0x0406 : ('iso8859_1', "Danish", "Western Europe & US"),
+    0x0413 : ('iso8859_1', "Dutch_Standard", "Western Europe & US"),
+    0x0813 : ('iso8859_1', "Dutch_Belgian", "Western Europe & US"),
+    0x0409 : ('iso8859_1', "English_United_States", "Western Europe & US"),
+    0x0809 : ('iso8859_1', "English_United_Kingdom", "Western Europe & US"),
+    0x0c09 : ('iso8859_1', "English_Australian", "Western Europe & US"),
+    0x1009 : ('iso8859_1', "English_Canadian", "Western Europe & US"),
+    0x1409 : ('iso8859_1', "English_New_Zealand", "Western Europe & US"),
+    0x1809 : ('iso8859_1', "English_Irish", "Western Europe & US"),
+    0x1c09 : ('iso8859_1', "English_South_Africa", "Western Europe & US"),
+    0x2009 : ('iso8859_1', "English_Jamaica", "Western Europe & US"),
+    0x2409 : ('iso8859_1', "English_Caribbean", "Western Europe & US"),
+    0x2809 : ('iso8859_1', "English_Belize", "Western Europe & US"),
+    0x2c09 : ('iso8859_1', "English_Trinidad", "Western Europe & US"),
+    0x3009 : ('iso8859_1', "English_Zimbabwe", "Western Europe & US"),
+    0x3409 : ('iso8859_1', "English_Philippines", "Western Europe & US"),
+    0x0425 : ('iso8859_13',"Estonian", "Baltic",),
+    0x0438 : ('iso8859_1', "Faeroese", "Western Europe & US"),
+    0x0429 : ('iso8859_6', "Farsi", "Arabic"),
+    0x040b : ('iso8859_1', "Finnish", "Western Europe & US"),
+    0x040c : ('iso8859_1', "French_Standard", "Western Europe & US"),
+    0x080c : ('iso8859_1', "French_Belgian", "Western Europe & US"),
+    0x0c0c : ('iso8859_1', "French_Canadian", "Western Europe & US"),
+    0x100c : ('iso8859_1', "French_Swiss", "Western Europe & US"),
+    0x140c : ('iso8859_1', "French_Luxembourg", "Western Europe & US"),
+    0x180c : ('iso8859_1', "French_Monaco", "Western Europe & US"),
+    0x0437 : (None,        "Georgian", "Georgian"),
+    0x0407 : ('iso8859_1', "German_Standard", "Western Europe & US"),
+    0x0807 : ('iso8859_1', "German_Swiss", "Western Europe & US"),
+    0x0c07 : ('iso8859_1', "German_Austrian", "Western Europe & US"),
+    0x1007 : ('iso8859_1', "German_Luxembourg", "Western Europe & US"),
+    0x1407 : ('iso8859_1', "German_Liechtenstein", "Western Europe & US"),
+    0x0408 : ('iso8859_7', "Greek", "Greek"),
+    0x040d : ('iso8859_8', "Hebrew", "Hebrew"),
+    0x0439 : (None,        "Hindi", "Indic"),
+    0x040e : ('iso8859_2', "Hungarian", "Central Europe"),
+    0x040f : ('iso8859_1', "Icelandic", "Western Europe & US"),
+    0x0421 : ('iso8859_1', "Indonesian", "Western Europe & US"),
+    0x0410 : ('iso8859_1', "Italian_Standard", "Western Europe & US"),
+    0x0810 : ('iso8859_1', "Italian_Swiss", "Western Europe & US"),
+    0x0411 : ('cp932',     "Japanese", "Japanese"),
+    0x043f : ('cp1251',    "Kazakh", "Cyrillic"),
+    0x0457 : (None,        "Konkani", "Indic"),
+    0x0412 : ('cp949',     "Korean", "Korean"),
+    0x0426 : ('iso8859_13',"Latvian", "Baltic",),
+    0x0427 : ('iso8859_13',"Lithuanian", "Baltic",),
+    0x042f : ('cp1251',    "Macedonian", "Cyrillic"),
+    0x043e : ('iso8859_1', "Malay_Malaysia", "Western Europe & US"),
+    0x083e : ('iso8859_1', "Malay_Brunei_Darussalam", "Western Europe & US"),
+    0x044e : (None,        "Marathi", "Indic"),
+    0x0414 : ('iso8859_1', "Norwegian_Bokmal", "Western Europe & US"),
+    0x0814 : ('iso8859_1', "Norwegian_Nynorsk", "Western Europe & US"),
+    0x0415 : ('iso8859_2', "Polish", "Central Europe"),
+    0x0416 : ('iso8859_1', "Portuguese_Brazilian", "Western Europe & US"),
+    0x0816 : ('iso8859_1', "Portuguese_Standard", "Western Europe & US"),
+    0x0418 : ('iso8859_2', "Romanian", "Central Europe"),
+    0x0419 : ('cp1251',    "Russian", "Cyrillic"),
+    0x044f : (None,        "Sanskrit", "Indic"),
+    0x081a : ('iso8859_2', "Serbian_Latin", "Central Europe"),
+    0x0c1a : ('cp1251',    "Serbian_Cyrillic", "Cyrillic"),
+    0x041b : ('iso8859_2', "Slovak", "Central Europe"),
+    0x0424 : ('iso8859_2', "Slovenian", "Central Europe"),
+    0x040a : ('iso8859_1', "Spanish_Trad_Sort", "Western Europe & US"),
+    0x080a : ('iso8859_1', "Spanish_Mexican", "Western Europe & US"),
+    0x0c0a : ('iso8859_1', "Spanish_Modern_Sort", "Western Europe & US"),
+    0x100a : ('iso8859_1', "Spanish_Guatemala", "Western Europe & US"),
+    0x140a : ('iso8859_1', "Spanish_Costa_Rica", "Western Europe & US"),
+    0x180a : ('iso8859_1', "Spanish_Panama", "Western Europe & US"),
+    0x1c0a : ('iso8859_1', "Spanish_Dominican_Repub", "Western Europe & US"),
+    0x200a : ('iso8859_1', "Spanish_Venezuela", "Western Europe & US"),
+    0x240a : ('iso8859_1', "Spanish_Colombia", "Western Europe & US"),
+    0x280a : ('iso8859_1', "Spanish_Peru", "Western Europe & US"),
+    0x2c0a : ('iso8859_1', "Spanish_Argentina", "Western Europe & US"),
+    0x300a : ('iso8859_1', "Spanish_Ecuador", "Western Europe & US"),
+    0x340a : ('iso8859_1', "Spanish_Chile", "Western Europe & US"),
+    0x380a : ('iso8859_1', "Spanish_Uruguay", "Western Europe & US"),
+    0x3c0a : ('iso8859_1', "Spanish_Paraguay", "Western Europe & US"),
+    0x400a : ('iso8859_1', "Spanish_Bolivia", "Western Europe & US"),
+    0x440a : ('iso8859_1', "Spanish_El_Salvador", "Western Europe & US"),
+    0x480a : ('iso8859_1', "Spanish_Honduras", "Western Europe & US"),
+    0x4c0a : ('iso8859_1', "Spanish_Nicaragua", "Western Europe & US"),
+    0x500a : ('iso8859_1', "Spanish_Puerto_Rico", "Western Europe & US"),
+    0x0441 : ('iso8859_1', "Swahili", "Western Europe & US"),
+    0x041d : ('iso8859_1', "Swedish", "Western Europe & US"),
+    0x081d : ('iso8859_1', "Swedish_Finland", "Western Europe & US"),
+    0x0449 : (None,        "Tamil", "Indic"),
+    0x0444 : ('cp1251',    "Tatar", "Cyrillic"),
+    0x041e : ('iso8859_11',"Thai", "Thai"),
+    0x041f : ('iso8859_9', "Turkish", "Turkish"),
+    0x0422 : ('cp1251',    "Ukrainian", "Cyrillic"),
+    0x0420 : ('iso8859_6', "Urdu", "Arabic"),
+    0x0443 : ('iso8859_9', "Uzbek_Latin", "Turkish"),
+    0x0843 : ('cp1251',    "Uzbek_Cyrillic", "Cyrillic"),
+    0x042a : (None,        "Vietnamese", "Vietnamese")
+}
+
+class CHMFile:
+    "A class to manage access to CHM files."
+    filename = ""
+    file = None
+    title = ""
+    home = "/"
+    index = None
+    topics = None
+    encoding = None
+    lcid = None
+    binaryindex = None
+
+    def __init__(self):
+        self.searchable = 0
+
+    def LoadCHM(self, archiveName):
+        '''Loads a CHM archive.
+        This function will also call GetArchiveInfo to obtain information
+        such as the index file name and the topics file. It returns 1 on
+        success, and 0 if it fails.
+        '''
+        if (self.filename != None):
+            self.CloseCHM()
+
+        self.file = chmlib.chm_open(archiveName)
+        if (self.file == None):
+            return 0
+
+        self.filename = archiveName
+        self.GetArchiveInfo()
+
+        return 1
+
+    def CloseCHM(self):
+        '''Closes the CHM archive.
+        This function will close the CHM file, if it is open. All variables
+        are also reset.
+        '''
+        if (self.filename != None):
+            chmlib.chm_close(self.file)
+            self.file = None
+            self.filename = ''
+            self.title = ""
+            self.home = "/"
+            self.index = None
+            self.topics = None
+            self.encoding = None
+
+    def GetArchiveInfo(self):
+        '''Obtains information on CHM archive.
+        This function checks the /#SYSTEM file inside the CHM archive to
+        obtain the index, home page, topics, encoding and title. It is called
+        from LoadCHM.
+        '''
+
+        #extra.is_searchable crashed...
+        #self.searchable = extra.is_searchable (self.file)
+        self.searchable = False
+        self.lcid = None
+
+        result, ui = chmlib.chm_resolve_object(self.file, '/#SYSTEM')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            sys.stderr.write('GetArchiveInfo: #SYSTEM does not exist\n')
+            return 0
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 4l, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetArchiveInfo: file size = 0\n')
+            return 0
+
+        buff = array.array('B', text)
+
+        index = 0
+        while (index < size):
+            cursor = buff[index] + (buff[index+1] * 256)
+
+            if (cursor == 0):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.topics = '/' + text[index:index+cursor-1]
+            elif (cursor == 1):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.index = '/' + text[index:index+cursor-1]
+            elif (cursor == 2):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.home = '/' + text[index:index+cursor-1]
+            elif (cursor == 3):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.title = text[index:index+cursor-1]
+            elif (cursor == 4):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.lcid = buff[index] + (buff[index+1] * 256)
+            elif (cursor == 6):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                tmp = text[index:index+cursor-1]
+                if not self.topics:
+                    tmp1 = '/' + tmp + '.hhc'
+                    tmp2 = '/' + tmp + '.hhk'
+                    res1, ui1 = chmlib.chm_resolve_object(self.file, tmp1)
+                    res2, ui2 = chmlib.chm_resolve_object(self.file, tmp2)
+                    if (not self.topics) and \
+                           (res1 == chmlib.CHM_RESOLVE_SUCCESS):
+                        self.topics = '/' + tmp + '.hhc'
+                    if (not self.index) and \
+                           (res2 == chmlib.CHM_RESOLVE_SUCCESS):
+                        self.index = '/' + tmp + '.hhk'
+            elif (cursor == 16):
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+                self.encoding = text[index:index+cursor-1]
+            else:
+                index += 2
+                cursor = buff[index] + (buff[index+1] * 256)
+                index += 2
+            index += cursor
+
+        self.GetWindowsInfo()
+
+        if not self.lcid:
+            self.lcid = extra.get_lcid (self.file)
+
+        return 1
+
+    def GetTopicsTree(self):
+        '''Reads and returns the topics tree.
+        This auxiliary function reads and returns the topics tree file
+        contents for the CHM archive.
+        '''
+        if (self.topics == None):
+            return None
+
+        if self.topics:
+            res, ui = chmlib.chm_resolve_object(self.file, self.topics)
+            if (res != chmlib.CHM_RESOLVE_SUCCESS):
+                return None
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetTopicsTree: file size = 0\n')
+            return None
+        return text
+
+    def GetIndex(self):
+        '''Reads and returns the index tree.
+        This auxiliary function reads and returns the index tree file
+        contents for the CHM archive.
+        '''
+        if (self.index == None):
+            return None
+
+        if self.index:
+            res, ui = chmlib.chm_resolve_object(self.file, self.index)
+            if (res != chmlib.CHM_RESOLVE_SUCCESS):
+                return None
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
+        if (size == 0):
+            sys.stderr.write('GetIndex: file size = 0\n')
+            return None
+        return text
+
+    def ResolveObject(self, document):
+        '''Tries to locate a document in the archive.
+        This function tries to locate the document inside the archive. It
+        returns a tuple where the first element is zero if the function
+        was successful, and the second is the UnitInfo for that document.
+        The UnitInfo is used to retrieve the document contents
+        '''
+        if self.file:
+            #path = os.path.abspath(document)
+            path = document
+            return chmlib.chm_resolve_object(self.file, path)
+        else:
+            return (1, None)
+
+    def RetrieveObject(self, ui, start = -1, length = -1):
+        '''Retrieves the contents of a document.
+        This function takes a UnitInfo and two optional arguments, the first
+        being the start address and the second is the length. These define
+        the amount of data to be read from the archive.
+        '''
+        if self.file and ui:
+            if length == -1:
+                len = ui.length
+            else:
+                len = length
+            if start == -1:
+                st = 0l
+            else:
+                st = long(start)
+            return chmlib.chm_retrieve_object(self.file, ui, st, len)
+        else:
+            return (0, '')
+
+    def Search(self, text, wholewords=0, titleonly=0):
+        '''Performs full-text search on the archive.
+        The first parameter is the word to look for, the second
+        indicates if the search should be for whole words only, and
+        the third parameter indicates if the search should be
+        restricted to page titles.
+        This method will return a tuple, the first item
+        indicating if the search results were partial, and the second
+        item being a dictionary containing the results.'''
+        if text and text != '' and self.file:
+            return extra.search (self.file, text, wholewords,
+                                 titleonly)
+        else:
+            return None
+
+    def IsSearchable(self):
+        '''Indicates if the full-text search is available for this
+        archive - this flag is updated when GetArchiveInfo is called'''
+        return self.searchable
+
+    def GetEncoding(self):
+        '''Returns a string that can be used with the codecs python package
+        to encode or decode the files in the chm archive. If an error is
+        found, or if it is not possible to find the encoding, None is
+        returned.'''
+        if self.encoding:
+            vals = string.split(self.encoding, ',')
+            if len(vals) > 2:
+                try:
+                    return charset_table[int(vals[2])]
+                except KeyError:
+                    pass
+        return None
+
+    def GetLCID(self):
+        '''Returns the archive Locale ID'''
+        if self.lcid in locale_table:
+            return locale_table[self.lcid]
+        else:
+            return None
+
+    def GetDWORD(self, buff, idx=0):
+        '''Internal method.
+        Reads a double word (4 bytes) from a buffer.
+        '''
+        result = buff[idx] + (buff[idx+1]<<8) + (buff[idx+2]<<16) + \
+                 (buff[idx+3]<<24)
+
+        if result == 0xFFFFFFFF:
+            result = 0
+
+        return result
+
+    def GetString(self, text, idx):
+        '''Internal method.
+        Retrieves a string from the #STRINGS buffer.
+        '''
+        next = string.find(text, '\x00', idx)
+        chunk = text[idx:next]
+        return chunk
+
+    def GetWindowsInfo(self):
+        '''Gets information from the #WINDOWS file.
+        Checks the #WINDOWS file to see if it has any info that was
+        not found in #SYSTEM (topics, index or default page.
+        '''
+        result, ui = chmlib.chm_resolve_object(self.file, '/#WINDOWS')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            return -1
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, 8)
+        if (size < 8):
+            return -2
+
+        buff = array.array('B', text)
+        num_entries = self.GetDWORD(buff, 0)
+        entry_size = self.GetDWORD(buff, 4)
+
+        if num_entries < 1:
+            return -3
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 8l, entry_size)
+        if (size < entry_size):
+            return -4
+
+        buff = array.array('B', text)
+        toc_index = self.GetDWORD(buff, 0x60)
+        idx_index = self.GetDWORD(buff, 0x64)
+        dft_index = self.GetDWORD(buff, 0x68)
+
+        result, ui = chmlib.chm_resolve_object(self.file, '/#STRINGS')
+        if (result != chmlib.CHM_RESOLVE_SUCCESS):
+            return -5
+
+        size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
+        if (size == 0):
+            return -6
+
+        if (not self.topics):
+            self.topics = self.GetString(text, toc_index)
+            if not self.topics.startswith("/"):
+                self.topics = "/" + self.topics
+
+        if (not self.index):
+            self.index = self.GetString(text, idx_index)
+            if not self.index.startswith("/"):
+                self.index = "/" + self.index
+
+        if (dft_index != 0):
+            self.home = self.GetString(text, dft_index)
+            if not self.home.startswith("/"):
+                self.home = "/" + self.home
--- a/src/calibre/utils/chm/chmlib.py
+++ b/src/calibre/utils/chm/chmlib.py
@ -0,0 +1,100 @@
+# This file was created automatically by SWIG.
+# Don't modify this file, modify the SWIG interface instead.
+# This file is compatible with both classic and new-style classes.
+
+from calibre.constants import plugins
+
+# The compiled SWIG extension comes from calibre's plugin table, which
+# yields a (module, error message) pair.
+_chmlib, chmlib_err = plugins['chmlib']
+
+# Without the extension nothing below can work, so fail at import time.
+if chmlib_err:
+    raise RuntimeError('Failed to load chmlib: '+chmlib_err)
+
+def _swig_setattr(self,class_type,name,value):
+    # SWIG-generated attribute setter used by the proxy classes in this file.
+    # Assigning another proxy to "this" adopts its underlying pointer (and
+    # its ownership flag) instead of storing the proxy object itself.
+    if (name == "this"):
+        if isinstance(value, class_type):
+            self.__dict__[name] = value.this
+            if hasattr(value,"thisown"): self.__dict__["thisown"] = value.thisown
+            del value.thisown
+            return
+    # Prefer a registered C-level setter; otherwise store a plain attribute.
+    method = class_type.__swig_setmethods__.get(name,None)
+    if method: return method(self,value)
+    self.__dict__[name] = value
+
+def _swig_getattr(self,class_type,name):
+    # SWIG-generated attribute getter: dispatch to the C-level getter
+    # registered for ``name``, or raise AttributeError when there is none.
+    method = class_type.__swig_getmethods__.get(name,None)
+    if method: return method(self)
+    raise AttributeError,name
+
+import types
+# Pick the base class for the proxy classes: ``object`` when this Python
+# has types.ObjectType, otherwise an empty classic class. ``_newclass``
+# records which one was chosen.
+try:
+    _object = types.ObjectType
+    _newclass = 1
+except AttributeError:
+    class _object : pass
+    _newclass = 0
+
+
+# Constants re-exported unchanged from the compiled extension.
+CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
+CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
+CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
+class chmUnitInfo(_object):
+    # SWIG proxy for the C chmUnitInfo structure. The fields start, length,
+    # space and path are read and written through accessor functions of the
+    # compiled extension.
+    __swig_setmethods__ = {}
+    __setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
+    __swig_getmethods__ = {}
+    __getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)
+    # Each field is registered in both accessor tables and, when the base
+    # class is a new-style one, also exposed as a property.
+    __swig_setmethods__["start"] = _chmlib.chmUnitInfo_start_set
+    __swig_getmethods__["start"] = _chmlib.chmUnitInfo_start_get
+    if _newclass:start = property(_chmlib.chmUnitInfo_start_get,_chmlib.chmUnitInfo_start_set)
+    __swig_setmethods__["length"] = _chmlib.chmUnitInfo_length_set
+    __swig_getmethods__["length"] = _chmlib.chmUnitInfo_length_get
+    if _newclass:length = property(_chmlib.chmUnitInfo_length_get,_chmlib.chmUnitInfo_length_set)
+    __swig_setmethods__["space"] = _chmlib.chmUnitInfo_space_set
+    __swig_getmethods__["space"] = _chmlib.chmUnitInfo_space_get
+    if _newclass:space = property(_chmlib.chmUnitInfo_space_get,_chmlib.chmUnitInfo_space_set)
+    __swig_setmethods__["path"] = _chmlib.chmUnitInfo_path_set
+    __swig_getmethods__["path"] = _chmlib.chmUnitInfo_path_get
+    if _newclass:path = property(_chmlib.chmUnitInfo_path_get,_chmlib.chmUnitInfo_path_set)
+    def __init__(self,*args):
+        # Create the C structure and mark this proxy as its owner.
+        _swig_setattr(self, chmUnitInfo, 'this', apply(_chmlib.new_chmUnitInfo,args))
+        _swig_setattr(self, chmUnitInfo, 'thisown', 1)
+    def __del__(self, destroy= _chmlib.delete_chmUnitInfo):
+        # ``destroy`` is bound as a default argument so it is still
+        # reachable during interpreter shutdown; errors are ignored.
+        try:
+            if self.thisown: destroy(self)
+        except: pass
+    def __repr__(self):
+        return "<C chmUnitInfo instance at %s>" % (self.this,)
+
+class chmUnitInfoPtr(chmUnitInfo):
+    # Proxy wrapping an already existing C chmUnitInfo pointer. Unlike
+    # chmUnitInfo it does not allocate, and it does not take ownership
+    # unless an ownership flag is already present.
+    def __init__(self,this):
+        _swig_setattr(self, chmUnitInfo, 'this', this)
+        if not hasattr(self,"thisown"): _swig_setattr(self, chmUnitInfo, 'thisown', 0)
+        _swig_setattr(self, chmUnitInfo,self.__class__,chmUnitInfo)
+# Register the pointer proxy class with the compiled extension.
+_chmlib.chmUnitInfo_swigregister(chmUnitInfoPtr)
+
+# Everything below re-exports functions and constants of the compiled
+# extension under the same names.
+chm_open = _chmlib.chm_open
+
+chm_close = _chmlib.chm_close
+
+CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
+chm_set_param = _chmlib.chm_set_param
+
+CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
+CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
+chm_resolve_object = _chmlib.chm_resolve_object
+
+chm_retrieve_object = _chmlib.chm_retrieve_object
+
+CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
+CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
+CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
+CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
+CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
+CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
+CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
+CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
+CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
+chm_enumerate = _chmlib.chm_enumerate
+
+chm_enumerate_dir = _chmlib.chm_enumerate_dir
+
+
--- a/src/calibre/utils/chm/extra.c
+++ b/src/calibre/utils/chm/extra.c
@ -0,0 +1,759 @@
+/*
+ * extra.c - full-text search support for pychm
+ *
+ * Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, 
+ * Boston, MA 02111-1307, USA.
+ *
+ * Author: Rubens Ramos <rubensr@users.sourceforge.net>
+ *
+ * Heavily based on work done by:
+ * Pabs <pabs@zip.to> - chmdeco
+ * Razvan Cojocaru <razvanco@gmx.net> - xCHM
+ *
+ */
+
+#include "chm_lib.h"
+#ifdef __PYTHON__
+#include "Python.h"
+#else
+#include <stdio.h>
+#define PyObject void
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+#include "stdint.h"
+#define strcasecmp stricmp
+#define strncasecmp strnicmp
+#else
+#include <inttypes.h>
+#include <strings.h>
+#endif
+
+#if defined( _MSC_VER ) && !defined( __cplusplus )
+# define inline __inline
+#endif
+
+#if defined(_WIN32) || defined(__WIN32__)
+#       if defined(_MSC_VER)
+#               if defined(STATIC_LINKED)
+#                       define MODEXPORT(a) a
+#                       define MODIMPORT(a) extern a
+#               else
+#                       define MODEXPORT(a) __declspec(dllexport) a
+#                       define MODIMPORT(a) extern a
+#               endif
+#       else
+#               if defined(__BORLANDC__)
+#                       define MODEXPORT(a) a _export
+#                       define MODIMPORT(a) a _export
+#               else
+#                       define MODEXPORT(a) a
+#                       define MODIMPORT(a) a
+#               endif
+#       endif
+#else
+#       define MODEXPORT(a) a
+#       define MODIMPORT(a) a
+#endif
+
+#define false 0
+#define true 1
+
+#define FTS_HEADER_LEN 0x32
+#define TOPICS_ENTRY_LEN 16
+#define COMMON_BUF_LEN 1025
+
+#define FREE(x) free (x); x = NULL
+
+/* Read an unsigned 16 bit value stored low byte first at b. */
+inline uint16_t
+get_uint16 (uint8_t* b) {
+  const int low = b[0];
+  const int high = b[1];
+
+  return low | (high << 8);
+}
+
+/* Read an unsigned 32 bit value stored low byte first at b.
+   Each byte is widened to uint32_t before shifting: a plain b[3]<<24 is
+   done in (signed) int and overflows when the top bit of b[3] is set. */
+inline uint32_t
+get_uint32 (uint8_t* b) {
+  return (uint32_t) b[0]   |
+    (uint32_t) b[1]<<8     |
+    (uint32_t) b[2]<<16    |
+    (uint32_t) b[3]<<24;
+}
+
+/* Read an unsigned 64 bit value stored low byte first at b.
+   Every byte is widened to uint64_t before shifting.  Previously the low
+   four bytes were combined as a signed int, so a b[3] of 0x80 or more
+   produced a negative value whose sign was extended into the upper
+   32 bits of the result. */
+inline uint64_t
+get_uint64 (uint8_t* b) {
+  return (uint64_t) b[0] |
+    (uint64_t) b[1]<<8   |
+    (uint64_t) b[2]<<16  |
+    (uint64_t) b[3]<<24  |
+    (uint64_t) b[4]<<32  |
+    (uint64_t) b[5]<<40  |
+    (uint64_t) b[6]<<48  |
+    (uint64_t) b[7]<<56;
+}
+
+/* Decode a variable length integer starting at buffer: each byte carries
+   seven value bits, least significant group first, and a set top bit means
+   another byte follows.  *length receives the number of bytes consumed.
+   The buffer is not bounds checked; the caller must make sure it contains
+   a terminating byte (top bit clear). */
+inline uint64_t
+be_encint (unsigned char *buffer, size_t *length)
+{
+  uint64_t result = 0;
+  int shift=0;
+  *length = 0;
+
+  do {
+    /* Widen before shifting: the shift used to be done in int, which
+       loses everything from bit 32 upwards.  Groups that would land
+       beyond bit 63 are skipped, but still counted in *length. */
+    if (shift < 64)
+      result |= (uint64_t)((*buffer) & 0x7f) << shift;
+    shift += 7;
+    *length = *length + 1;
+
+  } while (*(buffer++) & 0x80);
+
+  return result;
+}
+
+/*
+  Finds the first unset bit in memory, scanning from bit *bit of *byte down
+  to bit 0 and then on from bit 7 of the next byte. Returns the number of
+  set bits that were skipped.
+  On return *bit is the position following the unset bit, and *length is
+  the number of bytes the scan moved past.
+  NOTE: no buffer size is passed in, so the scan is not bounds checked and
+  the function never returns -1.
+*/
+inline int
+ffus (unsigned char* byte, int* bit, size_t *length) {
+  int bits = 0;
+  *length = 0;
+  
+  /* count set bits, stepping to the next byte after bit 0 */
+  while(*byte & (1 << *bit)){
+    if(*bit)
+      --(*bit);
+    else {
+      ++byte;
+      ++(*length);
+      *bit = 7;
+    }
+    ++bits;
+  }
+  
+  /* step over the unset bit itself */
+  if(*bit)
+    --(*bit);
+  else {
+    ++(*length);
+    *bit = 7;
+  }
+  
+  return bits;
+}
+
+
+/*
+  Decodes one variable length integer from a bit stream (presumably the
+  "scale and root" encoding of the CHM full-text index, s being the scale
+  and r the root - confirm against the format description).
+
+  byte/bit give the current position: *bit is the bit within *byte (7 is
+  the most significant) and is updated to the position after the value.
+  *length receives the number of whole bytes consumed.
+
+  The value is a run of set bits ended by an unset bit (counted by ffus),
+  followed by r + (count ? count-1 : 0) value bits, most significant first.
+  When the run is not empty an extra bit is set just above the value bits.
+
+  Returns all ones if bit is NULL, *bit is greater than 7 or s is not 2.
+  NOTE: *length is written before bit is checked, so length must be valid.
+*/
+inline uint64_t
+sr_int(unsigned char* byte, int* bit,
+       unsigned char s, unsigned char r, size_t *length)
+{
+  uint64_t ret;
+  unsigned char mask;
+  int n, n_bits, num_bits, base, count;
+  size_t fflen;
+
+  *length = 0;
+  
+  if(!bit || *bit > 7 || s != 2)
+    return ~(uint64_t)0;
+  ret = 0;
+  
+  /* length of the prefix of set bits; also advances past its terminator */
+  count = ffus(byte, bit, &fflen);
+  *length += fflen;
+  byte += *length;
+  
+  /* number of value bits that follow the prefix */
+  n_bits = n = r + (count ? count-1 : 0) ;
+  
+  while (n > 0) {
+    /* take what is left of the current byte, or only the n bits needed;
+       num_bits is one less than the number of bits taken in this step */
+    num_bits = n > *bit ? *bit : n-1;
+    base = n > *bit ? 0 : *bit - (n-1);
+  
+    /* mask with num_bits+1 low bits set */
+    switch (num_bits){
+    case 0:
+      mask = 1;
+      break;
+    case 1:
+      mask = 3;
+      break;
+    case 2:
+      mask = 7;
+      break;
+    case 3:
+      mask = 0xf;
+      break;
+    case 4:
+      mask = 0x1f;
+      break;
+    case 5:
+      mask = 0x3f;
+      break;
+    case 6:
+      mask = 0x7f;
+      break;
+    case 7:
+      mask = 0xff;
+      break;
+    default:
+      mask = 0xff;
+      break;
+    }
+  
+    /* move the mask over the wanted bits and append them to the result */
+    mask <<= base;
+    ret = (ret << (num_bits+1)) |
+      (uint64_t)((*byte & mask) >> base);
+  
+    if( n > *bit ){
+      /* current byte used up, continue at the top of the next one */
+      ++byte;
+      ++(*length);
+      n -= *bit+1;
+      *bit = 7;
+    } else {
+      *bit -= n;
+      n = 0;
+    }
+  }
+  
+  /* a non-empty prefix contributes one bit above the value bits */
+  if(count)
+    ret |= (uint64_t)1 << n_bits;
+  
+  return ret;
+}
+
+            
+/*
+  Walks the index nodes of the full-text search tree, starting with the node
+  at initial_offset inside the object described by ui, and returns the
+  offset of the leaf node that may contain text.  Returns 0 if no such node
+  is found, on read or allocation failure, or if a node is malformed.
+
+  buff_size is the size of one node and tree_depth the number of levels;
+  tree_depth - 1 index nodes are visited.
+
+  A node starts with a 16 bit count of unused bytes at its end.  Each entry
+  after that is: one byte word_len (length of the stored word part plus
+  one), one byte pos (number of leading characters shared with the previous
+  word), the word part, a 32 bit offset of the child node and two more
+  bytes that are skipped.
+*/
+inline uint32_t
+get_leaf_node_offset(struct chmFile *chmfile,
+                     const char *text,
+                     uint32_t initial_offset,
+                     uint32_t buff_size,
+                     uint16_t tree_depth,
+                     struct chmUnitInfo *ui)
+{
+  unsigned char word_len;
+  unsigned char pos;
+  uint16_t free_space;
+  uint32_t used;
+  char *grown;
+  char *word = NULL;
+  unsigned char *buffer = NULL;
+  uint32_t test_offset = 0;
+  /* NOTE(review): i is not reset when moving on to the next node, so trees
+     with more than one index level resume scanning in the middle of the new
+     node.  Kept as in the original - confirm whether this is intended. */
+  uint32_t i = sizeof(uint16_t);
+
+  /* a node must at least hold its free space counter */
+  if (buff_size < sizeof(uint16_t))
+    return 0;
+
+  buffer = (unsigned char *)malloc (buff_size);
+  if (NULL == buffer)
+    return 0;
+
+  while (--tree_depth) {
+    /* the offset did not change on the previous level: no progress */
+    if (initial_offset == test_offset)
+      goto fail;
+
+    test_offset = initial_offset;
+    if (chm_retrieve_object (chmfile, ui, buffer,
+                             initial_offset, buff_size) == 0)
+      goto fail;
+
+    free_space = get_uint16 (buffer);
+    /* would make the unsigned subtraction below wrap around */
+    if (free_space > buff_size)
+      goto fail;
+    used = buff_size - free_space;
+
+    while (i < used) {
+      /* the two length bytes must be inside the node */
+      if (buff_size - i < 2)
+        goto fail;
+
+      word_len = *(buffer + i);
+      pos = *(buffer + i + 1);
+
+      /* word_len of 0 would underflow the copy below; the word part and
+         the child offset (word_len + 5 bytes from i) must fit as well */
+      if (word_len == 0 ||
+          buff_size - i < (uint32_t)word_len + 1 + sizeof(uint32_t))
+        goto fail;
+
+      /* the shared prefix must already be present in the previous word */
+      if (pos != 0 && (NULL == word || strlen (word) < pos))
+        goto fail;
+
+      /* keep the first pos characters, then append the new part + NUL */
+      grown = (char *)realloc (word, (size_t)pos + word_len);
+      if (NULL == grown)
+        goto fail;
+      word = grown;
+      memcpy (word + pos, buffer + i + 2, word_len - 1);
+      word[pos + word_len - 1] = 0;
+
+      /* first entry that is not less than text: descend into its child */
+      if (strcasecmp (text, word) <= 0) {
+        initial_offset = get_uint32 (buffer + i + word_len + 1);
+        break;
+      }
+
+      i += word_len + sizeof (unsigned char) + sizeof(uint32_t) +
+        sizeof(uint16_t);
+    }
+  }
+
+  /* nothing matched on the last level visited */
+  if(initial_offset == test_offset)
+    initial_offset = 0;
+
+  free (word);
+  free (buffer);
+
+  return initial_offset;
+
+fail:
+  /* single exit for all error paths so that word is never leaked */
+  free (word);
+  free (buffer);
+  return 0;
+}
+
+/*
+ * Decode the "wlc" block belonging to one matched word and record, for every
+ * document it references, the pair (topic title -> URL).
+ *
+ *   wlc_count / wlc_size / wlc_offset
+ *                 - number of entries in the block, its size in bytes and its
+ *                   offset inside `uimain`
+ *   ds, dr        - parameters handed to sr_int() for the document-index delta
+ *   cs, cr        - parameters handed to sr_int() for the per-document count
+ *   ls, lr        - parameters handed to sr_int() for each skipped code
+ *   uimain        - object holding the wlc block
+ *   topics        - object holding 16-byte-spaced topic entries
+ *   uistrings     - object the topic title is read from
+ *   uitbl, urlstr - objects used to resolve the topic's URL
+ *   dict          - (Python build) dictionary receiving topic -> url
+ *
+ * In the Python build each result is stored in `dict`; otherwise it is
+ * printed to stdout.
+ *
+ * Returns true when the whole block was processed, false on allocation
+ * failure, read failure or truncated data.  In the Python build a failure to
+ * create or store the URL string also returns false and leaves the Python
+ * exception set for the caller.
+ */
+inline int 
+pychm_process_wlc (struct chmFile *chmfile,
+                   uint64_t wlc_count, uint64_t wlc_size,
+                   uint32_t wlc_offset, unsigned char ds,
+                   unsigned char dr, unsigned char cs,
+                   unsigned char cr, unsigned char ls,
+                   unsigned char lr, struct chmUnitInfo *uimain,
+                   struct chmUnitInfo* uitbl,
+                   struct chmUnitInfo *uistrings,
+                   struct chmUnitInfo* topics,
+                   struct chmUnitInfo *urlstr,
+                   PyObject *dict)
+{
+  uint32_t stroff, urloff;
+  uint64_t i, j, count;
+  size_t length;
+  int wlc_bit = 7;
+  size_t off = 0;
+  uint64_t index = 0;
+  int ok = false;
+  unsigned char entry[TOPICS_ENTRY_LEN];
+  unsigned char combuf[COMMON_BUF_LEN];
+  unsigned char *buffer = (unsigned char *)malloc (wlc_size);
+  char *url = NULL;
+  char *topic = NULL;
+#ifdef __PYTHON__
+  PyObject *py_url;
+  int status;
+#endif
+
+  if (NULL == buffer)
+    return false;
+
+  if (chm_retrieve_object(chmfile, uimain, buffer, 
+                          wlc_offset, wlc_size) == 0)
+    goto done;
+
+  for (i = 0; i < wlc_count; ++i) {
+
+    /* Every entry starts on a byte boundary: drop a partly consumed byte. */
+    if(wlc_bit != 7) {
+      ++off;
+      wlc_bit = 7;
+    }
+
+    /* More entries announced than the block holds. */
+    if (off >= wlc_size)
+      goto done;
+
+    /* Document indexes are stored as deltas from the previous entry. */
+    index += sr_int(buffer + off, &wlc_bit, ds, dr, &length);
+    off += length;
+
+    if(chm_retrieve_object(chmfile, topics, entry, 
+                           index * 16, TOPICS_ENTRY_LEN) == 0)
+      goto done;
+
+    combuf[COMMON_BUF_LEN - 1] = 0;
+    stroff = get_uint32 (entry + 4);
+
+    FREE (topic);
+    if (chm_retrieve_object (chmfile, uistrings, combuf, 
+                             stroff, COMMON_BUF_LEN - 1) == 0) {
+      topic = strdup ("Untitled in index");
+
+    } else {
+      combuf[COMMON_BUF_LEN - 1] = 0;
+
+      topic = strdup ((char*)combuf);
+    }
+
+    urloff = get_uint32 (entry + 8);
+
+    if(chm_retrieve_object (chmfile, uitbl, combuf, 
+                            urloff, 12) == 0)
+      goto done;
+
+    urloff = get_uint32 (combuf + 8);
+
+    if (chm_retrieve_object (chmfile, urlstr, combuf, 
+                             urloff + 8, COMMON_BUF_LEN - 1) == 0)
+      goto done;
+
+    combuf[COMMON_BUF_LEN - 1] = 0;
+
+    FREE (url);
+    url = strdup ((char*)combuf);
+
+    if (url && topic) {
+#ifdef __PYTHON__
+      /* PyDict_SetItemString() takes its own reference to the value, so the
+         one returned by PyString_FromString() must be released here. */
+      py_url = PyString_FromString (url);
+      if (NULL == py_url)
+        goto done;
+
+      status = PyDict_SetItemString (dict, topic, py_url);
+      Py_DECREF (py_url);
+      if (status != 0)
+        goto done;
+#else
+      printf ("%s ==> %s\n", url, topic);
+#endif
+    }
+
+    if (off >= wlc_size)
+      goto done;
+
+    count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
+    off += length;
+
+    /* The individual codes are not used; they are decoded only to advance
+       the cursor to the next entry. */
+    for (j = 0; j < count; ++j) {
+      if (off >= wlc_size)
+        goto done;
+
+      sr_int (buffer + off, &wlc_bit, ls, lr, &length);
+      off += length;
+    }
+  }
+
+  ok = true;
+
+done:
+  FREE(topic);
+  FREE(url);
+  FREE(buffer);
+
+  return ok;
+}
+
+/*
+ * Full-text search of a CHM archive.
+ *
+ *   chmfile     - open CHM archive handle
+ *   text        - NUL-terminated word (or word prefix) to look for
+ *   whole_words - non-zero: only an exact, case-insensitive match counts;
+ *                 zero: every indexed word starting with `text` counts
+ *   titles_only - non-zero: skip index entries whose title flag is zero
+ *   dict        - handed through to pychm_process_wlc() for the results
+ *
+ * The search needs the objects /$FIftiMain, /#TOPICS, /#STRINGS, /#URLTBL and
+ * /#URLSTR.  The leaf node to start from is located with
+ * get_leaf_node_offset(); leaf entries are then decoded one by one.  Each leaf
+ * starts with an 8-byte header whose first four bytes hold the offset of the
+ * next leaf and whose bytes 6-7 hold the free-space count.
+ *
+ * Returns -1 when `text` is NULL.  Otherwise returns false (0) when the
+ * archive is not searchable, data cannot be read or is malformed, or memory
+ * runs out.  With whole_words set the result of pychm_process_wlc() for the
+ * matching word is returned (false when there is no such word); without it,
+ * true is returned when at least one word with the given prefix was found.
+ */
+int 
+chm_search (struct chmFile *chmfile,
+            const char *text, int whole_words, 
+            int titles_only, PyObject *dict)
+{
+  unsigned char header[FTS_HEADER_LEN];
+  unsigned char doc_index_s;
+  unsigned char doc_index_r;
+  unsigned char code_count_s;
+  unsigned char code_count_r;
+  unsigned char loc_codes_s;
+  unsigned char loc_codes_r;
+  unsigned char word_len, pos;
+  unsigned char *buffer;
+  char *word = NULL;
+  char *grown;
+  uint32_t node_offset;
+  uint32_t node_len;
+  uint16_t tree_depth;
+  uint32_t i;
+  uint16_t free_space;
+  uint64_t wlc_count, wlc_size;
+  uint32_t wlc_offset;
+  char *wrd_buf;
+  unsigned char title;
+  size_t encsz;
+  size_t text_len;
+  struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
+  int partial = false;
+  /* Size of the fixed header in front of the entries of a leaf node. */
+  const uint32_t leaf_header_len =
+    sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
+
+  if (NULL == text)
+    return -1;
+
+  if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
+      CHM_RESOLVE_SUCCESS || 
+      chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
+      CHM_RESOLVE_SUCCESS ||
+      chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
+      CHM_RESOLVE_SUCCESS)
+    return false;
+
+  if(chm_retrieve_object(chmfile, &ui, header, 0, FTS_HEADER_LEN) == 0)
+    return false;
+
+  doc_index_s = header[0x1E];
+  doc_index_r = header[0x1F];
+  code_count_s = header[0x20];
+  code_count_r = header[0x21];
+  loc_codes_s = header[0x22];
+  loc_codes_r = header[0x23];
+
+  /* Only this combination of the three "s" parameters is handled. */
+  if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
+    return false;
+  }
+
+  node_offset = get_uint32 (header + 0x14);
+  node_len = get_uint32 (header + 0x2e);
+  tree_depth = get_uint16 (header + 0x18);
+
+  /* A node that cannot even hold the leaf header is unusable. */
+  if (node_len < leaf_header_len)
+    return false;
+
+  node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
+                                      tree_depth, &ui);
+
+  if (!node_offset)
+    return false;
+
+  buffer = (unsigned char*)malloc (node_len);
+  if (NULL == buffer)
+    return false;
+
+  text_len = strlen (text);
+
+  do {
+
+    if (chm_retrieve_object (chmfile, &ui, buffer, 
+                             node_offset, node_len) == 0)
+      goto fail;
+
+    free_space = get_uint16 (buffer + 6);
+
+    /* Otherwise the unsigned subtraction in the loop bound would wrap. */
+    if (free_space > node_len)
+      goto fail;
+
+    i = leaf_header_len;
+
+    encsz = 0;
+
+    while (i < node_len - free_space) {
+
+      /* Need the length and position bytes. */
+      if (node_len - i < 2)
+        goto fail;
+
+      word_len = *(buffer + i);
+      pos = *(buffer + i + 1);
+
+      /* A zero length would turn `word_len - 1` into a huge copy size.  The
+         word characters, the title byte and at least one byte of the
+         following variable-length integer must lie inside the node. */
+      if (word_len == 0 || (uint32_t)word_len + 2 >= node_len - i)
+        goto fail;
+
+      wrd_buf = (char*)malloc (word_len);
+      if (NULL == wrd_buf)
+        goto fail;
+
+      memcpy (wrd_buf, buffer + i + 2, word_len - 1);
+      wrd_buf[word_len - 1] = 0;
+
+      /* Words are prefix-compressed against the previous entry. */
+      if (pos == 0) {
+        FREE(word);
+        word = (char *) strdup (wrd_buf);
+      } else if (NULL == word) {
+        /* Suffix entry with no previous word to extend: malformed data. */
+        FREE(wrd_buf);
+        goto fail;
+      } else {
+        grown = (char*)realloc (word, word_len + pos + 1);
+        if (NULL == grown) {
+          FREE(wrd_buf);
+          goto fail;
+        }
+        word = grown;
+        strcpy (word + pos, wrd_buf);
+      }
+
+      FREE(wrd_buf);
+
+      /* strdup() failed. */
+      if (NULL == word)
+        goto fail;
+
+      i += 2 + word_len;
+      title = *(buffer + i - 1);
+
+      wlc_count = be_encint (buffer + i, &encsz);
+      i += encsz;
+
+      /* 32-bit offset, two skipped bytes and at least one byte of the next
+         variable-length integer must still be inside the node. */
+      if (i >= node_len ||
+          node_len - i <= sizeof(uint32_t) + sizeof(uint16_t))
+        goto fail;
+
+      wlc_offset = get_uint32 (buffer + i);
+
+      i += sizeof(uint32_t) + sizeof(uint16_t);
+      wlc_size =  be_encint (buffer + i, &encsz);
+      i += encsz;
+
+      /* Offset of the next leaf, used if the scan has to continue. */
+      node_offset = get_uint32 (buffer);
+
+      if (!title && titles_only)
+        continue;
+
+      if (whole_words && !strcasecmp(text, word)) {
+        partial = pychm_process_wlc (chmfile, wlc_count, wlc_size, 
+                                     wlc_offset, doc_index_s, 
+                                     doc_index_r,code_count_s, 
+                                     code_count_r, loc_codes_s, 
+                                     loc_codes_r, &ui, &uiurltbl,
+                                     &uistrings, &uitopics,
+                                     &uiurlstr, dict);
+        goto done;
+      }
+
+      if (!whole_words) {
+        if (!strncasecmp (word, text, text_len)) {
+          partial = true;
+          pychm_process_wlc (chmfile, wlc_count, wlc_size, 
+                             wlc_offset, doc_index_s, 
+                             doc_index_r,code_count_s, 
+                             code_count_r, loc_codes_s, 
+                             loc_codes_r, &ui, &uiurltbl,
+                             &uistrings, &uitopics,
+                             &uiurlstr, dict);
+
+        /* NOTE(review): `< -1` (rather than `< 0`) is kept unchanged from the
+           original; confirm whether a difference of exactly -1 is meant to
+           keep scanning. */
+        } else if (strncasecmp (text, word, text_len) < -1)
+          break;
+      }
+
+    }
+  /* Move on to the next leaf only while the last decoded word still starts
+     with `text`.  NOTE(review): this test is case-sensitive (strncmp) while
+     the matching above is not; kept as in the original. */
+  } while (!whole_words && 
+           word != NULL &&
+           !strncmp (word, text, text_len) && 
+           node_offset);
+
+  goto done;
+
+fail:
+  partial = false;
+
+done:
+  FREE(word);
+  FREE(buffer);
+
+  return partial;
+}
+
+/* One place inside the archive where chm_get_lcid() looks for the LCID. */
+typedef struct {
+  const char *file;   /* path handed to chm_resolve_object() */
+  int offset;         /* byte offset inside that object of the 32-bit value */
+} Langrec;
+
+/*
+ * Candidates tried by chm_get_lcid(), in this order.
+ * NOTE(review): only the first path starts with '/', unlike the other object
+ * paths visible in this file -- confirm the last two actually resolve.
+ */
+Langrec lang_files[] = {
+  {"/$FIftiMain",               0x7E},
+  {"$WWKeywordLinks/BTree",     0x34},
+  {"$WWAssociativeLinks/BTree", 0x34}
+};
+
+/* Number of entries in lang_files. */
+#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(Langrec))
+
+/*
+ * Return the locale ID (LCID) of the archive, or -1 when none of the
+ * candidates in lang_files can be resolved and read.
+ *
+ * The candidates are tried in table order; the first one that yields a full
+ * 32-bit value wins.  The bytes are decoded with get_uint32(), like every
+ * other multi-byte field read in this file, instead of being copied straight
+ * into a host-order integer.  A short read no longer counts as success, so a
+ * partly filled value is never returned.
+ */
+int
+chm_get_lcid (struct chmFile *chmfile) {
+  struct chmUnitInfo ui;
+  unsigned char raw[sizeof(uint32_t)];
+  size_t i;
+
+  for (i = 0; i < LANG_FILES_SIZE; i++) {
+
+    if (chm_resolve_object (chmfile, lang_files[i].file, &ui) != 
+        CHM_RESOLVE_SUCCESS)
+      continue;
+
+    /* chm_retrieve_object() returns the number of bytes it delivered. */
+    if (chm_retrieve_object (chmfile, &ui, raw, 
+                             lang_files[i].offset, sizeof(raw)) ==
+        (int64_t) sizeof(raw))
+      return (int) get_uint32 (raw);
+  }
+
+  return -1;
+}
+
+#ifdef __PYTHON__
+
+/*
+ * Python: is_searchable(chmfile) -> int
+ *
+ * Returns 1 when the archive contains every object chm_search() needs
+ * (/$FIftiMain, /#TOPICS, /#STRINGS, /#URLTBL, /#URLSTR) and 0 otherwise.
+ * Raises TypeError when the argument cannot be parsed or does not wrap a
+ * chmFile pointer.
+ */
+static PyObject *
+is_searchable (PyObject *self, PyObject *args) {
+  static const char *const required[] = {
+    "/$FIftiMain", "/#TOPICS", "/#STRINGS", "/#URLTBL", "/#URLSTR"
+  };
+  struct chmFile *file;
+  PyObject *obj0;
+  struct chmUnitInfo ui;
+  size_t k;
+
+  if (!PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
+    PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
+    return NULL;
+  }
+
+  /* PyCObject_AsVoidPtr() yields NULL (with an exception set) for objects
+     that are not C objects; that NULL must not reach chmlib. */
+  file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
+  if (NULL == file) {
+    if (!PyErr_Occurred())
+      PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
+    return NULL;
+  }
+
+  for (k = 0; k < sizeof(required) / sizeof(required[0]); k++) {
+    if (chm_resolve_object (file, required[k], &ui) != CHM_RESOLVE_SUCCESS)
+      return Py_BuildValue ("i", 0);
+  }
+
+  return Py_BuildValue ("i", 1);
+}
+
+/*
+ * Python: search(chmfile, text, whole_words, titles_only) -> (int, dict)
+ *
+ * Runs chm_search() and returns a tuple of its integer result and the
+ * dictionary it filled (topic title -> URL).
+ * Raises TypeError when the arguments cannot be parsed or the first one does
+ * not wrap a chmFile pointer, and MemoryError when the dictionary cannot be
+ * created.  An exception left pending by the search is propagated.
+ */
+static PyObject *
+search (PyObject *self, PyObject *args) {
+  char *text;
+  int whole_words;
+  int titles_only;
+  int partial;
+  struct chmFile *file;
+  PyObject *obj0;
+  PyObject *dict;
+  PyObject *result;
+
+  if (!PyArg_ParseTuple (args, "Osii:search", &obj0, &text, 
+                         &whole_words, &titles_only)) {
+    PyErr_SetString(PyExc_TypeError,
+                    "Expected chmfile (not CHMFile!), string, int, int");
+    return NULL;
+  }
+
+  /* PyCObject_AsVoidPtr() yields NULL (with an exception set) for objects
+     that are not C objects; that NULL must not reach chmlib. */
+  file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
+  if (NULL == file) {
+    if (!PyErr_Occurred())
+      PyErr_SetString(PyExc_TypeError,
+                      "Expected chmfile (not CHMFile!), string, int, int");
+    return NULL;
+  }
+
+  dict = PyDict_New();
+  if (NULL == dict) {
+    PyErr_NoMemory();
+    return NULL;
+  }
+
+  partial = chm_search (file, 
+                        text, whole_words, titles_only, dict);
+
+  /* Storing a result can fail inside the search; never hand back a value
+     while an exception is pending. */
+  if (PyErr_Occurred()) {
+    Py_DECREF (dict);
+    return NULL;
+  }
+
+  /* The "O" format adds its own reference to dict, so the reference obtained
+     from PyDict_New() has to be dropped or the dictionary is never freed. */
+  result = Py_BuildValue ("(iO)", partial, dict);
+  Py_DECREF (dict);
+
+  return result;
+}
+
+/*
+ * Python: get_lcid(chmfile) -> int or None
+ *
+ * Returns the value of chm_get_lcid() for the archive, or None when that
+ * function reports -1.  Raises TypeError when the argument cannot be parsed
+ * or does not wrap a chmFile pointer.
+ */
+static PyObject *
+get_lcid (PyObject *self, PyObject *args) {
+  int code;
+  struct chmFile *file;
+  PyObject *obj0;
+
+  if (!PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
+    PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
+    return NULL;
+  }
+
+  /* PyCObject_AsVoidPtr() yields NULL (with an exception set) for objects
+     that are not C objects; that NULL must not reach chmlib. */
+  file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);
+  if (NULL == file) {
+    if (!PyErr_Occurred())
+      PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
+    return NULL;
+  }
+
+  code = chm_get_lcid (file);
+
+  if (code != -1)
+    return Py_BuildValue ("i", code);
+
+  Py_INCREF(Py_None);
+  return Py_None;
+}
+
+/*
+ * Method table of the extension module: Python-visible name, C function,
+ * calling convention and docstring.  The all-NULL entry ends the table.
+ */
+static PyMethodDef
+IndexMethods[] = {
+  {"get_lcid", get_lcid, METH_VARARGS, 
+   "Returns LCID (Locale ID) for archive."},
+  {"search", search, METH_VARARGS, 
+   "Perform Full-Text search."},
+  {"is_searchable", is_searchable, METH_VARARGS, 
+   "Return 1 if it is possible to search the archive, 0 otherwise."},
+  {NULL, NULL, 0, NULL}
+};
+
+/*
+ * Module initialisation function: registers the IndexMethods table under the
+ * module name "chm_extra".  It is given C linkage when compiled as C++.
+ * MODEXPORT is defined outside this part of the file; presumably it supplies
+ * the platform's export decoration -- verify against its definition.
+ */
+#ifdef __cplusplus
+extern "C"
+#endif
+MODEXPORT(void)
+initchm_extra (void) {
+  Py_InitModule ("chm_extra", IndexMethods);
+}
+
+#else
+
+/*
+ * Stand-alone test driver (built when __PYTHON__ is not defined).
+ *
+ * Usage: <program> <filename>
+ *
+ * Opens the archive, prints its LCID and then repeatedly reads
+ * "<whole_words> <titles_only> <string>" from stdin, running chm_search()
+ * for each line (results are printed by the search itself).  The loop ends
+ * at end of input or on the first line that does not parse completely.
+ *
+ * Returns 0 on normal termination or when only the usage text was printed,
+ * -1 when the archive cannot be opened.
+ */
+int
+main (int argc, char **argv) {
+  struct chmFile *file;
+  char text[255];
+  int whole_words, titles_only;
+  int partial;
+  int lcid;
+
+  if (argc != 2) {
+    printf ("\n%s <filename>\n", argv[0]);
+    return 0;
+  }
+
+  file = chm_open (argv[1]);
+  if (!file)
+    return -1;
+
+  lcid = chm_get_lcid (file);
+  printf ("\nLCID= %d (%08X)\n", lcid, (unsigned int) lcid);
+
+  while (1) {
+    printf ("\n<whole_words> <titles_only> <string>\n");
+    printf ("> ");
+
+    /* scanf() returns EOF (non-zero) at end of input and a smaller count on
+       a partial parse, so only a full match of three fields may proceed.
+       The field width keeps the word inside text[]. */
+    if (scanf ("%d %d %254s", &whole_words, &titles_only, text) != 3)
+      break;
+
+    partial = chm_search (file, 
+                          text, whole_words, titles_only, NULL);
+
+    printf ("Partial = %d\n", partial);
+  }
+
+  chm_close (file);
+  return 0;
+}
+
+#endif
--- a/src/calibre/utils/chm/stdint.h
+++ b/src/calibre/utils/chm/stdint.h
@ -0,0 +1,247 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006-2008 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+// Refuse to build with anything but Microsoft's compiler: the typedefs below
+// rely on its __intN keywords.
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+// Include guard; the matching #endif is the last line of the header.
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode, and for many Visual Studio versions when
+// compiling for ARM, the <wchar.h> include should be wrapped with
+// 'extern "C++" {}', or the compiler gives many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+// NOTE(review): the code below wraps the include in 'extern "C"', not
+// 'extern "C++"' as the text above says -- confirm which one is intended.
+#ifdef __cplusplus
+extern "C" {
+#endif
+#  include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define the _W64 macro to mark types that change their size, like intptr_t.
+// It expands to __w64 only for 32-bit x86 builds with _MSC_VER >= 1300 outside
+// MIDL, and to nothing otherwise.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 don't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+   typedef signed char       int8_t;
+   typedef signed short      int16_t;
+   typedef signed int        int32_t;
+   typedef unsigned char     uint8_t;
+   typedef unsigned short    uint16_t;
+   typedef unsigned int      uint32_t;
+#else
+   typedef signed __int8     int8_t;
+   typedef signed __int16    int16_t;
+   typedef signed __int32    int32_t;
+   typedef unsigned __int8   uint8_t;
+   typedef unsigned __int16  uint16_t;
+   typedef unsigned __int32  uint32_t;
+#endif
+// The 64-bit types use __int64 for every compiler version.
+typedef signed __int64       int64_t;
+typedef unsigned __int64     uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+// Each one is simply the exact-width type of the same size.
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+// Also aliased to the exact-width types; no wider "fast" type is chosen.
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+// 64-bit under _WIN64; otherwise int, tagged with _W64.
+#ifdef _WIN64 // [
+   typedef signed __int64    intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef _W64 signed int   intptr_t;
+   typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+// In C++ the limit macros are only provided when __STDC_LIMIT_MACROS is
+// defined before this header is included; in C they are always provided.
+// The _I8_MIN/_I8_MAX/_UI8_MAX style names used below are not defined in this
+// header; presumably they come from the <limits.h> included above.
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+// SIZE_MAX is only defined here when no earlier header has defined it.
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+// NOTE(review): these limits assume wint_t is an unsigned 16-bit type.
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+// In C++ the constant macros are only provided when __STDC_CONSTANT_MACROS is
+// defined before this header is included; in C they are always provided.
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+// Each macro pastes a sized-integer suffix (i8, ui16, ...) onto the literal.
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+// The greatest-width types are the 64-bit ones, so these are plain aliases.
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
--- a/src/calibre/utils/chm/swig_chm.c
+++ b/src/calibre/utils/chm/swig_chm.c
--- a/src/calibre/utils/chm/swig_chm.i
+++ b/src/calibre/utils/chm/swig_chm.i
@ -0,0 +1,214 @@
+%module chmlib
+%include "typemaps.i"
+%include "cstring.i"
+
+%{
+/*
+ Copyright (C) 2003 Rubens Ramos <rubensr@users.sourceforge.net>
+
+ Based on code by:
+ Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
+
+ pychm is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public
+ License along with this program; see the file COPYING.  If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA
+
+ $Id: swig_chm.i,v 1.1.1.1 2003/12/02 12:38:14 rubensr Exp $
+*/
+#include "chm_lib.h"
+#include <stdio.h>
+
+static PyObject *my_callback = NULL;
+
+/*
+ * Remember `arg` as the Python callable that dummy_enumerator() invokes.
+ *
+ * Returns a new reference to None on success.  When `arg` is not callable a
+ * TypeError is set and NULL is returned; the stored callback is left as is.
+ * `dummy` is not used.
+ */
+static PyObject *
+my_set_callback(PyObject *dummy, PyObject *arg)
+{
+    if (PyCallable_Check(arg) == 0) {
+      PyErr_SetString(PyExc_TypeError, "parameter must be callable");
+      return NULL;
+    }
+
+    /* Take the new reference before releasing the old one, so that passing
+       the currently stored callable again cannot free it in between. */
+    Py_XINCREF(arg);
+    Py_XDECREF(my_callback);
+    my_callback = arg;
+
+    /* Hand back None, owning a reference as every return value must. */
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/*
+ * C-side enumerator handed to chmlib in place of the Python callable.
+ *
+ * Wraps `h` and `ui` as SWIG pointer objects, unwraps the Python object that
+ * the `void *context` typemap stored in a PyCObject, and calls the callable
+ * remembered by my_set_callback() with (h, ui, context).
+ *
+ * Returns 1 when the call succeeded and 0 when an argument could not be
+ * built or the callable raised; the Python exception is left set.  The value
+ * returned by the callable itself is discarded.
+ */
+int dummy_enumerator (struct chmFile *h, 
+                      struct chmUnitInfo *ui, 
+                      void *context) {
+    PyObject *arglist;
+    PyObject *result;
+    PyObject *py_h;
+    PyObject *py_ui;
+    PyObject *py_c;
+
+    py_h  = SWIG_NewPointerObj((void *) h, SWIGTYPE_p_chmFile, 0);
+    py_ui = SWIG_NewPointerObj((void *) ui, SWIGTYPE_p_chmUnitInfo, 0);
+    py_c  = PyCObject_AsVoidPtr(context);
+
+    if (py_h == NULL || py_ui == NULL || py_c == NULL) {
+      Py_XDECREF(py_h);
+      Py_XDECREF(py_ui);
+      return 0;
+    }
+
+    /* "O" adds its own references, so ours can be released right away,
+       whether or not the tuple could be built. */
+    arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
+    Py_DECREF(py_h);
+    Py_DECREF(py_ui);
+
+    if (arglist == NULL)
+      return 0;
+
+    /* Time to call the callback */
+    result = PyEval_CallObject(my_callback, arglist);
+    Py_DECREF(arglist);
+
+    /* The callback raised: there is no result object to release. */
+    if (result == NULL)
+      return 0; /* Pass error back */
+
+    Py_DECREF(result);
+    return 1;
+}
+%}
+
+/*
+ * A CHM_ENUMERATOR argument is a Python callable: it is stored through
+ * my_set_callback() and the C function dummy_enumerator is passed to chmlib
+ * in its place.
+ * NOTE(review): the None reference returned by my_set_callback() is not
+ * released here.
+ */
+%typemap(in) CHM_ENUMERATOR {
+  if (!my_set_callback(self, $input)) goto fail;
+  $1 = dummy_enumerator;
+}
+
+/*
+ * The context argument is the Python object itself, wrapped in a PyCObject
+ * so it can travel through chmlib as a void pointer; dummy_enumerator()
+ * unwraps it again.
+ * NOTE(review): no matching typemap releasing the PyCObject is visible in
+ * this part of the file -- check whether it leaks on every call.
+ */
+%typemap(in) void *context {
+  if (!($1 = PyCObject_FromVoidPtr($input, NULL))) goto fail;
+}
+
+/*
+ * A `struct chmUnitInfo *OutValue` parameter is not taken from Python
+ * (numinputs=0): a zero-initialised struct is allocated for the call.
+ * NOTE(review): the calloc() result is not checked.
+ */
+%typemap(in, numinputs=0) struct chmUnitInfo *OutValue (struct chmUnitInfo *temp = (struct chmUnitInfo *) calloc(1, sizeof(struct chmUnitInfo))) {
+  $1 = temp;
+}
+
+/*
+ * After the call the struct is wrapped as a SWIG pointer object and appended
+ * to the result: it becomes the result when there is none (or only None),
+ * otherwise the existing result is turned into a tuple if necessary and the
+ * wrapper is concatenated to it.  The final argument 1 of SWIG_NewPointerObj
+ * presumably transfers ownership of the struct to the wrapper -- confirm
+ * against the SWIG version in use.
+ */
+%typemap(argout) struct chmUnitInfo *OutValue {
+  PyObject *o, *o2, *o3;
+  o = SWIG_NewPointerObj((void *) $1, SWIGTYPE_p_chmUnitInfo, 1);
+  if ((!$result) || ($result == Py_None)) {
+    $result = o;
+  } else {
+    if (!PyTuple_Check($result)) {
+      /* This inner o2 shadows the outer one for the duration of the block. */
+      PyObject *o2 = $result;
+      $result = PyTuple_New(1);
+      PyTuple_SetItem($result,0,o2);
+    }
+    o3 = PyTuple_New(1);
+    PyTuple_SetItem(o3,0,o);
+    o2 = $result;
+    $result = PySequence_Concat(o2,o3);
+    Py_DECREF(o2);
+    Py_DECREF(o3);
+  }
+}
+
+/*
+ * Allocate the output buffer for an `unsigned char *OUTPUT` parameter.  Its
+ * size is taken from arg5, the wrapper's local for the fifth argument; for
+ * chm_retrieve_object(), the function declared below with such a parameter,
+ * that is `len`.  Doing this in a `check` typemap means it presumably runs
+ * after all arguments have been converted, so arg5 is already set.
+ */
+%typemap(check) unsigned char *OUTPUT {
+  /* nasty hack */
+#ifdef __cplusplus
+   $1 = ($1_ltype) new char[arg5];
+#else
+   $1 = ($1_ltype) malloc(arg5);
+#endif
+   if ($1 == NULL) SWIG_fail;
+}
+
+/*
+ * Return the buffer to Python as a string of arg5 bytes, appended to the
+ * result, and release it.
+ * NOTE(review): the string length is the requested size, not the number of
+ * bytes the call reported; after a short read the tail is uninitialised.
+ * NOTE(review): $1 is an unsigned char pointer passed where a char pointer
+ * is expected; a C++ build would presumably need a cast.
+ */
+%typemap(argout,fragment="t_output_helper") unsigned char *OUTPUT {
+   PyObject *o;
+   o = PyString_FromStringAndSize($1, arg5);
+   $result = t_output_helper($result,o);
+#ifdef __cplusplus
+   delete [] $1;
+#else
+   free($1);
+#endif
+}
+
+/* 64-bit integer types used for offsets and lengths in the declarations below */
+#ifdef WIN32
+typedef unsigned __int64 LONGUINT64;
+typedef __int64          LONGINT64;
+#else
+typedef unsigned long long LONGUINT64;
+typedef long long          LONGINT64;
+#endif
+
+/* the two available spaces in a CHM file                      */
+/* N.B.: The format supports arbitrarily many spaces, but only */
+/*       two appear to be used at present.                     */
+#define CHM_UNCOMPRESSED (0)
+#define CHM_COMPRESSED   (1)
+
+/* structure representing an ITS (CHM) file stream             */
+struct chmFile;
+
+/* structure representing an element from an ITS file stream   */
+/* (`space` is presumably one of the CHM_UNCOMPRESSED /        */
+/*  CHM_COMPRESSED values above -- confirm against chm_lib.h)  */
+#define CHM_MAX_PATHLEN  256
+struct chmUnitInfo
+{
+    LONGUINT64         start;
+    LONGUINT64         length;
+    int                space;
+    char               path[CHM_MAX_PATHLEN+1];
+};
+
+/* open an ITS archive */
+struct chmFile* chm_open(const char *filename);
+
+/* close an ITS archive */
+void chm_close(struct chmFile *h);
+
+/* methods for setting tuning parameters for particular file */
+#define CHM_PARAM_MAX_BLOCKS_CACHED 0
+void chm_set_param(struct chmFile *h,
+                   int paramType,
+                   int paramVal);
+
+/* resolve a particular object from the archive */
+/* (the parameter name OutValue selects the chmUnitInfo typemaps above, so */
+/*  the struct is returned to Python rather than passed in)                */
+#define CHM_RESOLVE_SUCCESS (0)
+#define CHM_RESOLVE_FAILURE (1)
+int chm_resolve_object(struct chmFile *h,
+                       const char *objPath,
+                       struct chmUnitInfo *OutValue);
+
+/* retrieve part of an object from the archive */
+/* (the parameter name OUTPUT selects the buffer typemaps above, which size */
+/*  the buffer from the fifth argument, len)                                */
+LONGINT64 chm_retrieve_object(struct chmFile *h,
+                              struct chmUnitInfo *ui,
+                              unsigned char *OUTPUT,
+                              LONGUINT64 addr,
+                              LONGINT64 len);
+
+/* enumerate the objects in the .chm archive */
+/* (CHM_ENUMERATOR and `void *context` arguments go through the callback */
+/*  typemaps above)                                                      */
+typedef int (*CHM_ENUMERATOR)(struct chmFile *h,
+                              struct chmUnitInfo *ui,
+                              void *context);
+#define CHM_ENUMERATE_NORMAL    (1)
+#define CHM_ENUMERATE_META      (2)
+#define CHM_ENUMERATE_SPECIAL   (4)
+#define CHM_ENUMERATE_FILES     (8)
+#define CHM_ENUMERATE_DIRS      (16)
+#define CHM_ENUMERATE_ALL       (31)
+#define CHM_ENUMERATOR_FAILURE  (0)
+#define CHM_ENUMERATOR_CONTINUE (1)
+#define CHM_ENUMERATOR_SUCCESS  (2)
+int chm_enumerate(struct chmFile *h,
+                  int what,
+                  CHM_ENUMERATOR e,
+                  void *context);
+
+int chm_enumerate_dir(struct chmFile *h,
+                      const char *prefix,
+                      int what,
+                      CHM_ENUMERATOR e,
+                      void *context);