mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Integrate patched pychm into calibre build process
This commit is contained in:
parent
95f655585b
commit
2f437907a3
@ -89,6 +89,7 @@ fc_inc = '/usr/include/fontconfig'
|
|||||||
fc_lib = '/usr/lib'
|
fc_lib = '/usr/lib'
|
||||||
podofo_inc = '/usr/include/podofo'
|
podofo_inc = '/usr/include/podofo'
|
||||||
podofo_lib = '/usr/lib'
|
podofo_lib = '/usr/lib'
|
||||||
|
chmlib_inc_dirs = chmlib_lib_dirs = []
|
||||||
|
|
||||||
if iswindows:
|
if iswindows:
|
||||||
prefix = r'C:\cygwin\home\kovid\sw'
|
prefix = r'C:\cygwin\home\kovid\sw'
|
||||||
@ -96,6 +97,10 @@ if iswindows:
|
|||||||
sw_lib_dir = os.path.join(prefix, 'lib')
|
sw_lib_dir = os.path.join(prefix, 'lib')
|
||||||
fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
|
fc_inc = os.path.join(sw_inc_dir, 'fontconfig')
|
||||||
fc_lib = sw_lib_dir
|
fc_lib = sw_lib_dir
|
||||||
|
chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix,
|
||||||
|
'build', 'chmlib-0.40', 'src'))
|
||||||
|
chmlib_lib_dirs = consolidate('CHMLIB_LIB_DIR', os.path.join(prefix,
|
||||||
|
'build', 'chmlib-0.40', 'src', 'Release'))
|
||||||
png_inc_dirs = [sw_inc_dir]
|
png_inc_dirs = [sw_inc_dir]
|
||||||
png_lib_dirs = [sw_lib_dir]
|
png_lib_dirs = [sw_lib_dir]
|
||||||
png_libs = ['png12']
|
png_libs = ['png12']
|
||||||
|
@ -12,12 +12,13 @@ from distutils import sysconfig
|
|||||||
from PyQt4.pyqtconfig import QtGuiModuleMakefile
|
from PyQt4.pyqtconfig import QtGuiModuleMakefile
|
||||||
|
|
||||||
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
|
from setup import Command, islinux, isfreebsd, isosx, SRC, iswindows
|
||||||
from setup.build_environment import fc_inc, fc_lib, \
|
from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \
|
||||||
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
|
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
|
||||||
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
|
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
|
||||||
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
|
QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \
|
||||||
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
|
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
|
||||||
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, jpg_lib_dirs
|
magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \
|
||||||
|
jpg_lib_dirs, chmlib_lib_dirs
|
||||||
MT
|
MT
|
||||||
isunix = islinux or isosx or isfreebsd
|
isunix = islinux or isosx or isfreebsd
|
||||||
|
|
||||||
@ -56,6 +57,22 @@ if iswindows:
|
|||||||
pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
|
pdfreflow_libs = ['advapi32', 'User32', 'Gdi32']
|
||||||
|
|
||||||
extensions = [
|
extensions = [
|
||||||
|
|
||||||
|
Extension('chmlib',
|
||||||
|
['calibre/utils/chm/swig_chm.c'],
|
||||||
|
libraries=['ChmLib' if iswindows else 'chm'],
|
||||||
|
inc_dirs=chmlib_inc_dirs,
|
||||||
|
lib_dirs=chmlib_lib_dirs,
|
||||||
|
cflags=["-DSWIG_COBJECT_TYPES"]),
|
||||||
|
|
||||||
|
Extension('chm_extra',
|
||||||
|
['calibre/utils/chm/extra.c'],
|
||||||
|
libraries=['ChmLib' if iswindows else 'chm'],
|
||||||
|
inc_dirs=chmlib_inc_dirs,
|
||||||
|
lib_dirs=chmlib_lib_dirs,
|
||||||
|
cflags=["-D__PYTHON__"]),
|
||||||
|
|
||||||
|
|
||||||
Extension('pdfreflow',
|
Extension('pdfreflow',
|
||||||
reflow_sources,
|
reflow_sources,
|
||||||
headers=reflow_headers,
|
headers=reflow_headers,
|
||||||
|
@ -42,6 +42,7 @@ class LinuxFreeze(Command):
|
|||||||
'/usr/lib/liblcms.so.1',
|
'/usr/lib/liblcms.so.1',
|
||||||
'/tmp/calibre-mount-helper',
|
'/tmp/calibre-mount-helper',
|
||||||
'/usr/lib/libunrar.so',
|
'/usr/lib/libunrar.so',
|
||||||
|
'/usr/lib/libchm.so.0',
|
||||||
'/usr/lib/libsqlite3.so.0',
|
'/usr/lib/libsqlite3.so.0',
|
||||||
'/usr/lib/libsqlite3.so.0',
|
'/usr/lib/libsqlite3.so.0',
|
||||||
'/usr/lib/libmng.so.1',
|
'/usr/lib/libmng.so.1',
|
||||||
|
@ -459,7 +459,7 @@ class Py2App(object):
|
|||||||
|
|
||||||
@flush
|
@flush
|
||||||
def add_misc_libraries(self):
|
def add_misc_libraries(self):
|
||||||
for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7'):
|
for x in ('usb', 'unrar', 'readline.6.0', 'wmflite-0.2.7', 'chm.0'):
|
||||||
info('\nAdding', x)
|
info('\nAdding', x)
|
||||||
x = 'lib%s.dylib'%x
|
x = 'lib%s.dylib'%x
|
||||||
shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
|
shutil.copy2(join(SW, 'lib', x), self.frameworks_dir)
|
||||||
|
@ -57,7 +57,8 @@ if plugins is None:
|
|||||||
sys.path.insert(0, plugin_path)
|
sys.path.insert(0, plugin_path)
|
||||||
|
|
||||||
for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
|
for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
|
||||||
'fontconfig', 'pdfreflow', 'progress_indicator'] + \
|
'fontconfig', 'pdfreflow', 'progress_indicator', 'chmlib',
|
||||||
|
'chm_extra'] + \
|
||||||
(['winutil'] if iswindows else []) + \
|
(['winutil'] if iswindows else []) + \
|
||||||
(['usbobserver'] if isosx else []):
|
(['usbobserver'] if isosx else []):
|
||||||
try:
|
try:
|
||||||
|
@ -103,6 +103,17 @@ class ComicMetadataReader(MetadataReaderPlugin):
|
|||||||
mi.cover_data = (ext.lower(), data)
|
mi.cover_data = (ext.lower(), data)
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
class CHMMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read CHM metadata'
|
||||||
|
file_types = set(['chm'])
|
||||||
|
description = _('Read metadata from %s files') % 'CHM'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.chm import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
|
||||||
class EPUBMetadataReader(MetadataReaderPlugin):
|
class EPUBMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read EPUB metadata'
|
name = 'Read EPUB metadata'
|
||||||
@ -384,6 +395,7 @@ from calibre.ebooks.rtf.input import RTFInput
|
|||||||
from calibre.ebooks.tcr.input import TCRInput
|
from calibre.ebooks.tcr.input import TCRInput
|
||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
from calibre.ebooks.lrf.input import LRFInput
|
from calibre.ebooks.lrf.input import LRFInput
|
||||||
|
from calibre.ebooks.chm.input import CHMInput
|
||||||
|
|
||||||
from calibre.ebooks.epub.output import EPUBOutput
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
from calibre.ebooks.fb2.output import FB2Output
|
from calibre.ebooks.fb2.output import FB2Output
|
||||||
@ -444,6 +456,7 @@ plugins += [
|
|||||||
TCRInput,
|
TCRInput,
|
||||||
TXTInput,
|
TXTInput,
|
||||||
LRFInput,
|
LRFInput,
|
||||||
|
CHMInput,
|
||||||
]
|
]
|
||||||
plugins += [
|
plugins += [
|
||||||
EPUBOutput,
|
EPUBOutput,
|
||||||
|
@ -15,7 +15,7 @@ class ANDROID(USBMS):
|
|||||||
supported_platforms = ['windows', 'osx', 'linux']
|
supported_platforms = ['windows', 'osx', 'linux']
|
||||||
|
|
||||||
# Ordered list of supported formats
|
# Ordered list of supported formats
|
||||||
FORMATS = ['epub']
|
FORMATS = ['epub', 'pdf']
|
||||||
|
|
||||||
VENDOR_ID = {
|
VENDOR_ID = {
|
||||||
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
|
0x0bb4 : { 0x0c02 : [0x100], 0x0c01 : [0x100]},
|
||||||
|
@ -1,213 +1,18 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
''' CHM File decoding support '''
|
''' CHM File decoding support '''
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||||
' and Alex Bramley <a.bramley at gmail.com>.'
|
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||||
|
|
||||||
import os, shutil, uuid, re
|
import os, uuid
|
||||||
from tempfile import mkdtemp
|
|
||||||
from mimetypes import guess_type as guess_mimetype
|
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup, NavigableString
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from pychm.chm import CHMFile
|
|
||||||
from pychm.chmlib import (
|
|
||||||
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
|
|
||||||
chm_enumerate,
|
|
||||||
)
|
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.ebooks.chm.reader import CHMReader, match_string
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.utils.localization import get_lang
|
from calibre.utils.localization import get_lang
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
|
||||||
|
|
||||||
def match_string(s1, s2_already_lowered):
|
|
||||||
if s1 is not None and s2_already_lowered is not None:
|
|
||||||
if s1.lower()==s2_already_lowered:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def check_all_prev_empty(tag):
|
|
||||||
if tag is None:
|
|
||||||
return True
|
|
||||||
if tag.__class__ == NavigableString and not check_empty(tag):
|
|
||||||
return False
|
|
||||||
return check_all_prev_empty(tag.previousSibling)
|
|
||||||
|
|
||||||
def check_empty(s, rex = re.compile(r'\S')):
|
|
||||||
return rex.search(s) is None
|
|
||||||
|
|
||||||
|
|
||||||
def option_parser():
|
|
||||||
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
|
|
||||||
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
|
|
||||||
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
|
|
||||||
parser.add_option("-t", "--title", action="store", type="string", \
|
|
||||||
dest="title", help=_("Set the book title"))
|
|
||||||
parser.add_option('--title-sort', action='store', type='string', default=None,
|
|
||||||
dest='title_sort', help=_('Set sort key for the title'))
|
|
||||||
parser.add_option("-a", "--author", action="store", type="string", \
|
|
||||||
dest="author", help=_("Set the author"))
|
|
||||||
parser.add_option('--author-sort', action='store', type='string', default=None,
|
|
||||||
dest='author_sort', help=_('Set sort key for the author'))
|
|
||||||
parser.add_option("-c", "--category", action="store", type="string", \
|
|
||||||
dest="category", help=_("The category this book belongs"
|
|
||||||
" to. E.g.: History"))
|
|
||||||
parser.add_option("--thumbnail", action="store", type="string", \
|
|
||||||
dest="thumbnail", help=_("Path to a graphic that will be"
|
|
||||||
" set as this files' thumbnail"))
|
|
||||||
parser.add_option("--comment", action="store", type="string", \
|
|
||||||
dest="freetext", help=_("Path to a txt file containing a comment."))
|
|
||||||
parser.add_option("--get-thumbnail", action="store_true", \
|
|
||||||
dest="get_thumbnail", default=False, \
|
|
||||||
help=_("Extract thumbnail from LRF file"))
|
|
||||||
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
|
|
||||||
parser.add_option('--classification', default=None, help=_('Set the book classification'))
|
|
||||||
parser.add_option('--creator', default=None, help=_('Set the book creator'))
|
|
||||||
parser.add_option('--producer', default=None, help=_('Set the book producer'))
|
|
||||||
parser.add_option('--get-cover', action='store_true', default=False,
|
|
||||||
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
|
|
||||||
parser.add_option('--bookid', action='store', type='string', default=None,
|
|
||||||
dest='book_id', help=_('Set book ID'))
|
|
||||||
parser.add_option('--font-delta', action='store', type='int', default=0,
|
|
||||||
dest='font_delta', help=_('Set font delta'))
|
|
||||||
return parser
|
|
||||||
|
|
||||||
class CHMError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class CHMReader(CHMFile):
|
|
||||||
def __init__(self, input, log):
|
|
||||||
CHMFile.__init__(self)
|
|
||||||
if not self.LoadCHM(input):
|
|
||||||
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
|
||||||
self.log = log
|
|
||||||
self._sourcechm = input
|
|
||||||
self._contents = None
|
|
||||||
self._playorder = 0
|
|
||||||
self._metadata = False
|
|
||||||
self._extracted = False
|
|
||||||
|
|
||||||
# location of '.hhc' file, which is the CHM TOC.
|
|
||||||
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
|
|
||||||
self.hhc_path = self.root + ".hhc"
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_toc(self, ul, basedir=os.getcwdu()):
|
|
||||||
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
|
|
||||||
self._playorder += 1
|
|
||||||
for li in ul('li', recursive=False):
|
|
||||||
href = li.object('param', {'name': 'Local'})[0]['value']
|
|
||||||
if href.count('#'):
|
|
||||||
href, frag = href.split('#')
|
|
||||||
else:
|
|
||||||
frag = None
|
|
||||||
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
|
|
||||||
#print "========>", name
|
|
||||||
toc.add_item(href, frag, name, play_order=self._playorder)
|
|
||||||
self._playorder += 1
|
|
||||||
if li.ul:
|
|
||||||
child = self._parse_toc(li.ul)
|
|
||||||
child.parent = toc
|
|
||||||
toc.append(child)
|
|
||||||
#print toc
|
|
||||||
return toc
|
|
||||||
|
|
||||||
|
|
||||||
def GetFile(self, path):
|
|
||||||
# have to have abs paths for ResolveObject, but Contents() deliberately
|
|
||||||
# makes them relative. So we don't have to worry, re-add the leading /.
|
|
||||||
# note this path refers to the internal CHM structure
|
|
||||||
if path[0] != '/':
|
|
||||||
path = '/' + path
|
|
||||||
res, ui = self.ResolveObject(path)
|
|
||||||
if res != CHM_RESOLVE_SUCCESS:
|
|
||||||
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
|
|
||||||
size, data = self.RetrieveObject(ui)
|
|
||||||
if size == 0:
|
|
||||||
raise CHMError("'%s' is zero bytes in length!"%(path,))
|
|
||||||
return data
|
|
||||||
|
|
||||||
def ExtractFiles(self, output_dir=os.getcwdu()):
|
|
||||||
for path in self.Contents():
|
|
||||||
lpath = os.path.join(output_dir, path)
|
|
||||||
self._ensure_dir(lpath)
|
|
||||||
data = self.GetFile(path)
|
|
||||||
with open(lpath, 'wb') as f:
|
|
||||||
if guess_mimetype(path)[0] == ('text/html'):
|
|
||||||
data = self._reformat(data)
|
|
||||||
f.write(data)
|
|
||||||
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
|
|
||||||
self._extracted = True
|
|
||||||
|
|
||||||
def _reformat(self, data):
|
|
||||||
try:
|
|
||||||
soup = BeautifulSoup(data)
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
# hit some strange encoding problems...
|
|
||||||
print "Unable to parse html for cleaning, leaving it :("
|
|
||||||
return data
|
|
||||||
# nuke javascript...
|
|
||||||
[s.extract() for s in soup('script')]
|
|
||||||
# remove forward and back nav bars from the top/bottom of each page
|
|
||||||
# cos they really fuck with the flow of things and generally waste space
|
|
||||||
# since we can't use [a,b] syntax to select arbitrary items from a list
|
|
||||||
# we'll have to do this manually...
|
|
||||||
t = soup('table')
|
|
||||||
if t:
|
|
||||||
if (t[0].previousSibling is None
|
|
||||||
or t[0].previousSibling.previousSibling is None):
|
|
||||||
t[0].extract()
|
|
||||||
if (t[-1].nextSibling is None
|
|
||||||
or t[-1].nextSibling.nextSibling is None):
|
|
||||||
t[-1].extract()
|
|
||||||
# for some very odd reason each page's content appears to be in a table
|
|
||||||
# too. and this table has sub-tables for random asides... grr.
|
|
||||||
|
|
||||||
# remove br at top of page if present after nav bars removed
|
|
||||||
br = soup('br')
|
|
||||||
if br:
|
|
||||||
if check_all_prev_empty(br[0].previousSibling):
|
|
||||||
br[0].extract()
|
|
||||||
|
|
||||||
# some images seem to be broken in some chm's :/
|
|
||||||
for img in soup('img'):
|
|
||||||
try:
|
|
||||||
# some are supposedly "relative"... lies.
|
|
||||||
while img['src'].startswith('../'): img['src'] = img['src'][3:]
|
|
||||||
# some have ";<junk>" at the end.
|
|
||||||
img['src'] = img['src'].split(';')[0]
|
|
||||||
except KeyError:
|
|
||||||
# and some don't even have a src= ?!
|
|
||||||
pass
|
|
||||||
# now give back some pretty html.
|
|
||||||
return soup.prettify()
|
|
||||||
|
|
||||||
def Contents(self):
|
|
||||||
if self._contents is not None:
|
|
||||||
return self._contents
|
|
||||||
paths = []
|
|
||||||
def get_paths(chm, ui, ctx):
|
|
||||||
# skip directories
|
|
||||||
# note this path refers to the internal CHM structure
|
|
||||||
if ui.path[-1] != '/':
|
|
||||||
# and make paths relative
|
|
||||||
paths.append(ui.path.lstrip('/'))
|
|
||||||
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
|
|
||||||
self._contents = paths
|
|
||||||
return self._contents
|
|
||||||
|
|
||||||
def _ensure_dir(self, path):
|
|
||||||
dir = os.path.dirname(path)
|
|
||||||
if not os.path.isdir(dir):
|
|
||||||
os.makedirs(dir)
|
|
||||||
|
|
||||||
def extract_content(self, output_dir=os.getcwdu()):
|
|
||||||
self.ExtractFiles(output_dir=output_dir)
|
|
||||||
|
|
||||||
|
|
||||||
class CHMInput(InputFormatPlugin):
|
class CHMInput(InputFormatPlugin):
|
||||||
|
|
||||||
name = 'CHM Input'
|
name = 'CHM Input'
|
||||||
@ -215,11 +20,6 @@ class CHMInput(InputFormatPlugin):
|
|||||||
description = 'Convert CHM files to OEB'
|
description = 'Convert CHM files to OEB'
|
||||||
file_types = set(['chm'])
|
file_types = set(['chm'])
|
||||||
|
|
||||||
options = set([
|
|
||||||
OptionRecommendation(name='dummy_option', recommended_value=False,
|
|
||||||
help=_('dummy option until real options are determined.')),
|
|
||||||
])
|
|
||||||
|
|
||||||
def _chmtohtml(self, output_dir, chm_path, no_images, log):
|
def _chmtohtml(self, output_dir, chm_path, no_images, log):
|
||||||
log.debug('Opening CHM file')
|
log.debug('Opening CHM file')
|
||||||
rdr = CHMReader(chm_path, log)
|
rdr = CHMReader(chm_path, log)
|
||||||
@ -230,37 +30,36 @@ class CHMInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, options, file_ext, log, accelerators):
|
def convert(self, stream, options, file_ext, log, accelerators):
|
||||||
from calibre.ebooks.metadata.chm import get_metadata_
|
from calibre.ebooks.metadata.chm import get_metadata_
|
||||||
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
|
|
||||||
log.debug('Processing CHM...')
|
log.debug('Processing CHM...')
|
||||||
tdir = mkdtemp(prefix='chm2oeb_')
|
with TemporaryDirectory('chm2oeb') as tdir:
|
||||||
from calibre.customize.ui import plugin_for_input_format
|
html_input = plugin_for_input_format('html')
|
||||||
html_input = plugin_for_input_format('html')
|
for opt in html_input.options:
|
||||||
for opt in html_input.options:
|
setattr(options, opt.option.name, opt.recommended_value)
|
||||||
setattr(options, opt.option.name, opt.recommended_value)
|
options.input_encoding = 'utf-8'
|
||||||
options.input_encoding = 'utf-8'
|
no_images = False #options.no_images
|
||||||
no_images = False #options.no_images
|
chm_name = stream.name
|
||||||
chm_name = stream.name
|
#chm_data = stream.read()
|
||||||
#chm_data = stream.read()
|
|
||||||
|
|
||||||
#closing stream so CHM can be opened by external library
|
#closing stream so CHM can be opened by external library
|
||||||
stream.close()
|
stream.close()
|
||||||
log.debug('tdir=%s' % tdir)
|
log.debug('tdir=%s' % tdir)
|
||||||
log.debug('stream.name=%s' % stream.name)
|
log.debug('stream.name=%s' % stream.name)
|
||||||
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
|
mainname = self._chmtohtml(tdir, chm_name, no_images, log)
|
||||||
mainpath = os.path.join(tdir, mainname)
|
mainpath = os.path.join(tdir, mainname)
|
||||||
|
|
||||||
metadata = get_metadata_(tdir)
|
metadata = get_metadata_(tdir)
|
||||||
|
|
||||||
odi = options.debug_pipeline
|
odi = options.debug_pipeline
|
||||||
options.debug_pipeline = None
|
options.debug_pipeline = None
|
||||||
# try a custom conversion:
|
# try a custom conversion:
|
||||||
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
|
#oeb = self._create_oebbook(mainpath, tdir, options, log, metadata)
|
||||||
# try using html converter:
|
# try using html converter:
|
||||||
htmlpath = self._create_html_root(mainpath, log)
|
htmlpath = self._create_html_root(mainpath, log)
|
||||||
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
|
oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
|
||||||
options.debug_pipeline = odi
|
options.debug_pipeline = odi
|
||||||
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
|
#log.debug('DEBUG: Not removing tempdir %s' % tdir)
|
||||||
shutil.rmtree(tdir)
|
|
||||||
return oeb
|
return oeb
|
||||||
|
|
||||||
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
|
def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
|
||||||
|
207
src/calibre/ebooks/chm/reader.py
Normal file
207
src/calibre/ebooks/chm/reader.py
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
''' CHM File decoding support '''
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
||||||
|
' and Alex Bramley <a.bramley at gmail.com>.'
|
||||||
|
|
||||||
|
import os, re
|
||||||
|
from mimetypes import guess_type as guess_mimetype
|
||||||
|
|
||||||
|
from BeautifulSoup import BeautifulSoup, NavigableString
|
||||||
|
|
||||||
|
from calibre.utils.chm.chm import CHMFile
|
||||||
|
from calibre.utils.chm.chmlib import (
|
||||||
|
CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
|
||||||
|
chm_enumerate,
|
||||||
|
)
|
||||||
|
|
||||||
|
from calibre.utils.config import OptionParser
|
||||||
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
|
||||||
|
|
||||||
|
def match_string(s1, s2_already_lowered):
|
||||||
|
if s1 is not None and s2_already_lowered is not None:
|
||||||
|
if s1.lower()==s2_already_lowered:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def check_all_prev_empty(tag):
|
||||||
|
if tag is None:
|
||||||
|
return True
|
||||||
|
if tag.__class__ == NavigableString and not check_empty(tag):
|
||||||
|
return False
|
||||||
|
return check_all_prev_empty(tag.previousSibling)
|
||||||
|
|
||||||
|
def check_empty(s, rex = re.compile(r'\S')):
|
||||||
|
return rex.search(s) is None
|
||||||
|
|
||||||
|
|
||||||
|
def option_parser():
|
||||||
|
parser = OptionParser(usage=_('%prog [options] mybook.chm'))
|
||||||
|
parser.add_option('--output-dir', '-d', default='.', help=_('Output directory. Defaults to current directory'), dest='output')
|
||||||
|
parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
|
||||||
|
parser.add_option("-t", "--title", action="store", type="string", \
|
||||||
|
dest="title", help=_("Set the book title"))
|
||||||
|
parser.add_option('--title-sort', action='store', type='string', default=None,
|
||||||
|
dest='title_sort', help=_('Set sort key for the title'))
|
||||||
|
parser.add_option("-a", "--author", action="store", type="string", \
|
||||||
|
dest="author", help=_("Set the author"))
|
||||||
|
parser.add_option('--author-sort', action='store', type='string', default=None,
|
||||||
|
dest='author_sort', help=_('Set sort key for the author'))
|
||||||
|
parser.add_option("-c", "--category", action="store", type="string", \
|
||||||
|
dest="category", help=_("The category this book belongs"
|
||||||
|
" to. E.g.: History"))
|
||||||
|
parser.add_option("--thumbnail", action="store", type="string", \
|
||||||
|
dest="thumbnail", help=_("Path to a graphic that will be"
|
||||||
|
" set as this files' thumbnail"))
|
||||||
|
parser.add_option("--comment", action="store", type="string", \
|
||||||
|
dest="freetext", help=_("Path to a txt file containing a comment."))
|
||||||
|
parser.add_option("--get-thumbnail", action="store_true", \
|
||||||
|
dest="get_thumbnail", default=False, \
|
||||||
|
help=_("Extract thumbnail from LRF file"))
|
||||||
|
parser.add_option('--publisher', default=None, help=_('Set the publisher'))
|
||||||
|
parser.add_option('--classification', default=None, help=_('Set the book classification'))
|
||||||
|
parser.add_option('--creator', default=None, help=_('Set the book creator'))
|
||||||
|
parser.add_option('--producer', default=None, help=_('Set the book producer'))
|
||||||
|
parser.add_option('--get-cover', action='store_true', default=False,
|
||||||
|
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
|
||||||
|
parser.add_option('--bookid', action='store', type='string', default=None,
|
||||||
|
dest='book_id', help=_('Set book ID'))
|
||||||
|
parser.add_option('--font-delta', action='store', type='int', default=0,
|
||||||
|
dest='font_delta', help=_('Set font delta'))
|
||||||
|
return parser
|
||||||
|
|
||||||
|
class CHMError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class CHMReader(CHMFile):
|
||||||
|
def __init__(self, input, log):
|
||||||
|
CHMFile.__init__(self)
|
||||||
|
if not self.LoadCHM(input):
|
||||||
|
raise CHMError("Unable to open CHM file '%s'"%(input,))
|
||||||
|
self.log = log
|
||||||
|
self._sourcechm = input
|
||||||
|
self._contents = None
|
||||||
|
self._playorder = 0
|
||||||
|
self._metadata = False
|
||||||
|
self._extracted = False
|
||||||
|
|
||||||
|
# location of '.hhc' file, which is the CHM TOC.
|
||||||
|
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
|
||||||
|
self.hhc_path = self.root + ".hhc"
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_toc(self, ul, basedir=os.getcwdu()):
|
||||||
|
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
|
||||||
|
self._playorder += 1
|
||||||
|
for li in ul('li', recursive=False):
|
||||||
|
href = li.object('param', {'name': 'Local'})[0]['value']
|
||||||
|
if href.count('#'):
|
||||||
|
href, frag = href.split('#')
|
||||||
|
else:
|
||||||
|
frag = None
|
||||||
|
name = self._deentity(li.object('param', {'name': 'Name'})[0]['value'])
|
||||||
|
#print "========>", name
|
||||||
|
toc.add_item(href, frag, name, play_order=self._playorder)
|
||||||
|
self._playorder += 1
|
||||||
|
if li.ul:
|
||||||
|
child = self._parse_toc(li.ul)
|
||||||
|
child.parent = toc
|
||||||
|
toc.append(child)
|
||||||
|
#print toc
|
||||||
|
return toc
|
||||||
|
|
||||||
|
|
||||||
|
def GetFile(self, path):
|
||||||
|
# have to have abs paths for ResolveObject, but Contents() deliberately
|
||||||
|
# makes them relative. So we don't have to worry, re-add the leading /.
|
||||||
|
# note this path refers to the internal CHM structure
|
||||||
|
if path[0] != '/':
|
||||||
|
path = '/' + path
|
||||||
|
res, ui = self.ResolveObject(path)
|
||||||
|
if res != CHM_RESOLVE_SUCCESS:
|
||||||
|
raise CHMError("Unable to locate '%s' within CHM file '%s'"%(path, self.filename))
|
||||||
|
size, data = self.RetrieveObject(ui)
|
||||||
|
if size == 0:
|
||||||
|
raise CHMError("'%s' is zero bytes in length!"%(path,))
|
||||||
|
return data
|
||||||
|
|
||||||
|
def ExtractFiles(self, output_dir=os.getcwdu()):
|
||||||
|
for path in self.Contents():
|
||||||
|
lpath = os.path.join(output_dir, path)
|
||||||
|
self._ensure_dir(lpath)
|
||||||
|
data = self.GetFile(path)
|
||||||
|
with open(lpath, 'wb') as f:
|
||||||
|
if guess_mimetype(path)[0] == ('text/html'):
|
||||||
|
data = self._reformat(data)
|
||||||
|
f.write(data)
|
||||||
|
#subprocess.call(['extract_chmLib.exe', self._sourcechm, output_dir])
|
||||||
|
self._extracted = True
|
||||||
|
|
||||||
|
def _reformat(self, data):
|
||||||
|
try:
|
||||||
|
soup = BeautifulSoup(data)
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
# hit some strange encoding problems...
|
||||||
|
print "Unable to parse html for cleaning, leaving it :("
|
||||||
|
return data
|
||||||
|
# nuke javascript...
|
||||||
|
[s.extract() for s in soup('script')]
|
||||||
|
# remove forward and back nav bars from the top/bottom of each page
|
||||||
|
# cos they really fuck with the flow of things and generally waste space
|
||||||
|
# since we can't use [a,b] syntax to select arbitrary items from a list
|
||||||
|
# we'll have to do this manually...
|
||||||
|
t = soup('table')
|
||||||
|
if t:
|
||||||
|
if (t[0].previousSibling is None
|
||||||
|
or t[0].previousSibling.previousSibling is None):
|
||||||
|
t[0].extract()
|
||||||
|
if (t[-1].nextSibling is None
|
||||||
|
or t[-1].nextSibling.nextSibling is None):
|
||||||
|
t[-1].extract()
|
||||||
|
# for some very odd reason each page's content appears to be in a table
|
||||||
|
# too. and this table has sub-tables for random asides... grr.
|
||||||
|
|
||||||
|
# remove br at top of page if present after nav bars removed
|
||||||
|
br = soup('br')
|
||||||
|
if br:
|
||||||
|
if check_all_prev_empty(br[0].previousSibling):
|
||||||
|
br[0].extract()
|
||||||
|
|
||||||
|
# some images seem to be broken in some chm's :/
|
||||||
|
for img in soup('img'):
|
||||||
|
try:
|
||||||
|
# some are supposedly "relative"... lies.
|
||||||
|
while img['src'].startswith('../'): img['src'] = img['src'][3:]
|
||||||
|
# some have ";<junk>" at the end.
|
||||||
|
img['src'] = img['src'].split(';')[0]
|
||||||
|
except KeyError:
|
||||||
|
# and some don't even have a src= ?!
|
||||||
|
pass
|
||||||
|
# now give back some pretty html.
|
||||||
|
return soup.prettify()
|
||||||
|
|
||||||
|
def Contents(self):
|
||||||
|
if self._contents is not None:
|
||||||
|
return self._contents
|
||||||
|
paths = []
|
||||||
|
def get_paths(chm, ui, ctx):
|
||||||
|
# skip directories
|
||||||
|
# note this path refers to the internal CHM structure
|
||||||
|
if ui.path[-1] != '/':
|
||||||
|
# and make paths relative
|
||||||
|
paths.append(ui.path.lstrip('/'))
|
||||||
|
chm_enumerate(self.file, CHM_ENUMERATE_NORMAL, get_paths, None)
|
||||||
|
self._contents = paths
|
||||||
|
return self._contents
|
||||||
|
|
||||||
|
def _ensure_dir(self, path):
|
||||||
|
dir = os.path.dirname(path)
|
||||||
|
if not os.path.isdir(dir):
|
||||||
|
os.makedirs(dir)
|
||||||
|
|
||||||
|
def extract_content(self, output_dir=os.getcwdu()):
|
||||||
|
self.ExtractFiles(output_dir=output_dir)
|
||||||
|
|
||||||
|
|
||||||
|
|
34
src/calibre/utils/chm/__init__.py
Normal file
34
src/calibre/utils/chm/__init__.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||||
|
|
||||||
|
## pychm is free software; you can redistribute it and/or
|
||||||
|
## modify it under the terms of the GNU General Public License as
|
||||||
|
## published by the Free Software Foundation; either version 2 of the
|
||||||
|
## License, or (at your option) any later version.
|
||||||
|
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
## General Public License for more details.
|
||||||
|
|
||||||
|
## You should have received a copy of the GNU General Public
|
||||||
|
## License along with this program; see the file COPYING. If not,
|
||||||
|
## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
## Boston, MA 02111-1307, USA
|
||||||
|
|
||||||
|
## $Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $
|
||||||
|
|
||||||
|
'''
|
||||||
|
chm - A package to manipulate CHM files
|
||||||
|
|
||||||
|
The chm package provides four modules: chm, chmlib, extra and
|
||||||
|
_chmlib. _chmlib and chmlib are very low level libraries generated
|
||||||
|
from SWIG interface files, and are simple wrappers around the API
|
||||||
|
defined by the C library chmlib.
|
||||||
|
The extra module adds full-text search support.
|
||||||
|
The chm module provides some higher level classes to simplify
|
||||||
|
access to the CHM files information.
|
||||||
|
'''
|
||||||
|
__all__ = ["chm", "chmlib", "_chmlib", "extra"]
|
||||||
|
__version__ = "0.8.4"
|
||||||
|
__revision__ = "$Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $"
|
||||||
|
|
512
src/calibre/utils/chm/chm.py
Normal file
512
src/calibre/utils/chm/chm.py
Normal file
@ -0,0 +1,512 @@
|
|||||||
|
## Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||||
|
|
||||||
|
## Based on code by:
|
||||||
|
## Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
|
||||||
|
|
||||||
|
## pychm is free software; you can redistribute it and/or
|
||||||
|
## modify it under the terms of the GNU General Public License as
|
||||||
|
## published by the Free Software Foundation; either version 2 of the
|
||||||
|
## License, or (at your option) any later version.
|
||||||
|
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
## General Public License for more details.
|
||||||
|
|
||||||
|
## You should have received a copy of the GNU General Public
|
||||||
|
## License along with this program; see the file COPYING. If not,
|
||||||
|
## write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
## Boston, MA 02111-1307, USA
|
||||||
|
|
||||||
|
## $Id: chm.py,v 1.12 2006/08/07 12:31:51 rubensr Exp $
|
||||||
|
|
||||||
|
'''
|
||||||
|
chm - A high-level front end for the chmlib python module.
|
||||||
|
|
||||||
|
The chm module provides high level access to the functionality
|
||||||
|
included in chmlib. It encapsulates functions in the CHMFile class, and
|
||||||
|
provides some additional features, such as the ability to obtain
|
||||||
|
the contents tree of a CHM archive.
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
import array
|
||||||
|
import string
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import calibre.utils.chm.chmlib as chmlib
|
||||||
|
from calibre.constants import plugins
|
||||||
|
|
||||||
|
extra, extra_err = plugins['chm_extra']
|
||||||
|
if extra_err:
|
||||||
|
raise RuntimeError('Failed to load chm.extra: '+extra_err)
|
||||||
|
|
||||||
|
charset_table = {
|
||||||
|
0 : 'iso8859_1', # ANSI_CHARSET
|
||||||
|
238 : 'iso8859_2', # EASTEUROPE_CHARSET
|
||||||
|
178 : 'iso8859_6', # ARABIC_CHARSET
|
||||||
|
161 : 'iso8859_7', # GREEK_CHARSET
|
||||||
|
177 : 'iso8859_8', # HEBREW_CHARSET
|
||||||
|
162 : 'iso8859_9', # TURKISH_CHARSET
|
||||||
|
222 : 'iso8859_11', # THAI_CHARSET - hmm not in python 2.2...
|
||||||
|
186 : 'iso8859_13', # BALTIC_CHARSET
|
||||||
|
204 : 'cp1251', # RUSSIAN_CHARSET
|
||||||
|
255 : 'cp437', # OEM_CHARSET
|
||||||
|
128 : 'cp932', # SHIFTJIS_CHARSET
|
||||||
|
134 : 'cp936', # GB2312_CHARSET
|
||||||
|
129 : 'cp949', # HANGUL_CHARSET
|
||||||
|
136 : 'cp950', # CHINESEBIG5_CHARSET
|
||||||
|
1 : None, # DEFAULT_CHARSET
|
||||||
|
2 : None, # SYMBOL_CHARSET
|
||||||
|
130 : None, # JOHAB_CHARSET
|
||||||
|
163 : None, # VIETNAMESE_CHARSET
|
||||||
|
77 : None, # MAC_CHARSET
|
||||||
|
}
|
||||||
|
|
||||||
|
locale_table = {
|
||||||
|
0x0436 : ('iso8859_1', "Afrikaans", "Western Europe & US"),
|
||||||
|
0x041c : ('iso8859_2', "Albanian", "Central Europe"),
|
||||||
|
0x0401 : ('iso8859_6', "Arabic_Saudi_Arabia", "Arabic"),
|
||||||
|
0x0801 : ('iso8859_6', "Arabic_Iraq", "Arabic"),
|
||||||
|
0x0c01 : ('iso8859_6', "Arabic_Egypt", "Arabic"),
|
||||||
|
0x1001 : ('iso8859_6', "Arabic_Libya", "Arabic"),
|
||||||
|
0x1401 : ('iso8859_6', "Arabic_Algeria", "Arabic"),
|
||||||
|
0x1801 : ('iso8859_6', "Arabic_Morocco", "Arabic"),
|
||||||
|
0x1c01 : ('iso8859_6', "Arabic_Tunisia", "Arabic"),
|
||||||
|
0x2001 : ('iso8859_6', "Arabic_Oman", "Arabic"),
|
||||||
|
0x2401 : ('iso8859_6', "Arabic_Yemen", "Arabic"),
|
||||||
|
0x2801 : ('iso8859_6', "Arabic_Syria", "Arabic"),
|
||||||
|
0x2c01 : ('iso8859_6', "Arabic_Jordan", "Arabic"),
|
||||||
|
0x3001 : ('iso8859_6', "Arabic_Lebanon", "Arabic"),
|
||||||
|
0x3401 : ('iso8859_6', "Arabic_Kuwait", "Arabic"),
|
||||||
|
0x3801 : ('iso8859_6', "Arabic_UAE", "Arabic"),
|
||||||
|
0x3c01 : ('iso8859_6', "Arabic_Bahrain", "Arabic"),
|
||||||
|
0x4001 : ('iso8859_6', "Arabic_Qatar", "Arabic"),
|
||||||
|
0x042b : (None, "Armenian","Armenian"),
|
||||||
|
0x042c : ('iso8859_9', "Azeri_Latin", "Turkish"),
|
||||||
|
0x082c : ('cp1251', "Azeri_Cyrillic", "Cyrillic"),
|
||||||
|
0x042d : ('iso8859_1', "Basque", "Western Europe & US"),
|
||||||
|
0x0423 : ('cp1251', "Belarusian", "Cyrillic"),
|
||||||
|
0x0402 : ('cp1251', "Bulgarian", "Cyrillic"),
|
||||||
|
0x0403 : ('iso8859_1', "Catalan", "Western Europe & US"),
|
||||||
|
0x0404 : ('cp950', "Chinese_Taiwan", "Traditional Chinese"),
|
||||||
|
0x0804 : ('cp936', "Chinese_PRC", "Simplified Chinese"),
|
||||||
|
0x0c04 : ('cp950', "Chinese_Hong_Kong", "Traditional Chinese"),
|
||||||
|
0x1004 : ('cp936', "Chinese_Singapore", "Simplified Chinese"),
|
||||||
|
0x1404 : ('cp950', "Chinese_Macau", "Traditional Chinese"),
|
||||||
|
0x041a : ('iso8859_2', "Croatian", "Central Europe"),
|
||||||
|
0x0405 : ('iso8859_2', "Czech", "Central Europe"),
|
||||||
|
0x0406 : ('iso8859_1', "Danish", "Western Europe & US"),
|
||||||
|
0x0413 : ('iso8859_1', "Dutch_Standard", "Western Europe & US"),
|
||||||
|
0x0813 : ('iso8859_1', "Dutch_Belgian", "Western Europe & US"),
|
||||||
|
0x0409 : ('iso8859_1', "English_United_States", "Western Europe & US"),
|
||||||
|
0x0809 : ('iso8859_1', "English_United_Kingdom", "Western Europe & US"),
|
||||||
|
0x0c09 : ('iso8859_1', "English_Australian", "Western Europe & US"),
|
||||||
|
0x1009 : ('iso8859_1', "English_Canadian", "Western Europe & US"),
|
||||||
|
0x1409 : ('iso8859_1', "English_New_Zealand", "Western Europe & US"),
|
||||||
|
0x1809 : ('iso8859_1', "English_Irish", "Western Europe & US"),
|
||||||
|
0x1c09 : ('iso8859_1', "English_South_Africa", "Western Europe & US"),
|
||||||
|
0x2009 : ('iso8859_1', "English_Jamaica", "Western Europe & US"),
|
||||||
|
0x2409 : ('iso8859_1', "English_Caribbean", "Western Europe & US"),
|
||||||
|
0x2809 : ('iso8859_1', "English_Belize", "Western Europe & US"),
|
||||||
|
0x2c09 : ('iso8859_1', "English_Trinidad", "Western Europe & US"),
|
||||||
|
0x3009 : ('iso8859_1', "English_Zimbabwe", "Western Europe & US"),
|
||||||
|
0x3409 : ('iso8859_1', "English_Philippines", "Western Europe & US"),
|
||||||
|
0x0425 : ('iso8859_13',"Estonian", "Baltic",),
|
||||||
|
0x0438 : ('iso8859_1', "Faeroese", "Western Europe & US"),
|
||||||
|
0x0429 : ('iso8859_6', "Farsi", "Arabic"),
|
||||||
|
0x040b : ('iso8859_1', "Finnish", "Western Europe & US"),
|
||||||
|
0x040c : ('iso8859_1', "French_Standard", "Western Europe & US"),
|
||||||
|
0x080c : ('iso8859_1', "French_Belgian", "Western Europe & US"),
|
||||||
|
0x0c0c : ('iso8859_1', "French_Canadian", "Western Europe & US"),
|
||||||
|
0x100c : ('iso8859_1', "French_Swiss", "Western Europe & US"),
|
||||||
|
0x140c : ('iso8859_1', "French_Luxembourg", "Western Europe & US"),
|
||||||
|
0x180c : ('iso8859_1', "French_Monaco", "Western Europe & US"),
|
||||||
|
0x0437 : (None, "Georgian", "Georgian"),
|
||||||
|
0x0407 : ('iso8859_1', "German_Standard", "Western Europe & US"),
|
||||||
|
0x0807 : ('iso8859_1', "German_Swiss", "Western Europe & US"),
|
||||||
|
0x0c07 : ('iso8859_1', "German_Austrian", "Western Europe & US"),
|
||||||
|
0x1007 : ('iso8859_1', "German_Luxembourg", "Western Europe & US"),
|
||||||
|
0x1407 : ('iso8859_1', "German_Liechtenstein", "Western Europe & US"),
|
||||||
|
0x0408 : ('iso8859_7', "Greek", "Greek"),
|
||||||
|
0x040d : ('iso8859_8', "Hebrew", "Hebrew"),
|
||||||
|
0x0439 : (None, "Hindi", "Indic"),
|
||||||
|
0x040e : ('iso8859_2', "Hungarian", "Central Europe"),
|
||||||
|
0x040f : ('iso8859_1', "Icelandic", "Western Europe & US"),
|
||||||
|
0x0421 : ('iso8859_1', "Indonesian", "Western Europe & US"),
|
||||||
|
0x0410 : ('iso8859_1', "Italian_Standard", "Western Europe & US"),
|
||||||
|
0x0810 : ('iso8859_1', "Italian_Swiss", "Western Europe & US"),
|
||||||
|
0x0411 : ('cp932', "Japanese", "Japanese"),
|
||||||
|
0x043f : ('cp1251', "Kazakh", "Cyrillic"),
|
||||||
|
0x0457 : (None, "Konkani", "Indic"),
|
||||||
|
0x0412 : ('cp949', "Korean", "Korean"),
|
||||||
|
0x0426 : ('iso8859_13',"Latvian", "Baltic",),
|
||||||
|
0x0427 : ('iso8859_13',"Lithuanian", "Baltic",),
|
||||||
|
0x042f : ('cp1251', "Macedonian", "Cyrillic"),
|
||||||
|
0x043e : ('iso8859_1', "Malay_Malaysia", "Western Europe & US"),
|
||||||
|
0x083e : ('iso8859_1', "Malay_Brunei_Darussalam", "Western Europe & US"),
|
||||||
|
0x044e : (None, "Marathi", "Indic"),
|
||||||
|
0x0414 : ('iso8859_1', "Norwegian_Bokmal", "Western Europe & US"),
|
||||||
|
0x0814 : ('iso8859_1', "Norwegian_Nynorsk", "Western Europe & US"),
|
||||||
|
0x0415 : ('iso8859_2', "Polish", "Central Europe"),
|
||||||
|
0x0416 : ('iso8859_1', "Portuguese_Brazilian", "Western Europe & US"),
|
||||||
|
0x0816 : ('iso8859_1', "Portuguese_Standard", "Western Europe & US"),
|
||||||
|
0x0418 : ('iso8859_2', "Romanian", "Central Europe"),
|
||||||
|
0x0419 : ('cp1251', "Russian", "Cyrillic"),
|
||||||
|
0x044f : (None, "Sanskrit", "Indic"),
|
||||||
|
0x081a : ('iso8859_2', "Serbian_Latin", "Central Europe"),
|
||||||
|
0x0c1a : ('cp1251', "Serbian_Cyrillic", "Cyrillic"),
|
||||||
|
0x041b : ('iso8859_2', "Slovak", "Central Europe"),
|
||||||
|
0x0424 : ('iso8859_2', "Slovenian", "Central Europe"),
|
||||||
|
0x040a : ('iso8859_1', "Spanish_Trad_Sort", "Western Europe & US"),
|
||||||
|
0x080a : ('iso8859_1', "Spanish_Mexican", "Western Europe & US"),
|
||||||
|
0x0c0a : ('iso8859_1', "Spanish_Modern_Sort", "Western Europe & US"),
|
||||||
|
0x100a : ('iso8859_1', "Spanish_Guatemala", "Western Europe & US"),
|
||||||
|
0x140a : ('iso8859_1', "Spanish_Costa_Rica", "Western Europe & US"),
|
||||||
|
0x180a : ('iso8859_1', "Spanish_Panama", "Western Europe & US"),
|
||||||
|
0x1c0a : ('iso8859_1', "Spanish_Dominican_Repub", "Western Europe & US"),
|
||||||
|
0x200a : ('iso8859_1', "Spanish_Venezuela", "Western Europe & US"),
|
||||||
|
0x240a : ('iso8859_1', "Spanish_Colombia", "Western Europe & US"),
|
||||||
|
0x280a : ('iso8859_1', "Spanish_Peru", "Western Europe & US"),
|
||||||
|
0x2c0a : ('iso8859_1', "Spanish_Argentina", "Western Europe & US"),
|
||||||
|
0x300a : ('iso8859_1', "Spanish_Ecuador", "Western Europe & US"),
|
||||||
|
0x340a : ('iso8859_1', "Spanish_Chile", "Western Europe & US"),
|
||||||
|
0x380a : ('iso8859_1', "Spanish_Uruguay", "Western Europe & US"),
|
||||||
|
0x3c0a : ('iso8859_1', "Spanish_Paraguay", "Western Europe & US"),
|
||||||
|
0x400a : ('iso8859_1', "Spanish_Bolivia", "Western Europe & US"),
|
||||||
|
0x440a : ('iso8859_1', "Spanish_El_Salvador", "Western Europe & US"),
|
||||||
|
0x480a : ('iso8859_1', "Spanish_Honduras", "Western Europe & US"),
|
||||||
|
0x4c0a : ('iso8859_1', "Spanish_Nicaragua", "Western Europe & US"),
|
||||||
|
0x500a : ('iso8859_1', "Spanish_Puerto_Rico", "Western Europe & US"),
|
||||||
|
0x0441 : ('iso8859_1', "Swahili", "Western Europe & US"),
|
||||||
|
0x041d : ('iso8859_1', "Swedish", "Western Europe & US"),
|
||||||
|
0x081d : ('iso8859_1', "Swedish_Finland", "Western Europe & US"),
|
||||||
|
0x0449 : (None, "Tamil", "Indic"),
|
||||||
|
0x0444 : ('cp1251', "Tatar", "Cyrillic"),
|
||||||
|
0x041e : ('iso8859_11',"Thai", "Thai"),
|
||||||
|
0x041f : ('iso8859_9', "Turkish", "Turkish"),
|
||||||
|
0x0422 : ('cp1251', "Ukrainian", "Cyrillic"),
|
||||||
|
0x0420 : ('iso8859_6', "Urdu", "Arabic"),
|
||||||
|
0x0443 : ('iso8859_9', "Uzbek_Latin", "Turkish"),
|
||||||
|
0x0843 : ('cp1251', "Uzbek_Cyrillic", "Cyrillic"),
|
||||||
|
0x042a : (None, "Vietnamese", "Vietnamese")
|
||||||
|
}
|
||||||
|
|
||||||
|
class CHMFile:
|
||||||
|
"A class to manage access to CHM files."
|
||||||
|
filename = ""
|
||||||
|
file = None
|
||||||
|
title = ""
|
||||||
|
home = "/"
|
||||||
|
index = None
|
||||||
|
topics = None
|
||||||
|
encoding = None
|
||||||
|
lcid = None
|
||||||
|
binaryindex = None
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.searchable = 0
|
||||||
|
|
||||||
|
def LoadCHM(self, archiveName):
    '''Loads a CHM archive.

    Opens ``archiveName`` with chmlib and then calls GetArchiveInfo to
    obtain information such as the index file name and the topics file.
    Any archive that is currently open is closed first.

    Returns 1 on success, and 0 if the archive could not be opened.
    '''
    # Test the handle, not the file name: ``filename`` starts out as ''
    # (never None), so comparing it with None was always true and closed
    # a handle that had never been opened.
    if self.file is not None:
        self.CloseCHM()

    self.file = chmlib.chm_open(archiveName)
    if self.file is None:
        return 0

    self.filename = archiveName
    self.GetArchiveInfo()

    return 1
|
||||||
|
|
||||||
|
def CloseCHM(self):
|
||||||
|
'''Closes the CHM archive.
|
||||||
|
This function will close the CHM file, if it is open. All variables
|
||||||
|
are also reset.
|
||||||
|
'''
|
||||||
|
if (self.filename != None):
|
||||||
|
chmlib.chm_close(self.file)
|
||||||
|
self.file = None
|
||||||
|
self.filename = ''
|
||||||
|
self.title = ""
|
||||||
|
self.home = "/"
|
||||||
|
self.index = None
|
||||||
|
self.topics = None
|
||||||
|
self.encoding = None
|
||||||
|
|
||||||
|
def GetArchiveInfo(self):
|
||||||
|
'''Obtains information on CHM archive.
|
||||||
|
This function checks the /#SYSTEM file inside the CHM archive to
|
||||||
|
obtain the index, home page, topics, encoding and title. It is called
|
||||||
|
from LoadCHM.
|
||||||
|
'''
|
||||||
|
|
||||||
|
#extra.is_searchable crashed...
|
||||||
|
#self.searchable = extra.is_searchable (self.file)
|
||||||
|
self.searchable = False
|
||||||
|
self.lcid = None
|
||||||
|
|
||||||
|
result, ui = chmlib.chm_resolve_object(self.file, '/#SYSTEM')
|
||||||
|
if (result != chmlib.CHM_RESOLVE_SUCCESS):
|
||||||
|
sys.stderr.write('GetArchiveInfo: #SYSTEM does not exist\n')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
size, text = chmlib.chm_retrieve_object(self.file, ui, 4l, ui.length)
|
||||||
|
if (size == 0):
|
||||||
|
sys.stderr.write('GetArchiveInfo: file size = 0\n')
|
||||||
|
return 0
|
||||||
|
|
||||||
|
buff = array.array('B', text)
|
||||||
|
|
||||||
|
index = 0
|
||||||
|
while (index < size):
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
|
||||||
|
if (cursor == 0):
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
self.topics = '/' + text[index:index+cursor-1]
|
||||||
|
elif (cursor == 1):
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
self.index = '/' + text[index:index+cursor-1]
|
||||||
|
elif (cursor == 2):
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
self.home = '/' + text[index:index+cursor-1]
|
||||||
|
elif (cursor == 3):
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
self.title = text[index:index+cursor-1]
|
||||||
|
elif (cursor == 4):
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
self.lcid = buff[index] + (buff[index+1] * 256)
|
||||||
|
elif (cursor == 6):
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
tmp = text[index:index+cursor-1]
|
||||||
|
if not self.topics:
|
||||||
|
tmp1 = '/' + tmp + '.hhc'
|
||||||
|
tmp2 = '/' + tmp + '.hhk'
|
||||||
|
res1, ui1 = chmlib.chm_resolve_object(self.file, tmp1)
|
||||||
|
res2, ui2 = chmlib.chm_resolve_object(self.file, tmp2)
|
||||||
|
if (not self.topics) and \
|
||||||
|
(res1 == chmlib.CHM_RESOLVE_SUCCESS):
|
||||||
|
self.topics = '/' + tmp + '.hhc'
|
||||||
|
if (not self.index) and \
|
||||||
|
(res2 == chmlib.CHM_RESOLVE_SUCCESS):
|
||||||
|
self.index = '/' + tmp + '.hhk'
|
||||||
|
elif (cursor == 16):
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
self.encoding = text[index:index+cursor-1]
|
||||||
|
else:
|
||||||
|
index += 2
|
||||||
|
cursor = buff[index] + (buff[index+1] * 256)
|
||||||
|
index += 2
|
||||||
|
index += cursor
|
||||||
|
|
||||||
|
self.GetWindowsInfo()
|
||||||
|
|
||||||
|
if not self.lcid:
|
||||||
|
self.lcid = extra.get_lcid (self.file)
|
||||||
|
|
||||||
|
return 1
|
||||||
|
|
||||||
|
def GetTopicsTree(self):
|
||||||
|
'''Reads and returns the topics tree.
|
||||||
|
This auxiliary function reads and returns the topics tree file
|
||||||
|
contents for the CHM archive.
|
||||||
|
'''
|
||||||
|
if (self.topics == None):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if self.topics:
|
||||||
|
res, ui = chmlib.chm_resolve_object(self.file, self.topics)
|
||||||
|
if (res != chmlib.CHM_RESOLVE_SUCCESS):
|
||||||
|
return None
|
||||||
|
|
||||||
|
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
|
||||||
|
if (size == 0):
|
||||||
|
sys.stderr.write('GetTopicsTree: file size = 0\n')
|
||||||
|
return None
|
||||||
|
return text
|
||||||
|
|
||||||
|
def GetIndex(self):
|
||||||
|
'''Reads and returns the index tree.
|
||||||
|
This auxiliary function reads and returns the index tree file
|
||||||
|
contents for the CHM archive.
|
||||||
|
'''
|
||||||
|
if (self.index == None):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if self.index:
|
||||||
|
res, ui = chmlib.chm_resolve_object(self.file, self.index)
|
||||||
|
if (res != chmlib.CHM_RESOLVE_SUCCESS):
|
||||||
|
return None
|
||||||
|
|
||||||
|
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
|
||||||
|
if (size == 0):
|
||||||
|
sys.stderr.write('GetIndex: file size = 0\n')
|
||||||
|
return None
|
||||||
|
return text
|
||||||
|
|
||||||
|
def ResolveObject(self, document):
|
||||||
|
'''Tries to locate a document in the archive.
|
||||||
|
This function tries to locate the document inside the archive. It
|
||||||
|
returns a tuple where the first element is zero if the function
|
||||||
|
was successful, and the second is the UnitInfo for that document.
|
||||||
|
The UnitInfo is used to retrieve the document contents
|
||||||
|
'''
|
||||||
|
if self.file:
|
||||||
|
#path = os.path.abspath(document)
|
||||||
|
path = document
|
||||||
|
return chmlib.chm_resolve_object(self.file, path)
|
||||||
|
else:
|
||||||
|
return (1, None)
|
||||||
|
|
||||||
|
def RetrieveObject(self, ui, start = -1, length = -1):
|
||||||
|
'''Retrieves the contents of a document.
|
||||||
|
This function takes a UnitInfo and two optional arguments, the first
|
||||||
|
being the start address and the second is the length. These define
|
||||||
|
the amount of data to be read from the archive.
|
||||||
|
'''
|
||||||
|
if self.file and ui:
|
||||||
|
if length == -1:
|
||||||
|
len = ui.length
|
||||||
|
else:
|
||||||
|
len = length
|
||||||
|
if start == -1:
|
||||||
|
st = 0l
|
||||||
|
else:
|
||||||
|
st = long(start)
|
||||||
|
return chmlib.chm_retrieve_object(self.file, ui, st, len)
|
||||||
|
else:
|
||||||
|
return (0, '')
|
||||||
|
|
||||||
|
def Search(self, text, wholewords=0, titleonly=0):
|
||||||
|
'''Performs full-text search on the archive.
|
||||||
|
The first parameter is the word to look for, the second
|
||||||
|
indicates if the search should be for whole words only, and
|
||||||
|
the third parameter indicates if the search should be
|
||||||
|
restricted to page titles.
|
||||||
|
This method will return a tuple, the first item
|
||||||
|
indicating if the search results were partial, and the second
|
||||||
|
item being a dictionary containing the results.'''
|
||||||
|
if text and text != '' and self.file:
|
||||||
|
return extra.search (self.file, text, wholewords,
|
||||||
|
titleonly)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def IsSearchable(self):
|
||||||
|
'''Indicates if the full-text search is available for this
|
||||||
|
archive - this flag is updated when GetArchiveInfo is called'''
|
||||||
|
return self.searchable
|
||||||
|
|
||||||
|
def GetEncoding(self):
|
||||||
|
'''Returns a string that can be used with the codecs python package
|
||||||
|
to encode or decode the files in the chm archive. If an error is
|
||||||
|
found, or if it is not possible to find the encoding, None is
|
||||||
|
returned.'''
|
||||||
|
if self.encoding:
|
||||||
|
vals = string.split(self.encoding, ',')
|
||||||
|
if len(vals) > 2:
|
||||||
|
try:
|
||||||
|
return charset_table[int(vals[2])]
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
def GetLCID(self):
    '''Returns the locale_table entry for the archive locale id.

    The entry is a tuple (codec name, language name, region name);
    None is returned when the locale id is not in the table.'''
    return locale_table.get(self.lcid)
|
||||||
|
|
||||||
|
def GetDWORD(self, buff, idx=0):
    '''Internal method.
    Decodes the four bytes of ``buff`` starting at ``idx`` as a
    little-endian unsigned 32-bit number. The all-ones value
    0xFFFFFFFF is reported as 0.
    '''
    value = sum(buff[idx + pos] << (8 * pos) for pos in range(4))
    return 0 if value == 0xFFFFFFFF else value
|
||||||
|
|
||||||
|
def GetString(self, text, idx):
    '''Internal method.
    Retrieves a string from the #STRINGS buffer.

    Returns the part of ``text`` that starts at ``idx`` and ends just
    before the next NUL byte. If there is no NUL byte after ``idx``
    the rest of the buffer is returned.
    '''
    end = text.find('\x00', idx)
    if end == -1:
        # find() reports "not found" as -1; slicing with -1 would
        # silently drop the last character of an unterminated string.
        return text[idx:]
    return text[idx:end]
|
||||||
|
|
||||||
|
def GetWindowsInfo(self):
|
||||||
|
'''Gets information from the #WINDOWS file.
|
||||||
|
Checks the #WINDOWS file to see if it has any info that was
|
||||||
|
not found in #SYSTEM (topics, index or default page.
|
||||||
|
'''
|
||||||
|
result, ui = chmlib.chm_resolve_object(self.file, '/#WINDOWS')
|
||||||
|
if (result != chmlib.CHM_RESOLVE_SUCCESS):
|
||||||
|
return -1
|
||||||
|
|
||||||
|
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, 8)
|
||||||
|
if (size < 8):
|
||||||
|
return -2
|
||||||
|
|
||||||
|
buff = array.array('B', text)
|
||||||
|
num_entries = self.GetDWORD(buff, 0)
|
||||||
|
entry_size = self.GetDWORD(buff, 4)
|
||||||
|
|
||||||
|
if num_entries < 1:
|
||||||
|
return -3
|
||||||
|
|
||||||
|
size, text = chmlib.chm_retrieve_object(self.file, ui, 8l, entry_size)
|
||||||
|
if (size < entry_size):
|
||||||
|
return -4
|
||||||
|
|
||||||
|
buff = array.array('B', text)
|
||||||
|
toc_index = self.GetDWORD(buff, 0x60)
|
||||||
|
idx_index = self.GetDWORD(buff, 0x64)
|
||||||
|
dft_index = self.GetDWORD(buff, 0x68)
|
||||||
|
|
||||||
|
result, ui = chmlib.chm_resolve_object(self.file, '/#STRINGS')
|
||||||
|
if (result != chmlib.CHM_RESOLVE_SUCCESS):
|
||||||
|
return -5
|
||||||
|
|
||||||
|
size, text = chmlib.chm_retrieve_object(self.file, ui, 0l, ui.length)
|
||||||
|
if (size == 0):
|
||||||
|
return -6
|
||||||
|
|
||||||
|
if (not self.topics):
|
||||||
|
self.topics = self.GetString(text, toc_index)
|
||||||
|
if not self.topics.startswith("/"):
|
||||||
|
self.topics = "/" + self.topics
|
||||||
|
|
||||||
|
if (not self.index):
|
||||||
|
self.index = self.GetString(text, idx_index)
|
||||||
|
if not self.index.startswith("/"):
|
||||||
|
self.index = "/" + self.index
|
||||||
|
|
||||||
|
if (dft_index != 0):
|
||||||
|
self.home = self.GetString(text, dft_index)
|
||||||
|
if not self.home.startswith("/"):
|
||||||
|
self.home = "/" + self.home
|
100
src/calibre/utils/chm/chmlib.py
Normal file
100
src/calibre/utils/chm/chmlib.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
# This file was created automatically by SWIG.
|
||||||
|
# Don't modify this file, modify the SWIG interface instead.
|
||||||
|
# This file is compatible with both classic and new-style classes.
|
||||||
|
|
||||||
|
from calibre.constants import plugins
|
||||||
|
|
||||||
|
_chmlib, chmlib_err = plugins['chmlib']
|
||||||
|
|
||||||
|
if chmlib_err:
|
||||||
|
raise RuntimeError('Failed to load chmlib: '+chmlib_err)
|
||||||
|
|
||||||
|
def _swig_setattr(self,class_type,name,value):
|
||||||
|
if (name == "this"):
|
||||||
|
if isinstance(value, class_type):
|
||||||
|
self.__dict__[name] = value.this
|
||||||
|
if hasattr(value,"thisown"): self.__dict__["thisown"] = value.thisown
|
||||||
|
del value.thisown
|
||||||
|
return
|
||||||
|
method = class_type.__swig_setmethods__.get(name,None)
|
||||||
|
if method: return method(self,value)
|
||||||
|
self.__dict__[name] = value
|
||||||
|
|
||||||
|
def _swig_getattr(self,class_type,name):
|
||||||
|
method = class_type.__swig_getmethods__.get(name,None)
|
||||||
|
if method: return method(self)
|
||||||
|
raise AttributeError,name
|
||||||
|
|
||||||
|
import types
|
||||||
|
try:
|
||||||
|
_object = types.ObjectType
|
||||||
|
_newclass = 1
|
||||||
|
except AttributeError:
|
||||||
|
class _object : pass
|
||||||
|
_newclass = 0
|
||||||
|
|
||||||
|
|
||||||
|
CHM_UNCOMPRESSED = _chmlib.CHM_UNCOMPRESSED
|
||||||
|
CHM_COMPRESSED = _chmlib.CHM_COMPRESSED
|
||||||
|
CHM_MAX_PATHLEN = _chmlib.CHM_MAX_PATHLEN
|
||||||
|
class chmUnitInfo(_object):
    # SWIG proxy for the C ``chmUnitInfo`` structure. Attribute access is
    # routed through the getter/setter tables below, which map each field
    # name (start, length, space, path) to its accessor in ``_chmlib``.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, chmUnitInfo, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, chmUnitInfo, name)
    # Each field is registered in both tables; when ``_newclass`` is set
    # it is additionally exposed as a regular property.
    __swig_setmethods__["start"] = _chmlib.chmUnitInfo_start_set
    __swig_getmethods__["start"] = _chmlib.chmUnitInfo_start_get
    if _newclass:start = property(_chmlib.chmUnitInfo_start_get,_chmlib.chmUnitInfo_start_set)
    __swig_setmethods__["length"] = _chmlib.chmUnitInfo_length_set
    __swig_getmethods__["length"] = _chmlib.chmUnitInfo_length_get
    if _newclass:length = property(_chmlib.chmUnitInfo_length_get,_chmlib.chmUnitInfo_length_set)
    __swig_setmethods__["space"] = _chmlib.chmUnitInfo_space_set
    __swig_getmethods__["space"] = _chmlib.chmUnitInfo_space_get
    if _newclass:space = property(_chmlib.chmUnitInfo_space_get,_chmlib.chmUnitInfo_space_set)
    __swig_setmethods__["path"] = _chmlib.chmUnitInfo_path_set
    __swig_getmethods__["path"] = _chmlib.chmUnitInfo_path_get
    if _newclass:path = property(_chmlib.chmUnitInfo_path_get,_chmlib.chmUnitInfo_path_set)
    def __init__(self,*args):
        # Build the underlying C object and mark this proxy as its owner.
        _swig_setattr(self, chmUnitInfo, 'this', apply(_chmlib.new_chmUnitInfo,args))
        _swig_setattr(self, chmUnitInfo, 'thisown', 1)
    def __del__(self, destroy= _chmlib.delete_chmUnitInfo):
        # Destroy the C object only when this proxy owns it. ``destroy``
        # is bound as a default argument, presumably so it is still
        # reachable during interpreter shutdown - TODO confirm. Errors
        # are deliberately ignored in the destructor.
        try:
            if self.thisown: destroy(self)
        except: pass
    def __repr__(self):
        return "<C chmUnitInfo instance at %s>" % (self.this,)
|
||||||
|
|
||||||
|
class chmUnitInfoPtr(chmUnitInfo):
    # Proxy that wraps an already existing chmUnitInfo pointer instead of
    # creating a new C object.
    def __init__(self,this):
        _swig_setattr(self, chmUnitInfo, 'this', this)
        # Not the owner unless an ownership flag is already present.
        if not hasattr(self,"thisown"): _swig_setattr(self, chmUnitInfo, 'thisown', 0)
        # NOTE(review): the attribute *name* passed here is the class
        # object itself; this is what the SWIG generator emitted and
        # looks intended to make the instance behave as a chmUnitInfo -
        # confirm before changing.
        _swig_setattr(self, chmUnitInfo,self.__class__,chmUnitInfo)
|
||||||
|
_chmlib.chmUnitInfo_swigregister(chmUnitInfoPtr)
|
||||||
|
|
||||||
|
chm_open = _chmlib.chm_open
|
||||||
|
|
||||||
|
chm_close = _chmlib.chm_close
|
||||||
|
|
||||||
|
CHM_PARAM_MAX_BLOCKS_CACHED = _chmlib.CHM_PARAM_MAX_BLOCKS_CACHED
|
||||||
|
chm_set_param = _chmlib.chm_set_param
|
||||||
|
|
||||||
|
CHM_RESOLVE_SUCCESS = _chmlib.CHM_RESOLVE_SUCCESS
|
||||||
|
CHM_RESOLVE_FAILURE = _chmlib.CHM_RESOLVE_FAILURE
|
||||||
|
chm_resolve_object = _chmlib.chm_resolve_object
|
||||||
|
|
||||||
|
chm_retrieve_object = _chmlib.chm_retrieve_object
|
||||||
|
|
||||||
|
CHM_ENUMERATE_NORMAL = _chmlib.CHM_ENUMERATE_NORMAL
|
||||||
|
CHM_ENUMERATE_META = _chmlib.CHM_ENUMERATE_META
|
||||||
|
CHM_ENUMERATE_SPECIAL = _chmlib.CHM_ENUMERATE_SPECIAL
|
||||||
|
CHM_ENUMERATE_FILES = _chmlib.CHM_ENUMERATE_FILES
|
||||||
|
CHM_ENUMERATE_DIRS = _chmlib.CHM_ENUMERATE_DIRS
|
||||||
|
CHM_ENUMERATE_ALL = _chmlib.CHM_ENUMERATE_ALL
|
||||||
|
CHM_ENUMERATOR_FAILURE = _chmlib.CHM_ENUMERATOR_FAILURE
|
||||||
|
CHM_ENUMERATOR_CONTINUE = _chmlib.CHM_ENUMERATOR_CONTINUE
|
||||||
|
CHM_ENUMERATOR_SUCCESS = _chmlib.CHM_ENUMERATOR_SUCCESS
|
||||||
|
chm_enumerate = _chmlib.chm_enumerate
|
||||||
|
|
||||||
|
chm_enumerate_dir = _chmlib.chm_enumerate_dir
|
||||||
|
|
||||||
|
|
759
src/calibre/utils/chm/extra.c
Normal file
759
src/calibre/utils/chm/extra.c
Normal file
@ -0,0 +1,759 @@
|
|||||||
|
/*
|
||||||
|
* extra.c - full-text search support for pychm
|
||||||
|
*
|
||||||
|
* Copyright (C) 2004 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place, Suite 330,
|
||||||
|
* Boston, MA 02111-1307, USA.
|
||||||
|
*
|
||||||
|
* Author: Rubens Ramos <rubensr@users.sourceforge.net>
|
||||||
|
*
|
||||||
|
* Heavily based on work done by:
|
||||||
|
* Pabs <pabs@zip.to> - chmdeco
|
||||||
|
* Razvan Cojocaru <razvanco@gmx.net> - xCHM
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "chm_lib.h"
|
||||||
|
#ifdef __PYTHON__
|
||||||
|
#include "Python.h"
|
||||||
|
#else
|
||||||
|
#include <stdio.h>
|
||||||
|
#define PyObject void
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include "stdint.h"
|
||||||
|
#define strcasecmp stricmp
|
||||||
|
#define strncasecmp strnicmp
|
||||||
|
#else
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <strings.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( _MSC_VER ) && !defined( __cplusplus )
|
||||||
|
# define inline __inline
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(__WIN32__)
|
||||||
|
# if defined(_MSC_VER)
|
||||||
|
# if defined(STATIC_LINKED)
|
||||||
|
# define MODEXPORT(a) a
|
||||||
|
# define MODIMPORT(a) extern a
|
||||||
|
# else
|
||||||
|
# define MODEXPORT(a) __declspec(dllexport) a
|
||||||
|
# define MODIMPORT(a) extern a
|
||||||
|
# endif
|
||||||
|
# else
|
||||||
|
# if defined(__BORLANDC__)
|
||||||
|
# define MODEXPORT(a) a _export
|
||||||
|
# define MODIMPORT(a) a _export
|
||||||
|
# else
|
||||||
|
# define MODEXPORT(a) a
|
||||||
|
# define MODIMPORT(a) a
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# define MODEXPORT(a) a
|
||||||
|
# define MODIMPORT(a) a
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define false 0
|
||||||
|
#define true 1
|
||||||
|
|
||||||
|
#define FTS_HEADER_LEN 0x32
|
||||||
|
#define TOPICS_ENTRY_LEN 16
|
||||||
|
#define COMMON_BUF_LEN 1025
|
||||||
|
|
||||||
|
/*
 * Release a heap pointer and reset it to NULL so a later FREE() or free()
 * of the same variable is harmless.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement: with the previous two-statement form, `if (c) FREE(p);`
 * would have reset p to NULL unconditionally.
 */
#define FREE(x) do { free (x); (x) = NULL; } while (0)
|
||||||
|
|
||||||
|
/*
 * Decode an unsigned 16-bit little-endian value from the two bytes at b.
 *
 * Declared static inline: a plain C99 `inline` definition does not emit an
 * external definition, which can produce an undefined reference at link
 * time whenever the compiler decides not to inline a call.
 */
static inline uint16_t
get_uint16 (uint8_t* b) {
    return (uint16_t) (b[0] | (b[1] << 8));
}
|
||||||
|
|
||||||
|
/*
 * Decode an unsigned 32-bit little-endian value from the four bytes at b.
 *
 * Every byte is widened to uint32_t before shifting; without the cast the
 * operand is promoted to (signed) int and `b[3] << 24` shifts into the
 * sign bit whenever b[3] >= 0x80.
 *
 * Declared static inline so that a definition is always available to the
 * linker (plain C99 `inline` does not emit one).
 */
static inline uint32_t
get_uint32 (uint8_t* b) {
    return  (uint32_t) b[0]        |
           ((uint32_t) b[1] << 8)  |
           ((uint32_t) b[2] << 16) |
           ((uint32_t) b[3] << 24);
}
|
||||||
|
|
||||||
|
/*
 * Decode an unsigned 64-bit little-endian value from the eight bytes at b.
 *
 * All eight bytes are widened to uint64_t before shifting.  Previously the
 * low four bytes were combined as a signed int; when b[3] >= 0x80 that int
 * was negative and its conversion to uint64_t sign-extended, setting the
 * upper 32 bits of the result to all ones.
 *
 * Declared static inline so that a definition is always available to the
 * linker (plain C99 `inline` does not emit one).
 */
static inline uint64_t
get_uint64 (uint8_t* b) {
    return  (uint64_t) b[0]        |
           ((uint64_t) b[1] << 8)  |
           ((uint64_t) b[2] << 16) |
           ((uint64_t) b[3] << 24) |
           ((uint64_t) b[4] << 32) |
           ((uint64_t) b[5] << 40) |
           ((uint64_t) b[6] << 48) |
           ((uint64_t) b[7] << 56);
}
|
||||||
|
|
||||||
|
/*
 * Decode a variable-length integer stored 7 bits per byte.
 *
 * Each byte contributes its low 7 bits; the first byte supplies the least
 * significant group.  A byte with bit 0x80 set means another byte follows.
 *
 * buffer  start of the encoded value; the caller must guarantee that a
 *         terminating byte (bit 0x80 clear) is reachable.
 * length  receives the number of bytes consumed (always >= 1).
 *
 * Returns the decoded value.  Groups that would land at or above bit 64
 * are consumed (and counted in *length) but do not contribute.
 *
 * The 7-bit group is widened to uint64_t before shifting; the previous
 * int-typed shift overflowed from the fifth byte on.
 */
static inline uint64_t
be_encint (unsigned char *buffer, size_t *length)
{
    uint64_t result = 0;
    int shift = 0;
    unsigned char cur;

    *length = 0;

    do {
        cur = *buffer++;

        if (shift < 64) {
            result |= (uint64_t) (cur & 0x7f) << shift;
            shift += 7;
        }

        *length = *length + 1;
    } while (cur & 0x80);

    return result;
}
|
||||||
|
|
||||||
|
/*
 * Find the first unset bit in memory.
 *
 * Scanning starts at bit *bit (7 is the most significant bit) of *byte and
 * proceeds towards bit 0, then continues with bit 7 of the following byte.
 *
 * byte    first byte to examine.
 * bit     in: bit position to start at; out: the position just after the
 *         unset bit that ended the scan.
 * length  receives the number of byte boundaries crossed, including the
 *         step past the terminating bit.
 *
 * Returns the number of set bits found before the first unset bit.
 *
 * The buffer length is not known to this function, so it cannot detect
 * running off the end: the caller must guarantee that an unset bit exists.
 *
 * Declared static inline so that a definition is always available to the
 * linker (plain C99 `inline` does not emit one).
 */
static inline int
ffus (unsigned char* byte, int* bit, size_t *length) {
    int bits = 0;
    *length = 0;

    while (*byte & (1 << *bit)) {
        if (*bit)
            --(*bit);
        else {
            ++byte;
            ++(*length);
            *bit = 7;
        }
        ++bits;
    }

    /* Step over the unset bit that terminated the run. */
    if (*bit)
        --(*bit);
    else {
        ++(*length);
        *bit = 7;
    }

    return bits;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Decode one bit-packed integer from a stream.
 *
 * The value starts with a run of set bits ended by an unset bit (counted
 * by ffus), followed by r + max(count - 1, 0) payload bits read most
 * significant first.  When the run is non-empty, an extra 1 bit is placed
 * above the payload.
 *
 * byte    first byte of the encoded value.
 * bit     in/out: current bit position inside the current byte (7..0).
 * s       must be 2; any other value is rejected.
 * r       number of payload bits used when the prefix run is empty.
 * length  receives the number of whole bytes consumed.
 *
 * Returns the decoded value, or ~(uint64_t)0 when the arguments are
 * invalid or the encoded value would need 64 or more payload bits.
 *
 * The buffer length is not known here; the caller must make sure the
 * encoded value lies inside the buffer.
 */
static inline uint64_t
sr_int(unsigned char* byte, int* bit,
       unsigned char s, unsigned char r, size_t *length)
{
    uint64_t ret = 0;
    unsigned char mask;
    int n, n_bits, num_bits, base, count;
    size_t fflen;

    *length = 0;

    /* A negative bit position would make the shifts below undefined. */
    if (!bit || *bit > 7 || *bit < 0 || s != 2)
        return ~(uint64_t)0;

    count = ffus(byte, bit, &fflen);
    *length += fflen;
    byte += *length;

    n_bits = n = r + (count ? count-1 : 0);

    /* The marker bit set at the end is 1 << n_bits; refuse widths for
       which that shift is undefined. */
    if (n_bits >= 64)
        return ~(uint64_t)0;

    while (n > 0) {
        /* num_bits + 1 bits are taken from the current byte, starting at
           bit position `base`. */
        num_bits = n > *bit ? *bit : n-1;
        base = n > *bit ? 0 : *bit - (n-1);

        /* num_bits is always within 0..7, so this equals the former
           lookup table {1, 3, 7, 0xf, 0x1f, 0x3f, 0x7f, 0xff}. */
        mask = (unsigned char) (((1u << (num_bits + 1)) - 1u) << base);

        ret = (ret << (num_bits+1)) |
              (uint64_t)((*byte & mask) >> base);

        if (n > *bit) {
            /* Current byte exhausted: continue at bit 7 of the next one. */
            ++byte;
            ++(*length);
            n -= *bit+1;
            *bit = 7;
        } else {
            *bit -= n;
            n = 0;
        }
    }

    if (count)
        ret |= (uint64_t)1 << n_bits;

    return ret;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Descend the index levels of the full-text-search tree and return the
 * file offset of the leaf node that may contain `text`.
 *
 * chmfile         archive handle passed through to chm_retrieve_object.
 * text            word being searched for (must not be NULL).
 * initial_offset  offset of the root node inside the object `ui`.
 * buff_size       size in bytes of one tree node.
 * tree_depth      number of levels in the tree; tree_depth - 1 index
 *                 levels are traversed.
 * ui              unit info of the object holding the tree.
 *
 * Returns the leaf offset, or 0 when the node cannot be read, memory is
 * exhausted, the node data is inconsistent, or the descent made no
 * progress.
 *
 * Each index entry is laid out as: word_len (1 byte), pos (1 byte),
 * word_len - 1 characters, a 32-bit child offset and two further bytes.
 * Words are prefix-compressed: `pos` characters are kept from the
 * previous word and the stored characters are appended after them.
 */
static inline uint32_t
get_leaf_node_offset(struct chmFile *chmfile,
                     const char *text,
                     uint32_t initial_offset,
                     uint32_t buff_size,
                     uint16_t tree_depth,
                     struct chmUnitInfo *ui)
{
    unsigned char word_len;
    unsigned char pos;
    uint16_t free_space;
    char *word = NULL;
    char *grown;
    size_t have;
    uint32_t test_offset = 0;
    uint32_t used;
    uint32_t i;
    uint32_t result = 0;
    unsigned char *buffer;

    /* A depth of 0 would wrap around in the loop below; a node must at
       least hold its 16-bit free-space field. */
    if (tree_depth == 0 || buff_size < sizeof(uint16_t))
        return 0;

    buffer = (unsigned char *)malloc (buff_size);

    if (NULL == buffer)
        return 0;

    while (--tree_depth) {
        /* No progress since the previous level: give up. */
        if (initial_offset == test_offset)
            goto done;

        test_offset = initial_offset;
        if (chm_retrieve_object (chmfile, ui, buffer,
                                 initial_offset, buff_size) == 0)
            goto done;

        free_space = get_uint16 (buffer);
        if (free_space > buff_size)
            goto done;
        used = buff_size - free_space;

        /* Every node is parsed from its first entry; the cursor must not
           be carried over from the previous level. */
        i = sizeof(uint16_t);

        while (i < used) {
            if (buff_size - i < 2)
                goto done;

            word_len = *(buffer + i);
            pos = *(buffer + i + 1);

            /* The characters and the 32-bit child offset must lie inside
               the node; word_len == 0 would underflow the copy length. */
            if (word_len == 0 || buff_size - i < (uint32_t) word_len + 5)
                goto done;

            have = word ? strlen (word) : 0;
            grown = (char *) realloc (word, (size_t) pos + word_len);
            if (NULL == grown)
                goto done;
            word = grown;

            /* If the previous word is shorter than the claimed shared
               prefix, keep the gap zeroed so no uninitialised bytes are
               ever compared. */
            if (pos > have)
                memset (word + have, 0, pos - have);

            memcpy (word + pos, buffer + i + 2, word_len - 1);
            word[pos + word_len - 1] = 0;

            if (strcasecmp (text, word) <= 0) {
                initial_offset = get_uint32 (buffer + i + word_len + 1);
                break;
            }

            i += word_len + sizeof (unsigned char) + sizeof(uint32_t) +
                 sizeof(uint16_t);
        }
    }

    if (initial_offset != test_offset)
        result = initial_offset;

done:
    FREE(word);
    FREE(buffer);

    return result;
}
|
||||||
|
|
||||||
|
inline int
|
||||||
|
pychm_process_wlc (struct chmFile *chmfile,
|
||||||
|
uint64_t wlc_count, uint64_t wlc_size,
|
||||||
|
uint32_t wlc_offset, unsigned char ds,
|
||||||
|
unsigned char dr, unsigned char cs,
|
||||||
|
unsigned char cr, unsigned char ls,
|
||||||
|
unsigned char lr, struct chmUnitInfo *uimain,
|
||||||
|
struct chmUnitInfo* uitbl,
|
||||||
|
struct chmUnitInfo *uistrings,
|
||||||
|
struct chmUnitInfo* topics,
|
||||||
|
struct chmUnitInfo *urlstr,
|
||||||
|
PyObject *dict)
|
||||||
|
{
|
||||||
|
uint32_t stroff, urloff;
|
||||||
|
uint64_t i, j, count;
|
||||||
|
size_t length;
|
||||||
|
int wlc_bit = 7;
|
||||||
|
size_t off = 0;
|
||||||
|
uint64_t index = 0;
|
||||||
|
unsigned char entry[TOPICS_ENTRY_LEN];
|
||||||
|
unsigned char combuf[COMMON_BUF_LEN];
|
||||||
|
unsigned char *buffer = (unsigned char *)malloc (wlc_size);
|
||||||
|
char *url = NULL;
|
||||||
|
char *topic = NULL;
|
||||||
|
|
||||||
|
if (chm_retrieve_object(chmfile, uimain, buffer,
|
||||||
|
wlc_offset, wlc_size) == 0) {
|
||||||
|
FREE(buffer);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < wlc_count; ++i) {
|
||||||
|
|
||||||
|
if(wlc_bit != 7) {
|
||||||
|
++off;
|
||||||
|
wlc_bit = 7;
|
||||||
|
}
|
||||||
|
|
||||||
|
index += sr_int(buffer + off, &wlc_bit, ds, dr, &length);
|
||||||
|
off += length;
|
||||||
|
|
||||||
|
if(chm_retrieve_object(chmfile, topics, entry,
|
||||||
|
index * 16, TOPICS_ENTRY_LEN) == 0) {
|
||||||
|
FREE(topic);
|
||||||
|
FREE(url);
|
||||||
|
FREE(buffer);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||||
|
stroff = get_uint32 (entry + 4);
|
||||||
|
|
||||||
|
FREE (topic);
|
||||||
|
if (chm_retrieve_object (chmfile, uistrings, combuf,
|
||||||
|
stroff, COMMON_BUF_LEN - 1) == 0) {
|
||||||
|
topic = strdup ("Untitled in index");
|
||||||
|
|
||||||
|
} else {
|
||||||
|
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||||
|
|
||||||
|
topic = strdup ((char*)combuf);
|
||||||
|
}
|
||||||
|
|
||||||
|
urloff = get_uint32 (entry + 8);
|
||||||
|
|
||||||
|
if(chm_retrieve_object (chmfile, uitbl, combuf,
|
||||||
|
urloff, 12) == 0) {
|
||||||
|
FREE(buffer);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
urloff = get_uint32 (combuf + 8);
|
||||||
|
|
||||||
|
if (chm_retrieve_object (chmfile, urlstr, combuf,
|
||||||
|
urloff + 8, COMMON_BUF_LEN - 1) == 0) {
|
||||||
|
FREE(topic);
|
||||||
|
FREE(url);
|
||||||
|
FREE(buffer);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
combuf[COMMON_BUF_LEN - 1] = 0;
|
||||||
|
|
||||||
|
FREE (url);
|
||||||
|
url = strdup ((char*)combuf);
|
||||||
|
|
||||||
|
if (url && topic) {
|
||||||
|
#ifdef __PYTHON__
|
||||||
|
PyDict_SetItemString (dict, topic,
|
||||||
|
PyString_FromString (url));
|
||||||
|
#else
|
||||||
|
printf ("%s ==> %s\n", url, topic);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
count = sr_int (buffer + off, &wlc_bit, cs, cr, &length);
|
||||||
|
off += length;
|
||||||
|
|
||||||
|
for (j = 0; j < count; ++j) {
|
||||||
|
sr_int (buffer + off, &wlc_bit, ls, lr, &length);
|
||||||
|
off += length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FREE(topic);
|
||||||
|
FREE(url);
|
||||||
|
FREE(buffer);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
chm_search (struct chmFile *chmfile,
|
||||||
|
const char *text, int whole_words,
|
||||||
|
int titles_only, PyObject *dict)
|
||||||
|
{
|
||||||
|
unsigned char header[FTS_HEADER_LEN];
|
||||||
|
unsigned char doc_index_s;
|
||||||
|
unsigned char doc_index_r;
|
||||||
|
unsigned char code_count_s;
|
||||||
|
unsigned char code_count_r;
|
||||||
|
unsigned char loc_codes_s;
|
||||||
|
unsigned char loc_codes_r;
|
||||||
|
unsigned char word_len, pos;
|
||||||
|
unsigned char *buffer;
|
||||||
|
char *word = NULL;
|
||||||
|
uint32_t node_offset;
|
||||||
|
uint32_t node_len;
|
||||||
|
uint16_t tree_depth;
|
||||||
|
uint32_t i;
|
||||||
|
uint16_t free_space;
|
||||||
|
uint64_t wlc_count, wlc_size;
|
||||||
|
uint32_t wlc_offset;
|
||||||
|
char *wrd_buf;
|
||||||
|
unsigned char title;
|
||||||
|
size_t encsz;
|
||||||
|
struct chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
|
||||||
|
int partial = false;
|
||||||
|
|
||||||
|
if (NULL == text)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (chm_resolve_object (chmfile, "/$FIftiMain", &ui) !=
|
||||||
|
CHM_RESOLVE_SUCCESS ||
|
||||||
|
chm_resolve_object (chmfile, "/#TOPICS", &uitopics) !=
|
||||||
|
CHM_RESOLVE_SUCCESS ||
|
||||||
|
chm_resolve_object (chmfile, "/#STRINGS", &uistrings) !=
|
||||||
|
CHM_RESOLVE_SUCCESS ||
|
||||||
|
chm_resolve_object (chmfile, "/#URLTBL", &uiurltbl) !=
|
||||||
|
CHM_RESOLVE_SUCCESS ||
|
||||||
|
chm_resolve_object (chmfile, "/#URLSTR", &uiurlstr) !=
|
||||||
|
CHM_RESOLVE_SUCCESS)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if(chm_retrieve_object(chmfile, &ui, header, 0, FTS_HEADER_LEN) == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
doc_index_s = header[0x1E];
|
||||||
|
doc_index_r = header[0x1F];
|
||||||
|
code_count_s = header[0x20];
|
||||||
|
code_count_r = header[0x21];
|
||||||
|
loc_codes_s = header[0x22];
|
||||||
|
loc_codes_r = header[0x23];
|
||||||
|
|
||||||
|
if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
node_offset = get_uint32 (header + 0x14);
|
||||||
|
node_len = get_uint32 (header + 0x2e);
|
||||||
|
tree_depth = get_uint16 (header + 0x18);
|
||||||
|
|
||||||
|
i = sizeof(uint16_t);
|
||||||
|
|
||||||
|
buffer = (unsigned char*)malloc (node_len);
|
||||||
|
|
||||||
|
node_offset = get_leaf_node_offset (chmfile, text, node_offset, node_len,
|
||||||
|
tree_depth, &ui);
|
||||||
|
|
||||||
|
if (!node_offset) {
|
||||||
|
FREE(buffer);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
|
||||||
|
if (chm_retrieve_object (chmfile, &ui, buffer,
|
||||||
|
node_offset, node_len) == 0) {
|
||||||
|
FREE(word);
|
||||||
|
FREE(buffer);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
free_space = get_uint16 (buffer + 6);
|
||||||
|
|
||||||
|
i = sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint16_t);
|
||||||
|
|
||||||
|
encsz = 0;
|
||||||
|
|
||||||
|
while (i < node_len - free_space) {
|
||||||
|
word_len = *(buffer + i);
|
||||||
|
pos = *(buffer + i + 1);
|
||||||
|
|
||||||
|
wrd_buf = (char*)malloc (word_len);
|
||||||
|
memcpy (wrd_buf, buffer + i + 2, word_len - 1);
|
||||||
|
wrd_buf[word_len - 1] = 0;
|
||||||
|
|
||||||
|
if (pos == 0) {
|
||||||
|
FREE(word);
|
||||||
|
word = (char *) strdup (wrd_buf);
|
||||||
|
} else {
|
||||||
|
word = (char*)realloc (word, word_len + pos + 1);
|
||||||
|
strcpy (word + pos, wrd_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
FREE(wrd_buf);
|
||||||
|
|
||||||
|
i += 2 + word_len;
|
||||||
|
title = *(buffer + i - 1);
|
||||||
|
|
||||||
|
wlc_count = be_encint (buffer + i, &encsz);
|
||||||
|
i += encsz;
|
||||||
|
|
||||||
|
wlc_offset = get_uint32 (buffer + i);
|
||||||
|
|
||||||
|
i += sizeof(uint32_t) + sizeof(uint16_t);
|
||||||
|
wlc_size = be_encint (buffer + i, &encsz);
|
||||||
|
i += encsz;
|
||||||
|
|
||||||
|
node_offset = get_uint32 (buffer);
|
||||||
|
|
||||||
|
if (!title && titles_only)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (whole_words && !strcasecmp(text, word)) {
|
||||||
|
partial = pychm_process_wlc (chmfile, wlc_count, wlc_size,
|
||||||
|
wlc_offset, doc_index_s,
|
||||||
|
doc_index_r,code_count_s,
|
||||||
|
code_count_r, loc_codes_s,
|
||||||
|
loc_codes_r, &ui, &uiurltbl,
|
||||||
|
&uistrings, &uitopics,
|
||||||
|
&uiurlstr, dict);
|
||||||
|
FREE(word);
|
||||||
|
FREE(buffer);
|
||||||
|
return partial;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!whole_words) {
|
||||||
|
if (!strncasecmp (word, text, strlen(text))) {
|
||||||
|
partial = true;
|
||||||
|
pychm_process_wlc (chmfile, wlc_count, wlc_size,
|
||||||
|
wlc_offset, doc_index_s,
|
||||||
|
doc_index_r,code_count_s,
|
||||||
|
code_count_r, loc_codes_s,
|
||||||
|
loc_codes_r, &ui, &uiurltbl,
|
||||||
|
&uistrings, &uitopics,
|
||||||
|
&uiurlstr, dict);
|
||||||
|
|
||||||
|
} else if (strncasecmp (text, word, strlen(text)) < -1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
} while (!whole_words &&
|
||||||
|
!strncmp (word, text, strlen(text)) &&
|
||||||
|
node_offset);
|
||||||
|
|
||||||
|
FREE(word);
|
||||||
|
FREE(buffer);
|
||||||
|
|
||||||
|
return partial;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * One place inside a CHM archive from which chm_get_lcid tries to read the
 * locale id.
 */
typedef struct {
    const char *file;   /* internal path of the object to resolve */
    int offset;         /* byte offset inside that object to read from */
} Langrec;

/*
 * Candidate locations, tried in this order.
 *
 * NOTE(review): only the first path starts with '/'; whether the other two
 * can resolve without it depends on chm_resolve_object -- confirm.
 */
Langrec lang_files[] = {
    { "/$FIftiMain",               0x7E },
    { "$WWKeywordLinks/BTree",     0x34 },
    { "$WWAssociativeLinks/BTree", 0x34 }
};

/* Number of entries in lang_files. */
#define LANG_FILES_SIZE (sizeof(lang_files)/sizeof(lang_files[0]))
|
||||||
|
|
||||||
|
int
|
||||||
|
chm_get_lcid (struct chmFile *chmfile) {
|
||||||
|
struct chmUnitInfo ui;
|
||||||
|
uint32_t lang;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i=0; i<LANG_FILES_SIZE; i++) {
|
||||||
|
|
||||||
|
if (chm_resolve_object (chmfile, lang_files[i].file, &ui) ==
|
||||||
|
CHM_RESOLVE_SUCCESS) {
|
||||||
|
|
||||||
|
if (chm_retrieve_object (chmfile, &ui, (unsigned char *) &lang,
|
||||||
|
lang_files[i].offset, sizeof(uint32_t)) != 0)
|
||||||
|
return lang;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __PYTHON__
|
||||||
|
|
||||||
|
/*
 * Python: is_searchable(chmfile) -> int
 *
 * Returns 1 when the archive contains every object chm_search needs
 * ("/$FIftiMain", "/#TOPICS", "/#STRINGS", "/#URLTBL", "/#URLSTR"),
 * 0 otherwise.  Raises TypeError when the argument cannot be parsed or
 * does not yield a chmFile pointer.
 */
static PyObject *
is_searchable (PyObject *self, PyObject *args) {
    struct chmFile *file;
    PyObject *obj0;
    struct chmUnitInfo ui;

    if (!PyArg_ParseTuple (args, "O:is_searchable", &obj0)) {
        PyErr_SetString(PyExc_TypeError, "Expected chmfile (not CHMFile!)");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);

    /* A NULL handle must never reach chmlib.  Keep the exception raised
       by PyCObject_AsVoidPtr when there is one. */
    if (file == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected chmfile (not CHMFile!)");
        return NULL;
    }

    if (chm_resolve_object (file, "/$FIftiMain", &ui) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (file, "/#TOPICS", &ui) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (file, "/#STRINGS", &ui) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (file, "/#URLTBL", &ui) !=
        CHM_RESOLVE_SUCCESS ||
        chm_resolve_object (file, "/#URLSTR", &ui) !=
        CHM_RESOLVE_SUCCESS)
        return Py_BuildValue ("i", 0);

    return Py_BuildValue ("i", 1);
}
|
||||||
|
|
||||||
|
/*
 * Python: search(chmfile, text, whole_words, titles_only) -> (int, dict)
 *
 * Runs chm_search and returns a tuple of its integer result and a dict
 * mapping topic titles to URLs.  Raises TypeError when the arguments
 * cannot be parsed or the first one does not yield a chmFile pointer, and
 * MemoryError when the dict cannot be created.
 */
static PyObject *
search (PyObject *self, PyObject *args) {
    char *text;
    int whole_words;
    int titles_only;
    int partial;
    struct chmFile *file;
    PyObject *obj0;
    PyObject *dict;

    if (!PyArg_ParseTuple (args, "Osii:search", &obj0, &text,
                           &whole_words, &titles_only)) {
        PyErr_SetString(PyExc_TypeError,
                        "Expected chmfile (not CHMFile!), string, int, int");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);

    /* A NULL handle must never reach chmlib.  Keep the exception raised
       by PyCObject_AsVoidPtr when there is one. */
    if (file == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                        "Expected chmfile (not CHMFile!), string, int, int");
        return NULL;
    }

    dict = PyDict_New();

    if (dict == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    partial = chm_search (file,
                          text, whole_words, titles_only, dict);

    /* "N" hands our only reference to the tuple.  The former "O" added a
       second reference that was never released, leaking every result
       dict. */
    return Py_BuildValue ("(iN)", partial, dict);
}
|
||||||
|
|
||||||
|
/*
 * Python: get_lcid(chmfile) -> int or None
 *
 * Returns the value of chm_get_lcid, or None when it reports -1.  Raises
 * TypeError when the argument cannot be parsed or does not yield a
 * chmFile pointer.
 */
static PyObject *
get_lcid (PyObject *self, PyObject *args) {
    int code;
    struct chmFile *file;
    PyObject *obj0;

    if (!PyArg_ParseTuple (args, "O:get_lcid", &obj0)) {
        PyErr_SetString(PyExc_TypeError,"Expected a chmfile (not a CHMFile!)");
        return NULL;
    }

    file = (struct chmFile *) PyCObject_AsVoidPtr(obj0);

    /* A NULL handle must never reach chmlib.  Keep the exception raised
       by PyCObject_AsVoidPtr when there is one. */
    if (file == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_TypeError,
                            "Expected a chmfile (not a CHMFile!)");
        return NULL;
    }

    code = chm_get_lcid (file);

    if (code == -1) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    return Py_BuildValue ("i", code);
}
|
||||||
|
|
||||||
|
static PyMethodDef
|
||||||
|
IndexMethods[] = {
|
||||||
|
{"get_lcid", get_lcid, METH_VARARGS,
|
||||||
|
"Returns LCID (Locale ID) for archive."},
|
||||||
|
{"search", search, METH_VARARGS,
|
||||||
|
"Perform Full-Text search."},
|
||||||
|
{"is_searchable", is_searchable, METH_VARARGS,
|
||||||
|
"Return 1 if it is possible to search the archive, 0 otherwise."},
|
||||||
|
{NULL, NULL, 0, NULL}
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
MODEXPORT(void)
|
||||||
|
initchm_extra (void) {
|
||||||
|
Py_InitModule ("chm_extra", IndexMethods);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/*
 * Stand-alone test driver (built only without __PYTHON__).
 *
 * Usage: <program> <filename>
 *
 * Opens the archive, prints its LCID and then repeatedly reads
 * "<whole_words> <titles_only> <string>" from stdin, runs chm_search and
 * prints the result.  The loop ends at end of input or on the first line
 * that cannot be parsed.
 *
 * Returns 0 after a normal run or after printing the usage line, -1 when
 * the archive cannot be opened.
 */
int
main (int argc, char **argv) {
    struct chmFile *file;
    char text[255];
    int whole_words, titles_only;
    int partial;
    int lcid;

    if (argc != 2) {
        printf ("\n%s <filename>\n", argv[0]);
        return 0;
    }

    file = chm_open (argv[1]);

    if (!file)
        return -1;

    lcid = chm_get_lcid (file);
    printf ("\nLCID= %d (%08X)\n", lcid, (unsigned int) lcid);

    for (;;) {
        printf ("\n<whole_words> <titles_only> <string>\n");
        printf ("> ");

        /* The field width keeps the word inside text[]; requiring all
           three conversions also ends the loop on EOF, which scanf
           reports as a non-zero (negative) value. */
        if (scanf ("%d %d %254s", &whole_words, &titles_only, text) != 3)
            break;

        partial = chm_search (file,
                              text, whole_words, titles_only, NULL);

        printf ("Partial = %d\n", partial);
    }

    chm_close (file);
    return 0;
}
|
||||||
|
|
||||||
|
#endif
|
247
src/calibre/utils/chm/stdint.h
Normal file
247
src/calibre/utils/chm/stdint.h
Normal file
@ -0,0 +1,247 @@
|
|||||||
|
// ISO C9x compliant stdint.h for Microsoft Visual Studio
|
||||||
|
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
|
||||||
|
//
|
||||||
|
// Copyright (c) 2006-2008 Alexander Chemeris
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer in the
|
||||||
|
// documentation and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// 3. The name of the author may be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||||
|
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||||
|
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
//
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#ifndef _MSC_VER // [
|
||||||
|
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||||
|
#endif // _MSC_VER ]
|
||||||
|
|
||||||
|
#ifndef _MSC_STDINT_H_ // [
|
||||||
|
#define _MSC_STDINT_H_
|
||||||
|
|
||||||
|
#if _MSC_VER > 1000
|
||||||
|
#pragma once
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
|
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
|
||||||
|
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
|
||||||
|
// or the compiler gives many errors like this:
|
||||||
|
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
# include <wchar.h>
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Define _W64 macros to mark types changing their size, like intptr_t.
|
||||||
|
#ifndef _W64
|
||||||
|
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
|
||||||
|
# define _W64 __w64
|
||||||
|
# else
|
||||||
|
# define _W64
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// 7.18.1 Integer types
|
||||||
|
|
||||||
|
// 7.18.1.1 Exact-width integer types
|
||||||
|
|
||||||
|
// Visual Studio 6 and Embedded Visual C++ 4 don't
|
||||||
|
// realize that, e.g. char has the same size as __int8
|
||||||
|
// so we give up on __intX for them.
|
||||||
|
#if (_MSC_VER < 1300)
|
||||||
|
typedef signed char int8_t;
|
||||||
|
typedef signed short int16_t;
|
||||||
|
typedef signed int int32_t;
|
||||||
|
typedef unsigned char uint8_t;
|
||||||
|
typedef unsigned short uint16_t;
|
||||||
|
typedef unsigned int uint32_t;
|
||||||
|
#else
|
||||||
|
typedef signed __int8 int8_t;
|
||||||
|
typedef signed __int16 int16_t;
|
||||||
|
typedef signed __int32 int32_t;
|
||||||
|
typedef unsigned __int8 uint8_t;
|
||||||
|
typedef unsigned __int16 uint16_t;
|
||||||
|
typedef unsigned __int32 uint32_t;
|
||||||
|
#endif
|
||||||
|
typedef signed __int64 int64_t;
|
||||||
|
typedef unsigned __int64 uint64_t;
|
||||||
|
|
||||||
|
|
||||||
|
// 7.18.1.2 Minimum-width integer types
|
||||||
|
typedef int8_t int_least8_t;
|
||||||
|
typedef int16_t int_least16_t;
|
||||||
|
typedef int32_t int_least32_t;
|
||||||
|
typedef int64_t int_least64_t;
|
||||||
|
typedef uint8_t uint_least8_t;
|
||||||
|
typedef uint16_t uint_least16_t;
|
||||||
|
typedef uint32_t uint_least32_t;
|
||||||
|
typedef uint64_t uint_least64_t;
|
||||||
|
|
||||||
|
// 7.18.1.3 Fastest minimum-width integer types
|
||||||
|
typedef int8_t int_fast8_t;
|
||||||
|
typedef int16_t int_fast16_t;
|
||||||
|
typedef int32_t int_fast32_t;
|
||||||
|
typedef int64_t int_fast64_t;
|
||||||
|
typedef uint8_t uint_fast8_t;
|
||||||
|
typedef uint16_t uint_fast16_t;
|
||||||
|
typedef uint32_t uint_fast32_t;
|
||||||
|
typedef uint64_t uint_fast64_t;
|
||||||
|
|
||||||
|
// 7.18.1.4 Integer types capable of holding object pointers
|
||||||
|
#ifdef _WIN64 // [
|
||||||
|
typedef signed __int64 intptr_t;
|
||||||
|
typedef unsigned __int64 uintptr_t;
|
||||||
|
#else // _WIN64 ][
|
||||||
|
typedef _W64 signed int intptr_t;
|
||||||
|
typedef _W64 unsigned int uintptr_t;
|
||||||
|
#endif // _WIN64 ]
|
||||||
|
|
||||||
|
// 7.18.1.5 Greatest-width integer types
|
||||||
|
typedef int64_t intmax_t;
|
||||||
|
typedef uint64_t uintmax_t;
|
||||||
|
|
||||||
|
|
||||||
|
// 7.18.2 Limits of specified-width integer types
|
||||||
|
|
||||||
|
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
|
||||||
|
|
||||||
|
// 7.18.2.1 Limits of exact-width integer types
|
||||||
|
#define INT8_MIN ((int8_t)_I8_MIN)
|
||||||
|
#define INT8_MAX _I8_MAX
|
||||||
|
#define INT16_MIN ((int16_t)_I16_MIN)
|
||||||
|
#define INT16_MAX _I16_MAX
|
||||||
|
#define INT32_MIN ((int32_t)_I32_MIN)
|
||||||
|
#define INT32_MAX _I32_MAX
|
||||||
|
#define INT64_MIN ((int64_t)_I64_MIN)
|
||||||
|
#define INT64_MAX _I64_MAX
|
||||||
|
#define UINT8_MAX _UI8_MAX
|
||||||
|
#define UINT16_MAX _UI16_MAX
|
||||||
|
#define UINT32_MAX _UI32_MAX
|
||||||
|
#define UINT64_MAX _UI64_MAX
|
||||||
|
|
||||||
|
// 7.18.2.2 Limits of minimum-width integer types
|
||||||
|
#define INT_LEAST8_MIN INT8_MIN
|
||||||
|
#define INT_LEAST8_MAX INT8_MAX
|
||||||
|
#define INT_LEAST16_MIN INT16_MIN
|
||||||
|
#define INT_LEAST16_MAX INT16_MAX
|
||||||
|
#define INT_LEAST32_MIN INT32_MIN
|
||||||
|
#define INT_LEAST32_MAX INT32_MAX
|
||||||
|
#define INT_LEAST64_MIN INT64_MIN
|
||||||
|
#define INT_LEAST64_MAX INT64_MAX
|
||||||
|
#define UINT_LEAST8_MAX UINT8_MAX
|
||||||
|
#define UINT_LEAST16_MAX UINT16_MAX
|
||||||
|
#define UINT_LEAST32_MAX UINT32_MAX
|
||||||
|
#define UINT_LEAST64_MAX UINT64_MAX
|
||||||
|
|
||||||
|
// 7.18.2.3 Limits of fastest minimum-width integer types
|
||||||
|
#define INT_FAST8_MIN INT8_MIN
|
||||||
|
#define INT_FAST8_MAX INT8_MAX
|
||||||
|
#define INT_FAST16_MIN INT16_MIN
|
||||||
|
#define INT_FAST16_MAX INT16_MAX
|
||||||
|
#define INT_FAST32_MIN INT32_MIN
|
||||||
|
#define INT_FAST32_MAX INT32_MAX
|
||||||
|
#define INT_FAST64_MIN INT64_MIN
|
||||||
|
#define INT_FAST64_MAX INT64_MAX
|
||||||
|
#define UINT_FAST8_MAX UINT8_MAX
|
||||||
|
#define UINT_FAST16_MAX UINT16_MAX
|
||||||
|
#define UINT_FAST32_MAX UINT32_MAX
|
||||||
|
#define UINT_FAST64_MAX UINT64_MAX
|
||||||
|
|
||||||
|
// 7.18.2.4 Limits of integer types capable of holding object pointers
|
||||||
|
#ifdef _WIN64 // [
|
||||||
|
# define INTPTR_MIN INT64_MIN
|
||||||
|
# define INTPTR_MAX INT64_MAX
|
||||||
|
# define UINTPTR_MAX UINT64_MAX
|
||||||
|
#else // _WIN64 ][
|
||||||
|
# define INTPTR_MIN INT32_MIN
|
||||||
|
# define INTPTR_MAX INT32_MAX
|
||||||
|
# define UINTPTR_MAX UINT32_MAX
|
||||||
|
#endif // _WIN64 ]
|
||||||
|
|
||||||
|
// 7.18.2.5 Limits of greatest-width integer types
|
||||||
|
#define INTMAX_MIN INT64_MIN
|
||||||
|
#define INTMAX_MAX INT64_MAX
|
||||||
|
#define UINTMAX_MAX UINT64_MAX
|
||||||
|
|
||||||
|
// 7.18.3 Limits of other integer types
|
||||||
|
|
||||||
|
#ifdef _WIN64 // [
|
||||||
|
# define PTRDIFF_MIN _I64_MIN
|
||||||
|
# define PTRDIFF_MAX _I64_MAX
|
||||||
|
#else // _WIN64 ][
|
||||||
|
# define PTRDIFF_MIN _I32_MIN
|
||||||
|
# define PTRDIFF_MAX _I32_MAX
|
||||||
|
#endif // _WIN64 ]
|
||||||
|
|
||||||
|
#define SIG_ATOMIC_MIN INT_MIN
|
||||||
|
#define SIG_ATOMIC_MAX INT_MAX
|
||||||
|
|
||||||
|
#ifndef SIZE_MAX // [
|
||||||
|
# ifdef _WIN64 // [
|
||||||
|
# define SIZE_MAX _UI64_MAX
|
||||||
|
# else // _WIN64 ][
|
||||||
|
# define SIZE_MAX _UI32_MAX
|
||||||
|
# endif // _WIN64 ]
|
||||||
|
#endif // SIZE_MAX ]
|
||||||
|
|
||||||
|
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
|
||||||
|
#ifndef WCHAR_MIN // [
|
||||||
|
# define WCHAR_MIN 0
|
||||||
|
#endif // WCHAR_MIN ]
|
||||||
|
#ifndef WCHAR_MAX // [
|
||||||
|
# define WCHAR_MAX _UI16_MAX
|
||||||
|
#endif // WCHAR_MAX ]
|
||||||
|
|
||||||
|
#define WINT_MIN 0
|
||||||
|
#define WINT_MAX _UI16_MAX
|
||||||
|
|
||||||
|
#endif // __STDC_LIMIT_MACROS ]
|
||||||
|
|
||||||
|
|
||||||
|
// 7.18.4 Macros for integer constants
|
||||||
|
|
||||||
|
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
|
||||||
|
|
||||||
|
// 7.18.4.1 Macros for minimum-width integer constants
|
||||||
|
|
||||||
|
#define INT8_C(val) val##i8
|
||||||
|
#define INT16_C(val) val##i16
|
||||||
|
#define INT32_C(val) val##i32
|
||||||
|
#define INT64_C(val) val##i64
|
||||||
|
|
||||||
|
#define UINT8_C(val) val##ui8
|
||||||
|
#define UINT16_C(val) val##ui16
|
||||||
|
#define UINT32_C(val) val##ui32
|
||||||
|
#define UINT64_C(val) val##ui64
|
||||||
|
|
||||||
|
// 7.18.4.2 Macros for greatest-width integer constants
|
||||||
|
#define INTMAX_C INT64_C
|
||||||
|
#define UINTMAX_C UINT64_C
|
||||||
|
|
||||||
|
#endif // __STDC_CONSTANT_MACROS ]
|
||||||
|
|
||||||
|
|
||||||
|
#endif // _MSC_STDINT_H_ ]
|
1233
src/calibre/utils/chm/swig_chm.c
Normal file
1233
src/calibre/utils/chm/swig_chm.c
Normal file
File diff suppressed because it is too large
Load Diff
214
src/calibre/utils/chm/swig_chm.i
Normal file
214
src/calibre/utils/chm/swig_chm.i
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
%module chmlib
|
||||||
|
%include "typemaps.i"
|
||||||
|
%include "cstring.i"
|
||||||
|
|
||||||
|
%{
|
||||||
|
/*
|
||||||
|
Copyright (C) 2003 Rubens Ramos <rubensr@users.sourceforge.net>
|
||||||
|
|
||||||
|
Based on code by:
|
||||||
|
Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
|
||||||
|
|
||||||
|
pychm is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License as
|
||||||
|
published by the Free Software Foundation; either version 2 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public
|
||||||
|
License along with this program; see the file COPYING. If not,
|
||||||
|
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||||
|
Boston, MA 02111-1307, USA
|
||||||
|
|
||||||
|
$Id: swig_chm.i,v 1.1.1.1 2003/12/02 12:38:14 rubensr Exp $
|
||||||
|
*/
|
||||||
|
#include "chm_lib.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
static PyObject *my_callback = NULL;
|
||||||
|
|
||||||
|
/*
 * Remember `arg` as the Python callable that dummy_enumerator will call.
 *
 * Returns a new reference to None, or NULL with TypeError set when `arg`
 * is not callable.  The previously stored callback, if any, is released.
 */
static PyObject *
my_set_callback(PyObject *dummy, PyObject *arg)
{
  if (PyCallable_Check(arg) == 0) {
    PyErr_SetString(PyExc_TypeError, "parameter must be callable");
    return NULL;
  }

  /* Take the new reference before dropping the old one. */
  Py_XINCREF(arg);
  Py_XDECREF(my_callback);
  my_callback = arg;

  Py_INCREF(Py_None);
  return Py_None;
}
|
||||||
|
|
||||||
|
int dummy_enumerator (struct chmFile *h,
|
||||||
|
struct chmUnitInfo *ui,
|
||||||
|
void *context) {
|
||||||
|
PyObject *arglist;
|
||||||
|
PyObject *result;
|
||||||
|
PyObject *py_h;
|
||||||
|
PyObject *py_ui;
|
||||||
|
PyObject *py_c;
|
||||||
|
|
||||||
|
py_h = SWIG_NewPointerObj((void *) h, SWIGTYPE_p_chmFile, 0);
|
||||||
|
py_ui = SWIG_NewPointerObj((void *) ui, SWIGTYPE_p_chmUnitInfo, 0);
|
||||||
|
py_c = PyCObject_AsVoidPtr(context);
|
||||||
|
|
||||||
|
/* Time to call the callback */
|
||||||
|
arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
|
||||||
|
if (arglist) {
|
||||||
|
result = PyEval_CallObject(my_callback, arglist);
|
||||||
|
Py_DECREF(arglist);
|
||||||
|
Py_DECREF(result);
|
||||||
|
|
||||||
|
Py_DECREF(py_h);
|
||||||
|
Py_DECREF(py_ui);
|
||||||
|
|
||||||
|
if (result == NULL) {
|
||||||
|
return 0; /* Pass error back */
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
%}
|
||||||
|
|
||||||
|
/* Input typemap for CHM_ENUMERATOR arguments: registers the Python
   callable passed by the caller via my_set_callback and hands chmlib the
   C trampoline dummy_enumerator in its place.  Fails the wrapper (with
   the TypeError set by my_set_callback) if the argument is not callable. */
%typemap(in) CHM_ENUMERATOR {
  PyObject *registered = my_set_callback(self, $input);
  if (registered == NULL) goto fail;
  /* my_set_callback returns a new reference to None; release it. */
  Py_DECREF(registered);
  $1 = dummy_enumerator;
}
|
||||||
|
|
||||||
|
/* Input typemap for `void *context`: wraps the caller's Python object in
   a PyCObject so it can travel through chmlib as an opaque pointer;
   dummy_enumerator unwraps it again before calling the Python callback. */
%typemap(in) void *context {
  if (!($1 = PyCObject_FromVoidPtr($input, NULL))) goto fail;
}

/* Release the PyCObject wrapper created above once the wrapped call has
   returned; without this every call leaks one wrapper object.
   Py_XDECREF tolerates the NULL left behind when conversion never ran. */
%typemap(freearg) void *context {
  Py_XDECREF((PyObject *) $1);
}
|
||||||
|
|
||||||
|
/* Output-only argument: the Python caller passes nothing (numinputs=0).
   A zero-initialised chmUnitInfo is allocated for chmlib to fill in; the
   matching argout typemap wraps it and returns it to Python. */
%typemap(in, numinputs=0) struct chmUnitInfo *OutValue (struct chmUnitInfo *temp = (struct chmUnitInfo *) calloc(1, sizeof(struct chmUnitInfo))) {
  /* Do not hand a NULL buffer to chmlib if the allocation failed. */
  if (temp == NULL) {
    PyErr_NoMemory();
    SWIG_fail;
  }
  $1 = temp;
}
|
||||||
|
|
||||||
|
/* Appends the filled-in chmUnitInfo to the wrapper's return value.
   The struct is wrapped with the ownership flag set (last argument 1).
   - no result yet, or result is None: the wrapped struct is the result;
   - otherwise the existing result (promoted to a 1-tuple if it is not
     already a tuple) is concatenated with a 1-tuple holding the struct. */
%typemap(argout) struct chmUnitInfo *OutValue {
  PyObject *unit, *previous, *tail;

  unit = SWIG_NewPointerObj((void *) $1, SWIGTYPE_p_chmUnitInfo, 1);

  if ($result && ($result != Py_None)) {
    if (!PyTuple_Check($result)) {
      /* Promote a scalar result to a 1-tuple; SetItem steals the ref. */
      PyObject *scalar = $result;
      $result = PyTuple_New(1);
      PyTuple_SetItem($result, 0, scalar);
    }
    previous = $result;
    tail = PyTuple_New(1);
    PyTuple_SetItem(tail, 0, unit);
    /* Concat builds a new tuple, so both inputs are released afterwards. */
    $result = PySequence_Concat(previous, tail);
    Py_DECREF(previous);
    Py_DECREF(tail);
  } else {
    $result = unit;
  }
}
|
||||||
|
|
||||||
|
/* Allocates the output buffer for `unsigned char *OUTPUT`.
   Nasty hack: the size comes from arg5, the SWIG-generated local for the
   wrapped function's fifth C argument (`len` of chm_retrieve_object), so
   this typemap only works for functions with that argument layout.
   The matching argout typemap releases the buffer. */
%typemap(check) unsigned char *OUTPUT {
  /* A negative length would turn into a huge unsigned allocation size. */
  if (arg5 < 0) {
    PyErr_SetString(PyExc_ValueError, "length must be non-negative");
    SWIG_fail;
  }
  /* Allocate at least one byte: malloc(0) may legitimately return NULL. */
#ifdef __cplusplus
  $1 = ($1_ltype) new char[(size_t) (arg5 > 0 ? arg5 : 1)];
#else
  $1 = ($1_ltype) malloc((size_t) (arg5 > 0 ? arg5 : 1));
#endif
  if ($1 == NULL) {
    /* Report the failure instead of failing with no exception set. */
    PyErr_NoMemory();
    SWIG_fail;
  }
}
|
||||||
|
|
||||||
|
/* Copies the buffer allocated by the check typemap into a Python string
   of arg5 bytes, releases the buffer, and appends the string to the
   wrapper's result via t_output_helper.
   NOTE(review): the string length is the requested length (arg5), not
   the count returned by the wrapped call -- confirm that callers
   truncate to the returned size when fewer bytes were read. */
%typemap(argout,fragment="t_output_helper") unsigned char *OUTPUT {
  PyObject *o;
  o = PyString_FromStringAndSize((const char *) $1, arg5);
  /* The string owns a copy, so the buffer can be released on every path. */
#ifdef __cplusplus
  delete [] $1;
#else
  free($1);
#endif
  /* Do not feed a NULL object to t_output_helper; the error is already set. */
  if (o == NULL) SWIG_fail;
  $result = t_output_helper($result,o);
}
|
||||||
|
|
||||||
|
/* 64-bit integer aliases used by the declarations below; the Windows
   branch uses the compiler-specific __int64 spelling. */
#ifndef WIN32
typedef unsigned long long LONGUINT64;
typedef long long          LONGINT64;
#else
typedef unsigned __int64   LONGUINT64;
typedef __int64            LONGINT64;
#endif
|
||||||
|
|
||||||
|
/* the two available spaces in a CHM file */
|
||||||
|
/* N.B.: The format supports arbitrarily many spaces, but only */
|
||||||
|
/* two appear to be used at present. */
|
||||||
|
#define CHM_UNCOMPRESSED (0)
|
||||||
|
#define CHM_COMPRESSED (1)
|
||||||
|
|
||||||
|
/* structure representing an ITS (CHM) file stream */
|
||||||
|
struct chmFile;
|
||||||
|
|
||||||
|
/* structure representing an element from an ITS file stream */
|
||||||
|
#define CHM_MAX_PATHLEN 256
|
||||||
|
struct chmUnitInfo
|
||||||
|
{
|
||||||
|
LONGUINT64 start;
|
||||||
|
LONGUINT64 length;
|
||||||
|
int space;
|
||||||
|
char path[CHM_MAX_PATHLEN+1];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* open an ITS archive */
|
||||||
|
struct chmFile* chm_open(const char *filename);
|
||||||
|
|
||||||
|
/* close an ITS archive */
|
||||||
|
void chm_close(struct chmFile *h);
|
||||||
|
|
||||||
|
/* methods for setting tuning parameters for a particular file */
|
||||||
|
#define CHM_PARAM_MAX_BLOCKS_CACHED 0
|
||||||
|
void chm_set_param(struct chmFile *h,
|
||||||
|
int paramType,
|
||||||
|
int paramVal);
|
||||||
|
|
||||||
|
/* resolve a particular object from the archive */
|
||||||
|
#define CHM_RESOLVE_SUCCESS (0)
|
||||||
|
#define CHM_RESOLVE_FAILURE (1)
|
||||||
|
int chm_resolve_object(struct chmFile *h,
|
||||||
|
const char *objPath,
|
||||||
|
struct chmUnitInfo *OutValue);
|
||||||
|
|
||||||
|
/* retrieve part of an object from the archive */
|
||||||
|
LONGINT64 chm_retrieve_object(struct chmFile *h,
|
||||||
|
struct chmUnitInfo *ui,
|
||||||
|
unsigned char *OUTPUT,
|
||||||
|
LONGUINT64 addr,
|
||||||
|
LONGINT64 len);
|
||||||
|
|
||||||
|
/* enumerate the objects in the .chm archive */
|
||||||
|
typedef int (*CHM_ENUMERATOR)(struct chmFile *h,
|
||||||
|
struct chmUnitInfo *ui,
|
||||||
|
void *context);
|
||||||
|
#define CHM_ENUMERATE_NORMAL (1)
|
||||||
|
#define CHM_ENUMERATE_META (2)
|
||||||
|
#define CHM_ENUMERATE_SPECIAL (4)
|
||||||
|
#define CHM_ENUMERATE_FILES (8)
|
||||||
|
#define CHM_ENUMERATE_DIRS (16)
|
||||||
|
#define CHM_ENUMERATE_ALL (31)
|
||||||
|
#define CHM_ENUMERATOR_FAILURE (0)
|
||||||
|
#define CHM_ENUMERATOR_CONTINUE (1)
|
||||||
|
#define CHM_ENUMERATOR_SUCCESS (2)
|
||||||
|
int chm_enumerate(struct chmFile *h,
|
||||||
|
int what,
|
||||||
|
CHM_ENUMERATOR e,
|
||||||
|
void *context);
|
||||||
|
|
||||||
|
int chm_enumerate_dir(struct chmFile *h,
|
||||||
|
const char *prefix,
|
||||||
|
int what,
|
||||||
|
CHM_ENUMERATOR e,
|
||||||
|
void *context);
|
Loading…
x
Reference in New Issue
Block a user