De-vendor pychm

This commit is contained in:
Kovid Goyal 2019-11-22 12:08:28 +05:30
parent 411da596b4
commit baf0685ceb
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
10 changed files with 33 additions and 6708 deletions

View File

@ -206,12 +206,6 @@ Files: src/calibre/headless/fontconfig_database.cpp
Copyright: Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies)
License: LGPL-2.1, LGPL-3
Files: src/calibre/utils/chm/*
Copyright: Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
License: GPL-2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-2 on Debian systems.
Files: src/calibre/utils/open_search/description.py
Copyright: 2011, John Schember <john@nachtimwald.com>, 2006, Ed Summers <ehs@pobox.com>
License: GPL-3

View File

@ -556,6 +556,15 @@
}
},
{
"name": "pychm",
"unix": {
"filename": "pychm-0.8.5.tar.gz",
"hash": "sha256:ec440c164a3fe4bd8aa906c24ed32cf296245932fe76b06bfd6b0734761d2cf5",
"urls": ["pypi"]
}
},
{
"name": "html5-parser",
"unix": {

View File

@ -71,15 +71,6 @@
"sources": "calibre/library/sqlite_custom.c",
"inc_dirs": "!sqlite_inc_dirs"
},
{
"name": "chmlib",
"sources": "calibre/utils/chm/swig_chm.c",
"libraries": "chm",
"windows_libraries": "ChmLib",
"inc_dirs": "!chmlib_inc_dirs",
"lib_dirs": "!chmlib_lib_dirs",
"defines": "SWIG_COBJECT_TYPES SWIG_PYTHON_STRICT_BYTE_CHAR"
},
{
"name": "lzx",
"sources": "calibre/utils/lzx/lzxmodule.c calibre/utils/lzx/compressor.c calibre/utils/lzx/lzxd.c calibre/utils/lzx/lzc.c calibre/utils/lzx/lzxc.c",

View File

@ -167,7 +167,6 @@ class Plugins(collections.Mapping):
'podofo',
'cPalmdoc',
'progress_indicator',
'chmlib',
'icu',
'speedup',
'html_as_json',

View File

@ -1,26 +1,19 @@
''' CHM File decoding support '''
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
' and Alex Bramley <a.bramley at gmail.com>.'
import os, re
import codecs
import os
import re
from calibre import guess_type as guess_mimetype
from calibre.constants import filesystem_encoding, iswindows
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
from calibre.constants import iswindows, filesystem_encoding
from calibre.utils.chm.chm import CHMFile
from calibre.constants import plugins
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import unicode_type, getcwd, as_unicode
chmlib, chmlib_err = plugins['chmlib']
if chmlib_err:
raise RuntimeError('Failed to load chmlib: ' + chmlib_err)
from calibre.ebooks.metadata.toc import TOC
from chm.chm import CHMFile, chmlib
from polyglot.builtins import as_unicode, getcwd, unicode_type
def match_string(s1, s2_already_lowered):
@ -76,6 +69,19 @@ class CHMReader(CHMFile):
self.root, ext = os.path.splitext(self.topics.lstrip('/'))
self.hhc_path = self.root + ".hhc"
def get_encoding(self):
ans = self.GetEncoding()
if ans is None:
lcid = self.GetLCID()
if lcid is not None:
ans = lcid[0]
if ans:
try:
codecs.lookup(ans)
except Exception:
ans = None
return ans
def _parse_toc(self, ul, basedir=getcwd()):
toc = TOC(play_order=self._playorder, base_path=basedir, text='')
self._playorder += 1

View File

@ -50,6 +50,10 @@ class BuildTest(unittest.TestCase):
from calibre.spell.dictionary import build_test
build_test()
def test_pychm(self):
from chm.chm import CHMFile, chmlib
del CHMFile, chmlib
def test_chardet(self):
from chardet import detect
raw = 'mūsi Füße'.encode('utf-8')

View File

@ -1,30 +0,0 @@
# Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
# pychm is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# $Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $
'''
chm - A package to manipulate CHM files
The chm package provides four modules: chm, chmlib, extra and
_chmlib. _chmlib and chmlib are very low level libraries generated
from SWIG interface files, and are simple wrappers around the API
defined by the C library chmlib.
The extra module adds full-text search support.
the chm module provides some higher level classes to simplify
access to the CHM files information.
'''
__all__ = ["chm", "chmlib", "_chmlib", "extra"]
__version__ = "0.8.4"
__revision__ = "$Id: __init__.py,v 1.8 2006/06/18 10:50:43 rubensr Exp $"

View File

@ -1,520 +0,0 @@
# Copyright (C) 2003-2006 Rubens Ramos <rubensr@users.sourceforge.net>
# Based on code by:
# Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
# pychm is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# $Id: chm.py,v 1.12 2006/08/07 12:31:51 rubensr Exp $
'''
chm - A high-level front end for the chmlib python module.
The chm module provides high level access to the functionality
included in chmlib. It encapsulates functions in the CHMFile class, and
provides some additional features, such as the ability to obtain
the contents tree of a CHM archive.
'''
import array
import codecs
import struct
import sys
from calibre.constants import plugins, filesystem_encoding
from polyglot.builtins import long_type
chmlib, chmlib_err = plugins['chmlib']
if chmlib_err:
raise RuntimeError('Failed to load chmlib: ' + chmlib_err)
charset_table = {
0 : 'iso8859_1', # ANSI_CHARSET
238 : 'iso8859_2', # EASTEUROPE_CHARSET
178 : 'iso8859_6', # ARABIC_CHARSET
161 : 'iso8859_7', # GREEK_CHARSET
177 : 'iso8859_8', # HEBREW_CHARSET
162 : 'iso8859_9', # TURKISH_CHARSET
222 : 'iso8859_11', # THAI_CHARSET - hmm not in python 2.2...
186 : 'iso8859_13', # BALTIC_CHARSET
204 : 'cp1251', # RUSSIAN_CHARSET
255 : 'cp437', # OEM_CHARSET
128 : 'cp932', # SHIFTJIS_CHARSET
134 : 'cp936', # GB2312_CHARSET
129 : 'cp949', # HANGUL_CHARSET
136 : 'cp950', # CHINESEBIG5_CHARSET
1 : None, # DEFAULT_CHARSET
2 : None, # SYMBOL_CHARSET
130 : None, # JOHAB_CHARSET
163 : None, # VIETNAMESE_CHARSET
77 : None, # MAC_CHARSET
}
locale_table = {
0x0436 : ('iso8859_1', "Afrikaans", "Western Europe & US"),
0x041c : ('iso8859_2', "Albanian", "Central Europe"),
0x0401 : ('iso8859_6', "Arabic_Saudi_Arabia", "Arabic"),
0x0801 : ('iso8859_6', "Arabic_Iraq", "Arabic"),
0x0c01 : ('iso8859_6', "Arabic_Egypt", "Arabic"),
0x1001 : ('iso8859_6', "Arabic_Libya", "Arabic"),
0x1401 : ('iso8859_6', "Arabic_Algeria", "Arabic"),
0x1801 : ('iso8859_6', "Arabic_Morocco", "Arabic"),
0x1c01 : ('iso8859_6', "Arabic_Tunisia", "Arabic"),
0x2001 : ('iso8859_6', "Arabic_Oman", "Arabic"),
0x2401 : ('iso8859_6', "Arabic_Yemen", "Arabic"),
0x2801 : ('iso8859_6', "Arabic_Syria", "Arabic"),
0x2c01 : ('iso8859_6', "Arabic_Jordan", "Arabic"),
0x3001 : ('iso8859_6', "Arabic_Lebanon", "Arabic"),
0x3401 : ('iso8859_6', "Arabic_Kuwait", "Arabic"),
0x3801 : ('iso8859_6', "Arabic_UAE", "Arabic"),
0x3c01 : ('iso8859_6', "Arabic_Bahrain", "Arabic"),
0x4001 : ('iso8859_6', "Arabic_Qatar", "Arabic"),
0x042b : (None, "Armenian","Armenian"),
0x042c : ('iso8859_9', "Azeri_Latin", "Turkish"),
0x082c : ('cp1251', "Azeri_Cyrillic", "Cyrillic"),
0x042d : ('iso8859_1', "Basque", "Western Europe & US"),
0x0423 : ('cp1251', "Belarusian", "Cyrillic"),
0x0402 : ('cp1251', "Bulgarian", "Cyrillic"),
0x0403 : ('iso8859_1', "Catalan", "Western Europe & US"),
0x0404 : ('cp950', "Chinese_Taiwan", "Traditional Chinese"),
0x0804 : ('cp936', "Chinese_PRC", "Simplified Chinese"),
0x0c04 : ('cp950', "Chinese_Hong_Kong", "Traditional Chinese"),
0x1004 : ('cp936', "Chinese_Singapore", "Simplified Chinese"),
0x1404 : ('cp950', "Chinese_Macau", "Traditional Chinese"),
0x041a : ('iso8859_2', "Croatian", "Central Europe"),
0x0405 : ('iso8859_2', "Czech", "Central Europe"),
0x0406 : ('iso8859_1', "Danish", "Western Europe & US"),
0x0413 : ('iso8859_1', "Dutch_Standard", "Western Europe & US"),
0x0813 : ('iso8859_1', "Dutch_Belgian", "Western Europe & US"),
0x0409 : ('iso8859_1', "English_United_States", "Western Europe & US"),
0x0809 : ('iso8859_1', "English_United_Kingdom", "Western Europe & US"),
0x0c09 : ('iso8859_1', "English_Australian", "Western Europe & US"),
0x1009 : ('iso8859_1', "English_Canadian", "Western Europe & US"),
0x1409 : ('iso8859_1', "English_New_Zealand", "Western Europe & US"),
0x1809 : ('iso8859_1', "English_Irish", "Western Europe & US"),
0x1c09 : ('iso8859_1', "English_South_Africa", "Western Europe & US"),
0x2009 : ('iso8859_1', "English_Jamaica", "Western Europe & US"),
0x2409 : ('iso8859_1', "English_Caribbean", "Western Europe & US"),
0x2809 : ('iso8859_1', "English_Belize", "Western Europe & US"),
0x2c09 : ('iso8859_1', "English_Trinidad", "Western Europe & US"),
0x3009 : ('iso8859_1', "English_Zimbabwe", "Western Europe & US"),
0x3409 : ('iso8859_1', "English_Philippines", "Western Europe & US"),
0x0425 : ('iso8859_13',"Estonian", "Baltic",),
0x0438 : ('iso8859_1', "Faeroese", "Western Europe & US"),
0x0429 : ('iso8859_6', "Farsi", "Arabic"),
0x040b : ('iso8859_1', "Finnish", "Western Europe & US"),
0x040c : ('iso8859_1', "French_Standard", "Western Europe & US"),
0x080c : ('iso8859_1', "French_Belgian", "Western Europe & US"),
0x0c0c : ('iso8859_1', "French_Canadian", "Western Europe & US"),
0x100c : ('iso8859_1', "French_Swiss", "Western Europe & US"),
0x140c : ('iso8859_1', "French_Luxembourg", "Western Europe & US"),
0x180c : ('iso8859_1', "French_Monaco", "Western Europe & US"),
0x0437 : (None, "Georgian", "Georgian"),
0x0407 : ('iso8859_1', "German_Standard", "Western Europe & US"),
0x0807 : ('iso8859_1', "German_Swiss", "Western Europe & US"),
0x0c07 : ('iso8859_1', "German_Austrian", "Western Europe & US"),
0x1007 : ('iso8859_1', "German_Luxembourg", "Western Europe & US"),
0x1407 : ('iso8859_1', "German_Liechtenstein", "Western Europe & US"),
0x0408 : ('iso8859_7', "Greek", "Greek"),
0x040d : ('iso8859_8', "Hebrew", "Hebrew"),
0x0439 : (None, "Hindi", "Indic"),
0x040e : ('iso8859_2', "Hungarian", "Central Europe"),
0x040f : ('iso8859_1', "Icelandic", "Western Europe & US"),
0x0421 : ('iso8859_1', "Indonesian", "Western Europe & US"),
0x0410 : ('iso8859_1', "Italian_Standard", "Western Europe & US"),
0x0810 : ('iso8859_1', "Italian_Swiss", "Western Europe & US"),
0x0411 : ('cp932', "Japanese", "Japanese"),
0x043f : ('cp1251', "Kazakh", "Cyrillic"),
0x0457 : (None, "Konkani", "Indic"),
0x0412 : ('cp949', "Korean", "Korean"),
0x0426 : ('iso8859_13',"Latvian", "Baltic",),
0x0427 : ('iso8859_13',"Lithuanian", "Baltic",),
0x042f : ('cp1251', "Macedonian", "Cyrillic"),
0x043e : ('iso8859_1', "Malay_Malaysia", "Western Europe & US"),
0x083e : ('iso8859_1', "Malay_Brunei_Darussalam", "Western Europe & US"),
0x044e : (None, "Marathi", "Indic"),
0x0414 : ('iso8859_1', "Norwegian_Bokmal", "Western Europe & US"),
0x0814 : ('iso8859_1', "Norwegian_Nynorsk", "Western Europe & US"),
0x0415 : ('iso8859_2', "Polish", "Central Europe"),
0x0416 : ('iso8859_1', "Portuguese_Brazilian", "Western Europe & US"),
0x0816 : ('iso8859_1', "Portuguese_Standard", "Western Europe & US"),
0x0418 : ('iso8859_2', "Romanian", "Central Europe"),
0x0419 : ('cp1251', "Russian", "Cyrillic"),
0x044f : (None, "Sanskrit", "Indic"),
0x081a : ('iso8859_2', "Serbian_Latin", "Central Europe"),
0x0c1a : ('cp1251', "Serbian_Cyrillic", "Cyrillic"),
0x041b : ('iso8859_2', "Slovak", "Central Europe"),
0x0424 : ('iso8859_2', "Slovenian", "Central Europe"),
0x040a : ('iso8859_1', "Spanish_Trad_Sort", "Western Europe & US"),
0x080a : ('iso8859_1', "Spanish_Mexican", "Western Europe & US"),
0x0c0a : ('iso8859_1', "Spanish_Modern_Sort", "Western Europe & US"),
0x100a : ('iso8859_1', "Spanish_Guatemala", "Western Europe & US"),
0x140a : ('iso8859_1', "Spanish_Costa_Rica", "Western Europe & US"),
0x180a : ('iso8859_1', "Spanish_Panama", "Western Europe & US"),
0x1c0a : ('iso8859_1', "Spanish_Dominican_Repub", "Western Europe & US"),
0x200a : ('iso8859_1', "Spanish_Venezuela", "Western Europe & US"),
0x240a : ('iso8859_1', "Spanish_Colombia", "Western Europe & US"),
0x280a : ('iso8859_1', "Spanish_Peru", "Western Europe & US"),
0x2c0a : ('iso8859_1', "Spanish_Argentina", "Western Europe & US"),
0x300a : ('iso8859_1', "Spanish_Ecuador", "Western Europe & US"),
0x340a : ('iso8859_1', "Spanish_Chile", "Western Europe & US"),
0x380a : ('iso8859_1', "Spanish_Uruguay", "Western Europe & US"),
0x3c0a : ('iso8859_1', "Spanish_Paraguay", "Western Europe & US"),
0x400a : ('iso8859_1', "Spanish_Bolivia", "Western Europe & US"),
0x440a : ('iso8859_1', "Spanish_El_Salvador", "Western Europe & US"),
0x480a : ('iso8859_1', "Spanish_Honduras", "Western Europe & US"),
0x4c0a : ('iso8859_1', "Spanish_Nicaragua", "Western Europe & US"),
0x500a : ('iso8859_1', "Spanish_Puerto_Rico", "Western Europe & US"),
0x0441 : ('iso8859_1', "Swahili", "Western Europe & US"),
0x041d : ('iso8859_1', "Swedish", "Western Europe & US"),
0x081d : ('iso8859_1', "Swedish_Finland", "Western Europe & US"),
0x0449 : (None, "Tamil", "Indic"),
0x0444 : ('cp1251', "Tatar", "Cyrillic"),
0x041e : ('iso8859_11',"Thai", "Thai"),
0x041f : ('iso8859_9', "Turkish", "Turkish"),
0x0422 : ('cp1251', "Ukrainian", "Cyrillic"),
0x0420 : ('iso8859_6', "Urdu", "Arabic"),
0x0443 : ('iso8859_9', "Uzbek_Latin", "Turkish"),
0x0843 : ('cp1251', "Uzbek_Cyrillic", "Cyrillic"),
0x042a : ('cp1258', "Vietnamese", "Vietnamese")
}
def get_lcid(chm_file_obj):
for lang, offset in (
(b"/$FIftiMain", 0x7E),
(b"$WWKeywordLinks/BTree", 0x34),
(b"$WWAssociativeLinks/BTree", 0x34),
):
result, ui = chmlib.chm_resolve_object(chm_file_obj, lang)
if result == chmlib.CHM_RESOLVE_SUCCESS:
size, text = chmlib.chm_retrieve_object(chm_file_obj, ui, offset, 4)
if size == 4:
return struct.unpack("I", text)[0]
class CHMFile:
"A class to manage access to CHM files."
filename = ""
file = None
title = b""
home = b"/"
index = None
topics = None
encoding = None
lcid = None
binaryindex = None
def __init__(self):
self.searchable = 0
def LoadCHM(self, archiveName):
'''Loads a CHM archive.
This function will also call GetArchiveInfo to obtain information
such as the index file name and the topics file. It returns 1 on
success, and 0 if it fails.
'''
if self.filename is not None:
self.CloseCHM()
path = archiveName
if not isinstance(path, bytes):
path = path.encode(filesystem_encoding)
self.file = chmlib.chm_open(path)
if self.file is None:
return 0
self.filename = archiveName
self.GetArchiveInfo()
return 1
def CloseCHM(self):
'''Closes the CHM archive.
This function will close the CHM file, if it is open. All variables
are also reset.
'''
if self.filename is not None:
chmlib.chm_close(self.file)
self.file = None
self.filename = ''
self.title = b""
self.home = b"/"
self.index = None
self.topics = None
self.encoding = None
def GetArchiveInfo(self):
'''Obtains information on CHM archive.
This function checks the /#SYSTEM file inside the CHM archive to
obtain the index, home page, topics, encoding and title. It is called
from LoadCHM.
'''
self.searchable = False
self.lcid = None
result, ui = chmlib.chm_resolve_object(self.file, b'/#SYSTEM')
if (result != chmlib.CHM_RESOLVE_SUCCESS):
sys.stderr.write('GetArchiveInfo: #SYSTEM does not exist\n')
return 0
size, text = chmlib.chm_retrieve_object(self.file, ui, 4, ui.length)
if (size == 0):
sys.stderr.write('GetArchiveInfo: file size = 0\n')
return 0
buff = array.array('B', text)
index = 0
while (index < size):
cursor = buff[index] + (buff[index+1] * 256)
if cursor == 0:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
self.topics = b'/' + text[index:index+cursor-1]
elif cursor == 1:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
self.index = b'/' + text[index:index+cursor-1]
elif cursor == 2:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
self.home = b'/' + text[index:index+cursor-1]
elif cursor == 3:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
self.title = text[index:index+cursor-1]
elif cursor == 4:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
self.lcid = buff[index] + (buff[index+1] * 256)
elif cursor == 6:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
tmp = text[index:index+cursor-1]
if not self.topics:
tmp1 = b'/' + tmp + b'.hhc'
tmp2 = b'/' + tmp + b'.hhk'
res1, ui1 = chmlib.chm_resolve_object(self.file, tmp1)
res2, ui2 = chmlib.chm_resolve_object(self.file, tmp2)
if not self.topics and \
res1 == chmlib.CHM_RESOLVE_SUCCESS:
self.topics = b'/' + tmp + b'.hhc'
if not self.index and \
res2 == chmlib.CHM_RESOLVE_SUCCESS:
self.index = b'/' + tmp + b'.hhk'
elif cursor == 16:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
self.encoding = text[index:index+cursor-1]
else:
index += 2
cursor = buff[index] + (buff[index+1] * 256)
index += 2
index += cursor
self.GetWindowsInfo()
if not self.lcid:
lcid = get_lcid(self.file)
if lcid is not None:
self.lcid = lcid
return 1
def GetTopicsTree(self):
'''Reads and returns the topics tree.
This auxiliary function reads and returns the topics tree file
contents for the CHM archive.
'''
if self.topics is None:
return None
if self.topics:
res, ui = chmlib.chm_resolve_object(self.file, self.topics)
if (res != chmlib.CHM_RESOLVE_SUCCESS):
return None
size, text = chmlib.chm_retrieve_object(self.file, ui, 0, ui.length)
if size == 0:
sys.stderr.write('GetTopicsTree: file size = 0\n')
return None
return text
def GetIndex(self):
'''Reads and returns the index tree.
This auxiliary function reads and returns the index tree file
contents for the CHM archive.
'''
if (self.index is None):
return None
if self.index:
res, ui = chmlib.chm_resolve_object(self.file, self.index)
if (res != chmlib.CHM_RESOLVE_SUCCESS):
return None
size, text = chmlib.chm_retrieve_object(self.file, ui, 0, ui.length)
if (size == 0):
sys.stderr.write('GetIndex: file size = 0\n')
return None
return text
def ResolveObject(self, document):
'''Tries to locate a document in the archive.
This function tries to locate the document inside the archive. It
returns a tuple where the first element is zero if the function
was successful, and the second is the UnitInfo for that document.
The UnitInfo is used to retrieve the document contents
'''
if self.file:
return chmlib.chm_resolve_object(self.file, document)
return 1, None
def RetrieveObject(self, ui, start=-1, length=-1):
'''Retrieves the contents of a document.
This function takes a UnitInfo and two optional arguments, the first
being the start address and the second is the length. These define
the amount of data to be read from the archive.
'''
if self.file and ui:
if length == -1:
len = ui.length
else:
len = length
if start == -1:
st = 0
else:
st = long_type(start)
return chmlib.chm_retrieve_object(self.file, ui, st, len)
else:
return 0, b''
def IsSearchable(self):
'''Indicates if the full-text search is available for this
archive - this flag is updated when GetArchiveInfo is called'''
return self.searchable
def GetEncoding(self):
'''Returns a string that can be used with the codecs python package
to encode or decode the files in the chm archive. If an error is
found, or if it is not possible to find the encoding, None is
returned.'''
if self.encoding:
vals = self.encoding.split(b',')
if len(vals) > 2:
try:
return charset_table[int(vals[2])]
except KeyError:
pass
return None
def GetLCID(self):
'''Returns the archive Locale ID'''
if self.lcid in locale_table:
return locale_table[self.lcid]
else:
return None
def get_encoding(self):
ans = self.GetEncoding()
if ans is None:
lcid = self.GetLCID()
if lcid is not None:
ans = lcid[0]
if ans:
try:
codecs.lookup(ans)
except Exception:
ans = None
return ans
def GetDWORD(self, buff, idx=0):
'''Internal method.
Reads a double word (4 bytes) from a buffer.
'''
result = buff[idx] + (buff[idx+1] << 8) + (buff[idx+2] << 16) + \
(buff[idx+3] << 24)
if result == 0xFFFFFFFF:
result = 0
return result
def GetString(self, text, idx):
'''Internal method.
Retrieves a string from the #STRINGS buffer.
'''
next = text.find(b'\x00', idx)
chunk = text[idx:next]
return chunk
def GetWindowsInfo(self):
'''Gets information from the #WINDOWS file.
Checks the #WINDOWS file to see if it has any info that was
not found in #SYSTEM (topics, index or default page.
'''
result, ui = chmlib.chm_resolve_object(self.file, b'/#WINDOWS')
if result != chmlib.CHM_RESOLVE_SUCCESS:
return -1
size, text = chmlib.chm_retrieve_object(self.file, ui, 0, 8)
if size < 8:
return -2
buff = array.array('B', text)
num_entries = self.GetDWORD(buff, 0)
entry_size = self.GetDWORD(buff, 4)
if num_entries < 1:
return -3
size, text = chmlib.chm_retrieve_object(self.file, ui, 8, entry_size)
if size < entry_size:
return -4
buff = array.array('B', text)
toc_index = self.GetDWORD(buff, 0x60)
idx_index = self.GetDWORD(buff, 0x64)
dft_index = self.GetDWORD(buff, 0x68)
result, ui = chmlib.chm_resolve_object(self.file, b'/#STRINGS')
if result != chmlib.CHM_RESOLVE_SUCCESS:
return -5
size, text = chmlib.chm_retrieve_object(self.file, ui, 0, ui.length)
if size == 0:
return -6
if not self.topics:
self.topics = self.GetString(text, toc_index)
if not self.topics.startswith(b"/"):
self.topics = b"/" + self.topics
if not self.index:
self.index = self.GetString(text, idx_index)
if not self.index.startswith(b"/"):
self.index = b"/" + self.index
if dft_index != 0:
self.home = self.GetString(text, dft_index)
if not self.home.startswith(b"/"):
self.home = b"/" + self.home

File diff suppressed because it is too large Load Diff

View File

@ -1,209 +0,0 @@
%module chmlib
%include "typemaps.i"
%include "cstring.i"
%{
/*
Copyright (C) 2003 Rubens Ramos <rubensr@users.sourceforge.net>
Based on code by:
Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net>
pychm is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
$Id: swig_chm.i,v 1.1.1.1 2003/12/02 12:38:14 rubensr Exp $
*/
#include "chm_lib.h"
#include <stdio.h>
static PyObject *my_callback = NULL;
static PyObject *
my_set_callback(PyObject *dummy, PyObject *arg)
{
PyObject *result = NULL;
if (!PyCallable_Check(arg)) {
PyErr_SetString(PyExc_TypeError, "parameter must be callable");
return NULL;
}
Py_XINCREF(arg); /* Add a reference to new callback */
Py_XDECREF(my_callback); /* Dispose of previous callback */
my_callback = arg; /* Remember new callback */
/* Boilerplate to return "None" */
Py_INCREF(Py_None);
result = Py_None;
return result;
}
int dummy_enumerator (struct chmFile *h,
struct chmUnitInfo *ui,
void *context) {
PyObject *arglist;
PyObject *result;
PyObject *py_h;
PyObject *py_ui;
PyObject *py_c;
py_h = SWIG_NewPointerObj((void *) h, SWIGTYPE_p_chmFile, 0);
py_ui = SWIG_NewPointerObj((void *) ui, SWIGTYPE_p_chmUnitInfo, 0);
py_c = PyCapsule_GetPointer(context, NULL);
/* Time to call the callback */
arglist = Py_BuildValue("(OOO)", py_h, py_ui, py_c);
if (arglist) {
result = PyEval_CallObject(my_callback, arglist);
Py_DECREF(arglist);
Py_DECREF(py_h);
Py_DECREF(py_ui);
if (result == NULL) {
PyErr_Print();
return 0; /* Pass error back */
}
Py_DECREF(result);
return 1;
}
return 0;
}
%}
%typemap(in) CHM_ENUMERATOR {
if (!my_set_callback(self, $input)) goto fail;
$1 = dummy_enumerator;
}
%typemap(in) void *context {
if (!($1 = PyCapsule_New($input, NULL, NULL))) goto fail;
}
%typemap(in, numinputs=0) struct chmUnitInfo *OutValue (struct chmUnitInfo *temp = (struct chmUnitInfo *) calloc(1, sizeof(struct chmUnitInfo))) {
$1 = temp;
}
%typemap(argout) struct chmUnitInfo *OutValue {
PyObject *o, *o2, *o3;
o = SWIG_NewPointerObj((void *) $1, SWIGTYPE_p_chmUnitInfo, 1);
if ((!$result) || ($result == Py_None)) {
$result = o;
} else {
if (!PyTuple_Check($result)) {
PyObject *o2 = $result;
$result = PyTuple_New(1);
PyTuple_SetItem($result,0,o2);
}
o3 = PyTuple_New(1);
PyTuple_SetItem(o3,0,o);
o2 = $result;
$result = PySequence_Concat(o2,o3);
Py_DECREF(o2);
Py_DECREF(o3);
}
}
%typemap(check) unsigned char *OUTPUT {
/* nasty hack */
#ifdef __cplusplus
$1 = ($1_ltype) new char[arg5];
#else
$1 = ($1_ltype) malloc(arg5);
#endif
if ($1 == NULL) SWIG_fail;
}
%typemap(argout,fragment="t_output_helper") unsigned char *OUTPUT {
PyObject *o;
o = PyBytes_FromStringAndSize((const char*)$1, arg5);
$result = t_output_helper($result,o);
#ifdef __cplusplus
delete [] $1;
#else
free($1);
#endif
}
#ifdef WIN32
typedef unsigned __int64 LONGUINT64;
typedef __int64 LONGINT64;
#else
typedef unsigned long long LONGUINT64;
typedef long long LONGINT64;
#endif
/* the two available spaces in a CHM file */
/* N.B.: The format supports arbitrarily many spaces, but only */
/* two appear to be used at present. */
#define CHM_UNCOMPRESSED (0)
#define CHM_COMPRESSED (1)
/* structure representing an ITS (CHM) file stream */
struct chmFile;
/* structure representing an element from an ITS file stream */
#define CHM_MAX_PATHLEN 256
struct chmUnitInfo
{
LONGUINT64 start;
LONGUINT64 length;
int space;
char path[CHM_MAX_PATHLEN+1];
};
/* open an ITS archive */
struct chmFile* chm_open(const char *filename);
/* close an ITS archive */
void chm_close(struct chmFile *h);
/* methods for ssetting tuning parameters for particular file */
#define CHM_PARAM_MAX_BLOCKS_CACHED 0
void chm_set_param(struct chmFile *h,
int paramType,
int paramVal);
/* resolve a particular object from the archive */
#define CHM_RESOLVE_SUCCESS (0)
#define CHM_RESOLVE_FAILURE (1)
int chm_resolve_object(struct chmFile *h,
const char *objPath,
struct chmUnitInfo *OutValue);
/* retrieve part of an object from the archive */
LONGINT64 chm_retrieve_object(struct chmFile *h,
struct chmUnitInfo *ui,
unsigned char *OUTPUT,
LONGUINT64 addr,
LONGINT64 len);
/* enumerate the objects in the .chm archive */
typedef int (*CHM_ENUMERATOR)(struct chmFile *h,
struct chmUnitInfo *ui,
void *context);
#define CHM_ENUMERATE_NORMAL (1)
#define CHM_ENUMERATE_META (2)
#define CHM_ENUMERATE_SPECIAL (4)
#define CHM_ENUMERATE_FILES (8)
#define CHM_ENUMERATE_DIRS (16)
#define CHM_ENUMERATE_ALL (31)
#define CHM_ENUMERATOR_FAILURE (0)
#define CHM_ENUMERATOR_CONTINUE (1)
#define CHM_ENUMERATOR_SUCCESS (2)
int chm_enumerate(struct chmFile *h,
int what,
CHM_ENUMERATOR e,
void *context);
int chm_enumerate_dir(struct chmFile *h,
const char *prefix,
int what,
CHM_ENUMERATOR e,
void *context);