From a335d86cd59ba6d6374fd1aa43473bfe1896b40f Mon Sep 17 00:00:00 2001 From: Sengian Date: Sun, 9 Jan 2011 20:46:15 +0100 Subject: [PATCH] Add pict.rtf if debugging + simplify extract images in RTFinput --- src/calibre/ebooks/rtf/input.py | 51 ++++++++++++++++++++---------- src/calibre/ebooks/rtf2xml/pict.py | 4 ++- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 05c851a075..545c1fb3c8 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -127,35 +127,52 @@ class RTFInput(InputFormatPlugin): def extract_images(self, picts): self.log('Extracting images...') - - count = 0 + raw = open(picts, 'rb').read() - starts = [] - for match in re.finditer(r'\{\\pict([^}]+)\}', raw): - starts.append(match.start(1)) - + picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw)) + hex = re.compile(r'[^a-zA-Z0-9]') + encs = [hex.sub('', pict) for pict in picts] + + count = 0 imap = {} - - for start in starts: - pos, bc = start, 1 - while bc > 0: - if raw[pos] == '}': bc -= 1 - elif raw[pos] == '{': bc += 1 - pos += 1 - pict = raw[start:pos+1] - enc = re.sub(r'[^a-zA-Z0-9]', '', pict) + for enc in encs: if len(enc) % 2 == 1: enc = enc[:-1] data = enc.decode('hex') count += 1 - name = (('%4d'%count).replace(' ', '0'))+'.wmf' + name = '%04d.wmf' % count open(name, 'wb').write(data) imap[count] = name #open(name+'.hex', 'wb').write(enc) return self.convert_images(imap) + # count = 0 + # raw = open(picts, 'rb').read() + # starts = [] + # for match in re.finditer(r'\{\\pict([^}]+)\}', raw): + # starts.append(match.start(1)) + + # imap = {} + # for start in starts: + # pos, bc = start, 1 + # while bc > 0: + # if raw[pos] == '}': bc -= 1 + # elif raw[pos] == '{': bc += 1 + # pos += 1 + # pict = raw[start:pos+1] + # enc = re.sub(r'[^a-zA-Z0-9]', '', pict) + # if len(enc) % 2 == 1: + # enc = enc[:-1] + # data = enc.decode('hex') + # count += 1 + # name = (('%4d'%count).replace(' ', '0'))+'.wmf' + # open(name, 'wb').write(data) + # imap[count] = name + # #open(name+'.hex', 'wb').write(enc) + # return self.convert_images(imap) + def convert_images(self, imap): - for count, val in imap.items(): + for count, val in imap.iteritems(): try: imap[count] = self.convert_image(val) except: diff --git a/src/calibre/ebooks/rtf2xml/pict.py b/src/calibre/ebooks/rtf2xml/pict.py index be2cd9e600..a6cc2deade 100755 --- a/src/calibre/ebooks/rtf2xml/pict.py +++ b/src/calibre/ebooks/rtf2xml/pict.py @@ -146,7 +146,8 @@ class Pict: def process_pict(self): self.__make_dir() - with open(self.__file) as read_obj, open(self.__write_to, 'w') as write_obj: + with open(self.__file) as read_obj, \ + open(self.__write_to, 'w') as write_obj: for line in read_obj: self.__token_info = line[:16] if self.__token_info == 'ob