IGN: RTF Input: Give images their proper extensions

This commit is contained in:
Kovid Goyal 2009-07-30 16:11:14 -06:00
parent ecfa8d8385
commit d54828c88c
2 changed files with 19 additions and 3 deletions

View File

@ -69,6 +69,8 @@ class RTFInput(InputFormatPlugin):
for match in re.finditer(r'\{\\pict([^}]+)\}', raw): for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
starts.append(match.start(1)) starts.append(match.start(1))
imap = {}
for start in starts: for start in starts:
pos, bc = start, 1 pos, bc = start, 1
while bc > 0: while bc > 0:
@ -80,10 +82,17 @@ class RTFInput(InputFormatPlugin):
if len(enc) % 2 == 1: if len(enc) % 2 == 1:
enc = enc[:-1] enc = enc[:-1]
data = enc.decode('hex') data = enc.decode('hex')
ext = '.jpg'
if 'EMF' in data[:200]:
ext = '.wmf'
elif 'PNG' in data[:200]:
ext = '.png'
count += 1 count += 1
name = (('%4d'%count).replace(' ', '0'))+'.jpg' name = (('%4d'%count).replace(' ', '0'))+ext
open(name, 'wb').write(data) open(name, 'wb').write(data)
imap[count] = name
#open(name+'.hex', 'wb').write(enc) #open(name+'.hex', 'wb').write(enc)
return imap
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
@ -100,13 +109,20 @@ class RTFInput(InputFormatPlugin):
'support. Convert it to HTML first and then try it.')) 'support. Convert it to HTML first and then try it.'))
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf')) d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d: if d:
imap = {}
try: try:
self.extract_images(d[0]) imap = self.extract_images(d[0])
except: except:
self.log.exception('Failed to extract images...') self.log.exception('Failed to extract images...')
self.log('Parsing XML...') self.log('Parsing XML...')
parser = etree.XMLParser(recover=True, no_network=True) parser = etree.XMLParser(recover=True, no_network=True)
doc = etree.fromstring(xml, parser=parser) doc = etree.fromstring(xml, parser=parser)
for pict in doc.xpath('//rtf:pict[@num]',
namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
num = int(pict.get('num'))
name = imap.get(num, None)
if name is not None:
pict.set('num', name)
self.log('Converting XML to HTML...') self.log('Converting XML to HTML...')
styledoc = etree.fromstring(xhtml) styledoc = etree.fromstring(xhtml)

View File

@ -525,7 +525,7 @@ xhtml = '''\
<xsl:template match="rtf:pict"> <xsl:template match="rtf:pict">
<xsl:element name="img"> <xsl:element name="img">
<xsl:attribute name="src"><xsl:value-of select="@num" />.jpg</xsl:attribute> <xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
</xsl:element> </xsl:element>
</xsl:template> </xsl:template>