IGN: RTF Input: Give images their proper extensions

This commit is contained in:
Kovid Goyal 2009-07-30 16:11:14 -06:00
parent ecfa8d8385
commit d54828c88c
2 changed files with 19 additions and 3 deletions

View File

@@ -69,6 +69,8 @@ class RTFInput(InputFormatPlugin):
for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
starts.append(match.start(1))
imap = {}
for start in starts:
pos, bc = start, 1
while bc > 0:
@@ -80,10 +82,17 @@ class RTFInput(InputFormatPlugin):
if len(enc) % 2 == 1:
enc = enc[:-1]
data = enc.decode('hex')
ext = '.jpg'
if 'EMF' in data[:200]:
ext = '.wmf'
elif 'PNG' in data[:200]:
ext = '.png'
count += 1
name = (('%4d'%count).replace(' ', '0'))+'.jpg'
name = (('%4d'%count).replace(' ', '0'))+ext
open(name, 'wb').write(data)
imap[count] = name
#open(name+'.hex', 'wb').write(enc)
return imap
def convert(self, stream, options, file_ext, log,
accelerators):
@@ -100,13 +109,20 @@ class RTFInput(InputFormatPlugin):
'support. Convert it to HTML first and then try it.'))
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
if d:
imap = {}
try:
self.extract_images(d[0])
imap = self.extract_images(d[0])
except:
self.log.exception('Failed to extract images...')
self.log('Parsing XML...')
parser = etree.XMLParser(recover=True, no_network=True)
doc = etree.fromstring(xml, parser=parser)
for pict in doc.xpath('//rtf:pict[@num]',
namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
num = int(pict.get('num'))
name = imap.get(num, None)
if name is not None:
pict.set('num', name)
self.log('Converting XML to HTML...')
styledoc = etree.fromstring(xhtml)

View File

@@ -525,7 +525,7 @@ xhtml = '''\
<xsl:template match="rtf:pict">
<xsl:element name="img">
<xsl:attribute name="src"><xsl:value-of select="@num" />.jpg</xsl:attribute>
<xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
</xsl:element>
</xsl:template>