mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RTF Input: Support extraction of images (JPEG/PNG only)
This commit is contained in:
parent
d0a1ce4825
commit
ecfa8d8385
@ -2,7 +2,7 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import os
|
import os, glob, re
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -61,6 +61,30 @@ class RTFInput(InputFormatPlugin):
|
|||||||
os.remove('out.xml')
|
os.remove('out.xml')
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def extract_images(self, picts):
|
||||||
|
self.log('Extracting images...')
|
||||||
|
count = 0
|
||||||
|
raw = open(picts, 'rb').read()
|
||||||
|
starts = []
|
||||||
|
for match in re.finditer(r'\{\\pict([^}]+)\}', raw):
|
||||||
|
starts.append(match.start(1))
|
||||||
|
|
||||||
|
for start in starts:
|
||||||
|
pos, bc = start, 1
|
||||||
|
while bc > 0:
|
||||||
|
if raw[pos] == '}': bc -= 1
|
||||||
|
elif raw[pos] == '{': bc += 1
|
||||||
|
pos += 1
|
||||||
|
pict = raw[start:pos+1]
|
||||||
|
enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
|
||||||
|
if len(enc) % 2 == 1:
|
||||||
|
enc = enc[:-1]
|
||||||
|
data = enc.decode('hex')
|
||||||
|
count += 1
|
||||||
|
name = (('%4d'%count).replace(' ', '0'))+'.jpg'
|
||||||
|
open(name, 'wb').write(data)
|
||||||
|
#open(name+'.hex', 'wb').write(enc)
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
from calibre.ebooks.rtf.xsl import xhtml
|
from calibre.ebooks.rtf.xsl import xhtml
|
||||||
@ -74,6 +98,12 @@ class RTFInput(InputFormatPlugin):
|
|||||||
except RtfInvalidCodeException:
|
except RtfInvalidCodeException:
|
||||||
raise ValueError(_('This RTF file has a feature calibre does not '
|
raise ValueError(_('This RTF file has a feature calibre does not '
|
||||||
'support. Convert it to HTML first and then try it.'))
|
'support. Convert it to HTML first and then try it.'))
|
||||||
|
d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
|
||||||
|
if d:
|
||||||
|
try:
|
||||||
|
self.extract_images(d[0])
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to extract images...')
|
||||||
self.log('Parsing XML...')
|
self.log('Parsing XML...')
|
||||||
parser = etree.XMLParser(recover=True, no_network=True)
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
doc = etree.fromstring(xml, parser=parser)
|
doc = etree.fromstring(xml, parser=parser)
|
||||||
|
@ -523,7 +523,11 @@ xhtml = '''\
|
|||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="rtf:pict" />
|
<xsl:template match="rtf:pict">
|
||||||
|
<xsl:element name="img">
|
||||||
|
<xsl:attribute name="src"><xsl:value-of select="@num" />.jpg</xsl:attribute>
|
||||||
|
</xsl:element>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="*">
|
<xsl:template match="*">
|
||||||
<xsl:message>
|
<xsl:message>
|
||||||
|
@ -55,7 +55,7 @@ class Pict:
|
|||||||
return "}\n"
|
return "}\n"
|
||||||
def __text_func(self, line):
|
def __text_func(self, line):
|
||||||
#tx<nu<__________<true text
|
#tx<nu<__________<true text
|
||||||
return line[18:]
|
return line[17:]
|
||||||
def __make_dir(self):
|
def __make_dir(self):
|
||||||
""" Make a dirctory to put the image data in"""
|
""" Make a dirctory to put the image data in"""
|
||||||
base_name = os.path.basename(getattr(self.__orig_file, 'name',
|
base_name = os.path.basename(getattr(self.__orig_file, 'name',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user