mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Ignore non internal links. Support composite images.
This commit is contained in:
parent
8557981a51
commit
644335d97b
@ -13,10 +13,9 @@ import zlib
|
|||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ptempfile import TemporaryFile
|
from calibre.ptempfile import TemporaryFile
|
||||||
from calibre.utils.magick import Image
|
from calibre.utils.magick import Image, create_canvas
|
||||||
|
|
||||||
DATATYPE_PHTML = 0
|
DATATYPE_PHTML = 0
|
||||||
DATATYPE_PHTML_COMPRESSED = 1
|
DATATYPE_PHTML_COMPRESSED = 1
|
||||||
@ -178,6 +177,7 @@ class SectionHeaderText(object):
|
|||||||
self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0])
|
self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0])
|
||||||
self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0])
|
self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0])
|
||||||
|
|
||||||
|
|
||||||
class SectionMetadata(object):
|
class SectionMetadata(object):
|
||||||
|
|
||||||
def __init__(self, raw):
|
def __init__(self, raw):
|
||||||
@ -220,6 +220,7 @@ class SectionMetadata(object):
|
|||||||
|
|
||||||
adv += 2*length
|
adv += 2*length
|
||||||
|
|
||||||
|
|
||||||
class SectionText(object):
|
class SectionText(object):
|
||||||
|
|
||||||
def __init__(self, section_header, raw):
|
def __init__(self, section_header, raw):
|
||||||
@ -227,6 +228,34 @@ class SectionText(object):
|
|||||||
self.data = raw[section_header.paragraphs * 4:]
|
self.data = raw[section_header.paragraphs * 4:]
|
||||||
|
|
||||||
|
|
||||||
|
class SectionCompositeImage(object):
    """Layout of a composite image: a grid of image record uids.

    The record data begins with two big-endian unsigned 16-bit integers,
    the number of columns followed by the number of rows. These are
    followed by ``rows * columns`` big-endian unsigned 16-bit values,
    stored row by row.

    After construction ``layout`` has the form::

        [
            [uid, uid, uid, ...],   # first row, one entry per column
            [uid, uid, uid, ...],   # second row
            ...
        ]

    Each item in the layout is in its correct position in the final
    composite, and each item is a uid to an image record.
    """

    def __init__(self, raw):
        """Parse the composite image layout from ``raw``.

        :param raw: Byte string holding the record data, starting at the
            column count.
        :raises struct.error: If ``raw`` is too short to hold the header
            or all ``rows * columns`` uids.
        """
        # Header: column count first, then row count.
        self.columns, self.rows = struct.unpack_from('>HH', raw, 0)

        # Decode one whole row per unpack call. ``range`` (rather than
        # ``xrange``) keeps this working on both Python 2 and Python 3.
        row_format = '>%dH' % self.columns
        row_size = struct.calcsize(row_format)
        header_size = 4
        self.layout = [
            list(struct.unpack_from(row_format, raw,
                                    header_size + index * row_size))
            for index in range(self.rows)
        ]
|
||||||
|
|
||||||
|
|
||||||
class Reader(FormatReader):
|
class Reader(FormatReader):
|
||||||
|
|
||||||
def __init__(self, header, stream, log, options):
|
def __init__(self, header, stream, log, options):
|
||||||
@ -240,6 +269,7 @@ class Reader(FormatReader):
|
|||||||
self.uid_text_secion_number = OrderedDict()
|
self.uid_text_secion_number = OrderedDict()
|
||||||
self.uid_text_secion_encoding = {}
|
self.uid_text_secion_encoding = {}
|
||||||
self.uid_image_section_number = {}
|
self.uid_image_section_number = {}
|
||||||
|
self.uid_composite_image_section_number = {}
|
||||||
self.metadata_section_number = None
|
self.metadata_section_number = None
|
||||||
self.default_encoding = 'utf-8'
|
self.default_encoding = 'utf-8'
|
||||||
self.owner_id = None
|
self.owner_id = None
|
||||||
@ -266,8 +296,9 @@ class Reader(FormatReader):
|
|||||||
elif section_header.type == DATATYPE_METADATA:
|
elif section_header.type == DATATYPE_METADATA:
|
||||||
self.metadata_section_number = section_number
|
self.metadata_section_number = section_number
|
||||||
section = SectionMetadata(raw_data[start:])
|
section = SectionMetadata(raw_data[start:])
|
||||||
#elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
|
elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
|
||||||
|
self.uid_composite_image_section_number[section_header.uid] = section_number
|
||||||
|
section = SectionCompositeImage(raw_data[start:])
|
||||||
|
|
||||||
self.sections.append((section_header, section))
|
self.sections.append((section_header, section))
|
||||||
|
|
||||||
@ -282,6 +313,9 @@ class Reader(FormatReader):
|
|||||||
self.mi = get_metadata(stream, False)
|
self.mi = get_metadata(stream, False)
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
|
# Each text record is independent (unless the continuation
|
||||||
|
# value is set in the previous record). Put each converted
|
||||||
|
# text record into a separate file.
|
||||||
with CurrentDir(output_dir):
|
with CurrentDir(output_dir):
|
||||||
for uid, num in self.uid_text_secion_number.items():
|
for uid, num in self.uid_text_secion_number.items():
|
||||||
self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
|
self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid)))
|
||||||
@ -297,9 +331,11 @@ class Reader(FormatReader):
|
|||||||
htmlf.write(html.encode('utf-8'))
|
htmlf.write(html.encode('utf-8'))
|
||||||
|
|
||||||
images = []
|
images = []
|
||||||
|
image_sizes = {}
|
||||||
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
||||||
os.makedirs(os.path.join(output_dir, 'images/'))
|
os.makedirs(os.path.join(output_dir, 'images/'))
|
||||||
with CurrentDir(os.path.join(output_dir, 'images/')):
|
with CurrentDir(os.path.join(output_dir, 'images/')):
|
||||||
|
# Single images.
|
||||||
for uid, num in self.uid_image_section_number.items():
|
for uid, num in self.uid_image_section_number.items():
|
||||||
section_header, section_data = self.sections[num]
|
section_header, section_data = self.sections[num]
|
||||||
if section_data:
|
if section_data:
|
||||||
@ -317,6 +353,7 @@ class Reader(FormatReader):
|
|||||||
itf.write(idata)
|
itf.write(idata)
|
||||||
im = Image()
|
im = Image()
|
||||||
im.read(itn)
|
im.read(itn)
|
||||||
|
image_sizes[uid] = im.size
|
||||||
im.set_compression_quality(70)
|
im.set_compression_quality(70)
|
||||||
im.save('%s.jpg' % uid)
|
im.save('%s.jpg' % uid)
|
||||||
self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
|
self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
|
||||||
@ -325,6 +362,49 @@ class Reader(FormatReader):
|
|||||||
images.append('%s.jpg' % uid)
|
images.append('%s.jpg' % uid)
|
||||||
else:
|
else:
|
||||||
self.log.error('Failed to write image with uid %s: No data.' % uid)
|
self.log.error('Failed to write image with uid %s: No data.' % uid)
|
||||||
|
# Composite images.
|
||||||
|
for uid, num in self.uid_composite_image_section_number.items():
|
||||||
|
try:
|
||||||
|
section_header, section_data = self.sections[num]
|
||||||
|
# Get the final width and height.
|
||||||
|
width = 0
|
||||||
|
height = 0
|
||||||
|
for row in section_data.layout:
|
||||||
|
row_width = 0
|
||||||
|
col_height = 0
|
||||||
|
for col in row:
|
||||||
|
if col not in image_sizes:
|
||||||
|
raise Exception('Image with uid: %s missing.' % col)
|
||||||
|
im = Image()
|
||||||
|
im.read('%s.jpg' % col)
|
||||||
|
w, h = im.size
|
||||||
|
row_width += w
|
||||||
|
if col_height < h:
|
||||||
|
col_height = h
|
||||||
|
if width < row_width:
|
||||||
|
width = row_width
|
||||||
|
height += col_height
|
||||||
|
# Create a new image the total size of all image
|
||||||
|
# parts. Put the parts into the new image.
|
||||||
|
canvas = create_canvas(width, height)
|
||||||
|
y_off = 0
|
||||||
|
for row in section_data.layout:
|
||||||
|
x_off = 0
|
||||||
|
largest_height = 0
|
||||||
|
for col in row:
|
||||||
|
im = Image()
|
||||||
|
im.read('%s.jpg' % col)
|
||||||
|
canvas.compose(im, x_off, y_off)
|
||||||
|
w, h = im.size
|
||||||
|
x_off += w
|
||||||
|
if largest_height < h:
|
||||||
|
largest_height = h
|
||||||
|
y_off += largest_height
|
||||||
|
canvas.set_compression_quality(70)
|
||||||
|
canvas.save('%s.jpg' % uid)
|
||||||
|
self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
|
||||||
|
except Exception as e:
|
||||||
|
self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))
|
||||||
|
|
||||||
# Run the HTML through the html processing plugin.
|
# Run the HTML through the html processing plugin.
|
||||||
from calibre.customize.ui import plugin_for_input_format
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
@ -334,13 +414,17 @@ class Reader(FormatReader):
|
|||||||
self.options.input_encoding = 'utf-8'
|
self.options.input_encoding = 'utf-8'
|
||||||
odi = self.options.debug_pipeline
|
odi = self.options.debug_pipeline
|
||||||
self.options.debug_pipeline = None
|
self.options.debug_pipeline = None
|
||||||
# Generate oeb from html conversion.
|
# Determine the home.html record uid. This should be set in the
|
||||||
|
# reserved values in the metadata record. home.html is the first
|
||||||
|
# text record (should have hyper link references to other records)
|
||||||
|
# in the document.
|
||||||
try:
|
try:
|
||||||
home_html = self.header_record.home_html
|
home_html = self.header_record.home_html
|
||||||
if not home_html:
|
if not home_html:
|
||||||
home_html = self.uid_text_secion_number.items()[0][0]
|
home_html = self.uid_text_secion_number.items()[0][0]
|
||||||
except:
|
except:
|
||||||
raise Exception(_('Could not determine home.html'))
|
raise Exception(_('Could not determine home.html'))
|
||||||
|
# Generate oeb from html conversion.
|
||||||
oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
|
oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
|
||||||
self.options.debug_pipeline = odi
|
self.options.debug_pipeline = odi
|
||||||
|
|
||||||
@ -359,6 +443,7 @@ class Reader(FormatReader):
|
|||||||
html = u'<p id="p0">'
|
html = u'<p id="p0">'
|
||||||
offset = 0
|
offset = 0
|
||||||
paragraph_open = True
|
paragraph_open = True
|
||||||
|
link_open = False
|
||||||
need_set_p_id = False
|
need_set_p_id = False
|
||||||
p_num = 1
|
p_num = 1
|
||||||
paragraph_offsets = []
|
paragraph_offsets = []
|
||||||
@ -387,14 +472,15 @@ class Reader(FormatReader):
|
|||||||
if c == 0x0a:
|
if c == 0x0a:
|
||||||
offset += 1
|
offset += 1
|
||||||
id = struct.unpack('>H', d[offset:offset+2])[0]
|
id = struct.unpack('>H', d[offset:offset+2])[0]
|
||||||
html += '<a href="%s.html">' % id
|
if id in self.uid_text_secion_number:
|
||||||
|
html += '<a href="%s.html">' % id
|
||||||
|
link_open = True
|
||||||
offset += 1
|
offset += 1
|
||||||
# Targeted page link begins
|
# Targeted page link begins
|
||||||
# 3 Bytes
|
# 3 Bytes
|
||||||
# record ID, target
|
# record ID, target
|
||||||
elif c == 0x0b:
|
elif c == 0x0b:
|
||||||
offset += 3
|
offset += 3
|
||||||
html += '<a>'
|
|
||||||
# Paragraph link begins
|
# Paragraph link begins
|
||||||
# 4 Bytes
|
# 4 Bytes
|
||||||
# record ID, paragraph number
|
# record ID, paragraph number
|
||||||
@ -403,18 +489,21 @@ class Reader(FormatReader):
|
|||||||
id = struct.unpack('>H', d[offset:offset+2])[0]
|
id = struct.unpack('>H', d[offset:offset+2])[0]
|
||||||
offset += 2
|
offset += 2
|
||||||
pid = struct.unpack('>H', d[offset:offset+2])[0]
|
pid = struct.unpack('>H', d[offset:offset+2])[0]
|
||||||
html += '<a href="%s.html#p%s">' % (id, pid)
|
if id in self.uid_text_secion_number:
|
||||||
|
html += '<a href="%s.html#p%s">' % (id, pid)
|
||||||
|
link_open = True
|
||||||
offset += 1
|
offset += 1
|
||||||
# Targeted paragraph link begins
|
# Targeted paragraph link begins
|
||||||
# 5 Bytes
|
# 5 Bytes
|
||||||
# record ID, paragraph number, target
|
# record ID, paragraph number, target
|
||||||
elif c == 0x0d:
|
elif c == 0x0d:
|
||||||
offset += 5
|
offset += 5
|
||||||
html += '<a>'
|
|
||||||
# Link ends
|
# Link ends
|
||||||
# 0 Bytes
|
# 0 Bytes
|
||||||
elif c == 0x08:
|
elif c == 0x08:
|
||||||
html += '</a>'
|
if link_open:
|
||||||
|
html += '</a>'
|
||||||
|
link_open = False
|
||||||
# Set font
|
# Set font
|
||||||
# 1 Bytes
|
# 1 Bytes
|
||||||
# font specifier
|
# font specifier
|
||||||
|
Loading…
x
Reference in New Issue
Block a user