mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
DOCX Input: Detect likely cover image
DOCX Input: If a large image that looks like a cover is present at the start of the document, remove it and use it as the cover of the output ebook. This can be turned off under the DOCX Input section of the conversion dialog.
This commit is contained in:
parent
7a0675e59a
commit
752bd9e06e
@ -14,9 +14,17 @@ class DOCXInput(InputFormatPlugin):
|
||||
description = 'Convert DOCX files (.docx) to HTML'
|
||||
file_types = set(['docx'])
|
||||
|
||||
options = {
|
||||
OptionRecommendation(name='docx_no_cover', recommended_value=False,
|
||||
help=_('Normally, if a large image is present at the start of the document that looks like a cover, '
|
||||
'it will be removed from the document and used as the cover for created ebook. This option '
|
||||
'turns off that behavior.')),
|
||||
|
||||
}
|
||||
|
||||
recommendations = set([('page_breaks_before', '/', OptionRecommendation.MED)])
|
||||
|
||||
def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from calibre.ebooks.docx.to_html import Convert
|
||||
return Convert(stream, log=log)()
|
||||
return Convert(stream, detect_cover=not options.docx_no_cover, log=log)()
|
||||
|
||||
|
@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os
|
||||
|
||||
def mergeable(previous, current):
|
||||
if previous.tail or current.tail:
|
||||
@ -83,8 +84,19 @@ def lift(span):
|
||||
else:
|
||||
add_text(last_child, 'tail', span.tail)
|
||||
|
||||
def before_count(root, tag, limit=10):
    """Count the elements that come before ``tag`` inside the first <body>.

    Walks the descendants of the first ``<body>`` found under ``root`` in the
    order produced by ``iterdescendants()`` and returns how many elements were
    seen before ``tag`` (compared by identity).

    :param root: Tree root; must provide ``xpath()``.
    :param tag: The element to look for.
    :param limit: Upper bound on the answer. The walk stops once more than
                  ``limit`` elements have been examined.
    :return: The number of preceding elements, or ``limit`` when there is no
             ``<body>``, when ``tag`` is further than ``limit`` elements in,
             or when ``tag`` is not a descendant of the body at all.
    """
    body = root.xpath('//body[1]')
    if not body:
        return limit
    for count, elem in enumerate(body[0].iterdescendants()):
        if elem is tag:
            return count
        if count >= limit:
            # Already looked at limit + 1 elements, give up
            return limit
    # The walk ended without meeting tag. Always answer with an integer:
    # falling through would return None, which Python 2 orders below every
    # number, so callers testing ``before_count(...) < n`` would wrongly
    # treat a missing element as being at the very start of the document.
    return limit
|
||||
|
||||
def cleanup_markup(root, styles):
|
||||
def cleanup_markup(log, root, styles, dest_dir, detect_cover):
|
||||
# Merge consecutive spans that have the same styling
|
||||
current_run = []
|
||||
for span in root.xpath('//span'):
|
||||
@ -134,3 +146,22 @@ def cleanup_markup(root, styles):
|
||||
for span in root.xpath('//span[not(@class) and not(@id)]'):
|
||||
lift(span)
|
||||
|
||||
if detect_cover:
|
||||
# Check if the first image in the document is possibly a cover
|
||||
img = root.xpath('//img[@src][1]')
|
||||
if img:
|
||||
img = img[0]
|
||||
path = os.path.join(dest_dir, img.get('src'))
|
||||
if os.path.exists(path) and before_count(root, img, limit=10) < 5:
|
||||
from calibre.utils.magick.draw import identify
|
||||
try:
|
||||
width, height, fmt = identify(path)
|
||||
except:
|
||||
width, height, fmt = 0, 0, None
|
||||
is_cover = 0.8 <= height/width <= 1.8 and height*width >= 160000
|
||||
if is_cover:
|
||||
log.debug('Detected an image that looks like a cover')
|
||||
img.getparent().remove(img)
|
||||
return path
|
||||
|
||||
|
||||
|
@ -40,11 +40,12 @@ class Text:
|
||||
|
||||
class Convert(object):
|
||||
|
||||
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
|
||||
def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None):
|
||||
self.docx = DOCX(path_or_stream, log=log)
|
||||
self.ms_pat = re.compile(r'\s{2,}')
|
||||
self.ws_pat = re.compile(r'[\n\r\t]')
|
||||
self.log = self.docx.log
|
||||
self.detect_cover = detect_cover
|
||||
self.notes_text = notes_text or _('Notes')
|
||||
self.dest_dir = dest_dir or os.getcwdu()
|
||||
self.mi = self.docx.metadata
|
||||
@ -169,7 +170,7 @@ class Convert(object):
|
||||
break
|
||||
|
||||
self.log.debug('Cleaning up redundant markup generated by Word')
|
||||
cleanup_markup(self.html, self.styles)
|
||||
self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover)
|
||||
|
||||
return self.write(doc)
|
||||
|
||||
@ -280,6 +281,8 @@ class Convert(object):
|
||||
opf.toc = toc
|
||||
opf.create_manifest_from_files_in([self.dest_dir])
|
||||
opf.create_spine(['index.html'])
|
||||
if self.cover_image is not None:
|
||||
opf.guide.set_cover(self.cover_image)
|
||||
with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
|
||||
opf.render(of, ncx, 'toc.ncx')
|
||||
return os.path.join(self.dest_dir, 'metadata.opf')
|
||||
|
23
src/calibre/gui2/convert/docx_input.py
Normal file
23
src/calibre/gui2/convert/docx_input.py
Normal file
@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from calibre.gui2.convert.docx_input_ui import Ui_Form
|
||||
from calibre.gui2.convert import Widget
|
||||
|
||||
class PluginWidget(Widget, Ui_Form):
    """Page of the conversion dialog holding the DOCX input options."""

    TITLE = _('DOCX Input')
    HELP = _('Options specific to') + ' DOCX ' + _('input')
    COMMIT_NAME = 'docx_input'
    ICON = I('mimetypes/docx.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        # Conversion options edited on this page. Presumably each name is
        # matched to the ``opt_<name>`` widget of the generated form
        # (``opt_docx_no_cover``) -- confirm against Widget.
        option_names = ['docx_no_cover']
        Widget.__init__(self, parent, option_names)
        self.initialize_options(get_option, get_help, db, book_id)
|
||||
|
41
src/calibre/gui2/convert/docx_input.ui
Normal file
41
src/calibre/gui2/convert/docx_input.ui
Normal file
@ -0,0 +1,41 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ui version="4.0">
|
||||
<class>Form</class>
|
||||
<widget class="QWidget" name="Form">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>518</width>
|
||||
<height>353</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
<string>Form</string>
|
||||
</property>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_3">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="opt_docx_no_cover">
|
||||
<property name="text">
|
||||
<string>Do not try to autodetect a &amp;cover from images in the document</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>20</width>
|
||||
<height>213</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<resources/>
|
||||
<connections/>
|
||||
</ui>
|
Loading…
x
Reference in New Issue
Block a user