diff --git a/manual/conversion.rst b/manual/conversion.rst
index 5eaca5a469..a4ecd902cc 100644
--- a/manual/conversion.rst
+++ b/manual/conversion.rst
@@ -710,3 +710,31 @@ EPUB from the ZIP file are::
Note that because this file explores the potential of EPUB, most of the advanced formatting is not going to work on readers less capable than |app|'s built-in EPUB viewer.
+
+Convert ODT documents
+~~~~~~~~~~~~~~~~~~~~~
+
+|app| can directly convert ODT (OpenDocument Text) files. You should use styles to format your document and minimize the use of direct formatting.
+When inserting images into your document, you need to anchor them to the paragraph; images anchored to a page will all end up at the front of the converted book.
+
+To enable automatic detection of chapters, you need to mark them with the built-in styles called 'Heading 1', 'Heading 2', ..., 'Heading 6' ('Heading 1' equates to the HTML tag <h1>, 'Heading 2' to <h2>, etc.). When you convert in |app| you can enter which style you used into the 'Detect chapters at' box. Examples:
+
+ * If you mark Chapters with style 'Heading 2', you have to set the 'Detect chapters at' box to ``//h:h2``
+ * For a nested TOC with Sections marked with 'Heading 2' and the Chapters marked with 'Heading 3' you need to enter ``//h:h2|//h:h3``. On the Convert - TOC page set the 'Level 1 TOC' box to ``//h:h2`` and the 'Level 2 TOC' box to ``//h:h3``.
+
+Well-known document properties (Title, Keywords, Description, Creator) are recognized and |app| will use the first image (not too small, and with a good aspect ratio) as the cover image.
+
+There is also an advanced property conversion mode, which is activated by setting the custom property ``opf.metadata`` ('Yes or No' type) to Yes in your ODT document (File->Properties->Custom Properties).
+If this property is detected by |app|, the following custom properties are recognized (``opf.authors`` overrides document creator)::
+
+ opf.titlesort
+ opf.authors
+ opf.authorsort
+ opf.publisher
+ opf.pubdate
+ opf.isbn
+ opf.language
+
+In addition to this, you can specify the picture to use as the cover by naming it ``opf.cover`` (right click, Picture->Options->Name) in the ODT. If no picture with this name is found, the 'smart' method is used.
+To disable cover detection entirely, set the custom property ``opf.nocover`` ('Yes or No' type) to Yes.
+
diff --git a/src/calibre/ebooks/metadata/odt.py b/src/calibre/ebooks/metadata/odt.py
index bf30dfd5f7..d795b997e2 100644
--- a/src/calibre/ebooks/metadata/odt.py
+++ b/src/calibre/ebooks/metadata/odt.py
@@ -1,5 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+#
# Copyright (C) 2006 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
@@ -17,12 +19,19 @@
#
# Contributor(s):
#
+from __future__ import division
+
import zipfile, re
import xml.sax.saxutils
from cStringIO import StringIO
from odf.namespaces import OFFICENS, DCNS, METANS
-from calibre.ebooks.metadata import MetaInformation, string_to_authors
+from odf.opendocument import load as odLoad
+from odf.draw import Image as odImage, Frame as odFrame
+
+from calibre.ebooks.metadata import MetaInformation, string_to_authors, check_isbn
+from calibre.utils.magick.draw import identify_data
+from calibre.utils.date import parse_date
whitespace = re.compile(r'\s+')
@@ -125,6 +134,10 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
else:
texttag = self._tag
self.seenfields[texttag] = self.data()
+ # OpenOffice tends to capitalize custom property names, so we also store
+ # the value under a lowercase key for easy access
+ if texttag[:4].lower() == u'opf.':
+ self.seenfields[texttag.lower()] = self.data()
if field in self.deletefields:
self.output.dowrite = True
@@ -141,7 +154,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
def data(self):
return normalize(''.join(self._data))
-def get_metadata(stream):
+def get_metadata(stream, extract_cover=True):
zin = zipfile.ZipFile(stream, 'r')
odfs = odfmetaparser()
parser = xml.sax.make_parser()
@@ -162,7 +175,73 @@ def get_metadata(stream):
if data.has_key('language'):
mi.language = data['language']
if data.get('keywords', ''):
- mi.tags = data['keywords'].split(',')
+ mi.tags = map(lambda x: x.strip(), data['keywords'].split(','))
+ opfmeta = False # we need this later for the cover
+ opfnocover = False
+ if data.get('opf.metadata','') == 'true':
+ # custom metadata contains OPF information
+ opfmeta = True
+ if data.get('opf.titlesort', ''):
+ mi.title_sort = data['opf.titlesort']
+ if data.get('opf.authors', ''):
+ mi.authors = string_to_authors(data['opf.authors'])
+ if data.get('opf.authorsort', ''):
+ mi.author_sort = data['opf.authorsort']
+ if data.get('opf.isbn', ''):
+ isbn = check_isbn(data['opf.isbn'])
+ if isbn is not None:
+ mi.isbn = isbn
+ if data.get('opf.publisher', ''):
+ mi.publisher = data['opf.publisher']
+ if data.get('opf.pubdate', ''):
+ mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
+ if data.get('opf.language', ''):
+ mi.languages = [ data['opf.language'] ]
+ opfnocover = data.get('opf.nocover', 'false') == 'true'
+ # search for a draw:image in a draw:frame with the name 'opf.cover';
+ # if the opf.metadata prop is false, just use the first image that
+ # has a proper size (borrowed from docx)
+ otext = odLoad(stream)
+ cover_href = None
+ cover_data = None
+ # check that it's really an ODT document
+ if otext.mimetype == u'application/vnd.oasis.opendocument.text':
+ for elem in otext.text.getElementsByType(odFrame):
+ img = elem.getElementsByType(odImage)
+ if len(img) > 0: # there should be only one
+ i_href = img[0].getAttribute('href')
+ try:
+ raw = zin.read(i_href)
+ except KeyError:
+ continue
+ try:
+ width, height, fmt = identify_data(raw)
+ except:
+ continue
+ else:
+ continue
+ if opfmeta and elem.getAttribute('name').lower() == u'opf.cover':
+ cover_href = i_href
+ cover_data = (fmt, raw)
+ break
+ if cover_href is None and 0.8 <= height/width <= 1.8 and height*width >= 12000:
+ cover_href = i_href
+ cover_data = (fmt, raw)
+ if not opfmeta:
+ break
+
+ if not opfnocover and cover_href is not None:
+ mi.cover = cover_href
+ if extract_cover:
+ if not cover_data:
+ raw = zin.read(cover_href)
+ try:
+ width, height, fmt = identify_data(raw)
+ except:
+ pass
+ else:
+ cover_data = (fmt, raw)
+ mi.cover_data = cover_data
return mi