From 8de266af7dbc89dbc6310aa1681937b71a2be8c9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 11 Nov 2016 18:01:30 +0530 Subject: [PATCH] Conversion: When converting markdown documents recognize basic metadata in the markdown document --- .../ebooks/conversion/plugins/txt_input.py | 15 ++++--- src/calibre/ebooks/oeb/transforms/cover.py | 5 ++- src/calibre/ebooks/txt/processor.py | 45 ++++++++++++++++++- 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/txt_input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py index d7990cb8f4..8a37a9b85f 100644 --- a/src/calibre/ebooks/conversion/plugins/txt_input.py +++ b/src/calibre/ebooks/conversion/plugins/txt_input.py @@ -77,7 +77,7 @@ class TXTInput(InputFormatPlugin): from calibre.ebooks.chardet import detect from calibre.utils.zipfile import ZipFile from calibre.ebooks.txt.processor import (convert_basic, - convert_markdown, separate_paragraphs_single_line, + convert_markdown_with_metadata, separate_paragraphs_single_line, separate_paragraphs_print_formatted, preserve_spaces, detect_paragraph_type, detect_formatting_type, normalize_line_endings, convert_textile, remove_indents, @@ -195,10 +195,11 @@ class TXTInput(InputFormatPlugin): # Process the text using the appropriate text processor. html = '' + input_mi = None if options.formatting_type == 'markdown': log.debug('Running text through markdown conversion...') try: - html = convert_markdown(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()]) + input_mi, html = convert_markdown_with_metadata(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()]) except RuntimeError: raise ValueError('This txt file has malformed markup, it cannot be' ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') @@ -236,11 +237,12 @@ class TXTInput(InputFormatPlugin): os.remove(htmlfile.name) # Set metadata from file. - from calibre.customize.ui import get_file_type_metadata + if input_mi is None: + from calibre.customize.ui import get_file_type_metadata + input_mi = get_file_type_metadata(stream, file_ext) from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata - mi = get_file_type_metadata(stream, file_ext) - meta_info_to_oeb_metadata(mi, oeb.metadata, log) - self.html_postprocess_title = mi.title + meta_info_to_oeb_metadata(input_mi, oeb.metadata, log) + self.html_postprocess_title = input_mi.title return oeb @@ -250,4 +252,3 @@ class TXTInput(InputFormatPlugin): for title in item.data.xpath('//*[local-name()="title"]'): if title.text == _('Unknown'): title.text = self.html_postprocess_title - diff --git a/src/calibre/ebooks/oeb/transforms/cover.py b/src/calibre/ebooks/oeb/transforms/cover.py index 25e1d97c92..1fc4b86758 100644 --- a/src/calibre/ebooks/oeb/transforms/cover.py +++ b/src/calibre/ebooks/oeb/transforms/cover.py @@ -96,7 +96,10 @@ class CoverManager(object): from calibre.ebooks.covers import create_cover series = series_index = None if m.series: - series, series_index = unicode(m.series[0]), m.series_index[0] + try: + series, series_index = unicode(m.series[0]), m.series_index[0] + except IndexError: + pass img_data = create_cover(title, authors, series, series_index) id, href = self.oeb.manifest.generate('cover', u'cover_image.jpg') diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py index 3f328ee787..0845e6c944 100644 --- a/src/calibre/ebooks/txt/processor.py +++ b/src/calibre/ebooks/txt/processor.py @@ -99,7 +99,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0): return HTML_TEMPLATE % (title, u'\n'.join(lines)) -def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')): +DEFAULT_MD_EXTENSIONS = ('footnotes', 'tables', 'toc') + + +def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS): from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS from calibre.ebooks.markdown import Markdown extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS] @@ -107,6 +110,46 @@ def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')): return HTML_TEMPLATE % (title, md.convert(txt)) +def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS): + from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS + from calibre.ebooks.markdown import Markdown + from calibre.ebooks.metadata.book.base import Metadata + from calibre.utils.date import parse_only_date + from calibre.db.write import get_series_values + extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS] + meta_ext = 'calibre.ebooks.markdown.extensions.meta' + if meta_ext not in extensions: + extensions.append(meta_ext) + md = Markdown(extensions=extensions) + html = md.convert(txt) + mi = Metadata(title or _('Unknown')) + m = md.Meta + for k, v in {'date':'pubdate', 'summary':'comments'}.iteritems(): + if v not in m and k in m: + m[v] = m.pop(k) + for k in 'title authors series tags pubdate comments publisher rating'.split(): + val = m.get(k) + if val: + mf = mi.metadata_for_field(k) + if not mf.get('is_multiple'): + val = val[0] + if k == 'series': + val, si = get_series_values(val) + mi.series_index = 1 if si is None else si + if k == 'rating': + try: + val = max(0, min(int(float(val)), 10)) + except Exception: + continue + if mf.get('datatype') == 'datetime': + try: + val = parse_only_date(val, assume_utc=False) + except Exception: + continue + setattr(mi, k, val) + return mi, HTML_TEMPLATE % (mi.title, html) + + def convert_textile(txt, title=''): from calibre.ebooks.textile import textile html = textile(txt, encoding='utf-8')