Conversion: When converting markdown documents recognize basic metadata in the markdown document

This commit is contained in:
Kovid Goyal 2016-11-11 18:01:30 +05:30
parent 813aff5702
commit 8de266af7d
3 changed files with 56 additions and 9 deletions

View File

@ -77,7 +77,7 @@ class TXTInput(InputFormatPlugin):
from calibre.ebooks.chardet import detect from calibre.ebooks.chardet import detect
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from calibre.ebooks.txt.processor import (convert_basic, from calibre.ebooks.txt.processor import (convert_basic,
convert_markdown, separate_paragraphs_single_line, convert_markdown_with_metadata, separate_paragraphs_single_line,
separate_paragraphs_print_formatted, preserve_spaces, separate_paragraphs_print_formatted, preserve_spaces,
detect_paragraph_type, detect_formatting_type, detect_paragraph_type, detect_formatting_type,
normalize_line_endings, convert_textile, remove_indents, normalize_line_endings, convert_textile, remove_indents,
@ -195,10 +195,11 @@ class TXTInput(InputFormatPlugin):
# Process the text using the appropriate text processor. # Process the text using the appropriate text processor.
html = '' html = ''
input_mi = None
if options.formatting_type == 'markdown': if options.formatting_type == 'markdown':
log.debug('Running text through markdown conversion...') log.debug('Running text through markdown conversion...')
try: try:
html = convert_markdown(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()]) input_mi, html = convert_markdown_with_metadata(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()])
except RuntimeError: except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be' raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
@ -236,11 +237,12 @@ class TXTInput(InputFormatPlugin):
os.remove(htmlfile.name) os.remove(htmlfile.name)
# Set metadata from file. # Set metadata from file.
if input_mi is None:
from calibre.customize.ui import get_file_type_metadata from calibre.customize.ui import get_file_type_metadata
input_mi = get_file_type_metadata(stream, file_ext)
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
mi = get_file_type_metadata(stream, file_ext) meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
meta_info_to_oeb_metadata(mi, oeb.metadata, log) self.html_postprocess_title = input_mi.title
self.html_postprocess_title = mi.title
return oeb return oeb
@ -250,4 +252,3 @@ class TXTInput(InputFormatPlugin):
for title in item.data.xpath('//*[local-name()="title"]'): for title in item.data.xpath('//*[local-name()="title"]'):
if title.text == _('Unknown'): if title.text == _('Unknown'):
title.text = self.html_postprocess_title title.text = self.html_postprocess_title

View File

@ -96,7 +96,10 @@ class CoverManager(object):
from calibre.ebooks.covers import create_cover from calibre.ebooks.covers import create_cover
series = series_index = None series = series_index = None
if m.series: if m.series:
try:
series, series_index = unicode(m.series[0]), m.series_index[0] series, series_index = unicode(m.series[0]), m.series_index[0]
except IndexError:
pass
img_data = create_cover(title, authors, series, series_index) img_data = create_cover(title, authors, series, series_index)
id, href = self.oeb.manifest.generate('cover', id, href = self.oeb.manifest.generate('cover',
u'cover_image.jpg') u'cover_image.jpg')

View File

@ -99,7 +99,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
return HTML_TEMPLATE % (title, u'\n'.join(lines)) return HTML_TEMPLATE % (title, u'\n'.join(lines))
def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')): DEFAULT_MD_EXTENSIONS = ('footnotes', 'tables', 'toc')
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
from calibre.ebooks.markdown import Markdown from calibre.ebooks.markdown import Markdown
extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS] extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
@ -107,6 +110,46 @@ def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
return HTML_TEMPLATE % (title, md.convert(txt)) return HTML_TEMPLATE % (title, md.convert(txt))
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    """Convert markdown text to HTML and collect metadata declared in it.

    The markdown ``meta`` extension is always enabled, in addition to
    whichever of the requested ``extensions`` are listed in MD_EXTENSIONS.

    :param txt: The markdown source text.
    :param title: Initial title for the metadata object; when empty, the
        translated string 'Unknown' is used. A ``title`` entry in the
        document metadata replaces it.
    :param extensions: Short names of the markdown extensions to enable.
    :return: A ``(mi, html)`` tuple: the populated Metadata object and
        HTML_TEMPLATE filled in with ``mi.title`` and the converted body.
    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values

    ext_prefix = 'calibre.ebooks.markdown.extensions.'
    extensions = [ext_prefix + name.lower() for name in extensions if name.lower() in MD_EXTENSIONS]
    meta_ext = ext_prefix + 'meta'
    if meta_ext not in extensions:
        extensions.append(meta_ext)

    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    # NOTE(review): presumably a dict mapping each key to a list of value
    # lines, as produced by the meta extension -- confirm.
    meta = md.Meta

    # Accept alternate key names, but never override the canonical key if
    # the document supplies both.
    aliases = {'date':'pubdate', 'summary':'comments'}
    for alias, canonical in aliases.iteritems():
        if alias in meta and canonical not in meta:
            meta[canonical] = meta.pop(alias)

    recognized = ('title', 'authors', 'series', 'tags', 'pubdate', 'comments', 'publisher', 'rating')
    for field in recognized:
        value = meta.get(field)
        if not value:
            continue
        field_meta = mi.metadata_for_field(field)
        if not field_meta.get('is_multiple'):
            # Single-valued field: only the first entry is used.
            value = value[0]
        if field == 'series':
            value, index = get_series_values(value)
            mi.series_index = index if index is not None else 1
        elif field == 'rating':
            try:
                # Truncate to an integer and clamp to the range 0..10.
                value = max(0, min(int(float(value)), 10))
            except Exception:
                # Unparseable rating: leave the field unset.
                continue
        if field_meta.get('datatype') == 'datetime':
            try:
                value = parse_only_date(value, assume_utc=False)
            except Exception:
                # Unparseable date: leave the field unset.
                continue
        setattr(mi, field, value)

    return mi, HTML_TEMPLATE % (mi.title, html)
def convert_textile(txt, title=''): def convert_textile(txt, title=''):
from calibre.ebooks.textile import textile from calibre.ebooks.textile import textile
html = textile(txt, encoding='utf-8') html = textile(txt, encoding='utf-8')