mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion: When converting markdown documents recognize basic metadata in the markdown document
This commit is contained in:
parent
813aff5702
commit
8de266af7d
@ -77,7 +77,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
from calibre.ebooks.chardet import detect
|
from calibre.ebooks.chardet import detect
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
from calibre.ebooks.txt.processor import (convert_basic,
|
from calibre.ebooks.txt.processor import (convert_basic,
|
||||||
convert_markdown, separate_paragraphs_single_line,
|
convert_markdown_with_metadata, separate_paragraphs_single_line,
|
||||||
separate_paragraphs_print_formatted, preserve_spaces,
|
separate_paragraphs_print_formatted, preserve_spaces,
|
||||||
detect_paragraph_type, detect_formatting_type,
|
detect_paragraph_type, detect_formatting_type,
|
||||||
normalize_line_endings, convert_textile, remove_indents,
|
normalize_line_endings, convert_textile, remove_indents,
|
||||||
@ -195,10 +195,11 @@ class TXTInput(InputFormatPlugin):
|
|||||||
|
|
||||||
# Process the text using the appropriate text processor.
|
# Process the text using the appropriate text processor.
|
||||||
html = ''
|
html = ''
|
||||||
|
input_mi = None
|
||||||
if options.formatting_type == 'markdown':
|
if options.formatting_type == 'markdown':
|
||||||
log.debug('Running text through markdown conversion...')
|
log.debug('Running text through markdown conversion...')
|
||||||
try:
|
try:
|
||||||
html = convert_markdown(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()])
|
input_mi, html = convert_markdown_with_metadata(txt, extensions=[x.strip() for x in options.markdown_extensions.split(',') if x.strip()])
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
raise ValueError('This txt file has malformed markup, it cannot be'
|
raise ValueError('This txt file has malformed markup, it cannot be'
|
||||||
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
|
||||||
@ -236,11 +237,12 @@ class TXTInput(InputFormatPlugin):
|
|||||||
os.remove(htmlfile.name)
|
os.remove(htmlfile.name)
|
||||||
|
|
||||||
# Set metadata from file.
|
# Set metadata from file.
|
||||||
from calibre.customize.ui import get_file_type_metadata
|
if input_mi is None:
|
||||||
|
from calibre.customize.ui import get_file_type_metadata
|
||||||
|
input_mi = get_file_type_metadata(stream, file_ext)
|
||||||
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
|
from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
|
||||||
mi = get_file_type_metadata(stream, file_ext)
|
meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
|
||||||
meta_info_to_oeb_metadata(mi, oeb.metadata, log)
|
self.html_postprocess_title = input_mi.title
|
||||||
self.html_postprocess_title = mi.title
|
|
||||||
|
|
||||||
return oeb
|
return oeb
|
||||||
|
|
||||||
@ -250,4 +252,3 @@ class TXTInput(InputFormatPlugin):
|
|||||||
for title in item.data.xpath('//*[local-name()="title"]'):
|
for title in item.data.xpath('//*[local-name()="title"]'):
|
||||||
if title.text == _('Unknown'):
|
if title.text == _('Unknown'):
|
||||||
title.text = self.html_postprocess_title
|
title.text = self.html_postprocess_title
|
||||||
|
|
||||||
|
@ -96,7 +96,10 @@ class CoverManager(object):
|
|||||||
from calibre.ebooks.covers import create_cover
|
from calibre.ebooks.covers import create_cover
|
||||||
series = series_index = None
|
series = series_index = None
|
||||||
if m.series:
|
if m.series:
|
||||||
series, series_index = unicode(m.series[0]), m.series_index[0]
|
try:
|
||||||
|
series, series_index = unicode(m.series[0]), m.series_index[0]
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
img_data = create_cover(title, authors, series, series_index)
|
img_data = create_cover(title, authors, series, series_index)
|
||||||
id, href = self.oeb.manifest.generate('cover',
|
id, href = self.oeb.manifest.generate('cover',
|
||||||
u'cover_image.jpg')
|
u'cover_image.jpg')
|
||||||
|
@ -99,7 +99,10 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
|
|||||||
return HTML_TEMPLATE % (title, u'\n'.join(lines))
|
return HTML_TEMPLATE % (title, u'\n'.join(lines))
|
||||||
|
|
||||||
|
|
||||||
def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
|
DEFAULT_MD_EXTENSIONS = ('footnotes', 'tables', 'toc')
|
||||||
|
|
||||||
|
|
||||||
|
def convert_markdown(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
|
||||||
from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
|
from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
|
||||||
from calibre.ebooks.markdown import Markdown
|
from calibre.ebooks.markdown import Markdown
|
||||||
extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
|
extensions = ['calibre.ebooks.markdown.extensions.' + x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
|
||||||
@ -107,6 +110,46 @@ def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
|
|||||||
return HTML_TEMPLATE % (title, md.convert(txt))
|
return HTML_TEMPLATE % (title, md.convert(txt))
|
||||||
|
|
||||||
|
|
||||||
|
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    '''
    Convert markdown text to HTML and build a Metadata object from the
    metadata block parsed out of the document.

    :param txt: The markdown source text.
    :param title: Fallback title, used when the document does not declare
        one. If empty, the translated string 'Unknown' is used.
    :param extensions: Names of markdown extensions to enable. Names not
        present in MD_EXTENSIONS are dropped; the meta extension is always
        enabled in addition to the requested ones.
    :return: A ``(mi, html)`` tuple: the Metadata object and the complete
        HTML document produced from HTML_TEMPLATE.
    '''
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
    from calibre.ebooks.markdown import Markdown
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values

    prefix = 'calibre.ebooks.markdown.extensions.'
    extensions = [prefix + name.lower() for name in extensions if name.lower() in MD_EXTENSIONS]
    meta_ext = prefix + 'meta'
    if meta_ext not in extensions:
        # The metadata read below from md.Meta requires the meta extension
        extensions.append(meta_ext)

    md = Markdown(extensions=extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    meta = md.Meta

    # Accept 'date' and 'summary' as aliases, but never let an alias
    # override a value given under the canonical field name.
    for alias, field in (('date', 'pubdate'), ('summary', 'comments')):
        if alias in meta and field not in meta:
            meta[field] = meta.pop(alias)

    fields = ('title', 'authors', 'series', 'tags', 'pubdate', 'comments',
              'publisher', 'rating')
    for field in fields:
        val = meta.get(field)
        if not val:
            continue
        field_metadata = mi.metadata_for_field(field)
        if not field_metadata.get('is_multiple'):
            # Single valued field: only the first parsed entry is used
            val = val[0]
        if field == 'series':
            val, series_index = get_series_values(val)
            mi.series_index = 1 if series_index is None else series_index
        if field == 'rating':
            try:
                # Clamp the rating to the range 0-10
                val = max(0, min(int(float(val)), 10))
            except Exception:
                # Unparseable rating: leave the field unset
                continue
        if field_metadata.get('datatype') == 'datetime':
            try:
                val = parse_only_date(val, assume_utc=False)
            except Exception:
                # Unparseable date: leave the field unset
                continue
        setattr(mi, field, val)

    return mi, HTML_TEMPLATE % (mi.title, html)
|
||||||
|
|
||||||
|
|
||||||
def convert_textile(txt, title=''):
|
def convert_textile(txt, title=''):
|
||||||
from calibre.ebooks.textile import textile
|
from calibre.ebooks.textile import textile
|
||||||
html = textile(txt, encoding='utf-8')
|
html = textile(txt, encoding='utf-8')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user