This commit is contained in:
Kovid Goyal 2024-11-12 20:41:59 +05:30
commit 1ee299ee2a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 148 additions and 114 deletions

View File

@ -12,16 +12,17 @@ from calibre import prepare_string_for_xml
class NodeKinds(IntEnum):
DOCUMENT = -1
CODE_TEXT = -2
CODE_BLOCK = -3
URL = -4
BLANK_LINE = -5
TEXT = -6
LIST = -7
END_LIST = -8
BLANK_LINE = -2
CODE_TEXT = -3
CODE_BLOCK = -4
END_LIST = -5
GUI_LABEL = -6
ITALIC_TEXT = -7
LIST = -8
LIST_ITEM = -9
GUI_LABEL = -10
ITALIC_TEXT = -11
REF = -10
TEXT = -11
URL = -12
class Node:
@ -46,18 +47,10 @@ class Node:
return prepare_string_for_xml(self._text)
class DocumentNode(Node):
class BlankLineNode(Node):
def __init__(self):
super().__init__(NodeKinds.DOCUMENT)
self._children = []
class TextNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.TEXT)
self._text = text
super().__init__(NodeKinds.BLANK_LINE)
class CodeBlock(Node):
@ -74,10 +67,51 @@ class CodeText(Node):
self._text = code_text
class BlankLineNode(Node):
class DocumentNode(Node):
def __init__(self):
super().__init__(NodeKinds.BLANK_LINE)
super().__init__(NodeKinds.DOCUMENT)
self._children = []
class GuiLabelNode(Node):
    """Parse-tree node of kind ``NodeKinds.GUI_LABEL`` carrying a label's text."""

    def __init__(self, text):
        # Register the node kind with the base class first, then store the
        # raw (unescaped) label text on the instance.
        super().__init__(NodeKinds.GUI_LABEL)
        self._text = text
class ItalicTextNode(Node):
    """Parse-tree node of kind ``NodeKinds.ITALIC_TEXT`` carrying the italic text."""

    def __init__(self, text):
        # Register the node kind with the base class first, then store the
        # raw (unescaped) text on the instance.
        super().__init__(NodeKinds.ITALIC_TEXT)
        self._text = text
class ListItemNode(Node):
    """Parse-tree node of kind ``NodeKinds.LIST_ITEM``.

    It stores no text of its own.
    NOTE(review): child storage is presumably provided by ``Node`` -- confirm.
    """

    def __init__(self):
        super().__init__(NodeKinds.LIST_ITEM)
class ListNode(Node):
    """Parse-tree node of kind ``NodeKinds.LIST``.

    It stores no text of its own.
    NOTE(review): child storage is presumably provided by ``Node`` -- confirm.
    """

    def __init__(self):
        super().__init__(NodeKinds.LIST)
class RefNode(Node):
    """Parse-tree node of kind ``NodeKinds.REF`` carrying the referenced name.

    The stored text is the bare name found between the backquotes of a
    ``:ref:`` construct; it is stored exactly as given.
    """

    def __init__(self, text):
        # Register the node kind with the base class first, then store the
        # raw (unescaped) name on the instance.
        super().__init__(NodeKinds.REF)
        self._text = text
class TextNode(Node):
    """Parse-tree node of kind ``NodeKinds.TEXT`` carrying a run of plain text."""

    def __init__(self, text):
        # Register the node kind with the base class first, then store the
        # raw (unescaped) text on the instance.
        super().__init__(NodeKinds.TEXT)
        self._text = text
class UrlNode(Node):
@ -100,32 +134,6 @@ class UrlNode(Node):
return prepare_string_for_xml(self._url)
class ListNode(Node):
    """Parse-tree node of kind ``NodeKinds.LIST``.

    It stores no text of its own.
    NOTE(review): child storage is presumably provided by ``Node`` -- confirm.
    """

    def __init__(self):
        super().__init__(NodeKinds.LIST)
class ListItemNode(Node):
    """Parse-tree node of kind ``NodeKinds.LIST_ITEM``.

    It stores no text of its own.
    NOTE(review): child storage is presumably provided by ``Node`` -- confirm.
    """

    def __init__(self):
        super().__init__(NodeKinds.LIST_ITEM)
class ItalicTextNode(Node):
    """Parse-tree node of kind ``NodeKinds.ITALIC_TEXT`` carrying the italic text."""

    def __init__(self, text):
        # Register the node kind with the base class first, then store the
        # raw (unescaped) text on the instance.
        super().__init__(NodeKinds.ITALIC_TEXT)
        self._text = text
class GuiLabelNode(Node):
    """Parse-tree node of kind ``NodeKinds.GUI_LABEL`` carrying a label's text."""

    def __init__(self, text):
        # Register the node kind with the base class first, then store the
        # raw (unescaped) label text on the instance.
        super().__init__(NodeKinds.GUI_LABEL)
        self._text = text
class FFMLProcessor:
"""
@ -149,6 +157,15 @@ class FFMLProcessor:
- URLs. The syntax is similar to BBCODE: [URL href="http..."]Link text[/URL].
Example: [URL href="https://en.wikipedia.org/wiki/ISO_8601"]ISO[/URL]
- Internal function reference links. These are links to some formatter function
documentation. The syntax is the same as guilabel. Example: :ref:`get_note`.
The characters '()' are automatically added to the function name when
displayed. For HTML it generates the same as the inline program code text
operator (``) with no link. Example: :ref:`add` produces <code>add()</code>.
For RST it generates a :ref: reference that works only in an RST document
containing formatter function documentation. Example: :ref:`get_note`
generates :ref:`get_note() <ff_get_note>`
- example program code text blocks. Surround the code block with [CODE]
and [/CODE] tags. These tags must be first on a line. Example:
[CODE]
@ -175,8 +192,6 @@ class FFMLProcessor:
HTML output contains no CSS and does not start with a tag such as <DIV> or <P>.
RST output is not indented.
API example: generate documents for all builtin formatter functions
--------------------
from calibre.utils.ffml_processor import FFMLProcessor
@ -253,18 +268,16 @@ class FFMLProcessor:
result = ''
if tree.node_kind() == NodeKinds.TEXT:
result += tree.escaped_text()
elif tree.node_kind() == NodeKinds.BLANK_LINE:
result += '\n<br>\n<br>\n'
elif tree.node_kind() == NodeKinds.CODE_TEXT:
result += f'<code>{tree.escaped_text()}</code>'
elif tree.node_kind() == NodeKinds.CODE_BLOCK:
result += f'<pre style="margin-left:2em"><code>{tree.escaped_text()}</code></pre>'
elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
result += f'<i>{tree.escaped_text()}</i>'
elif tree.node_kind() == NodeKinds.GUI_LABEL:
result += f'<span style="font-family: Sans-Serif">{tree.escaped_text()}</span>'
elif tree.node_kind() == NodeKinds.BLANK_LINE:
result += '\n<br>\n<br>\n'
elif tree.node_kind() == NodeKinds.URL:
result += f'<a href="{tree.escaped_url()}">{tree.escaped_label()}</a>'
elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
result += f'<i>{tree.escaped_text()}</i>'
elif tree.node_kind() == NodeKinds.LIST:
result += '\n<ul>\n'
for child in tree.children():
@ -272,6 +285,10 @@ class FFMLProcessor:
result += self.tree_to_html(child, depth+1)
result += '</li>\n'
result += '</ul>\n'
elif tree.node_kind() == NodeKinds.REF:
result += f'<code>{tree.escaped_text()}()</code>'
elif tree.node_kind() == NodeKinds.URL:
result += f'<a href="{tree.escaped_url()}">{tree.escaped_label()}</a>'
elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM):
for child in tree.children():
result += self.tree_to_html(child, depth+1)
@ -306,29 +323,20 @@ class FFMLProcessor:
"""
if result is None:
result = ' ' * indent
if tree.node_kind() == NodeKinds.TEXT:
txt = tree.text()
if not result:
txt = txt.lstrip()
elif result.endswith('\n'):
txt = txt.lstrip()
result += ' ' * indent
result += txt
elif tree.node_kind() == NodeKinds.CODE_TEXT:
result += f'``{tree.text()}``'
elif tree.node_kind() == NodeKinds.GUI_LABEL:
result += f':guilabel:`{tree.text()}`'
if tree.node_kind() == NodeKinds.BLANK_LINE:
result += '\n\n'
elif tree.node_kind() == NodeKinds.CODE_BLOCK:
result += f"\n\n{' ' * indent}::\n\n"
for line in tree.text().strip().split('\n'):
result += f"{' ' * (indent+1)}{line}\n"
result += '\n'
elif tree.node_kind() == NodeKinds.BLANK_LINE:
result += '\n\n'
elif tree.node_kind() == NodeKinds.CODE_TEXT:
result += f'``{tree.text()}``'
elif tree.node_kind() == NodeKinds.GUI_LABEL:
result += f':guilabel:`{tree.text()}`'
elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
result += f'`{tree.text()}`'
elif tree.node_kind() == NodeKinds.URL:
result += f'`{tree.label()} <{tree.url()}>`_'
elif tree.node_kind() == NodeKinds.LIST:
result += '\n\n'
for child in tree.children():
@ -336,6 +344,18 @@ class FFMLProcessor:
result = self.tree_to_rst(child, indent+1, result)
result += '\n'
result += '\n'
elif tree.node_kind() == NodeKinds.REF:
result += f':ref:`{tree.text()}() <ff_{tree.text()}>`'
elif tree.node_kind() == NodeKinds.TEXT:
txt = tree.text()
if not result:
txt = txt.lstrip()
elif result.endswith('\n'):
txt = txt.lstrip()
result += ' ' * indent
result += txt
elif tree.node_kind() == NodeKinds.URL:
result += f'`{tree.label()} <{tree.url()}>`_'
elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM):
for child in tree.children():
result = self.tree_to_rst(child, indent, result)
@ -368,11 +388,12 @@ class FFMLProcessor:
keywords = {'``': NodeKinds.CODE_TEXT, # must be before '`'
'`': NodeKinds.ITALIC_TEXT,
':guilabel:': NodeKinds.GUI_LABEL,
'[CODE]': NodeKinds.CODE_BLOCK,
'[URL': NodeKinds.URL,
':guilabel:': NodeKinds.GUI_LABEL,
'[LIST]': NodeKinds.LIST,
'[/LIST]': NodeKinds.END_LIST,
':ref:': NodeKinds.REF,
'[URL': NodeKinds.URL,
'[*]': NodeKinds.LIST_ITEM,
'\n\n': NodeKinds.BLANK_LINE
}
@ -426,6 +447,17 @@ class FFMLProcessor:
return min(positions)
return len(self.input)
def get_code_block(self):
    """Consume a ``[CODE]`` ... ``[/CODE]`` block and return it as a CodeBlock.

    The cursor is advanced past the opening tag (and the newline that
    follows it), the text up to the closing tag becomes the node's content,
    and the cursor is then moved past the closing tag plus one directly
    following newline, if there is one.

    NOTE(review): ``self.find`` appears to return an offset relative to the
    current position and a negative value when nothing is found; and
    ``self.error`` presumably raises -- confirm against their definitions.
    """
    opening, closing = '[CODE]\n', '[/CODE]'
    self.move_pos(len(opening))
    close_at = self.find(closing)
    if close_at < 0:
        self.error('Missing [/CODE] for block')
    block = CodeBlock(self.text_to(close_at))
    self.move_pos(close_at + len(closing))
    # Swallow a single newline directly after the closing tag.
    next_char = self.text_to(1)
    if next_char == '\n':
        self.move_pos(1)
    return block
def get_code_text(self):
self.move_pos(len('``'))
end = self.find('``')
@ -435,15 +467,6 @@ class FFMLProcessor:
self.move_pos(end + len('``'))
return node
def get_italic_text(self):
    """Consume a single-backquote italic span and return an ItalicTextNode.

    The cursor is moved past the opening backquote, the text up to the next
    backquote becomes the node's content, and the cursor is then moved past
    that closing backquote.

    NOTE(review): ``self.find`` appears to return an offset relative to the
    current position and a negative value when nothing is found; and
    ``self.error`` presumably raises -- confirm against their definitions.
    """
    self.move_pos(1)
    close_at = self.find('`')
    if close_at < 0:
        self.error('Missing closing "`" for italics')
    italic = ItalicTextNode(self.text_to(close_at))
    self.move_pos(close_at + 1)
    return italic
def get_gui_label(self):
self.move_pos(len(':guilabel:`'))
end = self.find('`')
@ -453,15 +476,13 @@ class FFMLProcessor:
self.move_pos(end + len('`'))
return node
def get_code_block(self):
self.move_pos(len('[CODE]\n'))
end = self.find('[/CODE]')
def get_italic_text(self):
self.move_pos(1)
end = self.find('`')
if end < 0:
self.error('Missing [/CODE] for block')
node = CodeBlock(self.text_to(end))
self.move_pos(end + len('[/CODE]'))
if self.text_to(1) == '\n':
self.move_pos(1)
self.error('Missing closing "`" for italics')
node = ItalicTextNode(self.text_to(end))
self.move_pos(end + 1)
return node
def get_list(self):
@ -480,6 +501,15 @@ class FFMLProcessor:
self.move_pos(1)
return list_node
def get_ref(self):
    """Consume a ``:ref:`` construct and return it as a RefNode.

    The cursor is moved past the ``:ref:`` prefix and its opening backquote,
    the text up to the next backquote becomes the referenced name, and the
    cursor is then moved past that closing backquote.

    NOTE(review): ``self.find`` appears to return an offset relative to the
    current position and a negative value when nothing is found;
    ``self.error`` presumably raises; and ``text_to_no_newline`` presumably
    rejects names spanning lines -- confirm against their definitions.
    """
    backquote = '`'
    self.move_pos(len(':ref:`'))
    close_at = self.find(backquote)
    if close_at < 0:
        self.error('Missing ` (backquote) for :ref:')
    name = self.text_to_no_newline(close_at, 'REF (:ref:`)')
    ref_node = RefNode(name)
    self.move_pos(close_at + len(backquote))
    return ref_node
def get_url(self):
self.move_pos(len('[URL'))
hp = self.find('href="')
@ -507,27 +537,29 @@ class FFMLProcessor:
txt = self.text_to(p).replace('\n', ' ')
parent.add_child(TextNode(txt))
self.move_pos(p)
elif p == NodeKinds.BLANK_LINE:
parent.add_child(BlankLineNode())
self.move_pos(2)
elif p == NodeKinds.CODE_TEXT:
parent.add_child(self.get_code_text())
elif p == NodeKinds.CODE_BLOCK:
parent.add_child(self.get_code_block())
elif p == NodeKinds.GUI_LABEL:
parent.add_child(self.get_gui_label())
elif p == NodeKinds.ITALIC_TEXT:
parent.add_child(self.get_italic_text())
elif p == NodeKinds.LIST:
parent.add_child(self.get_list())
elif p == NodeKinds.LIST_ITEM:
return parent
elif p == NodeKinds.END_LIST:
return parent
elif p == NodeKinds.BLANK_LINE:
parent.add_child(BlankLineNode())
self.move_pos(2)
elif p == NodeKinds.ITALIC_TEXT:
parent.add_child(self.get_italic_text())
elif p == NodeKinds.GUI_LABEL:
parent.add_child(self.get_gui_label())
elif p == NodeKinds.REF:
parent.add_child(self.get_ref())
elif p == NodeKinds.URL:
parent.add_child(self.get_url())
else:
self.move_pos(p+1)
self.error(f'Fatal parse error with node type {p}')
if self.at_end():
break
return parent

View File

@ -701,7 +701,7 @@ r'''
``lookup(value, [ pattern, key, ]* else_key)`` -- The patterns will be checked against
the value in order. If a pattern matches then the value of the field named by
``key`` is returned. If no pattern matches then the value of the field named by
``else_key`` is returned. See also the ``switch()`` function.
``else_key`` is returned. See also the :ref:`switch` function.
''')
def evaluate(self, formatter, kwargs, mi, locals, val, *args):
@ -986,7 +986,8 @@ r'''
``re_group(value, pattern [, template_for_group]*)`` -- return a string made by
applying the regular expression pattern to ``value`` and replacing each matched
instance with the value returned by the corresponding template. In
`Template Program Mode`, like for the ``template`` and the
[URL href="https://manual.calibre-ebook.com/template_lang.html#more-complex-programs-in-template-expressions-template-program-mode"]
Template Program Mode[/URL], like for the ``template`` and the
``eval`` functions, you use ``[[`` for ``{`` and ``]]`` for ``}``.
The following example looks for a series with more than one word and uppercases the first word:
@ -1210,8 +1211,8 @@ r'''
``formats_modtimes(date_format_string)`` -- return a comma-separated list of
colon-separated items ``FMT:DATE`` representing modification times for the
formats of a book. The ``date_format_string`` parameter specifies how the date
is to be formatted. See the ``format_date()`` function for details. You can use
the ``select()`` function to get the modification time for a specific format. Note
is to be formatted. See the :ref:`format_date` function for details. You can use
the :ref:`select` function to get the modification time for a specific format. Note
that format names are always uppercase, as in EPUB.
''')
@ -1293,10 +1294,11 @@ r'''
number using a Python formatting template such as ``{0:5.2f}`` or ``{0:,d}`` or
``${0:5,.2f}``. The formatting template must begin with ``{0:`` and end with
``}`` as in the above examples. Exception: you can leave off the leading "{0:"
and trailing "}" if the format template contains only a format. See the template
language and the [URL href="https://docs.python.org/3/library/string.html#formatstrings"]
Python documentation[/URL]
for more examples. Returns the empty string if formatting fails.
and trailing "}" if the format template contains only a format. See the
[URL href="https://manual.calibre-ebook.com/template_lang.html"]
Template Language[/URL] and the
[URL href="https://docs.python.org/3/library/string.html#formatstrings"]
Python[/URL] documentation for more examples. Returns the empty string if formatting fails.
''')
def evaluate(self, formatter, kwargs, mi, locals, val, template):
@ -1490,7 +1492,7 @@ class BuiltinFormatDateField(BuiltinFormatterFunction):
r'''
``format_date_field(field_name, format_string)`` -- format the value in the
field ``field_name``, which must be the lookup name of a date field, either
standard or custom. See ``format_date()`` for the formatting codes. This
standard or custom. See :ref:`format_date` for the formatting codes. This
function is much faster than :ref:`format_date` and should be used when you are
formatting the value in a field (column). It is also more reliable because it
works directly on the underlying date. It can't be used for computed dates or
@ -2882,8 +2884,8 @@ r'''
(the empty string). If the optional parameter ``pattern`` (a regular expression)
is supplied then the list is filtered to files that match ``pattern`` before the
files are counted. The pattern match is case insensitive. See also the functions
``extra_file_names()``, ``extra_file_size()`` and ``extra_file_modtime()``. This
function can be used only in the GUI.
:ref:`extra_file_names`, :ref:`extra_file_size` and :ref:`extra_file_modtime`.
This function can be used only in the GUI.
''')
def evaluate(self, formatter, kwargs, mi, locals, *args):
@ -2913,8 +2915,8 @@ r'''
extra files in the book's ``data/`` folder. If the optional parameter
``pattern``, a regular expression, is supplied then the list is filtered to
files that match ``pattern``. The pattern match is case insensitive. See also
the functions ``has_extra_files()``, ``extra_file_modtime()`` and
``extra_file_size()``. This function can be used only in the GUI.
the functions :ref:`has_extra_files`, :ref:`extra_file_modtime` and
:ref:`extra_file_size`. This function can be used only in the GUI.
''')
def evaluate(self, formatter, kwargs, mi, locals, sep, *args):
@ -2968,10 +2970,10 @@ r'''
``extra_file_modtime(file_name, format_string)`` -- returns the modification
time of the extra file ``file_name`` in the book's ``data/`` folder if it
exists, otherwise ``-1``. The modtime is formatted according to
``format_string`` (see ``format_date()`` for details). If ``format_string`` is
``format_string`` (see :ref:`format_date` for details). If ``format_string`` is
the empty string, returns the modtime as the floating point number of seconds
since the epoch. See also the functions ``has_extra_files()``,
``extra_file_names()`` and ``extra_file_size()``. The epoch is OS dependent.
since the epoch. See also the functions :ref:`has_extra_files`,
:ref:`extra_file_names` and :ref:`extra_file_size`. The epoch is OS dependent.
This function can be used only in the GUI.
''')