This commit is contained in:
Kovid Goyal 2024-11-12 20:41:59 +05:30
commit 1ee299ee2a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 148 additions and 114 deletions

View File

@ -12,16 +12,17 @@ from calibre import prepare_string_for_xml
class NodeKinds(IntEnum): class NodeKinds(IntEnum):
DOCUMENT = -1 DOCUMENT = -1
CODE_TEXT = -2 BLANK_LINE = -2
CODE_BLOCK = -3 CODE_TEXT = -3
URL = -4 CODE_BLOCK = -4
BLANK_LINE = -5 END_LIST = -5
TEXT = -6 GUI_LABEL = -6
LIST = -7 ITALIC_TEXT = -7
END_LIST = -8 LIST = -8
LIST_ITEM = -9 LIST_ITEM = -9
GUI_LABEL = -10 REF = -10
ITALIC_TEXT = -11 TEXT = -11
URL = -12
class Node: class Node:
@ -46,18 +47,10 @@ class Node:
return prepare_string_for_xml(self._text) return prepare_string_for_xml(self._text)
class DocumentNode(Node): class BlankLineNode(Node):
def __init__(self): def __init__(self):
super().__init__(NodeKinds.DOCUMENT) super().__init__(NodeKinds.BLANK_LINE)
self._children = []
class TextNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.TEXT)
self._text = text
class CodeBlock(Node): class CodeBlock(Node):
@ -74,10 +67,51 @@ class CodeText(Node):
self._text = code_text self._text = code_text
class BlankLineNode(Node): class DocumentNode(Node):
def __init__(self): def __init__(self):
super().__init__(NodeKinds.BLANK_LINE) super().__init__(NodeKinds.DOCUMENT)
self._children = []
class GuiLabelNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.GUI_LABEL)
self._text = text
class ItalicTextNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.ITALIC_TEXT)
self._text = text
class ListItemNode(Node):
def __init__(self):
super().__init__(NodeKinds.LIST_ITEM)
class ListNode(Node):
def __init__(self):
super().__init__(NodeKinds.LIST)
class RefNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.REF)
self._text = text
class TextNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.TEXT)
self._text = text
class UrlNode(Node): class UrlNode(Node):
@ -100,32 +134,6 @@ class UrlNode(Node):
return prepare_string_for_xml(self._url) return prepare_string_for_xml(self._url)
class ListNode(Node):
def __init__(self):
super().__init__(NodeKinds.LIST)
class ListItemNode(Node):
def __init__(self):
super().__init__(NodeKinds.LIST_ITEM)
class ItalicTextNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.ITALIC_TEXT)
self._text = text
class GuiLabelNode(Node):
def __init__(self, text):
super().__init__(NodeKinds.GUI_LABEL)
self._text = text
class FFMLProcessor: class FFMLProcessor:
""" """
@ -149,6 +157,15 @@ class FFMLProcessor:
- URLs. The syntax is similar to BBCODE: [URL href="http..."]Link text[/URL]. - URLs. The syntax is similar to BBCODE: [URL href="http..."]Link text[/URL].
Example: [URL href="https://en.wikipedia.org/wiki/ISO_8601"]ISO[/URL] Example: [URL href="https://en.wikipedia.org/wiki/ISO_8601"]ISO[/URL]
- Internal function reference links. These are links to some formatter function
documentation. The syntax is the same as guilabel. Example: :ref:`get_note`.
The characters '()' are automatically added to the function name when
displayed. For HTML it generates the same as the inline program code text
operator (``) with no link. Example: :ref:`add` produces <code>add()</code>.
For RST it generates a :ref: reference that works only in an RST document
containing formatter function documentation. Example: :ref:`get_note`
generates :ref:`get_note() <ff_get_note>`
- example program code text blocks. Surround the code block with [CODE] - example program code text blocks. Surround the code block with [CODE]
and [/CODE] tags. These tags must be first on a line. Example: and [/CODE] tags. These tags must be first on a line. Example:
[CODE] [CODE]
@ -175,8 +192,6 @@ class FFMLProcessor:
HTML output contains no CSS and does not start with a tag such as <DIV> or <P>. HTML output contains no CSS and does not start with a tag such as <DIV> or <P>.
RST output is not indented.
API example: generate documents for all builtin formatter functions API example: generate documents for all builtin formatter functions
-------------------- --------------------
from calibre.utils.ffml_processor import FFMLProcessor from calibre.utils.ffml_processor import FFMLProcessor
@ -253,18 +268,16 @@ class FFMLProcessor:
result = '' result = ''
if tree.node_kind() == NodeKinds.TEXT: if tree.node_kind() == NodeKinds.TEXT:
result += tree.escaped_text() result += tree.escaped_text()
elif tree.node_kind() == NodeKinds.BLANK_LINE:
result += '\n<br>\n<br>\n'
elif tree.node_kind() == NodeKinds.CODE_TEXT: elif tree.node_kind() == NodeKinds.CODE_TEXT:
result += f'<code>{tree.escaped_text()}</code>' result += f'<code>{tree.escaped_text()}</code>'
elif tree.node_kind() == NodeKinds.CODE_BLOCK: elif tree.node_kind() == NodeKinds.CODE_BLOCK:
result += f'<pre style="margin-left:2em"><code>{tree.escaped_text()}</code></pre>' result += f'<pre style="margin-left:2em"><code>{tree.escaped_text()}</code></pre>'
elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
result += f'<i>{tree.escaped_text()}</i>'
elif tree.node_kind() == NodeKinds.GUI_LABEL: elif tree.node_kind() == NodeKinds.GUI_LABEL:
result += f'<span style="font-family: Sans-Serif">{tree.escaped_text()}</span>' result += f'<span style="font-family: Sans-Serif">{tree.escaped_text()}</span>'
elif tree.node_kind() == NodeKinds.BLANK_LINE: elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
result += '\n<br>\n<br>\n' result += f'<i>{tree.escaped_text()}</i>'
elif tree.node_kind() == NodeKinds.URL:
result += f'<a href="{tree.escaped_url()}">{tree.escaped_label()}</a>'
elif tree.node_kind() == NodeKinds.LIST: elif tree.node_kind() == NodeKinds.LIST:
result += '\n<ul>\n' result += '\n<ul>\n'
for child in tree.children(): for child in tree.children():
@ -272,6 +285,10 @@ class FFMLProcessor:
result += self.tree_to_html(child, depth+1) result += self.tree_to_html(child, depth+1)
result += '</li>\n' result += '</li>\n'
result += '</ul>\n' result += '</ul>\n'
elif tree.node_kind() == NodeKinds.REF:
result += f'<code>{tree.escaped_text()}()</code>'
elif tree.node_kind() == NodeKinds.URL:
result += f'<a href="{tree.escaped_url()}">{tree.escaped_label()}</a>'
elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM): elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM):
for child in tree.children(): for child in tree.children():
result += self.tree_to_html(child, depth+1) result += self.tree_to_html(child, depth+1)
@ -306,29 +323,20 @@ class FFMLProcessor:
""" """
if result is None: if result is None:
result = ' ' * indent result = ' ' * indent
if tree.node_kind() == NodeKinds.TEXT:
txt = tree.text() if tree.node_kind() == NodeKinds.BLANK_LINE:
if not result: result += '\n\n'
txt = txt.lstrip()
elif result.endswith('\n'):
txt = txt.lstrip()
result += ' ' * indent
result += txt
elif tree.node_kind() == NodeKinds.CODE_TEXT:
result += f'``{tree.text()}``'
elif tree.node_kind() == NodeKinds.GUI_LABEL:
result += f':guilabel:`{tree.text()}`'
elif tree.node_kind() == NodeKinds.CODE_BLOCK: elif tree.node_kind() == NodeKinds.CODE_BLOCK:
result += f"\n\n{' ' * indent}::\n\n" result += f"\n\n{' ' * indent}::\n\n"
for line in tree.text().strip().split('\n'): for line in tree.text().strip().split('\n'):
result += f"{' ' * (indent+1)}{line}\n" result += f"{' ' * (indent+1)}{line}\n"
result += '\n' result += '\n'
elif tree.node_kind() == NodeKinds.BLANK_LINE: elif tree.node_kind() == NodeKinds.CODE_TEXT:
result += '\n\n' result += f'``{tree.text()}``'
elif tree.node_kind() == NodeKinds.GUI_LABEL:
result += f':guilabel:`{tree.text()}`'
elif tree.node_kind() == NodeKinds.ITALIC_TEXT: elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
result += f'`{tree.text()}`' result += f'`{tree.text()}`'
elif tree.node_kind() == NodeKinds.URL:
result += f'`{tree.label()} <{tree.url()}>`_'
elif tree.node_kind() == NodeKinds.LIST: elif tree.node_kind() == NodeKinds.LIST:
result += '\n\n' result += '\n\n'
for child in tree.children(): for child in tree.children():
@ -336,6 +344,18 @@ class FFMLProcessor:
result = self.tree_to_rst(child, indent+1, result) result = self.tree_to_rst(child, indent+1, result)
result += '\n' result += '\n'
result += '\n' result += '\n'
elif tree.node_kind() == NodeKinds.REF:
result += f':ref:`{tree.text()}() <ff_{tree.text()}>`'
elif tree.node_kind() == NodeKinds.TEXT:
txt = tree.text()
if not result:
txt = txt.lstrip()
elif result.endswith('\n'):
txt = txt.lstrip()
result += ' ' * indent
result += txt
elif tree.node_kind() == NodeKinds.URL:
result += f'`{tree.label()} <{tree.url()}>`_'
elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM): elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM):
for child in tree.children(): for child in tree.children():
result = self.tree_to_rst(child, indent, result) result = self.tree_to_rst(child, indent, result)
@ -368,11 +388,12 @@ class FFMLProcessor:
keywords = {'``': NodeKinds.CODE_TEXT, # must be before '`' keywords = {'``': NodeKinds.CODE_TEXT, # must be before '`'
'`': NodeKinds.ITALIC_TEXT, '`': NodeKinds.ITALIC_TEXT,
':guilabel:': NodeKinds.GUI_LABEL,
'[CODE]': NodeKinds.CODE_BLOCK, '[CODE]': NodeKinds.CODE_BLOCK,
'[URL': NodeKinds.URL, ':guilabel:': NodeKinds.GUI_LABEL,
'[LIST]': NodeKinds.LIST, '[LIST]': NodeKinds.LIST,
'[/LIST]': NodeKinds.END_LIST, '[/LIST]': NodeKinds.END_LIST,
':ref:': NodeKinds.REF,
'[URL': NodeKinds.URL,
'[*]': NodeKinds.LIST_ITEM, '[*]': NodeKinds.LIST_ITEM,
'\n\n': NodeKinds.BLANK_LINE '\n\n': NodeKinds.BLANK_LINE
} }
@ -426,6 +447,17 @@ class FFMLProcessor:
return min(positions) return min(positions)
return len(self.input) return len(self.input)
def get_code_block(self):
self.move_pos(len('[CODE]\n'))
end = self.find('[/CODE]')
if end < 0:
self.error('Missing [/CODE] for block')
node = CodeBlock(self.text_to(end))
self.move_pos(end + len('[/CODE]'))
if self.text_to(1) == '\n':
self.move_pos(1)
return node
def get_code_text(self): def get_code_text(self):
self.move_pos(len('``')) self.move_pos(len('``'))
end = self.find('``') end = self.find('``')
@ -435,15 +467,6 @@ class FFMLProcessor:
self.move_pos(end + len('``')) self.move_pos(end + len('``'))
return node return node
def get_italic_text(self):
self.move_pos(1)
end = self.find('`')
if end < 0:
self.error('Missing closing "`" for italics')
node = ItalicTextNode(self.text_to(end))
self.move_pos(end + 1)
return node
def get_gui_label(self): def get_gui_label(self):
self.move_pos(len(':guilabel:`')) self.move_pos(len(':guilabel:`'))
end = self.find('`') end = self.find('`')
@ -453,15 +476,13 @@ class FFMLProcessor:
self.move_pos(end + len('`')) self.move_pos(end + len('`'))
return node return node
def get_code_block(self): def get_italic_text(self):
self.move_pos(len('[CODE]\n'))
end = self.find('[/CODE]')
if end < 0:
self.error('Missing [/CODE] for block')
node = CodeBlock(self.text_to(end))
self.move_pos(end + len('[/CODE]'))
if self.text_to(1) == '\n':
self.move_pos(1) self.move_pos(1)
end = self.find('`')
if end < 0:
self.error('Missing closing "`" for italics')
node = ItalicTextNode(self.text_to(end))
self.move_pos(end + 1)
return node return node
def get_list(self): def get_list(self):
@ -480,6 +501,15 @@ class FFMLProcessor:
self.move_pos(1) self.move_pos(1)
return list_node return list_node
def get_ref(self):
self.move_pos(len(':ref:`'))
end = self.find('`')
if end < 0:
self.error('Missing ` (backquote) for :ref:')
node = RefNode(self.text_to_no_newline(end, 'REF (:ref:`)'))
self.move_pos(end + len('`'))
return node
def get_url(self): def get_url(self):
self.move_pos(len('[URL')) self.move_pos(len('[URL'))
hp = self.find('href="') hp = self.find('href="')
@ -507,27 +537,29 @@ class FFMLProcessor:
txt = self.text_to(p).replace('\n', ' ') txt = self.text_to(p).replace('\n', ' ')
parent.add_child(TextNode(txt)) parent.add_child(TextNode(txt))
self.move_pos(p) self.move_pos(p)
elif p == NodeKinds.BLANK_LINE:
parent.add_child(BlankLineNode())
self.move_pos(2)
elif p == NodeKinds.CODE_TEXT: elif p == NodeKinds.CODE_TEXT:
parent.add_child(self.get_code_text()) parent.add_child(self.get_code_text())
elif p == NodeKinds.CODE_BLOCK: elif p == NodeKinds.CODE_BLOCK:
parent.add_child(self.get_code_block()) parent.add_child(self.get_code_block())
elif p == NodeKinds.GUI_LABEL:
parent.add_child(self.get_gui_label())
elif p == NodeKinds.ITALIC_TEXT:
parent.add_child(self.get_italic_text())
elif p == NodeKinds.LIST: elif p == NodeKinds.LIST:
parent.add_child(self.get_list()) parent.add_child(self.get_list())
elif p == NodeKinds.LIST_ITEM: elif p == NodeKinds.LIST_ITEM:
return parent return parent
elif p == NodeKinds.END_LIST: elif p == NodeKinds.END_LIST:
return parent return parent
elif p == NodeKinds.BLANK_LINE: elif p == NodeKinds.REF:
parent.add_child(BlankLineNode()) parent.add_child(self.get_ref())
self.move_pos(2)
elif p == NodeKinds.ITALIC_TEXT:
parent.add_child(self.get_italic_text())
elif p == NodeKinds.GUI_LABEL:
parent.add_child(self.get_gui_label())
elif p == NodeKinds.URL: elif p == NodeKinds.URL:
parent.add_child(self.get_url()) parent.add_child(self.get_url())
else: else:
self.move_pos(p+1) self.error(f'Fatal parse error with node type {p}')
if self.at_end(): if self.at_end():
break break
return parent return parent

View File

@ -701,7 +701,7 @@ r'''
``lookup(value, [ pattern, key, ]* else_key)`` -- The patterns will be checked against ``lookup(value, [ pattern, key, ]* else_key)`` -- The patterns will be checked against
the value in order. If a pattern matches then the value of the field named by the value in order. If a pattern matches then the value of the field named by
``key`` is returned. If no pattern matches then the value of the field named by ``key`` is returned. If no pattern matches then the value of the field named by
``else_key`` is returned. See also the ``switch()`` function. ``else_key`` is returned. See also the :ref:`switch` function.
''') ''')
def evaluate(self, formatter, kwargs, mi, locals, val, *args): def evaluate(self, formatter, kwargs, mi, locals, val, *args):
@ -986,7 +986,8 @@ r'''
``re_group(value, pattern [, template_for_group]*)`` -- return a string made by ``re_group(value, pattern [, template_for_group]*)`` -- return a string made by
applying the regular expression pattern to ``value`` and replacing each matched applying the regular expression pattern to ``value`` and replacing each matched
instance with the value returned by the corresponding template. In instance with the value returned by the corresponding template. In
`Template Program Mode`, like for the ``template`` and the [URL href="https://manual.calibre-ebook.com/template_lang.html#more-complex-programs-in-template-expressions-template-program-mode"]
Template Program Mode[/URL], like for the ``template`` and the
``eval`` functions, you use ``[[`` for ``{`` and ``]]`` for ``}``. ``eval`` functions, you use ``[[`` for ``{`` and ``]]`` for ``}``.
The following example looks for a series with more than one word and uppercases the first word: The following example looks for a series with more than one word and uppercases the first word:
@ -1210,8 +1211,8 @@ r'''
``formats_modtimes(date_format_string)`` -- return a comma-separated list of ``formats_modtimes(date_format_string)`` -- return a comma-separated list of
colon-separated items ``FMT:DATE`` representing modification times for the colon-separated items ``FMT:DATE`` representing modification times for the
formats of a book. The ``date_format_string`` parameter specifies how the date formats of a book. The ``date_format_string`` parameter specifies how the date
is to be formatted. See the ``format_date()`` function for details. You can use is to be formatted. See the :ref:`format_date` function for details. You can use
the ``select()`` function to get the modification time for a specific format. Note the :ref:`select` function to get the modification time for a specific format. Note
that format names are always uppercase, as in EPUB. that format names are always uppercase, as in EPUB.
''') ''')
@ -1293,10 +1294,11 @@ r'''
number using a Python formatting template such as ``{0:5.2f}`` or ``{0:,d}`` or number using a Python formatting template such as ``{0:5.2f}`` or ``{0:,d}`` or
``${0:5,.2f}``. The formatting template must begin with ``{0:`` and end with ``${0:5,.2f}``. The formatting template must begin with ``{0:`` and end with
``}`` as in the above examples. Exception: you can leave off the leading "{0:" ``}`` as in the above examples. Exception: you can leave off the leading "{0:"
and trailing "}" if the format template contains only a format. See the template and trailing "}" if the format template contains only a format. See the
language and the [URL href="https://docs.python.org/3/library/string.html#formatstrings"] [URL href="https://manual.calibre-ebook.com/template_lang.html"]
Python documentation[/URL] Template Language[/URL] and the
for more examples. Returns the empty string if formatting fails. [URL href="https://docs.python.org/3/library/string.html#formatstrings"]
Python[/URL] documentation for more examples. Returns the empty string if formatting fails.
''') ''')
def evaluate(self, formatter, kwargs, mi, locals, val, template): def evaluate(self, formatter, kwargs, mi, locals, val, template):
@ -1490,7 +1492,7 @@ class BuiltinFormatDateField(BuiltinFormatterFunction):
r''' r'''
``format_date_field(field_name, format_string)`` -- format the value in the ``format_date_field(field_name, format_string)`` -- format the value in the
field ``field_name``, which must be the lookup name of a date field, either field ``field_name``, which must be the lookup name of a date field, either
standard or custom. See ``format_date()`` for the formatting codes. This standard or custom. See :ref:`format_date` for the formatting codes. This
function is much faster than format_date() and should be used when you are function is much faster than format_date() and should be used when you are
formatting the value in a field (column). It is also more reliable because it formatting the value in a field (column). It is also more reliable because it
works directly on the underlying date. It can't be used for computed dates or works directly on the underlying date. It can't be used for computed dates or
@ -2882,8 +2884,8 @@ r'''
(the empty string). If the optional parameter ``pattern`` (a regular expression) (the empty string). If the optional parameter ``pattern`` (a regular expression)
is supplied then the list is filtered to files that match ``pattern`` before the is supplied then the list is filtered to files that match ``pattern`` before the
files are counted. The pattern match is case insensitive. See also the functions files are counted. The pattern match is case insensitive. See also the functions
``extra_file_names()``, ``extra_file_size()`` and ``extra_file_modtime()``. This :ref:`extra_file_names`, :ref:`extra_file_size` and :ref:`extra_file_modtime`.
function can be used only in the GUI. This function can be used only in the GUI.
''') ''')
def evaluate(self, formatter, kwargs, mi, locals, *args): def evaluate(self, formatter, kwargs, mi, locals, *args):
@ -2913,8 +2915,8 @@ r'''
extra files in the book's ``data/`` folder. If the optional parameter extra files in the book's ``data/`` folder. If the optional parameter
``pattern``, a regular expression, is supplied then the list is filtered to ``pattern``, a regular expression, is supplied then the list is filtered to
files that match ``pattern``. The pattern match is case insensitive. See also files that match ``pattern``. The pattern match is case insensitive. See also
the functions ``has_extra_files()``, ``extra_file_modtime()`` and the functions :ref:`has_extra_files`, :ref:`extra_file_modtime` and
``extra_file_size()``. This function can be used only in the GUI. :ref:`extra_file_size`. This function can be used only in the GUI.
''') ''')
def evaluate(self, formatter, kwargs, mi, locals, sep, *args): def evaluate(self, formatter, kwargs, mi, locals, sep, *args):
@ -2968,10 +2970,10 @@ r'''
``extra_file_modtime(file_name, format_string)`` -- returns the modification ``extra_file_modtime(file_name, format_string)`` -- returns the modification
time of the extra file ``file_name`` in the book's ``data/`` folder if it time of the extra file ``file_name`` in the book's ``data/`` folder if it
exists, otherwise ``-1``. The modtime is formatted according to exists, otherwise ``-1``. The modtime is formatted according to
``format_string`` (see ``format_date()`` for details). If ``format_string`` is ``format_string`` (see :ref:`format_date` for details). If ``format_string`` is
the empty string, returns the modtime as the floating point number of seconds the empty string, returns the modtime as the floating point number of seconds
since the epoch. See also the functions ``has_extra_files()``, since the epoch. See also the functions :ref:`has_extra_files`,
``extra_file_names()`` and ``extra_file_size()``. The epoch is OS dependent. :ref:`extra_file_names` and :ref:`extra_file_size`. The epoch is OS dependent.
This function can be used only in the GUI. This function can be used only in the GUI.
''') ''')