From bb2950da4c566941c425aaf35663c73cc1b8875d Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Wed, 20 Nov 2024 14:21:51 +0000 Subject: [PATCH] Add the summary methods. Also fix bug in escaping. --- src/calibre/utils/ffml_processor.py | 133 +++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 20 deletions(-) diff --git a/src/calibre/utils/ffml_processor.py b/src/calibre/utils/ffml_processor.py index 45a61c2a35..0da3eff675 100644 --- a/src/calibre/utils/ffml_processor.py +++ b/src/calibre/utils/ffml_processor.py @@ -14,16 +14,18 @@ class NodeKinds(IntEnum): DOCUMENT = -1 BLANK_LINE = -2 BOLD_TEXT = -3 - CODE_TEXT = -4 - CODE_BLOCK = -5 - END_LIST = -6 - GUI_LABEL = -7 - ITALIC_TEXT = -8 - LIST = -9 - LIST_ITEM = -10 - REF = -11 - TEXT = -12 - URL = -13 + CHARACTER = -4 + CODE_TEXT = -5 + CODE_BLOCK = -6 + END_LIST = -7 + GUI_LABEL = -8 + ITALIC_TEXT = -9 + LIST = -10 + LIST_ITEM = -11 + REF = -12 + END_SUMMARY = -13 + TEXT = -14 + URL = -15 class Node: @@ -42,7 +44,7 @@ class Node: return self._children def text(self): - return self._text.replace('\\', '') + return self._text def escaped_text(self): return prepare_string_for_xml(self.text()) @@ -61,6 +63,13 @@ class BoldTextNode(Node): self._text = text +class CharacterNode(Node): + + def __init__(self, character): + super().__init__(NodeKinds.CHARACTER) + self._text = character + + class CodeBlock(Node): def __init__(self, code_text): @@ -82,6 +91,12 @@ class DocumentNode(Node): self._children = [] +class EndSummaryNode(Node): + + def __init__(self): + super().__init__(NodeKinds.END_SUMMARY) + + class GuiLabelNode(Node): def __init__(self, text): @@ -202,6 +217,12 @@ class FFMLProcessor: [/CODE] [/LIST] + - end of summary marker. A summary is generated from the first characters of + the documentation. The summary includes text up to a \[/] tag. There is no + opening tag because the summary starts at the first character. If there is + no \[/] tag then all the document is used for the summary. The \[/] tag + is not replaced with white space or any other character. + - escaped character: precede the character with a backslash. This is useful to escape tags. For example to make the [CODE] tag not a tag, use \[CODE]. @@ -241,7 +262,7 @@ class FFMLProcessor: :param indent: The indent level of the tree. The outermost root should have an indent of zero. """ - if node.node_kind() in (NodeKinds.TEXT, NodeKinds.CODE_TEXT, + if node.node_kind() in (NodeKinds.TEXT, NodeKinds.CODE_TEXT, NodeKinds.CHARACTER, NodeKinds.CODE_BLOCK, NodeKinds.ITALIC_TEXT, NodeKinds.GUI_LABEL, NodeKinds.BOLD_TEXT): print(f'{" " * indent}{node.node_kind().name}:{node.text()}') @@ -288,10 +309,14 @@ class FFMLProcessor: result += f'{tree.escaped_text()}' elif tree.node_kind() == NodeKinds.BLANK_LINE: result += '\n
\n
\n' + elif tree.node_kind() == NodeKinds.CHARACTER: + result += tree.text() elif tree.node_kind() == NodeKinds.CODE_TEXT: result += f'{tree.escaped_text()}' elif tree.node_kind() == NodeKinds.CODE_BLOCK: result += f'
{tree.escaped_text().rstrip()}
' + elif tree.node_kind() == NodeKinds.END_SUMMARY: + pass elif tree.node_kind() == NodeKinds.GUI_LABEL: result += f'{tree.escaped_text()}' elif tree.node_kind() == NodeKinds.ITALIC_TEXT: @@ -300,16 +325,16 @@ class FFMLProcessor: result += '\n\n' elif tree.node_kind() == NodeKinds.REF: - result += f'{tree.escaped_text()}()' + result += f'{tree.text()}' elif tree.node_kind() == NodeKinds.URL: result += f'{tree.escaped_label()}' elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM): for child in tree.children(): - result += self.tree_to_html(child, depth+1) + result += self.tree_to_html(child, depth=depth+1) return result def document_to_html(self, document, name): @@ -327,6 +352,29 @@ class FFMLProcessor: tree = self.parse_document(document, name) return self.tree_to_html(tree, 0) + def document_to_summary_html(self, document, name): + """ + Given a document in the Formatter Function Markup Language (FFML), return + that document's summary in HTML format. + + :param document: the text in FFML. + :param name: the name of the document, used during error + processing. It is usually the name of the function. + + :return: a string containing the HTML + + """ + document = document.strip() + sum_tag = document.find('[/]') + if sum_tag > 0: + document = document[0:sum_tag] + fname = document[0:document.find('(')].lstrip('`') + tree = self.parse_document(document, name) + result = self.tree_to_html(tree, depth=0) + paren = result.find('(') + result = f'{fname}{result[paren:]}' + return result + def tree_to_rst(self, tree, indent, result=None): """ Given a Formatter Function Markup Language (FFML) parse tree, return @@ -356,6 +404,8 @@ class FFMLProcessor: result += '\n\n' elif tree.node_kind() == NodeKinds.BOLD_TEXT: indent_text(f'**{tree.text()}**') + elif tree.node_kind() == NodeKinds.CHARACTER: + result += tree.text() elif tree.node_kind() == NodeKinds.CODE_BLOCK: result += f"\n\n{' ' * indent}::\n\n" for line in tree.text().strip().split('\n'): @@ -363,6 +413,8 @@ class FFMLProcessor: result += '\n' elif tree.node_kind() == NodeKinds.CODE_TEXT: indent_text(f'``{tree.text()}``') + elif tree.node_kind() == NodeKinds.END_SUMMARY: + pass elif tree.node_kind() == NodeKinds.GUI_LABEL: indent_text(f':guilabel:`{tree.text()}`') elif tree.node_kind() == NodeKinds.ITALIC_TEXT: @@ -371,7 +423,7 @@ class FFMLProcessor: result += '\n\n' for child in tree.children(): result += f"{' ' * (indent)}* " - result = self.tree_to_rst(child, indent+1, result) + result = self.tree_to_rst(child, indent+1, result=result) result += '\n' result += '\n' elif tree.node_kind() == NodeKinds.REF: @@ -384,7 +436,7 @@ class FFMLProcessor: indent_text(f'`{tree.label()} <{tree.url()}>`_') elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM): for child in tree.children(): - result = self.tree_to_rst(child, indent, result) + result = self.tree_to_rst(child, indent, result=result) return result def document_to_rst(self, document, name, indent=0, prefix=None): @@ -410,19 +462,51 @@ class FFMLProcessor: doc = prefix + doc.lstrip(' ' * indent) return doc + def document_to_summary_rst(self, document, name, indent=0, prefix=None): + """ + Given a document in the Formatter Function Markup Language (FFML), return + that document's summary in RST (sphinx reStructuredText) format. + + :param document: the text in FFML. + :param name: the name of the document, used during error + processing. It is usually the name of the function. + :param indent: the indenting level of the items in the tree. This is + usually zero, but can be greater than zero if you want + the RST output indented. + :param prefix: string. if supplied, this string replaces the indent + on the first line of the output. This permits specifying + an RST block, for example a bullet list + + :return: a string containing the RST text + + """ + document = document.strip() + sum_tag = document.find('[/]') + if sum_tag > 0: + document = document[0:sum_tag] + fname = document[0:document.find('(')].lstrip('`') + doc = self.tree_to_rst(self.parse_document(document, name), indent) + lparen = doc.find('(') + doc = f':ref:`ff_{fname}`\\ ``{doc[lparen:]}' + if prefix is not None: + doc = prefix + doc.lstrip(' ' * indent) + return doc + # ============== Internal methods ================= keywords = {'``': NodeKinds.CODE_TEXT, # must be before '`' '`': NodeKinds.ITALIC_TEXT, '[B]': NodeKinds.BOLD_TEXT, '[CODE]': NodeKinds.CODE_BLOCK, + '[/]': NodeKinds.END_SUMMARY, ':guilabel:': NodeKinds.GUI_LABEL, '[LIST]': NodeKinds.LIST, '[/LIST]': NodeKinds.END_LIST, ':ref:': NodeKinds.REF, '[URL': NodeKinds.URL, '[*]': NodeKinds.LIST_ITEM, - '\n\n': NodeKinds.BLANK_LINE + '\n\n': NodeKinds.BLANK_LINE, + '\\': NodeKinds.CHARACTER } def __init__(self): @@ -437,8 +521,6 @@ class FFMLProcessor: p = self.input.find(for_what, self.input_pos) if p < 0: return -1 - while p > 0 and self.input[p-1] == '\\': - p = self.input.find(for_what, p+1) return -1 if p < 0 else p - self.input_pos def move_pos(self, to_where): @@ -486,6 +568,12 @@ class FFMLProcessor: self.move_pos(end + len('[/B]')) return node + def get_character(self): + self.move_pos(1) + node = CharacterNode(self.text_to(1)) + self.move_pos(1) + return node + def get_code_block(self): self.move_pos(len('[CODE]')) if self.text_to(1) == '\n': @@ -583,10 +671,15 @@ class FFMLProcessor: self.move_pos(2) elif p == NodeKinds.BOLD_TEXT: parent.add_child(self.get_bold_text()) + elif p == NodeKinds.CHARACTER: + parent.add_child(self.get_character()) elif p == NodeKinds.CODE_TEXT: parent.add_child(self.get_code_text()) elif p == NodeKinds.CODE_BLOCK: parent.add_child(self.get_code_block()) + elif p == NodeKinds.END_SUMMARY: + parent.add_child(EndSummaryNode()) + self.move_pos(3) elif p == NodeKinds.GUI_LABEL: parent.add_child(self.get_gui_label()) elif p == NodeKinds.ITALIC_TEXT: