From bb2950da4c566941c425aaf35663c73cc1b8875d Mon Sep 17 00:00:00 2001
From: Charles Haley <cbhaley@i.wont.say.com>
Date: Wed, 20 Nov 2024 14:21:51 +0000
Subject: [PATCH] Add the summary methods. Also fix bug in escaping.

---
 src/calibre/utils/ffml_processor.py | 133 +++++++++++++++++++++++-----
 1 file changed, 113 insertions(+), 20 deletions(-)
diff --git a/src/calibre/utils/ffml_processor.py b/src/calibre/utils/ffml_processor.py
index 45a61c2a35..0da3eff675 100644
--- a/src/calibre/utils/ffml_processor.py
+++ b/src/calibre/utils/ffml_processor.py
@@ -14,16 +14,18 @@ class NodeKinds(IntEnum):
     DOCUMENT    = -1
     BLANK_LINE  = -2
     BOLD_TEXT   = -3
-    CODE_TEXT   = -4
-    CODE_BLOCK  = -5
-    END_LIST    = -6
-    GUI_LABEL   = -7
-    ITALIC_TEXT = -8
-    LIST        = -9
-    LIST_ITEM   = -10
-    REF         = -11
-    TEXT        = -12
-    URL         = -13
+    CHARACTER   = -4
+    CODE_TEXT   = -5
+    CODE_BLOCK  = -6
+    END_LIST    = -7
+    GUI_LABEL   = -8
+    ITALIC_TEXT = -9
+    LIST        = -10
+    LIST_ITEM   = -11
+    REF         = -12
+    END_SUMMARY = -13
+    TEXT        = -14
+    URL         = -15
 
 
 class Node:
@@ -42,7 +44,7 @@ class Node:
         return self._children
 
     def text(self):
-        return self._text.replace('\\', '')
+        return self._text
 
     def escaped_text(self):
         return prepare_string_for_xml(self.text())
@@ -61,6 +63,13 @@ class BoldTextNode(Node):
         self._text = text
 
 
+class CharacterNode(Node):  # parse-tree node for one backslash-escaped character
+
+    def __init__(self, character):
+        super().__init__(NodeKinds.CHARACTER)
+        self._text = character  # the text that Node.text() returns for this node
+
+
 class CodeBlock(Node):
 
     def __init__(self, code_text):
@@ -82,6 +91,12 @@ class DocumentNode(Node):
         self._children = []
 
 
+class EndSummaryNode(Node):  # records where a [/] end-of-summary tag was parsed
+
+    def __init__(self):
+        super().__init__(NodeKinds.END_SUMMARY)  # no text; HTML and RST output skip it
+
+
 class GuiLabelNode(Node):
 
     def __init__(self, text):
@@ -202,6 +217,12 @@ class FFMLProcessor:
       [/CODE]
       [/LIST]
 
+    - end of summary marker. A summary is generated from the first characters of
+      the documentation. The summary includes text up to a \[/] tag. There is no
+      opening tag because the summary starts at the first character. If there is
+      no \[/] tag then the entire document is used for the summary. The \[/] tag
+      is not replaced with white space or any other character.
+
     - escaped character: precede the character with a backslash. This is useful
       to escape tags. For example to make the [CODE] tag not a tag, use \[CODE].
 
@@ -241,7 +262,7 @@ class FFMLProcessor:
         :param indent: The indent level of the tree. The outermost root should
                        have an indent of zero.
         """
-        if node.node_kind() in (NodeKinds.TEXT, NodeKinds.CODE_TEXT,
+        if node.node_kind() in (NodeKinds.TEXT, NodeKinds.CODE_TEXT, NodeKinds.CHARACTER,
                                 NodeKinds.CODE_BLOCK, NodeKinds.ITALIC_TEXT,
                                 NodeKinds.GUI_LABEL, NodeKinds.BOLD_TEXT):
             print(f'{" " * indent}{node.node_kind().name}:{node.text()}')
@@ -288,10 +309,14 @@ class FFMLProcessor:
             result += f'<b>{tree.escaped_text()}</b>'
         elif tree.node_kind() == NodeKinds.BLANK_LINE:
             result += '\n<br>\n<br>\n'
+        elif tree.node_kind() == NodeKinds.CHARACTER:
+            result += tree.text()
         elif tree.node_kind() == NodeKinds.CODE_TEXT:
             result += f'<code>{tree.escaped_text()}</code>'
         elif tree.node_kind() == NodeKinds.CODE_BLOCK:
             result += f'<pre style="margin-left:2em"><code>{tree.escaped_text().rstrip()}</code></pre>'
+        elif tree.node_kind() == NodeKinds.END_SUMMARY:
+            pass
         elif tree.node_kind() == NodeKinds.GUI_LABEL:
             result += f'<span style="font-family: Sans-Serif">{tree.escaped_text()}</span>'
         elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
@@ -300,16 +325,16 @@ class FFMLProcessor:
             result += '\n<ul>\n'
             for child in tree.children():
                 result += '<li>\n'
-                result += self.tree_to_html(child, depth+1)
+                result += self.tree_to_html(child, depth=depth+1)
                 result += '</li>\n'
             result += '</ul>\n'
         elif tree.node_kind() == NodeKinds.REF:
-            result += f'<code>{tree.escaped_text()}()</code>'
+            result += f'<a href="ffdoc:{tree.text()}">{tree.text()}</a>'  # exactly one closing tag
         elif tree.node_kind() == NodeKinds.URL:
             result += f'<a href="{tree.escaped_url()}">{tree.escaped_label()}</a>'
         elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM):
             for child in tree.children():
-                result += self.tree_to_html(child, depth+1)
+                result += self.tree_to_html(child, depth=depth+1)
         return result
 
     def document_to_html(self, document, name):
@@ -327,6 +352,29 @@ class FFMLProcessor:
         tree = self.parse_document(document, name)
         return self.tree_to_html(tree, 0)
 
+    def document_to_summary_html(self, document, name):
+        """
+        Given a document in the Formatter Function Markup Language (FFML), return
+        that document's summary in HTML format: the text before the first [/] tag
+        (or all of it if there is none), with the leading function name as a link.
+
+        :param document: the text in FFML.
+        :param name: the name of the document, used during error
+                     processing. It is usually the name of the function.
+
+        :return: a string containing the HTML
+        """
+        document = document.strip()
+        sum_tag = document.find('[/]')
+        if sum_tag >= 0:  # find() returns -1 for "no tag"; index 0 is a real tag
+            document = document[0:sum_tag]
+        result = self.tree_to_html(self.parse_document(document, name), depth=0)
+        name_end, paren = document.find('('), result.find('(')
+        if name_end < 0 or paren < 0:  # no "name(" prefix, so nothing to link
+            return result
+        fname = document[0:name_end].lstrip('`')
+        return f'<a href="ffdoc:{fname}">{fname}</a>{result[paren:]}'
+
     def tree_to_rst(self, tree, indent, result=None):
         """
         Given a Formatter Function Markup Language (FFML) parse tree, return
@@ -356,6 +404,8 @@ class FFMLProcessor:
             result += '\n\n'
         elif tree.node_kind() == NodeKinds.BOLD_TEXT:
             indent_text(f'**{tree.text()}**')
+        elif tree.node_kind() == NodeKinds.CHARACTER:
+            result += tree.text()
         elif tree.node_kind() == NodeKinds.CODE_BLOCK:
             result += f"\n\n{'  ' * indent}::\n\n"
             for line in tree.text().strip().split('\n'):
@@ -363,6 +413,8 @@ class FFMLProcessor:
             result += '\n'
         elif tree.node_kind() == NodeKinds.CODE_TEXT:
             indent_text(f'``{tree.text()}``')
+        elif tree.node_kind() == NodeKinds.END_SUMMARY:
+            pass
         elif tree.node_kind() == NodeKinds.GUI_LABEL:
             indent_text(f':guilabel:`{tree.text()}`')
         elif tree.node_kind() == NodeKinds.ITALIC_TEXT:
@@ -371,7 +423,7 @@ class FFMLProcessor:
             result += '\n\n'
             for child in tree.children():
                 result += f"{'  ' * (indent)}* "
-                result = self.tree_to_rst(child, indent+1, result)
+                result = self.tree_to_rst(child, indent+1, result=result)
                 result += '\n'
             result += '\n'
         elif tree.node_kind() == NodeKinds.REF:
@@ -384,7 +436,7 @@ class FFMLProcessor:
             indent_text(f'`{tree.label()} <{tree.url()}>`_')
         elif tree.node_kind() in (NodeKinds.DOCUMENT, NodeKinds.LIST_ITEM):
             for child in tree.children():
-                result = self.tree_to_rst(child, indent, result)
+                result = self.tree_to_rst(child, indent, result=result)
         return result
 
     def document_to_rst(self, document, name, indent=0, prefix=None):
@@ -410,19 +462,51 @@ class FFMLProcessor:
             doc = prefix + doc.lstrip('  ' * indent)
         return doc
 
+    def document_to_summary_rst(self, document, name, indent=0, prefix=None):
+        """
+        Given a document in the Formatter Function Markup Language (FFML), return
+        that document's summary in RST (sphinx reStructuredText) format.
+
+        :param document: the text in FFML.
+        :param name:     the name of the document, used during error
+                         processing. It is usually the name of the function.
+        :param indent:   the indenting level of the items in the tree. This is
+                         usually zero, but can be greater than zero if you want
+                         the RST output indented.
+        :param prefix:   string. If supplied, this string replaces the indent
+                         on the first line of the output. This permits specifying
+                         an RST block, for example a bullet list.
+
+        :return: a string containing the RST text
+        """
+        document = document.strip()
+        sum_tag = document.find('[/]')
+        if sum_tag >= 0:  # find() returns -1 for "no tag"; index 0 is a real tag
+            document = document[0:sum_tag]
+        doc = self.tree_to_rst(self.parse_document(document, name), indent)
+        name_end, lparen = document.find('('), doc.find('(')
+        if name_end >= 0 and lparen >= 0:  # only link when a "name(" prefix exists
+            fname = document[0:name_end].lstrip('`')
+            doc = f':ref:`ff_{fname}`\\ ``{doc[lparen:]}'
+        if prefix is not None:
+            doc = prefix + doc.lstrip('  ' * indent)
+        return doc
+
 # ============== Internal methods =================
 
     keywords = {'``':           NodeKinds.CODE_TEXT, # must be before '`'
                 '`':            NodeKinds.ITALIC_TEXT,
                 '[B]':          NodeKinds.BOLD_TEXT,
                 '[CODE]':       NodeKinds.CODE_BLOCK,
+                '[/]':          NodeKinds.END_SUMMARY,
                 ':guilabel:':   NodeKinds.GUI_LABEL,
                 '[LIST]':       NodeKinds.LIST,
                 '[/LIST]':      NodeKinds.END_LIST,
                 ':ref:':        NodeKinds.REF,
                 '[URL':         NodeKinds.URL,
                 '[*]':          NodeKinds.LIST_ITEM,
-                '\n\n':         NodeKinds.BLANK_LINE
+                '\n\n':         NodeKinds.BLANK_LINE,
+                '\\':           NodeKinds.CHARACTER
             }
 
     def __init__(self):
@@ -437,8 +521,6 @@ class FFMLProcessor:
         p = self.input.find(for_what, self.input_pos)
         if p < 0:
             return -1
-        while p > 0 and self.input[p-1] == '\\':
-            p = self.input.find(for_what, p+1)
         return -1 if p < 0 else p - self.input_pos
 
     def move_pos(self, to_where):
@@ -486,6 +568,12 @@ class FFMLProcessor:
         self.move_pos(end + len('[/B]'))
         return node
 
+    def get_character(self):
+        self.move_pos(1)  # step over the backslash that introduced the escape
+        node = CharacterNode(self.text_to(1))  # presumably the single character now at the cursor
+        self.move_pos(1)  # step over the escaped character so it is not parsed again
+        return node
+
     def get_code_block(self):
         self.move_pos(len('[CODE]'))
         if self.text_to(1) == '\n':
@@ -583,10 +671,15 @@ class FFMLProcessor:
                 self.move_pos(2)
             elif p == NodeKinds.BOLD_TEXT:
                 parent.add_child(self.get_bold_text())
+            elif p == NodeKinds.CHARACTER:
+                parent.add_child(self.get_character())
             elif p == NodeKinds.CODE_TEXT:
                 parent.add_child(self.get_code_text())
             elif p == NodeKinds.CODE_BLOCK:
                 parent.add_child(self.get_code_block())
+            elif p == NodeKinds.END_SUMMARY:
+                parent.add_child(EndSummaryNode())
+                self.move_pos(3)
             elif p == NodeKinds.GUI_LABEL:
                 parent.add_child(self.get_gui_label())
             elif p == NodeKinds.ITALIC_TEXT: