diff --git a/src/calibre/gui2/markdown_syntax_highlighter.py b/src/calibre/gui2/markdown_syntax_highlighter.py index b458619646..adb280a7e4 100644 --- a/src/calibre/gui2/markdown_syntax_highlighter.py +++ b/src/calibre/gui2/markdown_syntax_highlighter.py @@ -14,38 +14,37 @@ from calibre.gui2.palette import dark_link_color, light_link_color class MarkdownHighlighter(QSyntaxHighlighter): MARKDOWN_KEYS_REGEX = { - 'Bold' : re.compile(r'(?P\*\*)(?P.+)(?P=delim)'), - 'uBold': re.compile('(?P__)(?P.+)(?P=delim)'), - 'Italic': re.compile(r'(?P\*)(?P([^*]{2,}|[^*]))(?P=delim)'), - 'uItalic': re.compile('(?P_)(?P([^_]{2,}|[^_]))(?P=delim)'), - 'BoldItalic': re.compile(r'(?P\*\*\*)(?P([^*]{2,}|[^*]))(?P=delim)'), - 'uBoldItalic': re.compile(r'(?P___)(?P([^_]{2,}|[^_]))(?P=delim)'), - 'Link': re.compile(r'(?u)(^|(?P
[^!]))\[.*?\]:?[ ''\t'r']*\(?[^)]+\)?'),
-        'Image': re.compile(r'(?u)!\[.*?\]\(.+?\)'),
-        'HeaderAtx': re.compile(r'(?u)^\#{1,6}(.*?)\#*(''\n|$)'),
-        'Header': re.compile('^(.+)[ \t]*\n(=+|-+)[ \t]*\n+'),
+        'Bold': re.compile(r'(?\*\*)(?P.+?)(?P=delim)'),
+        'Italic': re.compile(r'(?\*)(?!\*)(?P([^\*]{2,}?|[^\*]))(?\*\*\*)(?P([^\*]{2,}?|[^\*]))(?__)(?P.+?)(?P=delim)'),
+        'uItalic': re.compile(r'(?_)(?!_)(?P([^_]{2,}?|[^_]))(?___)(?P([^_]{2,}?|[^_]))(?+\s*'),
-        'BlockQuoteCount': re.compile('^[ \t]*>[ \t]?'),
-        'CodeSpan': re.compile('(?P`+).+?(?P=delim)'),
+        'BlockQuote': re.compile(r'(?u)^[ ]{0,3}>+[ \t]?'),
+        'CodeSpan': re.compile(r'(?`+).+?(?P=delim)'),
         'HeaderLine': re.compile(r'(?u)^(-|=)+\s*$'),
         'HR': re.compile(r'(?u)^(\s*(\*|-|_)\s*){3,}$'),
-        'Html': re.compile('<.+?>')
+        'Html': re.compile(r'<.+?(?')
     }
 
     key_theme_maps = {
         'Bold': "bold",
-        'uBold': "bold",
         'Italic': "emphasis",
-        'uItalic': "emphasis",
         'BoldItalic': "boldemphasis",
+        'uBold': "bold",
+        'uItalic': "emphasis",
         'uBoldItalic': "boldemphasis",
         'Link': "link",
         'Image': "image",
-        'HeaderAtx': "header",
+        'LinkRef': "link",
         'Header': "header",
         'HeaderLine': "header",
         'CodeBlock': "codeblock",
@@ -53,7 +52,6 @@ class MarkdownHighlighter(QSyntaxHighlighter):
         'UnorderedListStar': "unorderedlist",
         'OrderedList': "orderedlist",
         'BlockQuote': "blockquote",
-        'BlockQuoteCount': "blockquote",
         'CodeSpan': "codespan",
         'HR': "line",
         'Html': "html",
@@ -68,7 +66,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
         "header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
         "unorderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
         "orderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
-        "blockquote": {"color":"red", "font-weight":"normal", "font-style":"normal"},
+        "blockquote": {"color":"red", "font-weight":"bold", "font-style":"normal"},
         "codespan": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
         "codeblock": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
         "line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
@@ -84,7 +82,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
         "header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
         "unorderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
         "orderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
-        "blockquote": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
+        "blockquote": {"color":"yellow", "font-weight":"bold", "font-style":"normal"},
         "codespan": {"color":"#90ee90", "font-weight":"normal", "font-style":"normal"},
         "codeblock": {"color":"#ff9900", "font-weight":"normal", "font-style":"normal"},
         "line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
@@ -112,65 +110,60 @@ class MarkdownHighlighter(QSyntaxHighlighter):
         self.rehighlight()
 
     def highlightBlock(self, text):
-        self.highlightMarkdown(text,0)
+        self.offset = 0
+        self.highlightMarkdown(text)
         self.highlightHtml(text)
 
-    def highlightMarkdown(self, text, strt):
+    def highlightMarkdown(self, text):
         cursor = QTextCursor(self.document())
         bf = cursor.blockFormat()
 
-        #Block quotes can contain all elements so process it first
-        self.highlightBlockQuote(text, cursor, bf, strt)
+        #Block quotes can contain all elements so process them first; the quoted text is processed recursively, then return
+        if self.highlightBlockQuote(text, cursor, bf):
+            return
 
         #If empty line no need to check for below elements just return
-        if self.highlightEmptyLine(text, cursor, bf, strt):
+        if self.highlightEmptyLine(text, cursor, bf):
             return
 
         #If horizontal line, look at pevious line to see if its a header, process and return
-        if self.highlightHorizontalLine(text, cursor, bf, strt):
+        if self.highlightHorizontalLine(text, cursor, bf):
             return
 
-        if self.highlightAtxHeader(text, cursor, bf, strt):
+        if self.highlightHeader(text, cursor, bf):
             return
 
-        self.highlightList(text, cursor, bf, strt)
+        self.highlightList(text, cursor, bf)
 
-        self.highlightEmphasis(text, cursor, bf, strt)
+        self.highlightBoldEmphasis(text, cursor, bf)
 
-        self.highlightBold(text, cursor, bf, strt)
+        self.highlightLink(text, cursor, bf)
 
-        self.highlightBoldEmphasis(text, cursor, bf, strt)
+        self.highlightImage(text, cursor, bf)
 
-        self.highlightLink(text, cursor, bf, strt)
+        self.highlightCodeSpan(text, cursor, bf)
 
-        self.highlightImage(text, cursor, bf, strt)
+        self.highlightCodeBlock(text, cursor, bf)
 
-        self.highlightCodeSpan(text, cursor, bf, strt)
-
-        self.highlightCodeBlock(text, cursor, bf, strt)
-
-    def highlightBlockQuote(self, text, cursor, bf, strt):
+    def highlightBlockQuote(self, text, cursor, bf):
         found = False
         mo = re.search(self.MARKDOWN_KEYS_REGEX['BlockQuote'],text)
         if mo:
-            self.setFormat(mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote'])
-            unquote = re.sub(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],'',text)
-            spcs = re.match(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],text)
-            spcslen = 0
-            if spcs:
-                spcslen = len(spcs.group(0))
-            self.highlightMarkdown(unquote,spcslen)
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote'])
+            self.offset += mo.end()
+            unquote = text[mo.end():]
+            self.highlightMarkdown(unquote)
             found = True
         return found
 
-    def highlightEmptyLine(self, text, cursor, bf, strt):
+    def highlightEmptyLine(self, text, cursor, bf):
         textAscii = str(text.replace('\u2029','\n'))
         if textAscii.strip():
             return False
         else:
             return True
 
-    def highlightHorizontalLine(self, text, cursor, bf, strt):
+    def highlightHorizontalLine(self, text, cursor, bf):
         found = False
 
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderLine'],text):
@@ -178,7 +171,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
             prevCursor = QTextCursor(prevBlock)
             prev = prevBlock.text()
             prevAscii = str(prev.replace('\u2029','\n'))
-            if prevAscii.strip():
+            if self.offset == 0 and prevAscii.strip():
                 #print "Its a header"
                 prevCursor.select(QTextCursor.SelectionType.LineUnderCursor)
                 #prevCursor.setCharFormat(self.MARKDOWN_KWS_FORMAT['Header'])
@@ -187,101 +180,118 @@ class MarkdownHighlighter(QSyntaxHighlighter):
                 formatRange.length = prevCursor.block().length()
                 formatRange.start = 0
                 prevCursor.block().layout().setFormats([formatRange])
-                self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine'])
+                self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine'])
                 return True
 
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HR'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR'])
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR'])
             found = True
         return found
 
-    def highlightAtxHeader(self, text, cursor, bf, strt):
+    def highlightHeader(self, text, cursor, bf):
         found = False
-        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderAtx'],text):
+        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Header'],text):
             #bf.setBackground(QBrush(QColor(7,54,65)))
             #cursor.movePosition(QTextCursor.End)
             #cursor.mergeBlockFormat(bf)
-            self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderAtx'])
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Header'])
             found = True
         return found
 
-    def highlightList(self, text, cursor, bf, strt):
+    def highlightList(self, text, cursor, bf):
         found = False
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['UnorderedList'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['UnorderedList'])
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['UnorderedList'])
             found = True
 
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['OrderedList'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['OrderedList'])
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['OrderedList'])
             found = True
         return found
 
-    def highlightLink(self, text, cursor, bf, strt):
+    def highlightLink(self, text, cursor, bf):
         found = False
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Link'],text):
-            start_bracket = mo.group()[0][0] == '['
-            self.setFormat(mo.start() + strt + (0 if start_bracket else 1),
-                           mo.end() - mo.start() - strt - (0 if start_bracket else 1), self.MARKDOWN_KWS_FORMAT['Link'])
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Link'])
+            found = True
+
+        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['LinkRef'],text):
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['LinkRef'])
             found = True
         return found
 
-    def highlightImage(self, text, cursor, bf, strt):
+    def highlightImage(self, text, cursor, bf):
         found = False
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Image'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Image'])
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Image'])
             found = True
         return found
 
-    def highlightCodeSpan(self, text, cursor, bf, strt):
+    def highlightCodeSpan(self, text, cursor, bf):
         found = False
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeSpan'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['CodeSpan'])
+            self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeSpan'])
             found = True
         return found
 
-    def highlightBold(self, text, cursor, bf, strt):
-        found = False
-        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Bold'])
-            found = True
+    def highlightBoldEmphasis(self, text, cursor, bf):
+        mo = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'], text)
+        if mo:
+            offset = mo.end()
+        else:
+            offset = 0
+        return self._highlightBoldEmphasis(text[offset:], cursor, bf, offset, False, False)
 
-        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBold'])
-            found = True
-        return found
-
-    def highlightEmphasis(self, text, cursor, bf, strt):
+    def _highlightBoldEmphasis(self, text, cursor, bf, offset, bold, emphasis):
+        #detect and apply nested Bold/Emphasis
         found = False
-        unlist = re.sub(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],'',text)
-        spcs = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],text)
-        spcslen = 0
-        if spcs:
-            spcslen = len(spcs.group(0))
-        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],unlist):
-            self.setFormat(mo.start()+strt+spcslen, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Italic'])
+
+        def apply(match, bold, emphasis):
+            if bold and emphasis:
+                self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['BoldItalic'])
+            elif bold:
+                self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Bold'])
+            elif emphasis:
+                self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Italic'])
+
+        def recusive(match, extra_offset, bold, emphasis):
+            apply(match, bold, emphasis)
+            if bold and emphasis:
+                return  # max depth reached => return, do not process further nested Bold/Italic
+
+            sub_txt = text[match.start()+extra_offset : match.end()-extra_offset]
+            sub_offset = offset + extra_offset + mo.start()
+            self._highlightBoldEmphasis(sub_txt, cursor, bf, sub_offset, bold, emphasis)
+
+        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],text):
+            recusive(mo, 1, bold, True)
             found = True
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uItalic'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uItalic'])
+            recusive(mo, 1, bold, True)
+            found = True
+
+        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text):
+            recusive(mo, 2, True, emphasis)
+            found = True
+        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text):
+            recusive(mo, 2, True, emphasis)
             found = True
-        return found
 
-    def highlightBoldEmphasis(self, text, cursor, bf, strt):
-        found = False
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['BoldItalic'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['BoldItalic'])
+            apply(mo, True, True)
+            found = True
+        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
+            apply(mo, True, True)
             found = True
 
-        for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
-            self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBoldItalic'])
-            found = True
         return found
 
-    def highlightCodeBlock(self, text, cursor, bf, strt):
+    def highlightCodeBlock(self, text, cursor, bf):
         found = False
         for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeBlock'],text):
             stripped = text.lstrip()
-            if stripped[0] not in ('*','-','+','>'):
-                self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock'])
+            if stripped[0] not in ('*','-','+','>') and not re.match(r'\d+\.', stripped):
+                self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock'])
                 found = True
         return found