Merge branch 'improve-markdown-highlighter' of https://github.com/un-pogaz/calibre

This commit is contained in:
Kovid Goyal 2023-05-02 07:24:58 +05:30
commit af2656c3b5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -14,38 +14,37 @@ from calibre.gui2.palette import dark_link_color, light_link_color
class MarkdownHighlighter(QSyntaxHighlighter):
MARKDOWN_KEYS_REGEX = {
'Bold' : re.compile(r'(?P<delim>\*\*)(?P<text>.+)(?P=delim)'),
'uBold': re.compile('(?P<delim>__)(?P<text>.+)(?P=delim)'),
'Italic': re.compile(r'(?P<delim>\*)(?P<text>([^*]{2,}|[^*]))(?P=delim)'),
'uItalic': re.compile('(?P<delim>_)(?P<text>([^_]{2,}|[^_]))(?P=delim)'),
'BoldItalic': re.compile(r'(?P<delim>\*\*\*)(?P<text>([^*]{2,}|[^*]))(?P=delim)'),
'uBoldItalic': re.compile(r'(?P<delim>___)(?P<text>([^_]{2,}|[^_]))(?P=delim)'),
'Link': re.compile(r'(?u)(^|(?P<pre>[^!]))\[.*?\]:?[ ''\t'r']*\(?[^)]+\)?'),
'Image': re.compile(r'(?u)!\[.*?\]\(.+?\)'),
'HeaderAtx': re.compile(r'(?u)^\#{1,6}(.*?)\#*(''\n|$)'),
'Header': re.compile('^(.+)[ \t]*\n(=+|-+)[ \t]*\n+'),
'Bold': re.compile(r'(?<!\\)(?P<delim>\*\*)(?P<text>.+?)(?P=delim)'),
'Italic': re.compile(r'(?<![\*\\])(?P<delim>\*)(?!\*)(?P<text>([^\*]{2,}?|[^\*]))(?<![\*\\])(?P=delim)'),
'BoldItalic': re.compile(r'(?<!\\)(?P<delim>\*\*\*)(?P<text>([^\*]{2,}?|[^\*]))(?<!\\)(?P=delim)'),
'uBold': re.compile(r'(?<!\\)(?P<delim>__)(?P<text>.+?)(?P=delim)'),
'uItalic': re.compile(r'(?<![_\\])(?P<delim>_)(?!_)(?P<text>([^_]{2,}?|[^_]))(?<![_\\])(?P=delim)'),
'uBoldItalic': re.compile(r'(?<!\\)(?P<delim>___)(?P<text>([^_]{2,}?|[^_]))(?<!\\)(?P=delim)'),
'Link': re.compile(r'(?u)(?<![!\\]])\[.*?(?<!\\)\](\[.+?(?<!\\)\]|\(.+?(?<!\\)\))'),
'Image': re.compile(r'(?u)(?<!\\)!\[.*?(?<!\\)\](\[.+?(?<!\\)\]|\(.+?(?<!\\)\))'),
'LinkRef': re.compile(r'(?u)^ *\[.*?\]:[ \t]*.*$'),
'Header': re.compile(r'(?u)^#{1,6}(.*?)$'),
'CodeBlock': re.compile('^([ ]{4,}|\t).*'),
'UnorderedList': re.compile(r'(?u)^\s*(\* |\+ |- )+\s*'),
'UnorderedListStar': re.compile(r'^\s*(\* )+\s*'),
'OrderedList': re.compile(r'(?u)^\s*(\d+\. )\s*'),
'BlockQuote': re.compile(r'(?u)^\s*>+\s*'),
'BlockQuoteCount': re.compile('^[ \t]*>[ \t]?'),
'CodeSpan': re.compile('(?P<delim>`+).+?(?P=delim)'),
'BlockQuote': re.compile(r'(?u)^[ ]{0,3}>+[ \t]?'),
'CodeSpan': re.compile(r'(?<!\\)(?P<delim>`+).+?(?P=delim)'),
'HeaderLine': re.compile(r'(?u)^(-|=)+\s*$'),
'HR': re.compile(r'(?u)^(\s*(\*|-|_)\s*){3,}$'),
'Html': re.compile('<.+?>')
'Html': re.compile(r'<.+?(?<!\\)>')
}
key_theme_maps = {
'Bold': "bold",
'uBold': "bold",
'Italic': "emphasis",
'uItalic': "emphasis",
'BoldItalic': "boldemphasis",
'uBold': "bold",
'uItalic': "emphasis",
'uBoldItalic': "boldemphasis",
'Link': "link",
'Image': "image",
'HeaderAtx': "header",
'LinkRef': "link",
'Header': "header",
'HeaderLine': "header",
'CodeBlock': "codeblock",
@ -53,7 +52,6 @@ class MarkdownHighlighter(QSyntaxHighlighter):
'UnorderedListStar': "unorderedlist",
'OrderedList': "orderedlist",
'BlockQuote': "blockquote",
'BlockQuoteCount': "blockquote",
'CodeSpan': "codespan",
'HR': "line",
'Html': "html",
@ -68,7 +66,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
"header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
"unorderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
"orderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
"blockquote": {"color":"red", "font-weight":"normal", "font-style":"normal"},
"blockquote": {"color":"red", "font-weight":"bold", "font-style":"normal"},
"codespan": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
"codeblock": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
"line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
@ -84,7 +82,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
"header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
"unorderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
"orderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
"blockquote": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
"blockquote": {"color":"yellow", "font-weight":"bold", "font-style":"normal"},
"codespan": {"color":"#90ee90", "font-weight":"normal", "font-style":"normal"},
"codeblock": {"color":"#ff9900", "font-weight":"normal", "font-style":"normal"},
"line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
@ -112,65 +110,60 @@ class MarkdownHighlighter(QSyntaxHighlighter):
self.rehighlight()
def highlightBlock(self, text):
self.highlightMarkdown(text,0)
self.offset = 0
self.highlightMarkdown(text)
self.highlightHtml(text)
def highlightMarkdown(self, text, strt):
def highlightMarkdown(self, text):
cursor = QTextCursor(self.document())
bf = cursor.blockFormat()
#Block quotes can contain all elements so process it first
self.highlightBlockQuote(text, cursor, bf, strt)
#Block quotes can contain all elements so process them first; the quoted content is processed recursively internally, so just return
if self.highlightBlockQuote(text, cursor, bf):
return
#If empty line no need to check for below elements just return
if self.highlightEmptyLine(text, cursor, bf, strt):
if self.highlightEmptyLine(text, cursor, bf):
return
#If horizontal line, look at previous line to see if it's a header, process and return
if self.highlightHorizontalLine(text, cursor, bf, strt):
if self.highlightHorizontalLine(text, cursor, bf):
return
if self.highlightAtxHeader(text, cursor, bf, strt):
if self.highlightHeader(text, cursor, bf):
return
self.highlightList(text, cursor, bf, strt)
self.highlightList(text, cursor, bf)
self.highlightEmphasis(text, cursor, bf, strt)
self.highlightBoldEmphasis(text, cursor, bf)
self.highlightBold(text, cursor, bf, strt)
self.highlightLink(text, cursor, bf)
self.highlightBoldEmphasis(text, cursor, bf, strt)
self.highlightImage(text, cursor, bf)
self.highlightLink(text, cursor, bf, strt)
self.highlightCodeSpan(text, cursor, bf)
self.highlightImage(text, cursor, bf, strt)
self.highlightCodeBlock(text, cursor, bf)
self.highlightCodeSpan(text, cursor, bf, strt)
self.highlightCodeBlock(text, cursor, bf, strt)
def highlightBlockQuote(self, text, cursor, bf, strt):
def highlightBlockQuote(self, text, cursor, bf):
found = False
mo = re.search(self.MARKDOWN_KEYS_REGEX['BlockQuote'],text)
if mo:
self.setFormat(mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote'])
unquote = re.sub(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],'',text)
spcs = re.match(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],text)
spcslen = 0
if spcs:
spcslen = len(spcs.group(0))
self.highlightMarkdown(unquote,spcslen)
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote'])
self.offset += mo.end()
unquote = text[mo.end():]
self.highlightMarkdown(unquote)
found = True
return found
def highlightEmptyLine(self, text, cursor, bf, strt):
def highlightEmptyLine(self, text, cursor, bf):
    """Report whether ``text`` is blank.

    ``cursor`` and ``bf`` are accepted but not used by this check.

    Returns True when ``text`` contains nothing but whitespace, after
    U+2029 paragraph separators have been replaced by newlines;
    otherwise returns False.
    """
    # Keep the U+2029 -> newline replacement of the original code, then let
    # strip() decide: an empty result means the line had no visible content.
    return not text.replace('\u2029', '\n').strip()
def highlightHorizontalLine(self, text, cursor, bf, strt):
def highlightHorizontalLine(self, text, cursor, bf):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderLine'],text):
@ -178,7 +171,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
prevCursor = QTextCursor(prevBlock)
prev = prevBlock.text()
prevAscii = str(prev.replace('\u2029','\n'))
if prevAscii.strip():
if self.offset == 0 and prevAscii.strip():
#print "Its a header"
prevCursor.select(QTextCursor.SelectionType.LineUnderCursor)
#prevCursor.setCharFormat(self.MARKDOWN_KWS_FORMAT['Header'])
@ -187,101 +180,118 @@ class MarkdownHighlighter(QSyntaxHighlighter):
formatRange.length = prevCursor.block().length()
formatRange.start = 0
prevCursor.block().layout().setFormats([formatRange])
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine'])
return True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HR'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR'])
found = True
return found
def highlightAtxHeader(self, text, cursor, bf, strt):
def highlightHeader(self, text, cursor, bf):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderAtx'],text):
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Header'],text):
#bf.setBackground(QBrush(QColor(7,54,65)))
#cursor.movePosition(QTextCursor.End)
#cursor.mergeBlockFormat(bf)
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderAtx'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Header'])
found = True
return found
def highlightList(self, text, cursor, bf, strt):
def highlightList(self, text, cursor, bf):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['UnorderedList'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['UnorderedList'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['UnorderedList'])
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['OrderedList'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['OrderedList'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['OrderedList'])
found = True
return found
def highlightLink(self, text, cursor, bf, strt):
def highlightLink(self, text, cursor, bf):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Link'],text):
start_bracket = mo.group()[0][0] == '['
self.setFormat(mo.start() + strt + (0 if start_bracket else 1),
mo.end() - mo.start() - strt - (0 if start_bracket else 1), self.MARKDOWN_KWS_FORMAT['Link'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Link'])
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['LinkRef'],text):
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['LinkRef'])
found = True
return found
def highlightImage(self, text, cursor, bf, strt):
def highlightImage(self, text, cursor, bf):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Image'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Image'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Image'])
found = True
return found
def highlightCodeSpan(self, text, cursor, bf, strt):
def highlightCodeSpan(self, text, cursor, bf):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeSpan'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['CodeSpan'])
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeSpan'])
found = True
return found
def highlightBold(self, text, cursor, bf, strt):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Bold'])
found = True
def highlightBoldEmphasis(self, text, cursor, bf):
    """Start bold/italic highlighting for ``text``, skipping a leading star bullet.

    If ``text`` begins with a match for the 'UnorderedListStar' pattern,
    that prefix is cut off and its length is handed on as the starting
    offset; otherwise the whole text is used with offset 0.
    NOTE(review): presumably the prefix is skipped so the bullet's ``*`` is
    not mistaken for an emphasis delimiter -- confirm.

    Returns whatever ``self._highlightBoldEmphasis`` returns for the
    remaining text, called with both the bold and emphasis flags False.
    """
    bullet = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'], text)
    # Length of the bullet prefix (0 when the line is not a star list item).
    skip = bullet.end() if bullet else 0
    return self._highlightBoldEmphasis(text[skip:], cursor, bf, skip, False, False)
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBold'])
found = True
return found
def highlightEmphasis(self, text, cursor, bf, strt):
def _highlightBoldEmphasis(self, text, cursor, bf, offset, bold, emphasis):
#detect and apply nested Bold/Emphasis
found = False
unlist = re.sub(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],'',text)
spcs = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],text)
spcslen = 0
if spcs:
spcslen = len(spcs.group(0))
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],unlist):
self.setFormat(mo.start()+strt+spcslen, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Italic'])
def apply(match, bold, emphasis):
if bold and emphasis:
self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['BoldItalic'])
elif bold:
self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Bold'])
elif emphasis:
self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Italic'])
def recusive(match, extra_offset, bold, emphasis):
apply(match, bold, emphasis)
if bold and emphasis:
return # max deep => return, do not process extra Bold/Italic
sub_txt = text[match.start()+extra_offset : match.end()-extra_offset]
sub_offset = offset + extra_offset + mo.start()
self._highlightBoldEmphasis(sub_txt, cursor, bf, sub_offset, bold, emphasis)
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],text):
recusive(mo, 1, bold, True)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uItalic'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uItalic'])
recusive(mo, 1, bold, True)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text):
recusive(mo, 2, True, emphasis)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text):
recusive(mo, 2, True, emphasis)
found = True
return found
def highlightBoldEmphasis(self, text, cursor, bf, strt):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['BoldItalic'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['BoldItalic'])
apply(mo, True, True)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
apply(mo, True, True)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBoldItalic'])
found = True
return found
def highlightCodeBlock(self, text, cursor, bf, strt):
def highlightCodeBlock(self, text, cursor, bf):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeBlock'],text):
stripped = text.lstrip()
if stripped[0] not in ('*','-','+','>'):
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock'])
if stripped[0] not in ('*','-','+','>') and not re.match(r'\d+\.', stripped):
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock'])
found = True
return found