Merge branch 'improve-markdown-highlighter' of https://github.com/un-pogaz/calibre

This commit is contained in:
Kovid Goyal 2023-05-02 07:24:58 +05:30
commit af2656c3b5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -14,38 +14,37 @@ from calibre.gui2.palette import dark_link_color, light_link_color
class MarkdownHighlighter(QSyntaxHighlighter): class MarkdownHighlighter(QSyntaxHighlighter):
MARKDOWN_KEYS_REGEX = { MARKDOWN_KEYS_REGEX = {
'Bold' : re.compile(r'(?P<delim>\*\*)(?P<text>.+)(?P=delim)'), 'Bold': re.compile(r'(?<!\\)(?P<delim>\*\*)(?P<text>.+?)(?P=delim)'),
'uBold': re.compile('(?P<delim>__)(?P<text>.+)(?P=delim)'), 'Italic': re.compile(r'(?<![\*\\])(?P<delim>\*)(?!\*)(?P<text>([^\*]{2,}?|[^\*]))(?<![\*\\])(?P=delim)'),
'Italic': re.compile(r'(?P<delim>\*)(?P<text>([^*]{2,}|[^*]))(?P=delim)'), 'BoldItalic': re.compile(r'(?<!\\)(?P<delim>\*\*\*)(?P<text>([^\*]{2,}?|[^\*]))(?<!\\)(?P=delim)'),
'uItalic': re.compile('(?P<delim>_)(?P<text>([^_]{2,}|[^_]))(?P=delim)'), 'uBold': re.compile(r'(?<!\\)(?P<delim>__)(?P<text>.+?)(?P=delim)'),
'BoldItalic': re.compile(r'(?P<delim>\*\*\*)(?P<text>([^*]{2,}|[^*]))(?P=delim)'), 'uItalic': re.compile(r'(?<![_\\])(?P<delim>_)(?!_)(?P<text>([^_]{2,}?|[^_]))(?<![_\\])(?P=delim)'),
'uBoldItalic': re.compile(r'(?P<delim>___)(?P<text>([^_]{2,}|[^_]))(?P=delim)'), 'uBoldItalic': re.compile(r'(?<!\\)(?P<delim>___)(?P<text>([^_]{2,}?|[^_]))(?<!\\)(?P=delim)'),
'Link': re.compile(r'(?u)(^|(?P<pre>[^!]))\[.*?\]:?[ ''\t'r']*\(?[^)]+\)?'), 'Link': re.compile(r'(?u)(?<![!\\]])\[.*?(?<!\\)\](\[.+?(?<!\\)\]|\(.+?(?<!\\)\))'),
'Image': re.compile(r'(?u)!\[.*?\]\(.+?\)'), 'Image': re.compile(r'(?u)(?<!\\)!\[.*?(?<!\\)\](\[.+?(?<!\\)\]|\(.+?(?<!\\)\))'),
'HeaderAtx': re.compile(r'(?u)^\#{1,6}(.*?)\#*(''\n|$)'), 'LinkRef': re.compile(r'(?u)^ *\[.*?\]:[ \t]*.*$'),
'Header': re.compile('^(.+)[ \t]*\n(=+|-+)[ \t]*\n+'), 'Header': re.compile(r'(?u)^#{1,6}(.*?)$'),
'CodeBlock': re.compile('^([ ]{4,}|\t).*'), 'CodeBlock': re.compile('^([ ]{4,}|\t).*'),
'UnorderedList': re.compile(r'(?u)^\s*(\* |\+ |- )+\s*'), 'UnorderedList': re.compile(r'(?u)^\s*(\* |\+ |- )+\s*'),
'UnorderedListStar': re.compile(r'^\s*(\* )+\s*'), 'UnorderedListStar': re.compile(r'^\s*(\* )+\s*'),
'OrderedList': re.compile(r'(?u)^\s*(\d+\. )\s*'), 'OrderedList': re.compile(r'(?u)^\s*(\d+\. )\s*'),
'BlockQuote': re.compile(r'(?u)^\s*>+\s*'), 'BlockQuote': re.compile(r'(?u)^[ ]{0,3}>+[ \t]?'),
'BlockQuoteCount': re.compile('^[ \t]*>[ \t]?'), 'CodeSpan': re.compile(r'(?<!\\)(?P<delim>`+).+?(?P=delim)'),
'CodeSpan': re.compile('(?P<delim>`+).+?(?P=delim)'),
'HeaderLine': re.compile(r'(?u)^(-|=)+\s*$'), 'HeaderLine': re.compile(r'(?u)^(-|=)+\s*$'),
'HR': re.compile(r'(?u)^(\s*(\*|-|_)\s*){3,}$'), 'HR': re.compile(r'(?u)^(\s*(\*|-|_)\s*){3,}$'),
'Html': re.compile('<.+?>') 'Html': re.compile(r'<.+?(?<!\\)>')
} }
key_theme_maps = { key_theme_maps = {
'Bold': "bold", 'Bold': "bold",
'uBold': "bold",
'Italic': "emphasis", 'Italic': "emphasis",
'uItalic': "emphasis",
'BoldItalic': "boldemphasis", 'BoldItalic': "boldemphasis",
'uBold': "bold",
'uItalic': "emphasis",
'uBoldItalic': "boldemphasis", 'uBoldItalic': "boldemphasis",
'Link': "link", 'Link': "link",
'Image': "image", 'Image': "image",
'HeaderAtx': "header", 'LinkRef': "link",
'Header': "header", 'Header': "header",
'HeaderLine': "header", 'HeaderLine': "header",
'CodeBlock': "codeblock", 'CodeBlock': "codeblock",
@ -53,7 +52,6 @@ class MarkdownHighlighter(QSyntaxHighlighter):
'UnorderedListStar': "unorderedlist", 'UnorderedListStar': "unorderedlist",
'OrderedList': "orderedlist", 'OrderedList': "orderedlist",
'BlockQuote': "blockquote", 'BlockQuote': "blockquote",
'BlockQuoteCount': "blockquote",
'CodeSpan': "codespan", 'CodeSpan': "codespan",
'HR': "line", 'HR': "line",
'Html': "html", 'Html': "html",
@ -68,7 +66,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
"header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"}, "header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
"unorderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"}, "unorderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
"orderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"}, "orderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
"blockquote": {"color":"red", "font-weight":"normal", "font-style":"normal"}, "blockquote": {"color":"red", "font-weight":"bold", "font-style":"normal"},
"codespan": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"}, "codespan": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
"codeblock": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"}, "codeblock": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
"line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"}, "line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
@ -84,7 +82,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
"header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"}, "header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
"unorderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"}, "unorderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
"orderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"}, "orderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
"blockquote": {"color":"yellow", "font-weight":"normal", "font-style":"normal"}, "blockquote": {"color":"yellow", "font-weight":"bold", "font-style":"normal"},
"codespan": {"color":"#90ee90", "font-weight":"normal", "font-style":"normal"}, "codespan": {"color":"#90ee90", "font-weight":"normal", "font-style":"normal"},
"codeblock": {"color":"#ff9900", "font-weight":"normal", "font-style":"normal"}, "codeblock": {"color":"#ff9900", "font-weight":"normal", "font-style":"normal"},
"line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"}, "line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
@ -112,65 +110,60 @@ class MarkdownHighlighter(QSyntaxHighlighter):
self.rehighlight() self.rehighlight()
def highlightBlock(self, text): def highlightBlock(self, text):
self.highlightMarkdown(text,0) self.offset = 0
self.highlightMarkdown(text)
self.highlightHtml(text) self.highlightHtml(text)
def highlightMarkdown(self, text, strt): def highlightMarkdown(self, text):
cursor = QTextCursor(self.document()) cursor = QTextCursor(self.document())
bf = cursor.blockFormat() bf = cursor.blockFormat()
#Block quotes can contain all elements so process it first #Block quotes can contain all elements so process it first, internally process recursively and return
self.highlightBlockQuote(text, cursor, bf, strt) if self.highlightBlockQuote(text, cursor, bf):
return
#If empty line no need to check for below elements just return #If empty line no need to check for below elements just return
if self.highlightEmptyLine(text, cursor, bf, strt): if self.highlightEmptyLine(text, cursor, bf):
return return
#If horizontal line, look at previous line to see if it's a header, process and return #If horizontal line, look at previous line to see if it's a header, process and return
if self.highlightHorizontalLine(text, cursor, bf, strt): if self.highlightHorizontalLine(text, cursor, bf):
return return
if self.highlightAtxHeader(text, cursor, bf, strt): if self.highlightHeader(text, cursor, bf):
return return
self.highlightList(text, cursor, bf, strt) self.highlightList(text, cursor, bf)
self.highlightEmphasis(text, cursor, bf, strt) self.highlightBoldEmphasis(text, cursor, bf)
self.highlightBold(text, cursor, bf, strt) self.highlightLink(text, cursor, bf)
self.highlightBoldEmphasis(text, cursor, bf, strt) self.highlightImage(text, cursor, bf)
self.highlightLink(text, cursor, bf, strt) self.highlightCodeSpan(text, cursor, bf)
self.highlightImage(text, cursor, bf, strt) self.highlightCodeBlock(text, cursor, bf)
self.highlightCodeSpan(text, cursor, bf, strt) def highlightBlockQuote(self, text, cursor, bf):
self.highlightCodeBlock(text, cursor, bf, strt)
def highlightBlockQuote(self, text, cursor, bf, strt):
found = False found = False
mo = re.search(self.MARKDOWN_KEYS_REGEX['BlockQuote'],text) mo = re.search(self.MARKDOWN_KEYS_REGEX['BlockQuote'],text)
if mo: if mo:
self.setFormat(mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote'])
unquote = re.sub(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],'',text) self.offset += mo.end()
spcs = re.match(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],text) unquote = text[mo.end():]
spcslen = 0 self.highlightMarkdown(unquote)
if spcs:
spcslen = len(spcs.group(0))
self.highlightMarkdown(unquote,spcslen)
found = True found = True
return found return found
def highlightEmptyLine(self, text, cursor, bf, strt): def highlightEmptyLine(self, text, cursor, bf):
textAscii = str(text.replace('\u2029','\n')) textAscii = str(text.replace('\u2029','\n'))
if textAscii.strip(): if textAscii.strip():
return False return False
else: else:
return True return True
def highlightHorizontalLine(self, text, cursor, bf, strt): def highlightHorizontalLine(self, text, cursor, bf):
found = False found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderLine'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderLine'],text):
@ -178,7 +171,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
prevCursor = QTextCursor(prevBlock) prevCursor = QTextCursor(prevBlock)
prev = prevBlock.text() prev = prevBlock.text()
prevAscii = str(prev.replace('\u2029','\n')) prevAscii = str(prev.replace('\u2029','\n'))
if prevAscii.strip(): if self.offset == 0 and prevAscii.strip():
#print "Its a header" #print "Its a header"
prevCursor.select(QTextCursor.SelectionType.LineUnderCursor) prevCursor.select(QTextCursor.SelectionType.LineUnderCursor)
#prevCursor.setCharFormat(self.MARKDOWN_KWS_FORMAT['Header']) #prevCursor.setCharFormat(self.MARKDOWN_KWS_FORMAT['Header'])
@ -187,101 +180,118 @@ class MarkdownHighlighter(QSyntaxHighlighter):
formatRange.length = prevCursor.block().length() formatRange.length = prevCursor.block().length()
formatRange.start = 0 formatRange.start = 0
prevCursor.block().layout().setFormats([formatRange]) prevCursor.block().layout().setFormats([formatRange])
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine'])
return True return True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HR'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HR'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR'])
found = True found = True
return found return found
def highlightAtxHeader(self, text, cursor, bf, strt): def highlightHeader(self, text, cursor, bf):
found = False found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderAtx'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Header'],text):
#bf.setBackground(QBrush(QColor(7,54,65))) #bf.setBackground(QBrush(QColor(7,54,65)))
#cursor.movePosition(QTextCursor.End) #cursor.movePosition(QTextCursor.End)
#cursor.mergeBlockFormat(bf) #cursor.mergeBlockFormat(bf)
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderAtx']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Header'])
found = True found = True
return found return found
def highlightList(self, text, cursor, bf, strt): def highlightList(self, text, cursor, bf):
found = False found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['UnorderedList'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['UnorderedList'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['UnorderedList']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['UnorderedList'])
found = True found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['OrderedList'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['OrderedList'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['OrderedList']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['OrderedList'])
found = True found = True
return found return found
def highlightLink(self, text, cursor, bf, strt): def highlightLink(self, text, cursor, bf):
found = False found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Link'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Link'],text):
start_bracket = mo.group()[0][0] == '[' self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Link'])
self.setFormat(mo.start() + strt + (0 if start_bracket else 1), found = True
mo.end() - mo.start() - strt - (0 if start_bracket else 1), self.MARKDOWN_KWS_FORMAT['Link'])
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['LinkRef'],text):
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['LinkRef'])
found = True found = True
return found return found
def highlightImage(self, text, cursor, bf, strt): def highlightImage(self, text, cursor, bf):
found = False found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Image'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Image'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Image']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Image'])
found = True found = True
return found return found
def highlightCodeSpan(self, text, cursor, bf, strt): def highlightCodeSpan(self, text, cursor, bf):
found = False found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeSpan'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeSpan'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['CodeSpan']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeSpan'])
found = True found = True
return found return found
def highlightBold(self, text, cursor, bf, strt): def highlightBoldEmphasis(self, text, cursor, bf):
found = False mo = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'], text)
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text): if mo:
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Bold']) offset = mo.end()
found = True else:
offset = 0
return self._highlightBoldEmphasis(text[offset:], cursor, bf, offset, False, False)
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text): def _highlightBoldEmphasis(self, text, cursor, bf, offset, bold, emphasis):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBold']) #detect and apply imbricated Bold/Emphasis
found = True
return found
def highlightEmphasis(self, text, cursor, bf, strt):
found = False found = False
unlist = re.sub(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],'',text)
spcs = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],text) def apply(match, bold, emphasis):
spcslen = 0 if bold and emphasis:
if spcs: self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['BoldItalic'])
spcslen = len(spcs.group(0)) elif bold:
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],unlist): self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Bold'])
self.setFormat(mo.start()+strt+spcslen, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Italic']) elif emphasis:
self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Italic'])
def recusive(match, extra_offset, bold, emphasis):
apply(match, bold, emphasis)
if bold and emphasis:
return # max deep => return, do not process extra Bold/Italic
sub_txt = text[match.start()+extra_offset : match.end()-extra_offset]
sub_offset = offset + extra_offset + mo.start()
self._highlightBoldEmphasis(sub_txt, cursor, bf, sub_offset, bold, emphasis)
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],text):
recusive(mo, 1, bold, True)
found = True found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uItalic'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uItalic'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uItalic']) recusive(mo, 1, bold, True)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text):
recusive(mo, 2, True, emphasis)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text):
recusive(mo, 2, True, emphasis)
found = True found = True
return found
def highlightBoldEmphasis(self, text, cursor, bf, strt):
found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['BoldItalic'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['BoldItalic'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['BoldItalic']) apply(mo, True, True)
found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
apply(mo, True, True)
found = True found = True
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBoldItalic'])
found = True
return found return found
def highlightCodeBlock(self, text, cursor, bf, strt): def highlightCodeBlock(self, text, cursor, bf):
found = False found = False
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeBlock'],text): for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeBlock'],text):
stripped = text.lstrip() stripped = text.lstrip()
if stripped[0] not in ('*','-','+','>'): if stripped[0] not in ('*','-','+','>') and not re.match(r'\d+\.', stripped):
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock']) self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock'])
found = True found = True
return found return found