mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Merge branch 'improve-markdown-highlighter' of https://github.com/un-pogaz/calibre
This commit is contained in:
commit
af2656c3b5
@ -14,38 +14,37 @@ from calibre.gui2.palette import dark_link_color, light_link_color
|
||||
class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
|
||||
MARKDOWN_KEYS_REGEX = {
|
||||
'Bold' : re.compile(r'(?P<delim>\*\*)(?P<text>.+)(?P=delim)'),
|
||||
'uBold': re.compile('(?P<delim>__)(?P<text>.+)(?P=delim)'),
|
||||
'Italic': re.compile(r'(?P<delim>\*)(?P<text>([^*]{2,}|[^*]))(?P=delim)'),
|
||||
'uItalic': re.compile('(?P<delim>_)(?P<text>([^_]{2,}|[^_]))(?P=delim)'),
|
||||
'BoldItalic': re.compile(r'(?P<delim>\*\*\*)(?P<text>([^*]{2,}|[^*]))(?P=delim)'),
|
||||
'uBoldItalic': re.compile(r'(?P<delim>___)(?P<text>([^_]{2,}|[^_]))(?P=delim)'),
|
||||
'Link': re.compile(r'(?u)(^|(?P<pre>[^!]))\[.*?\]:?[ ''\t'r']*\(?[^)]+\)?'),
|
||||
'Image': re.compile(r'(?u)!\[.*?\]\(.+?\)'),
|
||||
'HeaderAtx': re.compile(r'(?u)^\#{1,6}(.*?)\#*(''\n|$)'),
|
||||
'Header': re.compile('^(.+)[ \t]*\n(=+|-+)[ \t]*\n+'),
|
||||
'Bold': re.compile(r'(?<!\\)(?P<delim>\*\*)(?P<text>.+?)(?P=delim)'),
|
||||
'Italic': re.compile(r'(?<![\*\\])(?P<delim>\*)(?!\*)(?P<text>([^\*]{2,}?|[^\*]))(?<![\*\\])(?P=delim)'),
|
||||
'BoldItalic': re.compile(r'(?<!\\)(?P<delim>\*\*\*)(?P<text>([^\*]{2,}?|[^\*]))(?<!\\)(?P=delim)'),
|
||||
'uBold': re.compile(r'(?<!\\)(?P<delim>__)(?P<text>.+?)(?P=delim)'),
|
||||
'uItalic': re.compile(r'(?<![_\\])(?P<delim>_)(?!_)(?P<text>([^_]{2,}?|[^_]))(?<![_\\])(?P=delim)'),
|
||||
'uBoldItalic': re.compile(r'(?<!\\)(?P<delim>___)(?P<text>([^_]{2,}?|[^_]))(?<!\\)(?P=delim)'),
|
||||
'Link': re.compile(r'(?u)(?<![!\\]])\[.*?(?<!\\)\](\[.+?(?<!\\)\]|\(.+?(?<!\\)\))'),
|
||||
'Image': re.compile(r'(?u)(?<!\\)!\[.*?(?<!\\)\](\[.+?(?<!\\)\]|\(.+?(?<!\\)\))'),
|
||||
'LinkRef': re.compile(r'(?u)^ *\[.*?\]:[ \t]*.*$'),
|
||||
'Header': re.compile(r'(?u)^#{1,6}(.*?)$'),
|
||||
'CodeBlock': re.compile('^([ ]{4,}|\t).*'),
|
||||
'UnorderedList': re.compile(r'(?u)^\s*(\* |\+ |- )+\s*'),
|
||||
'UnorderedListStar': re.compile(r'^\s*(\* )+\s*'),
|
||||
'OrderedList': re.compile(r'(?u)^\s*(\d+\. )\s*'),
|
||||
'BlockQuote': re.compile(r'(?u)^\s*>+\s*'),
|
||||
'BlockQuoteCount': re.compile('^[ \t]*>[ \t]?'),
|
||||
'CodeSpan': re.compile('(?P<delim>`+).+?(?P=delim)'),
|
||||
'BlockQuote': re.compile(r'(?u)^[ ]{0,3}>+[ \t]?'),
|
||||
'CodeSpan': re.compile(r'(?<!\\)(?P<delim>`+).+?(?P=delim)'),
|
||||
'HeaderLine': re.compile(r'(?u)^(-|=)+\s*$'),
|
||||
'HR': re.compile(r'(?u)^(\s*(\*|-|_)\s*){3,}$'),
|
||||
'Html': re.compile('<.+?>')
|
||||
'Html': re.compile(r'<.+?(?<!\\)>')
|
||||
}
|
||||
|
||||
key_theme_maps = {
|
||||
'Bold': "bold",
|
||||
'uBold': "bold",
|
||||
'Italic': "emphasis",
|
||||
'uItalic': "emphasis",
|
||||
'BoldItalic': "boldemphasis",
|
||||
'uBold': "bold",
|
||||
'uItalic': "emphasis",
|
||||
'uBoldItalic': "boldemphasis",
|
||||
'Link': "link",
|
||||
'Image': "image",
|
||||
'HeaderAtx': "header",
|
||||
'LinkRef': "link",
|
||||
'Header': "header",
|
||||
'HeaderLine': "header",
|
||||
'CodeBlock': "codeblock",
|
||||
@ -53,7 +52,6 @@ class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
'UnorderedListStar': "unorderedlist",
|
||||
'OrderedList': "orderedlist",
|
||||
'BlockQuote': "blockquote",
|
||||
'BlockQuoteCount': "blockquote",
|
||||
'CodeSpan': "codespan",
|
||||
'HR': "line",
|
||||
'Html': "html",
|
||||
@ -68,7 +66,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
"header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
|
||||
"unorderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
|
||||
"orderedlist": {"color":"red", "font-weight":"normal", "font-style":"normal"},
|
||||
"blockquote": {"color":"red", "font-weight":"normal", "font-style":"normal"},
|
||||
"blockquote": {"color":"red", "font-weight":"bold", "font-style":"normal"},
|
||||
"codespan": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
|
||||
"codeblock": {"color":"#ff5800", "font-weight":"normal", "font-style":"normal"},
|
||||
"line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
|
||||
@ -84,7 +82,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
"header": {"color":"#2aa198", "font-weight":"bold", "font-style":"normal"},
|
||||
"unorderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
|
||||
"orderedlist": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
|
||||
"blockquote": {"color":"yellow", "font-weight":"normal", "font-style":"normal"},
|
||||
"blockquote": {"color":"yellow", "font-weight":"bold", "font-style":"normal"},
|
||||
"codespan": {"color":"#90ee90", "font-weight":"normal", "font-style":"normal"},
|
||||
"codeblock": {"color":"#ff9900", "font-weight":"normal", "font-style":"normal"},
|
||||
"line": {"color":"#2aa198", "font-weight":"normal", "font-style":"normal"},
|
||||
@ -112,65 +110,60 @@ class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
self.rehighlight()
|
||||
|
||||
def highlightBlock(self, text):
|
||||
self.highlightMarkdown(text,0)
|
||||
self.offset = 0
|
||||
self.highlightMarkdown(text)
|
||||
self.highlightHtml(text)
|
||||
|
||||
def highlightMarkdown(self, text, strt):
|
||||
def highlightMarkdown(self, text):
|
||||
cursor = QTextCursor(self.document())
|
||||
bf = cursor.blockFormat()
|
||||
|
||||
#Block quotes can contain all elements so process it first
|
||||
self.highlightBlockQuote(text, cursor, bf, strt)
|
||||
#Block quotes can contain all elements, so process them first; the quoted content is highlighted recursively inside, so just return
|
||||
if self.highlightBlockQuote(text, cursor, bf):
|
||||
return
|
||||
|
||||
#If the line is empty there is no need to check for the elements below, just return
|
||||
if self.highlightEmptyLine(text, cursor, bf, strt):
|
||||
if self.highlightEmptyLine(text, cursor, bf):
|
||||
return
|
||||
|
||||
#If horizontal line, look at the previous line to see if it's a header, process and return
|
||||
if self.highlightHorizontalLine(text, cursor, bf, strt):
|
||||
if self.highlightHorizontalLine(text, cursor, bf):
|
||||
return
|
||||
|
||||
if self.highlightAtxHeader(text, cursor, bf, strt):
|
||||
if self.highlightHeader(text, cursor, bf):
|
||||
return
|
||||
|
||||
self.highlightList(text, cursor, bf, strt)
|
||||
self.highlightList(text, cursor, bf)
|
||||
|
||||
self.highlightEmphasis(text, cursor, bf, strt)
|
||||
self.highlightBoldEmphasis(text, cursor, bf)
|
||||
|
||||
self.highlightBold(text, cursor, bf, strt)
|
||||
self.highlightLink(text, cursor, bf)
|
||||
|
||||
self.highlightBoldEmphasis(text, cursor, bf, strt)
|
||||
self.highlightImage(text, cursor, bf)
|
||||
|
||||
self.highlightLink(text, cursor, bf, strt)
|
||||
self.highlightCodeSpan(text, cursor, bf)
|
||||
|
||||
self.highlightImage(text, cursor, bf, strt)
|
||||
self.highlightCodeBlock(text, cursor, bf)
|
||||
|
||||
self.highlightCodeSpan(text, cursor, bf, strt)
|
||||
|
||||
self.highlightCodeBlock(text, cursor, bf, strt)
|
||||
|
||||
def highlightBlockQuote(self, text, cursor, bf, strt):
|
||||
def highlightBlockQuote(self, text, cursor, bf):
|
||||
found = False
|
||||
mo = re.search(self.MARKDOWN_KEYS_REGEX['BlockQuote'],text)
|
||||
if mo:
|
||||
self.setFormat(mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote'])
|
||||
unquote = re.sub(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],'',text)
|
||||
spcs = re.match(self.MARKDOWN_KEYS_REGEX['BlockQuoteCount'],text)
|
||||
spcslen = 0
|
||||
if spcs:
|
||||
spcslen = len(spcs.group(0))
|
||||
self.highlightMarkdown(unquote,spcslen)
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['BlockQuote'])
|
||||
self.offset += mo.end()
|
||||
unquote = text[mo.end():]
|
||||
self.highlightMarkdown(unquote)
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightEmptyLine(self, text, cursor, bf, strt):
|
||||
def highlightEmptyLine(self, text, cursor, bf):
|
||||
textAscii = str(text.replace('\u2029','\n'))
|
||||
if textAscii.strip():
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
def highlightHorizontalLine(self, text, cursor, bf, strt):
|
||||
def highlightHorizontalLine(self, text, cursor, bf):
|
||||
found = False
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderLine'],text):
|
||||
@ -178,7 +171,7 @@ class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
prevCursor = QTextCursor(prevBlock)
|
||||
prev = prevBlock.text()
|
||||
prevAscii = str(prev.replace('\u2029','\n'))
|
||||
if prevAscii.strip():
|
||||
if self.offset == 0 and prevAscii.strip():
|
||||
#print "Its a header"
|
||||
prevCursor.select(QTextCursor.SelectionType.LineUnderCursor)
|
||||
#prevCursor.setCharFormat(self.MARKDOWN_KWS_FORMAT['Header'])
|
||||
@ -187,101 +180,118 @@ class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
formatRange.length = prevCursor.block().length()
|
||||
formatRange.start = 0
|
||||
prevCursor.block().layout().setFormats([formatRange])
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderLine'])
|
||||
return True
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HR'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HR'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightAtxHeader(self, text, cursor, bf, strt):
|
||||
def highlightHeader(self, text, cursor, bf):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['HeaderAtx'],text):
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Header'],text):
|
||||
#bf.setBackground(QBrush(QColor(7,54,65)))
|
||||
#cursor.movePosition(QTextCursor.End)
|
||||
#cursor.mergeBlockFormat(bf)
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['HeaderAtx'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Header'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightList(self, text, cursor, bf, strt):
|
||||
def highlightList(self, text, cursor, bf):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['UnorderedList'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['UnorderedList'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['UnorderedList'])
|
||||
found = True
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['OrderedList'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['OrderedList'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['OrderedList'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightLink(self, text, cursor, bf, strt):
|
||||
def highlightLink(self, text, cursor, bf):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Link'],text):
|
||||
start_bracket = mo.group()[0][0] == '['
|
||||
self.setFormat(mo.start() + strt + (0 if start_bracket else 1),
|
||||
mo.end() - mo.start() - strt - (0 if start_bracket else 1), self.MARKDOWN_KWS_FORMAT['Link'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Link'])
|
||||
found = True
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['LinkRef'],text):
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['LinkRef'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightImage(self, text, cursor, bf, strt):
|
||||
def highlightImage(self, text, cursor, bf):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Image'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Image'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['Image'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightCodeSpan(self, text, cursor, bf, strt):
|
||||
def highlightCodeSpan(self, text, cursor, bf):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeSpan'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['CodeSpan'])
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeSpan'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightBold(self, text, cursor, bf, strt):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Bold'])
|
||||
found = True
|
||||
def highlightBoldEmphasis(self, text, cursor, bf):
|
||||
mo = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'], text)
|
||||
if mo:
|
||||
offset = mo.end()
|
||||
else:
|
||||
offset = 0
|
||||
return self._highlightBoldEmphasis(text[offset:], cursor, bf, offset, False, False)
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBold'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightEmphasis(self, text, cursor, bf, strt):
|
||||
def _highlightBoldEmphasis(self, text, cursor, bf, offset, bold, emphasis):
|
||||
#detect and apply nested Bold/Emphasis
|
||||
found = False
|
||||
unlist = re.sub(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],'',text)
|
||||
spcs = re.match(self.MARKDOWN_KEYS_REGEX['UnorderedListStar'],text)
|
||||
spcslen = 0
|
||||
if spcs:
|
||||
spcslen = len(spcs.group(0))
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],unlist):
|
||||
self.setFormat(mo.start()+strt+spcslen, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['Italic'])
|
||||
|
||||
def apply(match, bold, emphasis):
|
||||
if bold and emphasis:
|
||||
self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['BoldItalic'])
|
||||
elif bold:
|
||||
self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Bold'])
|
||||
elif emphasis:
|
||||
self.setFormat(self.offset+offset+ match.start(), match.end() - match.start(), self.MARKDOWN_KWS_FORMAT['Italic'])
|
||||
|
||||
def recusive(match, extra_offset, bold, emphasis):
|
||||
apply(match, bold, emphasis)
|
||||
if bold and emphasis:
|
||||
return # max deep => return, do not process extra Bold/Italic
|
||||
|
||||
sub_txt = text[match.start()+extra_offset : match.end()-extra_offset]
|
||||
sub_offset = offset + extra_offset + mo.start()
|
||||
self._highlightBoldEmphasis(sub_txt, cursor, bf, sub_offset, bold, emphasis)
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Italic'],text):
|
||||
recusive(mo, 1, bold, True)
|
||||
found = True
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uItalic'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uItalic'])
|
||||
recusive(mo, 1, bold, True)
|
||||
found = True
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['Bold'],text):
|
||||
recusive(mo, 2, True, emphasis)
|
||||
found = True
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBold'],text):
|
||||
recusive(mo, 2, True, emphasis)
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightBoldEmphasis(self, text, cursor, bf, strt):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['BoldItalic'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['BoldItalic'])
|
||||
apply(mo, True, True)
|
||||
found = True
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
|
||||
apply(mo, True, True)
|
||||
found = True
|
||||
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['uBoldItalic'],text):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start()-strt, self.MARKDOWN_KWS_FORMAT['uBoldItalic'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
def highlightCodeBlock(self, text, cursor, bf, strt):
|
||||
def highlightCodeBlock(self, text, cursor, bf):
|
||||
found = False
|
||||
for mo in re.finditer(self.MARKDOWN_KEYS_REGEX['CodeBlock'],text):
|
||||
stripped = text.lstrip()
|
||||
if stripped[0] not in ('*','-','+','>'):
|
||||
self.setFormat(mo.start()+strt, mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock'])
|
||||
if stripped[0] not in ('*','-','+','>') and not re.match(r'\d+\.', stripped):
|
||||
self.setFormat(self.offset+ mo.start(), mo.end() - mo.start(), self.MARKDOWN_KWS_FORMAT['CodeBlock'])
|
||||
found = True
|
||||
return found
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user