834 lines
35 KiB
Python

#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, re
from operator import itemgetter
from itertools import chain
from css_parser import parseStyle
from PyQt5.Qt import QTextEdit, Qt, QTextCursor
from calibre import prepare_string_for_xml, xml_entity_to_unicode
from calibre.ebooks.oeb.polish.container import OEB_DOCS
from calibre.gui2 import error_dialog
from calibre.gui2.tweak_book.editor.syntax.html import ATTR_NAME, ATTR_END, ATTR_START, ATTR_VALUE
from calibre.gui2.tweak_book import tprefs, current_container
from calibre.gui2.tweak_book.editor.smarts import NullSmarts
from calibre.gui2.tweak_book.editor.smarts.utils import (
no_modifiers, get_leading_whitespace_on_block, get_text_before_cursor,
get_text_after_cursor, smart_home, smart_backspace, smart_tab, expand_tabs)
from calibre.utils.icu import utf16_length
get_offset = itemgetter(0)
PARAGRAPH_SEPARATOR = '\u2029'
DEFAULT_LINK_TEMPLATE = '<a href="_TARGET_">_TEXT_</a>'
class Tag(object):
def __init__(self, start_block, tag_start, end_block, tag_end, self_closing=False):
self.start_block, self.end_block = start_block, end_block
self.start_offset, self.end_offset = tag_start.offset, tag_end.offset
tag = tag_start.name
if tag_start.prefix:
tag = tag_start.prefix + ':' + tag
self.name = tag
self.self_closing = self_closing
def __repr__(self):
return '<%s start_block=%s start_offset=%s end_block=%s end_offset=%s self_closing=%s>' % (
self.name, self.start_block.blockNumber(), self.start_offset, self.end_block.blockNumber(), self.end_offset, self.self_closing)
__str__ = __repr__
def next_tag_boundary(block, offset, forward=True, max_lines=10000):
while block.isValid() and max_lines > 0:
ud = block.userData()
if ud is not None:
tags = sorted(ud.tags, key=get_offset, reverse=not forward)
for boundary in tags:
if forward and boundary.offset > offset:
return block, boundary
if not forward and boundary.offset < offset:
return block, boundary
block = block.next() if forward else block.previous()
offset = -1 if forward else sys.maxint
max_lines -= 1
return None, None
def next_attr_boundary(block, offset, forward=True):
while block.isValid():
ud = block.userData()
if ud is not None:
attributes = sorted(ud.attributes, key=get_offset, reverse=not forward)
for boundary in attributes:
if forward and boundary.offset >= offset:
return block, boundary
if not forward and boundary.offset <= offset:
return block, boundary
block = block.next() if forward else block.previous()
offset = -1 if forward else sys.maxint
return None, None
def find_closest_containing_tag(block, offset, max_tags=sys.maxint):
''' Find the closest containing tag. To find it, we search for the first
opening tag that does not have a matching closing tag before the specified
position. Search through at most max_tags. '''
prev_tag_boundary = lambda b, o: next_tag_boundary(b, o, forward=False)
block, boundary = prev_tag_boundary(block, offset)
if block is None:
return None
if boundary.is_start:
# We are inside a tag already
if boundary.closing:
return find_closest_containing_tag(block, boundary.offset)
eblock, eboundary = next_tag_boundary(block, boundary.offset)
if eblock is None or eboundary is None or eboundary.is_start:
return None
if eboundary.self_closing:
return Tag(block, boundary, eblock, eboundary, self_closing=True)
return find_closest_containing_tag(eblock, eboundary.offset + 1)
stack = []
block, tag_end = block, boundary
while block is not None and max_tags > 0:
sblock, tag_start = prev_tag_boundary(block, tag_end.offset)
if sblock is None or not tag_start.is_start:
break
if tag_start.closing: # A closing tag of the form </a>
stack.append((tag_start.prefix, tag_start.name))
elif tag_end.self_closing: # A self closing tag of the form <a/>
pass # Ignore it
else: # An opening tag, hurray
try:
prefix, name = stack.pop()
except IndexError:
prefix = name = None
if (prefix, name) != (tag_start.prefix, tag_start.name):
# Either we have an unbalanced opening tag or a syntax error, in
# either case terminate
return Tag(sblock, tag_start, block, tag_end)
block, tag_end = prev_tag_boundary(sblock, tag_start.offset)
max_tags -= 1
return None # Could not find a containing tag
def find_tag_definition(block, offset):
''' Return the <tag | > definition, if any that (block, offset) is inside. '''
block, boundary = next_tag_boundary(block, offset, forward=False)
if not boundary or not boundary.is_start:
return None, False
tag_start = boundary
closing = tag_start.closing
tag = tag_start.name
if tag_start.prefix:
tag = tag_start.prefix + ':' + tag
return tag, closing
def find_containing_attribute(block, offset):
block, boundary = next_attr_boundary(block, offset, forward=False)
if block is None:
return None
if boundary.type is ATTR_NAME or boundary.data is ATTR_END:
return None # offset is not inside an attribute value
block, boundary = next_attr_boundary(block, boundary.offset - 1, forward=False)
if block is not None and boundary.type == ATTR_NAME:
return boundary.data
return None
def find_attribute_in_tag(block, offset, attr_name):
' Return the start of the attribute value as block, offset or None, None if attribute not found '
end_block, boundary = next_tag_boundary(block, offset)
if boundary.is_start:
return None, None
end_offset = boundary.offset
end_pos = (end_block.blockNumber(), end_offset)
current_block, current_offset = block, offset
found_attr = False
while True:
current_block, boundary = next_attr_boundary(current_block, current_offset)
if current_block is None or (current_block.blockNumber(), boundary.offset) > end_pos:
return None, None
current_offset = boundary.offset
if found_attr:
if boundary.type is not ATTR_VALUE or boundary.data is not ATTR_START:
return None, None
return current_block, current_offset
else:
if boundary.type is ATTR_NAME and boundary.data.lower() == attr_name.lower():
found_attr = True
current_offset += 1
def find_end_of_attribute(block, offset):
' Find the end of an attribute that occurs somewhere after the position specified by (block, offset) '
block, boundary = next_attr_boundary(block, offset)
if block is None or boundary is None:
return None, None
if boundary.type is not ATTR_VALUE or boundary.data is not ATTR_END:
return None, None
return block, boundary.offset
def find_closing_tag(tag, max_tags=sys.maxint):
''' Find the closing tag corresponding to the specified tag. To find it we
search for the first closing tag after the specified tag that does not
match a previous opening tag. Search through at most max_tags. '''
if tag.self_closing:
return None
stack = []
block, offset = tag.end_block, tag.end_offset
while block.isValid() and max_tags > 0:
block, tag_start = next_tag_boundary(block, offset)
if block is None or not tag_start.is_start:
break
endblock, tag_end = next_tag_boundary(block, tag_start.offset)
if endblock is None or tag_end.is_start:
break
if tag_start.closing:
try:
prefix, name = stack.pop()
except IndexError:
prefix = name = None
if (prefix, name) != (tag_start.prefix, tag_start.name):
return Tag(block, tag_start, endblock, tag_end)
elif tag_end.self_closing:
pass
else:
stack.append((tag_start.prefix, tag_start.name))
block, offset = endblock, tag_end.offset
max_tags -= 1
return None
def select_tag(cursor, tag):
cursor.setPosition(tag.start_block.position() + tag.start_offset)
cursor.setPosition(tag.end_block.position() + tag.end_offset + 1, cursor.KeepAnchor)
return unicode(cursor.selectedText()).replace(PARAGRAPH_SEPARATOR, '\n').rstrip('\0')
def rename_tag(cursor, opening_tag, closing_tag, new_name, insert=False):
cursor.beginEditBlock()
text = select_tag(cursor, closing_tag)
if insert:
text = '</%s>%s' % (new_name, text)
else:
text = re.sub(r'^<\s*/\s*[a-zA-Z0-9]+', '</%s' % new_name, text)
cursor.insertText(text)
text = select_tag(cursor, opening_tag)
if insert:
text += '<%s>' % new_name
else:
text = re.sub(r'^<\s*[a-zA-Z0-9]+', '<%s' % new_name, text)
cursor.insertText(text)
cursor.endEditBlock()
def ensure_not_within_tag_definition(cursor, forward=True):
''' Ensure the cursor is not inside a tag definition <>. Returns True iff the cursor was moved. '''
block, offset = cursor.block(), cursor.positionInBlock()
b, boundary = next_tag_boundary(block, offset, forward=False)
if b is None:
return False
if boundary.is_start:
# We are inside a tag
if forward:
block, boundary = next_tag_boundary(block, offset)
if block is not None:
cursor.setPosition(block.position() + boundary.offset + 1)
return True
else:
cursor.setPosition(b.position() + boundary.offset)
return True
return False
BLOCK_TAG_NAMES = frozenset((
'address', 'article', 'aside', 'blockquote', 'center', 'dir', 'fieldset',
'isindex', 'menu', 'noframes', 'hgroup', 'noscript', 'pre', 'section',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'p', 'div', 'dd', 'dl', 'ul',
'ol', 'li', 'body', 'td', 'th'))
def find_closest_containing_block_tag(block, offset, block_tag_names=BLOCK_TAG_NAMES):
while True:
tag = find_closest_containing_tag(block, offset)
if tag is None:
break
if tag.name in block_tag_names:
return tag
block, offset = tag.start_block, tag.start_offset
def set_style_property(tag, property_name, value, editor):
'''
Set a style property, i.e. a CSS property inside the style attribute of the tag.
Any existing style attribute is updated or a new attribute is inserted.
'''
block, offset = find_attribute_in_tag(tag.start_block, tag.start_offset + 1, 'style')
c = editor.textCursor()
def css(d):
return d.cssText.replace('\n', ' ')
if block is None or offset is None:
d = parseStyle('')
d.setProperty(property_name, value)
c.setPosition(tag.end_block.position() + tag.end_offset)
c.insertText(' style="%s"' % css(d))
else:
c.setPosition(block.position() + offset - 1)
end_block, end_offset = find_end_of_attribute(block, offset + 1)
if end_block is None:
return error_dialog(editor, _('Invalid markup'), _(
'The current block tag has an existing unclosed style attribute. Run the Fix HTML'
' tool first.'), show=True)
c.setPosition(end_block.position() + end_offset, c.KeepAnchor)
d = parseStyle(editor.selected_text_from_cursor(c)[1:-1])
d.setProperty(property_name, value)
c.insertText('"%s"' % css(d))
entity_pat = re.compile(r'&(#{0,1}[a-zA-Z0-9]{1,8});$')
class Smarts(NullSmarts):
def __init__(self, *args, **kwargs):
if not hasattr(Smarts, 'regexps_compiled'):
Smarts.regexps_compiled = True
Smarts.tag_pat = re.compile(r'<[^>]+>')
Smarts.closing_tag_pat = re.compile(r'<\s*/[^>]+>')
Smarts.closing_pat = re.compile(r'<\s*/')
Smarts.self_closing_pat = re.compile(r'/\s*>')
Smarts.complete_attr_pat = re.compile(r'''([a-zA-Z0-9_-]+)\s*=\s*(?:'([^']*)|"([^"]*))$''')
NullSmarts.__init__(self, *args, **kwargs)
self.last_matched_tag = self.last_matched_closing_tag = None
def get_extra_selections(self, editor):
ans = []
def add_tag(tag):
a = QTextEdit.ExtraSelection()
a.cursor, a.format = editor.textCursor(), editor.match_paren_format
a.cursor.setPosition(tag.start_block.position()), a.cursor.movePosition(a.cursor.EndOfBlock, a.cursor.KeepAnchor)
text = unicode(a.cursor.selectedText())
start_pos = utf16_length(text[:tag.start_offset])
a.cursor.setPosition(tag.end_block.position()), a.cursor.movePosition(a.cursor.EndOfBlock, a.cursor.KeepAnchor)
text = unicode(a.cursor.selectedText())
end_pos = utf16_length(text[:tag.end_offset + 1])
a.cursor.setPosition(tag.start_block.position() + start_pos)
a.cursor.setPosition(tag.end_block.position() + end_pos, a.cursor.KeepAnchor)
ans.append(a)
c = editor.textCursor()
block, offset = c.block(), c.positionInBlock()
tag = self.last_matched_tag = find_closest_containing_tag(block, offset, max_tags=2000)
self.last_matched_closing_tag = None
if tag is not None:
add_tag(tag)
tag = self.last_matched_closing_tag = find_closing_tag(tag, max_tags=4000)
if tag is not None:
add_tag(tag)
return ans
def jump_to_enclosing_tag(self, editor, start=True):
editor.highlighter.join()
tag = self.last_matched_tag if start else self.last_matched_closing_tag
if tag is None:
return False
c = editor.textCursor()
c.setPosition(tag.start_block.position() + tag.start_offset + (1 if start else 2))
editor.setTextCursor(c)
return True
def remove_tag(self, editor):
editor.highlighter.join()
if not self.last_matched_closing_tag and not self.last_matched_tag:
return
c = editor.textCursor()
c.beginEditBlock()
def erase_tag(tag):
c.setPosition(tag.start_block.position() + tag.start_offset)
c.setPosition(tag.end_block.position() + tag.end_offset + 1, c.KeepAnchor)
c.removeSelectedText()
if self.last_matched_closing_tag:
erase_tag(self.last_matched_closing_tag)
if self.last_matched_tag:
erase_tag(self.last_matched_tag)
c.endEditBlock()
self.last_matched_tag = self.last_matched_closing_tag = None
def rename_block_tag(self, editor, new_name):
editor.highlighter.join()
c = editor.textCursor()
block, offset = c.block(), c.positionInBlock()
tag = find_closest_containing_block_tag(block, offset)
if tag is not None:
closing_tag = find_closing_tag(tag)
if closing_tag is None:
return error_dialog(editor, _('Invalid HTML'), _(
'There is an unclosed %s tag. You should run the Fix HTML tool'
' before trying to rename tags.') % tag.name, show=True)
rename_tag(c, tag, closing_tag, new_name, insert=tag.name in {'body', 'td', 'th', 'li'})
else:
return error_dialog(editor, _('No found'), _(
'No suitable block level tag was found to rename'), show=True)
def get_smart_selection(self, editor, update=True):
editor.highlighter.join()
cursor = editor.textCursor()
if not cursor.hasSelection():
return ''
left = min(cursor.anchor(), cursor.position())
right = max(cursor.anchor(), cursor.position())
cursor.setPosition(left)
ensure_not_within_tag_definition(cursor)
left = cursor.position()
cursor.setPosition(right)
ensure_not_within_tag_definition(cursor, forward=False)
right = cursor.position()
cursor.setPosition(left), cursor.setPosition(right, cursor.KeepAnchor)
if update:
editor.setTextCursor(cursor)
return editor.selected_text_from_cursor(cursor)
def insert_hyperlink(self, editor, target, text, template=None):
template = template or DEFAULT_LINK_TEMPLATE
template = template.replace('_TARGET_', prepare_string_for_xml(target, True))
offset = template.find('_TEXT_')
editor.highlighter.join()
c = editor.textCursor()
if c.hasSelection():
c.insertText('') # delete any existing selected text
ensure_not_within_tag_definition(c)
p = c.position() + offset
c.insertText(template.replace('_TEXT_', text or ''))
c.setPosition(p) # ensure cursor is positioned inside the newly created tag
editor.setTextCursor(c)
def insert_tag(self, editor, name):
editor.highlighter.join()
name = name.lstrip()
text = self.get_smart_selection(editor, update=True)
c = editor.textCursor()
pos = min(c.position(), c.anchor())
m = re.match(r'[a-zA-Z0-9:-]+', name)
cname = name if m is None else m.group()
c.insertText('<{0}>{1}</{2}>'.format(name, text, cname))
c.setPosition(pos + 2 + len(name))
editor.setTextCursor(c)
def verify_for_spellcheck(self, cursor, highlighter):
# Return True iff the cursor is in a location where spelling is
# checked (inside a tag or inside a checked attribute)
highlighter.join()
block = cursor.block()
start_pos = cursor.anchor() - block.position()
end_pos = cursor.position() - block.position()
start_tag, closing = find_tag_definition(block, start_pos)
if closing:
return False
end_tag, closing = find_tag_definition(block, end_pos)
if closing:
return False
if start_tag is None and end_tag is None:
# We are in normal text, check that the containing tag is
# allowed for spell checking.
tag = find_closest_containing_tag(block, start_pos)
if tag is not None and highlighter.tag_ok_for_spell(tag.name.split(':')[-1]):
return True
if start_tag != end_tag:
return False
# Now we check if we are in an allowed attribute
sa = find_containing_attribute(block, start_pos)
ea = find_containing_attribute(block, end_pos)
if sa == ea and sa in highlighter.spell_attributes:
return True
return False
def cursor_position_with_sourceline(self, cursor, for_position_sync=True, use_matched_tag=True):
''' Return the tag just before the current cursor as a source line
number and a list of tags defined on that line up to and including the
containing tag. If ``for_position_sync`` is False then the tag
*containing* the cursor is returned instead of the tag just before the
cursor. Note that finding the containing tag is expensive, so
use with care. As an optimization, the last tag matched by
get_extra_selections is used, unless use_matched_tag is False. '''
block, offset = cursor.block(), cursor.positionInBlock()
if for_position_sync:
nblock, boundary = next_tag_boundary(block, offset, forward=False)
if boundary is None:
return None, None
if boundary.is_start:
# We are inside a tag, use this tag
start_block, start_offset = nblock, boundary.offset
else:
start_block = None
while start_block is None and block.isValid():
ud = block.userData()
if ud is not None:
for boundary in reversed(ud.tags):
if boundary.is_start and not boundary.closing and boundary.offset <= offset:
start_block, start_offset = block, boundary.offset
break
block, offset = block.previous(), sys.maxint
end_block = None
if start_block is not None:
end_block, boundary = next_tag_boundary(start_block, start_offset)
if boundary is None or boundary.is_start:
return None, None
else:
tag = None
if use_matched_tag:
tag = self.last_matched_tag
if tag is None:
tag = find_closest_containing_tag(block, offset, max_tags=2000)
if tag is None:
return None, None
start_block, start_offset, end_block = tag.start_block, tag.start_offset, tag.end_block
if start_block is None or end_block is None:
return None, None
sourceline = end_block.blockNumber() + 1 # blockNumber() is zero based
ud = start_block.userData()
if ud is None:
return None, None
tags = [t.name for t in ud.tags if (t.is_start and not t.closing and t.offset <= start_offset)]
if start_block.blockNumber() != end_block.blockNumber():
# Multiline opening tag, it must be the first tag on the line with the closing >
del tags[:-1]
return sourceline, tags
def goto_sourceline(self, editor, sourceline, tags, attribute=None):
''' Move the cursor to the tag identified by sourceline and tags (a
list of tags names on the specified line). If attribute is specified
the cursor will be placed at the start of the attribute value. '''
found_tag = False
if sourceline is None:
return found_tag
block = editor.document().findBlockByNumber(sourceline - 1) # blockNumber() is zero based
if not block.isValid():
return found_tag
c = editor.textCursor()
ud = block.userData()
all_tags = [] if ud is None else [t for t in ud.tags if (t.is_start and not t.closing)]
tag_names = [t.name for t in all_tags]
if all_tags and tag_names[:len(tags)] == tags:
c.setPosition(block.position() + all_tags[len(tags)-1].offset)
found_tag = True
else:
c.setPosition(block.position())
if found_tag and attribute is not None:
start_offset = c.position() - block.position()
nblock, offset = find_attribute_in_tag(block, start_offset, attribute)
if nblock is not None:
c.setPosition(nblock.position() + offset)
editor.setTextCursor(c)
return found_tag
def get_inner_HTML(self, editor):
''' Select the inner HTML of the current tag. Return a cursor with the
inner HTML selected or None. '''
editor.highlighter.join()
c = editor.textCursor()
block = c.block()
offset = c.position() - block.position()
nblock, boundary = next_tag_boundary(block, offset)
if boundary.is_start:
# We are within the contents of a tag already
tag = find_closest_containing_tag(block, offset)
else:
# We are inside a tag definition < | >
if boundary.self_closing:
return None # self closing tags have no inner html
tag = find_closest_containing_tag(nblock, boundary.offset + 1)
if tag is None:
return None
ctag = find_closing_tag(tag)
if ctag is None:
return None
c.setPosition(tag.end_block.position() + tag.end_offset + 1)
c.setPosition(ctag.start_block.position() + ctag.start_offset, c.KeepAnchor)
return c
def set_text_alignment(self, editor, value):
''' Set the text-align property on the current block tag(s) '''
editor.highlighter.join()
block_tag_names = BLOCK_TAG_NAMES - {'body'} # ignore body since setting text-align globally on body is almost never what is wanted
tags = []
c = editor.textCursor()
if c.hasSelection():
start, end = min(c.anchor(), c.position()), max(c.anchor(), c.position())
c.setPosition(start)
block = c.block()
while block.isValid() and block.position() < end:
ud = block.userData()
if ud is not None:
for tb in ud.tags:
if tb.is_start and not tb.closing and tb.name.lower() in block_tag_names:
nblock, boundary = next_tag_boundary(block, tb.offset)
if boundary is not None and not boundary.is_start and not boundary.self_closing:
tags.append(Tag(block, tb, nblock, boundary))
block = block.next()
if not tags:
c = editor.textCursor()
block, offset = c.block(), c.positionInBlock()
tag = find_closest_containing_block_tag(block, offset, block_tag_names)
if tag is None:
return error_dialog(editor, _('Not in a block tag'), _(
'Cannot change text alignment as the cursor is not inside a block level tag, such as a &lt;p&gt; or &lt;div&gt; tag.'), show=True)
tags = [tag]
for tag in reversed(tags):
set_style_property(tag, 'text-align', value, editor)
def handle_key_press(self, ev, editor):
ev_text = ev.text()
key = ev.key()
is_xml = editor.syntax == 'xml'
if tprefs['replace_entities_as_typed'] and (key == Qt.Key_Semicolon or ';' in ev_text):
self.replace_possible_entity(editor)
return True
if key in (Qt.Key_Enter, Qt.Key_Return) and no_modifiers(ev, Qt.ControlModifier, Qt.AltModifier):
ls = get_leading_whitespace_on_block(editor)
if ls == ' ':
ls = '' # Do not consider a single leading space as indentation
if is_xml:
count = 0
for m in self.tag_pat.finditer(get_text_before_cursor(editor)[1]):
text = m.group()
if self.closing_pat.search(text) is not None:
count -= 1
elif self.self_closing_pat.search(text) is None:
count += 1
if self.closing_tag_pat.match(get_text_after_cursor(editor)[1].lstrip()):
count -= 1
if count > 0:
ls += editor.tw * ' '
editor.textCursor().insertText('\n' + ls)
return True
if key == Qt.Key_Slash:
cursor, text = get_text_before_cursor(editor)
if not text.rstrip().endswith('<'):
return False
text = expand_tabs(text.rstrip()[:-1], editor.tw)
pls = get_leading_whitespace_on_block(editor, previous=True)
if is_xml and not text.lstrip() and len(text) > 1 and len(text) >= len(pls):
# Auto-dedent
text = text[:-editor.tw] + '</'
cursor.insertText(text)
editor.setTextCursor(cursor)
self.auto_close_tag(editor)
return True
if self.auto_close_tag(editor):
return True
if key == Qt.Key_Home and smart_home(editor, ev):
return True
if key == Qt.Key_Tab and smart_tab(editor, ev):
return True
if key == Qt.Key_Backspace and smart_backspace(editor, ev):
return True
if key in (Qt.Key_BraceLeft, Qt.Key_BraceRight):
mods = ev.modifiers()
if int(mods & Qt.ControlModifier):
if self.jump_to_enclosing_tag(editor, key == Qt.Key_BraceLeft):
return True
return False
def replace_possible_entity(self, editor):
c = editor.textCursor()
c.insertText(';')
c.setPosition(c.position() - min(c.positionInBlock(), 10), c.KeepAnchor)
text = editor.selected_text_from_cursor(c)
m = entity_pat.search(text)
if m is not None:
ent = m.group()
repl = xml_entity_to_unicode(m)
if repl != ent:
c.setPosition(c.position() + m.start(), c.KeepAnchor)
c.insertText(repl)
editor.setTextCursor(c)
def auto_close_tag(self, editor):
if not tprefs['auto_close_tags']:
return False
def check_if_in_tag(block, offset=0):
if block.isValid():
text = block.text()
close_pos = text.find('>', offset)
open_pos = text.find('<', offset)
if (close_pos > -1 and open_pos == -1) or (close_pos < open_pos):
return True
return False
c = editor.textCursor()
block, offset = c.block(), c.positionInBlock()
if check_if_in_tag(block, offset) or check_if_in_tag(block.next()):
return False
tag = find_closest_containing_tag(block, offset - 1, max_tags=4000)
if tag is None:
return False
c.insertText('/%s>' % tag.name)
editor.setTextCursor(c)
return True
def get_completion_data(self, editor, ev=None):
c = editor.textCursor()
block, offset = c.block(), c.positionInBlock()
oblock, boundary = next_tag_boundary(block, offset, forward=False, max_lines=5)
if boundary is None or not boundary.is_start or boundary.closing:
# Not inside a opening tag definition
return
tagname = boundary.name.lower()
startpos = oblock.position() + boundary.offset
c.setPosition(c.position()), c.setPosition(startpos, c.KeepAnchor)
text = c.selectedText()
m = self.complete_attr_pat.search(text)
if m is None:
return
attr = m.group(1).lower().split(':')[-1]
doc_name = editor.completion_doc_name
if doc_name and attr in {'href', 'src'}:
# A link
query = m.group(2) or m.group(3) or ''
c = current_container()
names_type = {'a':'text_link', 'img':'image', 'image':'image', 'link':'stylesheet'}.get(tagname)
idx = query.find('#')
if idx > -1 and names_type in (None, 'text_link'):
href, query = query[:idx], query[idx+1:]
name = c.href_to_name(href) if href else doc_name
if c.mime_map.get(name) in OEB_DOCS:
return 'complete_anchor', name, query
return 'complete_names', (names_type, doc_name, c.root), query
def find_text(self, pat, cursor):
from calibre.gui2.tweak_book.text_search import find_text_in_chunks
chunks = []
cstart = min(cursor.position(), cursor.anchor())
cend = max(cursor.position(), cursor.anchor())
c = QTextCursor(cursor)
c.setPosition(cstart)
block = c.block()
in_text = find_tag_definition(block, 0)[0] is None
if in_text:
# Check if we are in comment/PI/etc.
pb = block.previous()
while pb.isValid():
boundaries = pb.userData().non_tag_structures
if boundaries:
if boundaries[-1].is_start:
in_text = False
break
pb = pb.previous()
def append(text, start):
text = text.replace(PARAGRAPH_SEPARATOR, '\n')
after = start + len(text)
if start <= cend and cstart < after:
extra = after - (cend + 1)
if extra > 0:
text = text[:-extra]
extra = cstart - start
if extra > 0:
text = text[extra:]
chunks.append((text, start + max(extra, 0)))
while block.isValid() and block.position() <= cend:
ud = block.userData()
boundaries = sorted(chain(ud.tags, ud.non_tag_structures), key=get_offset)
if not boundaries:
# Add the whole line
if in_text:
text = block.text() + '\n'
append(text, block.position())
else:
start = block.position()
c.setPosition(start)
for b in boundaries:
if in_text:
c.setPosition(start + b.offset, c.KeepAnchor)
if c.hasSelection():
append(c.selectedText(), c.anchor())
in_text = not b.is_start
c.setPosition(start + b.offset + 1)
if in_text:
# Add remaining text in block
c.setPosition(block.position() + boundaries[-1].offset + 1)
c.movePosition(c.EndOfBlock, c.KeepAnchor)
if c.hasSelection():
append(c.selectedText() + '\n', c.anchor())
block = block.next()
s, e = find_text_in_chunks(pat, chunks)
return s != -1 and e != -1, s, e
if __name__ == '__main__': # {{{
from calibre.gui2.tweak_book.editor.widget import launch_editor
if sys.argv[-1].endswith('.html'):
raw = lopen(sys.argv[-1], 'rb').read().decode('utf-8')
else:
raw = '''\
<!DOCTYPE html>
<html xml:lang="en" lang="en">
<!--
-->
<head>
<meta charset="utf-8" />
<title>A title with a tag <span> in it, the tag is treated as normal text</title>
<style type="text/css">
body {
color: green;
font-size: 12pt;
}
</style>
<style type="text/css">p.small { font-size: x-small; color:gray }</style>
</head id="invalid attribute on closing tag">
<body lang="en_IN"><p:
<!-- The start of the actual body text -->
<h1 lang="en_US">A heading that should appear in bold, with an <i>italic</i> word</h1>
<p>Some text with inline formatting, that is syntax highlighted. A <b>bold</b> word, and an <em>italic</em> word. \
<i>Some italic text with a <b>bold-italic</b> word in </i>the middle.</p>
<!-- Let's see what exotic constructs like namespace prefixes and empty attributes look like -->
<svg:svg xmlns:svg="http://whatever" />
<input disabled><input disabled /><span attr=<></span>
<!-- Non-breaking spaces are rendered differently from normal spaces, so that they stand out -->
<p>Some\xa0words\xa0separated\xa0by\xa0non\u2011breaking\xa0spaces and non\u2011breaking hyphens.</p>
<p>Some non-BMP unicode text:\U0001f431\U0001f431\U0001f431</p>
</body>
</html>
'''
def callback(ed):
import regex
ed.find_text(regex.compile('A bold word'))
launch_editor(raw, path_is_raw=True, syntax='html', callback=callback)
# }}}