|[iub]>\s*
\s*<[iub]>)\s*(?P[\w\d]+)'% length)
+ elif format == 'txt':
+ intextmatch = re.compile(u'(?<=.{%i})(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)(\u0020|\u0009)*(?P(\n(\u0020|\u0009)*)+)(?P[\w\d]+)'% length)
elif format == 'individual_words':
- intextmatch = re.compile(u'>[^<]*\b(?P[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)(?P[^<]*\b(?P[^\[\]\\\^\$\.\|\?\*\+\(\)"\s>]+)(-|‐)\u0020*(?P\w+)\b[^<]*<') # for later, not called anywhere yet
elif format == 'html_cleanup':
intextmatch = re.compile(u'(?P[^\[\]\\\^\$\.\|\?\*\+\(\)“"\s>]+)(-|‐)\s*(?=<)(?P