Fix syntax errors manually

Author: Flaviu Tamas, 2018-09-04 11:27:40 -04:00 (committed by Kovid Goyal)
parent 655ab21b0b
commit 999175cf55
No known key found for this signature in database (GPG key ID 06BC317B515ACE7C)
29 changed files with 60 additions and 63 deletions

View File

@@ -237,7 +237,7 @@ def render_options(cmd, groups, options_header=True, add_program=True, header_le
 def mark_options(raw):
-    raw = re.sub(r'(\s+)--(\s+)', ur'\1``--``\2', raw)
+    raw = re.sub(r'(\s+)--(\s+)', u'\\1``--``\\2', raw)
     def sub(m):
         opt = m.group()

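Note: this is the pattern repeated across the whole commit. Python 3 dropped
the ur'' literal prefix, so each pattern becomes either a cooked u'' string
with doubled backslashes or a raw r'' string wrapped in unicode(). A minimal
sketch of the equivalence, runnable on both 2.7 and 3.x:

    import re

    # ur'\1``--``\2' is a SyntaxError on Python 3; the two spellings below
    # denote exactly the same replacement template on both majors:
    assert u'\\1``--``\\2' == r'\1``--``\2'
    print(re.sub(r'(\s+)--(\s+)', u'\\1``--``\\2', 'use the -- flag'))
    # -> 'use the ``--`` flag'
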
View File

@@ -18,9 +18,9 @@ class LaTeXHelpBuilder(LaTeXBuilder):
         LaTeXBuilder.finish(self)
         self.info('Fixing Cyrillic characters...')
         tex = os.path.join(self.outdir, 'calibre.tex')
-        with open(tex, 'r+b') as f:
+        with open(tex, 'r+') as f:
             raw = f.read()
-            for x in (b'Михаил Горбачёв', b'Фёдор Миха́йлович Достоевский'):
-                raw = raw.replace(x, br'{\fontencoding{T2A}\selectfont %s}' % (x.replace(b'а́', b'a')))
+            for x in (u'Михаил Горбачёв', u'Фёдор Миха́йлович Достоевский'):
+                raw = raw.replace(x, u'{\\fontencoding{T2A}\\selectfont %s}' % (x.replace(u'а́', u'a')))
             f.seek(0)
             f.write(raw)

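Note: two separate Python 3 rules interact here. A bytes literal may contain
only ASCII characters, so b'Михаил Горбачёв' no longer compiles, and once the
names are text the file must be opened in text mode ('r+' instead of 'r+b')
for the replacements to type-check. A sketch of the rule:

    # b'Горбачёв' is a compile-time SyntaxError on Python 3 ("bytes can
    # only contain ASCII literal characters"); spell it as text and
    # encode at the point where bytes are actually needed:
    name = u'Михаил Горбачёв'
    encoded = name.encode('utf-8')
    assert encoded.decode('utf-8') == name
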
View File

@@ -64,23 +64,23 @@ class TXT2TXTZ(FileTypePlugin):
         images = []
         # Textile
-        for m in re.finditer(ur'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))', txt):
+        for m in re.finditer(unicode(r'(?mu)(?:[\[{])?\!(?:\. )?(?P<path>[^\s(!]+)\s?(?:\(([^\)]+)\))?\!(?::(\S+))?(?:[\]}]|(?=\s|$))'), txt):
             path = m.group('path')
             if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                 images.append(path)
         # Markdown inline
-        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)', txt): # noqa
+        for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\((?P<path>[^\)]*)\)'), txt): # noqa
             path = m.group('path')
             if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                 images.append(path)
         # Markdown reference
         refs = {}
-        for m in re.finditer(ur'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$', txt):
+        for m in re.finditer(unicode(r'(?mu)^(\ ?\ ?\ ?)\[(?P<id>[^\]]*)\]:\s*(?P<path>[^\s]*)$'), txt):
             if m.group('id') and m.group('path'):
                 refs[m.group('id')] = m.group('path')
-        for m in re.finditer(ur'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]', txt): # noqa
+        for m in re.finditer(unicode(r'(?mu)\!\[([^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*(\[[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*\])*[^\]\[]*)\]\s*\[(?P<id>[^\]]*)\]'), txt): # noqa
             path = refs.get(m.group('id'), None)
             if path and not os.path.isabs(path) and guess_type(path)[0] in OEB_IMAGES and os.path.exists(os.path.join(base_dir, path)):
                 images.append(path)

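Note: unicode(r'...') is the port's workhorse. On Python 2 the builtin
unicode() promotes the raw byte string; on Python 3 the code presumably
relies on a unicode = str alias being in scope (that alias is an assumption
here, it is not shown in this diff). A self-contained sketch:

    import re

    try:
        unicode
    except NameError:      # Python 3: the compatibility alias assumed above
        unicode = str

    # A raw r'' literal never expands escapes, so unicode(r'...') yields the
    # same pattern text on both majors (as long as it avoids \uXXXX):
    pat = re.compile(unicode(r'^\[(?P<id>[^\]]*)\]:\s*(?P<path>\S*)$'))
    m = pat.match(u'[img1]: images/cover.jpg')
    assert m is not None and m.group('path') == u'images/cover.jpg'
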
View File

@@ -315,7 +315,7 @@ class CSSPreProcessor(object):
         # are commented lines before the first @import or @charset rule. Since
         # the conversion will remove all stylesheets anyway, we don't lose
         # anything
-        data = re.sub(ur'/\*.*?\*/', u'', data, flags=re.DOTALL)
+        data = re.sub(unicode(r'/\*.*?\*/'), u'', data, flags=re.DOTALL)
         ans, namespaced = [], False
         for line in data.splitlines():
@@ -533,7 +533,7 @@ class HTMLPreProcessor(object):
         start_rules = []
         if is_pdftohtml:
             # Remove non breaking spaces
-            start_rules.append((re.compile(ur'\u00a0'), lambda match : ' '))
+            start_rules.append((re.compile(unicode(r'\u00a0')), lambda match : ' '))
         if not getattr(self.extra_opts, 'keep_ligatures', False):
             html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)

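Note: the \u00a0 rewrites above carry a subtlety worth flagging. Python 2's
ur'...' literals still expanded \uXXXX escapes, so ur'\u00a0' was the actual
no-break space character, while unicode(r'\u00a0') is six literal characters
whose interpretation moves into the regex engine; re understands \uXXXX only
on Python 3.3+, so on Python 2 this particular spelling appears to change
behaviour. The \xXX form sidesteps the issue entirely:

    import re

    # u'\xa0' is the one-character no-break space on every Python version:
    print(re.sub(u'\xa0', u' ', u'a\xa0b'))   # -> a b
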
View File

@@ -157,17 +157,17 @@ class HeuristicProcessor(object):
         ]
         ITALICIZE_STYLE_PATS = [
-            ur'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_',
-            ur'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~',
-            ur'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_',
-            ur'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_',
-            ur'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*',
-            ur'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/',
-            ur'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|',
-            ur'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*',
-            ur'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~',
-            ur'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/',
-            ur'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'
+            unicode(r'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_'),
+            unicode(r'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~'),
+            unicode(r'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_'),
+            unicode(r'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_'),
+            unicode(r'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*'),
+            unicode(r'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/'),
+            unicode(r'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|'),
+            unicode(r'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*'),
+            unicode(r'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~'),
+            unicode(r'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/'),
+            unicode(r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'),
         ]
         for word in ITALICIZE_WORDS:
@@ -419,7 +419,7 @@ class HeuristicProcessor(object):
         return html
     def fix_nbsp_indents(self, html):
-        txtindent = re.compile(ur'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE)
+        txtindent = re.compile(unicode(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
         html = txtindent.sub(self.insert_indent, html)
         if self.found_indents > 1:
             self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles")
@@ -427,10 +427,10 @@ class HeuristicProcessor(object):
     def cleanup_markup(self, html):
         # remove remaining non-breaking spaces
-        html = re.sub(ur'\u00a0', ' ', html)
+        html = re.sub(unicode(r'\u00a0'), ' ', html)
         # Get rid of various common microsoft specific tags which can cause issues later
         # Get rid of empty <o:p> tags to simplify other processing
-        html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
+        html = re.sub(unicode(r'\s*<o:p>\s*</o:p>'), ' ', html)
         # Delete microsoft 'smart' tags
         html = re.sub('(?i)</?st1:\w+>', '', html)
         # Re-open self closing paragraph tags

View File

@@ -108,7 +108,7 @@ class HTMLConverter(object):
         re.IGNORECASE), lambda m: '<br />'),
         # Replace entities
-        (re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
+        (re.compile(u'&(\\S+?);'), partial(entity_to_unicode,
             exceptions=['lt', 'gt', 'amp', 'quot'])),
         # Remove comments from within style tags as they can mess up BeatifulSoup
         (re.compile(r'(<style.*?</style>)', re.IGNORECASE|re.DOTALL),

View File

@@ -233,7 +233,7 @@ class OverDrive(Source):
         xreq.add_header('Referer', q_init_search)
         xreq.add_header('Accept', 'application/json, text/javascript, */*')
         raw = br.open_novisit(xreq).read()
-        for m in re.finditer(ur'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)', raw):
+        for m in re.finditer(unicode(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
             if int(m.group('totalrecords')) == 0:
                 return ''
             elif int(m.group('displayrecords')) >= 1:

View File

@@ -358,7 +358,7 @@ class MobiReader(object):
         self.processed_html = re.sub(
             r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', '\g<blockquote>'+'\g<para>', self.processed_html)
         bods = htmls = 0
-        for x in re.finditer(ur'</body>|</html>', self.processed_html):
+        for x in re.finditer(u'</body>|</html>', self.processed_html):
             if x == '</body>':
                 bods +=1
             else:

View File

@@ -163,7 +163,7 @@ def resolve_styles(container, name, select=None, sheet_callback=None):
     style_map = defaultdict(list)
     pseudo_style_map = defaultdict(list)
     rule_index_counter = count()
-    pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
+    pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
     def process_sheet(sheet, sheet_name):
         if sheet_callback is not None:

View File

@@ -94,7 +94,7 @@ class Structure(BaseTest):
         self.assertEqual(3, c.opf_version_parsed.major)
         self.assertTrue(len(get_toc(c))) # detect NCX toc even in epub 3 files
         c.add_file('nav.html', b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
-                   '<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
+                   b'<body><nav epub:type="toc"><ol><li><a href="start.xhtml">EPUB 3 nav</a></li></ol></nav></body></html>',
                    process_manifest_item=lambda item:item.set('properties', 'nav'))
         toc = get_toc(c)
         self.assertTrue(len(toc))
@@ -132,9 +132,9 @@ class Structure(BaseTest):
         c = self.create_epub([cmi('xxx.html'), cmi('a.html')], ver=3)
         self.assertEqual(3, c.opf_version_parsed.major)
         c.add_file('xxx/nav.html', b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">'
-                   '<body><nav epub:type="landmarks"><ol><li><a epub:type="x" href="../xxx.html#moo">XXX </a></li>'
-                   '<li><a href="../a.html"> YYY </a></li>'
-                   '</ol></nav></body></html>',
+                   b'<body><nav epub:type="landmarks"><ol><li><a epub:type="x" href="../xxx.html#moo">XXX </a></li>'
+                   b'<li><a href="../a.html"> YYY </a></li>'
+                   b'</ol></nav></body></html>',
                    process_manifest_item=lambda item:item.set('properties', 'nav'))
         self.assertEqual([
             {'dest':'xxx.html', 'frag':'moo', 'type':'x', 'title':'XXX'}, {'dest':'a.html', 'frag':'', 'type':'', 'title':'YYY'}

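Note: these test fixes are about implicit literal concatenation, which
happens at compile time. Python 3 refuses to join a bytes literal to a str
literal, so every continuation line of a bytes argument needs its own b
prefix:

    # All pieces bytes: fine on both majors.
    doc = (b'<html>'
           b'<body>ok</body>'
           b'</html>')
    assert doc.startswith(b'<html>')
    # (b'<html>' '<body>')  # SyntaxError on Python 3: cannot mix bytes
    #                       # and nonbytes literals
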
View File

@@ -217,7 +217,7 @@ class Stylizer(object):
         rules.sort()
         self.rules = rules
         self._styles = {}
-        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
+        pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
         select = Select(tree, ignore_inappropriate_pseudo_classes=True)
         for _, _, cssdict, text, _ in rules:

View File

@@ -293,7 +293,7 @@ class FlowSplitter(object):
         body = self.get_body(root)
         if body is None:
             return False
-        txt = re.sub(ur'\s+|\xa0', '',
+        txt = re.sub(u'\\s+|\\xa0', '',
                      etree.tostring(body, method='text', encoding=unicode))
         if len(txt) > 1:
             return False

View File

@@ -278,7 +278,7 @@ class PDFStream(object):
         self.stream = HashingStream(stream)
         self.compress = compress
         self.write_line(PDFVER)
-        self.write_line(b'%íì¦"')
+        self.write_line(u'%íì¦"'.encode())
         creator = ('%s %s [https://calibre-ebook.com]'%(__appname__,
             __version__))
         self.write_line('%% Created by %s'%creator)

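Note: the same ASCII-only rule for bytes literals as in the LaTeX builder
above. One hedge: a bare .encode() defaults to UTF-8 on Python 3 but to
ASCII on Python 2, where it would raise UnicodeEncodeError for these
characters, so an explicit codec would arguably be the safer spelling:

    marker = u'%íì¦"'.encode('utf-8')   # same bytes as the old b'%íì¦"'
    assert marker.startswith(b'%')      # under a utf-8 source encoding
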
View File

@@ -174,7 +174,7 @@ class PMLMLizer(object):
     def prepare_text(self, text):
         # Replace empty paragraphs with \c pml codes used to denote emtpy lines.
-        text = re.sub(ur'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>', '\\c\n\\c', text)
+        text = re.sub(unicode(r'(?<=</p>)\s*<p[^>]*>[\xc2\xa0\s]*</p>'), '\\c\n\\c', text)
         return text
     def clean_text(self, text):
@@ -188,7 +188,7 @@ class PMLMLizer(object):
             text = text.replace('\\Q="%s"' % unused, '')
         # Remove \Cn tags that are within \x and \Xn tags
-        text = re.sub(ur'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)', '\g<t>\g<a>\g<b>\g<t>', text)
+        text = re.sub(unicode(r'(?msu)(?P<t>\\(x|X[0-4]))(?P<a>.*?)(?P<c>\\C[0-4]\s*=\s*"[^"]*")(?P<b>.*?)(?P=t)'), '\g<t>\g<a>\g<b>\g<t>', text)
         # Replace bad characters.
         text = text.replace(u'\xc2', '')

View File

@@ -119,7 +119,7 @@ class RTFMLizer(object):
             self.log.debug('Converting %s to RTF markup...' % item.href)
             # Removing comments is needed as comments with -- inside them can
             # cause fromstring() to fail
-            content = re.sub(ur'<!--.*?-->', u'', etree.tostring(item.data, encoding=unicode), flags=re.DOTALL)
+            content = re.sub(u'<!--.*?-->', u'', etree.tostring(item.data, encoding=unicode), flags=re.DOTALL)
             content = self.remove_newlines(content)
             content = self.remove_tabs(content)
             content = etree.fromstring(content)

View File

@@ -16,7 +16,7 @@ from calibre.ptempfile import better_mktemp
 class FieldsLarge:
-    """
+    r"""
     =========================
     Logic
     =========================

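Note: the raw docstring prefix is not cosmetic. rtf2xml docstrings quote RTF
control words, and on Python 3 a \u inside a cooked string must be a complete
\uXXXX escape, so an unprotected token like \uc aborts compilation of the
whole module. A sketch (the quoted tokens are illustrative, not taken from
the actual docstring):

    DOC = r"""RTF tokens look like \ul, \uc0 and \u8212 in the raw input."""
    assert '\\ul' in DOC
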
View File

@@ -64,7 +64,7 @@ class Tokenize:
             self.__reini_utf8_counters()
             return token
         # add a uc control
-        elif token[:3] == '\uc':
+        elif token[:3] == r'\uc':
             self.__uc_value[-1] = int(token[3:])
             self.__reini_utf8_counters()
             return token

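Note: the same rule at expression level. In Python 2 '\uc' was simply three
characters, because unknown escapes passed through byte strings; in Python 3
it is a SyntaxError (truncated \uXXXX escape), and r'\uc' restores the
intended three-character comparison:

    token = r'\uc1'
    assert token[:3] == r'\uc'
    assert int(token[3:]) == 1
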
View File

@@ -23,10 +23,7 @@ from calibre.utils.date import UNDEFINED_DATE
 from calibre.utils.localization import get_lang
 from calibre.utils.file_type_icons import EXT_MAP
-try:
-    NO_URL_FORMATTING = QUrl.None_
-except AttributeError:
-    NO_URL_FORMATTING = QUrl.None
+NO_URL_FORMATTING = QUrl.None_
 # Setup gprefs {{{
 gprefs = JSONConfig('gui')

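Note: this hunk explains the "manually" in the commit title. None is a
keyword in Python 3, so QUrl.None is a compile-time SyntaxError that no
except clause can intercept; the only fix is to delete the fallback. Were a
fallback for old PyQt spellings still wanted, it would have to go through
getattr with a string. A hypothetical sketch (PyQt5 assumed):

    from PyQt5.QtCore import QUrl

    try:
        NO_URL_FORMATTING = QUrl.None_
    except AttributeError:
        # hypothetical: reach the keyword-named attribute via getattr
        NO_URL_FORMATTING = getattr(QUrl, 'None')
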
View File

@@ -57,7 +57,7 @@ def css():
         val = P('templates/book_details.css', data=True).decode('utf-8')
         col = QApplication.instance().palette().color(QPalette.Link).name()
         val = val.replace('LINK_COLOR', col)
-        _css = re.sub(ur'/\*.*?\*/', '', val, flags=re.DOTALL)
+        _css = re.sub(r'/\*.*?\*/', '', val, flags=re.DOTALL)
     return _css

View File

@@ -1151,7 +1151,7 @@ class BooksModel(QAbstractTableModel): # {{{
                 return False
             val = (int(value) if column == 'rating' else
                    value if column in ('timestamp', 'pubdate')
-                   else re.sub(ur'\s', u' ', unicode(value or '').strip()))
+                   else re.sub(u'\\s', u' ', unicode(value or '').strip()))
             id = self.db.id(row)
             books_to_refresh = set([id])
             if column == 'rating':

View File

@@ -175,7 +175,7 @@ class Stores(OrderedDict):
     def load_object(self, src, key):
         namespace = {}
         builtin = self[key]
-        exec src in namespace
+        exec(src, namespace)
         ver = namespace['store_version']
         cls = None
         for x in namespace.itervalues():

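Note: exec changed from statement to function in Python 3, and the call form
is the portable one. Python 2 parses exec(src, namespace) as the exec
statement applied to a 2-tuple, which it special-cases to mean
"exec src in namespace", so the new spelling runs unchanged on both majors:

    ns = {}
    exec('store_version = 7', ns)
    assert ns['store_version'] == 7
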
View File

@@ -56,7 +56,7 @@ def get_newest_version():
     except UnicodeDecodeError:
         version = u''
     ans = NO_CALIBRE_UPDATE
-    m = re.match(ur'(\d+)\.(\d+).(\d+)$', version)
+    m = re.match(unicode(r'(\d+)\.(\d+).(\d+)$'), version)
     if m is not None:
         ans = tuple(map(int, (m.group(1), m.group(2), m.group(3))))
     return ans

View File

@@ -373,14 +373,14 @@ class ZshCompleter(object): # {{{
             opt_lines.append(ostrings + help_txt + ' \\')
         opt_lines = ('\n' + (' ' * 8)).join(opt_lines)
-        f.write((ur'''
+        f.write((u'''
 _ebook_edit() {
     local curcontext="$curcontext" state line ebookfile expl
     typeset -A opt_args
-    _arguments -C -s \
+    _arguments -C -s \\
         %s
-        "1:ebook file:_files -g '(#i)*.(%s)'" \
+        "1:ebook file:_files -g '(#i)*.(%s)'" \\
         '*:file in ebook:->files' && return 0
     case $state in
@@ -393,7 +393,7 @@ _ebook_edit() {
             else
                 return 1
             fi
-            _wanted files expl 'file from ebook' \
+            _wanted files expl 'file from ebook' \\
                 _multi_parts / _zip_cache_list && return 0
             ;;
     esac

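Note: dropping the raw prefix on the zsh template is what forces the doubled
backslashes. In a cooked string, a lone backslash before a newline is a line
continuation and both characters vanish, which would eat the continuation
markers the generated zsh needs; \\ leaves exactly one literal backslash:

    cooked = u'_arguments -C -s \\\n        %s'
    assert cooked.splitlines()[0].endswith(u'\\')
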
View File

@@ -39,7 +39,7 @@ class TestHTTP(BaseTest):
              '\r\n', a='one', b='two 2 3', c='three')
         test('Non-ascii headers parsing',
-             b'a:mūs\r', '\r\n', a='mūs')
+             'a:mūs\r', '\r\n', a='mūs')
         test('Comma-separated parsing',
              'Accept-Encoding: one',

View File

@@ -380,7 +380,7 @@ def create_global_prefs(conf_obj=None):
     c.add_opt('database_path',
               default=os.path.expanduser('~/library1.db'),
               help=_('Path to the database in which books are stored'))
-    c.add_opt('filename_pattern', default=ur'(?P<title>.+) - (?P<author>[^_]+)',
+    c.add_opt('filename_pattern', default=u'(?P<title>.+) - (?P<author>[^_]+)',
               help=_('Pattern to guess metadata from filenames'))
     c.add_opt('isbndb_com_key', default='',
               help=_('Access key for isbndb.com'))

View File

@@ -143,15 +143,15 @@ class Parser(object):
     WORD = 2
     QUOTED_WORD = 3
     EOF = 4
-    REPLACEMENTS = tuple((u'\\' + x, unichr(i + 1)) for i, x in enumerate(ur'\"()'))
+    REPLACEMENTS = tuple((u'\\' + x, unichr(i + 1)) for i, x in enumerate(u'\\"()'))
     # Had to translate named constants to numeric values
     lex_scanner = re.Scanner([
-        (ur'[()]', lambda x,t: (Parser.OPCODE, t)),
-        (ur'@.+?:[^")\s]+', lambda x,t: (Parser.WORD, unicode(t))),
-        (ur'[^"()\s]+', lambda x,t: (Parser.WORD, unicode(t))),
-        (ur'".*?((?<!\\)")', lambda x,t: (Parser.QUOTED_WORD, t[1:-1])),
-        (ur'\s+', None)
+        (unicode(r'[()]'), lambda x,t: (Parser.OPCODE, t)),
+        (unicode(r'@.+?:[^")\s]+'), lambda x,t: (Parser.WORD, unicode(t))),
+        (unicode(r'[^"()\s]+'), lambda x,t: (Parser.WORD, unicode(t))),
+        (unicode(r'".*?((?<!\\)")'), lambda x,t: (Parser.QUOTED_WORD, t[1:-1])),
+        (unicode(r'\s+'), None)
     ], flags=re.DOTALL)
     def token(self, advance=False):

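Note: the REPLACEMENTS rewrite is character-for-character. Raw r'\"()' and
cooked '\\"()' both denote the four characters backslash, double quote and
the two parens, so the table mapping escaped specials to the control
characters \x01..\x04 is unchanged (chr stands in for Python 2's unichr):

    specials = u'\\"()'
    assert list(specials) == ['\\', '"', '(', ')']
    REPLACEMENTS = tuple((u'\\' + x, chr(i + 1)) for i, x in enumerate(specials))
    assert REPLACEMENTS[0] == (u'\\\\', '\x01')
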
View File

@@ -21,7 +21,7 @@ PUNCT = r"""!"#$%&'()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
 SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
 INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
 UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
-CAPFIRST = re.compile(ur"^[%s]*?(\w)" % PUNCT, flags=re.UNICODE)
+CAPFIRST = re.compile(unicode(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
 SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U)
 SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
 SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)

View File

@@ -1319,7 +1319,7 @@ class _FeedParserMixin:
         author, email = context.get(key), None
         if not author:
             return
-        emailmatch = re.search(ur'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?''', author)
+        emailmatch = re.search(unicode(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))(\?subject=\S+)?'''), author)
         if emailmatch:
             email = emailmatch.group(0)
         # probably a better way to do the following, but it passes all the tests

View File

@@ -1681,7 +1681,7 @@ class BasicNewsRecipe(Recipe):
     @classmethod
     def soup(cls, raw):
-        entity_replace = [(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
+        entity_replace = [(re.compile(u'&(\\S+?);'), partial(entity_to_unicode,
                            exceptions=[]))]
         nmassage = list(BeautifulSoup.MARKUP_MASSAGE)
         nmassage.extend(entity_replace)