mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Replace htmlsymbols with built in functions.
This commit is contained in:
parent
b665378698
commit
192b968caa
@ -1,312 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
'''
|
|
||||||
Mapping of non-ASCII symbols and their corresponding HTML entity number and name
|
|
||||||
'''
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
||||||
|
|
||||||
# http://www.w3schools.com/tags/ref_symbols.asp
|
|
||||||
HTML_SYMBOLS = {
|
|
||||||
# Math Symbols
|
|
||||||
u'∀' : ['∀', '∀'], # for all
|
|
||||||
u'∂' : ['∂', '∂'], # part
|
|
||||||
u'∃' : ['∃', '&exists;'], # exists
|
|
||||||
u'∅' : ['∅', '∅'], # empty
|
|
||||||
u'∇' : ['∇', '∇'], # nabla
|
|
||||||
u'∈' : ['∈', '∈'], # isin
|
|
||||||
u'∉' : ['∉', '∉'], # notin
|
|
||||||
u'∋' : ['∋', '∋'], # ni
|
|
||||||
u'∏' : ['∏', '∏'], # prod
|
|
||||||
u'∑' : ['∑', '∑'], # sum
|
|
||||||
u'−' : ['−', '−'], # minus
|
|
||||||
u'∗' : ['∗', '∗'], # lowast
|
|
||||||
u'√' : ['√', '√'], # square root
|
|
||||||
u'∝' : ['∝', '∝'], # proportional to
|
|
||||||
u'∞' : ['∞', '∞'], # infinity
|
|
||||||
u'∠' : ['∠', '∠'], # angle
|
|
||||||
u'∧' : ['∧', '∧'], # and
|
|
||||||
u'∨' : ['∨', '∨'], # or
|
|
||||||
u'∩' : ['∩', '∩'], # cap
|
|
||||||
u'∪' : ['∪', '∪'], # cup
|
|
||||||
u'∫' : ['∫', '∫'], # integral
|
|
||||||
u'∴' : ['∴', '∴'], # therefore
|
|
||||||
u'∼' : ['∼', '∼'], # similar to
|
|
||||||
u'≅' : ['≅', '≅'], # approximately equal
|
|
||||||
u'≈' : ['≈', '≈'], # almost equal
|
|
||||||
u'≠' : ['≠', '≠'], # not equal
|
|
||||||
u'≡' : ['≡', '≡'], # equivalent
|
|
||||||
u'≤' : ['≤', '≤'], # less or equal
|
|
||||||
u'≥' : ['≥', '≥'], # greater or equal
|
|
||||||
u'⊂' : ['⊂', '⊂'], # subset of
|
|
||||||
u'⊃' : ['⊃', '⊃'], # superset of
|
|
||||||
u'⊄' : ['⊄', '⊄'], # not subset of
|
|
||||||
u'⊆' : ['⊆', '⊆'], # subset or equal
|
|
||||||
u'⊇' : ['⊇', '⊇'], # superset or equal
|
|
||||||
u'⊕' : ['⊕', '⊕'], # circled plus
|
|
||||||
u'⊗' : ['⊗', '⊗'], # circled times
|
|
||||||
u'⊥' : ['⊥', '⊥'], # perpendicular
|
|
||||||
u'⋅' : ['⋅', '⋅'], # dot operator
|
|
||||||
# Greek Letters
|
|
||||||
u'Α' : ['Α', 'Α'], # Alpha
|
|
||||||
u'Β' : ['Β', 'Β'], # Beta
|
|
||||||
u'Γ' : ['Γ', 'Γ'], # Gamma
|
|
||||||
u'Δ' : ['Δ', 'Δ'], # Delta
|
|
||||||
u'Ε' : ['Ε', 'Ε'], # Epsilon
|
|
||||||
u'Ζ' : ['Ζ', 'Ζ'], # Zeta
|
|
||||||
u'Η' : ['Η', 'Η'], # Eta
|
|
||||||
u'Θ' : ['Θ', 'Θ'], # Theta
|
|
||||||
u'Ι' : ['Ι', 'Ι'], # Iota
|
|
||||||
u'Κ' : ['Κ', 'Κ'], # Kappa
|
|
||||||
u'Λ' : ['Λ', 'Λ'], # Lambda
|
|
||||||
u'Μ' : ['Μ', 'Μ'], # Mu
|
|
||||||
u'Ν' : ['Ν', 'Ν'], # Nu
|
|
||||||
u'Ξ' : ['Ξ', 'Ξ'], # Xi
|
|
||||||
u'Ο' : ['Ο', 'Ο'], # Omicron
|
|
||||||
u'Π' : ['Π', 'Π'], # Pi
|
|
||||||
u'Ρ' : ['Ρ', 'Ρ'], # Rho
|
|
||||||
u'Σ' : ['Σ', 'Σ'], # Sigma
|
|
||||||
u'Τ' : ['Τ', 'Τ'], # Tau
|
|
||||||
u'Υ' : ['Υ', 'Υ'], # Upsilon
|
|
||||||
u'Φ' : ['Φ', 'Φ'], # Phi
|
|
||||||
u'Χ' : ['Χ', 'Χ'], # Chi
|
|
||||||
u'Ψ' : ['Ψ', 'Ψ'], # Psi
|
|
||||||
u'ω' : ['ω', 'ω'], # omega
|
|
||||||
u'ϑ' : ['ϑ', 'ϑ'], # theta symbol
|
|
||||||
u'ϒ' : ['ϒ', 'ϒ'], # upsilon symbol
|
|
||||||
u'ϖ' : ['ϖ', 'ϖ'], # pi symbol
|
|
||||||
# Other
|
|
||||||
u'Œ' : ['Œ', 'Œ'], # capital ligature OE
|
|
||||||
u'œ' : ['œ', 'œ'], # small ligature oe
|
|
||||||
u'Š' : ['Š', 'Š'], # capital S with caron
|
|
||||||
u'š' : ['š', 'š'], # small S with caron
|
|
||||||
u'Ÿ' : ['Ÿ', 'Ÿ'], # capital Y with diaeresis
|
|
||||||
u'ƒ' : ['ƒ', 'ƒ'], # f with hook
|
|
||||||
u'ˆ' : ['ˆ', 'ˆ'], # modifier letter circumflex accent
|
|
||||||
u'˜' : ['˜', '˜'], # small tilde
|
|
||||||
u'–' : ['–', '–'], # en dash
|
|
||||||
u'—' : ['—', '—'], # em dash
|
|
||||||
u'‘' : ['‘', '‘'], # left single quotation mark
|
|
||||||
u'’' : ['’', '’'], # right single quotation mark
|
|
||||||
u'‚' : ['‚', '‚'], # single low-9 quotation mark
|
|
||||||
u'“' : ['“', '“'], # left double quotation mark
|
|
||||||
u'”' : ['”', '”'], # right double quotation mark
|
|
||||||
u'„' : ['„', '„'], # double low-9 quotation mark
|
|
||||||
u'†' : ['†', '†'], # dagger
|
|
||||||
u'‡' : ['‡', '‡'], # double dagger
|
|
||||||
u'•' : ['•', '•'], # bullet
|
|
||||||
u'…' : ['…', '…'], # horizontal ellipsis
|
|
||||||
u'‰' : ['‰', '‰'], # per mille
|
|
||||||
u'′' : ['′', '′'], # minutes
|
|
||||||
u'″' : ['″', '″'], # seconds
|
|
||||||
u'‹' : ['‹', '‹'], # single left angle quotation
|
|
||||||
u'›' : ['›', '›'], # single right angle quotation
|
|
||||||
u'‾' : ['‾', '‾'], # overline
|
|
||||||
u'€' : ['€', '€'], # euro
|
|
||||||
u'™' : ['™', '™'], # trademark
|
|
||||||
u'←' : ['←', '←'], # left arrow
|
|
||||||
u'↑' : ['↑', '↑'], # up arrow
|
|
||||||
u'→' : ['→', '→'], # right arrow
|
|
||||||
u'↓' : ['↓', '↓'], # down arrow
|
|
||||||
u'↔' : ['↔', '↔'], # left right arrow
|
|
||||||
u'↵' : ['↵', '↵'], # carriage return arrow
|
|
||||||
u'⌈' : ['⌈', '⌈'], # left ceiling
|
|
||||||
u'⌉' : ['⌉', '⌉'], # right ceiling
|
|
||||||
u'⌊' : ['⌊', '⌊'], # left floor
|
|
||||||
u'⌋' : ['⌋', '⌋'], # right floor
|
|
||||||
u'◊' : ['◊', '◊'], # lozenge
|
|
||||||
u'♠' : ['♠', '♠'], # spade
|
|
||||||
u'♣' : ['♣', '♣'], # club
|
|
||||||
u'♥' : ['♥', '♥'], # heart
|
|
||||||
u'♦' : ['♦', '♦'], # diamond
|
|
||||||
# Extra http://www.ascii.cl/htmlcodes.htm
|
|
||||||
u' ' : [' '], # space
|
|
||||||
u'!' : ['!'], # exclamation point
|
|
||||||
u'#' : ['#'], # number sign
|
|
||||||
u'$' : ['$'], # dollar sign
|
|
||||||
u'%' : ['%'], # percent sign
|
|
||||||
u'\'' : ['''], # single quote
|
|
||||||
u'(' : ['('], # opening parenthesis
|
|
||||||
u')' : [')'], # closing parenthesis
|
|
||||||
u'*' : ['*'], # asterisk
|
|
||||||
u'+' : ['+'], # plus sign
|
|
||||||
u',' : [','], # comma
|
|
||||||
u'-' : ['-'], # minus sign - hyphen
|
|
||||||
u'.' : ['.'], # period
|
|
||||||
u'/' : ['/'], # slash
|
|
||||||
u'0' : ['0'], # zero
|
|
||||||
u'1' : ['1'], # one
|
|
||||||
u'2' : ['2'], # two
|
|
||||||
u'3' : ['3'], # three
|
|
||||||
u'4' : ['4'], # four
|
|
||||||
u'5' : ['5'], # five
|
|
||||||
u'6' : ['6'], # six
|
|
||||||
u'7' : ['7'], # seven
|
|
||||||
u'8' : ['8'], # eight
|
|
||||||
u'9' : ['9'], # nine
|
|
||||||
u':' : [':'], # colon
|
|
||||||
u';' : [';'], # semicolon
|
|
||||||
u'=' : ['='], # equal sign
|
|
||||||
u'?' : ['?'], # question mark
|
|
||||||
u'@' : ['@'], # at symbol
|
|
||||||
u'A' : ['A'], #
|
|
||||||
u'B' : ['B'], #
|
|
||||||
u'C' : ['C'], #
|
|
||||||
u'D' : ['D'], #
|
|
||||||
u'E' : ['E'], #
|
|
||||||
u'F' : ['F'], #
|
|
||||||
u'G' : ['G'], #
|
|
||||||
u'H' : ['H'], #
|
|
||||||
u'I' : ['I'], #
|
|
||||||
u'J' : ['J'], #
|
|
||||||
u'K' : ['K'], #
|
|
||||||
u'L' : ['L'], #
|
|
||||||
u'M' : ['M'], #
|
|
||||||
u'N' : ['N'], #
|
|
||||||
u'O' : ['O'], #
|
|
||||||
u'P' : ['P'], #
|
|
||||||
u'Q' : ['Q'], #
|
|
||||||
u'R' : ['R'], #
|
|
||||||
u'S' : ['S'], #
|
|
||||||
u'T' : ['T'], #
|
|
||||||
u'U' : ['U'], #
|
|
||||||
u'V' : ['V'], #
|
|
||||||
u'W' : ['W'], #
|
|
||||||
u'X' : ['X'], #
|
|
||||||
u'Y' : ['Y'], #
|
|
||||||
u'Z' : ['Z'], #
|
|
||||||
u'[' : ['['], # opening bracket
|
|
||||||
u'\\' : ['\'], # backslash
|
|
||||||
u']' : [']'], # closing bracket
|
|
||||||
u'^' : ['^'], # caret - circumflex
|
|
||||||
u'_' : ['_'], # underscore
|
|
||||||
u'`' : ['`'], # grave accent
|
|
||||||
u'a' : ['a'], #
|
|
||||||
u'b' : ['b'], #
|
|
||||||
u'c' : ['c'], #
|
|
||||||
u'd' : ['d'], #
|
|
||||||
u'e' : ['e'], #
|
|
||||||
u'f' : ['f'], #
|
|
||||||
u'g' : ['g'], #
|
|
||||||
u'h' : ['h'], #
|
|
||||||
u'i' : ['i'], #
|
|
||||||
u'j' : ['j'], #
|
|
||||||
u'k' : ['k'], #
|
|
||||||
u'l' : ['l'], #
|
|
||||||
u'm' : ['m'], #
|
|
||||||
u'n' : ['n'], #
|
|
||||||
u'o' : ['o'], #
|
|
||||||
u'p' : ['p'], #
|
|
||||||
u'q' : ['q'], #
|
|
||||||
u'r' : ['r'], #
|
|
||||||
u's' : ['s'], #
|
|
||||||
u't' : ['t'], #
|
|
||||||
u'u' : ['u'], #
|
|
||||||
u'v' : ['v'], #
|
|
||||||
u'w' : ['w'], #
|
|
||||||
u'x' : ['x'], #
|
|
||||||
u'y' : ['y'], #
|
|
||||||
u'z' : ['z'], #
|
|
||||||
u'{' : ['{'], # opening brace
|
|
||||||
u'|' : ['|'], # vertical bar
|
|
||||||
u'}' : ['}'], # closing brace
|
|
||||||
u'~' : ['~'], # equivalency sign - tilde
|
|
||||||
u'<' : ['<', '<'], # less than sign
|
|
||||||
u'>' : ['>', '>'], # greater than sign
|
|
||||||
u'¡' : ['¡', '¡'], # inverted exclamation mark
|
|
||||||
u'¢' : ['¢', '¢'], # cent sign
|
|
||||||
u'£' : ['£', '£'], # pound sign
|
|
||||||
u'¤' : ['¤', '¤'], # currency sign
|
|
||||||
u'¥' : ['¥', '¥'], # yen sign
|
|
||||||
u'¦' : ['¦', '¦'], # broken vertical bar
|
|
||||||
u'§' : ['§', '§'], # section sign
|
|
||||||
u'¨' : ['¨', '¨'], # spacing diaeresis - umlaut
|
|
||||||
u'©' : ['©', '©'], # copyright sign
|
|
||||||
u'ª' : ['ª', 'ª'], # feminine ordinal indicator
|
|
||||||
u'«' : ['«', '«'], # left double angle quotes
|
|
||||||
u'¬' : ['¬', '¬'], # not sign
|
|
||||||
u'®' : ['®', '®'], # registered trade mark sign
|
|
||||||
u'¯' : ['¯', '¯'], # spacing macron - overline
|
|
||||||
u'°' : ['°', '°'], # degree sign
|
|
||||||
u'±' : ['±', '±'], # plus-or-minus sign
|
|
||||||
u'²' : ['²', '²'], # superscript two - squared
|
|
||||||
u'³' : ['³', '³'], # superscript three - cubed
|
|
||||||
u'´' : ['´', '´'], # acute accent - spacing acute
|
|
||||||
u'µ' : ['µ', 'µ'], # micro sign
|
|
||||||
u'¶' : ['¶', '¶'], # pilcrow sign - paragraph sign
|
|
||||||
u'·' : ['·', '·'], # middle dot - Georgian comma
|
|
||||||
u'¸' : ['¸', '¸'], # spacing cedilla
|
|
||||||
u'¹' : ['¹', '¹'], # superscript one
|
|
||||||
u'º' : ['º', 'º'], # masculine ordinal indicator
|
|
||||||
u'»' : ['»', '»'], # right double angle quotes
|
|
||||||
u'¼' : ['¼', '¼'], # fraction one quarter
|
|
||||||
u'½' : ['½', '½'], # fraction one half
|
|
||||||
u'¾' : ['¾', '¾'], # fraction three quarters
|
|
||||||
u'¿' : ['¿', '¿'], # inverted question mark
|
|
||||||
u'À' : ['À', 'À'], # latin capital letter A with grave
|
|
||||||
u'Á' : ['Á', 'Á'], # latin capital letter A with acute
|
|
||||||
u'Â' : ['Â', 'Â'], # latin capital letter A with circumflex
|
|
||||||
u'Ã' : ['Ã', 'Ã'], # latin capital letter A with tilde
|
|
||||||
u'Ä' : ['Ä', 'Ä'], # latin capital letter A with diaeresis
|
|
||||||
u'Å' : ['Å', 'Å'], # latin capital letter A with ring above
|
|
||||||
u'Æ' : ['Æ', 'Æ'], # latin capital letter AE
|
|
||||||
u'Ç' : ['Ç', 'Ç'], # latin capital letter C with cedilla
|
|
||||||
u'È' : ['È', 'È'], # latin capital letter E with grave
|
|
||||||
u'É' : ['É', 'É'], # latin capital letter E with acute
|
|
||||||
u'Ê' : ['Ê', 'Ê'], # latin capital letter E with circumflex
|
|
||||||
u'Ë' : ['Ë', 'Ë'], # latin capital letter E with diaeresis
|
|
||||||
u'Ì' : ['Ì', 'Ì'], # latin capital letter I with grave
|
|
||||||
u'Í' : ['Í', 'Í'], # latin capital letter I with acute
|
|
||||||
u'Î' : ['Î', 'Î'], # latin capital letter I with circumflex
|
|
||||||
u'Ï' : ['Ï', 'Ï'], # latin capital letter I with diaeresis
|
|
||||||
u'Ð' : ['Ð', 'Ð'], # latin capital letter ETH
|
|
||||||
u'Ñ' : ['Ñ', 'Ñ'], # latin capital letter N with tilde
|
|
||||||
u'Ò' : ['Ò', 'Ò'], # latin capital letter O with grave
|
|
||||||
u'Ó' : ['Ó', 'Ó'], # latin capital letter O with acute
|
|
||||||
u'Ô' : ['Ô', 'Ô'], # latin capital letter O with circumflex
|
|
||||||
u'Õ' : ['Õ', 'Õ'], # latin capital letter O with tilde
|
|
||||||
u'Ö' : ['Ö', 'Ö'], # latin capital letter O with diaeresis
|
|
||||||
u'×' : ['×', '×'], # multiplication sign
|
|
||||||
u'Ø' : ['Ø', 'Ø'], # latin capital letter O with slash
|
|
||||||
u'Ù' : ['Ù', 'Ù'], # latin capital letter U with grave
|
|
||||||
u'Ú' : ['Ú', 'Ú'], # latin capital letter U with acute
|
|
||||||
u'Û' : ['Û', 'Û'], # latin capital letter U with circumflex
|
|
||||||
u'Ü' : ['Ü', 'Ü'], # latin capital letter U with diaeresis
|
|
||||||
u'Ý' : ['Ý', 'Ý'], # latin capital letter Y with acute
|
|
||||||
u'Þ' : ['Þ', 'Þ'], # latin capital letter THORN
|
|
||||||
u'ß' : ['ß', 'ß'], # latin small letter sharp s - ess-zed
|
|
||||||
u'à' : ['à', 'à'], # latin small letter a with grave
|
|
||||||
u'á' : ['á', 'á'], # latin small letter a with acute
|
|
||||||
u'â' : ['â', 'â'], # latin small letter a with circumflex
|
|
||||||
u'ã' : ['ã', 'ã'], # latin small letter a with tilde
|
|
||||||
u'ä' : ['ä', 'ä'], # latin small letter a with diaeresis
|
|
||||||
u'å' : ['å', 'å'], # latin small letter a with ring above
|
|
||||||
u'æ' : ['æ', 'æ'], # latin small letter ae
|
|
||||||
u'ç' : ['ç', 'ç'], # latin small letter c with cedilla
|
|
||||||
u'è' : ['è', 'è'], # latin small letter e with grave
|
|
||||||
u'é' : ['é', 'é'], # latin small letter e with acute
|
|
||||||
u'ê' : ['ê', 'ê'], # latin small letter e with circumflex
|
|
||||||
u'ë' : ['ë', 'ë'], # latin small letter e with diaeresis
|
|
||||||
u'ì' : ['ì', 'ì'], # latin small letter i with grave
|
|
||||||
u'í' : ['í', 'í'], # latin small letter i with acute
|
|
||||||
u'î' : ['î', 'î'], # latin small letter i with circumflex
|
|
||||||
u'ï' : ['ï', 'ï'], # latin small letter i with diaeresis
|
|
||||||
u'ð' : ['ð', 'ð'], # latin small letter eth
|
|
||||||
u'ñ' : ['ñ', 'ñ'], # latin small letter n with tilde
|
|
||||||
u'ò' : ['ò', 'ò'], # latin small letter o with grave
|
|
||||||
u'ó' : ['ó', 'ó'], # latin small letter o with acute
|
|
||||||
u'ô' : ['ô', 'ô'], # latin small letter o with circumflex
|
|
||||||
u'õ' : ['õ', 'õ'], # latin small letter o with tilde
|
|
||||||
u'ö' : ['ö', 'ö'], # latin small letter o with diaeresis
|
|
||||||
u'÷' : ['÷', '÷'], # division sign
|
|
||||||
u'ø' : ['ø', 'ø'], # latin small letter o with slash
|
|
||||||
u'ù' : ['ù', 'ù'], # latin small letter u with grave
|
|
||||||
u'ú' : ['ú', 'ú'], # latin small letter u with acute
|
|
||||||
u'û' : ['û', 'û'], # latin small letter u with circumflex
|
|
||||||
u'ü' : ['ü', 'ü'], # latin small letter u with diaeresis
|
|
||||||
u'ý' : ['ý', 'ý'], # latin small letter y with acute
|
|
||||||
u'þ' : ['þ', 'þ'], # latin small letter thorn
|
|
||||||
u'ÿ' : ['ÿ', 'ÿ'], # latin small letter y with diaeresis
|
|
||||||
# More
|
|
||||||
u' ' : [' '],
|
|
||||||
}
|
|
||||||
|
|
@ -10,10 +10,9 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.ebooks.pdb.ereader import image_name
|
from htmlentitydefs import codepoint2name
|
||||||
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.pdb.ereader import image_name
|
||||||
|
|
||||||
PML_HTML_RULES = [
|
PML_HTML_RULES = [
|
||||||
(re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
|
(re.compile(r'\\p'), lambda match: '<br /><br style="page-break-after: always;" />'),
|
||||||
@ -71,9 +70,11 @@ def pml_to_html(pml):
|
|||||||
for rule in PML_HTML_RULES:
|
for rule in PML_HTML_RULES:
|
||||||
html = rule[0].sub(rule[1], html)
|
html = rule[0].sub(rule[1], html)
|
||||||
|
|
||||||
for symbol in HTML_SYMBOLS.keys():
|
# Turn special characters into entities.
|
||||||
if ord(symbol) > 128:
|
cps = [ord(c) for c in set(html)]
|
||||||
html = html.replace(symbol, HTML_SYMBOLS[symbol][len(HTML_SYMBOLS[symbol]) - 1])
|
cps = set(cps).intersection(codepoint2name.keys()).difference([60, 62])
|
||||||
|
for cp in cps:
|
||||||
|
html = html.replace(unichr(cp), '&%s;' % codepoint2name[cp])
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
@ -12,7 +12,6 @@ import os
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre import entity_to_unicode
|
from calibre import entity_to_unicode
|
||||||
from calibre.ebooks.htmlsymbols import HTML_SYMBOLS
|
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup
|
from BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
@ -82,10 +81,6 @@ class TxtWriter(object):
|
|||||||
return stripped
|
return stripped
|
||||||
|
|
||||||
def replace_html_symbols(self, content):
|
def replace_html_symbols(self, content):
|
||||||
for symbol in HTML_SYMBOLS:
|
|
||||||
for code in HTML_SYMBOLS[symbol]:
|
|
||||||
content = content.replace(code, symbol)
|
|
||||||
|
|
||||||
for entity in set(re.findall('&.+?;', content)):
|
for entity in set(re.findall('&.+?;', content)):
|
||||||
mo = re.search('(%s)' % entity[1:-1], content)
|
mo = re.search('(%s)' % entity[1:-1], content)
|
||||||
content = content.replace(entity, entity_to_unicode(mo))
|
content = content.replace(entity, entity_to_unicode(mo))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user