diff --git a/src/calibre/utils/cleantext.py b/src/calibre/utils/cleantext.py
index b4afe7576d..938960df93 100644
--- a/src/calibre/utils/cleantext.py
+++ b/src/calibre/utils/cleantext.py
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2010, sengian <sengian1@gmail.com>'
 __docformat__ = 'restructuredtext en'
 
-import re
+import re, htmlentitydefs
 
 _ascii_pat = None
 
@@ -21,3 +21,32 @@ def clean_ascii_chars(txt, charlist=None):
         pat = re.compile(u'|'.join(map(unichr, charlist)))
     return pat.sub('', txt)
 
+##
+# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
+# Removes HTML or XML character references and entities from a text string.
+#
+# @param text The HTML (or XML) source text.
+# @return The plain text, as a Unicode string, if necessary.
+
+def unescape(text, rm=False, rchar=u''):
+    def fixup(m, rm=rm, rchar=rchar):
+        text = m.group(0)
+        if text[:2] == "&#":
+            # character reference
+            try:
+                if text[:3] == "&#x":
+                    return unichr(int(text[3:-1], 16))
+                else:
+                    return unichr(int(text[2:-1]))
+            except ValueError:
+                pass
+        else:
+            # named entity
+            try:
+                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+            except KeyError:
+                pass
+        if rm:
+            return rchar #replace by char
+        return text # leave as is
+    return re.sub("&#?\w+;", fixup, text)