diff --git a/src/calibre/ebooks/sgmllib.py b/src/calibre/ebooks/sgmllib.py
deleted file mode 100644
index 09a31eb86c..0000000000
--- a/src/calibre/ebooks/sgmllib.py
+++ /dev/null
@@ -1,568 +0,0 @@
-"""A parser for SGML, using the derived class as a static DTD."""
-from __future__ import print_function
-
-# XXX This only supports those SGML features used by HTML.
-
-# XXX There should be a way to distinguish between PCDATA (parsed
-# character data -- the normal case), RCDATA (replaceable character
-# data -- only char and entity references and end tags are special)
-# and CDATA (character data -- only end tags are special). RCDATA is
-# not supported at all.
-
-
-import markupbase
-import re
-
-__all__ = ["SGMLParser", "SGMLParseError"]
-
-# Regular expressions used for parsing
-
-interesting = re.compile('[&<]')
-incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
- '<([a-zA-Z][^<>]*|'
- '/([a-zA-Z][^<>]*)?|'
- '![^<>]*)?')
-
-entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
-charref = re.compile('(x{0,1}[a-f0-9]+)[^a-f0-9]', re.IGNORECASE) # Changed by Kovid to handle hex numeric entities
-
-starttagopen = re.compile('<[>a-zA-Z]')
-shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
-shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
-piclose = re.compile('>')
-endbracket = re.compile('[<>]')
-tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
-attrfind = re.compile(
- r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
-
-
-class SGMLParseError(RuntimeError):
- """Exception raised for all parse errors."""
- pass
-
-
-# SGML parser base class -- find tags and call handler functions.
-# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
-# The dtd is defined by deriving a class which defines methods
-# with special names to handle tags: start_foo and end_foo to handle
-# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
-# (Tags are converted to lower case for this purpose.) The data
-# between tags is passed to the parser by calling self.handle_data()
-# with some data as argument (the data may be split up in arbitrary
-# chunks). Entity references are passed by calling
-# self.handle_entityref() with the entity reference as argument.
-
-class SGMLParser(markupbase.ParserBase):
- # Definition of entities -- derived classes may override
- entity_or_charref = re.compile('&(?:'
- '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
- ')(;?)')
-
- def __init__(self, verbose=0):
- """Initialize and reset this instance."""
- self.verbose = verbose
- self.reset()
-
- def reset(self):
- """Reset this instance. Loses all unprocessed data."""
- self.__starttag_text = None
- self.rawdata = ''
- self.stack = []
- self.lasttag = '???'
- self.nomoretags = 0
- self.literal = 0
- markupbase.ParserBase.reset(self)
-
- def setnomoretags(self):
- """Enter literal mode (CDATA) till EOF.
-
- Intended for derived classes only.
- """
- self.nomoretags = self.literal = 1
-
- def setliteral(self, *args):
- """Enter literal mode (CDATA).
-
- Intended for derived classes only.
- """
- self.literal = 1
-
- def feed(self, data):
- """Feed some data to the parser.
-
- Call this as often as you want, with as little or as much text
- as you want (may include '\n'). (This just saves the text,
- all the processing is done by goahead().)
- """
-
- self.rawdata = self.rawdata + data
- self.goahead(0)
-
- def close(self):
- """Handle the remaining data."""
- self.goahead(1)
-
- def error(self, message):
- raise SGMLParseError(message)
-
- # Internal -- handle data as far as reasonable. May leave state
- # and data to be processed by a subsequent call. If 'end' is
- # true, force handling all data as if followed by EOF marker.
- def goahead(self, end):
- rawdata = self.rawdata
- i = 0
- n = len(rawdata)
- while i < n:
- if self.nomoretags:
- self.handle_data(rawdata[i:n])
- i = n
- break
- match = interesting.search(rawdata, i)
- if match:
- j = match.start()
- else:
- j = n
- if i < j:
- self.handle_data(rawdata[i:j])
- i = j
- if i == n:
- break
- if rawdata[i] == '<':
- if starttagopen.match(rawdata, i):
- if self.literal:
- self.handle_data(rawdata[i])
- i = i+1
- continue
- k = self.parse_starttag(i)
- if k < 0:
- break
- i = k
- continue
- if rawdata.startswith("", i):
- k = self.parse_endtag(i)
- if k < 0:
- break
- i = k
- self.literal = 0
- continue
- if self.literal:
- if n > (i + 1):
- self.handle_data("<")
- i = i+1
- else:
- # incomplete
- break
- continue
- if rawdata.startswith("