diff --git a/src/calibre/ebooks/metadata/html.py b/src/calibre/ebooks/metadata/html.py
index dad218ae19..c6d4693baf 100644
--- a/src/calibre/ebooks/metadata/html.py
+++ b/src/calibre/ebooks/metadata/html.py
@@ -9,6 +9,7 @@ Try to read metadata from an HTML file.
'''
import re
+import unittest
from collections import defaultdict
from HTMLParser import HTMLParser
@@ -251,3 +252,184 @@ def get_metadata_(src, encoding=None):
mi.set_identifier(k, v[0])
return mi
+
+
+class MetadataHtmlTest(unittest.TestCase):
+    '''
+    Check get_metadata() against small in-memory HTML documents.
+
+    Each test asks get_stream() for a named fixture, parses it with
+    get_metadata() and compares the result, attribute by attribute, with a
+    hand-built Metadata object.
+
+    Metadata, get_metadata, parse_date and _ are not defined in this class;
+    they are expected to be available at module level.
+    '''
+
+    # Append the custom assertion message to unittest's standard "a != b"
+    # text instead of replacing it, so a failure shows both the differing
+    # values and the name of the attribute that differed.
+    longMessage = True
+
+    def compare_metadata(self, meta_a, meta_b):
+        '''
+        Assert that meta_a and meta_b agree on every compared attribute.
+
+        The attributes are checked in the order listed below; the first one
+        that differs fails the test, and the failure message names it.
+        '''
+        for attr in (
+            'title', 'authors', 'publisher', 'isbn', 'languages', 'pubdate',
+            'timestamp', 'series', 'series_index', 'rating', 'comments',
+            'tags', 'identifiers',
+        ):
+            self.assertEqual(
+                getattr(meta_a, attr), getattr(meta_b, attr),
+                'metadata attribute %r differs' % attr)
+
+    def get_stream(self, test):
+        '''
+        Return a BytesIO holding the fixture document for the named test.
+
+        The document is assembled from byte-string fragments. The fragments
+        are cumulative: every name in a later membership set also appears in
+        the earlier ones, so in the order 'title', 'meta_single',
+        'meta_multi', 'comment_single', 'comment_multi' each name receives
+        all fragments of the previous name plus one more. A name outside
+        these sets receives only the opening and closing fragments.
+
+        NOTE(review): as seen here, most fragments hold only blank lines,
+        while the tests expect specific titles, authors and so on. The markup
+        inside these literals may have been lost; confirm against the
+        upstream file before relying on them.
+        '''
+        from io import BytesIO
+
+        # Opening fragment, always present.
+        raw = b'''\
+
+
+'''
+
+        # Added for every named fixture.
+        if test in {'title', 'meta_single', 'meta_multi', 'comment_single', 'comment_multi'}:
+            raw += b'''\
+ }
+ A Title Tag & Title Ⓒ
+'''
+
+        # Added from 'meta_single' onwards.
+        if test in {'meta_single', 'meta_multi', 'comment_single', 'comment_multi'}:
+            raw += b'''\
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+        # Added from 'meta_multi' onwards.
+        if test in {'meta_multi', 'comment_single', 'comment_multi'}:
+            raw += b'''\
+
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+        # Added from 'comment_single' onwards.
+        if test in {'comment_single', 'comment_multi'}:
+            raw += b'''\
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+        # Added only for 'comment_multi'.
+        if test in {'comment_multi'}:
+            raw += b'''\
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+        # Closing fragment, always present.
+        raw += b'''\
+
+
+
+
+'''
+        return BytesIO(raw)
+
+    def test_input_title(self):
+        '''Parse the 'title' fixture and compare with a title-only Metadata.'''
+        stream_meta = get_metadata(self.get_stream('title'))
+        canon_meta = Metadata('A Title Tag & Title Ⓒ', [_('Unknown')])
+        self.compare_metadata(stream_meta, canon_meta)
+
+    def test_input_meta_single(self):
+        '''Parse the 'meta_single' fixture and compare with the expected values.'''
+        stream_meta = get_metadata(self.get_stream('meta_single'))
+        canon_meta = Metadata('A Meta Tag & Title Ⓒ', ['George Washington'])
+        canon_meta.publisher = 'Publisher A'
+        canon_meta.languages = ['English']
+        canon_meta.pubdate = parse_date('2019-01-01')
+        canon_meta.timestamp = parse_date('2018-01-01')
+        canon_meta.series = 'Meta Series'
+        canon_meta.series_index = float(1)
+        # rating and comments are not assigned for this fixture; the two
+        # assignments below were already commented out. Presumably the
+        # 'meta_single' document carries neither value - TODO confirm.
+        # canon_meta.rating = float(0)
+        # canon_meta.comments = ''
+        canon_meta.tags = ['tag a', 'tag b']
+        canon_meta.set_identifiers({'isbn': '1234567890'})
+        self.compare_metadata(stream_meta, canon_meta)
+
+    def test_input_meta_multi(self):
+        '''Parse the 'meta_multi' fixture and compare with the expected values.'''
+        stream_meta = get_metadata(self.get_stream('meta_multi'))
+        canon_meta = Metadata('A Meta Tag & Title Ⓒ', ['George Washington', 'John Adams', 'Thomas Jefferson'])
+        canon_meta.publisher = 'Publisher A'
+        canon_meta.languages = ['English', 'Spanish']
+        canon_meta.pubdate = parse_date('2019-01-01')
+        canon_meta.timestamp = parse_date('2018-01-01')
+        canon_meta.series = 'Meta Series'
+        canon_meta.series_index = float(1)
+        canon_meta.rating = float(8)
+        canon_meta.comments = 'meta "comments" ♥ HTML &'
+        canon_meta.tags = ['tag a', 'tag b', 'tag c']
+        canon_meta.set_identifiers({'isbn': '1234567890', 'url': 'http://google.com/search?q=calibre'})
+        self.compare_metadata(stream_meta, canon_meta)
+
+    def test_input_comment_single(self):
+        '''Parse the 'comment_single' fixture and compare with the expected values.'''
+        stream_meta = get_metadata(self.get_stream('comment_single'))
+        canon_meta = Metadata('A Comment Tag & Title Ⓒ', ['James Madison', 'James Monroe'])
+        canon_meta.publisher = 'Publisher C'
+        canon_meta.languages = ['French']
+        canon_meta.pubdate = parse_date('2015-01-01')
+        canon_meta.timestamp = parse_date('2014-01-01')
+        canon_meta.series = 'Comment Series'
+        canon_meta.series_index = float(3)
+        canon_meta.rating = float(0)
+        canon_meta.comments = 'comment "comments" ♥ HTML too &'
+        canon_meta.tags = ['tag d']
+        canon_meta.set_identifiers({'isbn': '3456789012', 'url': 'http://google.com/search?q=calibre'})
+        self.compare_metadata(stream_meta, canon_meta)
+
+    def test_input_comment_multi(self):
+        '''Parse the 'comment_multi' fixture and compare with the expected values.'''
+        stream_meta = get_metadata(self.get_stream('comment_multi'))
+        canon_meta = Metadata('A Comment Tag & Title Ⓒ', ['James Madison', 'James Monroe', 'John Quincy Adams'])
+        canon_meta.publisher = 'Publisher C'
+        canon_meta.languages = ['French', 'Japanese']
+        canon_meta.pubdate = parse_date('2015-01-01')
+        canon_meta.timestamp = parse_date('2014-01-01')
+        canon_meta.series = 'Comment Series'
+        canon_meta.series_index = float(3)
+        canon_meta.rating = float(0)
+        canon_meta.comments = 'comment "comments" ♥ HTML too &'
+        canon_meta.tags = ['tag d', 'tag e', 'tag f']
+        canon_meta.set_identifiers({'isbn': '3456789012', 'url': 'http://google.com/search?q=calibre'})
+        self.compare_metadata(stream_meta, canon_meta)
+
+
+def suite():
+    '''Return a test suite loaded from the MetadataHtmlTest test case.'''
+    loader = unittest.TestLoader()
+    return loader.loadTestsFromTestCase(MetadataHtmlTest)
+
+
+def test():
+    '''Run the tests returned by suite() through a text runner at verbosity 2.'''
+    runner = unittest.TextTestRunner(verbosity=2)
+    runner.run(suite())
+
+
+# Run the tests via test() when this module is executed as a script.
+if __name__ == '__main__':
+ test()