From b12c75c904be0b54c5cf2b32b5a16c7a1b39309e Mon Sep 17 00:00:00 2001 From: Christopher Szucko Date: Sun, 11 Aug 2019 09:30:35 -0500 Subject: [PATCH] Add unit tests for HTML metadata imports --- src/calibre/ebooks/metadata/html.py | 182 ++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/src/calibre/ebooks/metadata/html.py b/src/calibre/ebooks/metadata/html.py index dad218ae19..c6d4693baf 100644 --- a/src/calibre/ebooks/metadata/html.py +++ b/src/calibre/ebooks/metadata/html.py @@ -9,6 +9,7 @@ Try to read metadata from an HTML file. ''' import re +import unittest from collections import defaultdict from HTMLParser import HTMLParser @@ -251,3 +252,184 @@ def get_metadata_(src, encoding=None): mi.set_identifier(k, v[0]) return mi + + +class MetadataHtmlTest(unittest.TestCase): + + def compare_metadata(self, meta_a, meta_b): + for attr in ('title', 'authors', 'publisher', 'isbn', 'languages', 'pubdate', 'timestamp', 'series', 'series_index', 'rating', 'comments', 'tags', 'identifiers'): + self.assertEqual(getattr(meta_a, attr), getattr(meta_b, attr)) + + def get_stream(self, test): + from io import BytesIO + + raw = b'''\ + + +''' + + if test in {'title', 'meta_single', 'meta_multi', 'comment_single', 'comment_multi'}: + raw += b'''\ + } + A Title Tag & Title Ⓒ +''' + + if test in {'meta_single', 'meta_multi', 'comment_single', 'comment_multi'}: + raw += b'''\ + + + + + + + + + + + + + + + + + +''' + + if test in {'meta_multi', 'comment_single', 'comment_multi'}: + raw += b'''\ + + + + + + + + + + + + + +''' + + if test in {'comment_single', 'comment_multi'}: + raw += b'''\ + + + + + + + + + + + + +''' + + if test in {'comment_multi'}: + raw += b'''\ + + + + + + + + + + + + +''' + + raw += b'''\ + + + + +''' + return BytesIO(raw) + + + def test_input_title(self): + stream_meta = get_metadata(self.get_stream('title')) + canon_meta = Metadata('A Title Tag & Title Ⓒ', [_('Unknown')]) + self.compare_metadata(stream_meta, canon_meta) + + + def test_input_meta_single(self): + stream_meta = get_metadata(self.get_stream('meta_single')) + canon_meta = Metadata('A Meta Tag & Title Ⓒ', ['George Washington']) + canon_meta.publisher = 'Publisher A' + canon_meta.languages = ['English'] + canon_meta.pubdate = parse_date('2019-01-01') + canon_meta.timestamp = parse_date('2018-01-01') + canon_meta.series = 'Meta Series' + canon_meta.series_index = float(1) + # canon_meta.rating = float(0) + # canon_meta.comments = '' + canon_meta.tags = ['tag a', 'tag b'] + canon_meta.set_identifiers({'isbn': '1234567890'}) + self.compare_metadata(stream_meta, canon_meta) + + + def test_input_meta_multi(self): + stream_meta = get_metadata(self.get_stream('meta_multi')) + canon_meta = Metadata('A Meta Tag & Title Ⓒ', ['George Washington', 'John Adams', 'Thomas Jefferson']) + canon_meta.publisher = 'Publisher A' + canon_meta.languages = ['English', 'Spanish'] + canon_meta.pubdate = parse_date('2019-01-01') + canon_meta.timestamp = parse_date('2018-01-01') + canon_meta.series = 'Meta Series' + canon_meta.series_index = float(1) + canon_meta.rating = float(8) + canon_meta.comments = 'meta "comments" ♥ HTML &' + canon_meta.tags = ['tag a', 'tag b', 'tag c'] + canon_meta.set_identifiers({'isbn': '1234567890', 'url': 'http://google.com/search?q=calibre'}) + self.compare_metadata(stream_meta, canon_meta) + + + def test_input_comment_single(self): + stream_meta = get_metadata(self.get_stream('comment_single')) + canon_meta = Metadata('A Comment Tag & Title Ⓒ', ['James Madison', 'James Monroe']) + canon_meta.publisher = 'Publisher C' + canon_meta.languages = ['French'] + canon_meta.pubdate = parse_date('2015-01-01') + canon_meta.timestamp = parse_date('2014-01-01') + canon_meta.series = 'Comment Series' + canon_meta.series_index = float(3) + canon_meta.rating = float(0) + canon_meta.comments = 'comment "comments" ♥ HTML too &' + canon_meta.tags = ['tag d'] + canon_meta.set_identifiers({'isbn': '3456789012', 'url': 'http://google.com/search?q=calibre'}) + self.compare_metadata(stream_meta, canon_meta) + + + def test_input_comment_multi(self): + stream_meta = get_metadata(self.get_stream('comment_multi')) + canon_meta = Metadata('A Comment Tag & Title Ⓒ', ['James Madison', 'James Monroe', 'John Quincy Adams']) + canon_meta.publisher = 'Publisher C' + canon_meta.languages = ['French', 'Japanese'] + canon_meta.pubdate = parse_date('2015-01-01') + canon_meta.timestamp = parse_date('2014-01-01') + canon_meta.series = 'Comment Series' + canon_meta.series_index = float(3) + canon_meta.rating = float(0) + canon_meta.comments = 'comment "comments" ♥ HTML too &' + canon_meta.tags = ['tag d', 'tag e', 'tag f'] + canon_meta.set_identifiers({'isbn': '3456789012', 'url': 'http://google.com/search?q=calibre'}) + self.compare_metadata(stream_meta, canon_meta) + + +def suite(): + return unittest.TestLoader().loadTestsFromTestCase(MetadataHtmlTest) + + +def test(): + unittest.TextTestRunner(verbosity=2).run(suite()) + + +if __name__ == '__main__': + test()