diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index 5da148e81a..cf4dc02a33 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -100,7 +100,7 @@ def html5_parse(data, max_nesting_depth=100): if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node depth = node_depth(x) if depth > max_nesting_depth: - raise ValueError('html5lib resulted in a tree with nesting' + raise ValueError('HTML 5 parsing resulted in a tree with nesting' ' depth > %d'%max_nesting_depth) return data diff --git a/src/calibre/gui2/store/stores/google_books_plugin.py b/src/calibre/gui2/store/stores/google_books_plugin.py index d225a84e8e..3070ec5c47 100644 --- a/src/calibre/gui2/store/stores/google_books_plugin.py +++ b/src/calibre/gui2/store/stores/google_books_plugin.py @@ -13,7 +13,6 @@ from contextlib import closing from lxml import html from PyQt5.Qt import QUrl -import html5lib from calibre import browser, url_slash_cleaner from calibre.gui2 import open_url from calibre.gui2.store import StorePlugin @@ -23,7 +22,14 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog def parse_html(raw): - return html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml') + try: + from html5_parser import parse + except ImportError: + # Old versions of calibre + import html5lib + return html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False) + else: + return parse(raw) def search_google(query, max_results=10, timeout=60, write_html_to=None): diff --git a/src/calibre/gui2/store/stores/ozon_ru_plugin.py b/src/calibre/gui2/store/stores/ozon_ru_plugin.py index c8da9f2bfb..9ad9acfa7d 100644 --- a/src/calibre/gui2/store/stores/ozon_ru_plugin.py +++ b/src/calibre/gui2/store/stores/ozon_ru_plugin.py @@ -11,7 +11,6 @@ import urllib from contextlib import closing from PyQt5.Qt import QUrl -import html5lib from calibre import browser, url_slash_cleaner from calibre.ebooks.chardet import 
xml_to_unicode @@ -23,6 +22,17 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog shop_url = 'http://www.ozon.ru' +def parse_html(raw): + try: + from html5_parser import parse + except ImportError: + # Old versions of calibre + import html5lib + return html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False) + else: + return parse(raw) + + def search(query, max_results=15, timeout=60): url = 'http://www.ozon.ru/?context=search&text=%s&store=1,0&group=div_book' % urllib.quote_plus(query) @@ -31,7 +41,7 @@ def search(query, max_results=15, timeout=60): with closing(br.open(url, timeout=timeout)) as f: raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0] - root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False) + root = parse_html(raw) for tile in root.xpath('//*[@class="bShelfTile inline"]'): if counter <= 0: break @@ -74,6 +84,7 @@ def format_price_in_RUR(price): price = price.replace('\xa0', '').replace(',', '.').strip() + ' py6' return price + if __name__ == '__main__': import sys for r in search(sys.argv[-1]): diff --git a/src/calibre/gui2/tweak_book/editor/syntax/html.py b/src/calibre/gui2/tweak_book/editor/syntax/html.py index 59eb6d73d0..900297bbc9 100644 --- a/src/calibre/gui2/tweak_book/editor/syntax/html.py +++ b/src/calibre/gui2/tweak_book/editor/syntax/html.py @@ -23,9 +23,7 @@ from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_lo from calibre.gui2.tweak_book.editor.syntax.css import ( create_formats as create_css_formats, state_map as css_state_map, CSSState, CSSUserData) -from html5lib.constants import cdataElements, rcdataElements - -cdata_tags = cdataElements | rcdataElements +cdata_tags = frozenset(['title', 'textarea', 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript']) normal_pat = re.compile(r'[^<>&]+') entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};') tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:-]+') diff --git 
a/src/calibre/utils/open_with/osx.py b/src/calibre/utils/open_with/osx.py index dd98e9e644..d8eb62d000 100644 --- a/src/calibre/utils/open_with/osx.py +++ b/src/calibre/utils/open_with/osx.py @@ -19,10 +19,11 @@ application_locations = ('/Applications', '~/Applications', '~/Desktop') def generate_public_uti_map(): from lxml import etree - import html5lib, urllib + import urllib + from html5_parser import parse raw = urllib.urlopen( 'https://developer.apple.com/library/ios/documentation/Miscellaneous/Reference/UTIRef/Articles/System-DeclaredUniformTypeIdentifiers.html').read() - root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False) + root = parse(raw) tables = root.xpath('//table')[0::2] data = {} for table in tables: @@ -44,6 +45,8 @@ def generate_public_uti_map(): f.seek(0) nraw = re.sub(r'^PUBLIC_UTI_MAP = .+?}', '\n'.join(lines), raw, flags=re.MULTILINE | re.DOTALL) f.truncate(), f.write(nraw) + + # Generated by generate_public_uti_map() PUBLIC_UTI_MAP = { '3g2': 'public.3gpp2',