mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove html5lib from miscellaneous places
This commit is contained in:
parent
62e4a9900e
commit
5e67ba1369
@ -100,7 +100,7 @@ def html5_parse(data, max_nesting_depth=100):
|
||||
if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node
|
||||
depth = node_depth(x)
|
||||
if depth > max_nesting_depth:
|
||||
raise ValueError('html5lib resulted in a tree with nesting'
|
||||
raise ValueError('HTML 5 parsing resulted in a tree with nesting'
|
||||
' depth > %d'%max_nesting_depth)
|
||||
return data
|
||||
|
||||
|
@ -13,7 +13,6 @@ from contextlib import closing
|
||||
from lxml import html
|
||||
from PyQt5.Qt import QUrl
|
||||
|
||||
import html5lib
|
||||
from calibre import browser, url_slash_cleaner
|
||||
from calibre.gui2 import open_url
|
||||
from calibre.gui2.store import StorePlugin
|
||||
@ -23,7 +22,14 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
||||
|
||||
|
||||
def parse_html(raw):
    '''
    Parse the HTML markup in ``raw`` and return the parser's result.

    The ``html5_parser`` module is used when it can be imported. When the
    import fails (old versions of calibre), ``html5lib`` is used instead,
    with the lxml tree builder and without namespaced HTML elements.
    '''
    try:
        from html5_parser import parse as fast_parse
    except ImportError:
        # Old versions of calibre
        fast_parse = None

    if fast_parse is not None:
        return fast_parse(raw)

    import html5lib
    return html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml')
|
||||
|
||||
|
||||
def search_google(query, max_results=10, timeout=60, write_html_to=None):
|
||||
|
@ -11,7 +11,6 @@ import urllib
|
||||
from contextlib import closing
|
||||
|
||||
from PyQt5.Qt import QUrl
|
||||
import html5lib
|
||||
|
||||
from calibre import browser, url_slash_cleaner
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
@ -23,6 +22,17 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
||||
shop_url = 'http://www.ozon.ru'
|
||||
|
||||
|
||||
def parse_html(raw):
    '''
    Parse the HTML markup in ``raw`` and return the parser's result.

    The ``html5_parser`` module is used when it can be imported. When the
    import fails (old versions of calibre), ``html5lib`` is used instead,
    with the lxml tree builder and without namespaced HTML elements.
    '''
    try:
        from html5_parser import parse as fast_parse
    except ImportError:
        # Old versions of calibre
        fast_parse = None

    if fast_parse is not None:
        return fast_parse(raw)

    import html5lib
    return html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml')
|
||||
|
||||
|
||||
def search(query, max_results=15, timeout=60):
|
||||
url = 'http://www.ozon.ru/?context=search&text=%s&store=1,0&group=div_book' % urllib.quote_plus(query)
|
||||
|
||||
@ -31,7 +41,7 @@ def search(query, max_results=15, timeout=60):
|
||||
|
||||
with closing(br.open(url, timeout=timeout)) as f:
|
||||
raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
|
||||
root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
|
||||
root = parse_html(raw)
|
||||
for tile in root.xpath('//*[@class="bShelfTile inline"]'):
|
||||
if counter <= 0:
|
||||
break
|
||||
@ -74,6 +84,7 @@ def format_price_in_RUR(price):
|
||||
price = price.replace('\xa0', '').replace(',', '.').strip() + ' py6'
|
||||
return price
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
for r in search(sys.argv[-1]):
|
||||
|
@ -23,9 +23,7 @@ from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_lo
|
||||
from calibre.gui2.tweak_book.editor.syntax.css import (
|
||||
create_formats as create_css_formats, state_map as css_state_map, CSSState, CSSUserData)
|
||||
|
||||
from html5lib.constants import cdataElements, rcdataElements
|
||||
|
||||
cdata_tags = cdataElements | rcdataElements
|
||||
cdata_tags = frozenset(['title', 'textarea', 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'])
|
||||
normal_pat = re.compile(r'[^<>&]+')
|
||||
entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
|
||||
tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:-]+')
|
||||
|
@ -19,10 +19,11 @@ application_locations = ('/Applications', '~/Applications', '~/Desktop')
|
||||
|
||||
def generate_public_uti_map():
|
||||
from lxml import etree
|
||||
import html5lib, urllib
|
||||
import urllib
|
||||
from html5parser import parse
|
||||
raw = urllib.urlopen(
|
||||
'https://developer.apple.com/library/ios/documentation/Miscellaneous/Reference/UTIRef/Articles/System-DeclaredUniformTypeIdentifiers.html').read()
|
||||
root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
|
||||
root = parse(raw)
|
||||
tables = root.xpath('//table')[0::2]
|
||||
data = {}
|
||||
for table in tables:
|
||||
@ -44,6 +45,8 @@ def generate_public_uti_map():
|
||||
f.seek(0)
|
||||
nraw = re.sub(r'^PUBLIC_UTI_MAP = .+?}', '\n'.join(lines), raw, flags=re.MULTILINE | re.DOTALL)
|
||||
f.truncate(), f.write(nraw)
|
||||
|
||||
|
||||
# Generated by generate_public_uti_map()
|
||||
PUBLIC_UTI_MAP = {
|
||||
'3g2': 'public.3gpp2',
|
||||
|
Loading…
x
Reference in New Issue
Block a user