mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Remove html5lib from miscellaneous places
This commit is contained in:
parent
62e4a9900e
commit
5e67ba1369
@ -100,7 +100,7 @@ def html5_parse(data, max_nesting_depth=100):
|
|||||||
if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node
|
if isinstance(x.tag, basestring) and len(x) is 0: # Leaf node
|
||||||
depth = node_depth(x)
|
depth = node_depth(x)
|
||||||
if depth > max_nesting_depth:
|
if depth > max_nesting_depth:
|
||||||
raise ValueError('html5lib resulted in a tree with nesting'
|
raise ValueError('HTML 5 parsing resulted in a tree with nesting'
|
||||||
' depth > %d'%max_nesting_depth)
|
' depth > %d'%max_nesting_depth)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@ -13,7 +13,6 @@ from contextlib import closing
|
|||||||
from lxml import html
|
from lxml import html
|
||||||
from PyQt5.Qt import QUrl
|
from PyQt5.Qt import QUrl
|
||||||
|
|
||||||
import html5lib
|
|
||||||
from calibre import browser, url_slash_cleaner
|
from calibre import browser, url_slash_cleaner
|
||||||
from calibre.gui2 import open_url
|
from calibre.gui2 import open_url
|
||||||
from calibre.gui2.store import StorePlugin
|
from calibre.gui2.store import StorePlugin
|
||||||
@ -23,7 +22,14 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|||||||
|
|
||||||
|
|
||||||
def parse_html(raw):
|
def parse_html(raw):
|
||||||
return html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml')
|
try:
|
||||||
|
from html5_parser import parse
|
||||||
|
except ImportError:
|
||||||
|
# Old versions of calibre
|
||||||
|
import html5lib
|
||||||
|
return html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
|
||||||
|
else:
|
||||||
|
return parse(raw)
|
||||||
|
|
||||||
|
|
||||||
def search_google(query, max_results=10, timeout=60, write_html_to=None):
|
def search_google(query, max_results=10, timeout=60, write_html_to=None):
|
||||||
|
@ -11,7 +11,6 @@ import urllib
|
|||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from PyQt5.Qt import QUrl
|
from PyQt5.Qt import QUrl
|
||||||
import html5lib
|
|
||||||
|
|
||||||
from calibre import browser, url_slash_cleaner
|
from calibre import browser, url_slash_cleaner
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
@ -23,6 +22,17 @@ from calibre.gui2.store.web_store_dialog import WebStoreDialog
|
|||||||
shop_url = 'http://www.ozon.ru'
|
shop_url = 'http://www.ozon.ru'
|
||||||
|
|
||||||
|
|
||||||
|
def parse_html(raw):
|
||||||
|
try:
|
||||||
|
from html5_parser import parse
|
||||||
|
except ImportError:
|
||||||
|
# Old versions of calibre
|
||||||
|
import html5lib
|
||||||
|
return html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
|
||||||
|
else:
|
||||||
|
return parse(raw)
|
||||||
|
|
||||||
|
|
||||||
def search(query, max_results=15, timeout=60):
|
def search(query, max_results=15, timeout=60):
|
||||||
url = 'http://www.ozon.ru/?context=search&text=%s&store=1,0&group=div_book' % urllib.quote_plus(query)
|
url = 'http://www.ozon.ru/?context=search&text=%s&store=1,0&group=div_book' % urllib.quote_plus(query)
|
||||||
|
|
||||||
@ -31,7 +41,7 @@ def search(query, max_results=15, timeout=60):
|
|||||||
|
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
|
raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
|
||||||
root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
|
root = parse_html(raw)
|
||||||
for tile in root.xpath('//*[@class="bShelfTile inline"]'):
|
for tile in root.xpath('//*[@class="bShelfTile inline"]'):
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
@ -74,6 +84,7 @@ def format_price_in_RUR(price):
|
|||||||
price = price.replace('\xa0', '').replace(',', '.').strip() + ' py6'
|
price = price.replace('\xa0', '').replace(',', '.').strip() + ' py6'
|
||||||
return price
|
return price
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
for r in search(sys.argv[-1]):
|
for r in search(sys.argv[-1]):
|
||||||
|
@ -23,9 +23,7 @@ from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_lo
|
|||||||
from calibre.gui2.tweak_book.editor.syntax.css import (
|
from calibre.gui2.tweak_book.editor.syntax.css import (
|
||||||
create_formats as create_css_formats, state_map as css_state_map, CSSState, CSSUserData)
|
create_formats as create_css_formats, state_map as css_state_map, CSSState, CSSUserData)
|
||||||
|
|
||||||
from html5lib.constants import cdataElements, rcdataElements
|
cdata_tags = frozenset(['title', 'textarea', 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'])
|
||||||
|
|
||||||
cdata_tags = cdataElements | rcdataElements
|
|
||||||
normal_pat = re.compile(r'[^<>&]+')
|
normal_pat = re.compile(r'[^<>&]+')
|
||||||
entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
|
entity_pat = re.compile(r'&#{0,1}[a-zA-Z0-9]{1,8};')
|
||||||
tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:-]+')
|
tag_name_pat = re.compile(r'/{0,1}[a-zA-Z0-9:-]+')
|
||||||
|
@ -19,10 +19,11 @@ application_locations = ('/Applications', '~/Applications', '~/Desktop')
|
|||||||
|
|
||||||
def generate_public_uti_map():
|
def generate_public_uti_map():
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import html5lib, urllib
|
import urllib
|
||||||
|
from html5_parser import parse
|
||||||
raw = urllib.urlopen(
|
raw = urllib.urlopen(
|
||||||
'https://developer.apple.com/library/ios/documentation/Miscellaneous/Reference/UTIRef/Articles/System-DeclaredUniformTypeIdentifiers.html').read()
|
'https://developer.apple.com/library/ios/documentation/Miscellaneous/Reference/UTIRef/Articles/System-DeclaredUniformTypeIdentifiers.html').read()
|
||||||
root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
|
root = parse(raw)
|
||||||
tables = root.xpath('//table')[0::2]
|
tables = root.xpath('//table')[0::2]
|
||||||
data = {}
|
data = {}
|
||||||
for table in tables:
|
for table in tables:
|
||||||
@ -44,6 +45,8 @@ def generate_public_uti_map():
|
|||||||
f.seek(0)
|
f.seek(0)
|
||||||
nraw = re.sub(r'^PUBLIC_UTI_MAP = .+?}', '\n'.join(lines), raw, flags=re.MULTILINE | re.DOTALL)
|
nraw = re.sub(r'^PUBLIC_UTI_MAP = .+?}', '\n'.join(lines), raw, flags=re.MULTILINE | re.DOTALL)
|
||||||
f.truncate(), f.write(nraw)
|
f.truncate(), f.write(nraw)
|
||||||
|
|
||||||
|
|
||||||
# Generated by generate_public_uti_map()
|
# Generated by generate_public_uti_map()
|
||||||
PUBLIC_UTI_MAP = {
|
PUBLIC_UTI_MAP = {
|
||||||
'3g2': 'public.3gpp2',
|
'3g2': 'public.3gpp2',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user