mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Update the host used for wayback downloads
This commit is contained in:
parent
6c9fc1c833
commit
d2977ebec4
@ -4,12 +4,12 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from xml.sax.saxutils import escape, quoteattr
|
import sys
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
from xml.sax.saxutils import escape, quoteattr
|
||||||
|
|
||||||
from calibre.utils.iso8601 import parse_iso8601
|
from calibre.utils.iso8601 import parse_iso8601
|
||||||
|
|
||||||
|
|
||||||
module_version = 4 # needed for live updates
|
module_version = 4 # needed for live updates
|
||||||
pprint
|
pprint
|
||||||
|
|
||||||
@ -186,20 +186,26 @@ def extract_html(soup):
|
|||||||
return json_to_html(raw)
|
return json_to_html(raw)
|
||||||
|
|
||||||
|
|
||||||
def download_url(url, br):
|
def download_url(url=None, br=None):
|
||||||
# Get the URL from the Wayback machine
|
# Get the URL from the Wayback machine
|
||||||
from mechanize import Request
|
from mechanize import Request
|
||||||
|
host = 'http://localhost:8090'
|
||||||
|
host = 'https://wayback1.calibre-ebook.com'
|
||||||
|
if url is None:
|
||||||
|
url = sys.argv[-1]
|
||||||
rq = Request(
|
rq = Request(
|
||||||
'http://localhost:8090/nytimes',
|
host + '/nytimes',
|
||||||
data=json.dumps({"url": url}),
|
data=json.dumps({"url": url}),
|
||||||
headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'}
|
headers={'User-Agent': 'calibre', 'Content-Type': 'application/json'}
|
||||||
)
|
)
|
||||||
|
if br is None:
|
||||||
|
from calibre import browser
|
||||||
|
br = browser()
|
||||||
br.set_handle_gzip(True)
|
br.set_handle_gzip(True)
|
||||||
return br.open_novisit(rq, timeout=3 * 60).read()
|
return br.open_novisit(rq, timeout=3 * 60).read()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
|
||||||
f = sys.argv[-1]
|
f = sys.argv[-1]
|
||||||
raw = open(f).read()
|
raw = open(f).read()
|
||||||
if f.endswith('.html'):
|
if f.endswith('.html'):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user