mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
skip the wayback archive for the index page
This commit is contained in:
parent
8db5fff5ac
commit
176171a116
@ -101,8 +101,8 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
self._nyt_parser = ans = load_module('calibre.web.site_parsers.nytimes')
|
self._nyt_parser = ans = load_module('calibre.web.site_parsers.nytimes')
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def get_nyt_page(self, url):
|
def get_nyt_page(self, url, skip_wayback=False):
|
||||||
if use_wayback_machine:
|
if use_wayback_machine and not skip_wayback:
|
||||||
from calibre import browser
|
from calibre import browser
|
||||||
return self.nyt_parser.download_url(url, browser())
|
return self.nyt_parser.download_url(url, browser())
|
||||||
return self.browser.open_novisit(url).read()
|
return self.browser.open_novisit(url).read()
|
||||||
@ -123,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
def read_todays_paper(self):
|
def read_todays_paper(self):
|
||||||
INDEX = 'https://www.nytimes.com/section/todayspaper'
|
INDEX = 'https://www.nytimes.com/section/todayspaper'
|
||||||
# INDEX = 'file:///t/raw.html'
|
# INDEX = 'file:///t/raw.html'
|
||||||
return self.index_to_soup(self.get_nyt_page(INDEX))
|
return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))
|
||||||
|
|
||||||
def read_nyt_metadata(self):
|
def read_nyt_metadata(self):
|
||||||
soup = self.read_todays_paper()
|
soup = self.read_todays_paper()
|
||||||
|
@ -101,8 +101,8 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
self._nyt_parser = ans = load_module('calibre.web.site_parsers.nytimes')
|
self._nyt_parser = ans = load_module('calibre.web.site_parsers.nytimes')
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def get_nyt_page(self, url):
|
def get_nyt_page(self, url, skip_wayback=False):
|
||||||
if use_wayback_machine:
|
if use_wayback_machine and not skip_wayback:
|
||||||
from calibre import browser
|
from calibre import browser
|
||||||
return self.nyt_parser.download_url(url, browser())
|
return self.nyt_parser.download_url(url, browser())
|
||||||
return self.browser.open_novisit(url).read()
|
return self.browser.open_novisit(url).read()
|
||||||
@ -123,7 +123,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
def read_todays_paper(self):
|
def read_todays_paper(self):
|
||||||
INDEX = 'https://www.nytimes.com/section/todayspaper'
|
INDEX = 'https://www.nytimes.com/section/todayspaper'
|
||||||
# INDEX = 'file:///t/raw.html'
|
# INDEX = 'file:///t/raw.html'
|
||||||
return self.index_to_soup(self.get_nyt_page(INDEX))
|
return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))
|
||||||
|
|
||||||
def read_nyt_metadata(self):
|
def read_nyt_metadata(self):
|
||||||
soup = self.read_todays_paper()
|
soup = self.read_todays_paper()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user