mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Update Foreign Affairs
This commit is contained in:
parent
4371f8582e
commit
f623878892
@ -49,7 +49,7 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
answer = []
|
answer = []
|
||||||
soup = self.index_to_soup(self.FRONTPAGE)
|
soup = self.index_to_soup(html.tostring(self.clean_fa_html(self.index_to_soup(self.FRONTPAGE, as_tree=True))))
|
||||||
div = soup.find('div', attrs={'class':'magazine-hero__image image_auto_width'})
|
div = soup.find('div', attrs={'class':'magazine-hero__image image_auto_width'})
|
||||||
self.cover_url = div.find('img')['src']
|
self.cover_url = div.find('img')['src']
|
||||||
# get dates
|
# get dates
|
||||||
@ -80,12 +80,16 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
|
|||||||
answer.append((section, articles))
|
answer.append((section, articles))
|
||||||
return answer
|
return answer
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def clean_fa_html(self, root):
|
||||||
root = html5lib.parse(raw_html, treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
|
||||||
for svg in tuple(root.iter('{*}svg')):
|
for svg in tuple(root.iter('{*}svg')):
|
||||||
svg.getparent().remove(svg)
|
svg.getparent().remove(svg)
|
||||||
for meta in tuple(root.iter('{*}meta')):
|
for meta in tuple(root.iter('{*}meta')):
|
||||||
meta.getparent().remove(meta)
|
meta.getparent().remove(meta)
|
||||||
|
return root
|
||||||
|
|
||||||
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
|
root = html5lib.parse(raw_html, treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
||||||
|
self.clean_fa_html(root)
|
||||||
return html.tostring(root)
|
return html.tostring(root)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user