mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #5769 (The Sun Feed)
This commit is contained in:
parent
70b2ab0293
commit
47303223b2
@ -1,5 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class AdvancedUserRecipe1268409464(BasicNewsRecipe):
|
class AdvancedUserRecipe1268409464(BasicNewsRecipe):
|
||||||
title = u'The Sun'
|
title = u'The Sun'
|
||||||
@ -14,24 +15,27 @@ class AdvancedUserRecipe1268409464(BasicNewsRecipe):
|
|||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'medium-centered'})
|
dict(id='column-print')
|
||||||
,dict(name='div', attrs={'class':'article'})
|
|
||||||
,dict(name='div', attrs={'class':'clear-left'})
|
|
||||||
,dict(name='div', attrs={'class':'text-center'})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':'slideshow'})
|
dict(name='div', attrs={'class':[
|
||||||
,dict(name='div', attrs={'class':'float-left'})
|
'clear text-center small padding-left-right-5 text-999 padding-top-5 padding-bottom-10 grey-solid-line',
|
||||||
,dict(name='div', attrs={'class':'ltbx-slideshow ltbx-btn-ss'})
|
'clear width-625 bg-fff padding-top-10'
|
||||||
,dict(name='a', attrs={'class':'add_a_comment'})
|
]}),
|
||||||
,dict(name='div', attrs={'id':'vxFlashPlayerContent'})
|
dict(name='video'),
|
||||||
,dict(name='div', attrs={'id':'k1006094r1c1t5w380h529'})
|
|
||||||
,dict(name='div', attrs={'id':'tum_login_form_container'})
|
|
||||||
,dict(name='div', attrs={'class':'discHeader'})
|
|
||||||
,dict(name='div', attrs={'class':'margin-bottom-neg-2'})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Swap the first <h1> in the page for a bare <h1> holding only its text.

    The heading is looked up with ``soup.find('h1')``. When one is found,
    its text (as returned by ``self.tag_to_string``) is inserted into a
    newly constructed ``h1`` tag, which then takes the original heading's
    place in the tree. The soup object is returned in every case.
    """
    heading = soup.find('h1')
    if heading is None:
        # No heading on this page: hand the soup back untouched.
        return soup

    heading_text = self.tag_to_string(heading)
    plain_heading = Tag(soup, 'h1')
    plain_heading.insert(0, heading_text)
    heading.replaceWith(plain_heading)
    return soup
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'News', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article312900.ece')
|
feeds = [(u'News', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article312900.ece')
|
||||||
,(u'Sport', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247732.ece')
|
,(u'Sport', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247732.ece')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user