__license__ = 'GPL v3'
__copyright__ = '2011, Starson17'

'''
www.wired.co.uk
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import re


class Wired_UK(BasicNewsRecipe):
    title = 'Wired Magazine - UK edition'
    __author__ = 'Starson17'
    __version__ = 'v1.30'
    __date__ = '15 July 2011'
    description = 'Gaming news'
    publisher = 'Conde Nast Digital'
    category = 'news, games, IT, gadgets'
    oldest_article = 40
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    # masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
    language = 'en_GB'
    index = 'http://www.wired.co.uk'

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language
    }

    keep_only_tags = [dict(name='div', attrs={'class': ['layoutColumn1']})]

    remove_tags = [dict(name='div', attrs={'class': [
        'articleSidebar1',
        'commentAddBox linkit',
        'commentCountBox commentCountBoxBig']})]

    remove_tags_after = dict(name='div', attrs={'class': ['mainCopy entry-content', 'mainCopy']})

    # Disabled candidates kept for reference:
    # remove_attributes = ['height', 'width']
    # extra remove_tags entries:
    #     dict(name=['object', 'embed', 'iframe', 'link']),
    #     dict(attrs={'class': ['opts', 'comment', 'stories']}),

    def _article_from_link(self, a):
        # Build one feed entry from an <a> tag; '?page=all' requests the
        # single-page version of the article so multi-page stories are
        # fetched whole.
        return {
            'title': self.tag_to_string(a),
            'date': strftime(self.timefmt),
            'url': self.index + a['href'] + '?page=all',
            'description': ''
        }

    def parse_index(self):
        totalfeeds = []
        soup = self.index_to_soup(self.index)

        # Latest news: the front-page link list
        recentcontent = soup.find('ul', attrs={'class': 'linkList3'})
        mfeed = []
        if recentcontent:
            for li in recentcontent.findAll('li'):
                mfeed.append(self._article_from_link(li.h2.a))
        totalfeeds.append(('Wired UK Magazine Latest News', mfeed))

        # Features: the second sidebar link list (guard against the page
        # carrying fewer than two such lists)
        popmagcontent = soup.findAll('div', attrs={'class': 'sidebarLinkList'})
        magcontent = popmagcontent[1] if len(popmagcontent) > 1 else None
        mfeed2 = []
        if magcontent:
            a = magcontent.h3.a if magcontent.h3 else None
            if a:
                mfeed2.append(self._article_from_link(a))
            for li in magcontent.findAll('li'):
                mfeed2.append(self._article_from_link(li.a))
        totalfeeds.append(('Wired UK Magazine Features', mfeed2))

        magsoup = self.index_to_soup(self.index + '/magazine')

        # 'Start' section of the magazine page (guard against a missing
        # heading before taking .parent)
        start_title = magsoup.find('h3', attrs={'class': 'magSubSectionTitle titleStart'})
        startcontent = start_title.parent if start_title else None
        mfeed3 = []
        if startcontent:
            for li in startcontent.findAll('li'):
                mfeed3.append(self._article_from_link(li.a))
        totalfeeds.append(('Wired UK Magazine More', mfeed3))

        # 'Play' section of the magazine page
        play_title = magsoup.find('h3', attrs={'class': 'magSubSectionTitle titlePlay'})
        playcontent = play_title.parent if play_title else None
        mfeed4 = []
        if playcontent:
            for li in playcontent.findAll('li'):
                mfeed4.append(self._article_from_link(li.a))
        totalfeeds.append(('Wired UK Magazine Play', mfeed4))

        return totalfeeds

    def get_cover_url(self):
        cover_url = ''
        soup = self.index_to_soup(self.index + '/magazine/archive')
        cover_item = soup.find('div', attrs={'class': 'image linkme'})
        if cover_item:
            cover_url = cover_item.img['src']
        return cover_url

    def preprocess_html(self, soup):
        # Strip the 'This article was taken from...' boilerplate paragraphs
        for tag in soup.findAll(name='p'):
            if tag.find(name='span', text=re.compile(r'This article was taken from.*', re.DOTALL | re.IGNORECASE)):
                tag.extract()
        return soup

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
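
# A minimal way to smoke-test this recipe from the command line, assuming a
# calibre install on PATH (the file and output names below are hypothetical,
# not part of the recipe itself):
#
#   ebook-convert wired_uk.recipe wired_uk.epub --test
#
# With recipe input, --test restricts the download to a couple of articles
# per feed, which exercises parse_index() and preprocess_html() quickly
# without fetching the whole magazine.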