#!/usr/bin/env python __license__ = 'GPL v3' __author__ = 'Lorenzo Vigentini' __copyright__ = '2009, Lorenzo Vigentini ' __version__ = 'v1.01' __date__ = '13, January 2010' __description__ = 'PCMag (www.pcmag.com) delivers authoritative, labs-based comparative reviews of computing and Internet products to highly engaged technology buyers.' ''' http://www.pcmag.com/ ''' import re from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Comment class pcMag(BasicNewsRecipe): __author__ = 'Lorenzo Vigentini' description = 'PCMag (www.pcmag.com) delivers authoritative, labs-based comparative reviews of computing and Internet products to highly engaged technology buyers.' cover_url = 'http://www.pcmag.com/images/bg-logo-sharp.2.gif' title = 'PC Magazine' publisher = 'Ziff Davis Media' category = 'PC, computing, product reviews' language = 'en' encoding = 'cp1252' timefmt = '[%a, %d %b, %Y]' oldest_article = 15 max_articles_per_feed = 25 use_embedded_content = False recursion = 10 remove_javascript = True no_stylesheets = True feeds = [ (u'Tech Commentary from the Editors of PC Magazine', u'http://rssnewsapps.ziffdavis.com/PCMAG_commentary.xml'), (u'PC Magazine Breaking News', u'http://rssnewsapps.ziffdavis.com/pcmagtips.xml'), (u'PC Magazine Tips and Solutions', u'http://rssnewsapps.ziffdavis.com/pcmagofficetips.xml'), (u'PC Magazine Small Business', u'http://blogs.pcmag.com/atwork/index.xml'), (u'PC Magazine Security Watch', u'http://feeds.ziffdavis.com/ziffdavis/securitywatch?format=xml'), (u'PC Magazine: the Official John C. Dvorak RSS Feed', u'http://rssnewsapps.ziffdavis.com/PCMAG_dvorak.xml'), (u'PC Magazine Editor-in-Chief Lance Ulanoff', u'http://rssnewsapps.ziffdavis.com/pcmagulanoff.xml'), (u'Michael Millers Forward Thinking from PCMag.com', u'http://feeds.ziffdavis.com/ziffdavis/pcmag-miller?format=xml'), (u'Technology News from Ziff Davis', u'http://rssnewsapps.ziffdavis.com/pcmagbreakingnews.xml') ] keep_only_tags = [dict(attrs={'class':'content-page'})] remove_tags = [ dict(attrs={'class':['control-side','comment','highlights_content','btn-holder','subscribe-panel', 'grey-box comments-box']}), dict(id=['inlineDigg']), dict(text=lambda text:isinstance(text, Comment)), dict(name='img', width='1'), ] preprocess_regexps = [(re.compile(r"