Consumerist and Gawker Media by Trickery

This commit is contained in:
Kovid Goyal 2010-08-23 09:54:42 -06:00
parent 206982fa7f
commit 3c0af8b77c
18 changed files with 410 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 887 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 953 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 233 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 400 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@ -0,0 +1,53 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
consumerist.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Consumerist(BasicNewsRecipe):
title = 'Consumerist'
__author__ = 'NA'
description = "Consumerist, Shoppers Bite Back."
publisher = 'consumerist.com'
category = 'news, consumer news, consumer rights'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://consumerist.com/css/images/footer_man.gif'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:large}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
#keep_only_tags = [dict(attrs={'class':['', 'category-breadcrumb']}),]
remove_tags_before = dict(name='h2')
remove_tags = [
#dict(name='iframe'),
dict(name='div', attrs={'class':['e-comments', 'more-about', 'entry-tags']}),
#dict(name='div', attrs={'id':['IEContainer', 'clickIncludeBox']}),
#dict(name='ul', attrs={'class':'article-tools'}),
#dict(name='ul', attrs={'class':'articleTools'}),
]
remove_tags_after = dict(attrs={'class':'e-body'})
feeds = [(u'Articles', u'http://consumerist.com/index.xml')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,44 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
deadspin.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Deadspin(BasicNewsRecipe):
title = 'Deadspin'
__author__ = 'NA'
description = "Deadspin, Sports News without Access, Favor, or Discretion."
publisher = 'deadspin.com'
category = 'news, sports, meltdowns'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/deadspin.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/deadspin/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,44 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
fleshbot.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Fleshbot(BasicNewsRecipe):
title = 'Fleshbot'
__author__ = 'NA'
description = "Fleshbot, Pure Filth."
publisher = 'Fleshbot.com'
category = 'news, sex, sex industry, celebs, nudes, adult, adult toys, sex toys'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.fleshbot.com/assets/base/img/thumbs140x140/fleshbot.com.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/fleshbot/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,44 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
gawker.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Gawker(BasicNewsRecipe):
title = 'Gawker'
__author__ = 'NA'
description = "Gawker, Gossip from Manhattan and the Beltway to Hollywood and the Valley."
publisher = 'gawker.com'
category = 'news, gossip'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/gawker.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/gawker/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,46 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
io9.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class i09(BasicNewsRecipe):
title = 'io9'
__author__ = 'NA'
description = "io9, We Come From the Future."
publisher = 'io9.com'
category = 'news, science, education'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/io9.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/io9/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
jalopnik.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Jalopnik(BasicNewsRecipe):
title = 'Jalopnik'
__author__ = 'NA'
description = "Jalopnik, Obsessed With The Cult of Cars."
publisher = 'jalopnik.com'
category = 'news, cars, spyshots'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/jalopnik.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/jalopnik/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
jezebel.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Jezebel(BasicNewsRecipe):
title = 'Jezebel'
__author__ = 'NA'
description = "Jezebel, Celebrity, Sex, Fashion for Women. Without Airbrushing."
publisher = 'jezebel.com'
category = 'news, cars, spyshots'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/jezebel.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/jezebel/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,44 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
kotaku.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Kotaku(BasicNewsRecipe):
title = 'Kotaku'
__author__ = 'NA'
description = "Kotaku, the Gamer's Guide."
publisher = 'kotaku.com'
category = 'news, games, Internet, xbox 360, playstation 3, tips and tricks'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/kotaku.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/kotaku/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)

View File

@ -0,0 +1,45 @@
__license__ = 'GPL v3'
__copyright__ = '2010, NA'
'''
lifehacker.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Lifehacker(BasicNewsRecipe):
title = 'Lifehacker'
__author__ = 'NA'
description = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
publisher = 'lifehacker.com'
category = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf-8'
use_embedded_content = False
language = 'en'
masthead_url = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
extra_css = '''
body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
img{margin-bottom: 1em}
h1{font-family :Arial,Helvetica,sans-serif; font-size:large}
h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small}
'''
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes = ['width','height']
keep_only_tags = [dict(attrs={'class':'content permalink'})]
remove_tags_before = dict(name='h1')
remove_tags = [dict(attrs={'class':'contactinfo'})]
remove_tags_after = dict(attrs={'class':'contactinfo'})
feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/full')]
def preprocess_html(self, soup):
return self.adeify_images(soup)