calibre/recipes/comics_com.recipe

225 lines
16 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
class Comics(BasicNewsRecipe):
title = 'Comics.com'
__author__ = 'Starson17'
description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
language = 'en'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
recursions = 0
max_articles_per_feed = 10
num_comics_to_get = 7
simultaneous_downloads = 1
# delay = 3
keep_only_tags = [dict(name='a', attrs={'class':'STR_StripImage'}),
dict(name='div', attrs={'class':'STR_Date'})
]
def parse_index(self):
feeds = []
for title, url in [
("9 Chickweed Lane", "http://comics.com/9_chickweed_lane"),
("Agnes", "http://comics.com/agnes"),
("Alley Oop", "http://comics.com/alley_oop"),
("Andy Capp", "http://comics.com/andy_capp"),
("Arlo & Janis", "http://comics.com/arlo&janis"),
("B.C.", "http://comics.com/bc"),
("Ballard Street", "http://comics.com/ballard_street"),
# ("Ben", "http://comics.com/ben"),
# ("Betty", "http://comics.com/betty"),
# ("Big Nate", "http://comics.com/big_nate"),
# ("Brevity", "http://comics.com/brevity"),
# ("Candorville", "http://comics.com/candorville"),
# ("Cheap Thrills", "http://comics.com/cheap_thrills"),
# ("Committed", "http://comics.com/committed"),
# ("Cow & Boy", "http://comics.com/cow&boy"),
# ("Daddy's Home", "http://comics.com/daddys_home"),
# ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
# ("Drabble", "http://comics.com/drabble"),
# ("F Minus", "http://comics.com/f_minus"),
# ("Family Tree", "http://comics.com/family_tree"),
# ("Farcus", "http://comics.com/farcus"),
# ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
# ("Ferd'nand", "http://comics.com/ferdnand"),
# ("Flight Deck", "http://comics.com/flight_deck"),
# ("Flo & Friends", "http://comics.com/flo&friends"),
# ("Fort Knox", "http://comics.com/fort_knox"),
# ("Frank & Ernest", "http://comics.com/frank&ernest"),
# ("Frazz", "http://comics.com/frazz"),
# ("Free Range", "http://comics.com/free_range"),
# ("Geech Classics", "http://comics.com/geech_classics"),
# ("Get Fuzzy", "http://comics.com/get_fuzzy"),
# ("Girls & Sports", "http://comics.com/girls&sports"),
# ("Graffiti", "http://comics.com/graffiti"),
# ("Grand Avenue", "http://comics.com/grand_avenue"),
# ("Heathcliff", "http://comics.com/heathcliff"),
# "Heathcliff, a street-smart and mischievous cat with many adventures."
# ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
# ("Herman", "http://comics.com/herman"),
# ("Home and Away", "http://comics.com/home_and_away"),
# ("It's All About You", "http://comics.com/its_all_about_you"),
# ("Jane's World", "http://comics.com/janes_world"),
# ("Jump Start", "http://comics.com/jump_start"),
# ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
# ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
# ("Liberty Meadows", "http://comics.com/liberty_meadows"),
# ("Little Dog Lost", "http://comics.com/little_dog_lost"),
# ("Lola", "http://comics.com/lola"),
# ("Luann", "http://comics.com/luann"),
# ("Marmaduke", "http://comics.com/marmaduke"),
# ("Meg! Classics", "http://comics.com/meg_classics"),
# ("Minimum Security", "http://comics.com/minimum_security"),
# ("Moderately Confused", "http://comics.com/moderately_confused"),
# ("Momma", "http://comics.com/momma"),
# ("Monty", "http://comics.com/monty"),
# ("Motley Classics", "http://comics.com/motley_classics"),
# ("Nancy", "http://comics.com/nancy"),
# ("Natural Selection", "http://comics.com/natural_selection"),
# ("Nest Heads", "http://comics.com/nest_heads"),
# ("Off The Mark", "http://comics.com/off_the_mark"),
# ("On a Claire Day", "http://comics.com/on_a_claire_day"),
# ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
# ("Over the Hedge", "http://comics.com/over_the_hedge"),
# ("PC and Pixel", "http://comics.com/pc_and_pixel"),
# ("Peanuts", "http://comics.com/peanuts"),
# ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
# ("Pickles", "http://comics.com/pickles"),
# ("Prickly City", "http://comics.com/prickly_city"),
# ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
# ("Reality Check", "http://comics.com/reality_check"),
# ("Red & Rover", "http://comics.com/red&rover"),
# ("Rip Haywire", "http://comics.com/rip_haywire"),
# ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
# ("Rose Is Rose", "http://comics.com/rose_is_rose"),
# ("Rubes", "http://comics.com/rubes"),
# ("Rudy Park", "http://comics.com/rudy_park"),
# ("Scary Gary", "http://comics.com/scary_gary"),
# ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
# ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
# ("Speed Bump", "http://comics.com/speed_bump"),
# ("Spot The Frog", "http://comics.com/spot_the_frog"),
# ("State of the Union", "http://comics.com/state_of_the_union"),
# ("Strange Brew", "http://comics.com/strange_brew"),
# ("Tarzan Classics", "http://comics.com/tarzan_classics"),
# ("That's Life", "http://comics.com/thats_life"),
# ("The Barn", "http://comics.com/the_barn"),
# ("The Born Loser", "http://comics.com/the_born_loser"),
# ("The Buckets", "http://comics.com/the_buckets"),
# ("The Dinette Set", "http://comics.com/the_dinette_set"),
# ("The Grizzwells", "http://comics.com/the_grizzwells"),
# ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
# ("The Knight Life", "http://comics.com/the_knight_life"),
# ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
# ("The Other Coast", "http://comics.com/the_other_coast"),
# ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
# ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
# ("Watch Your Head", "http://comics.com/watch_your_head"),
# ("Wizard of Id", "http://comics.com/wizard_of_id"),
# ("Working Daze", "http://comics.com/working_daze"),
# ("Working It Out", "http://comics.com/working_it_out"),
# ("Zack Hill", "http://comics.com/zack_hill"),
# ("(Th)ink", "http://comics.com/think"),
# "Tackling the political and social issues impacting communities of color."
# ("Adam Zyglis", "http://comics.com/adam_zyglis"),
# "Known for his excellent caricatures, as well as independent and incisive imagery. "
# ("Andy Singer", "http://comics.com/andy_singer"),
# ("Bill Day", "http://comics.com/bill_day"),
# "Powerful images on sensitive issues."
# ("Bill Schorr", "http://comics.com/bill_schorr"),
# ("Bob Englehart", "http://comics.com/bob_englehart"),
# ("Brian Fairrington", "http://comics.com/brian_fairrington"),
# ("Bruce Beattie", "http://comics.com/bruce_beattie"),
# ("Cam Cardow", "http://comics.com/cam_cardow"),
# ("Chip Bok", "http://comics.com/chip_bok"),
# ("Chris Britt", "http://comics.com/chris_britt"),
# ("Chuck Asay", "http://comics.com/chuck_asay"),
# ("Clay Bennett", "http://comics.com/clay_bennett"),
# ("Daryl Cagle", "http://comics.com/daryl_cagle"),
# ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
# "David Fitzsimmons is a new editorial cartoons on comics.com. He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
# ("Drew Litton", "http://comics.com/drew_litton"),
# "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
# ("Ed Stein", "http://comics.com/ed_stein"),
# "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
# ("Eric Allie", "http://comics.com/eric_allie"),
# "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
# ("Gary Markstein", "http://comics.com/gary_markstein"),
# ("Gary McCoy", "http://comics.com/gary_mccoy"),
# "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for Best Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
# ("Gary Varvel", "http://comics.com/gary_varvel"),
# ("Henry Payne", "http://comics.com/henry_payne"),
# ("JD Crowe", "http://comics.com/jd_crowe"),
# ("Jeff Parker", "http://comics.com/jeff_parker"),
# ("Jeff Stahler", "http://comics.com/jeff_stahler"),
# ("Jerry Holbert", "http://comics.com/jerry_holbert"),
# ("John Cole", "http://comics.com/john_cole"),
# ("John Darkow", "http://comics.com/john_darkow"),
# "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for the Columbia Daily Tribune, Missouri"
# ("John Sherffius", "http://comics.com/john_sherffius"),
# ("Larry Wright", "http://comics.com/larry_wright"),
# ("Lisa Benson", "http://comics.com/lisa_benson"),
# ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
# ("Matt Bors", "http://comics.com/matt_bors"),
# ("Michael Ramirez", "http://comics.com/michael_ramirez"),
# ("Mike Keefe", "http://comics.com/mike_keefe"),
# ("Mike Luckovich", "http://comics.com/mike_luckovich"),
# ("MIke Thompson", "http://comics.com/mike_thompson"),
# ("Monte Wolverton", "http://comics.com/monte_wolverton"),
# "Unique mix of perspectives"
# ("Mr. Fish", "http://comics.com/mr_fish"),
# "Side effects may include swelling"
# ("Nate Beeler", "http://comics.com/nate_beeler"),
# "Middle America meets the Beltway."
# ("Nick Anderson", "http://comics.com/nick_anderson"),
# ("Pat Bagley", "http://comics.com/pat_bagley"),
# "Unfair and Totally Unbalanced."
# ("Paul Szep", "http://comics.com/paul_szep"),
# ("RJ Matson", "http://comics.com/rj_matson"),
# "Power cartoons from NYC and Capitol Hill"
# ("Rob Rogers", "http://comics.com/rob_rogers"),
# "Humorous slant on current events"
# ("Robert Ariail", "http://comics.com/robert_ariail"),
# "Clever and unpredictable"
# ("Scott Stantis", "http://comics.com/scott_stantis"),
# ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
# ("Steve Benson", "http://comics.com/steve_benson"),
# ("Steve Breen", "http://comics.com/steve_breen"),
# ("Steve Kelley", "http://comics.com/steve_kelley"),
# ("Steve Sack", "http://comics.com/steve_sack"),
]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
soup = self.index_to_soup(url)
# print 'soup: ', soup
title = ''
current_articles = []
pages = range(1, self.num_comics_to_get+1)
for page in pages:
page_url = url + '/?Page=' + str(page)
soup = self.index_to_soup(page_url)
if soup:
strip_tag = soup.find('a', attrs={'class': 'STR_StripImage'})
if strip_tag:
print 'strip_tag: ', strip_tag
title = strip_tag['title']
print 'title: ', title
current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
current_articles.reverse()
return current_articles
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''