from calibre.web.feeds.news import BasicNewsRecipe


class Comics(BasicNewsRecipe):
    """Recipe that gathers recent strips from comics.com.

    Each enabled comic becomes one feed.  For every comic the recipe
    requests ``<comic url>/?Page=N`` for ``N`` in
    ``1..num_comics_to_get`` and turns every page that contains a strip
    image link into one article.

    To customize, comment or uncomment entries in the list inside
    :meth:`parse_index`.
    """

    title = 'Comics.com'
    __author__ = 'Starson17'
    description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
    recursions = 0
    max_articles_per_feed = 10
    # Number of ``?Page=N`` pages requested per comic by make_links().
    num_comics_to_get = 7
    simultaneous_downloads = 1
    # delay = 3

    # Keep only the strip image link and its date block from each page.
    keep_only_tags = [
        dict(name='a', attrs={'class': 'STR_StripImage'}),
        dict(name='div', attrs={'class': 'STR_Date'}),
    ]

    def parse_index(self):
        """Build the feed list from the enabled comics.

        Returns a list of ``(feed title, articles)`` tuples, where
        ``articles`` is the list produced by :meth:`make_links`.  Comics
        for which no article was found are left out.
        """
        comics = [
            ("9 Chickweed Lane", "http://comics.com/9_chickweed_lane"),
            ("Agnes", "http://comics.com/agnes"),
            ("Alley Oop", "http://comics.com/alley_oop"),
            ("Andy Capp", "http://comics.com/andy_capp"),
            ("Arlo & Janis", "http://comics.com/arlo&janis"),
            ("B.C.", "http://comics.com/bc"),
            ("Ballard Street", "http://comics.com/ballard_street"),
            # ("Ben", "http://comics.com/ben"),
            # ("Betty", "http://comics.com/betty"),
            # ("Big Nate", "http://comics.com/big_nate"),
            # ("Brevity", "http://comics.com/brevity"),
            # ("Candorville", "http://comics.com/candorville"),
            # ("Cheap Thrills", "http://comics.com/cheap_thrills"),
            # ("Committed", "http://comics.com/committed"),
            # ("Cow & Boy", "http://comics.com/cow&boy"),
            # ("Daddy's Home", "http://comics.com/daddys_home"),
            # ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
            # ("Drabble", "http://comics.com/drabble"),
            # ("F Minus", "http://comics.com/f_minus"),
            # ("Family Tree", "http://comics.com/family_tree"),
            # ("Farcus", "http://comics.com/farcus"),
            # ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
            # ("Ferd'nand", "http://comics.com/ferdnand"),
            # ("Flight Deck", "http://comics.com/flight_deck"),
            # ("Flo & Friends", "http://comics.com/flo&friends"),
            # ("Fort Knox", "http://comics.com/fort_knox"),
            # ("Frank & Ernest", "http://comics.com/frank&ernest"),
            # ("Frazz", "http://comics.com/frazz"),
            # ("Free Range", "http://comics.com/free_range"),
            # ("Geech Classics", "http://comics.com/geech_classics"),
            # ("Get Fuzzy", "http://comics.com/get_fuzzy"),
            # ("Girls & Sports", "http://comics.com/girls&sports"),
            # ("Graffiti", "http://comics.com/graffiti"),
            # ("Grand Avenue", "http://comics.com/grand_avenue"),
            # ("Heathcliff", "http://comics.com/heathcliff"),
            # "Heathcliff, a street-smart and mischievous cat with many adventures."
            # ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
            # ("Herman", "http://comics.com/herman"),
            # ("Home and Away", "http://comics.com/home_and_away"),
            # ("It's All About You", "http://comics.com/its_all_about_you"),
            # ("Jane's World", "http://comics.com/janes_world"),
            # ("Jump Start", "http://comics.com/jump_start"),
            # ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
            # ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
            # ("Liberty Meadows", "http://comics.com/liberty_meadows"),
            # ("Little Dog Lost", "http://comics.com/little_dog_lost"),
            # ("Lola", "http://comics.com/lola"),
            # ("Luann", "http://comics.com/luann"),
            # ("Marmaduke", "http://comics.com/marmaduke"),
            # ("Meg! Classics", "http://comics.com/meg_classics"),
            # ("Minimum Security", "http://comics.com/minimum_security"),
            # ("Moderately Confused", "http://comics.com/moderately_confused"),
            # ("Momma", "http://comics.com/momma"),
            # ("Monty", "http://comics.com/monty"),
            # ("Motley Classics", "http://comics.com/motley_classics"),
            # ("Nancy", "http://comics.com/nancy"),
            # ("Natural Selection", "http://comics.com/natural_selection"),
            # ("Nest Heads", "http://comics.com/nest_heads"),
            # ("Off The Mark", "http://comics.com/off_the_mark"),
            # ("On a Claire Day", "http://comics.com/on_a_claire_day"),
            # ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
            # ("Over the Hedge", "http://comics.com/over_the_hedge"),
            # ("PC and Pixel", "http://comics.com/pc_and_pixel"),
            # ("Peanuts", "http://comics.com/peanuts"),
            # ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
            # ("Pickles", "http://comics.com/pickles"),
            # ("Prickly City", "http://comics.com/prickly_city"),
            # ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
            # ("Reality Check", "http://comics.com/reality_check"),
            # ("Red & Rover", "http://comics.com/red&rover"),
            # ("Rip Haywire", "http://comics.com/rip_haywire"),
            # ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
            # ("Rose Is Rose", "http://comics.com/rose_is_rose"),
            # ("Rubes", "http://comics.com/rubes"),
            # ("Rudy Park", "http://comics.com/rudy_park"),
            # ("Scary Gary", "http://comics.com/scary_gary"),
            # ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
            # ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
            # ("Speed Bump", "http://comics.com/speed_bump"),
            # ("Spot The Frog", "http://comics.com/spot_the_frog"),
            # ("State of the Union", "http://comics.com/state_of_the_union"),
            # ("Strange Brew", "http://comics.com/strange_brew"),
            # ("Tarzan Classics", "http://comics.com/tarzan_classics"),
            # ("That's Life", "http://comics.com/thats_life"),
            # ("The Barn", "http://comics.com/the_barn"),
            # ("The Born Loser", "http://comics.com/the_born_loser"),
            # ("The Buckets", "http://comics.com/the_buckets"),
            # ("The Dinette Set", "http://comics.com/the_dinette_set"),
            # ("The Grizzwells", "http://comics.com/the_grizzwells"),
            # ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
            # ("The Knight Life", "http://comics.com/the_knight_life"),
            # ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
            # ("The Other Coast", "http://comics.com/the_other_coast"),
            # ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
            # ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
            # ("Watch Your Head", "http://comics.com/watch_your_head"),
            # ("Wizard of Id", "http://comics.com/wizard_of_id"),
            # ("Working Daze", "http://comics.com/working_daze"),
            # ("Working It Out", "http://comics.com/working_it_out"),
            # ("Zack Hill", "http://comics.com/zack_hill"),
            # ("(Th)ink", "http://comics.com/think"),
            # "Tackling the political and social issues impacting communities of color."
            # ("Adam Zyglis", "http://comics.com/adam_zyglis"),
            # "Known for his excellent caricatures, as well as independent and incisive imagery. "
            # ("Andy Singer", "http://comics.com/andy_singer"),
            # ("Bill Day", "http://comics.com/bill_day"),
            # "Powerful images on sensitive issues."
            # ("Bill Schorr", "http://comics.com/bill_schorr"),
            # ("Bob Englehart", "http://comics.com/bob_englehart"),
            # ("Brian Fairrington", "http://comics.com/brian_fairrington"),
            # ("Bruce Beattie", "http://comics.com/bruce_beattie"),
            # ("Cam Cardow", "http://comics.com/cam_cardow"),
            # ("Chip Bok", "http://comics.com/chip_bok"),
            # ("Chris Britt", "http://comics.com/chris_britt"),
            # ("Chuck Asay", "http://comics.com/chuck_asay"),
            # ("Clay Bennett", "http://comics.com/clay_bennett"),
            # ("Daryl Cagle", "http://comics.com/daryl_cagle"),
            # ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
            # "David Fitzsimmons is a new editorial cartoons on comics.com. He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
            # ("Drew Litton", "http://comics.com/drew_litton"),
            # "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
            # ("Ed Stein", "http://comics.com/ed_stein"),
            # "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
            # ("Eric Allie", "http://comics.com/eric_allie"),
            # "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
            # ("Gary Markstein", "http://comics.com/gary_markstein"),
            # ("Gary McCoy", "http://comics.com/gary_mccoy"),
            # "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for Best Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
            # ("Gary Varvel", "http://comics.com/gary_varvel"),
            # ("Henry Payne", "http://comics.com/henry_payne"),
            # ("JD Crowe", "http://comics.com/jd_crowe"),
            # ("Jeff Parker", "http://comics.com/jeff_parker"),
            # ("Jeff Stahler", "http://comics.com/jeff_stahler"),
            # ("Jerry Holbert", "http://comics.com/jerry_holbert"),
            # ("John Cole", "http://comics.com/john_cole"),
            # ("John Darkow", "http://comics.com/john_darkow"),
            # "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editorial cartoonist for the Columbia Daily Tribune, Missouri"
            # ("John Sherffius", "http://comics.com/john_sherffius"),
            # ("Larry Wright", "http://comics.com/larry_wright"),
            # ("Lisa Benson", "http://comics.com/lisa_benson"),
            # ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
            # ("Matt Bors", "http://comics.com/matt_bors"),
            # ("Michael Ramirez", "http://comics.com/michael_ramirez"),
            # ("Mike Keefe", "http://comics.com/mike_keefe"),
            # ("Mike Luckovich", "http://comics.com/mike_luckovich"),
            # ("MIke Thompson", "http://comics.com/mike_thompson"),
            # ("Monte Wolverton", "http://comics.com/monte_wolverton"),
            # "Unique mix of perspectives"
            # ("Mr. Fish", "http://comics.com/mr_fish"),
            # "Side effects may include swelling"
            # ("Nate Beeler", "http://comics.com/nate_beeler"),
            # "Middle America meets the Beltway."
            # ("Nick Anderson", "http://comics.com/nick_anderson"),
            # ("Pat Bagley", "http://comics.com/pat_bagley"),
            # "Unfair and Totally Unbalanced."
            # ("Paul Szep", "http://comics.com/paul_szep"),
            # ("RJ Matson", "http://comics.com/rj_matson"),
            # "Power cartoons from NYC and Capitol Hill"
            # ("Rob Rogers", "http://comics.com/rob_rogers"),
            # "Humorous slant on current events"
            # ("Robert Ariail", "http://comics.com/robert_ariail"),
            # "Clever and unpredictable"
            # ("Scott Stantis", "http://comics.com/scott_stantis"),
            # ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
            # ("Steve Benson", "http://comics.com/steve_benson"),
            # ("Steve Breen", "http://comics.com/steve_breen"),
            # ("Steve Kelley", "http://comics.com/steve_kelley"),
            # ("Steve Sack", "http://comics.com/steve_sack"),
        ]

        feeds = []
        for title, url in comics:
            articles = self.make_links(url)
            # Skip comics that yielded nothing rather than emit an empty feed.
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect one article per available strip page of a comic.

        ``url`` is the comic's base URL.  Pages ``url/?Page=1`` through
        ``url/?Page=<num_comics_to_get>`` are fetched; each page that
        contains an ``<a class="STR_StripImage">`` tag contributes an
        article whose title is that tag's ``title`` attribute and whose
        URL is the page URL.

        Returns the list of article dicts (keys ``title``, ``url``,
        ``description``, ``date``) in reverse page order.
        """
        current_articles = []
        for page in range(1, self.num_comics_to_get + 1):
            page_url = url + '/?Page=' + str(page)
            soup = self.index_to_soup(page_url)
            if not soup:
                continue
            strip_tag = soup.find('a', attrs={'class': 'STR_StripImage'})
            if not strip_tag:
                continue
            title = strip_tag['title']
            # Debug output goes to the recipe log instead of Python 2
            # ``print`` statements, so the recipe also parses on Python 3.
            self.log.debug('strip_tag: ', strip_tag)
            self.log.debug('title: ', title)
            current_articles.append({
                'title': title,
                'url': page_url,
                'description': '',
                'date': '',
            })
        # NOTE(review): presumably page 1 is the newest strip, so reversing
        # yields oldest-first reading order - confirm against the site.
        current_articles.reverse()
        return current_articles

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''