mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
225 lines
16 KiB
Plaintext
225 lines
16 KiB
Plaintext
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class Comics(BasicNewsRecipe):
    """Recipe that builds one feed per comic strip hosted on comics.com.

    The set of strips is the list of (title, url) pairs in ``parse_index``;
    most entries are commented out so that users can enable only the comics
    they want. Each feed's articles are the strip's paginated pages, see
    ``make_links``.
    """

    title = 'Comics.com'
    __author__ = 'Starson17'
    description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
    recursions = 0
    max_articles_per_feed = 10
    # Number of '?Page=N' pages requested for every comic in make_links().
    num_comics_to_get = 7
    simultaneous_downloads = 1
    # delay = 3

    # Only the strip image link and its date block are kept from each page.
    keep_only_tags = [
        dict(name='a', attrs={'class': 'STR_StripImage'}),
        dict(name='div', attrs={'class': 'STR_Date'}),
    ]

    def parse_index(self):
        """Return the feeds for all enabled comics.

        Returns a list of ``(title, articles)`` tuples, one per enabled comic,
        where ``articles`` is the list produced by ``make_links`` for that
        comic's URL. Comics for which no article was found are omitted.
        Uncomment an entry below to enable that comic.
        """
        feeds = []
        for title, url in [
            ("9 Chickweed Lane", "http://comics.com/9_chickweed_lane"),
            ("Agnes", "http://comics.com/agnes"),
            ("Alley Oop", "http://comics.com/alley_oop"),
            ("Andy Capp", "http://comics.com/andy_capp"),
            ("Arlo & Janis", "http://comics.com/arlo&janis"),
            ("B.C.", "http://comics.com/bc"),
            ("Ballard Street", "http://comics.com/ballard_street"),
            # ("Ben", "http://comics.com/ben"),
            # ("Betty", "http://comics.com/betty"),
            # ("Big Nate", "http://comics.com/big_nate"),
            # ("Brevity", "http://comics.com/brevity"),
            # ("Candorville", "http://comics.com/candorville"),
            # ("Cheap Thrills", "http://comics.com/cheap_thrills"),
            # ("Committed", "http://comics.com/committed"),
            # ("Cow & Boy", "http://comics.com/cow&boy"),
            # ("Daddy's Home", "http://comics.com/daddys_home"),
            # ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
            # ("Drabble", "http://comics.com/drabble"),
            # ("F Minus", "http://comics.com/f_minus"),
            # ("Family Tree", "http://comics.com/family_tree"),
            # ("Farcus", "http://comics.com/farcus"),
            # ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
            # ("Ferd'nand", "http://comics.com/ferdnand"),
            # ("Flight Deck", "http://comics.com/flight_deck"),
            # ("Flo & Friends", "http://comics.com/flo&friends"),
            # ("Fort Knox", "http://comics.com/fort_knox"),
            # ("Frank & Ernest", "http://comics.com/frank&ernest"),
            # ("Frazz", "http://comics.com/frazz"),
            # ("Free Range", "http://comics.com/free_range"),
            # ("Geech Classics", "http://comics.com/geech_classics"),
            # ("Get Fuzzy", "http://comics.com/get_fuzzy"),
            # ("Girls & Sports", "http://comics.com/girls&sports"),
            # ("Graffiti", "http://comics.com/graffiti"),
            # ("Grand Avenue", "http://comics.com/grand_avenue"),
            # ("Heathcliff", "http://comics.com/heathcliff"),
            # "Heathcliff, a street-smart and mischievous cat with many adventures."
            # ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
            # ("Herman", "http://comics.com/herman"),
            # ("Home and Away", "http://comics.com/home_and_away"),
            # ("It's All About You", "http://comics.com/its_all_about_you"),
            # ("Jane's World", "http://comics.com/janes_world"),
            # ("Jump Start", "http://comics.com/jump_start"),
            # ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
            # ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
            # ("Liberty Meadows", "http://comics.com/liberty_meadows"),
            # ("Little Dog Lost", "http://comics.com/little_dog_lost"),
            # ("Lola", "http://comics.com/lola"),
            # ("Luann", "http://comics.com/luann"),
            # ("Marmaduke", "http://comics.com/marmaduke"),
            # ("Meg! Classics", "http://comics.com/meg_classics"),
            # ("Minimum Security", "http://comics.com/minimum_security"),
            # ("Moderately Confused", "http://comics.com/moderately_confused"),
            # ("Momma", "http://comics.com/momma"),
            # ("Monty", "http://comics.com/monty"),
            # ("Motley Classics", "http://comics.com/motley_classics"),
            # ("Nancy", "http://comics.com/nancy"),
            # ("Natural Selection", "http://comics.com/natural_selection"),
            # ("Nest Heads", "http://comics.com/nest_heads"),
            # ("Off The Mark", "http://comics.com/off_the_mark"),
            # ("On a Claire Day", "http://comics.com/on_a_claire_day"),
            # ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
            # ("Over the Hedge", "http://comics.com/over_the_hedge"),
            # ("PC and Pixel", "http://comics.com/pc_and_pixel"),
            # ("Peanuts", "http://comics.com/peanuts"),
            # ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
            # ("Pickles", "http://comics.com/pickles"),
            # ("Prickly City", "http://comics.com/prickly_city"),
            # ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
            # ("Reality Check", "http://comics.com/reality_check"),
            # ("Red & Rover", "http://comics.com/red&rover"),
            # ("Rip Haywire", "http://comics.com/rip_haywire"),
            # ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
            # ("Rose Is Rose", "http://comics.com/rose_is_rose"),
            # ("Rubes", "http://comics.com/rubes"),
            # ("Rudy Park", "http://comics.com/rudy_park"),
            # ("Scary Gary", "http://comics.com/scary_gary"),
            # ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
            # ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
            # ("Speed Bump", "http://comics.com/speed_bump"),
            # ("Spot The Frog", "http://comics.com/spot_the_frog"),
            # ("State of the Union", "http://comics.com/state_of_the_union"),
            # ("Strange Brew", "http://comics.com/strange_brew"),
            # ("Tarzan Classics", "http://comics.com/tarzan_classics"),
            # ("That's Life", "http://comics.com/thats_life"),
            # ("The Barn", "http://comics.com/the_barn"),
            # ("The Born Loser", "http://comics.com/the_born_loser"),
            # ("The Buckets", "http://comics.com/the_buckets"),
            # ("The Dinette Set", "http://comics.com/the_dinette_set"),
            # ("The Grizzwells", "http://comics.com/the_grizzwells"),
            # ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
            # ("The Knight Life", "http://comics.com/the_knight_life"),
            # ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
            # ("The Other Coast", "http://comics.com/the_other_coast"),
            # ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
            # ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
            # ("Watch Your Head", "http://comics.com/watch_your_head"),
            # ("Wizard of Id", "http://comics.com/wizard_of_id"),
            # ("Working Daze", "http://comics.com/working_daze"),
            # ("Working It Out", "http://comics.com/working_it_out"),
            # ("Zack Hill", "http://comics.com/zack_hill"),
            # ("(Th)ink", "http://comics.com/think"),
            # "Tackling the political and social issues impacting communities of color."
            # ("Adam Zyglis", "http://comics.com/adam_zyglis"),
            # "Known for his excellent caricatures, as well as independent and incisive imagery. "
            # ("Andy Singer", "http://comics.com/andy_singer"),
            # ("Bill Day", "http://comics.com/bill_day"),
            # "Powerful images on sensitive issues."
            # ("Bill Schorr", "http://comics.com/bill_schorr"),
            # ("Bob Englehart", "http://comics.com/bob_englehart"),
            # ("Brian Fairrington", "http://comics.com/brian_fairrington"),
            # ("Bruce Beattie", "http://comics.com/bruce_beattie"),
            # ("Cam Cardow", "http://comics.com/cam_cardow"),
            # ("Chip Bok", "http://comics.com/chip_bok"),
            # ("Chris Britt", "http://comics.com/chris_britt"),
            # ("Chuck Asay", "http://comics.com/chuck_asay"),
            # ("Clay Bennett", "http://comics.com/clay_bennett"),
            # ("Daryl Cagle", "http://comics.com/daryl_cagle"),
            # ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
            # "David Fitzsimmons is a new editorial cartoons on comics.com. He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
            # ("Drew Litton", "http://comics.com/drew_litton"),
            # "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
            # ("Ed Stein", "http://comics.com/ed_stein"),
            # "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
            # ("Eric Allie", "http://comics.com/eric_allie"),
            # "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
            # ("Gary Markstein", "http://comics.com/gary_markstein"),
            # ("Gary McCoy", "http://comics.com/gary_mccoy"),
            # "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for Best Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
            # ("Gary Varvel", "http://comics.com/gary_varvel"),
            # ("Henry Payne", "http://comics.com/henry_payne"),
            # ("JD Crowe", "http://comics.com/jd_crowe"),
            # ("Jeff Parker", "http://comics.com/jeff_parker"),
            # ("Jeff Stahler", "http://comics.com/jeff_stahler"),
            # ("Jerry Holbert", "http://comics.com/jerry_holbert"),
            # ("John Cole", "http://comics.com/john_cole"),
            # ("John Darkow", "http://comics.com/john_darkow"),
            # "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for the Columbia Daily Tribune, Missouri"
            # ("John Sherffius", "http://comics.com/john_sherffius"),
            # ("Larry Wright", "http://comics.com/larry_wright"),
            # ("Lisa Benson", "http://comics.com/lisa_benson"),
            # ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
            # ("Matt Bors", "http://comics.com/matt_bors"),
            # ("Michael Ramirez", "http://comics.com/michael_ramirez"),
            # ("Mike Keefe", "http://comics.com/mike_keefe"),
            # ("Mike Luckovich", "http://comics.com/mike_luckovich"),
            # ("MIke Thompson", "http://comics.com/mike_thompson"),
            # ("Monte Wolverton", "http://comics.com/monte_wolverton"),
            # "Unique mix of perspectives"
            # ("Mr. Fish", "http://comics.com/mr_fish"),
            # "Side effects may include swelling"
            # ("Nate Beeler", "http://comics.com/nate_beeler"),
            # "Middle America meets the Beltway."
            # ("Nick Anderson", "http://comics.com/nick_anderson"),
            # ("Pat Bagley", "http://comics.com/pat_bagley"),
            # "Unfair and Totally Unbalanced."
            # ("Paul Szep", "http://comics.com/paul_szep"),
            # ("RJ Matson", "http://comics.com/rj_matson"),
            # "Power cartoons from NYC and Capitol Hill"
            # ("Rob Rogers", "http://comics.com/rob_rogers"),
            # "Humorous slant on current events"
            # ("Robert Ariail", "http://comics.com/robert_ariail"),
            # "Clever and unpredictable"
            # ("Scott Stantis", "http://comics.com/scott_stantis"),
            # ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
            # ("Steve Benson", "http://comics.com/steve_benson"),
            # ("Steve Breen", "http://comics.com/steve_breen"),
            # ("Steve Kelley", "http://comics.com/steve_kelley"),
            # ("Steve Sack", "http://comics.com/steve_sack"),
        ]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect the article entries for one comic.

        Requests ``url + '/?Page=N'`` for N from 1 to ``num_comics_to_get``
        and, for every page that contains an ``<a class="STR_StripImage">``
        tag, records an article whose title is that tag's ``title`` attribute
        and whose url is the page URL.

        :param url: base URL of the comic, without a trailing slash.
        :return: list of article dicts with the keys ``title``, ``url``,
                 ``description`` and ``date``, in reverse page order.
        """
        current_articles = []
        for page in range(1, self.num_comics_to_get + 1):
            page_url = url + '/?Page=' + str(page)
            soup = self.index_to_soup(page_url)
            if not soup:
                continue
            strip_tag = soup.find('a', attrs={'class': 'STR_StripImage'})
            if not strip_tag:
                continue
            self.log.debug('strip_tag: ', strip_tag)
            title = strip_tag['title']
            self.log.debug('title: ', title)
            current_articles.append({'title': title, 'url': page_url, 'description': '', 'date': ''})
        # NOTE(review): presumably page 1 is the most recent strip, so the
        # reversal yields oldest-first order -- confirm against the site.
        current_articles.reverse()
        return current_articles

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''
|