# Mirror of https://github.com/kovidgoyal/calibre.git
# (synced 2025-09-29 15:31:08 -04:00; 421 lines, 26 KiB, plaintext)
__license__ = 'GPL v3'
|
|
__copyright__ = 'Copyright 2010 Starson17'
|
|
'''
|
|
www.gocomics.com
|
|
'''
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class GoComics(BasicNewsRecipe):
    '''
    Recipe that collects comic strips from www.gocomics.com.

    parse_index() builds one feed per enabled strip in its list, and
    make_links() walks backwards from a strip's current page through the
    "prev" navigation links to gather up to num_comics_to_get pages.
    '''

    title = 'Go Comics'
    __author__ = 'Starson17'
    __version__ = '1.06'
    __date__ = '07 June 2011'
    description = u'200+ Comics - Customize for more days/comics: Defaults to 1 day, 25 comics - 20 general, 5 editorial.'
    category = 'news, comics'
    language = 'en'
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    # USER PREFERENCES - COMICS AND NUMBER OF COMICS TO RETRIEVE ########
    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
    num_comics_to_get = 1
    # CHOOSE COMIC STRIPS BELOW (in parse_index) - REMOVE COMMENT '# ' FROM
    # IN FRONT OF DESIRED STRIPS
    # Please do not overload their servers by selecting all comics and 1000
    # strips from each!

    # Only the h1 elements and the divs whose id starts with 'mutable_' are
    # listed here.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', id=lambda x: x and x.startswith('mutable_')),
    ]

    def get_browser(self):
        '''Return the base recipe browser with a gocomics.com Referer header.'''
        br = BasicNewsRecipe.get_browser(self)
        # NOTE(review): this assignment replaces the browser's header list
        # instead of appending to it - confirm that is intended.
        br.addheaders = [('Referer', 'http://www.gocomics.com/')]
        return br

    def parse_index(self):
        '''
        Build the feed list: one (title, articles) tuple per enabled strip.

        Strips for which make_links() returns no articles are left out.
        When self.test is set, processing stops after the second strip.
        '''
        feeds = []
        for i, (title, url) in enumerate([  # {{{
            # (u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
            # (u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
            # (u"Adam At Home", u"http://www.gocomics.com/adamathome"),
            # (u"Agnes", u"http://www.gocomics.com/agnes"),
            # (u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
            # (u"Andy Capp", u"http://www.gocomics.com/andycapp"),
            (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
            # (u"Annie", u"http://www.gocomics.com/annie"),
            # (u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
            # (u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
            (u"B.C.", u"http://www.gocomics.com/bc"),
            # (u"Back in the Day", u"http://www.gocomics.com/backintheday"),
            # (u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
            (u"Baldo", u"http://www.gocomics.com/baldo"),
            # (u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
            # (u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
            # (u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
            # (u"Ben", u"http://www.gocomics.com/ben"),
            # (u"Betty", u"http://www.gocomics.com/betty"),
            # (u"Bewley", u"http://www.gocomics.com/bewley"),
            # (u"Big Nate", u"http://www.gocomics.com/bignate"),
            # (u"Big Top", u"http://www.gocomics.com/bigtop"),
            # (u"Biographic", u"http://www.gocomics.com/biographic"),
            # (u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
            # (u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
            # (u"Bliss", u"http://www.gocomics.com/bliss"),
            # (u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
            # (u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
            # (u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
            # (u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
            # (u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
            (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
            # (u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
            # (u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
            # (u"Brevity", u"http://www.gocomics.com/brevity"),
            # (u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
            (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
            (u"Calvin and Hobbes",
             u"http://www.gocomics.com/calvinandhobbes"),
            # (u"Candorville", u"http://www.gocomics.com/candorville"),
            # (u"Cathy", u"http://www.gocomics.com/cathy"),
            # (u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
            # (u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
            # (u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
            # (u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
            # (u"Cleats", u"http://www.gocomics.com/cleats"),
            # (u"Close to Home", u"http://www.gocomics.com/closetohome"),
            # (u"Committed", u"http://www.gocomics.com/committed"),
            # (u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
            # (u"Cornered", u"http://www.gocomics.com/cornered"),
            # (u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
            # (u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
            # (u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
            # (u"Deep Cover", u"http://www.gocomics.com/deepcover"),
            # (u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
            # (u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
            # (u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
            # (u"Doodles", u"http://www.gocomics.com/doodles"),
            # (u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
            # (u"Drabble", u"http://www.gocomics.com/drabble"),
            # (u"Eek!", u"http://www.gocomics.com/eek"),
            # (u"F Minus", u"http://www.gocomics.com/fminus"),
            # (u"Family Tree", u"http://www.gocomics.com/familytree"),
            # (u"Farcus", u"http://www.gocomics.com/farcus"),
            # (u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
            # (u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
            # (u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
            # (u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
            (u"For Better or For Worse",
             u"http://www.gocomics.com/forbetterorforworse"),
            # (u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
            # (u"Fort Knox", u"http://www.gocomics.com/fortknox"),
            # (u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
            # (u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
            (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
            # (u"Frazz", u"http://www.gocomics.com/frazz"),
            # (u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
            # (u"Free Range", u"http://www.gocomics.com/freerange"),
            # (u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
            # (u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
            (u"Garfield", u"http://www.gocomics.com/garfield"),
            # (u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
            # (u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
            (u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
            # (u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
            # (u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
            # (u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
            # (u"Graffiti", u"http://www.gocomics.com/graffiti"),
            # (u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
            # (u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
            # (u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
            # (u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
            # (u"Home and Away", u"http://www.gocomics.com/homeandaway"),
            # (u"Housebroken", u"http://www.gocomics.com/housebroken"),
            # (u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
            # (u"Imagine This", u"http://www.gocomics.com/imaginethis"),
            # (u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
            # (u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
            # (u"Ink Pen", u"http://www.gocomics.com/inkpen"),
            # (u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
            # (u"Jane's World", u"http://www.gocomics.com/janesworld"),
            # (u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
            # (u"Jump Start", u"http://www.gocomics.com/jumpstart"),
            # (u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
            # (u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
            # (u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
            # (u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
            # (u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
            # (u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
            # (u"Lio", u"http://www.gocomics.com/lio"),
            # (u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
            # (u"Little Otto", u"http://www.gocomics.com/littleotto"),
            # (u"Lola", u"http://www.gocomics.com/lola"),
            # (u"Love Is...", u"http://www.gocomics.com/loveis"),
            (u"Luann", u"http://www.gocomics.com/luann"),
            # (u"Maintaining", u"http://www.gocomics.com/maintaining"),
            # (u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
            # (u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
            # (u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
            # (u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
            (u"Momma", u"http://www.gocomics.com/momma"),
            # (u"Monty", u"http://www.gocomics.com/monty"),
            # (u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
            # (u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
            # (u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
            # (u"Nancy", u"http://www.gocomics.com/nancy"),
            # (u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
            # (u"Nest Heads", u"http://www.gocomics.com/nestheads"),
            # (u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
            # (u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
            (u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
            # (u"Off The Mark", u"http://www.gocomics.com/offthemark"),
            # (u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
            # (u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
            # (u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
            # (u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
            # (u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
            # (u"Overboard", u"http://www.gocomics.com/overboard"),
            # (u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
            (u"Peanuts", u"http://www.gocomics.com/peanuts"),
            (u"Pearls Before Swine",
             u"http://www.gocomics.com/pearlsbeforeswine"),
            # (u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
            # (u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
            # (u"Pickles", u"http://www.gocomics.com/pickles"),
            # (u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
            # (u"Pluggers", u"http://www.gocomics.com/pluggers"),
            (u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
            # (u"PreTeena", u"http://www.gocomics.com/preteena"),
            # (u"Prickly City", u"http://www.gocomics.com/pricklycity"),
            # (u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
            # (u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
            # (u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
            # (u"Reality Check", u"http://www.gocomics.com/realitycheck"),
            # (u"Red and Rover", u"http://www.gocomics.com/redandrover"),
            # (u"Red Meat", u"http://www.gocomics.com/redmeat"),
            # (u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
            # (u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
            # (u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
            (u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
            # (u"Rudy Park", u"http://www.gocomics.com/rudypark"),
            # (u"Scary Gary", u"http://www.gocomics.com/scarygary"),
            # (u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
            (u"Shoe", u"http://www.gocomics.com/shoe"),
            # (u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
            # (u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
            # (u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
            # (u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
            # (u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
            # (u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
            # (u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
            # (u"Sylvia", u"http://www.gocomics.com/sylvia"),
            # (u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
            # (u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
            # (u"That's Life", u"http://www.gocomics.com/thatslife"),
            # (u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
            # (u"The Barn", u"http://www.gocomics.com/thebarn"),
            # (u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
            # (u"The Boondocks", u"http://www.gocomics.com/boondocks"),
            (u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
            # (u"The Buckets", u"http://www.gocomics.com/thebuckets"),
            # (u"The City", u"http://www.gocomics.com/thecity"),
            # (u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
            # (u"The Doozies", u"http://www.gocomics.com/thedoozies"),
            # (u"The Duplex", u"http://www.gocomics.com/duplex"),
            # (u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
            # (u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
            # (u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
            # (u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
            # (u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
            # (u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
            # (u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
            (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
            # (u"The Norm", u"http://www.gocomics.com/thenorm"),
            # (u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
            # (u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
            # (u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
            # (u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
            # (u"TOBY", u"http://www.gocomics.com/toby"),
            # (u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
            # (u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
            # (u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
            # (u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
            # (u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
            # (u"Wee Pals", u"http://www.gocomics.com/weepals"),
            # (u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
            (u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
            # (u"Working Daze", u"http://www.gocomics.com/workingdaze"),
            # (u"Working It Out", u"http://www.gocomics.com/workingitout"),
            # (u"Yenny", u"http://www.gocomics.com/yenny"),
            # (u"Zack Hill", u"http://www.gocomics.com/zackhill"),
            # (u"Ziggy", u"http://www.gocomics.com/ziggy"),
            (u"9 to 5", u"http://www.gocomics.com/9to5"),
            (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
            (u"Herman", u"http://www.gocomics.com/herman"),
            (u"Loose Parts", u"http://www.gocomics.com/looseparts"),
            (u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
            (u"Ripley's Believe It or Not!",
             u"http://www.gocomics.com/ripleysbelieveitornot"),
            (u"Rubes", u"http://www.gocomics.com/rubes"),
            (u"Speed Bump", u"http://www.gocomics.com/speedbump"),
            (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
            (u"The Argyle Sweater",
             u"http://www.gocomics.com/theargylesweater"),
            #
            # EDITORIAL CARTOONS #####################
            # (u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
            # (u"Andy Singer", u"http://www.gocomics.com/andysinger"),
            # (u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
            # (u"Bill Day", u"http://www.gocomics.com/billday"),
            # (u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
            # (u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
            # (u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
            # (u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
            # (u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
            # (u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
            # (u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
            # (u"Chip Bok",u"http://www.gocomics.com/chipbok"),
            # (u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
            # (u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
            # (u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
            # (u"Clay Jones",u"http://www.gocomics.com/clayjones"),
            # (u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
            # (u"Dana Summers",u"http://www.gocomics.com/danasummers"),
            # (u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
            # (u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
            # (u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
            # (u"Don Wright",u"http://www.gocomics.com/donwright"),
            # (u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
            # (u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
            # (u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
            # (u"Ed Stein", u"http://www.gocomics.com/edstein"),
            # (u"Eric Allie", u"http://www.gocomics.com/ericallie"),
            # (u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
            # (u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
            # (u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
            # (u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
            # (u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
            # (u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
            # (u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
            # (u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
            # (u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
            # (u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
            # (u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
            # (u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
            # (u"Joel Pett",u"http://www.gocomics.com/joelpett"),
            # (u"John Cole", u"http://www.gocomics.com/johncole"),
            # (u"John Darkow", u"http://www.gocomics.com/johndarkow"),
            # (u"John Deering",u"http://www.gocomics.com/johndeering"),
            # (u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
            # (u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
            # (u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
            # (u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
            # (u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
            # (u"Larry Wright", u"http://www.gocomics.com/larrywright"),
            # (u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
            # (u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
            # (u"Matt Bors", u"http://www.gocomics.com/mattbors"),
            # (u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
            # (u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
            # (u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
            # (u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
            # (u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
            # (u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
            # (u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
            # (u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
            # (u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
            # (u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
            # (u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
            # (u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
            # (u"Paul Szep", u"http://www.gocomics.com/paulszep"),
            # (u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
            # (u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
            # (u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
            # (u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
            # (u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
            # (u"Small World",u"http://www.gocomics.com/smallworld"),
            # (u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
            # (u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
            # (u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
            # (u"Steve Sack", u"http://www.gocomics.com/stevesack"),
            # (u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
            # (u"Ted Rall",u"http://www.gocomics.com/tedrall"),
            # (u"(Th)ink", u"http://www.gocomics.com/think"),
            # (u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
            # (u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
            # (u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
            # (u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
            # (u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
            # (u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
            # (u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
            # (u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
            # (u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
            # (u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
            # (u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
            # (u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
            # (u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
        ]):  # }}}
            self.log('Working on: ', title, url)
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
            # Test runs only need a couple of strips.
            if self.test and i > 0:
                break
        return feeds

    @staticmethod
    def _absolute_url(href):
        '''Prefix a site-relative href (one starting with '/') with the site root.'''
        if href.startswith('/'):
            return 'http://www.gocomics.com' + href
        return href

    def make_links(self, url):
        '''
        Collect the article entries for one strip, oldest first.

        Starting at url, each page contributes one entry built from the
        link in the feature's h1 (page URL and strip title) and the first
        li of the feature navigation (date). The loop then follows the
        navigation's "prev" link. It stops after num_comics_to_get pages
        (two when self.test is set), or as soon as a page lacks any of the
        expected elements.

        Returns a list of dicts with the keys 'title', 'url', 'description'
        and 'date'.
        '''
        current_articles = []
        # Use a local count so a test run does not overwrite the user's
        # num_comics_to_get preference on the instance.
        num = 2 if self.test else self.num_comics_to_get
        while num > 0:
            num -= 1
            page_soup = self.index_to_soup(url)
            if not page_soup:
                break
            content = page_soup.find(id='content')
            if content is None:
                break
            feature = content.find(name='div', attrs={'class': 'feature'})
            feature_nav = content.find(
                name='ul', attrs={'class': 'feature-nav'})
            if feature is None or feature_nav is None:
                break
            try:
                a = feature.find('h1').find('a', href=True)
                # The href lookup is guarded too: find() returns None
                # without raising when the heading contains no link.
                page_url = a['href']
            except Exception:
                self.log.exception('Failed to find current page link')
                break
            page_url = self._absolute_url(page_url)
            try:
                strip_title = self.tag_to_string(a)
            except Exception:
                strip_title = 'Error - no Title found'
            try:
                date_title = self.tag_to_string(feature_nav.find('li'))
            except Exception:
                date_title = 'Error - no Date found'
            title = strip_title + ' - ' + date_title
            current_articles.append(
                {'title': title, 'url': page_url, 'description': '', 'date': ''})
            # Step back to the previous strip, if the page links to one.
            a = feature_nav.find('a', href=True, attrs={'class': 'prev'})
            if a is None:
                break
            url = self._absolute_url(a['href'])
        # Pages were visited newest first; present them oldest first.
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        '''Remove every h1 after the first, run adeify_images on the soup and return it.'''
        for extra_heading in soup.findAll('h1')[1:]:
            extra_heading.extract()
        self.adeify_images(soup)
        return soup