__license__ = 'GPL v3'
__copyright__ = 'Copyright 2010 Starson17'
'''
www.gocomics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe


class GoComics(BasicNewsRecipe):
    """Recipe that collects comic strips from www.gocomics.com.

    Each active entry in the strip list inside ``parse_index`` becomes one
    feed; ``num_comics_to_get`` controls how many strips are collected per
    feed, walking backwards through each strip's "prev" links.
    """

    title = 'Go Comics'
    __author__ = 'Starson17'
    __version__ = '1.06'
    __date__ = '07 June 2011'
    description = u'200+ Comics - Customize for more days/comics: Defaults to 1 day, 25 comics - 20 general, 5 editorial.'
    category = 'news, comics'
    language = 'en'
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    # USER PREFERENCES - COMICS AND NUMBER OF COMICS TO RETRIEVE ########
    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
    num_comics_to_get = 1
    # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
    # Please do not overload their servers by selecting all comics and 1000
    # strips from each!

    # Keep only <h1> elements and <div> elements whose id starts with
    # 'mutable_'.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', id=lambda x: x and x.startswith('mutable_')),
    ]

    # Prefix applied to site-relative hrefs (those starting with '/').
    _site_root = 'http://www.gocomics.com'

    def get_browser(self):
        """Return the base recipe browser with its header list set to a
        single gocomics Referer header."""
        br = BasicNewsRecipe.get_browser(self)
        br.addheaders = [('Referer', 'http://www.gocomics.com/')]
        return br

    def parse_index(self):
        """Build the feed list, one feed per active strip.

        Returns a list of ``(title, articles)`` tuples, where ``articles``
        is the list produced by ``make_links``. Strips that yield no
        articles are skipped. When ``self.test`` is truthy the loop stops
        after the second strip in the list.
        """
        feeds = []
        for i, (title, url) in enumerate([  # {{{
            # (u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
            # (u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
            # (u"Adam At Home", u"http://www.gocomics.com/adamathome"),
            # (u"Agnes", u"http://www.gocomics.com/agnes"),
            # (u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
            # (u"Andy Capp", u"http://www.gocomics.com/andycapp"),
            (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
            # (u"Annie", u"http://www.gocomics.com/annie"),
            # (u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
            # (u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
            (u"B.C.", u"http://www.gocomics.com/bc"),
            # (u"Back in the Day", u"http://www.gocomics.com/backintheday"),
            # (u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
            (u"Baldo", u"http://www.gocomics.com/baldo"),
            # (u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
            # (u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
            # (u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
            # (u"Ben", u"http://www.gocomics.com/ben"),
            # (u"Betty", u"http://www.gocomics.com/betty"),
            # (u"Bewley", u"http://www.gocomics.com/bewley"),
            # (u"Big Nate", u"http://www.gocomics.com/bignate"),
            # (u"Big Top", u"http://www.gocomics.com/bigtop"),
            # (u"Biographic", u"http://www.gocomics.com/biographic"),
            # (u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
            # (u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
            # (u"Bliss", u"http://www.gocomics.com/bliss"),
            # (u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
            # (u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
            # (u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
            # (u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
            # (u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
            (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
            # (u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
            # (u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
            # (u"Brevity", u"http://www.gocomics.com/brevity"),
            # (u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
            (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
            (u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
            # (u"Candorville", u"http://www.gocomics.com/candorville"),
            # (u"Cathy", u"http://www.gocomics.com/cathy"),
            # (u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
            # (u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
            # (u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
            # (u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
            # (u"Cleats", u"http://www.gocomics.com/cleats"),
            # (u"Close to Home", u"http://www.gocomics.com/closetohome"),
            # (u"Committed", u"http://www.gocomics.com/committed"),
            # (u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
            # (u"Cornered", u"http://www.gocomics.com/cornered"),
            # (u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
            # (u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
            # (u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
            # (u"Deep Cover", u"http://www.gocomics.com/deepcover"),
            # (u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
            # (u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
            # (u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
            # (u"Doodles", u"http://www.gocomics.com/doodles"),
            # (u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
            # (u"Drabble", u"http://www.gocomics.com/drabble"),
            # (u"Eek!", u"http://www.gocomics.com/eek"),
            # (u"F Minus", u"http://www.gocomics.com/fminus"),
            # (u"Family Tree", u"http://www.gocomics.com/familytree"),
            # (u"Farcus", u"http://www.gocomics.com/farcus"),
            # (u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
            # (u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
            # (u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
            # (u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
            (u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
            # (u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
            # (u"Fort Knox", u"http://www.gocomics.com/fortknox"),
            # (u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
            # (u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
            (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
            # (u"Frazz", u"http://www.gocomics.com/frazz"),
            # (u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
            # (u"Free Range", u"http://www.gocomics.com/freerange"),
            # (u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
            # (u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
            (u"Garfield", u"http://www.gocomics.com/garfield"),
            # (u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
            # (u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
            (u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
            # (u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
            # (u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
            # (u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
            # (u"Graffiti", u"http://www.gocomics.com/graffiti"),
            # (u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
            # (u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
            # (u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
            # (u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
            # (u"Home and Away", u"http://www.gocomics.com/homeandaway"),
            # (u"Housebroken", u"http://www.gocomics.com/housebroken"),
            # (u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
            # (u"Imagine This", u"http://www.gocomics.com/imaginethis"),
            # (u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
            # (u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
            # (u"Ink Pen", u"http://www.gocomics.com/inkpen"),
            # (u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
            # (u"Jane's World", u"http://www.gocomics.com/janesworld"),
            # (u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
            # (u"Jump Start", u"http://www.gocomics.com/jumpstart"),
            # (u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
            # (u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
            # (u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
            # (u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
            # (u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
            # (u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
            # (u"Lio", u"http://www.gocomics.com/lio"),
            # (u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
            # (u"Little Otto", u"http://www.gocomics.com/littleotto"),
            # (u"Lola", u"http://www.gocomics.com/lola"),
            # (u"Love Is...", u"http://www.gocomics.com/loveis"),
            (u"Luann", u"http://www.gocomics.com/luann"),
            # (u"Maintaining", u"http://www.gocomics.com/maintaining"),
            # (u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
            # (u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
            # (u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
            # (u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
            (u"Momma", u"http://www.gocomics.com/momma"),
            # (u"Monty", u"http://www.gocomics.com/monty"),
            # (u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
            # (u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
            # (u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
            # (u"Nancy", u"http://www.gocomics.com/nancy"),
            # (u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
            # (u"Nest Heads", u"http://www.gocomics.com/nestheads"),
            # (u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
            # (u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
            (u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
            # (u"Off The Mark", u"http://www.gocomics.com/offthemark"),
            # (u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
            # (u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
            # (u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
            # (u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
            # (u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
            # (u"Overboard", u"http://www.gocomics.com/overboard"),
            # (u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
            (u"Peanuts", u"http://www.gocomics.com/peanuts"),
            (u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
            # (u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
            # (u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
            # (u"Pickles", u"http://www.gocomics.com/pickles"),
            # (u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
            # (u"Pluggers", u"http://www.gocomics.com/pluggers"),
            (u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
            # (u"PreTeena", u"http://www.gocomics.com/preteena"),
            # (u"Prickly City", u"http://www.gocomics.com/pricklycity"),
            # (u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
            # (u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
            # (u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
            # (u"Reality Check", u"http://www.gocomics.com/realitycheck"),
            # (u"Red and Rover", u"http://www.gocomics.com/redandrover"),
            # (u"Red Meat", u"http://www.gocomics.com/redmeat"),
            # (u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
            # (u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
            # (u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
            (u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
            # (u"Rudy Park", u"http://www.gocomics.com/rudypark"),
            # (u"Scary Gary", u"http://www.gocomics.com/scarygary"),
            # (u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
            (u"Shoe", u"http://www.gocomics.com/shoe"),
            # (u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
            # (u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
            # (u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
            # (u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
            # (u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
            # (u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
            # (u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
            # (u"Sylvia", u"http://www.gocomics.com/sylvia"),
            # (u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
            # (u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
            # (u"That's Life", u"http://www.gocomics.com/thatslife"),
            # (u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
            # (u"The Barn", u"http://www.gocomics.com/thebarn"),
            # (u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
            # (u"The Boondocks", u"http://www.gocomics.com/boondocks"),
            (u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
            # (u"The Buckets", u"http://www.gocomics.com/thebuckets"),
            # (u"The City", u"http://www.gocomics.com/thecity"),
            # (u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
            # (u"The Doozies", u"http://www.gocomics.com/thedoozies"),
            # (u"The Duplex", u"http://www.gocomics.com/duplex"),
            # (u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
            # (u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
            # (u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
            # (u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
            # (u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
            # (u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
            # (u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
            (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
            # (u"The Norm", u"http://www.gocomics.com/thenorm"),
            # (u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
            # (u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
            # (u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
            # (u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
            # (u"TOBY", u"http://www.gocomics.com/toby"),
            # (u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
            # (u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
            # (u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
            # (u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
            # (u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
            # (u"Wee Pals", u"http://www.gocomics.com/weepals"),
            # (u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
            (u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
            # (u"Working Daze", u"http://www.gocomics.com/workingdaze"),
            # (u"Working It Out", u"http://www.gocomics.com/workingitout"),
            # (u"Yenny", u"http://www.gocomics.com/yenny"),
            # (u"Zack Hill", u"http://www.gocomics.com/zackhill"),
            # (u"Ziggy", u"http://www.gocomics.com/ziggy"),
            (u"9 to 5", u"http://www.gocomics.com/9to5"),
            (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
            (u"Herman", u"http://www.gocomics.com/herman"),
            (u"Loose Parts", u"http://www.gocomics.com/looseparts"),
            (u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
            (u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
            (u"Rubes", u"http://www.gocomics.com/rubes"),
            (u"Speed Bump", u"http://www.gocomics.com/speedbump"),
            (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
            (u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
            #
            # EDITORIAL CARTOONS #####################
            # (u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
            # (u"Andy Singer", u"http://www.gocomics.com/andysinger"),
            # (u"Ben Sargent", u"http://www.gocomics.com/bensargent"),
            # (u"Bill Day", u"http://www.gocomics.com/billday"),
            # (u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
            # (u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
            # (u"Bob Gorrell", u"http://www.gocomics.com/bobgorrell"),
            # (u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
            # (u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
            # (u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
            # (u"Chan Lowe", u"http://www.gocomics.com/chanlowe"),
            # (u"Chip Bok", u"http://www.gocomics.com/chipbok"),
            # (u"Chris Britt", u"http://www.gocomics.com/chrisbritt"),
            # (u"Chuck Asay", u"http://www.gocomics.com/chuckasay"),
            # (u"Clay Bennett", u"http://www.gocomics.com/claybennett"),
            # (u"Clay Jones", u"http://www.gocomics.com/clayjones"),
            # (u"Dan Wasserman", u"http://www.gocomics.com/danwasserman"),
            # (u"Dana Summers", u"http://www.gocomics.com/danasummers"),
            # (u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
            # (u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
            # (u"Dick Locher", u"http://www.gocomics.com/dicklocher"),
            # (u"Don Wright", u"http://www.gocomics.com/donwright"),
            # (u"Donna Barstow", u"http://www.gocomics.com/donnabarstow"),
            # (u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
            # (u"Drew Sheneman", u"http://www.gocomics.com/drewsheneman"),
            # (u"Ed Stein", u"http://www.gocomics.com/edstein"),
            # (u"Eric Allie", u"http://www.gocomics.com/ericallie"),
            # (u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
            # (u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
            # (u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
            # (u"Glenn McCoy", u"http://www.gocomics.com/glennmccoy"),
            # (u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
            # (u"Jack Ohman", u"http://www.gocomics.com/jackohman"),
            # (u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
            # (u"Jeff Danziger", u"http://www.gocomics.com/jeffdanziger"),
            # (u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
            # (u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
            # (u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
            # (u"Jim Morin", u"http://www.gocomics.com/jimmorin"),
            # (u"Joel Pett", u"http://www.gocomics.com/joelpett"),
            # (u"John Cole", u"http://www.gocomics.com/johncole"),
            # (u"John Darkow", u"http://www.gocomics.com/johndarkow"),
            # (u"John Deering", u"http://www.gocomics.com/johndeering"),
            # (u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
            # (u"Ken Catalino", u"http://www.gocomics.com/kencatalino"),
            # (u"Kerry Waghorn", u"http://www.gocomics.com/facesinthenews"),
            # (u"Kevin Kallaugher", u"http://www.gocomics.com/kevinkallaugher"),
            # (u"Lalo Alcaraz", u"http://www.gocomics.com/laloalcaraz"),
            # (u"Larry Wright", u"http://www.gocomics.com/larrywright"),
            # (u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
            # (u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
            # (u"Matt Bors", u"http://www.gocomics.com/mattbors"),
            # (u"Matt Davies", u"http://www.gocomics.com/mattdavies"),
            # (u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
            # (u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
            # (u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
            # (u"Mike Thompson", u"http://www.gocomics.com/mikethompson"),
            # (u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
            # (u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
            # (u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
            # (u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
            # (u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
            # (u"Pat Oliphant", u"http://www.gocomics.com/patoliphant"),
            # (u"Paul Conrad", u"http://www.gocomics.com/paulconrad"),
            # (u"Paul Szep", u"http://www.gocomics.com/paulszep"),
            # (u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
            # (u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
            # (u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
            # (u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
            # (u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
            # (u"Small World", u"http://www.gocomics.com/smallworld"),
            # (u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
            # (u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
            # (u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
            # (u"Steve Sack", u"http://www.gocomics.com/stevesack"),
            # (u"Stuart Carlson", u"http://www.gocomics.com/stuartcarlson"),
            # (u"Ted Rall", u"http://www.gocomics.com/tedrall"),
            # (u"(Th)ink", u"http://www.gocomics.com/think"),
            # (u"Tom Toles", u"http://www.gocomics.com/tomtoles"),
            # (u"Tony Auth", u"http://www.gocomics.com/tonyauth"),
            # (u"Views of the World", u"http://www.gocomics.com/viewsoftheworld"),
            # (u"ViewsAfrica", u"http://www.gocomics.com/viewsafrica"),
            # (u"ViewsAmerica", u"http://www.gocomics.com/viewsamerica"),
            # (u"ViewsAsia", u"http://www.gocomics.com/viewsasia"),
            # (u"ViewsBusiness", u"http://www.gocomics.com/viewsbusiness"),
            # (u"ViewsEurope", u"http://www.gocomics.com/viewseurope"),
            # (u"ViewsLatinAmerica", u"http://www.gocomics.com/viewslatinamerica"),
            # (u"ViewsMidEast", u"http://www.gocomics.com/viewsmideast"),
            # (u"Walt Handelsman", u"http://www.gocomics.com/walthandelsman"),
            # (u"Wayne Stayskal", u"http://www.gocomics.com/waynestayskal"),
            # (u"Wit of the World", u"http://www.gocomics.com/witoftheworld"),
        ]):  # }}}
            self.log('Working on: ', title, url)
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
            # In test mode, stop once the second strip has been processed.
            if self.test and i > 0:
                break
        return feeds

    def _absolute_url(self, url):
        """Return ``url`` with the site root prepended when it starts
        with '/'; otherwise return it unchanged."""
        if url.startswith('/'):
            return self._site_root + url
        return url

    def make_links(self, url):
        """Collect article entries for one strip, starting at ``url``.

        Fetches the page at ``url``, records its strip link and title, then
        follows the navigation link with class 'prev' and repeats, up to
        ``num_comics_to_get`` pages (2 when ``self.test`` is truthy).
        Collection stops early as soon as a page cannot be fetched or is
        missing one of the expected elements.

        Returns a list of dicts with keys 'title', 'url', 'description'
        and 'date', in reverse order of collection (last page fetched
        first).
        """
        current_articles = []
        # Use a local count so a test run does not overwrite the user's
        # num_comics_to_get preference on the recipe.
        remaining = 2 if self.test else self.num_comics_to_get
        while remaining > 0:
            remaining -= 1
            page_soup = self.index_to_soup(url)
            if not page_soup:
                break
            content = page_soup.find(id='content')
            if content is None:
                break
            feature = content.find(name='div', attrs={'class': 'feature'})
            feature_nav = content.find(
                name='ul', attrs={'class': 'feature-nav'})
            if feature is None or feature_nav is None:
                break

            # find() returns None rather than raising when nothing matches,
            # so both the heading and its link must be checked before the
            # href is read.
            heading = feature.find('h1')
            link = None
            if heading is not None:
                link = heading.find('a', href=True)
            if link is None:
                self.log.warning('Failed to find current page link')
                break
            page_url = self._absolute_url(link['href'])

            try:
                strip_title = self.tag_to_string(link)
            except Exception:
                strip_title = 'Error - no Title found'
            try:
                date_title = self.tag_to_string(feature_nav.find('li'))
            except Exception:
                date_title = 'Error - no Date found'

            current_articles.append({
                'title': strip_title + ' - ' + date_title,
                'url': page_url,
                'description': '',
                'date': '',
            })

            # Step to the previous strip; stop when there is no such link.
            prev_link = feature_nav.find(
                'a', href=True, attrs={'class': 'prev'})
            if prev_link is None:
                break
            url = self._absolute_url(prev_link['href'])

        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        """Remove every <h1> after the first, run ``adeify_images`` on the
        soup, and return it."""
        headings = soup.findAll('h1')
        for h1 in headings[1:]:
            h1.extract()
        self.adeify_images(soup)
        return soup