From 8f42c11aa34f9ecd721de8625757edaa10cfb454 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 14 Aug 2010 08:17:43 -0600 Subject: [PATCH] GoComics by Starson17. Fixes #6500 (New Recipe: GoComics) --- resources/recipes/go_comics.recipe | 348 +++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 resources/recipes/go_comics.recipe diff --git a/resources/recipes/go_comics.recipe b/resources/recipes/go_comics.recipe new file mode 100644 index 0000000000..b98b628942 --- /dev/null +++ b/resources/recipes/go_comics.recipe @@ -0,0 +1,348 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'Copyright 2010 Starson17' +''' +www.gocomics.com +''' +from calibre.web.feeds.news import BasicNewsRecipe +import mechanize + +class GoComics(BasicNewsRecipe): + title = 'GoComics' + __author__ = 'Starson17' + __version__ = '1.02' + __date__ = '14 August 2010' + description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.' + category = 'news, comics' + language = 'en' + use_embedded_content= False + no_stylesheets = True + remove_javascript = True + cover_url = 'http://paulbuckley14059.files.wordpress.com/2008/06/calvin-and-hobbes.jpg' + + ####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ######## + # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes + num_comics_to_get = 7 + # comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large + comic_size = 900 + # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS + # Please do not overload their servers by selecting all comics and 1000 strips from each! + + conversion_options = {'linearize_tables' : True + , 'comment' : description + , 'tags' : category + , 'language' : language + } + + keep_only_tags = [dict(name='div', attrs={'class':['feature','banner']}), + ] + + remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}), + dict(name='div', attrs={'class':['tag-wrapper']}), + dict(name='ul', attrs={'class':['share-nav','feature-nav']}), + ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + cookies = mechanize.CookieJar() + br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) + br.addheaders = [('Referer','http://www.gocomics.com/')] + return br + + def parse_index(self): + feeds = [] + for title, url in [ + ######## COMICS - GENERAL ######## + (u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"), + # (u"9 to 5", u"http://www.gocomics.com/9to5"), + # (u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"), + # (u"Adam@Home", u"http://www.gocomics.com/adamathome"), + # (u"Agnes", u"http://www.gocomics.com/agnes"), + # (u"Andy Capp", u"http://www.gocomics.com/andycapp"), + # (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"), + # (u"Annie", u"http://www.gocomics.com/annie"), + (u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"), + # (u"Ask Shagg", u"http://www.gocomics.com/askshagg"), + (u"B.C.", u"http://www.gocomics.com/bc"), + # (u"Back in the Day", u"http://www.gocomics.com/backintheday"), + # (u"Bad Reporter", u"http://www.gocomics.com/badreporter"), + # (u"Baldo", u"http://www.gocomics.com/baldo"), + # (u"Ballard Street", u"http://www.gocomics.com/ballardstreet"), + # (u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"), + # (u"The Barn", u"http://www.gocomics.com/thebarn"), + # (u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"), + # (u"Bewley", u"http://www.gocomics.com/bewley"), + # (u"Big Top", u"http://www.gocomics.com/bigtop"), + # (u"Biographic", u"http://www.gocomics.com/biographic"), + (u"Birdbrains", u"http://www.gocomics.com/birdbrains"), + # (u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"), + # (u"Bliss", u"http://www.gocomics.com/bliss"), + (u"Bloom County", u"http://www.gocomics.com/bloomcounty"), + # (u"Bo Nanas", u"http://www.gocomics.com/bonanas"), + # (u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"), + # (u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"), + # (u"Boomerangs", u"http://www.gocomics.com/boomerangs"), + # (u"The Boondocks", u"http://www.gocomics.com/boondocks"), + # (u"Bottomliners", u"http://www.gocomics.com/bottomliners"), + # (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"), + # (u"Brainwaves", u"http://www.gocomics.com/brainwaves"), + # (u"Brenda Starr", u"http://www.gocomics.com/brendastarr"), + # (u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"), + # (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"), + (u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"), + # (u"Candorville", u"http://www.gocomics.com/candorville"), + # (u"Cathy", u"http://www.gocomics.com/cathy"), + # (u"C'est la Vie", u"http://www.gocomics.com/cestlavie"), + # (u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"), + # (u"Citizen Dog", u"http://www.gocomics.com/citizendog"), + # (u"The City", u"http://www.gocomics.com/thecity"), + # (u"Cleats", u"http://www.gocomics.com/cleats"), + # (u"Close to Home", u"http://www.gocomics.com/closetohome"), + # (u"Compu-toon", u"http://www.gocomics.com/compu-toon"), + # (u"Cornered", u"http://www.gocomics.com/cornered"), + (u"Cul de Sac", u"http://www.gocomics.com/culdesac"), + # (u"Daddy's Home", u"http://www.gocomics.com/daddyshome"), + # (u"Deep Cover", u"http://www.gocomics.com/deepcover"), + # (u"Dick Tracy", u"http://www.gocomics.com/dicktracy"), + # (u"The Dinette Set", u"http://www.gocomics.com/dinetteset"), + # (u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"), + # (u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"), + # (u"Doodles", u"http://www.gocomics.com/doodles"), + (u"Doonesbury", u"http://www.gocomics.com/doonesbury"), + # (u"The Doozies", u"http://www.gocomics.com/thedoozies"), + # (u"The Duplex", u"http://www.gocomics.com/duplex"), + # (u"Eek!", u"http://www.gocomics.com/eek"), + # (u"The Elderberries", u"http://www.gocomics.com/theelderberries"), + # (u"Flight Deck", u"http://www.gocomics.com/flightdeck"), + # (u"Flo and Friends", u"http://www.gocomics.com/floandfriends"), + # (u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"), + (u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"), + # (u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"), + # (u"Fort Knox", u"http://www.gocomics.com/fortknox"), + # (u"FoxTrot", u"http://www.gocomics.com/foxtrot"), + (u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"), + # (u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"), + # (u"Fred Basset", u"http://www.gocomics.com/fredbasset"), + # (u"Free Range", u"http://www.gocomics.com/freerange"), + # (u"Frog Applause", u"http://www.gocomics.com/frogapplause"), + # (u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"), + (u"Garfield", u"http://www.gocomics.com/garfield"), + # (u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"), + # (u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"), + # (u"Gil Thorp", u"http://www.gocomics.com/gilthorp"), + # (u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"), + # (u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"), + # (u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"), + # (u"Heart of the City", u"http://www.gocomics.com/heartofthecity"), + # (u"Heathcliff", u"http://www.gocomics.com/heathcliff"), + # (u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"), + # (u"Home and Away", u"http://www.gocomics.com/homeandaway"), + # (u"Housebroken", u"http://www.gocomics.com/housebroken"), + # (u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"), + # (u"Imagine This", u"http://www.gocomics.com/imaginethis"), + # (u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"), + # (u"In the Sticks", u"http://www.gocomics.com/inthesticks"), + # (u"Ink Pen", u"http://www.gocomics.com/inkpen"), + # (u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"), + # (u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"), + # (u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"), + # (u"Last Kiss", u"http://www.gocomics.com/lastkiss"), + # (u"Legend of Bill", u"http://www.gocomics.com/legendofbill"), + # (u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"), + (u"Lio", u"http://www.gocomics.com/lio"), + # (u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"), + # (u"Little Otto", u"http://www.gocomics.com/littleotto"), + # (u"Loose Parts", u"http://www.gocomics.com/looseparts"), + # (u"Love Is...", u"http://www.gocomics.com/loveis"), + # (u"Maintaining", u"http://www.gocomics.com/maintaining"), + # (u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"), + # (u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"), + # (u"The Middletons", u"http://www.gocomics.com/themiddletons"), + # (u"Momma", u"http://www.gocomics.com/momma"), + # (u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"), + # (u"Mythtickle", u"http://www.gocomics.com/mythtickle"), + # (u"Nest Heads", u"http://www.gocomics.com/nestheads"), + # (u"NEUROTICA", u"http://www.gocomics.com/neurotica"), + (u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"), + (u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"), + # (u"The Norm", u"http://www.gocomics.com/thenorm"), + # (u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"), + # (u"One Big Happy", u"http://www.gocomics.com/onebighappy"), + # (u"The Other Coast", u"http://www.gocomics.com/theothercoast"), + # (u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"), + # (u"Overboard", u"http://www.gocomics.com/overboard"), + # (u"Pibgorn", u"http://www.gocomics.com/pibgorn"), + # (u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"), + (u"Pickles", u"http://www.gocomics.com/pickles"), + # (u"Pinkerton", u"http://www.gocomics.com/pinkerton"), + # (u"Pluggers", u"http://www.gocomics.com/pluggers"), + (u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"), + # (u"PreTeena", u"http://www.gocomics.com/preteena"), + # (u"The Quigmans", u"http://www.gocomics.com/thequigmans"), + # (u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"), + (u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"), + # (u"Red and Rover", u"http://www.gocomics.com/redandrover"), + # (u"Red Meat", u"http://www.gocomics.com/redmeat"), + # (u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"), + # (u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"), + # (u"Rubes", u"http://www.gocomics.com/rubes"), + # (u"Scary Gary", u"http://www.gocomics.com/scarygary"), + (u"Shoe", u"http://www.gocomics.com/shoe"), + # (u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"), + # (u"Skin Horse", u"http://www.gocomics.com/skinhorse"), + # (u"Slowpoke", u"http://www.gocomics.com/slowpoke"), + # (u"Speed Bump", u"http://www.gocomics.com/speedbump"), + # (u"State of the Union", u"http://www.gocomics.com/stateoftheunion"), + (u"Stone Soup", u"http://www.gocomics.com/stonesoup"), + # (u"Strange Brew", u"http://www.gocomics.com/strangebrew"), + # (u"Sylvia", u"http://www.gocomics.com/sylvia"), + # (u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"), + # (u"Tiny Sepuku", u"http://www.gocomics.com/tinysepuku"), + # (u"TOBY", u"http://www.gocomics.com/toby"), + # (u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"), + # (u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"), + # (u"W.T. Duck", u"http://www.gocomics.com/wtduck"), + # (u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"), + # (u"Wee Pals", u"http://www.gocomics.com/weepals"), + # (u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"), + (u"Wizard of Id", u"http://www.gocomics.com/wizardofid"), + # (u"Working It Out", u"http://www.gocomics.com/workingitout"), + # (u"Yenny", u"http://www.gocomics.com/yenny"), + # (u"Zack Hill", u"http://www.gocomics.com/zackhill"), + (u"Ziggy", u"http://www.gocomics.com/ziggy"), + ######## COMICS - EDITORIAL ######## + ("Lalo Alcaraz","http://www.gocomics.com/laloalcaraz"), + ("Nick Anderson","http://www.gocomics.com/nickanderson"), + ("Chuck Asay","http://www.gocomics.com/chuckasay"), + ("Tony Auth","http://www.gocomics.com/tonyauth"), + ("Donna Barstow","http://www.gocomics.com/donnabarstow"), + # ("Bruce Beattie","http://www.gocomics.com/brucebeattie"), + # ("Clay Bennett","http://www.gocomics.com/claybennett"), + # ("Lisa Benson","http://www.gocomics.com/lisabenson"), + # ("Steve Benson","http://www.gocomics.com/stevebenson"), + # ("Chip Bok","http://www.gocomics.com/chipbok"), + # ("Steve Breen","http://www.gocomics.com/stevebreen"), + # ("Chris Britt","http://www.gocomics.com/chrisbritt"), + # ("Stuart Carlson","http://www.gocomics.com/stuartcarlson"), + # ("Ken Catalino","http://www.gocomics.com/kencatalino"), + # ("Paul Conrad","http://www.gocomics.com/paulconrad"), + # ("Jeff Danziger","http://www.gocomics.com/jeffdanziger"), + # ("Matt Davies","http://www.gocomics.com/mattdavies"), + # ("John Deering","http://www.gocomics.com/johndeering"), + # ("Bob Gorrell","http://www.gocomics.com/bobgorrell"), + # ("Walt Handelsman","http://www.gocomics.com/walthandelsman"), + # ("Clay Jones","http://www.gocomics.com/clayjones"), + # ("Kevin Kallaugher","http://www.gocomics.com/kevinkallaugher"), + # ("Steve Kelley","http://www.gocomics.com/stevekelley"), + # ("Dick Locher","http://www.gocomics.com/dicklocher"), + # ("Chan Lowe","http://www.gocomics.com/chanlowe"), + # ("Mike Luckovich","http://www.gocomics.com/mikeluckovich"), + # ("Gary Markstein","http://www.gocomics.com/garymarkstein"), + # ("Glenn McCoy","http://www.gocomics.com/glennmccoy"), + # ("Jim Morin","http://www.gocomics.com/jimmorin"), + # ("Jack Ohman","http://www.gocomics.com/jackohman"), + # ("Pat Oliphant","http://www.gocomics.com/patoliphant"), + # ("Joel Pett","http://www.gocomics.com/joelpett"), + # ("Ted Rall","http://www.gocomics.com/tedrall"), + # ("Michael Ramirez","http://www.gocomics.com/michaelramirez"), + # ("Marshall Ramsey","http://www.gocomics.com/marshallramsey"), + # ("Steve Sack","http://www.gocomics.com/stevesack"), + # ("Ben Sargent","http://www.gocomics.com/bensargent"), + # ("Drew Sheneman","http://www.gocomics.com/drewsheneman"), + # ("John Sherffius","http://www.gocomics.com/johnsherffius"), + # ("Small World","http://www.gocomics.com/smallworld"), + # ("Scott Stantis","http://www.gocomics.com/scottstantis"), + # ("Wayne Stayskal","http://www.gocomics.com/waynestayskal"), + # ("Dana Summers","http://www.gocomics.com/danasummers"), + # ("Paul Szep","http://www.gocomics.com/paulszep"), + # ("Mike Thompson","http://www.gocomics.com/mikethompson"), + # ("Tom Toles","http://www.gocomics.com/tomtoles"), + # ("Gary Varvel","http://www.gocomics.com/garyvarvel"), + # ("ViewsAfrica","http://www.gocomics.com/viewsafrica"), + # ("ViewsAmerica","http://www.gocomics.com/viewsamerica"), + # ("ViewsAsia","http://www.gocomics.com/viewsasia"), + # ("ViewsBusiness","http://www.gocomics.com/viewsbusiness"), + # ("ViewsEurope","http://www.gocomics.com/viewseurope"), + # ("ViewsLatinAmerica","http://www.gocomics.com/viewslatinamerica"), + # ("ViewsMidEast","http://www.gocomics.com/viewsmideast"), + # ("Views of the World","http://www.gocomics.com/viewsoftheworld"), + # ("Kerry Waghorn","http://www.gocomics.com/facesinthenews"), + # ("Dan Wasserman","http://www.gocomics.com/danwasserman"), + # ("Signe Wilkinson","http://www.gocomics.com/signewilkinson"), + # ("Wit of the World","http://www.gocomics.com/witoftheworld"), + # ("Don Wright","http://www.gocomics.com/donwright"), + ]: + articles = self.make_links(url) + if articles: + feeds.append((title, articles)) + return feeds + + def make_links(self, url): + title = 'Temp' + current_articles = [] + pages = range(1, self.num_comics_to_get+1) + for page in pages: + page_soup = self.index_to_soup(url) + if page_soup: + try: + strip_title = page_soup.h1.a.string + except: + strip_title = 'Error - no page_soup.h1.a.string' + try: + date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string + except: + date_title = 'Error - no page_soup.h1.li.string' + title = strip_title + ' - ' + date_title + for i in range(2): + try: + strip_url_date = page_soup.h1.a['href'] + break #success - this is normal exit + except: + continue #try to get strip_url_date again + continue # give up on this strip date + for i in range(2): + try: + prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href'] + break #success - this is normal exit + except: + continue #try to get prev_strip_url_date again + continue # give up on this prev strip date + if strip_url_date: + page_url = 'http://www.gocomics.com' + strip_url_date + else: + continue + if prev_strip_url_date: + prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date + else: + continue + current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''}) + url = prev_page_url + current_articles.reverse() + return current_articles + + def preprocess_html(self, soup): + if soup.title: + title_string = soup.title.string.strip() + _cd = title_string.split(',',1)[1] + comic_date = ' '.join(_cd.split(' ', 4)[0:-1]) + if soup.h1.span: + artist = soup.h1.span.string + soup.h1.span.string.replaceWith(comic_date + artist) + feature_item = soup.find('p',attrs={'class':'feature_item'}) + if feature_item.a: + a_tag = feature_item.a + a_href = a_tag["href"] + img_tag = a_tag.img + img_tag["src"] = a_href + img_tag["width"] = self.comic_size + img_tag["height"] = None + return self.adeify_images(soup) + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + img {max-width:100%; min-width:100%;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + '''