calibre/recipes/go_comics.recipe
2018-04-08 10:00:46 +05:30

605 lines
40 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = 'Copyright 2010 Starson17'
'''
www.gocomics.com
'''
import shutil, os
from calibre.constants import iswindows
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.filenames import ascii_filename
def absolutize(url):
    '''
    Turn a link found on gocomics.com into an absolute URL.

    :param url: a link as it appears in an ``href`` attribute
    :return: ``url`` prefixed with the site root when it is site-relative
             (starts with a single ``/``), prefixed with ``http:`` when it
             is protocol-relative (starts with ``//``), otherwise unchanged
    '''
    # Protocol-relative links must be checked first: they also start with
    # '/' and would otherwise be glued onto the site root.
    if url.startswith('//'):
        return 'http:' + url
    if url.startswith('/'):
        return 'http://www.gocomics.com' + url
    return url
class GoComics(BasicNewsRecipe):
    '''
    Recipe that collects recent strips for a hand-picked list of comics on
    gocomics.com.

    Every enabled comic becomes one feed. Each strip is written as a small
    HTML file into a temporary directory and returned as a ``file:`` URL.
    '''
    title = 'Go Comics'
    __author__ = 'Kovid Goyal'
    __version__ = '1.06'
    __date__ = '07 June 2011'
    description = u'200+ Comics - Customize for more days/comics: Defaults to 1 day, 25 comics - 20 general, 5 editorial.'
    category = 'news, comics'
    encoding = 'utf-8'
    language = 'en'
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    # USER PREFERENCES - COMICS AND NUMBER OF COMICS TO RETRIEVE ########
    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
    num_comics_to_get = 1
    # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
    # Please do not overload their servers by selecting all comics and 1000
    # strips from each!

    def get_browser(self):
        '''
        Return the base recipe browser with a gocomics.com ``Referer`` header.
        '''
        br = BasicNewsRecipe.get_browser(self)
        # NOTE(review): this assignment replaces the browser's whole header
        # list, not just the Referer entry - confirm no default header
        # (e.g. a user agent) is expected to survive.
        br.addheaders = [('Referer', 'http://www.gocomics.com/')]
        return br

    def parse_index(self):
        '''
        Build the list of feeds, one ``(title, articles)`` tuple per enabled
        comic in the list below.

        Comics for which no articles could be produced are left out. When
        ``self.test`` is set, processing stops after the second enabled comic.
        Also creates the temporary directory (``self.gocomics_dir``) that the
        rendered strip pages are written to.
        '''
        feeds = []
        self.gocomics_dir = PersistentTemporaryDirectory('_gocomics')
        for i, (title, url) in enumerate([  # {{{
            # (u"The Academia Waltz",u"http://www.gocomics.com/academiawaltz"),
            # (u"Adam@Home",u"http://www.gocomics.com/adamathome"),
            # (u"Adult Children",u"http://www.gocomics.com/adult-children"),
            # (u"The Adventures of Business Cat",u"http://www.gocomics.com/the-adventures-of-business-cat"),
            # (u"Agnes",u"http://www.gocomics.com/agnes"),
            # (u"AJ and Magnus",u"http://www.gocomics.com/aj-and-magnus"),
            # (u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
            # (u"Ali's House",u"http://www.gocomics.com/alis-house"),
            # (u"Alley Oop",u"http://www.gocomics.com/alley-oop"),
            # (u"Amanda the Great",u"http://www.gocomics.com/amanda-the-great"),
            # (u"Nick Anderson",u"http://www.gocomics.com/nickanderson"),
            # (u"Andertoons",u"http://www.gocomics.com/andertoons"),
            # (u"Andy Capp",u"http://www.gocomics.com/andycapp"),
            # (u"Angry Little Girls",u"http://www.gocomics.com/angry-little-girls"),
            (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
            # (u"Annie",u"http://www.gocomics.com/annie"),
            # (u"The Argyle Sweater",u"http://www.gocomics.com/theargylesweater"),
            # (u"Robert Ariail",u"http://www.gocomics.com/robert-ariail"),
            # (u"Arlo and Janis",u"http://www.gocomics.com/arloandjanis"),
            # (u"Ask a Cat",u"http://www.gocomics.com/ask-a-cat"),
            # (u"Ask Shagg",u"http://www.gocomics.com/askshagg"),
            # (u"At the Zoo",u"http://www.gocomics.com/at-the-zoo"),
            # (u"Aunty Acid",u"http://www.gocomics.com/aunty-acid"),
            # (u"The Awkward Yeti",u"http://www.gocomics.com/the-awkward-yeti"),
            (u"B.C.",u"http://www.gocomics.com/bc"),
            # (u"Back to B.C.",u"http://www.gocomics.com/back-to-bc"),
            # (u"Back in the Day",u"http://www.gocomics.com/backintheday"),
            # (u"bacon",u"http://www.gocomics.com/bacon"),
            # (u"Bad Machinery",u"http://www.gocomics.com/bad-machinery"),
            # (u"Bad Reporter",u"http://www.gocomics.com/badreporter"),
            # (u"Badlands",u"http://www.gocomics.com/badlands"),
            (u"Baldo",u"http://www.gocomics.com/baldo"),
            # (u"Ballard Street",u"http://www.gocomics.com/ballardstreet"),
            # (u"Banana Triangle",u"http://www.gocomics.com/banana-triangle"),
            # (u"Barkeater Lake Pandolph",u"http://www.gocomics.com/barkeaterlake"),
            # (u"The Barn",u"http://www.gocomics.com/thebarn"),
            # (u"Barney",u"http://www.gocomics.com/barneyandclyde"),
            # (u"Basic Instructions",u"http://www.gocomics.com/basicinstructions"),
            # (u"Beanie the Brownie",u"http://www.gocomics.com/beanie-the-brownie"),
            # (u"Beardo",u"http://www.gocomics.com/beardo"),
            # (u"Darrin Bell",u"http://www.gocomics.com/darrin-bell"),
            # (u"Ben",u"http://www.gocomics.com/ben"),
            # (u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
            # (u"Lisa Benson",u"http://www.gocomics.com/lisabenson"),
            # (u"Steve Benson",u"http://www.gocomics.com/stevebenson"),
            # (u"Bent Objects",u"http://www.gocomics.com/bent-objects"),
            # (u"The Bent Pinky",u"http://www.gocomics.com/the-bent-pinky"),
            # (u"Berger & Wyse",u"http://www.gocomics.com/berger-and-wyse"),
            # (u"Berkeley Mews",u"http://www.gocomics.com/berkeley-mews"),
            # (u"The Best Medicine Cartoon",u"http://www.gocomics.com/the-best-medicine"),
            # (u"Betty",u"http://www.gocomics.com/betty"),
            # (u"Bewley",u"http://www.gocomics.com/bewley"),
            # (u"Biff& Riley",u"http://www.gocomics.com/biff-and-riley"),
            # (u"Big Nate",u"http://www.gocomics.com/bignate"),
            # (u"Big Nate: First Class",u"http://www.gocomics.com/big-nate-first-class"),
            # (u"The Big Picture",u"http://www.gocomics.com/thebigpicture"),
            # (u"Big Top",u"http://www.gocomics.com/bigtop"),
            # (u"Biographic",u"http://www.gocomics.com/biographic"),
            # (u"Birdbrains",u"http://www.gocomics.com/birdbrains"),
            # (u"Bleeker: The Rechargeable Dog",u"http://www.gocomics.com/bleeker"),
            # (u"Bliss",u"http://www.gocomics.com/bliss"),
            # (u"Bloom County",u"http://www.gocomics.com/bloomcounty"),
            # (u"Bloom County 2016",u"http://www.gocomics.com/bloom-county"),
            # (u"Bo Nanas",u"http://www.gocomics.com/bonanas"),
            # (u"Bob the Squirrel",u"http://www.gocomics.com/bobthesquirrel"),
            # (u"Chip Bok",u"http://www.gocomics.com/chipbok"),
            # (u"Boomerangs",u"http://www.gocomics.com/boomerangs"),
            # (u"The Boondocks",u"http://www.gocomics.com/boondocks"),
            (u"The Born Loser",u"http://www.gocomics.com/the-born-loser"),
            # (u"Matt Bors",u"http://www.gocomics.com/matt-bors"),
            # (u"Bottomliners",u"http://www.gocomics.com/bottomliners"),
            (u"Bound and Gagged",u"http://www.gocomics.com/boundandgagged"),
            # (u"Brain Squirts",u"http://www.gocomics.com/brain-squirts"),
            # (u"Break of Day",u"http://www.gocomics.com/break-of-day"),
            # (u"Breaking Cat News",u"http://www.gocomics.com/breaking-cat-news"),
            # (u"Steve Breen",u"http://www.gocomics.com/stevebreen"),
            # (u"Brevity",u"http://www.gocomics.com/brevitypanel"),
            # (u"Brewster Rockit",u"http://www.gocomics.com/brewsterrockit"),
            # (u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
            (u"Broom Hilda",u"http://www.gocomics.com/broomhilda"),
            # (u"The Buckets",u"http://www.gocomics.com/thebuckets"),
            # (u"Bully",u"http://www.gocomics.com/bully"),
            # (u"Buni",u"http://www.gocomics.com/buni"),
            # (u"Bushy Tales",u"http://www.gocomics.com/bushy-tales"),
            (u"Calvin and Hobbes",u"http://www.gocomics.com/calvinandhobbes"),
            # (u"Candorville",u"http://www.gocomics.com/candorville"),
            # (u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
            # (u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
            # (u"Cathy",u"http://www.gocomics.com/cathy"),
            # (u"Cattitude - Doggonit",u"http://www.gocomics.com/cattitude-doggonit"),
            # (u"C'est la Vie",u"http://www.gocomics.com/cestlavie"),
            # (u"Cheap Thrills Cuisine",u"http://www.gocomics.com/cheap-thrills-cuisine"),
            # (u"Chuckle Bros",u"http://www.gocomics.com/chucklebros"),
            # (u"Citizen Dog",u"http://www.gocomics.com/citizendog"),
            # (u"The City",u"http://www.gocomics.com/thecity"),
            # (u"Claw",u"http://www.gocomics.com/claw"),
            # (u"Clear Blue Water",u"http://www.gocomics.com/clearbluewater"),
            # (u"Cleats",u"http://www.gocomics.com/cleats"),
            # (u"Close to Home",u"http://www.gocomics.com/closetohome"),
            # (u"The Comic Strip That Has A Finale Every Day",u"http://www.gocomics.com/the-comic-strip-that-has-a-finale-every-day"),
            # (u"Committed",u"http://www.gocomics.com/committed"),
            # (u"Compu-toon",u"http://www.gocomics.com/compu-toon"),
            # (u"The Conjurers",u"http://www.gocomics.com/the-conjurers"),
            # (u"Connie to the Wonnie",u"http://www.gocomics.com/connie-to-the-wonnie"),
            # (u"Cornered",u"http://www.gocomics.com/cornered"),
            # (u"Cow and Boy Classics",u"http://www.gocomics.com/cowandboy"),
            # (u"CowTown",u"http://www.gocomics.com/cowtown"),
            # (u"The Creeps",u"http://www.gocomics.com/the-creeps"),
            # (u"Crumb",u"http://www.gocomics.com/crumb"),
            # (u"Cul de Sac",u"http://www.gocomics.com/culdesac"),
            # (u"Dadding Badly",u"http://www.gocomics.com/dadding-badly"),
            # (u"Daddy's Home",u"http://www.gocomics.com/daddyshome"),
            # (u"The Daily Drawing",u"http://www.gocomics.com/the-daily-drawing"),
            # (u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
            # (u"Dark Side of the Horse",u"http://www.gocomics.com/darksideofthehorse"),
            # (u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
            # (u"Deep Dark Fears",u"http://www.gocomics.com/deep-dark-fears"),
            # (u"John Deering",u"http://www.gocomics.com/johndeering"),
            # (u"Diamond Lil",u"http://www.gocomics.com/diamondlil"),
            # (u"Dick Tracy",u"http://www.gocomics.com/dicktracy"),
            # (u"Dilbert Classics",u"http://www.gocomics.com/dilbert-classics"),
            # (u"The Dinette Set",u"http://www.gocomics.com/dinetteset"),
            # (u"Dinosaur Comics",u"http://www.gocomics.com/dinosaur-comics"),
            # (u"Dog Eat Doug",u"http://www.gocomics.com/dogeatdoug"),
            # (u"Dogs of C-Kennel",u"http://www.gocomics.com/dogsofckennel"),
            # (u"Domestic Abuse",u"http://www.gocomics.com/domesticabuse"),
            # (u"Doodle for Food",u"http://www.gocomics.com/doodle-for-food"),
            # (u"Doodle Town",u"http://www.gocomics.com/doodle-town"),
            # (u"Doonesbury",u"http://www.gocomics.com/doonesbury"),
            # (u"The Doozies",u"http://www.gocomics.com/thedoozies"),
            # (u"Dorris McComics",u"http://www.gocomics.com/dorris-mccomics"),
            # (u"Alex Norris",u"http://www.gocomics.com/dorris-mccomics"),
            # (u"Drabble",u"http://www.gocomics.com/drabble"),
            # (u"Dragon Girl",u"http://www.gocomics.com/dragon-girl"),
            # (u"Drive",u"http://www.gocomics.com/drive"),
            # (u"dro-mo",u"http://www.gocomics.com/dro-mo"),
            # (u"Dude and Dude",u"http://www.gocomics.com/dudedude"),
            # (u"The Duplex",u"http://www.gocomics.com/duplex"),
            # (u"Tim Eagan",u"http://www.gocomics.com/tim-eagan"),
            # (u"Edge City",u"http://www.gocomics.com/edge-city"),
            # (u"Edge of Adventure",u"http://www.gocomics.com/edge-of-adventure"),
            # (u"Eek!",u"http://www.gocomics.com/eek"),
            # (u"The Elderberries",u"http://www.gocomics.com/theelderberries"),
            # (u"Emmy Lou",u"http://www.gocomics.com/emmy-lou"),
            # (u"Endtown",u"http://www.gocomics.com/endtown"),
            # (u"Eric the Circle",u"http://www.gocomics.com/eric-the-circle"),
            # (u"Everyday People Cartoons",u"http://www.gocomics.com/everyday-people-cartoons"),
            # (u"Eyebeam",u"http://www.gocomics.com/eyebeam"),
            # (u"F Minus",u"http://www.gocomics.com/fminus"),
            # (u"Family Tree",u"http://www.gocomics.com/familytree"),
            # (u"Farcus",u"http://www.gocomics.com/farcus"),
            # (u"Fat Cats",u"http://www.gocomics.com/fat-cats"),
            # (u"Flo and Friends",u"http://www.gocomics.com/floandfriends"),
            # (u"The Flying McCoys",u"http://www.gocomics.com/theflyingmccoys"),
            # (u"Foolish Mortals",u"http://www.gocomics.com/foolish-mortals"),
            (u"For Better or For Worse",u"http://www.gocomics.com/forbetterorforworse"),
            # (u"For Heaven's Sake",u"http://www.gocomics.com/forheavenssake"),
            # (u"Fort Knox",u"http://www.gocomics.com/fortknox"),
            # (u"Four Eyes",u"http://www.gocomics.com/four-eyes"),
            # (u"Fowl Language",u"http://www.gocomics.com/fowl-language"),
            # (u"FoxTrot",u"http://www.gocomics.com/foxtrot"),
            # (u"FoxTrot Classics",u"http://www.gocomics.com/foxtrotclassics"),
            # (u"Francis",u"http://www.gocomics.com/francis"),
            (u"Frank and Ernest",u"http://www.gocomics.com/frank-and-ernest"),
            # (u"Frankie Comics",u"http://www.gocomics.com/frankie-comics"),
            # (u"Frazz",u"http://www.gocomics.com/frazz"),
            # (u"Fred Basset",u"http://www.gocomics.com/fredbasset"),
            # (u"Free Range",u"http://www.gocomics.com/freerange"),
            # (u"Freshly Squeezed",u"http://www.gocomics.com/freshlysqueezed"),
            # (u"Frog Applause",u"http://www.gocomics.com/frogapplause"),
            # (u"From the Mo Willems Sketchbook",u"http://www.gocomics.com/from-the-mo-willems-sketchbook"),
            # (u"The Fusco Brothers",u"http://www.gocomics.com/thefuscobrothers"),
            (u"Garfield",u"http://www.gocomics.com/garfield"),
            # (u"Garfield Classics",u"http://www.gocomics.com/garfield-classics"),
            # (u"Garfield Minus Garfield",u"http://www.gocomics.com/garfieldminusgarfield"),
            # (u"Gasoline Alley",u"http://www.gocomics.com/gasolinealley"),
            # (u"Geech",u"http://www.gocomics.com/geech"),
            # (u"Gentle Creatures",u"http://www.gocomics.com/gentle-creatures"),
            # (u"The Gentleman's Armchair",u"http://www.gocomics.com/the-gentlemans-armchair"),
            # (u"Get a Life",u"http://www.gocomics.com/getalife"),
            (u"Get Fuzzy",u"http://www.gocomics.com/getfuzzy"),
            # (u"Gil",u"http://www.gocomics.com/gil"),
            # (u"Gil Thorp",u"http://www.gocomics.com/gilthorp"),
            # (u"Ginger Meggs",u"http://www.gocomics.com/gingermeggs"),
            # (u"Glasbergen Cartoons",u"http://www.gocomics.com/glasbergen-cartoons"),
            # (u"G-Man Webcomics",u"http://www.gocomics.com/g-man-webcomics"),
            # (u"Gnome Syndicate",u"http://www.gocomics.com/gnome-syndicate"),
            # (u"Goats",u"http://www.gocomics.com/goats"),
            # (u"The Fans!",u"http://www.gocomics.com/fan-art"),
            # (u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
            # (u"Graffiti",u"http://www.gocomics.com/graffiti"),
            # (u"Grand Avenue",u"http://www.gocomics.com/grand-avenue"),
            # (u"Gray Matters",u"http://www.gocomics.com/gray-matters"),
            # (u"Green Humour",u"http://www.gocomics.com/green-humour"),
            # (u"The Grizzwells",u"http://www.gocomics.com/thegrizzwells"),
            # (u"Half Full",u"http://www.gocomics.com/half-full"),
            # (u"Ham Shears",u"http://www.gocomics.com/ham-shears"),
            # (u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
            # (u"Phil Hands",u"http://www.gocomics.com/phil-hands"),
            # (u"Health Capsules",u"http://www.gocomics.com/healthcapsules"),
            # (u"Heart of the City",u"http://www.gocomics.com/heartofthecity"),
            (u"Heathcliff",u"http://www.gocomics.com/heathcliff"),
            # (u"Joe Heller",u"http://www.gocomics.com/joe-heller"),
            # (u"Rebecca Hendin",u"http://www.gocomics.com/rebecca-hendin"),
            # (u"Herb and Jamaal",u"http://www.gocomics.com/herbandjamaal"),
            # (u"Herman",u"http://www.gocomics.com/herman"),
            # (u"Hipster Picnic",u"http://www.gocomics.com/hipster-picnic"),
            # (u"Jerry Holbert",u"http://www.gocomics.com/jerryholbert"),
            # (u"Home and Away",u"http://www.gocomics.com/homeandaway"),
            # (u"Hot Comics for Cool People",u"http://www.gocomics.com/hot-comics-for-cool-people"),
            # (u"HUBRIS!",u"http://www.gocomics.com/hubris"),
            # (u"Human Cull",u"http://www.gocomics.com/human-cull"),
            # (u"The Humble Stumble",u"http://www.gocomics.com/humble-stumble"),
            # (u"Hutch Owen",u"http://www.gocomics.com/hutch-owen"),
            # (u"Imagine This",u"http://www.gocomics.com/imaginethis"),
            # (u"In Security",u"http://www.gocomics.com/in-security"),
            # (u"In the Bleachers",u"http://www.gocomics.com/inthebleachers"),
            # (u"In the Sticks",u"http://www.gocomics.com/inthesticks"),
            # (u"Inherit the Mirth",u"http://www.gocomics.com/inherit-the-mirth"),
            # (u"Ink Pen",u"http://www.gocomics.com/inkpen"),
            # (u"Inspector Danger's Crime Quiz",u"http://www.gocomics.com/inspector-dangers-crime-quiz"),
            # (u"Invisible Bread",u"http://www.gocomics.com/invisible-bread"),
            # (u"It's All About You",u"http://www.gocomics.com/itsallaboutyou"),
            # (u"Jane's World",u"http://www.gocomics.com/janesworld"),
            # (u"Jen Sorensen",u"http://www.gocomics.com/jen-sorensen"),
            # (u"JetpackJr.",u"http://www.gocomics.com/jetpack-jr"),
            # (u"Jim Benton Cartoons",u"http://www.gocomics.com/jim-benton-cartoons"),
            # (u"Jim's Journal",u"http://www.gocomics.com/jimsjournal"),
            # (u"Joe Vanilla",u"http://www.gocomics.com/joevanilla"),
            # (u"Clay Jones",u"http://www.gocomics.com/clayjones"),
            # (u"JumpStart",u"http://www.gocomics.com/jumpstart"),
            # (u"Just Say Uncle",u"http://www.gocomics.com/just-say-uncle"),
            # (u"The K Chronicles",u"http://www.gocomics.com/thekchronicles"),
            # (u"Steve Kelley",u"http://www.gocomics.com/stevekelley"),
            # (u"Kid Beowulf",u"http://www.gocomics.com/kid-beowulf"),
            # (u"Kid Shay Comics",u"http://www.gocomics.com/kid-shay-comics"),
            # (u"KidSpot",u"http://www.gocomics.com/kidspot"),
            # (u"KidTown",u"http://www.gocomics.com/kidtown"),
            # (u"Kit 'N' Carlyle",u"http://www.gocomics.com/kitncarlyle"),
            # (u"Kitchen Capers",u"http://www.gocomics.com/kitchen-capers"),
            # (u"Kliban",u"http://www.gocomics.com/kliban"),
            # (u"Kliban's Cats",u"http://www.gocomics.com/klibans-cats"),
            # (u"The Knight Life",u"http://www.gocomics.com/theknightlife"),
            # (u"La Cucaracha",u"http://www.gocomics.com/lacucaracha"),
            # (u"Lard's World Peace Tips",u"http://www.gocomics.com/lards-world-peace-tips"),
            # (u"Last Kiss",u"http://www.gocomics.com/lastkiss"),
            # (u"Lay Lines",u"http://www.gocomics.com/lay-lines"),
            # (u"Learn to Speak Cat",u"http://www.gocomics.com/learn-to-speak-cat"),
            # (u"The Lefty Bosco Picture Show",u"http://www.gocomics.com/leftyboscopictureshow"),
            # (u"Legend of Bill",u"http://www.gocomics.com/legendofbill"),
            # (u"Leigh Luna Comics",u"http://www.gocomics.com/leigh-luna-comics"),
            # (u"Mike Lester",u"http://www.gocomics.com/mike-lester"),
            # (u"Liberty Meadows",u"http://www.gocomics.com/libertymeadows"),
            # (u"Li'l Abner",u"http://www.gocomics.com/lil-abner"),
            # (u"Lio",u"http://www.gocomics.com/lio"),
            # (u"Little Dog Lost",u"http://www.gocomics.com/littledoglost"),
            # (u"Little Fried Chicken and Sushi",u"http://www.gocomics.com/little-fried-chicken-and-sushi"),
            # (u"Little Nemo",u"http://www.gocomics.com/little-nemo"),
            # (u"Win- Lose- Drew",u"http://www.gocomics.com/drewlitton"),
            # (u"Lola",u"http://www.gocomics.com/lola"),
            # (u"Looks Good on Paper",u"http://www.gocomics.com/looks-good-on-paper"),
            # (u"Loose Parts",u"http://www.gocomics.com/looseparts"),
            # (u"The Lost Bear",u"http://www.gocomics.com/the-lost-bear"),
            # (u"Lost Side of Suburbia",u"http://www.gocomics.com/lostsideofsuburbia"),
            # (u"Lost Sheep",u"http://www.gocomics.com/lostsheep"),
            # (u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
            (u"Luann",u"http://www.gocomics.com/luann"),
            # (u"Luann Againn",u"http://www.gocomics.com/luann-againn"),
            # (u"Mike Luckovich",u"http://www.gocomics.com/mikeluckovich"),
            # (u"Lucky Cow",u"http://www.gocomics.com/luckycow"),
            # (u"Lug Nuts",u"http://www.gocomics.com/lug-nuts"),
            # (u"Lukey McGarry's TLDR",u"http://www.gocomics.com/lukey-mcgarrys-tldr"),
            # (u"Lunarbaboon",u"http://www.gocomics.com/lunarbaboon"),
            # (u"Magic in a Minute",u"http://www.gocomics.com/magicinaminute"),
            # (u"Magnificatz",u"http://www.gocomics.com/magnificatz"),
            # (u"Maintaining",u"http://www.gocomics.com/maintaining"),
            # (u"Making It",u"http://www.gocomics.com/making-it"),
            # (u"Maria's Day",u"http://www.gocomics.com/marias-day"),
            # (u"Gary Markstein",u"http://www.gocomics.com/garymarkstein"),
            (u"Marmaduke",u"http://www.gocomics.com/marmaduke"),
            # (u"The Martian Confederacy",u"http://www.gocomics.com/the-martian-confederacy"),
            # (u"MazeToons Puzzle",u"http://www.gocomics.com/mazetoons-puzzle"),
            # (u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
            # (u"Brian McFadden",u"http://www.gocomics.com/brian-mcfadden"),
            # (u"The Meaning of Lila",u"http://www.gocomics.com/meaningoflila"),
            # (u"Medium Large",u"http://www.gocomics.com/medium-large"),
            # (u"Meg Classics",u"http://www.gocomics.com/meg-classics"),
            # (u"Microcosm",u"http://www.gocomics.com/microcosm"),
            (u"The Middletons",u"http://www.gocomics.com/themiddletons"),
            # (u"Mike du Jour",u"http://www.gocomics.com/mike-du-jour"),
            # (u"Minimum Security",u"http://www.gocomics.com/minimumsecurity"),
            # (u"Moderately Confused",u"http://www.gocomics.com/moderately-confused"),
            # (u"Molebashed",u"http://www.gocomics.com/molebashed"),
            # (u"Molly and the Bear",u"http://www.gocomics.com/mollyandthebear"),
            (u"Momma",u"http://www.gocomics.com/momma"),
            # (u"Mom's Cancer",u"http://www.gocomics.com/moms-cancer"),
            # (u"Monty",u"http://www.gocomics.com/monty"),
            # (u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
            # (u"Motley Classics",u"http://www.gocomics.com/motley-classics"),
            # (u"Mr. Lowe",u"http://www.gocomics.com/mr-lowe"),
            # (u"Mulligan",u"http://www.gocomics.com/mulligan"),
            # (u"Mustard and Boloney",u"http://www.gocomics.com/mustard-and-boloney"),
            # (u"Mutt & Jeff",u"http://www.gocomics.com/muttandjeff"),
            # (u"My Cage: New and Old",u"http://www.gocomics.com/mycage"),
            # (u"MythTickle",u"http://www.gocomics.com/mythtickle"),
            # (u"Nancy",u"http://www.gocomics.com/nancy"),
            # (u"Nancy Classics",u"http://www.gocomics.com/nancy-classics"),
            # (u"Nest Heads",u"http://www.gocomics.com/nestheads"),
            # (u"NEUROTICA",u"http://www.gocomics.com/neurotica"),
            # (u"New Adventures of Queen Victoria",u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
            # (u"Next Door Neighbors",u"http://www.gocomics.com/next-door-neighbors"),
            # (u"Nick and Zuzu",u"http://www.gocomics.com/nick-and-zuzu"),
            (u"Non Sequitur",u"http://www.gocomics.com/nonsequitur"),
            # (u"The Norm 4.0",u"http://www.gocomics.com/the-norm-4-0"),
            # (u"The Norm Classics",u"http://www.gocomics.com/thenorm"),
            # (u"Not Invented Here",u"http://www.gocomics.com/not-invented-here"),
            # (u"Nothing is Not Something",u"http://www.gocomics.com/nothing-is-not-something"),
            # (u"Now Recharging",u"http://www.gocomics.com/now-recharging"),
            # (u"Off the Mark",u"http://www.gocomics.com/offthemark"),
            # (u"Oh Brother!",u"http://www.gocomics.com/oh-brother"),
            # (u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
            # (u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
            # (u"Ollie and Quentin",u"http://www.gocomics.com/ollie-and-quentin"),
            # (u"On A Claire Day",u"http://www.gocomics.com/onaclaireday"),
            # (u"One Big Happy",u"http://www.gocomics.com/onebighappy"),
            # (u"Ordinary Bill",u"http://www.gocomics.com/ordinary-bill"),
            # (u"Origins of the Sunday Comics",u"http://www.gocomics.com/origins-of-the-sunday-comics"),
            # (u"The Other Coast",u"http://www.gocomics.com/theothercoast"),
            # (u"The Other End",u"http://www.gocomics.com/the-other-end"),
            # (u"Out of the Gene Pool Re-Runs",u"http://www.gocomics.com/outofthegenepool"),
            # (u"Over the Hedge",u"http://www.gocomics.com/overthehedge"),
            # (u"Overboard",u"http://www.gocomics.com/overboard"),
            # (u"Owlturd",u"http://www.gocomics.com/owlturd"),
            # (u"Ozy and Millie",u"http://www.gocomics.com/ozy-and-millie"),
            # (u"Henry Payne",u"http://www.gocomics.com/henrypayne"),
            # (u"PC and Pixel",u"http://www.gocomics.com/pcandpixel"),
            (u"Peanuts",u"http://www.gocomics.com/peanuts"),
            # (u"Peanuts Begins",u"http://www.gocomics.com/peanuts-begins"),
            # (u"Peanuts Holiday Countdown",u"http://www.gocomics.com/peanuts-holiday-countdown"),
            (u"Pearls Before Swine",u"http://www.gocomics.com/pearlsbeforeswine"),
            # (u"Perry Bible Fellowship",u"http://www.gocomics.com/perry-bible-fellowship"),
            # (u"Joel Pett",u"http://www.gocomics.com/joelpett"),
            # (u"Phoebe and Her Unicorn",u"http://www.gocomics.com/phoebe-and-her-unicorn"),
            # (u"Pibgorn",u"http://www.gocomics.com/pibgorn"),
            # (u"Pibgorn Sketches",u"http://www.gocomics.com/pibgornsketches"),
            # (u"Pickles",u"http://www.gocomics.com/pickles"),
            # (u"Pictures in Boxes",u"http://www.gocomics.com/pictures-in-boxes"),
            # (u"Pie Comic",u"http://www.gocomics.com/pie-comic"),
            # (u"Pinkerton",u"http://www.gocomics.com/pinkerton"),
            # (u"Please Listen to Me",u"http://www.gocomics.com/please-listen-to-me"),
            # (u"Pluggers",u"http://www.gocomics.com/pluggers"),
            (u"Pooch Cafe",u"http://www.gocomics.com/poochcafe"),
            # (u"Poorcraft",u"http://www.gocomics.com/poorcraft"),
            # (u"Poorly Drawn Lines",u"http://www.gocomics.com/poorly-drawn-lines"),
            # (u"Pop Culture Shock Therapy",u"http://www.gocomics.com/pop-culture-shock-therapy"),
            # (u"Poptropica",u"http://www.gocomics.com/poptropica"),
            # (u"Pot-Shots",u"http://www.gocomics.com/pot-shots"),
            # (u"PreTeena",u"http://www.gocomics.com/preteena"),
            # (u"Prickly City",u"http://www.gocomics.com/pricklycity"),
            # (u"Promises-Promises",u"http://www.gocomics.com/promises-promises"),
            # (u"Questionable Quotebook",u"http://www.gocomics.com/questionable-quotebook"),
            # (u"The Quixote Syndrome",u"http://www.gocomics.com/the-quixote-syndrome"),
            # (u"Rabbits Against Magic",u"http://www.gocomics.com/rabbitsagainstmagic"),
            # (u"Raising Duncan",u"http://www.gocomics.com/raising-duncan"),
            # (u"Ted Rall",u"http://www.gocomics.com/ted-rall"),
            # (u"Michael Ramirez",u"http://www.gocomics.com/michaelramirez"),
            # (u"Marshall Ramsey",u"http://www.gocomics.com/marshallramsey"),
            # (u"Randolph Itch 2 a.m.",u"http://www.gocomics.com/randolphitch"),
            # (u"Tom Toles",u"http://www.gocomics.com/randolphitch"),
            # (u"Random Acts of Nancy",u"http://www.gocomics.com/random-acts-of-nancy"),
            # (u"Real Life Adventures",u"http://www.gocomics.com/reallifeadventures"),
            # (u"Reality Check",u"http://www.gocomics.com/realitycheck"),
            # (u"Red and Rover",u"http://www.gocomics.com/redandrover"),
            # (u"Reply All",u"http://www.gocomics.com/replyall"),
            # (u"Reply AllLite",u"http://www.gocomics.com/reply-all-lite"),
            # (u"Richard's Poor Almanac",u"http://www.gocomics.com/richards-poor-almanac"),
            # (u"Rip Haywire",u"http://www.gocomics.com/riphaywire"),
            # (u"Ripley's Believe It or Not",u"http://www.gocomics.com/ripleysbelieveitornot"),
            # (u"Robbie and Bobby",u"http://www.gocomics.com/robbie-and-bobby"),
            # (u"Rob Rogers",u"http://www.gocomics.com/robrogers"),
            (u"Rose is Rose",u"http://www.gocomics.com/roseisrose"),
            # (u"Rubes",u"http://www.gocomics.com/rubes"),
            # (u"Rudy Park",u"http://www.gocomics.com/rudypark"),
            # (u"Sarah's Scribbles",u"http://www.gocomics.com/sarahs-scribbles"),
            # (u"Saturday Morning Breakfast Cereal",u"http://www.gocomics.com/saturday-morning-breakfast-cereal"),
            # (u"Savage Chickens",u"http://www.gocomics.com/savage-chickens"),
            # (u"Scary Gary",u"http://www.gocomics.com/scarygary"),
            # (u"Scenes from a Multiverse",u"http://www.gocomics.com/scenes-from-a-multiverse"),
            # (u"Sheldon",u"http://www.gocomics.com/sheldon"),
            # (u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
            # (u"Shirley and Son Classics",u"http://www.gocomics.com/shirley-and-son-classics"),
            (u"Shoe",u"http://www.gocomics.com/shoe"),
            # (u"Shoecabbage",u"http://www.gocomics.com/shoecabbage"),
            # (u"Shortcuts",u"http://www.gocomics.com/shortcuts"),
            # (u"Shutterbug Follies",u"http://www.gocomics.com/shutterbug-follies"),
            # (u"Sketchshark Comics",u"http://www.gocomics.com/sketchshark-comics"),
            # (u"Sketchy Chics",u"http://www.gocomics.com/sketchy-chics"),
            # (u"Skin Horse",u"http://www.gocomics.com/skinhorse"),
            # (u"Skippy",u"http://www.gocomics.com/skippy"),
            # (u"Small Potatoes",u"http://www.gocomics.com/small-potatoes"),
            # (u"Snow Sez...",u"http://www.gocomics.com/snow-sez"),
            # (u"Snowflakes",u"http://www.gocomics.com/snowflakes"),
            # (u"Soulmates",u"http://www.gocomics.com/soulmates"),
            # (u"Soup to Nutz",u"http://www.gocomics.com/soup-to-nutz"),
            # (u"Speechless",u"http://www.gocomics.com/speechless"),
            # (u"Speed Bump",u"http://www.gocomics.com/speedbump"),
            # (u"Spirit of the Staircase",u"http://www.gocomics.com/spirit-of-the-staircase"),
            # (u"Spot the Frog",u"http://www.gocomics.com/spot-the-frog"),
            # (u"Jeff Stahler",u"http://www.gocomics.com/jeffstahler"),
            # (u"Scott Stantis",u"http://www.gocomics.com/scottstantis"),
            # (u"Starling",u"http://www.gocomics.com/starling"),
            # (u"Starslip",u"http://www.gocomics.com/starslip"),
            # (u"Sticky Comics",u"http://www.gocomics.com/sticky-comics"),
            # (u"Stone Soup",u"http://www.gocomics.com/stonesoup"),
            # (u"Stone Soup Classics",u"http://www.gocomics.com/stone-soup-classics"),
            # (u"Strange Brew",u"http://www.gocomics.com/strangebrew"),
            # (u"Dana Summers",u"http://www.gocomics.com/danasummers"),
            # (u"Sunny Street",u"http://www.gocomics.com/sunny-street"),
            # (u"The Sunshine Club",u"http://www.gocomics.com/the-sunshine-club"),
            # (u"Sunshine State",u"http://www.gocomics.com/sunshine-state"),
            # (u"Sweet and Sour Pork",u"http://www.gocomics.com/sweet-and-sour-pork"),
            # (u"Sylvia",u"http://www.gocomics.com/sylvia"),
            # (u"Paul Szep",u"http://www.gocomics.com/paulszep"),
            # (u"Tank McNamara",u"http://www.gocomics.com/tankmcnamara"),
            # (u"Tarzan",u"http://www.gocomics.com/tarzan"),
            # (u"@Tavicat",u"http://www.gocomics.com/tavicat"),
            # (u"Ten Cats",u"http://www.gocomics.com/ten-cats"),
            # (u"That is Priceless",u"http://www.gocomics.com/that-is-priceless"),
            # (u"That Monkey Tune",u"http://www.gocomics.com/that-monkey-tune"),
            # (u"That New Carl Smell",u"http://www.gocomics.com/that-new-carl-smell"),
            # (u"That's Life",u"http://www.gocomics.com/thats-life"),
            # (u"Thatababy",u"http://www.gocomics.com/thatababy"),
            # (u"Thin Lines",u"http://www.gocomics.com/thinlines"),
            # (u"(th)ink",u"http://www.gocomics.com/think"),
            # (u"Tiny Confessions",u"http://www.gocomics.com/tiny-confessions"),
            # (u"Tiny Sepuku",u"http://www.gocomics.com/tinysepuku"),
            # (u"TOBY",u"http://www.gocomics.com/toby"),
            # (u"Today's Dogg",u"http://www.gocomics.com/todays-dogg"),
            # (u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
            # (u"Tom the Dancing Bug",u"http://www.gocomics.com/tomthedancingbug"),
            # (u"Super-Fun-Pak Comix",u"http://www.gocomics.com/super-fun-pak-comix"),
            # (u"Too Much Coffee Man",u"http://www.gocomics.com/toomuchcoffeeman"),
            # (u"Tough Town",u"http://www.gocomics.com/tough-town"),
            # (u"Trivquiz",u"http://www.gocomics.com/trivquiz"),
            # (u"Truth Facts",u"http://www.gocomics.com/truth-facts"),
            # (u"Uncle Art's Funland",u"http://www.gocomics.com/uncleartsfunland"),
            # (u"Understanding Chaos",u"http://www.gocomics.com/understanding-chaos"),
            # (u"Unstrange Phenomena",u"http://www.gocomics.com/unstrange-phenomena"),
            # (u"Up and Out",u"http://www.gocomics.com/up-and-out"),
            # (u"The Upside Down World of Gustave Verbeek",u"http://www.gocomics.com/upside-down-world-of-gustave-verbeek"),
            # (u"U.S. Acres",u"http://www.gocomics.com/us-acres"),
            # (u"Gary Varvel",u"http://www.gocomics.com/garyvarvel"),
            # (u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
            # (u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
            # (u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
            # (u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
            # (u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
            # (u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
            # (u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
            # (u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
            # (u"Viivi & Wagner",u"http://www.gocomics.com/viivi-and-wagner"),
            # (u"Faces of the News by Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
            # (u"Wallace the Brave",u"http://www.gocomics.com/wallace-the-brave"),
            # (u"The Wandering Melon",u"http://www.gocomics.com/the-wandering-melon"),
            # (u"Warped",u"http://www.gocomics.com/warped"),
            # (u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
            # (u"Watch Your Head",u"http://www.gocomics.com/watchyourhead"),
            # (u"WaynoVision",u"http://www.gocomics.com/waynovision"),
            # (u"Wee Pals",u"http://www.gocomics.com/weepals"),
            # (u"We the Robots",u"http://www.gocomics.com/we-the-robots"),
            # (u"Wicked Crispy",u"http://www.gocomics.com/wicked-crispy"),
            # (u"Widdershins",u"http://www.gocomics.com/widdershins"),
            # (u"Wide Open",u"http://www.gocomics.com/wide-open"),
            # (u"Signe Wilkinson",u"http://www.gocomics.com/signewilkinson"),
            # (u"Winston",u"http://www.gocomics.com/winston"),
            # (u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
            # (u"CartoonArts International",u"http://www.gocomics.com/witoftheworld"),
            (u"Wizard of Id",u"http://www.gocomics.com/wizardofid"),
            # (u"Wizard of Id Classics",u"http://www.gocomics.com/wizard-of-id-classics"),
            # (u"Wondermark",u"http://www.gocomics.com/wondermark"),
            # (u"Working Daze",u"http://www.gocomics.com/working-daze"),
            # (u"Working It Out",u"http://www.gocomics.com/workingitout"),
            # (u"World of Wonder",u"http://www.gocomics.com/world-of-wonder"),
            # (u"The Worried Well",u"http://www.gocomics.com/the-worried-well"),
            # (u"The Worst Thing I've Ever Done",u"http://www.gocomics.com/the-worst-thing-ive-ever-done"),
            # (u"Wrong Hands",u"http://www.gocomics.com/wrong-hands"),
            # (u"W.T. Duck",u"http://www.gocomics.com/wtduck"),
            # (u"Matt Wuerker",u"http://www.gocomics.com/mattwuerker"),
            # (u"WuMo",u"http://www.gocomics.com/wumo"),
            # (u"Wyatt",u"http://www.gocomics.com/wyatt"),
            # (u"Yenny Lopez",u"http://www.gocomics.com/yenny-lopez"),
            # (u"Zack Hill",u"http://www.gocomics.com/zackhill"),
            # (u"Zen Pencils",u"http://www.gocomics.com/zen-pencils"),
            # (u"Ziggy",u"http://www.gocomics.com/ziggy"),
            # (u"2 Cows and a Chicken",u"http://www.gocomics.com/2cowsandachicken"),
            # (u"9 to 5",u"http://www.gocomics.com/9to5"),
            # (u"9 Chickweed Lane",u"http://www.gocomics.com/9chickweedlane"),
        ]):  # }}}
            self.log('Working on: ', title, url)
            articles = self.make_links(title, url)
            if articles:
                feeds.append((title, articles))
            # In test mode two comics are enough to exercise the recipe.
            if self.test and i > 0:
                break
        return feeds

    def cleanup(self):
        '''
        Remove the temporary directory holding the rendered strip pages.

        Does nothing when ``parse_index`` never created the directory;
        filesystem errors during removal are deliberately ignored.
        '''
        gocomics_dir = getattr(self, 'gocomics_dir', None)
        if gocomics_dir is None:
            return
        try:
            shutil.rmtree(gocomics_dir)
        except EnvironmentError:
            # Best effort only: a leftover temporary directory is harmless.
            pass

    def parse_comic_page(self, content):
        '''
        Extract heading, date and image markup for one strip.

        :param content: parsed element that contains the strip; it is
                        searched for an ``<a itemprop="image">`` wrapping
                        an ``<img>``
        :return: dict with the keys ``h1``, ``date`` and ``img``
        :raises StopIteration: when the anchor or the image is missing
                               (``make_links`` treats this as "no more
                               strips for this comic")
        '''
        a = content.find('a', itemprop='image')
        img = a.find('img') if a is not None else None
        if img is None:
            raise StopIteration()
        # Blank srcset so that only the plain src of the image is used.
        img['srcset'] = ''
        # The heading is every word of the link title except the last two,
        # the date is the final word. A missing or empty title yields
        # empty strings instead of an exception.
        # NOTE(review): the exact wording of the title attribute is not
        # visible here - confirm against a live page.
        title_parts = (a.get('title') or '').split()
        title = ' '.join(title_parts[:-2])
        date = title_parts[-1] if title_parts else ''
        return {'h1': title, 'date': date, 'img': str(img)}

    def render_comic_page(self, data, num, title):
        '''
        Write one strip as a standalone HTML file and describe it as an article.

        :param data: dict as returned by ``parse_comic_page``
        :param num: zero-based position of the strip within its comic
        :param title: title of the comic
        :return: dict with the keys ``title`` and ``url``; the URL is a
                 ``file:`` URL pointing at the file that was written
        '''
        fname = ascii_filename('%03d_%s' % (num, title)).replace(' ', '_')
        path = os.path.join(self.gocomics_dir, fname)
        # The 'h1' value is the page heading, so it gets a real <h1> element.
        html = '<html><body><h1>{h1}</h1><h2>{date}</h2><div>{img}</div></body></html>'.format(**data)
        with lopen(path, 'wb') as f:
            f.write(html.encode('utf-8'))
        return {
            'title': 'Page %d of %s' % ((num + 1), title),
            'url': ('file:' if iswindows else 'file://') + path.replace(os.sep, '/'),
        }

    def make_links(self, title, url):
        '''
        Collect the most recent strips of one comic, oldest first.

        Starts at the strip linked from the comic's landing page and follows
        the "previous" links until enough strips were gathered or a page
        cannot be parsed.

        :param title: title of the comic
        :param url: landing page of the comic
        :return: list of article dicts as produced by ``render_comic_page``;
                 empty when no strip could be found
        '''
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'class': lambda x: x and 'gc-deck' in x.split()})
        a = div.find('a', href=True) if div is not None else None
        if a is None:
            # Page layout not recognised: skip this comic instead of
            # failing the whole download.
            self.log('Could not find the latest strip for: ', title, url)
            return []
        url = absolutize(a['href'])
        current_articles = []
        # Test mode always fetches two strips; a local count is used so the
        # user-configured class setting is never overwritten.
        num = 2 if self.test else self.num_comics_to_get
        while num > 0:
            num -= 1
            page_soup = self.index_to_soup(url)
            if not page_soup:
                break
            content = page_soup.find(attrs={'class': lambda x: x and 'comic__image' in x.split()})
            if content is None:
                break
            try:
                current_articles.append(self.parse_comic_page(content))
            except StopIteration:
                # parse_comic_page signals a strip without an image this way.
                break
            a = content.parent.find('a', attrs={'href':True, 'class':lambda x: x and 'fa-caret-left' in x.split()})
            if a is None:
                break
            url = absolutize(a['href'])
        # Strips were gathered newest first; number and render them oldest first.
        return [self.render_comic_page(ar, i, title) for i, ar in enumerate(reversed(current_articles))]