Fixes #1819960 [recipe broken Smilezilla](https://bugs.launchpad.net/calibre/+bug/1819960)
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import re

from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.web.feeds.news import BasicNewsRecipe


class SmileZilla(BasicNewsRecipe):
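    # Builds each day's issue from the smilezilla.com joke and story index
    # pages, turning every entry found there into a separate article.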
    title = 'SmileZilla'
    language = 'en'
    __author__ = "Will"
    JOKES_INDEX = 'http://www.smilezilla.com/joke.do'
    STORIES_INDEX = 'http://www.smilezilla.com/story.do'
    description = 'Daily Jokes and funny stories'
    oldest_article = 1
    no_stylesheets = True
    encoding = 'utf-8'

    # Drop any tables that appear inside the downloaded entries.
    remove_tags = [dict(name='table')]

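    # Both index pages wrap the day's content in a form named 'contentForm'
    # and carry the page heading in a <div class="title">.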
    def _get_entry(self, soup):
        return soup.find('form', attrs={'name': 'contentForm'})

    def _get_section_title(self, soup):
        title_div = soup.find('div', attrs={'class': 'title'})
        return self.tag_to_string(title_div).strip()

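    # No feeds are used; parse_index builds the article list directly. Each
    # joke/story is written to a temporary HTML file and referenced by a
    # file:// URL so the downloader can fetch it locally.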
    def parse_index(self):
        self.tdir = PersistentTemporaryDirectory()

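        # Fetch a page and rewrite root-relative image URLs to absolute ones,
        # since the entries end up being served from local temp files.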
        def as_soup(url):
            soup = self.index_to_soup(url)
            for img in soup.findAll('img', src=True):
                if img['src'].startswith('/'):
                    img['src'] = 'http://www.smilezilla.com' + img['src']
            return soup

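        # Collect (section_title, article_list) pairs: jokes first, then
        # stories. Each entry on an index page sits between an <hr> and the
        # <table> that follows it, which is what the regex below captures.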
        articles = []

        soup = as_soup(self.JOKES_INDEX)
        jokes_entry = self._get_entry(soup)
        section_title = self._get_section_title(soup)
        todays_jokes = []
        for i, text in enumerate(re.findall(r'<hr.*?>(.+?)<table', type(u'')(jokes_entry), flags=re.DOTALL)):
            title = 'Joke {}'.format(i + 1)
            with open(os.path.join(self.tdir, 'j{}.html'.format(i)), 'wb') as f:
                f.write(b'<html><body>')
                f.write(text.encode('utf-8'))
            todays_jokes.append({'title': title, 'url': 'file:///' + f.name})
        articles.append((section_title, todays_jokes))

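        # Stories: same extraction as the jokes, from the story index page.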
        soup = as_soup(self.STORIES_INDEX)
        entry = self._get_entry(soup)
        section_title = self._get_section_title(soup)

        todays_stories = []
        for i, text in enumerate(re.findall(r'<hr.*?>(.+?)<table', type(u'')(entry), flags=re.DOTALL)):
            title = 'Story {}'.format(i + 1)
            with open(os.path.join(self.tdir, 's{}.html'.format(i)), 'wb') as f:
                f.write(b'<html><body>')
                f.write(text.encode('utf-8'))
            todays_stories.append({'title': title, 'url': 'file:///' + f.name})
        articles.append((section_title, todays_stories))

        return articles