# -*- coding: utf8 -*-
"""Calibre news recipe that builds an e-book from Arcadia BBS story dumps."""
import re

from calibre.web.feeds.recipes import BasicNewsRecipe


class Arcadia_BBS(BasicNewsRecipe):
    """Recipe that fetches the stories listed in ``sid_list`` from mai-net.net.

    Each story id becomes one section whose articles are the links found on
    that story's dump page.
    """

    title = u'Arcadia'
    __author__ = 'Masahiro Hasegawa'
    language = 'ja'
    encoding = 'utf8'
    # Every dot is escaped so that only the literal ad host is matched.
    filter_regexps = [r'ad\.jp\.ap\.valuecommerce\.com', ]
    timefmt = '[%Y/%m/%d]'
    remove_tags_before = dict(name='a', attrs={'name': 'kiji'})
    # Story ids to download; each is formatted with ``%d`` into the dump URL,
    # so entries must be integers. Empty by default: fill in before use.
    sid_list = []

    def parse_index(self):
        """Build the section/article index for every story in ``sid_list``.

        Returns:
            A list of ``(section_title, articles)`` tuples, one per story that
            yielded at least one article. ``section_title`` is the title of
            the story's first article and ``articles`` is a list of dicts
            with the keys ``title``, ``url``, ``date``, ``description`` and
            ``content``.
        """
        # Compiled once: the pattern does not change between stories.
        kiji_href = re.compile(r'.*?kiji')

        result = []
        for sid in self.sid_list:
            soup = self.index_to_soup(
                'http://www.mai-net.net/bbs/sst/sst.php?act=dump&all=%d' % sid)
            links = soup.findAll('a', attrs={'href': kiji_href})

            # The last two matching anchors are dropped.
            # NOTE(review): presumably these are navigation links rather than
            # chapters - confirm against the page layout.
            articles = [
                dict(
                    title=link.string,
                    url="http://www.mai-net.net" + link['href'],
                    # Fourth <td> of the ancestor three levels up, minus its
                    # last six characters.
                    # NOTE(review): presumably trims a time-of-day suffix -
                    # confirm against the page layout.
                    date=link.parent.parent.parent.findAll('td')[3].string[:-6],
                    description='',
                    content='',
                )
                for link in links[:-2]
            ]

            # A story with no usable links would make ``articles[0]`` raise
            # IndexError and abort the whole download; skip it instead.
            if not articles:
                continue

            result.append((articles[0]['title'], articles))
        return result