"""Calibre news recipe for the Dong-A Ilbo (동아일보) RSS feeds."""

import re

from calibre.web.feeds.recipes import BasicNewsRecipe

# (feed title, RSS section name) pairs.
# Comment out sections you are not interested in.
sections = [
    ('정치', 'politics'),
    ('사회', 'national'),
    ('경제', 'economy'),
    ('국제', 'international'),
    ('사설칼럼', 'editorials'),
    ('의학과학', 'science'),
    ('문화연예', 'culture'),
    ('스포츠', 'sports'),
    ('사람속으로', 'inmul'),
    # The following sections are optional and disabled by default.
    # Uncomment to enable.
    # ('건강', 'health'),
    # ('레저', 'leisure'),
    # ('도서', 'book'),
    # ('공연', 'show'),
    # ('여성', 'woman'),
    # ('여행', 'travel'),
    # ('생활정보', 'lifeinfo'),
]


class Donga(BasicNewsRecipe):
    """Recipe that builds one feed per entry in ``sections``."""

    language = 'ko'
    title = '동아일보'
    description = '동아일보 기사'
    __author__ = 'Minsik Cho'
    ignore_duplicate_articles = {'title', 'url'}
    compress_news_images = True
    no_stylesheets = True
    oldest_article = 2
    encoding = 'utf-8'

    # RSS feed URLs have the form:
    # https://rss.donga.com/[section].xml
    # The loop variable is deliberately not called ``title`` so that it
    # cannot be confused with (or clobber) the class attribute above.
    feeds = [
        (feed_title, 'https://rss.donga.com/' + section + '.xml')
        for (feed_title, section) in sections
    ]

    # Remove logo and print buttons
    remove_tags = [
        dict(name='div', attrs={'class': 'popHeaderWrap'}),
        dict(name='div', attrs={'class': 'etc'}),
    ]

    def print_version(self, url):
        """Return the print-view URL for an article URL.

        Article URLs are expected to look like
        ``https://www.donga.com/news/[section]/article/all/[date]/[gid]/1``
        and are rewritten to
        ``https://www.donga.com/news/View?gid=[gid]&date=[date]``.

        If ``url`` does not contain an ``/all/[date]/[gid]`` part with
        numeric date and gid, it is returned unchanged instead of raising
        ``AttributeError`` on the failed match.
        """
        found = re.search(r'/all/([0-9]+)/([0-9]+)', url)
        if found is None:
            # Unknown URL layout: fall back to the regular article page.
            return url
        date, gid = found.groups()
        return 'https://www.donga.com/news/View?gid={}&date={}'.format(gid, date)