# Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-06-23 15:30:45 -04:00); 101 lines, 3.8 KiB, plaintext.
# vim:fileencoding=UTF-8
|
|
from __future__ import unicode_literals
|
|
from __future__ import print_function
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2013, Eddie Lau'
|
|
__Date__ = ''
|
|
|
|
'''
|
|
Change Log:
|
|
2013/09/28 -- update due to website redesign, add cover
|
|
2013/03/30 -- first version
|
|
'''
|
|
|
|
try:
|
|
from urllib.parse import unquote
|
|
except ImportError:
|
|
from urllib import unquote
|
|
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
|
|
class AM730(BasicNewsRecipe):
    '''
    Recipe for the AM730 newspaper site (https://www.am730.com.hk).

    The section list is hard coded in parse_index(); each section index page
    is scanned for article links and the article title is derived from the
    percent-encoded slug at the end of every article URL.
    '''

    title = u'AM730'
    __author__ = 'Eddie Lau'
    publisher = 'AM730'
    oldest_article = 1
    max_articles_per_feed = 100
    language = 'zh'
    encoding = 'utf-8'
    auto_cleanup = False
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    description = 'http://www.am730.com.hk'
    category = 'Chinese, News, Hong Kong'
    masthead_url = 'https://upload.wikimedia.org/wikipedia/en/5/58/Am730_Hong_Kong_newspaper_logo.png'
    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}'  # noqa
    remove_tags = [
        dict(name='div', attrs={'class': 'col-xs-12 col-sm-1 col-md-1 share-button'}),
        dict(name='div', attrs={'class': 'logo-container print-logo'}),
        dict(name='div', attrs={'id': 'galleria'}),
    ]
    keep_only_tags = [
        dict(name='div', attrs={'class': 'col-xs-12 col-sm-12 col-md-12 news-detail-content-container'}),
    ]
    compress_news_images = True
    compress_news_images_auto_size = 16
    compress_news_images_max_size = 20  # kB
    scale_news_images = (600, 800)
    ignore_duplicate_articles = {'title', 'url'}

    # Set to True to print section names, URLs and collected articles
    # while the index is being built.
    debug = False

    def get_cover_url(self):
        '''Return the masthead image URL, which doubles as the cover.'''
        return self.masthead_url

    @staticmethod
    def _title_from_href(href):
        '''
        Derive an article title from the last path segment of href.

        The title is the text before the first '-' of that segment, with
        percent-escapes decoded as UTF-8. Always returns a text string.
        '''
        slug = href.split('/')[-1].split('-')[0]
        if not isinstance(slug, str):
            # Python 2 only: urllib.unquote() decodes escapes in a unicode
            # object as latin-1, so hand it UTF-8 bytes instead.
            slug = slug.encode('utf-8')
        decoded = unquote(slug)
        if isinstance(decoded, bytes):
            # Python 2 returns a byte string here; Python 3 already gave text.
            decoded = decoded.decode('utf-8', 'replace')
        return decoded

    def getAMSectionArticles(self, sectionName, url):
        '''
        Collect the articles linked from one section index page.

        :param sectionName: display name of the section, returned unchanged.
        :param url: URL of the section index page; only links whose href
            starts with this URL are treated as belonging to the section.
        :return: a (sectionName, articles) tuple where articles is a list of
            {'title': ..., 'url': ...} dicts, without duplicates and capped
            at max_articles_per_feed entries.
        '''
        soup = self.index_to_soup(url)
        articles = []
        seen_urls = set()
        for link in soup.findAll('a', attrs={'class': 'newsimglink'}):
            href = link.get('href')
            if not href or not href.startswith(url):
                continue  # anchor without a target, or not in same section

            article_title = self._title_from_href(href)
            if self.debug:
                print(article_title)

            # The title is derived from the URL, so the URL alone
            # identifies a duplicate entry.
            if href in seen_urls:
                continue  # already added
            seen_urls.add(href)
            articles.append({'title': article_title, 'url': href})

            if len(articles) >= self.max_articles_per_feed:
                break
        if self.debug:
            print(articles)
        return (sectionName, articles)

    def parse_index(self):
        '''
        Build the recipe index from the hard coded section list.

        :return: a list of (section title, articles) tuples, one per
            section, in the order the sections are listed below.
        '''
        # hard code sections
        sections = [
            ('新聞', 'https://www.am730.com.hk/news/%E6%96%B0%E8%81%9E'),
            ('財經', 'https://www.am730.com.hk/news/%E8%B2%A1%E7%B6%93'),
            ('健康', 'https://www.am730.com.hk/news/%E5%81%A5%E5%BA%B7'),
            ('科技', 'https://www.am730.com.hk/news/%E7%A7%91%E6%8A%80'),
            ('體育', 'https://www.am730.com.hk/news/%E9%AB%94%E8%82%B2'),
            ('娛樂', 'https://www.am730.com.hk/news/%E5%A8%9B%E6%A8%82'),
            ('旅遊.飲食', 'https://www.am730.com.hk/news/%E6%97%85%E9%81%8A.%E9%A3%B2%E9%A3%9F'),
        ]
        section_articles = []
        for section_title, section_url in sections:
            if self.debug:
                print(section_title)
                print(section_url)
            section_articles.append(self.getAMSectionArticles(section_title, section_url))
        return section_articles
|