mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update AM 730 and Ming Pao (HK)
This commit is contained in:
parent
ea9a2dfd8f
commit
fd77ad2c92
@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Eddie Lau'
|
__copyright__ = '2013, Eddie Lau'
|
||||||
__Date__ = ''
|
__Date__ = ''
|
||||||
__HiResImg__ = True
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2013/09/28 -- update due to website redesign, add cover
|
||||||
2013/03/30 -- first version
|
2013/03/30 -- first version
|
||||||
'''
|
'''
|
||||||
|
|
||||||
@ -32,18 +32,17 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
auto_cleanup = False
|
auto_cleanup = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
description = 'http://www.am730.com.hk'
|
description = 'http://www.am730.com.hk'
|
||||||
category = 'Chinese, News, Hong Kong'
|
category = 'Chinese, News, Hong Kong'
|
||||||
masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
|
masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
|
||||||
|
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 20px; margin-bottom: 20px; max-height:70%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} li {font-size:50%; margin-left:auto; margin-right:auto;}'
|
||||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
|
keep_only_tags = [dict(name='h2', attrs={'class':'printTopic'}),
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
|
dict(name='div', attrs={'id':'article_content'}),
|
||||||
dict(name='div', attrs={'class':'thecontent wordsnap'}),
|
dict(name='div', attrs={'id':'slider'})]
|
||||||
dict(name='a', attrs={'class':'lightboximg'})]
|
remove_tags = [dict(name='img', attrs={'src':'images/am730_article_logo.jpg'}),
|
||||||
remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
|
dict(name='img', attrs={'src':'images/am_endmark.gif'})]
|
||||||
dict(name='img', attrs={'src':'/images/am_endmark.gif'})]
|
|
||||||
|
|
||||||
def get_dtlocal(self):
|
def get_dtlocal(self):
|
||||||
dt_utc = datetime.datetime.utcnow()
|
dt_utc = datetime.datetime.utcnow()
|
||||||
@ -84,6 +83,16 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
def get_weekday(self):
|
def get_weekday(self):
|
||||||
return self.get_dtlocal().weekday()
|
return self.get_dtlocal().weekday()
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('http://www.am730.com.hk')
|
||||||
|
cover = 'http://www.am730.com.hk/' + soup.find(attrs={'id':'mini_news_img'}).find('img').get('src', False)
|
||||||
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
|
try:
|
||||||
|
br.open(cover)
|
||||||
|
except:
|
||||||
|
cover = None
|
||||||
|
return cover
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
picdiv = soup.find('img')
|
picdiv = soup.find('img')
|
||||||
@ -93,48 +102,17 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
feeds = []
|
feeds = []
|
||||||
soup = self.index_to_soup('http://www.am730.com.hk/')
|
soup = self.index_to_soup('http://www.am730.com.hk/')
|
||||||
ul = soup.find(attrs={'class':'nav-section'})
|
optgroups = soup.findAll('optgroup')
|
||||||
sectionList = []
|
for optgroup in optgroups:
|
||||||
for li in ul.findAll('li'):
|
sectitle = optgroup.get('label')
|
||||||
a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
|
articles = []
|
||||||
title = li.find('a').get('title', False).strip()
|
for option in optgroup.findAll('option'):
|
||||||
sectionList.append((title, a))
|
articlelink = "http://www.am730.com.hk/" + option.get('value')
|
||||||
for title, url in sectionList:
|
title = option.string
|
||||||
articles = self.parse_section(url)
|
articles.append({'title': title, 'url': articlelink})
|
||||||
if articles:
|
feeds.append((sectitle, articles))
|
||||||
feeds.append((title, articles))
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_section(self, url):
|
|
||||||
soup = self.index_to_soup(url)
|
|
||||||
items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
|
|
||||||
current_articles = []
|
|
||||||
for item in items:
|
|
||||||
a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
|
|
||||||
articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
|
|
||||||
title = self.tag_to_string(a)
|
|
||||||
description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
|
|
||||||
current_articles.append({'title': title, 'url': articlelink, 'description': description})
|
|
||||||
return current_articles
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
multia = soup.findAll('a')
|
|
||||||
for a in multia:
|
|
||||||
if not (a == None):
|
|
||||||
image = a.find('img')
|
|
||||||
if not (image == None):
|
|
||||||
if __HiResImg__:
|
|
||||||
image['src'] = image.get('src').replace('/thumbs/', '/')
|
|
||||||
caption = image.get('alt')
|
|
||||||
tag = Tag(soup, "photo", [])
|
|
||||||
tag2 = Tag(soup, "photocaption", [])
|
|
||||||
tag.insert(0, image)
|
|
||||||
if not caption == None:
|
|
||||||
tag2.insert(0, caption)
|
|
||||||
tag.insert(1, tag2)
|
|
||||||
a.replaceWith(tag)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def create_opf(self, feeds, dir=None):
|
def create_opf(self, feeds, dir=None):
|
||||||
if dir is None:
|
if dir is None:
|
||||||
dir = self.output_dir
|
dir = self.output_dir
|
||||||
@ -288,3 +266,4 @@ class AppleDaily(BasicNewsRecipe):
|
|||||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
opf.render(opf_file, ncx_file)
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2010-2011, Eddie Lau'
|
__copyright__ = '2010-2013, Eddie Lau'
|
||||||
|
|
||||||
# Region - Hong Kong, Vancouver, Toronto
|
# Region - Hong Kong, Vancouver, Toronto
|
||||||
__Region__ = 'Hong Kong'
|
__Region__ = 'Hong Kong'
|
||||||
@ -32,6 +32,7 @@ __Date__ = ''
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2013/09/28: allow thumbnails even with hi-res images
|
||||||
2012/04/24: improved parsing of news.mingpao.com content
|
2012/04/24: improved parsing of news.mingpao.com content
|
||||||
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
||||||
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
||||||
@ -846,8 +847,7 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
return soup
|
return soup
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
# thumbnails shouldn't be available if using hi-res images
|
if __IncludeThumbnails__ and first and hasattr(self, 'add_toc_thumbnail'):
|
||||||
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
|
|
||||||
img = soup.find('img')
|
img = soup.find('img')
|
||||||
if img is not None:
|
if img is not None:
|
||||||
self.add_toc_thumbnail(article, img['src'])
|
self.add_toc_thumbnail(article, img['src'])
|
||||||
@ -1071,3 +1071,4 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user