Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00
This commit is contained in:
parent 6240a00209
commit fc8e5feabd
BIN recipes/icons/mlody_technik_pl.png: binary file not shown (before: 15 KiB; after: 2.1 KiB)
@@ -29,14 +29,14 @@ __Date__ = ''
 '''
 Change Log:
 2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
 from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
 download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
 2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
 2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
 2011/10/19: fix a bug in txt source parsing
 2011/10/17: disable fetching of premium content, also improved txt source parsing
 2011/10/04: option to get hi-res photos for the articles
 2011/09/21: fetching "column" section is made optional.
 2011/09/18: parse "column" section stuff from source text file directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@@ -60,7 +60,6 @@ Change Log:
 2010/10/31: skip repeated articles in section pages
 '''
 
-from calibre import (browser, iswindows, __appname__, force_unicode, preferred_encoding, as_unicode)
 from calibre.utils.date import now as nowf
 import os, datetime, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
@@ -204,13 +203,13 @@ class MPRecipe(BasicNewsRecipe):
             return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
         else:
             return self.get_dtlocal().strftime("%Y-%m-%d")
 
     def get_fetchyear(self):
         if __Date__ <> '':
             return __Date__[0:4]
         else:
             return self.get_dtlocal().strftime("%Y")
 
     def get_fetchmonth(self):
         if __Date__ <> '':
             return __Date__[4:6]
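Note: the <> comparisons in the hunk above are legacy Python 2 syntax; <> was an alias for != and was removed in Python 3. A minimal sketch of one of these date helpers in Python 3 form, assuming __Date__ and get_dtlocal() behave as in the recipe:

    def get_fetchyear(self):
        # '<>' must be spelled '!=' in Python 3; behaviour is identical
        if __Date__ != '':
            return __Date__[0:4]
        else:
            return self.get_dtlocal().strftime("%Y")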
@@ -268,7 +267,7 @@ class MPRecipe(BasicNewsRecipe):
                 articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))
 
             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                 articles = self.parse_section(url)
@@ -305,7 +304,7 @@ class MPRecipe(BasicNewsRecipe):
                 articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))
 
             #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
             #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
             #    articles = self.parse_section(url)
@@ -322,7 +321,7 @@ class MPRecipe(BasicNewsRecipe):
                 articles = self.parse_section2_txt(url, keystr)
                 if articles:
                     feeds.append((title, articles))
 
             if __InclPremium__ == True:
                 # parse column section articles directly from .txt files
                 for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
@@ -330,7 +329,7 @@ class MPRecipe(BasicNewsRecipe):
                     articles = self.parse_section2_txt(url, keystr)
                     if articles:
                         feeds.append((title, articles))
 
             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
                 articles = self.parse_section(url)
@@ -410,7 +409,7 @@ class MPRecipe(BasicNewsRecipe):
                 title = self.tag_to_string(i)
                 url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
                 if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
                     try:
                         br.open_novisit(url)
                         url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
                         current_articles.append({'title': title, 'url': url, 'description': ''})
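The double negatives in the filter above (not url.rfind('.txt') == -1) simply test for substring containment. A sketch of the same collection step in plainer form; collect_article is a hypothetical wrapper, not part of the recipe, and its parameters mirror names bound in the surrounding method:

    # hypothetical restatement of the collection step in the hunk above
    def collect_article(br, included_urls, current_articles, title, url, keystr):
        # 'in' replaces the rfind(...) == -1 double negatives
        if url not in included_urls and '.txt' in url and keystr in url:
            try:
                br.open_novisit(url)
                # switch to the printed version of the article
                url = url.replace('dailynews3.cfm', 'dailynews3a.cfm')
                current_articles.append({'title': title, 'url': url, 'description': ''})
            except Exception:
                pass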
@@ -437,7 +436,7 @@ class MPRecipe(BasicNewsRecipe):
                     included_urls.append(url)
         current_articles.reverse()
         return current_articles
 
     # parse from www.mingpaovan.com
     def parse_section3(self, url, baseUrl):
         self.get_fetchdate()
@@ -559,7 +558,7 @@ class MPRecipe(BasicNewsRecipe):
                     photo = photo.replace('class="photo"', '')
                     new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
                 new_html = new_raw_html + '</body></html>'
             else:
                 # .txt based file
                 splitter = re.compile(r'\n') # Match non-digits
                 new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
@@ -622,23 +621,23 @@ class MPRecipe(BasicNewsRecipe):
             #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
             if __HiResImg__ == True:
                 # TODO: add a _ in front of an image url
                 if url.rfind('news.mingpao.com') > -1:
                     imglist = re.findall('src="?.*?jpg"', new_html)
                     br = mechanize.Browser()
                     br.set_handle_redirect(False)
                     for img in imglist:
                         gifimg = img.replace('jpg"', 'gif"')
                         try:
                             br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
                             new_html = new_html.replace(img, gifimg)
                         except:
                             # find the location of the first _
                             pos = img.find('_')
                             if pos > -1:
                                 # if found, insert _ after the first _
                                 newimg = img[0:pos] + '_' + img[pos:]
                                 new_html = new_html.replace(img, newimg)
                             else:
                                 # if not found, insert _ after "
                                 new_html = new_html.replace(img[1:], '"_' + img[1:])
                 elif url.rfind('life.mingpao.com') > -1:
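The hunk above is the hi-res image fallback for news.mingpao.com pages: for every src="...jpg" reference it first probes for a .gif sibling, and if that fetch fails it rewrites the jpg name by inserting an extra underscore. A self-contained sketch of the same logic; the helper name upgrade_image_refs and the page_url parameter are hypothetical, not part of the recipe:

    import re
    import mechanize

    def upgrade_image_refs(new_html, page_url):
        # Probe each jpg reference for a gif sibling; on failure, fall back
        # to the '_'-variant hi-res jpg name, mirroring the hunk above.
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        for img in re.findall('src="?.*?jpg"', new_html):
            gifimg = img.replace('jpg"', 'gif"')
            try:
                # gifimg[5:-1] strips the leading 'src="' and trailing '"'
                br.open_novisit(page_url + "/../" + gifimg[5:len(gifimg) - 1])
                new_html = new_html.replace(img, gifimg)
            except Exception:
                pos = img.find('_')
                if pos > -1:
                    # a '_' is already present: double it
                    new_html = new_html.replace(img, img[0:pos] + '_' + img[pos:])
                else:
                    # no '_' present: insert one right after the opening quote
                    new_html = new_html.replace(img[1:], '"_' + img[1:])
        return new_html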
@@ -675,7 +674,7 @@ class MPRecipe(BasicNewsRecipe):
                             #print 'Use hi-res img', newimg
                             new_html = new_html.replace(img, newimg)
         return new_html
 
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
             del item['style']
@@ -684,7 +683,7 @@ class MPRecipe(BasicNewsRecipe):
         for item in soup.findAll(stype=True):
             del item['absmiddle']
         return soup
 
     def populate_article_metadata(self, article, soup, first):
         # thumbnails shouldn't be available if using hi-res images
         if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
@@ -699,7 +698,7 @@ class MPRecipe(BasicNewsRecipe):
                 if not articlebodies:
                     articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
                 if not articlebodies:
                     articlebodies = soup.findAll('div',attrs={'class':'content'})
                 if not articlebodies:
                     articlebodies = soup.findAll('div', attrs={'id':'font'})
                 if articlebodies:
@@ -721,12 +720,12 @@ class MPRecipe(BasicNewsRecipe):
                 # display a simple text
                 #article.summary = article.text_summary = u'\u66f4\u591a......'
                 # display word counts
                 counts = 0
                 articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
                 if not articlebodies:
                     articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
                 if not articlebodies:
                     articlebodies = soup.findAll('div',attrs={'class':'content'})
                 if not articlebodies:
                     articlebodies = soup.findAll('div', attrs={'id':'font'})
                 if articlebodies:
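The chain of "if not articlebodies:" fallbacks above, repeated in two methods, tries a fixed list of Mingpao article containers in order. A compact sketch of the same lookup; find_article_bodies is a hypothetical helper, not part of the recipe:

    def find_article_bodies(soup):
        # try each known article container in turn; first match wins
        for attrs in ({'id': 'newscontent'}, {'id': 'newscontent01'},
                      {'class': 'content'}, {'id': 'font'}):
            bodies = soup.findAll('div', attrs=attrs)
            if bodies:
                return bodies
        return []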
@@ -908,5 +907,5 @@ class MPRecipe(BasicNewsRecipe):
 
         with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
             opf.render(opf_file, ncx_file)
 
 
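contextlib.nested(), used in the final hunk, only exists in Python 2; it was removed in Python 3 because the with statement accepts multiple context managers directly. The equivalent modern form, assuming the same opf_path, ncx_path and opf object as in the recipe:

    # Python 3 replacement for nested(open(...), open(...))
    with open(opf_path, 'wb') as opf_file, open(ncx_path, 'wb') as ncx_file:
        opf.render(opf_file, ncx_file)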