diff --git a/recipes/icons/mlody_technik_pl.png b/recipes/icons/mlody_technik_pl.png new file mode 100644 index 0000000000..9529ff0511 Binary files /dev/null and b/recipes/icons/mlody_technik_pl.png differ diff --git a/recipes/icons/mlody_technik_pl.recipe b/recipes/icons/mlody_technik_pl.recipe deleted file mode 100644 index f689e69a92..0000000000 Binary files a/recipes/icons/mlody_technik_pl.recipe and /dev/null differ diff --git a/recipes/ming_pao.recipe b/recipes/ming_pao.recipe index 88a7354cde..ea1823e1cc 100644 --- a/recipes/ming_pao.recipe +++ b/recipes/ming_pao.recipe @@ -29,14 +29,14 @@ __Date__ = '' ''' Change Log: 2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away - from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day + from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device. 2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010' 2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt 2011/10/19: fix a bug in txt source parsing 2011/10/17: disable fetching of premium content, also improved txt source parsing 2011/10/04: option to get hi-res photos for the articles -2011/09/21: fetching "column" section is made optional. +2011/09/21: fetching "column" section is made optional. 2011/09/18: parse "column" section stuff from source text file directly. 2011/09/07: disable "column" section as it is no longer offered free. 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source @@ -60,7 +60,6 @@ Change Log: 2010/10/31: skip repeated articles in section pages ''' -from calibre import (browser, iswindows, __appname__, force_unicode, preferred_encoding, as_unicode) from calibre.utils.date import now as nowf import os, datetime, re, mechanize from calibre.web.feeds.recipes import BasicNewsRecipe @@ -204,13 +203,13 @@ class MPRecipe(BasicNewsRecipe): return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8] else: return self.get_dtlocal().strftime("%Y-%m-%d") - + def get_fetchyear(self): if __Date__ <> '': return __Date__[0:4] else: - return self.get_dtlocal().strftime("%Y") - + return self.get_dtlocal().strftime("%Y") + def get_fetchmonth(self): if __Date__ <> '': return __Date__[4:6] @@ -268,7 +267,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -305,7 +304,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), # (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: # articles = self.parse_section(url) @@ -322,7 +321,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + if __InclPremium__ == True: # parse column section articles directly from .txt files for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl') @@ -330,7 +329,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -410,7 +409,7 @@ class MPRecipe(BasicNewsRecipe): title = self.tag_to_string(i) url = 'http://life.mingpao.com/cfm/' + i.get('href', False) if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): - try: + try: br.open_novisit(url) url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article current_articles.append({'title': title, 'url': url, 'description': ''}) @@ -437,7 +436,7 @@ class MPRecipe(BasicNewsRecipe): included_urls.append(url) current_articles.reverse() return current_articles - + # parse from www.mingpaovan.com def parse_section3(self, url, baseUrl): self.get_fetchdate() @@ -559,7 +558,7 @@ class MPRecipe(BasicNewsRecipe): photo = photo.replace('class="photo"', '') new_raw_html = new_raw_html + '
' + photo + '
' new_html = new_raw_html + '' - else: + else: # .txt based file splitter = re.compile(r'\n') # Match non-digits new_raw_html = 'Untitled
' @@ -622,23 +621,23 @@ class MPRecipe(BasicNewsRecipe): #raw_html = raw_html.replace(u'

\u3010', u'\u3010') if __HiResImg__ == True: # TODO: add a _ in front of an image url - if url.rfind('news.mingpao.com') > -1: + if url.rfind('news.mingpao.com') > -1: imglist = re.findall('src="?.*?jpg"', new_html) br = mechanize.Browser() br.set_handle_redirect(False) for img in imglist: gifimg = img.replace('jpg"', 'gif"') - try: + try: br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1]) new_html = new_html.replace(img, gifimg) - except: + except: # find the location of the first _ pos = img.find('_') if pos > -1: # if found, insert _ after the first _ newimg = img[0:pos] + '_' + img[pos:] new_html = new_html.replace(img, newimg) - else: + else: # if not found, insert _ after " new_html = new_html.replace(img[1:], '"_' + img[1:]) elif url.rfind('life.mingpao.com') > -1: @@ -675,7 +674,7 @@ class MPRecipe(BasicNewsRecipe): #print 'Use hi-res img', newimg new_html = new_html.replace(img, newimg) return new_html - + def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] @@ -684,7 +683,7 @@ class MPRecipe(BasicNewsRecipe): for item in soup.findAll(stype=True): del item['absmiddle'] return soup - + def populate_article_metadata(self, article, soup, first): # thumbnails shouldn't be available if using hi-res images if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'): @@ -699,7 +698,7 @@ class MPRecipe(BasicNewsRecipe): if not articlebodies: articlebodies = soup.findAll('div',attrs={'id':'newscontent01'}) if not articlebodies: - articlebodies = soup.findAll('div',attrs={'class':'content'}) + articlebodies = soup.findAll('div',attrs={'class':'content'}) if not articlebodies: articlebodies = soup.findAll('div', attrs={'id':'font'}) if articlebodies: @@ -721,12 +720,12 @@ class MPRecipe(BasicNewsRecipe): # display a simple text #article.summary = article.text_summary = u'\u66f4\u591a......' # display word counts - counts = 0 + counts = 0 articlebodies = soup.findAll('div',attrs={'id':'newscontent'}) if not articlebodies: articlebodies = soup.findAll('div',attrs={'id':'newscontent01'}) if not articlebodies: - articlebodies = soup.findAll('div',attrs={'class':'content'}) + articlebodies = soup.findAll('div',attrs={'class':'content'}) if not articlebodies: articlebodies = soup.findAll('div', attrs={'id':'font'}) if articlebodies: @@ -908,5 +907,5 @@ class MPRecipe(BasicNewsRecipe): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): opf.render(opf_file, ncx_file) - + diff --git a/recipes/ming_pao_toronto.recipe b/recipes/ming_pao_toronto.recipe index 739a808aba..84001d3952 100644 --- a/recipes/ming_pao_toronto.recipe +++ b/recipes/ming_pao_toronto.recipe @@ -29,14 +29,14 @@ __Date__ = '' ''' Change Log: 2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away - from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day + from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device. 2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010' 2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt 2011/10/19: fix a bug in txt source parsing 2011/10/17: disable fetching of premium content, also improved txt source parsing 2011/10/04: option to get hi-res photos for the articles -2011/09/21: fetching "column" section is made optional. +2011/09/21: fetching "column" section is made optional. 2011/09/18: parse "column" section stuff from source text file directly. 2011/09/07: disable "column" section as it is no longer offered free. 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source @@ -60,7 +60,6 @@ Change Log: 2010/10/31: skip repeated articles in section pages ''' -from calibre import (browser, iswindows, __appname__, force_unicode, preferred_encoding, as_unicode) from calibre.utils.date import now as nowf import os, datetime, re, mechanize from calibre.web.feeds.recipes import BasicNewsRecipe @@ -204,13 +203,13 @@ class MPRecipe(BasicNewsRecipe): return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8] else: return self.get_dtlocal().strftime("%Y-%m-%d") - + def get_fetchyear(self): if __Date__ <> '': return __Date__[0:4] else: - return self.get_dtlocal().strftime("%Y") - + return self.get_dtlocal().strftime("%Y") + def get_fetchmonth(self): if __Date__ <> '': return __Date__[4:6] @@ -268,7 +267,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -305,7 +304,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), # (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: # articles = self.parse_section(url) @@ -322,7 +321,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + if __InclPremium__ == True: # parse column section articles directly from .txt files for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl') @@ -330,7 +329,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -410,7 +409,7 @@ class MPRecipe(BasicNewsRecipe): title = self.tag_to_string(i) url = 'http://life.mingpao.com/cfm/' + i.get('href', False) if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): - try: + try: br.open_novisit(url) url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article current_articles.append({'title': title, 'url': url, 'description': ''}) @@ -437,7 +436,7 @@ class MPRecipe(BasicNewsRecipe): included_urls.append(url) current_articles.reverse() return current_articles - + # parse from www.mingpaovan.com def parse_section3(self, url, baseUrl): self.get_fetchdate() @@ -559,7 +558,7 @@ class MPRecipe(BasicNewsRecipe): photo = photo.replace('class="photo"', '') new_raw_html = new_raw_html + '

' + photo + '
' new_html = new_raw_html + '' - else: + else: # .txt based file splitter = re.compile(r'\n') # Match non-digits new_raw_html = 'Untitled
' @@ -622,23 +621,23 @@ class MPRecipe(BasicNewsRecipe): #raw_html = raw_html.replace(u'

\u3010', u'\u3010') if __HiResImg__ == True: # TODO: add a _ in front of an image url - if url.rfind('news.mingpao.com') > -1: + if url.rfind('news.mingpao.com') > -1: imglist = re.findall('src="?.*?jpg"', new_html) br = mechanize.Browser() br.set_handle_redirect(False) for img in imglist: gifimg = img.replace('jpg"', 'gif"') - try: + try: br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1]) new_html = new_html.replace(img, gifimg) - except: + except: # find the location of the first _ pos = img.find('_') if pos > -1: # if found, insert _ after the first _ newimg = img[0:pos] + '_' + img[pos:] new_html = new_html.replace(img, newimg) - else: + else: # if not found, insert _ after " new_html = new_html.replace(img[1:], '"_' + img[1:]) elif url.rfind('life.mingpao.com') > -1: @@ -675,7 +674,7 @@ class MPRecipe(BasicNewsRecipe): #print 'Use hi-res img', newimg new_html = new_html.replace(img, newimg) return new_html - + def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] @@ -684,7 +683,7 @@ class MPRecipe(BasicNewsRecipe): for item in soup.findAll(stype=True): del item['absmiddle'] return soup - + def populate_article_metadata(self, article, soup, first): # thumbnails shouldn't be available if using hi-res images if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'): @@ -699,7 +698,7 @@ class MPRecipe(BasicNewsRecipe): if not articlebodies: articlebodies = soup.findAll('div',attrs={'id':'newscontent01'}) if not articlebodies: - articlebodies = soup.findAll('div',attrs={'class':'content'}) + articlebodies = soup.findAll('div',attrs={'class':'content'}) if not articlebodies: articlebodies = soup.findAll('div', attrs={'id':'font'}) if articlebodies: @@ -721,12 +720,12 @@ class MPRecipe(BasicNewsRecipe): # display a simple text #article.summary = article.text_summary = u'\u66f4\u591a......' # display word counts - counts = 0 + counts = 0 articlebodies = soup.findAll('div',attrs={'id':'newscontent'}) if not articlebodies: articlebodies = soup.findAll('div',attrs={'id':'newscontent01'}) if not articlebodies: - articlebodies = soup.findAll('div',attrs={'class':'content'}) + articlebodies = soup.findAll('div',attrs={'class':'content'}) if not articlebodies: articlebodies = soup.findAll('div', attrs={'id':'font'}) if articlebodies: @@ -908,5 +907,5 @@ class MPRecipe(BasicNewsRecipe): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): opf.render(opf_file, ncx_file) - + diff --git a/recipes/ming_pao_vancouver.recipe b/recipes/ming_pao_vancouver.recipe index 687d830db9..8dc2c78cb7 100644 --- a/recipes/ming_pao_vancouver.recipe +++ b/recipes/ming_pao_vancouver.recipe @@ -29,14 +29,14 @@ __Date__ = '' ''' Change Log: 2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away - from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day + from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device. 2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010' 2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt 2011/10/19: fix a bug in txt source parsing 2011/10/17: disable fetching of premium content, also improved txt source parsing 2011/10/04: option to get hi-res photos for the articles -2011/09/21: fetching "column" section is made optional. +2011/09/21: fetching "column" section is made optional. 2011/09/18: parse "column" section stuff from source text file directly. 2011/09/07: disable "column" section as it is no longer offered free. 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source @@ -60,7 +60,6 @@ Change Log: 2010/10/31: skip repeated articles in section pages ''' -from calibre import (browser, iswindows, __appname__, force_unicode, preferred_encoding, as_unicode) from calibre.utils.date import now as nowf import os, datetime, re, mechanize from calibre.web.feeds.recipes import BasicNewsRecipe @@ -204,13 +203,13 @@ class MPRecipe(BasicNewsRecipe): return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8] else: return self.get_dtlocal().strftime("%Y-%m-%d") - + def get_fetchyear(self): if __Date__ <> '': return __Date__[0:4] else: - return self.get_dtlocal().strftime("%Y") - + return self.get_dtlocal().strftime("%Y") + def get_fetchmonth(self): if __Date__ <> '': return __Date__[4:6] @@ -268,7 +267,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -305,7 +304,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), # (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]: # articles = self.parse_section(url) @@ -322,7 +321,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + if __InclPremium__ == True: # parse column section articles directly from .txt files for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl') @@ -330,7 +329,7 @@ class MPRecipe(BasicNewsRecipe): articles = self.parse_section2_txt(url, keystr) if articles: feeds.append((title, articles)) - + for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'), (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]: articles = self.parse_section(url) @@ -410,7 +409,7 @@ class MPRecipe(BasicNewsRecipe): title = self.tag_to_string(i) url = 'http://life.mingpao.com/cfm/' + i.get('href', False) if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1): - try: + try: br.open_novisit(url) url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article current_articles.append({'title': title, 'url': url, 'description': ''}) @@ -437,7 +436,7 @@ class MPRecipe(BasicNewsRecipe): included_urls.append(url) current_articles.reverse() return current_articles - + # parse from www.mingpaovan.com def parse_section3(self, url, baseUrl): self.get_fetchdate() @@ -559,7 +558,7 @@ class MPRecipe(BasicNewsRecipe): photo = photo.replace('class="photo"', '') new_raw_html = new_raw_html + '

' + photo + '
' new_html = new_raw_html + '' - else: + else: # .txt based file splitter = re.compile(r'\n') # Match non-digits new_raw_html = 'Untitled
' @@ -622,23 +621,23 @@ class MPRecipe(BasicNewsRecipe): #raw_html = raw_html.replace(u'

\u3010', u'\u3010') if __HiResImg__ == True: # TODO: add a _ in front of an image url - if url.rfind('news.mingpao.com') > -1: + if url.rfind('news.mingpao.com') > -1: imglist = re.findall('src="?.*?jpg"', new_html) br = mechanize.Browser() br.set_handle_redirect(False) for img in imglist: gifimg = img.replace('jpg"', 'gif"') - try: + try: br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1]) new_html = new_html.replace(img, gifimg) - except: + except: # find the location of the first _ pos = img.find('_') if pos > -1: # if found, insert _ after the first _ newimg = img[0:pos] + '_' + img[pos:] new_html = new_html.replace(img, newimg) - else: + else: # if not found, insert _ after " new_html = new_html.replace(img[1:], '"_' + img[1:]) elif url.rfind('life.mingpao.com') > -1: @@ -675,7 +674,7 @@ class MPRecipe(BasicNewsRecipe): #print 'Use hi-res img', newimg new_html = new_html.replace(img, newimg) return new_html - + def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] @@ -684,7 +683,7 @@ class MPRecipe(BasicNewsRecipe): for item in soup.findAll(stype=True): del item['absmiddle'] return soup - + def populate_article_metadata(self, article, soup, first): # thumbnails shouldn't be available if using hi-res images if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'): @@ -699,7 +698,7 @@ class MPRecipe(BasicNewsRecipe): if not articlebodies: articlebodies = soup.findAll('div',attrs={'id':'newscontent01'}) if not articlebodies: - articlebodies = soup.findAll('div',attrs={'class':'content'}) + articlebodies = soup.findAll('div',attrs={'class':'content'}) if not articlebodies: articlebodies = soup.findAll('div', attrs={'id':'font'}) if articlebodies: @@ -721,12 +720,12 @@ class MPRecipe(BasicNewsRecipe): # display a simple text #article.summary = article.text_summary = u'\u66f4\u591a......' # display word counts - counts = 0 + counts = 0 articlebodies = soup.findAll('div',attrs={'id':'newscontent'}) if not articlebodies: articlebodies = soup.findAll('div',attrs={'id':'newscontent01'}) if not articlebodies: - articlebodies = soup.findAll('div',attrs={'class':'content'}) + articlebodies = soup.findAll('div',attrs={'class':'content'}) if not articlebodies: articlebodies = soup.findAll('div', attrs={'id':'font'}) if articlebodies: @@ -908,5 +907,5 @@ class MPRecipe(BasicNewsRecipe): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): opf.render(opf_file, ncx_file) - +