Mirror of https://github.com/kovidgoyal/calibre.git
[merge] Merge from trunk

This commit is contained in: commit ed9f1133a1
@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
                ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
             dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
             dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -56,6 +54,14 @@ class Economist(BasicNewsRecipe):
         return br
 '''

+    def get_cover_url(self):
+        br = self.browser
+        br.open(self.INDEX)
+        issue = br.geturl().split('/')[4]
+        self.log('Fetching cover for issue: %s'%issue)
+        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
+        return cover_url
+
     def parse_index(self):
         return self.economist_parse_index()
@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
                ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
             dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
             dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -40,6 +38,14 @@ class Economist(BasicNewsRecipe):
     # downloaded with connection reset by peer (104) errors.
     delay = 1

+    def get_cover_url(self):
+        br = self.browser
+        br.open(self.INDEX)
+        issue = br.geturl().split('/')[4]
+        self.log('Fetching cover for issue: %s'%issue)
+        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
+        return cover_url
+

     def parse_index(self):
         try:
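Both files above gain the same get_cover_url. One detail worth flagging: issue.translate(None, '-') relies on the Python 2 str.translate signature, where a None table plus a second argument deletes characters; under Python 3 the same call raises a TypeError. A minimal, version-neutral sketch of that step (the issue slug below is a hypothetical example of what br.geturl().split('/')[4] might return):

    issue = '2011-10-08'   # hypothetical slug parsed from the issue URL
    cover_url = ("http://media.economist.com/sites/default/files/imagecache/"
                 "print-cover-full/print-covers/%s_CNA400.jpg" % issue.replace('-', ''))
    # -> .../print-covers/20111008_CNA400.jpg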
@@ -5,30 +5,46 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     description = 'News as provide by The Metro -UK'

     __author__ = 'Dave Asbury'
     cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'

     no_stylesheets = True
     oldest_article = 1
-    max_articles_per_feed = 25
+    max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True

-    preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    #preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    preprocess_regexps = [
+        (re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
+    preprocess_regexps = [
+        (re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]

     language = 'en_GB'


     masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

-    extra_css = 'h2 {font: sans-serif medium;}'

     keep_only_tags = [
         dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
         dict(attrs={'class':['img-cnt figure']}),
         dict(attrs={'class':['art-img']}),

-        dict(name='div', attrs={'class':'art-lft'})
+        dict(name='div', attrs={'class':'art-lft'}),
+        dict(name='p')
     ]
     remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
                    'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
-                   dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
-    ]
+                   dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
+                   ,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
+    ]
     feeds = [
         (u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]

+    extra_css = '''
+    body {font: sans-serif medium;}'
+    h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+    h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
+    span{ font-size:9.5px; font-weight:bold;font-style:italic}
+    p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+
+    '''
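A note on the Metro hunk above: preprocess_regexps is assigned three times in succession, and in Python the last assignment wins, so only the 'tweet' rule is live; the commented-out line contributes nothing and the img-cap legend rule is dead code. If both substitutions are meant to run, they would have to share one list, as in this sketch (same two patterns as in the diff):

    preprocess_regexps = [
        (re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL),
         lambda match: '<p></p><span class="img-cap legend"> '),
        (re.compile(r'tweet', re.IGNORECASE | re.DOTALL),
         lambda match: ''),
    ]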
@@ -6,19 +6,24 @@ __Region__ = 'Hong Kong'
 # Users of Kindle 3 with limited system-level CJK support
 # please replace the following "True" with "False".
 __MakePeriodical__ = True
-# Turn below to true if your device supports display of CJK titles
+# Turn below to True if your device supports display of CJK titles
 __UseChineseTitle__ = False
 # Set it to False if you want to skip images
 __KeepImages__ = True
-# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
+# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
 __UseLife__ = True
-# (HK only) if __UseLife__ is true, turn this on if you want to include the column section
+# (HK only) It is to disable the column section which is now a premium content
 __InclCols__ = False
+# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
+__ParsePFF__ = False
+# (HK only) Turn below to True if you wish hi-res images
+__HiResImg__ = False

 '''
 Change Log:
-2011/09/21: fetching "column" section is made optional. Default is False
+2011/10/04: option to get hi-res photos for the articles
+2011/09/21: fetching "column" section is made optional.
 2011/09/18: parse "column" section stuff from source text file directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@@ -42,7 +47,7 @@ Change Log:
 2010/10/31: skip repeated articles in section pages
 '''

-import os, datetime, re
+import os, datetime, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
@@ -56,7 +61,7 @@ class MPRecipe(BasicNewsRecipe):
     title = 'Ming Pao - Hong Kong'
     description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
     category = 'Chinese, News, Hong Kong'
-    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
                       dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
@@ -147,43 +152,6 @@ class MPRecipe(BasicNewsRecipe):
     conversion_options = {'linearize_tables':True}
     timefmt = ''

-    def image_url_processor(cls, baseurl, url):
-        # trick: break the url at the first occurance of digit, add an additional
-        # '_' at the front
-        # not working, may need to move this to preprocess_html() method
-        # minIdx = 10000
-        # i0 = url.find('0')
-        # if i0 >= 0 and i0 < minIdx:
-        #     minIdx = i0
-        # i1 = url.find('1')
-        # if i1 >= 0 and i1 < minIdx:
-        #     minIdx = i1
-        # i2 = url.find('2')
-        # if i2 >= 0 and i2 < minIdx:
-        #     minIdx = i2
-        # i3 = url.find('3')
-        # if i3 >= 0 and i0 < minIdx:
-        #     minIdx = i3
-        # i4 = url.find('4')
-        # if i4 >= 0 and i4 < minIdx:
-        #     minIdx = i4
-        # i5 = url.find('5')
-        # if i5 >= 0 and i5 < minIdx:
-        #     minIdx = i5
-        # i6 = url.find('6')
-        # if i6 >= 0 and i6 < minIdx:
-        #     minIdx = i6
-        # i7 = url.find('7')
-        # if i7 >= 0 and i7 < minIdx:
-        #     minIdx = i7
-        # i8 = url.find('8')
-        # if i8 >= 0 and i8 < minIdx:
-        #     minIdx = i8
-        # i9 = url.find('9')
-        # if i9 >= 0 and i9 < minIdx:
-        #     minIdx = i9
-        return url
-
     def get_dtlocal(self):
         dt_utc = datetime.datetime.utcnow()
         if __Region__ == 'Hong Kong':
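The block deleted above probed for the first digit in an image URL by calling url.find() once per digit character '0' through '9' and keeping the minimum index. For the record, the idea its comment describes (insert a '_' in front of the first digit) reduces to a single regular-expression search; this is a sketch of that idea, not code from the recipe:

    import re

    def underscore_before_first_digit(url):
        m = re.search(r'\d', url)          # first digit anywhere in the url, if any
        if m is None:
            return url
        return url[:m.start()] + '_' + url[m.start():]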
@@ -260,15 +228,16 @@ class MPRecipe(BasicNewsRecipe):
         else:
             for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                                (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                 articles = self.parse_section(url)
                 if articles:
                     feeds.append((title, articles))

             # special- editorial
-            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-            if ed_articles:
-                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+            #if ed_articles:
+            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))

             for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -279,20 +248,39 @@ class MPRecipe(BasicNewsRecipe):

             # special - finance
             #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-            if fin_articles:
-                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+            #if fin_articles:
+            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))

-            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-                articles = self.parse_section(url)
+            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
+                articles = self.parse_section2(url, keystr)
                 if articles:
                     feeds.append((title, articles))

+            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+            #    articles = self.parse_section(url)
+            #    if articles:
+            #        feeds.append((title, articles))

             # special - entertainment
-            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-            if ent_articles:
-                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+            #if ent_articles:
+            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))

+            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
+                                       ]:
+                articles = self.parse_section2(url, keystr)
+                if articles:
+                    feeds.append((title, articles))

+            if __InclCols__ == True:
+                # parse column section articles directly from .txt files
+                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                           ]:
+                    articles = self.parse_section2_txt(url, keystr)
+                    if articles:
+                        feeds.append((title, articles))

             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -300,11 +288,6 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))

-
-            # special- columns
-            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-            if col_articles:
-                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
         elif __Region__ == 'Vancouver':
             for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                                (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -348,6 +331,16 @@ class MPRecipe(BasicNewsRecipe):
                     title = self.tag_to_string(a)
                     url = a.get('href', False)
                     url = 'http://news.mingpao.com/' + dateStr + '/' +url
+                    # replace the url to the print-friendly version
+                    if __ParsePFF__ == True:
+                        if url.rfind('Redirect') <> -1:
+                            url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
+                            url = re.sub('%2F.*%2F', '/', url)
+                            title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
+                            url = url.replace('%2Etxt', '_print.htm')
+                            url = url.replace('%5F', '_')
+                        else:
+                            url = url.replace('.htm', '_print.htm')
                     if url not in included_urls and url.rfind('Redirect') == -1:
                         current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                         included_urls.append(url)
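To make the new rewrite concrete: <> is the Python 2 spelling of !=, and for a Redirect-style link the two re.sub calls collapse the doubled date and the percent-encoded path before the suffix swap points at the print-friendly page. A rough walk-through on a hypothetical URL of that shape (the real Redirect format is not shown in this diff):

    import re

    dateStr = '20111004'
    url = 'http://news.mingpao.com/20111004/Redirect.cfm?path=20111004%2Fgaa%2Fgaa1%2Etxt'
    url = re.sub(dateStr + '.*' + dateStr, dateStr, url)  # drop everything between the two dates
    url = re.sub('%2F.*%2F', '/', url)                    # collapse the encoded path segments
    url = url.replace('%2Etxt', '_print.htm')             # switch to the print-friendly page
    url = url.replace('%5F', '_')
    # url is now 'http://news.mingpao.com/20111004/gaa1_print.htm'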
@@ -472,38 +465,119 @@ class MPRecipe(BasicNewsRecipe):
             current_articles.reverse()
         return current_articles

-    # preprocess those .txt based files
+    # preprocess those .txt and javascript based files
     def preprocess_raw_html(self, raw_html, url):
-        if url.rfind('ftp') == -1:
+        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
+        if __HiResImg__ == True:
+            # TODO: add a _ in front of an image url
+            if url.rfind('news.mingpao.com') > -1:
+                imglist = re.findall('src="?.*?jpg"', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                for img in imglist:
+                    gifimg = img.replace('jpg"', 'gif"')
+                    try:
+                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        # find the location of the first _
+                        pos = img.find('_')
+                        if pos > -1:
+                            # if found, insert _ after the first _
+                            newimg = img[0:pos] + '_' + img[pos:]
+                            raw_html = raw_html.replace(img, newimg)
+                        else:
+                            # if not found, insert _ after "
+                            raw_html = raw_html.replace(img[1:], '"_' + img[1:])
+            elif url.rfind('life.mingpao.com') > -1:
+                imglist = re.findall('src=\'?.*?jpg\'', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                #print 'Img list: ', imglist, '\n'
+                for img in imglist:
+                    gifimg = img.replace('jpg\'', 'gif\'')
+                    try:
+                        #print 'Original: ', url
+                        #print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
+                        gifurl = re.sub(r'dailynews.*txt', '', url)
+                        #print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
+                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                        #print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
+                        #br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        #print 'GIF not found'
+                        pos = img.rfind('/')
+                        newimg = img[0:pos+1] + '_' + img[pos+1:]
+                        #print 'newimg: ', newimg
+                        raw_html = raw_html.replace(img, newimg)
+        if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
             return raw_html
         else:
-            splitter = re.compile(r'\n') # Match non-digits
-            new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
-            next_is_img_txt = False
-            title_started = False
-            met_article_start_char = False
-            for item in splitter.split(raw_html):
-                if item.startswith(u'\u3010'):
-                    met_article_start_char = True
-                    new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
-                else:
-                    if next_is_img_txt == False:
-                        if item.startswith('='):
-                            next_is_img_txt = True
-                            new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
-                        else:
-                            if met_article_start_char == False:
-                                if title_started == False:
-                                    new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
-                                    title_started = True
-                                else:
-                                    new_raw_html = new_raw_html + item + '\n'
-                            else:
-                                new_raw_html = new_raw_html + item + '<p>\n'
-                    else:
-                        next_is_img_txt = False
-                        new_raw_html = new_raw_html + item + '\n'
-            return new_raw_html + '</div></body></html>'
+            if url.rfind('_print.htm') <> -1:
+                # javascript based file
+                splitter = re.compile(r'\n')
+                new_raw_html = '<html><head><title>Untitled</title></head>'
+                new_raw_html = new_raw_html + '<body>'
+                for item in splitter.split(raw_html):
+                    if item.startswith('var heading1 ='):
+                        heading = item.replace('var heading1 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="heading">' + heading
+                    if item.startswith('var heading2 ='):
+                        heading = item.replace('var heading2 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        if heading <> '':
+                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
+                        else:
+                            new_raw_html = new_raw_html + '</div>'
+                    if item.startswith('var content ='):
+                        content = item.replace("var content = ", '')
+                        content = content.replace('\'', '')
+                        content = content.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
+                    if item.startswith('var photocontent ='):
+                        photo = item.replace('var photocontent = \'', '')
+                        photo = photo.replace('\'', '')
+                        photo = photo.replace(';', '')
+                        photo = photo.replace('<tr>', '')
+                        photo = photo.replace('<td>', '')
+                        photo = photo.replace('</tr>', '')
+                        photo = photo.replace('</td>', '<br>')
+                        photo = photo.replace('class="photo"', '')
+                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
+                return new_raw_html + '</body></html>'
+            else:
+                # .txt based file
+                splitter = re.compile(r'\n') # Match non-digits
+                new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
+                next_is_img_txt = False
+                title_started = False
+                met_article_start_char = False
+                for item in splitter.split(raw_html):
+                    if item.startswith(u'\u3010'):
+                        met_article_start_char = True
+                        new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
+                    else:
+                        if next_is_img_txt == False:
+                            if item.startswith('='):
+                                next_is_img_txt = True
+                                new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
+                            else:
+                                if met_article_start_char == False:
+                                    if title_started == False:
+                                        new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
+                                        title_started = True
+                                    else:
+                                        new_raw_html = new_raw_html + item + '\n'
+                                else:
+                                    new_raw_html = new_raw_html + item + '<p>\n'
+                        else:
+                            next_is_img_txt = False
+                            new_raw_html = new_raw_html + item + '\n'
+                return new_raw_html + '</div></body></html>'

     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
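The heart of the new __HiResImg__ branch is a network probe: for every src="...jpg" in the page it asks the server, via mechanize with redirects disabled, whether a same-named .gif exists, swaps it in if so, and otherwise falls back to inserting a '_' into the jpg name. A stripped-down sketch of the probe pattern, detached from the recipe's surrounding state (base is an assumed directory URL for the relative image names):

    import re, mechanize

    def upgrade_images(raw_html, base):
        br = mechanize.Browser()
        br.set_handle_redirect(False)      # treat a redirect as "not found"
        for img in re.findall('src="?.*?jpg"', raw_html):
            gifimg = img.replace('jpg"', 'gif"')
            try:
                # gifimg[5:-1] strips the leading 'src="' and the trailing '"'
                br.open_novisit(base + gifimg[5:len(gifimg)-1])
                raw_html = raw_html.replace(img, gifimg)   # hi-res gif exists
            except Exception:
                pass                                       # keep the jpg
        return raw_html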
@@ -604,7 +678,7 @@ class MPRecipe(BasicNewsRecipe):
                 if po is None:
                     self.play_order_counter += 1
                     po = self.play_order_counter
-                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+                parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
                                 play_order=po, author=auth, description=desc)
                 last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                 for sp in a.sub_pages:
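A side effect of the hunk above: the old line wrapped the fallback title in _(), calibre's gettext lookup, while the new line keeps the parentheses but drops the underscore, so ('Untitled Article') is just a parenthesised string literal and the fallback title is no longer localised.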
recipes/revista_piaui.recipe (new file, 29 lines)
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RevistaPiaui(BasicNewsRecipe):
+    title = u'Revista piau\xed'
+    language = 'pt_BR'
+    __author__ = u'Eduardo Gustini Simões'
+    oldest_article = 31
+    max_articles_per_feed = 50
+    auto_cleanup = True
+
+    feeds = [(u'Edi\xe7\xe3o Atual', u'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')]
+
+    def parse_feeds (self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                soup = self.index_to_soup('http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')
+                itemTitle = article.title.partition('|')[0].rstrip()
+                item = soup.find(text=itemTitle)
+                articleDescription = item.parent.parent.description.string.partition('<br />')[2]
+                article.summary = articleDescription
+
+        return feeds
+
+    def populate_article_metadata(self, article, soup, first):
+        h2 = soup.find('h2')
+        h2.string.replaceWith(h2.string.partition('|')[0].rstrip())
+        h2.replaceWith(h2.prettify() + '<p><em>' + article.summary + '</em></p><p><em>' + ' posted at ' + article.localtime.strftime('%d-%m-%Y') + '</em></p>')
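One thing to watch in the new recipe: parse_feeds calls self.index_to_soup() on the same feed URL once per article, re-downloading the XML every pass. A sketch with the fetch hoisted out of the loop (one round-trip, plus a guard for titles that fail to match; otherwise the logic is the recipe's own):

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        # fetch the feed XML once instead of once per article
        soup = self.index_to_soup('http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')
        for feed in feeds:
            for article in feed.articles[:]:
                itemTitle = article.title.partition('|')[0].rstrip()
                item = soup.find(text=itemTitle)
                if item is not None:
                    article.summary = item.parent.parent.description.string.partition('<br />')[2]
        return feeds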
@@ -133,7 +133,7 @@ class Rule(object): # {{{
             'lt': ('1', '', ''),
             'gt': ('', '', '1')
         }[action]
-        return "cmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), %s, '%s', '%s', '%s')" % (col,
+        return "strcmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), '%s', '%s', '%s', '%s')" % (col,
                 val, lt, eq, gt)

     def multiple_condition(self, col, action, val, sep):
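The one-line change swaps the template function cmp for strcmp when building date rules: format_date(..., 'yyyy-MM-dd') yields a string such as '2011-10-04', which strcmp orders correctly as text, whereas cmp compares its arguments as numbers (note the value is now also quoted, '%s' rather than bare %s). For a hypothetical date column #read_date and an 'lt' rule, the generated expression would be:

    strcmp(format_date(raw_field('#read_date'), 'yyyy-MM-dd'), '2011-10-04', '1', '', '')

with ('1', '', '') being the lt/eq/gt results taken from the table above.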
@@ -266,7 +266,7 @@ The following functions are available in addition to those described in single-f
 * ``has_cover()`` -- return ``Yes`` if the book has a cover, otherwise return the empty string
 * ``not(value)`` -- returns the string "1" if the value is empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
 * ``list_difference(list1, list2, separator)`` -- return a list made by removing from `list1` any item found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
-* ``list_equals(list1, sep1, list2, sep2, yes_val, no_val) -- return `yes_val` if list1 and list2 contain the same items, otherwise return `no_val`. The items are determined by splitting each list using the appropriate separator character (`sep1` or `sep2`). The order of items in the lists is not relevant. The compare is case insensitive.
+* ``list_equals(list1, sep1, list2, sep2, yes_val, no_val)`` -- return `yes_val` if `list1` and `list2` contain the same items, otherwise return `no_val`. The items are determined by splitting each list using the appropriate separator character (`sep1` or `sep2`). The order of items in the lists is not relevant. The compare is case insensitive.
 * ``list_intersection(list1, list2, separator)`` -- return a list made by removing from `list1` any item not found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
 * ``list_sort(list, direction, separator)`` -- return list sorted using a case-insensitive sort. If `direction` is zero, the list is sorted ascending, otherwise descending. The list items are separated by separator, as are the items in the returned list.
 * ``list_union(list1, list2, separator)`` -- return a list made by merging the items in list1 and list2, removing duplicate items using a case-insensitive compare. If items differ in case, the one in list1 is used. The items in list1 and list2 are separated by separator, as are the items in the returned list.
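As a worked illustration of the list semantics documented above, a Python sketch mirroring the described case-insensitive behaviour (not calibre's implementation):

    def list_difference(list1, list2, separator):
        # items of list1 not present in list2, compared case-insensitively
        drop = set(x.strip().lower() for x in list2.split(separator))
        kept = [x.strip() for x in list1.split(separator)
                if x.strip().lower() not in drop]
        return separator.join(kept)

    def list_equals(list1, sep1, list2, sep2, yes_val, no_val):
        # order does not matter; compare is case-insensitive
        a = set(x.strip().lower() for x in list1.split(sep1))
        b = set(x.strip().lower() for x in list2.split(sep2))
        return yes_val if a == b else no_val

    # e.g. list_difference('A, b, C', 'B', ',') -> 'A,C'
    #      list_equals('a,b', ',', 'B A', ' ', 'yes', 'no') -> 'yes'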