...

2025-09-29 15:31:08 -04:00 · 2012-08-17 09:47:27 +05:30 · 2012-08-17 09:47:27 +05:30 · 29f58de5c0
commit 29f58de5c0
parent 036cea09d7
8 changed files with 62 additions and 99 deletions
--- a/recipes/calgary_herald.recipe
+++ b/recipes/calgary_herald.recipe
@ -5,14 +5,9 @@ __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
-import string, re
+import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 import string, re
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
 class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
    # un-comment the following six lines for the Ottawa Citizen
 ##    title = u'Ottawa Citizen'
 ##    url_prefix = 'http://www.ottawacitizen.com'
-##    description = u'News from Ottawa, ON'   
+##    description = u'News from Ottawa, ON'
 ##    std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
 ##    logo_url = 'oclogo.jpg'
 ##    fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
    Kindle_Fire=False
    masthead_url = std_logo_url
-    url_list = []    
+    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
                #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                #photocredit { font-size: xx-small; font-weight: normal; }'''
-    
+
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
    remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
    def get_cover_url(self):
-        from datetime import timedelta, datetime, date
+        from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
    def prepare_masthead_image(self, path_to_image, out_path):
        if self.Kindle_Fire:
            from calibre import fit_image
            from calibre.utils.magick import Image, create_canvas
            img = Image()
            img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
                            div.insert(0,img)
                            allpics.append(div)
                pgall.replaceWith(allpics)
-            
+
        for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
            pg.extract()
        return self.strip_anchors(soup)
-                        
+
    def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
                    if 'GALLERY' in title.upper():
                        return
                    if 'PHOTOS' in title.upper():
-                        return                  
+                        return
                    dtag = adiv.find('div','content')
                    description=''
                    print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
            parse_web_index(k,url)
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
-                    
+
--- a/recipes/edmonton_journal.recipe
+++ b/recipes/edmonton_journal.recipe
@ -5,14 +5,9 @@ __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
-import string, re
+import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 import string, re
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
 class CanWestPaper(BasicNewsRecipe):
@ -23,7 +18,7 @@ class CanWestPaper(BasicNewsRecipe):
        (u'Vancouver',u'/news/vancouver/index.html'),
        (u'Calgary',u'/news/calgary/index.html'),
        (u'Edmonton',u'/news/edmonton/index.html'),
-        (u'Montreal',u'/news/montreal/index.html'),,
+        (u'Montreal',u'/news/montreal/index.html'),
        (u'Fraser Valley',u'/news/fraser-valley/index.html'),
        (u'British Columbia',u'/news/bc/index.html'),
        (u'Alberta',u'/news/alberta/index.html'),
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
    # un-comment the following six lines for the Ottawa Citizen
 ##    title = u'Ottawa Citizen'
 ##    url_prefix = 'http://www.ottawacitizen.com'
-##    description = u'News from Ottawa, ON'   
+##    description = u'News from Ottawa, ON'
 ##    std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
 ##    logo_url = 'oclogo.jpg'
 ##    fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
    Kindle_Fire=False
    masthead_url = std_logo_url
-    url_list = []    
+    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
                #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                #photocredit { font-size: xx-small; font-weight: normal; }'''
-    
+
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
    remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
    def get_cover_url(self):
-        from datetime import timedelta, datetime, date
+        from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
    def prepare_masthead_image(self, path_to_image, out_path):
        if self.Kindle_Fire:
            from calibre import fit_image
            from calibre.utils.magick import Image, create_canvas
            img = Image()
            img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
                            div.insert(0,img)
                            allpics.append(div)
                pgall.replaceWith(allpics)
-            
+
        for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
            pg.extract()
        return self.strip_anchors(soup)
-                        
+
    def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
                    if 'GALLERY' in title.upper():
                        return
                    if 'PHOTOS' in title.upper():
-                        return                  
+                        return
                    dtag = adiv.find('div','content')
                    description=''
                    print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
            parse_web_index(k,url)
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
-                    
+
--- a/recipes/montreal_gazette.recipe
+++ b/recipes/montreal_gazette.recipe
@ -5,14 +5,9 @@ __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
-import string, re
+import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 import string, re
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
 class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
    # un-comment the following six lines for the Ottawa Citizen
 ##    title = u'Ottawa Citizen'
 ##    url_prefix = 'http://www.ottawacitizen.com'
-##    description = u'News from Ottawa, ON'   
+##    description = u'News from Ottawa, ON'
 ##    std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
 ##    logo_url = 'oclogo.jpg'
 ##    fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
    Kindle_Fire=False
    masthead_url = std_logo_url
-    url_list = []    
+    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
                #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                #photocredit { font-size: xx-small; font-weight: normal; }'''
-    
+
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
    remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
    def get_cover_url(self):
-        from datetime import timedelta, datetime, date
+        from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
    def prepare_masthead_image(self, path_to_image, out_path):
        if self.Kindle_Fire:
            from calibre import fit_image
            from calibre.utils.magick import Image, create_canvas
            img = Image()
            img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
                            div.insert(0,img)
                            allpics.append(div)
                pgall.replaceWith(allpics)
-            
+
        for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
            pg.extract()
        return self.strip_anchors(soup)
-                        
+
    def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
                    if 'GALLERY' in title.upper():
                        return
                    if 'PHOTOS' in title.upper():
-                        return                  
+                        return
                    dtag = adiv.find('div','content')
                    description=''
                    print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
            parse_web_index(k,url)
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
-                    
+
--- a/recipes/ottawa_citizen.recipe
+++ b/recipes/ottawa_citizen.recipe
@ -5,14 +5,9 @@ __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
-import string, re
+import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 import string, re
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
 class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
    # un-comment the following six lines for the Ottawa Citizen
    title = u'Ottawa Citizen'
    url_prefix = 'http://www.ottawacitizen.com'
-    description = u'News from Ottawa, ON'   
+    description = u'News from Ottawa, ON'
    std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
    logo_url = 'oclogo.jpg'
    fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
    Kindle_Fire=False
    masthead_url = std_logo_url
-    url_list = []    
+    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
                #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                #photocredit { font-size: xx-small; font-weight: normal; }'''
-    
+
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
    remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
    def get_cover_url(self):
-        from datetime import timedelta, datetime, date
+        from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
    def prepare_masthead_image(self, path_to_image, out_path):
        if self.Kindle_Fire:
            from calibre import fit_image
            from calibre.utils.magick import Image, create_canvas
            img = Image()
            img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
                            div.insert(0,img)
                            allpics.append(div)
                pgall.replaceWith(allpics)
-            
+
        for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
            pg.extract()
        return self.strip_anchors(soup)
-                        
+
    def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
                    if 'GALLERY' in title.upper():
                        return
                    if 'PHOTOS' in title.upper():
-                        return                  
+                        return
                    dtag = adiv.find('div','content')
                    description=''
                    print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
            parse_web_index(k,url)
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
-                    
+
--- a/recipes/vancouver_provice.recipe
+++ b/recipes/vancouver_provice.recipe
@ -5,14 +5,9 @@ __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
-import string, re
+import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 import string, re
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
 class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
    # un-comment the following six lines for the Ottawa Citizen
 ##    title = u'Ottawa Citizen'
 ##    url_prefix = 'http://www.ottawacitizen.com'
-##    description = u'News from Ottawa, ON'   
+##    description = u'News from Ottawa, ON'
 ##    std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
 ##    logo_url = 'oclogo.jpg'
 ##    fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
    Kindle_Fire=False
    masthead_url = std_logo_url
-    url_list = []    
+    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
                #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                #photocredit { font-size: xx-small; font-weight: normal; }'''
-    
+
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
    remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
    def get_cover_url(self):
-        from datetime import timedelta, datetime, date
+        from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
    def prepare_masthead_image(self, path_to_image, out_path):
        if self.Kindle_Fire:
            from calibre import fit_image
            from calibre.utils.magick import Image, create_canvas
            img = Image()
            img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
                            div.insert(0,img)
                            allpics.append(div)
                pgall.replaceWith(allpics)
-            
+
        for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
            pg.extract()
        return self.strip_anchors(soup)
-                        
+
    def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
                    if 'GALLERY' in title.upper():
                        return
                    if 'PHOTOS' in title.upper():
-                        return                  
+                        return
                    dtag = adiv.find('div','content')
                    description=''
                    print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
            parse_web_index(k,url)
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
-                    
+
--- a/recipes/vancouver_sun.recipe
+++ b/recipes/vancouver_sun.recipe
@ -5,14 +5,9 @@ __license__   = 'GPL v3'
 '''
 www.canada.com
 '''
-import string, re
+import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 import string, re
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
 class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
    # un-comment the following six lines for the Ottawa Citizen
 ##    title = u'Ottawa Citizen'
 ##    url_prefix = 'http://www.ottawacitizen.com'
-##    description = u'News from Ottawa, ON'   
+##    description = u'News from Ottawa, ON'
 ##    std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
 ##    logo_url = 'oclogo.jpg'
 ##    fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
    Kindle_Fire=False
    masthead_url = std_logo_url
-    url_list = []    
+    url_list = []
    language = 'en_CA'
    __author__ = 'Nick Redding'
    no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
                #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
                #photocredit { font-size: xx-small; font-weight: normal; }'''
-    
+
    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
    remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
    def get_cover_url(self):
-        from datetime import timedelta, datetime, date
+        from datetime import timedelta, date
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
    def prepare_masthead_image(self, path_to_image, out_path):
        if self.Kindle_Fire:
            from calibre import fit_image
            from calibre.utils.magick import Image, create_canvas
            img = Image()
            img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
                            div.insert(0,img)
                            allpics.append(div)
                pgall.replaceWith(allpics)
-            
+
        for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
            pg.extract()
        return self.strip_anchors(soup)
-                        
+
    def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
                    if 'GALLERY' in title.upper():
                        return
                    if 'PHOTOS' in title.upper():
-                        return                  
+                        return
                    dtag = adiv.find('div','content')
                    description=''
                    print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
            parse_web_index(k,url)
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
-                    
+
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -123,7 +123,7 @@ class PluginWidget(QWidget,Ui_Form):
                    # Look up custom column friendly name
                    rule['field'] = self.eligible_custom_fields[rule['field']]['field']
                    if rule['pattern'] in [_('any value'),_('any date')]:
-                        rule_pattern = '.*'
+                        rule['pattern'] = '.*'
                    elif rule['pattern'] == _('unspecified'):
                        rule['pattern'] = 'None'
            if 'prefix' in rule:
--- a/src/calibre/gui2/store/opensearch_store.py
+++ b/src/calibre/gui2/store/opensearch_store.py
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import mimetypes
 from contextlib import closing
 from lxml import etree