Pull from trunk

Kovid Goyal 2009-02-21 20:42:54 -08:00
commit 1d6a6586a9
20 changed files with 278 additions and 38 deletions

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
-__version__ = '0.4.138'
+__version__ = '0.4.139'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@@ -233,7 +233,7 @@ class RTFMetadataWriter(MetadataWriterPlugin):
class MOBIMetadataWriter(MetadataWriterPlugin):

    name = 'Set MOBI metadata'
-    file_types = set(['mobi', 'prc'])
+    file_types = set(['mobi', 'prc', 'azw'])
    description = _('Set metadata in %s files')%'MOBI'
    author = 'Marshall T. Vandegrift'
@@ -246,4 +246,4 @@ plugins = [HTML2ZIP]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
        x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
-        x.__name__.endswith('MetadataWriter')]
+        x.__name__.endswith('MetadataWriter')]

View File

@@ -33,6 +33,7 @@ class CYBOOKG3(USBMS):
    EBOOK_DIR_MAIN = "eBooks"
    EBOOK_DIR_CARD = "eBooks"
    THUMBNAIL_HEIGHT = 144
+    SUPPORTS_SUB_DIRS = True

    def upload_books(self, files, names, on_card=False, end_session=True,

View File

@@ -30,7 +30,7 @@ def write_t2b(t2bfile, coverdata=None):
    if coverdata != None:
        coverdata = StringIO.StringIO(coverdata)
        cover = Image.open(coverdata).convert("L")
-        cover.thumbnail((96, 144))
+        cover.thumbnail((96, 144), Image.ANTIALIAS)
        t2bcover = Image.new('L', (96, 144), 'white')

        x, y = cover.size
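
Context for the change above: in classic PIL, Image.thumbnail falls back to a fast, low-quality resample filter unless one is passed explicitly, so supplying Image.ANTIALIAS markedly improves the downscaled cover. A minimal standalone sketch (file names are placeholders):

    import Image  # classic PIL import; with Pillow it is: from PIL import Image

    cover = Image.open('cover.jpg').convert('L')  # hypothetical input cover
    cover.thumbnail((96, 144), Image.ANTIALIAS)   # resize in place, preserving aspect ratio
    cover.save('thumb.png')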

View File

@@ -205,9 +205,8 @@ class HTMLProcessor(Processor, Rationalizer):

    def save(self):
        for meta in list(self.root.xpath('//meta')):
            meta.getparent().remove(meta)
-        #for img in self.root.xpath('//img[@src]'):
-        #    self.convert_image(img)
-        Processor.save(self)
+        # Strip all comments since Adobe DE is petrified of them
+        Processor.save(self, strip_comments=True)

    def remove_first_image(self):
        images = self.root.xpath('//img')

View File

@@ -331,9 +331,8 @@ class PreProcessor(object):
        # Convert all entities, since lxml doesn't handle them well
        (re.compile(r'&(\S+?);'), convert_entities),
        # Remove the <![if/endif tags inserted by everybody's darling, MS Word
-        (re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
-        # Strip all comments since Adobe DE is petrified of them
-        (re.compile(r'<!--[^>]*>'), lambda match : ''),
+        (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
+         lambda match: ''),
    ]

    # Fix pdftohtml markup
@@ -447,7 +446,7 @@ class Parser(PreProcessor, LoggingInterface):
    def save_path(self):
        return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])

-    def save(self):
+    def save(self, strip_comments=False):
        '''
        Save processed HTML into the content directory.
        Should be called after all HTML processing is finished.
@@ -458,7 +457,11 @@ class Parser(PreProcessor, LoggingInterface):
            svg.set('xmlns', 'http://www.w3.org/2000/svg')

        ans = tostring(self.root, pretty_print=self.opts.pretty_print)
-        ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
+        ans = re.compile(r'<head>', re.IGNORECASE).sub(
+            '<head>\n\t<meta http-equiv="Content-Type" '
+            'content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
+        if strip_comments:
+            ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
        with open(self.save_path(), 'wb') as f:
            f.write(ans)
        return f.name
@@ -594,7 +597,7 @@ class Processor(Parser):
                mark = etree.Element('hr', style=page_break_before)
                elem.addprevious(mark)

-    def save(self):
+    def save(self, strip_comments=False):
        style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
        for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
            if sheet is not None:
@@ -608,7 +611,7 @@ class Processor(Parser):
                if isinstance(raw, unicode):
                    raw = raw.encode('utf-8')
                open(path, 'wb').write(raw)
-        return Parser.save(self)
+        return Parser.save(self, strip_comments=strip_comments)

    def populate_toc(self, toc):
        '''
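
The comment-stripping regex added to Parser.save is non-greedy and DOTALL, unlike the `<!--[^>]*>` pattern removed from the preprocessor above, which broke on comments containing '>' or spanning lines. A quick illustration of the difference:

    import re

    html = '<body><!-- note\nwith a > inside -->text<!-- another --></body>'

    # Old pattern stops at the first '>', leaving comment debris behind:
    print re.sub(r'<!--[^>]*>', '', html)                     # -> '<body> inside -->text</body>'
    # New pattern removes each comment whole, across newlines:
    print re.compile(r'<!--.*?-->', re.DOTALL).sub('', html)  # -> '<body>text</body>'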

View File

@@ -30,6 +30,7 @@ preferred_source_formats = [
    'XHTML',
    'PRC',
    'AZW',
+    'FB2',
    'RTF',
    'PDF',
    'TXT',

View File

@@ -38,6 +38,7 @@ def extract_embedded_content(doc):
        open(fname, 'wb').write(data)

def to_html(fb2file, tdir):
    fb2file = os.path.abspath(fb2file)
+    cwd = os.getcwd()
    try:
        os.chdir(tdir)
@@ -52,7 +53,7 @@ def to_html(fb2file, tdir):
        result = transform(doc)
        open('index.html', 'wb').write(transform.tostring(result))
        try:
-            mi = get_metadata(open(fb2file, 'rb'))
+            mi = get_metadata(open(fb2file, 'rb'), 'fb2')
        except:
            mi = MetaInformation(None, None)
        if not mi.title:
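
The second argument tells calibre's get_metadata which reader to dispatch to instead of the default stream type, which is why FB2 metadata was previously missed. A usage sketch, assuming the import path calibre used at this time:

    from calibre.ebooks.metadata.meta import get_metadata  # assumed import path

    mi = get_metadata(open('book.fb2', 'rb'), 'fb2')  # 'fb2' selects the FB2 reader
    print mi.title, mi.authors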

Binary file not shown (new image, 295 B).

View File

@@ -114,10 +114,13 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
wget -O- http://calibre.kovidgoyal.net/downloads/${app}-${version}.tar.gz | tar xvz
cd calibre*
python setup.py build &amp;&amp; sudo python setup.py install
+sudo calibre_postinstall
</pre>
Note that if your distribution does not have a
correctly compiled libunrar.so, ${app} will not
-support rar files.
+support rar files. The calibre_postinstall step
+is required for device detection and integration
+with your desktop environment.
</p>
</div>
</td>

View File

@@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Contains the logic for parsing feeds.
'''
-import time, logging, traceback, copy
+import time, logging, traceback, copy, re
from datetime import datetime

from calibre.web.feeds.feedparser import parse
+from calibre import entity_to_unicode
from lxml import html

class Article(object):
@@ -19,6 +20,11 @@ class Article(object):
        self.downloaded = False
        self.id = id
        self.title = title.strip() if title else title
+        try:
+            self.title = re.sub(r'&(\S+);',
+                    entity_to_unicode, self.title)
+        except:
+            pass
        self.url = url
        self.summary = summary
        if summary and not isinstance(summary, unicode):
@@ -37,6 +43,7 @@ class Article(object):
            self.date = published
        self.utctime = datetime(*self.date[:6])
        self.localtime = self.utctime + self.time_offset
+
    def __repr__(self):
        return \
@@ -91,7 +98,8 @@ class Feed(object):
                if len(self.articles) >= max_articles_per_feed:
                    break
                self.parse_article(item)
+
    def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
                          max_articles_per_feed=100):
        self.title = title if title else _('Unknown feed')
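
The new try/except runs entity_to_unicode as a re.sub callback so feed titles like 'AT&amp;T' render correctly. A rough standalone approximation of that helper using only the Python 2 standard library (calibre's real implementation may differ):

    import re
    from htmlentitydefs import name2codepoint

    def entity_to_unicode(match):
        # Resolve a single HTML entity captured by the surrounding regex
        ent = match.group(1)
        if ent.startswith('#'):
            num = ent[1:]
            if num.lower().startswith('x'):
                return unichr(int(num[1:], 16))
            return unichr(int(num))
        if ent in name2codepoint:
            return unichr(name2codepoint[ent])
        return '&%s;' % ent  # unknown entity: leave untouched

    print re.sub(r'&(\S+?);', entity_to_unicode, u'AT&amp;T &#169; 2009')
    # -> AT&T (copyright sign) 2009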

View File

@@ -30,7 +30,8 @@ recipe_modules = ['recipe_' + r for r in (
    'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
    'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
    'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
-    'la_republica', 'physics_today',
+    'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
+    'al_jazeera', 'winsupersite',
)]

import re, imp, inspect, time, os

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
aljazeera.net
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AlJazeera(BasicNewsRecipe):
    title = 'Al Jazeera in English'
    __author__ = 'Darko Miletic'
    description = 'News from Middle East'
    publisher = 'Al Jazeera'
    category = 'news, politics, middle east'
    simultaneous_downloads = 1
    delay = 4
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'iso-8859-1'
    remove_javascript = True
    use_embedded_content = False

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'

    keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]

    remove_tags = [
                     dict(name=['object','link'])
                    ,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
                  ]

    feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(face=True):
            del item['face']
        return soup

View File

@@ -0,0 +1,82 @@
from __future__ import with_statement

__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from urlparse import urlparse, urlunparse

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from threading import RLock

class ChicagoTribune(BasicNewsRecipe):

    title = 'Chicago Tribune'
    __author__ = 'Kovid Goyal'
    description = 'Politics, local and business news from Chicago'
    language = _('English')
    use_embedded_content = False
    articles_are_obfuscated = True
    remove_tags_before = dict(name='h1')
    obfuctation_lock = RLock()

    feeds = [
        ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
        ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
        ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
        ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
        ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
        ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
        ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
        ('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
        ('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
        ('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
        ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
        ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
        ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
        ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
        ('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
        ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
        ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
        ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
        ('Garisson Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
        ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
        ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
        ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
        ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
        ('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
        ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
        ('Music', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
        ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
        ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
        ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
        ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
    ]

    temp_files = []

    def get_article_url(self, article):
        return article.get('feedburner_origlink', article.get('guid', article.get('link')))

    def get_obfuscated_article(self, url, logger):
        with self.obfuctation_lock:
            soup = self.index_to_soup(url)
            img = soup.find('img', alt='Print')
            if img is not None:
                a = img.parent.find('a', href=True)
                purl = urlparse(url)
                xurl = urlunparse(purl[:2] + (a['href'], '', '', ''))
                soup = self.index_to_soup(xurl)
                for img in soup.findAll('img', src=True):
                    if img['src'].startswith('/'):
                        img['src'] = urlunparse(purl[:2]+(img['src'], '', '', ''))
                html = unicode(soup)
            else:
                h1 = soup.find(id='page-title')
                body = soup.find(attrs={'class':re.compile('asset-content')})
                html = u'<html><head/><body>%s</body></html>'%(unicode(h1)+unicode(body))
            self.temp_files.append(PersistentTemporaryFile('_chicago_tribune.xhtml'))
            self.temp_files[-1].write(html.encode('utf-8'))
            self.temp_files[-1].close()
            return self.temp_files[-1].name
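
get_obfuscated_article rebuilds an absolute URL for the printer-friendly page from the article URL's scheme and host plus the relative href of the 'Print' link. A minimal sketch of that urlunparse pattern (URL and path are illustrative):

    from urlparse import urlparse, urlunparse  # Python 2; urllib.parse in Python 3

    url = 'http://www.chicagotribune.com/news/story-page.html'
    purl = urlparse(url)
    # Keep scheme and netloc, swap in a new path, drop params/query/fragment
    print urlunparse(purl[:2] + ('/news/story-page,print.html', '', '', ''))
    # -> http://www.chicagotribune.com/news/story-page,print.html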

View File

@@ -0,0 +1,58 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
e-novine.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class E_novine(BasicNewsRecipe):
    title = 'E-Novine'
    __author__ = 'Darko Miletic'
    description = 'News from Serbia'
    publisher = 'E-novine'
    category = 'news, politics, Balcans'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1250'
    cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
    remove_javascript = True
    use_embedded_content = False
    language = _('Serbian')
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]

    remove_tags = [dict(name=['object','link','embed','iframe'])]

    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = 'sr-Latn-ME'
        soup.html['lang'] = 'sr-Latn-ME'
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
        if ftag:
            it = ftag.div
            it.extract()
            ftag.div.extract()
            ftag.insert(0,it)
        return soup
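
The div shuffle at the end of preprocess_html detaches the first child div, removes the div that then takes its place, and reinserts the original, leaving a single child. A toy BeautifulSoup 3 illustration (markup invented):

    from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3, as calibre recipes use

    soup = BeautifulSoup('<div id="css_47_0_2844H"><div>body</div><div>extra</div></div>')
    ftag = soup.find('div', attrs={'id': 'css_47_0_2844H'})
    it = ftag.div        # first inner div
    it.extract()         # detach it; the second div is now first
    ftag.div.extract()   # remove that one too
    ftag.insert(0, it)   # re-insert the original first div
    print soup           # -> <div id="css_47_0_2844H"><div>body</div></div>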

View File

@@ -19,7 +19,7 @@ class Infobae(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    language = _('Spanish')
-    encoding = 'iso-8859-1'
+    encoding = 'cp1252'
    cover_url = 'http://www.infobae.com/imgs/header/header.gif'
    remove_javascript = True
@@ -28,9 +28,10 @@ class Infobae(BasicNewsRecipe):
                        , '--category' , category
                        , '--publisher', publisher
                        , '--ignore-tables'
+                        , '--ignore-colors'
                        ]

-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    remove_tags = [
                     dict(name=['embed','link','object'])

View File

@@ -6,8 +6,8 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
lasegunda.com
'''
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe

class LaSegunda(BasicNewsRecipe):
    title = 'La Segunda'
    __author__ = 'Darko Miletic'
@@ -21,14 +21,16 @@ class LaSegunda(BasicNewsRecipe):
    encoding = 'cp1252'
    cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
    remove_javascript = True
+    language = _('Spanish')

    html2lrf_options = [
-                          '--comment', description
+                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]

-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='table')]
@@ -52,10 +54,7 @@ class LaSegunda(BasicNewsRecipe):

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0,mtag)
-        for item in soup.findAll(name='table', width=True):
-            del item['width']
        for item in soup.findAll(style=True):
            del item['style']
        return soup
-
-    language = _('Spanish')

View File

@@ -7,11 +7,10 @@ pagina12.com.ar
'''

from calibre import strftime
-from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe

class Pagina12(BasicNewsRecipe):
-    title = u'Pagina/12'
+    title = 'Pagina/12'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Argentina y el resto del mundo'
    publisher = 'La Pagina S.A.'
@@ -20,12 +19,14 @@ class Pagina12(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
-    cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
+    cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
    remove_javascript = True
    use_embedded_content = False
+    language = _('Spanish')

    html2lrf_options = [
-                          '--comment', description
+                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]
@@ -50,5 +51,3 @@ class Pagina12(BasicNewsRecipe):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
-
-    language = _('Spanish')

View File

@@ -0,0 +1,28 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe

class Winsupersite(BasicNewsRecipe):
    title = u'Supersite for Windows'
    description = u'Paul Thurrott SuperSite for Windows'
    publisher = 'Paul Thurrott'
    __author__ = 'Hypernova'
    language = _('English')
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    html2lrf_options = ['--ignore-tables']
    html2epub_options = 'linearize_tables = True'
    remove_tags_before = dict(name='h1')

    preprocess_regexps = [
        (re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),
         lambda match: '</body>'),
    ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.winsupersite.com')
        return br

    feeds = [(u'Supersite for Windows', u'http://www.winsupersite.com/supersite.xml')]
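
The preprocess_regexps entry truncates everything from the author's signature through the end of the body, a common recipe idiom for chopping footers. A toy run (HTML invented):

    import re

    html = '<html><body><p>Article</p><p>--Paul Thurrott</p><p>footer</p></body>'
    pat = re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL | re.IGNORECASE)
    print pat.sub('</body>', html)  # -> '<html><body><p>Article</p></body>'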

View File

@@ -284,7 +284,13 @@ class gui(OptionlessCommand):
                manifest = '<RCC>\n<qresource prefix="/">\n%s\n</qresource>\n</RCC>'%'\n'.join(files)
                with open('images.qrc', 'wb') as f:
                    f.write(manifest)
-                check_call(['pyrcc4', '-o', images, 'images.qrc'])
+                try:
+                    check_call(['pyrcc4', '-o', images, 'images.qrc'])
+                except:
+                    import traceback
+                    traceback.print_exc()
+                    raise Exception('You do not have pyrcc4 in your PATH. '
+                                    'Install the PyQt4 development tools.')
            else:
                print 'Images are up to date'
        finally:
@@ -670,7 +676,7 @@ class stage3(OptionlessCommand):

    def run(self):
        OptionlessCommand.run(self)
-        self.misc()
+        self.misc()

class stage2(OptionlessCommand):

    description = 'Stage 2 of the build process'

@@ -699,4 +705,4 @@ class upload(OptionlessCommand):
            ('stage1', None),
            ('stage2', None),
            ('stage3', None)
-    ]
+    ]
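
The try/except around check_call converts a confusing OSError into an actionable message. An alternative sketch that probes for the tool up front (a hypothetical refactor, not what this commit does):

    from distutils.spawn import find_executable
    from subprocess import check_call

    if find_executable('pyrcc4') is None:
        raise Exception('You do not have pyrcc4 in your PATH. '
                        'Install the PyQt4 development tools.')
    check_call(['pyrcc4', '-o', 'images.py', 'images.qrc'])  # paths illustrative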