Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 02:34:06 -04:00)

Commit fd288645d0: sync to trunk.

recipes/defensenews.recipe (new file, 62 lines)
@@ -0,0 +1,62 @@
+__license__   = 'GPL v3'
+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.defensenews.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DefenseNews(BasicNewsRecipe):
+    title                 = 'Defense News'
+    __author__            = 'Darko Miletic'
+    description           = 'Find late-breaking defense news from the leading defense news weekly'
+    publisher             = 'Gannett Government Media Corporation'
+    category              = 'defense news, defence news, defense, defence, defence budget, defence policy'
+    oldest_article        = 31
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.defensenews.com/images/logo_defensenews2.jpg'
+    extra_css             = """
+        body{font-family: Arial,Helvetica,sans-serif }
+        img{margin-bottom: 0.4em; display:block}
+        .info{font-size: small; color: gray}
+    """
+
+    conversion_options = {
+          'comment'   : description
+        , 'tags'      : category
+        , 'publisher' : publisher
+        , 'language'  : language
+    }
+
+    remove_tags = [
+         dict(name=['meta','link'])
+        ,dict(attrs={'class':['toolbar','related','left','right']})
+    ]
+    remove_tags_before = dict(attrs={'class':'storyWrp'})
+    remove_tags_after  = dict(attrs={'class':'middle'})
+
+    remove_attributes = ['lang']
+
+    feeds = [
+          (u'Europe'              , u'http://www.defensenews.com/rss/eur/')
+        , (u'Americas'            , u'http://www.defensenews.com/rss/ame/')
+        , (u'Asia & Pacific rim'  , u'http://www.defensenews.com/rss/asi/')
+        , (u'Middle east & Africa', u'http://www.defensenews.com/rss/mid/')
+        , (u'Air'                 , u'http://www.defensenews.com/rss/air/')
+        , (u'Land'                , u'http://www.defensenews.com/rss/lan/')
+        , (u'Naval'               , u'http://www.defensenews.com/rss/sea/')
+    ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
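All the recipe changes in this commit follow calibre's BasicNewsRecipe pattern; a minimal sketch for readers unfamiliar with it (the feed URL is a placeholder, not a real endpoint):

    # Minimal sketch of the BasicNewsRecipe pattern used throughout this
    # commit: class attributes configure the fetcher, and hook methods such
    # as preprocess_html() let a recipe clean up each downloaded page.
    from calibre.web.feeds.news import BasicNewsRecipe

    class Example(BasicNewsRecipe):
        title          = 'Example'
        language       = 'en'
        oldest_article = 7      # days back to fetch
        no_stylesheets = True
        feeds = [('Section', 'http://example.com/rss')]  # placeholder URL

        def preprocess_html(self, soup):
            # called once per article page, before conversion
            return soup

A recipe file like this can be test-built from the command line with something like `ebook-convert example.recipe .epub --test`.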
@@ -2,6 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 '''
 http://www.dilbert.com
+DrMerry added cover Image 2011-11-12
 '''
 
 from calibre.web.feeds.recipes import BasicNewsRecipe
@@ -9,7 +10,7 @@ import re
 
 class DilbertBig(BasicNewsRecipe):
     title = 'Dilbert'
-    __author__ = 'Darko Miletic and Starson17'
+    __author__ = 'Darko Miletic and Starson17 contribution of DrMerry'
     description = 'Dilbert'
     reverse_article_order = True
     oldest_article = 15
@@ -20,6 +21,7 @@ class DilbertBig(BasicNewsRecipe):
     publisher = 'UNITED FEATURE SYNDICATE, INC.'
     category = 'comic'
     language = 'en'
+    cover_url = 'http://dilbert.com/mobile/mobile/dilbert.app.icon.png'
 
     conversion_options = {
         'comments' : description
@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
              ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
             dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
             dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -56,6 +54,14 @@ class Economist(BasicNewsRecipe):
         return br
     '''
 
+    def get_cover_url(self):
+        br = self.browser
+        br.open(self.INDEX)
+        issue = br.geturl().split('/')[4]
+        self.log('Fetching cover for issue: %s'%issue)
+        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
+        return cover_url
+
     def parse_index(self):
         return self.economist_parse_index()
(the same change, applied to the second Economist recipe variant:)

@@ -22,8 +22,6 @@ class Economist(BasicNewsRecipe):
              ' perspective. Best downloaded on Friday mornings (GMT)')
     extra_css = '.headline {font-size: x-large;} \n h2 { font-size: small; } \n h1 { font-size: medium; }'
     oldest_article = 7.0
-    cover_url = 'http://media.economist.com/sites/default/files/imagecache/print-cover-thumbnail/print-covers/currentcoverus_large.jpg'
-    #cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
     remove_tags = [
             dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
             dict(attrs={'class':['dblClkTrk', 'ec-article-info',
@@ -40,6 +38,14 @@ class Economist(BasicNewsRecipe):
     # downloaded with connection reset by peer (104) errors.
     delay = 1
 
+    def get_cover_url(self):
+        br = self.browser
+        br.open(self.INDEX)
+        issue = br.geturl().split('/')[4]
+        self.log('Fetching cover for issue: %s'%issue)
+        cover_url = "http://media.economist.com/sites/default/files/imagecache/print-cover-full/print-covers/%s_CNA400.jpg" %(issue.translate(None,'-'))
+        return cover_url
+
     def parse_index(self):
         try:
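To see why the new get_cover_url() works, here is an illustrative trace (the URL below is hypothetical): economist.com redirects the index to an issue-dated URL, and the date segment names the cover image.

    # Not part of the diff: a worked example of the issue/cover derivation.
    url = 'http://www.economist.com/printedition/2011-10-01'  # hypothetical redirect target
    issue = url.split('/')[4]          # -> '2011-10-01'
    name = issue.translate(None, '-')  # -> '20111001' (Python 2 str.translate deletes '-')
    # cover image: http://media.economist.com/.../print-covers/20111001_CNA400.jpg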
@@ -19,45 +19,20 @@ class FazNet(BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf-8'
     remove_javascript = True
+    keep_only_tags = [{'class':'FAZArtikelEinleitung'},
+                      {'id':'ArtikelTabContent_0'}]
 
-    html2lrf_options = [
-        '--comment', description
-        , '--category', category
-        , '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
-
-    remove_tags = [
-        dict(name=['object','link','embed','base'])
-        ,dict(name='div',
-              attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo',
-                              'ArtikelServices', 'ModulLesermeinungenFooter',
-                              'ModulArtikelServices', 'BoxTool Aufklappen_Grau',
-                              'SocialMediaUnten', ]}),
-        dict(id=['KurzLinkMenu', 'ArtikelServicesMenu']),
-    ]
 
     feeds = [
-        ('FAZ.NET Aktuell', 'http://www.faz.net/s/RubF3CE08B362D244869BE7984590CB6AC1/Tpl~Epartner~SRss_.xml'),
-        ('Politik', 'http://www.faz.net/s/RubA24ECD630CAE40E483841DB7D16F4211/Tpl~Epartner~SRss_.xml'),
-        ('Wirtschaft', 'http://www.faz.net/s/RubC9401175958F4DE28E143E68888825F6/Tpl~Epartner~SRss_.xml'),
-        ('Feuilleton', 'http://www.faz.net/s/RubCC21B04EE95145B3AC877C874FB1B611/Tpl~Epartner~SRss_.xml'),
-        ('Sport', 'http://www.faz.net/s/Rub9F27A221597D4C39A82856B0FE79F051/Tpl~Epartner~SRss_.xml'),
-        ('Gesellschaft', 'http://www.faz.net/s/Rub02DBAA63F9EB43CEB421272A670A685C/Tpl~Epartner~SRss_.xml'),
-        ('Finanzen', 'http://www.faz.net/s/Rub4B891837ECD14082816D9E088A2D7CB4/Tpl~Epartner~SRss_.xml'),
-        ('Wissen', 'http://www.faz.net/s/Rub7F4BEE0E0C39429A8565089709B70C44/Tpl~Epartner~SRss_.xml'),
-        ('Reise', 'http://www.faz.net/s/RubE2FB5CA667054BDEA70FB3BC45F8D91C/Tpl~Epartner~SRss_.xml'),
-        ('Technik & Motor', 'http://www.faz.net/s/Rub01E4D53776494844A85FDF23F5707AD8/Tpl~Epartner~SRss_.xml'),
-        ('Beruf & Chance', 'http://www.faz.net/s/RubB1E10A8367E8446897468EDAA6EA0504/Tpl~Epartner~SRss_.xml')
+        ('FAZ.NET Aktuell', 'http://www.faz.net/aktuell/?rssview=1'),
+        ('Politik', 'http://www.faz.net/aktuell/politik/?rssview=1'),
+        ('Wirtschaft', 'http://www.faz.net/aktuell/wirtschaft/?rssview=1'),
+        ('Feuilleton', 'http://www.faz.net/aktuell/feuilleton/?rssview=1'),
+        ('Sport', 'http://www.faz.net/aktuell/sport/?rssview=1'),
+        ('Gesellschaft', 'http://www.faz.net/aktuell/gesellschaft/?rssview=1'),
+        ('Finanzen', 'http://www.faz.net/aktuell/finanzen/?rssview=1'),
+        ('Technik & Motor', 'http://www.faz.net/aktuell/technik-motor/?rssview=1'),
+        ('Wissen', 'http://www.faz.net/aktuell/wissen/?rssview=1'),
+        ('Reise', 'http://www.faz.net/aktuell/reise/?rssview=1'),
+        ('Beruf & Chance', 'http://www.faz.net/aktuell/beruf-chance/?rssview=1')
     ]
 
-    def preprocess_html(self, soup):
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
-        soup.head.insert(0,mtag)
-        del soup.body['onload']
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
@@ -1,35 +1,71 @@
-#!/usr/bin/python
-
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
+from calibre.utils.magick import Image
 
 class AdvancedUserRecipe1307556816(BasicNewsRecipe):
     title = u'Geek and Poke'
     __author__ = u'DrMerry'
     description = u'Geek and Poke Cartoons'
+    publisher = u'Oliver Widder'
+    author = u'Oliver Widder, DrMerry (calibre-code), calibre'
     oldest_article = 31
     max_articles_per_feed = 100
     language = u'en'
     simultaneous_downloads = 5
     #delay = 1
-    timefmt = ' [%A, %d %B, %Y]'
+    timefmt = ' [%a, %d %B, %Y]'
     summary_length = -1
     no_stylesheets = True
+    category = 'News.IT, Cartoon, Humor, Geek'
+    use_embedded_content = False
     cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
     remove_javascript = True
     remove_empty_feeds = True
     publication_type = 'blog'
+    conversion_options = {
+         'comments'  : ''
+        ,'tags'      : category
+        ,'language'  : language
+        ,'publisher' : publisher
+        ,'author'    : author
+    }
 
-    preprocess_regexps = [ (re.compile(r'(<p>&nbsp;</p>|<iframe.*</iframe>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
-                           (re.compile(r'(&nbsp;| )', re.DOTALL|re.IGNORECASE),lambda match: ' '),
-                           (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>')
+    remove_tags_before = dict(name='p', attrs={'class':'content-nav'})
+    remove_tags_after  = dict(name='div', attrs={'class':'entry-content'})
+    remove_tags = [dict(name='div', attrs={'class':'entry-footer'}),
+                   dict(name='div', attrs={'id':'alpha'}),
+                   dict(name='div', attrs={'id':'gamma'}),
+                   dict(name='iframe'),
+                   dict(name='p', attrs={'class':'content-nav'})]
+
+    filter_regexps = [(r'feedburner\.com'),
+                      (r'pixel.quantserve\.com'),
+                      (r'googlesyndication\.com'),
+                      (r'yimg\.com'),
+                      (r'scorecardresearch\.com')]
+
+    preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
+                          (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
+                          (re.compile(r'<h2[^>]*>([^<]*)</h2>[^>]*(<div[^>]*>)', re.DOTALL|re.IGNORECASE), lambda match: match.group(2) + '<div id="MERRYdate">' + match.group(1) + '</div>'),
+                          (re.compile(r'(<h3[^>]*>)<a[^>]>((?!</a)*)</a></h3>', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + match.group(2) + '</h3>'),
+                          (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + '<br><cite>' + match.group(2) + '</cite>'),
+                          (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>'),
+                          (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')
     ]
 
-    extra_css = 'body, h3, p, h2, h1, div, span{margin:0px} h2.date-header {font-size: 0.7em; color:#eee;} h3.entry-header{font-size: 1.0em} div.entry-body{font-size: 0.9em}'
+    extra_css = 'body, h3, p, #MERRYdate, h1, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em} #MERRYdate {font-size: 0.5em}'
 
-    remove_tags_before = dict(name='h2', attrs={'class':'date-header'})
-    remove_tags_after = dict(name='div', attrs={'class':'entry-body'})
-    feeds = [(u'Geek and Poke', u'http://feeds.feedburner.com/GeekAndPoke?format=xml')]
+    def postprocess_html(self, soup, first):
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            width, height = img.size
+            #print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
+            img.trim(0)
+            img.save(iurl)
+            width, height = img.size
+            #print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
+        return soup
+
+    feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']
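The MERRYdate regexp added above is the least obvious of the batch; on sample markup (invented here) it moves the date header inside the following div so the new extra_css can shrink it:

    # Illustration only; the input markup is made up.
    import re
    html = '<h2 class="date-header">Oct 04, 2011</h2>\n<div class="entry">'
    pat = re.compile(r'<h2[^>]*>([^<]*)</h2>[^>]*(<div[^>]*>)', re.DOTALL | re.IGNORECASE)
    print(pat.sub(lambda m: m.group(2) + '<div id="MERRYdate">' + m.group(1) + '</div>', html))
    # -> <div class="entry"><div id="MERRYdate">Oct 04, 2011</div>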
@@ -119,10 +119,8 @@ class Guardian(BasicNewsRecipe):
         }
 
     def parse_index(self):
-        try:
-            feeds = []
-            for title, href in self.find_sections():
-                feeds.append((title, list(self.find_articles(href))))
-            return feeds
-        except:
-            raise NotImplementedError
+        feeds = []
+        for title, href in self.find_sections():
+            feeds.append((title, list(self.find_articles(href))))
+        return feeds
@@ -1,7 +1,9 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 
 class AdvancedUserRecipe(BasicNewsRecipe):
 
-    title = 'Heise-online'
+    title = 'heise online'
     description = 'News vom Heise-Verlag'
     __author__ = 'schuster'
     use_embedded_content = False
@@ -12,10 +14,11 @@ class AdvancedUserRecipe(BasicNewsRecipe):
     remove_empty_feeds = True
     timeout = 5
     no_stylesheets = True
+    encoding = 'utf-8'
 
     remove_tags_after = dict(name ='p', attrs={'class':'editor'})
-    remove_tags = [dict(id='navi_top_container'),
+    remove_tags = [{'class':'navi_top_container'},
                    dict(id='navi_bottom'),
                    dict(id='mitte_rechts'),
                    dict(id='navigation'),
@@ -25,28 +28,28 @@ class AdvancedUserRecipe(BasicNewsRecipe):
                    dict(id='content_foren'),
                    dict(id='seiten_navi'),
                    dict(id='adbottom'),
-                   dict(id='sitemap')]
+                   dict(id='sitemap'),
+                   dict(name='a', href=re.compile(r'^/([a-zA-Z]+/)?')),
+                   ]
 
     feeds = [
         ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
-        ('Auto', 'http://www.heise.de/autos/rss/news.rdf'),
-        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
-        ('Mac&i', 'http://www.heise.de/mac-and-i/news.rdf'),
-        ('Mobile ', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
-        ('Netz ', 'http://www.heise.de/netze/rss/netze-atom.xml'),
-        ('Open ', 'http://www.heise.de/open/news/news-atom.xml'),
-        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
-        ('Security ', 'http://www.heise.de/security/news/news-atom.xml'),
-        ('C`t', 'http://www.heise.de/ct/rss/artikel-atom.xml'),
         ('iX', 'http://www.heise.de/ix/news/news.rdf'),
-        ('Mach-flott', 'http://www.heise.de/mach-flott/rss/mach-flott-atom.xml'),
+        ('Technology Review', 'http://www.heise.de/tr/news-atom.xml'),
+        ('mobil', 'http://www.heise.de/mobil/newsticker/heise-atom.xml'),
+        ('Security', 'http://www.heise.de/security/news/news-atom.xml'),
+        ('Netze', 'http://www.heise.de/netze/rss/netze-atom.xml'),
+        ('Open Source', 'http://www.heise.de/open/news/news-atom.xml'),
+        ('Resale ', 'http://www.heise.de/resale/rss/resale.rdf'),
+        ('Foto ', 'http://www.heise.de/foto/rss/news-atom.xml'),
+        ('Autos', 'http://www.heise.de/autos/rss/news.rdf'),
+        ('Mac & i', 'http://www.heise.de/mac-and-i/news.rdf'),
         ('Blog: Babel-Bulletin', 'http://www.heise.de/developer/rss/babel-bulletin/blog.rdf'),
         ('Blog: Der Dotnet-Doktor', 'http://www.heise.de/developer/rss/dotnet-doktor/blog.rdf'),
         ('Blog: Bernds Management-Welt', 'http://www.heise.de/developer/rss/bernds-management-welt/blog.rdf'),
-        ('Blog: IT conversation', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
+        ('Blog: The World of IT', 'http://www.heise.de/developer/rss/world-of-it/blog.rdf'),
         ('Blog: Kais bewegtes Web', 'http://www.heise.de/developer/rss/kais-bewegtes-web/blog.rdf')
     ]
 
     def print_version(self, url):
         return url + '?view=print'
@@ -18,6 +18,7 @@ class HoustonChronicle(BasicNewsRecipe):
 
     keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
                       'hst-articletext' in x or 'hst-galleryitem' in x)}
+    remove_attributes = ['xmlns']
 
     feeds = [
         ('News', "http://www.chron.com/rss/feed/News-270.php"),
recipes/merco_press.recipe (new file, 27 lines)
@@ -0,0 +1,27 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MercoPress(BasicNewsRecipe):
+    title = u'Merco Press'
+    description = u"Read News, Stories and Insight Analysis from Latin America and Mercosur. Politics, Economy, Business and Investments in South America."
+    cover_url = 'http://en.mercopress.com/web/img/en/mercopress-logo.gif'
+
+    __author__ = 'Russell Phillips'
+    language = 'en'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
+    remove_tags = [dict(name='a')]
+
+    feeds = [('Antarctica', 'http://en.mercopress.com/rss/antarctica'),
+             ('Argentina', 'http://en.mercopress.com/rss/argentina'),
+             ('Brazil', 'http://en.mercopress.com/rss/brazil'),
+             ('Falkland Islands', 'http://en.mercopress.com/rss/falkland-islands'),
+             ('International News', 'http://en.mercopress.com/rss/international'),
+             ('Latin America', 'http://en.mercopress.com/rss/latin-america'),
+             ('Mercosur', 'http://en.mercopress.com/rss/mercosur'),
+             ('Paraguay', 'http://en.mercopress.com/rss/paraguay'),
+             ('United States', 'http://en.mercopress.com/rss/united-states'),
+             ('Uruguay', 'http://en.mercopress.com/rss/uruguay')]
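Unlike the older recipes in this commit, the new Russell Phillips recipes do no hand-written tag surgery: auto_cleanup = True hands article extraction to calibre's readability-style heuristics, so a recipe can reduce to metadata plus feeds. A sketch of the trade-off (placeholder names and URL):

    # With auto_cleanup, the per-site keep_only_tags / remove_tags /
    # preprocess_html plumbing can usually be dropped entirely.
    class Minimal(BasicNewsRecipe):     # assumes the usual import
        title        = 'Example'
        language     = 'en'
        auto_cleanup = True             # heuristic article extraction
        feeds = [('Feed', 'http://example.com/rss')]  # placeholder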
@@ -5,30 +5,46 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     description = 'News as provide by The Metro -UK'
 
     __author__ = 'Dave Asbury'
+    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
 
     no_stylesheets = True
     oldest_article = 1
-    max_articles_per_feed = 25
+    max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True
 
-    preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    #preprocess_regexps = [(re.compile(r'Tweet'), lambda a : '')]
+    preprocess_regexps = [
+        (re.compile(r'<span class="img-cap legend">', re.IGNORECASE | re.DOTALL), lambda match: '<p></p><span class="img-cap legend"> ')]
+    preprocess_regexps = [
+        (re.compile(r'tweet', re.IGNORECASE | re.DOTALL), lambda match: '')]
 
     language = 'en_GB'
 
     masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
 
-    extra_css = 'h2 {font: sans-serif medium;}'
     keep_only_tags = [
         dict(name='h1'),dict(name='h2', attrs={'class':'h2'}),
         dict(attrs={'class':['img-cnt figure']}),
         dict(attrs={'class':['art-img']}),
-        dict(name='div', attrs={'class':'art-lft'})
+        dict(name='div', attrs={'class':'art-lft'}),
+        dict(name='p')
     ]
     remove_tags = [dict(name='div', attrs={'class':[ 'news m12 clrd clr-b p5t shareBtm', 'commentForm', 'metroCommentInnerWrap',
                    'art-rgt','pluck-app pluck-comm','news m12 clrd clr-l p5t', 'flt-r' ]}),
                    dict(attrs={'class':[ 'metroCommentFormWrap','commentText','commentsNav','avatar','submDateAndTime']})
+                   ,dict(name='div', attrs={'class' : 'clrd art-fd fd-gr1-b'})
     ]
     feeds = [
         (u'News', u'http://www.metro.co.uk/rss/news/'), (u'Money', u'http://www.metro.co.uk/rss/money/'), (u'Sport', u'http://www.metro.co.uk/rss/sport/'), (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'), (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'), (u'TV', u'http://www.metro.co.uk/rss/tv/'), (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'), (u'Weird News', u'http://www.metro.co.uk/rss/weird/'), (u'Travel', u'http://www.metro.co.uk/rss/travel/'), (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'), (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'), (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
+
+    extra_css = '''
+        body {font: sans-serif medium;}'
+        h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+        h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
+        span{ font-size:9.5px; font-weight:bold;font-style:italic}
+        p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+    '''
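One caveat worth noting on the Metro hunk: preprocess_regexps is assigned twice in the new version, and class attributes do not accumulate, so only the second list (the 'tweet' stripper) survives; the img-cap rule is dead code. A minimal demonstration of why:

    # Python keeps only the last binding of a class attribute:
    class C:
        x = ['img-cap rule']
        x = ['tweet rule']
    print(C.x)   # ['tweet rule']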
@@ -6,19 +6,24 @@ __Region__ = 'Hong Kong'
 # Users of Kindle 3 with limited system-level CJK support
 # please replace the following "True" with "False".
 __MakePeriodical__ = True
-# Turn below to true if your device supports display of CJK titles
+# Turn below to True if your device supports display of CJK titles
 __UseChineseTitle__ = False
 # Set it to False if you want to skip images
 __KeepImages__ = True
-# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
+# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source
 __UseLife__ = True
-# (HK only) if __UseLife__ is true, turn this on if you want to include the column section
+# (HK only) It is to disable the column section which is now a premium content
 __InclCols__ = False
+# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats
+__ParsePFF__ = False
+# (HK only) Turn below to True if you wish hi-res images
+__HiResImg__ = False
 
 
 '''
 Change Log:
-2011/09/21: fetching "column" section is made optional. Default is False
+2011/10/04: option to get hi-res photos for the articles
+2011/09/21: fetching "column" section is made optional.
 2011/09/18: parse "column" section stuff from source text file directly.
 2011/09/07: disable "column" section as it is no longer offered free.
 2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
@@ -42,7 +47,7 @@ Change Log:
 2010/10/31: skip repeated articles in section pages
 '''
 
-import os, datetime, re
+import os, datetime, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from contextlib import nested
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
@@ -56,7 +61,7 @@ class MPRecipe(BasicNewsRecipe):
     title = 'Ming Pao - Hong Kong'
     description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
     category = 'Chinese, News, Hong Kong'
-    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
     masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
     keep_only_tags = [dict(name='h1'),
                       dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
@@ -147,43 +152,6 @@ class MPRecipe(BasicNewsRecipe):
     conversion_options = {'linearize_tables':True}
     timefmt = ''
 
-    def image_url_processor(cls, baseurl, url):
-        # trick: break the url at the first occurance of digit, add an additional
-        # '_' at the front
-        # not working, may need to move this to preprocess_html() method
-        # minIdx = 10000
-        # i0 = url.find('0')
-        # if i0 >= 0 and i0 < minIdx:
-        #     minIdx = i0
-        # i1 = url.find('1')
-        # if i1 >= 0 and i1 < minIdx:
-        #     minIdx = i1
-        # i2 = url.find('2')
-        # if i2 >= 0 and i2 < minIdx:
-        #     minIdx = i2
-        # i3 = url.find('3')
-        # if i3 >= 0 and i0 < minIdx:
-        #     minIdx = i3
-        # i4 = url.find('4')
-        # if i4 >= 0 and i4 < minIdx:
-        #     minIdx = i4
-        # i5 = url.find('5')
-        # if i5 >= 0 and i5 < minIdx:
-        #     minIdx = i5
-        # i6 = url.find('6')
-        # if i6 >= 0 and i6 < minIdx:
-        #     minIdx = i6
-        # i7 = url.find('7')
-        # if i7 >= 0 and i7 < minIdx:
-        #     minIdx = i7
-        # i8 = url.find('8')
-        # if i8 >= 0 and i8 < minIdx:
-        #     minIdx = i8
-        # i9 = url.find('9')
-        # if i9 >= 0 and i9 < minIdx:
-        #     minIdx = i9
-        return url
-
     def get_dtlocal(self):
         dt_utc = datetime.datetime.utcnow()
         if __Region__ == 'Hong Kong':
@@ -260,15 +228,16 @@ class MPRecipe(BasicNewsRecipe):
         else:
             for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
                                (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
-                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
+                               (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
+                               (u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
                 articles = self.parse_section(url)
                 if articles:
                     feeds.append((title, articles))
 
             # special- editorial
-            ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
-            if ed_articles:
-                feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
+            #ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
+            #if ed_articles:
+            #    feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
 
             for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
                                (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
@@ -279,20 +248,39 @@ class MPRecipe(BasicNewsRecipe):
 
             # special - finance
             #fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
-            fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
-            if fin_articles:
-                feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
+            #fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
+            #if fin_articles:
+            #    feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
 
-            for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
-                               (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
-                articles = self.parse_section(url)
+            for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
+                articles = self.parse_section2(url, keystr)
                 if articles:
                     feeds.append((title, articles))
 
+            #for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
+            #                   (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
+            #    articles = self.parse_section(url)
+            #    if articles:
+            #        feeds.append((title, articles))
+
             # special - entertainment
-            ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
-            if ent_articles:
-                feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+            #ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
+            #if ent_articles:
+            #    feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
+
+            for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
+                                       ]:
+                articles = self.parse_section2(url, keystr)
+                if articles:
+                    feeds.append((title, articles))
+
+            if __InclCols__ == True:
+                # parse column section articles directly from .txt files
+                for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
+                                           ]:
+                    articles = self.parse_section2_txt(url, keystr)
+                    if articles:
+                        feeds.append((title, articles))
+
             for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
                                (u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
@@ -300,11 +288,6 @@ class MPRecipe(BasicNewsRecipe):
                 if articles:
                     feeds.append((title, articles))
 
-            # special- columns
-            col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
-            if col_articles:
-                feeds.append((u'\u5c08\u6b04 Columns', col_articles))
-
         elif __Region__ == 'Vancouver':
             for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
                                (u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
@@ -348,6 +331,16 @@ class MPRecipe(BasicNewsRecipe):
                 title = self.tag_to_string(a)
                 url = a.get('href', False)
                 url = 'http://news.mingpao.com/' + dateStr + '/' +url
+                # replace the url to the print-friendly version
+                if __ParsePFF__ == True:
+                    if url.rfind('Redirect') <> -1:
+                        url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
+                        url = re.sub('%2F.*%2F', '/', url)
+                        title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
+                        url = url.replace('%2Etxt', '_print.htm')
+                        url = url.replace('%5F', '_')
+                    else:
+                        url = url.replace('.htm', '_print.htm')
                 if url not in included_urls and url.rfind('Redirect') == -1:
                     current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
                     included_urls.append(url)
@@ -472,11 +465,92 @@ class MPRecipe(BasicNewsRecipe):
         current_articles.reverse()
         return current_articles
 
-    # preprocess those .txt based files
+    # preprocess those .txt and javascript based files
     def preprocess_raw_html(self, raw_html, url):
-        if url.rfind('ftp') == -1:
+        #raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
+        if __HiResImg__ == True:
+            # TODO: add a _ in front of an image url
+            if url.rfind('news.mingpao.com') > -1:
+                imglist = re.findall('src="?.*?jpg"', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                for img in imglist:
+                    gifimg = img.replace('jpg"', 'gif"')
+                    try:
+                        br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        # find the location of the first _
+                        pos = img.find('_')
+                        if pos > -1:
+                            # if found, insert _ after the first _
+                            newimg = img[0:pos] + '_' + img[pos:]
+                            raw_html = raw_html.replace(img, newimg)
+                        else:
+                            # if not found, insert _ after "
+                            raw_html = raw_html.replace(img[1:], '"_' + img[1:])
+            elif url.rfind('life.mingpao.com') > -1:
+                imglist = re.findall('src=\'?.*?jpg\'', raw_html)
+                br = mechanize.Browser()
+                br.set_handle_redirect(False)
+                #print 'Img list: ', imglist, '\n'
+                for img in imglist:
+                    gifimg = img.replace('jpg\'', 'gif\'')
+                    try:
+                        #print 'Original: ', url
+                        #print 'To append: ', "/../" + gifimg[5:len(gifimg)-1]
+                        gifurl = re.sub(r'dailynews.*txt', '', url)
+                        #print 'newurl: ', gifurl + gifimg[5:len(gifimg)-1]
+                        br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
+                        #print 'URL: ', url + "/../" + gifimg[5:len(gifimg)-1]
+                        #br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
+                        raw_html = raw_html.replace(img, gifimg)
+                    except:
+                        #print 'GIF not found'
+                        pos = img.rfind('/')
+                        newimg = img[0:pos+1] + '_' + img[pos+1:]
+                        #print 'newimg: ', newimg
+                        raw_html = raw_html.replace(img, newimg)
+        if url.rfind('ftp') == -1 and url.rfind('_print.htm') == -1:
             return raw_html
         else:
+            if url.rfind('_print.htm') <> -1:
+                # javascript based file
+                splitter = re.compile(r'\n')
+                new_raw_html = '<html><head><title>Untitled</title></head>'
+                new_raw_html = new_raw_html + '<body>'
+                for item in splitter.split(raw_html):
+                    if item.startswith('var heading1 ='):
+                        heading = item.replace('var heading1 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="heading">' + heading
+                    if item.startswith('var heading2 ='):
+                        heading = item.replace('var heading2 = \'', '')
+                        heading = heading.replace('\'', '')
+                        heading = heading.replace(';', '')
+                        if heading <> '':
+                            new_raw_html = new_raw_html + '<br>' + heading + '</div>'
+                        else:
+                            new_raw_html = new_raw_html + '</div>'
+                    if item.startswith('var content ='):
+                        content = item.replace("var content = ", '')
+                        content = content.replace('\'', '')
+                        content = content.replace(';', '')
+                        new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
+                    if item.startswith('var photocontent ='):
+                        photo = item.replace('var photocontent = \'', '')
+                        photo = photo.replace('\'', '')
+                        photo = photo.replace(';', '')
+                        photo = photo.replace('<tr>', '')
+                        photo = photo.replace('<td>', '')
+                        photo = photo.replace('</tr>', '')
+                        photo = photo.replace('</td>', '<br>')
+                        photo = photo.replace('class="photo"', '')
+                        new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
+                return new_raw_html + '</body></html>'
+            else:
+                # .txt based file
                 splitter = re.compile(r'\n') # Match non-digits
                 new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
                 next_is_img_txt = False
@@ -604,7 +678,7 @@ class MPRecipe(BasicNewsRecipe):
             if po is None:
                 self.play_order_counter += 1
                 po = self.play_order_counter
-            parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
+            parent.add_item('%sindex.html'%adir, None, a.title if a.title else ('Untitled Article'),
                             play_order=po, author=auth, description=desc)
             last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
             for sp in a.sub_pages:
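The new _print.htm branch of preprocess_raw_html() handles printer-friendly Ming Pao pages that carry their text in javascript variables; a small runnable illustration (invented input) of how one such variable is unwrapped by the loop above:

    # Invented sample line from a _print.htm page:
    item = "var heading1 = 'Headline';"
    heading = item.replace("var heading1 = '", '').replace("'", '').replace(';', '')
    print('<div class="heading">' + heading)   # -> <div class="heading">Headline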
recipes/penguin_news.recipe (new file, 17 lines)
@@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MercoPress(BasicNewsRecipe):
+    title = u'Penguin News'
+    description = u"Penguin News: the Falkland Islands' only newspaper."
+    cover_url = 'http://www.penguin-news.com/templates/rt_syndicate_j15/images/logo/light/logo1.png'
+    language = 'en'
+
+    __author__ = 'Russell Phillips'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    extra_css = 'img{padding-bottom:1ex; display:block; text-align: center;}'
+
+    feeds = [(u'Penguin News - Falkland Islands', u'http://www.penguin-news.com/index.php?format=feed&type=rss')]
recipes/revista_piaui.recipe (new file, 29 lines)
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RevistaPiaui(BasicNewsRecipe):
+    title = u'Revista piau\xed'
+    language = 'pt_BR'
+    __author__ = u'Eduardo Gustini Simões'
+    oldest_article = 31
+    max_articles_per_feed = 50
+    auto_cleanup = True
+
+    feeds = [(u'Edi\xe7\xe3o Atual', u'http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:
+                soup = self.index_to_soup('http://revistapiaui.estadao.com.br/feed/rss/edicao-atual.xml')
+                itemTitle = article.title.partition('|')[0].rstrip()
+                item = soup.find(text=itemTitle)
+                articleDescription = item.parent.parent.description.string.partition('<br />')[2]
+                article.summary = articleDescription
+        return feeds
+
+    def populate_article_metadata(self, article, soup, first):
+        h2 = soup.find('h2')
+        h2.string.replaceWith(h2.string.partition('|')[0].rstrip())
+        h2.replaceWith(h2.prettify() + '<p><em>' + article.summary + '</em></p><p><em>' + ' posted at ' + article.localtime.strftime('%d-%m-%Y') + '</em></p>')
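The parse_feeds() override above re-fetches the issue RSS to recover each article's description; the title normalisation it relies on is plain str.partition (hypothetical feed title below):

    # Illustration of the title handling in parse_feeds():
    title = u'Questao de ordem | piaui_61'   # hypothetical feed title
    print(title.partition('|')[0].rstrip())  # -> Questao de ordem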
@@ -9,285 +9,79 @@ calibre recipe for slate.com
 
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Comment, Tag
 
 class Slate(BasicNewsRecipe):
-    # Method variables for customizing downloads
     description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
-    __author__ = 'GRiker, Sujata Raman and Nick Redding'
-    max_articles_per_feed = 100
-    oldest_article = 14
-    recursions = 0
-    delay = 0
-    simultaneous_downloads = 5
-    timeout = 120.0
+    __author__ = 'Kovid Goyal'
     timefmt = ''
-    feeds = None
     no_stylesheets = True
-    encoding = None
     language = 'en'
 
+    title = 'Slate'
+    INDEX = 'http://slate.com'
+    encoding = 'utf-8'
+    preprocess_regexps = [
+        (re.compile(r'<!--.*?-->', re.DOTALL), lambda x: ''),
+        (re.compile(r'^.*?<html', re.DOTALL), lambda x:'<html'),
+        (re.compile(r'<meta[^>]+?/>', re.DOTALL), lambda x:''),
+    ]
+    remove_tags = [
+        {'name':['link', 'script']},
+        {'class':['share-box-flank', 'sl-crumbs', 'sl-tbar',
+                  'sl-chunky-tbar']},
+    ]
+    remove_tags_after = [{'class':'sl-art-creds-cntr'}]
+    keep_only_tags = {'class':'sl-body-wrapper'}
+    remove_attributes = ['style']
-    slate_complete = True
-    if slate_complete:
-        title = 'Slate (complete)'
-    else:
-        title = 'Slate (weekly)'
-
-    # Method variables for customizing feed parsing
-    summary_length = 250
-    use_embedded_content = None
-
-    # Method variables for pre/post processing of HTML
-    preprocess_regexps = [ (re.compile(r'<p><em>Disclosure: <strong>Slate</strong> is owned by the Washington Post.*</p>',
-                                       re.DOTALL|re.IGNORECASE),
-                           lambda match: ''),
-                           (re.compile(r'<p><strong><em>Join the discussion about this story on.*</p>',
-                                       re.DOTALL|re.IGNORECASE),
-                           lambda match: '') ]
-
-    match_regexps = []
-
-    # The second entry is for 'Big Money', which comes from a different site, uses different markup
-    keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body']}),
-                      dict(attrs={ 'id':['content']}) ]
-
-    # The second entry is for 'Big Money', which comes from a different site, uses different markup
-    remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper',
-                                      'article_bottom_tools_cntr','fray_article_discussion','fray_article_links','bottom_sponsored_links','author_bio',
-                                      'bizbox_links_bottom','ris_links_wrapper','BOXXLE',
-                                      'comments_button','add_comments_button','comments-to-fray','marriott_ad',
-                                      'article_bottom_tools','recommend_tab2','fbog_article_bottom_cntr']}),
-                   dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ]
-
-    excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast']
-    excludedTitleKeywords = ['Gabfest','Slate V','on Twitter']
-    excludedAuthorKeywords = []
-    excludedContentKeywords = ['http://twitter.com/Slate']
-
-    extra_css = '''
-        .h1_subhead{font-family:Arial; font-size:small; }
-        h1{font-family:Verdana; font-size:large; }
-        .byline {font-family:Georgia; margin-bottom: 0px; }
-        .dateline {font-family:Arial; font-size: smaller; height: 0pt;}
-        .imagewrapper {font-family:Verdana;font-size:x-small; }
-        .source {font-family:Verdana; font-size:x-small;}
-        .credit {font-family:Verdana; font-size: smaller;}
-        #article_body {font-family:Verdana; }
-        #content {font-family:Arial; }
-        .caption{font-family:Verdana;font-style:italic; font-size:x-small;}
-        h3{font-family:Arial; font-size:small}
-    '''
-
-    # Local variables to extend class
-    baseURL = 'http://slate.com'
-    section_dates = []
-
-    # class extension methods
-    def tag_to_strings(self, tag):
-        if not tag:
-            return ''
-        if isinstance(tag, basestring):
-            return tag
-        strings = []
-        for item in tag.contents:
-            if isinstance(item, (NavigableString, CData)):
-                strings.append(item.string)
-            elif isinstance(item, Tag):
-                res = self.tag_to_string(item,use_alt=False)
-                if res:
-                    strings.append(res)
-        return strings
-
-    def extract_named_sections(self):
-        soup = self.index_to_soup( self.baseURL )
-        soup_nav_bar = soup.find(True, attrs={'id':'nav'})
-        briefing_nav = soup.find('li')
-        briefing_url = briefing_nav.a['href']
-        for section_nav in soup_nav_bar.findAll('li'):
-            section_name = self.tag_to_string(section_nav,use_alt=False)
-            self.section_dates.append(section_name)
-
-        soup = self.index_to_soup(briefing_url)
-
-        self.log("Briefing url = %s " % briefing_url)
-        section_lists = soup.findAll('ul','view_links_list')
-
-        sections = []
-        for section in section_lists :
-            sections.append(section)
-        return sections
-
-    def extract_dated_sections(self):
-        soup = self.index_to_soup( self.baseURL )
-        soup_top_stories = soup.find(True, attrs={'id':'tap3_cntr'})
-        if soup_top_stories:
-            self.section_dates.append("Top Stories")
-            self.log("SELECTION TOP STORIES %s" % "Top Stories")
-
-        soup = soup.find(True, attrs={'id':'toc_links_container'})
-
-        todays_section = soup.find(True, attrs={'class':'todaydateline'})
-        self.section_dates.append(self.tag_to_string(todays_section,use_alt=False))
-        self.log("SELECTION DATE %s" % self.tag_to_string(todays_section,use_alt=False))
-
-        older_section_dates = soup.findAll(True, attrs={'class':'maindateline'})
-        for older_section in older_section_dates :
-            self.section_dates.append(self.tag_to_string(older_section,use_alt=False))
-            self.log("SELECTION DATE %s" % self.tag_to_string(older_section,use_alt=False))
-
-        if soup_top_stories:
-            headline_stories = soup_top_stories
-            self.log("HAVE top_stories")
-        else:
-            headline_stories = None
-            self.log("NO top_stories")
-        section_lists = soup.findAll('ul')
-        # Prepend the headlines to the first section
-        if headline_stories:
-            section_lists.insert(0,headline_stories)
-
-        sections = []
-        for section in section_lists :
-            sections.append(section)
-        return sections
-
-    def extract_section_articles(self, sections_html) :
-        # Find the containers with section content
-        sections = sections_html
-
-        articles = {}
-        key = None
-        ans = []
-
-        for (i,section) in enumerate(sections) :
-
-            # Get the section name
-            if section.has_key('id') :
-                self.log("PROCESSING SECTION id = %s" % section['id'])
-                key = self.section_dates[i]
-                if key.startswith("Pod"):
-                    continue
-                if key.startswith("Blog"):
-                    continue
-                articles[key] = []
-                ans.append(key)
-            elif self.slate_complete:
-                key = self.section_dates[i]
-                if key.startswith("Pod"):
-                    continue
-                if key.startswith("Blog"):
-                    continue
-                self.log("PROCESSING SECTION name = %s" % key)
-                articles[key] = []
-                ans.append(key)
-            else :
-                self.log("SECTION %d HAS NO id" % i);
-                continue
-
-            # Get the section article_list
-            article_list = section.findAll('li')
-
-            # Extract the article attributes
-            for article in article_list :
-                bylines = self.tag_to_strings(article)
-                url = article.a['href']
-                title = bylines[0]
-                full_title = self.tag_to_string(article,use_alt=False)
-                #self.log("ARTICLE TITLE%s" % title)
-                #self.log("ARTICLE FULL_TITLE%s" % full_title)
-                #self.log("URL %s" % url)
-                author = None
-                description = None
-                pubdate = None
-
-                if len(bylines) == 2 and self.tag_to_string(article).find("Today's Papers") > 0 :
-                    description = "A summary of what's in the major U.S. newspapers."
-
-                if len(bylines) == 3 :
-                    author = bylines[2].strip()
-                    author = re.sub('[\r][\n][\t][\t\t]','', author)
-                    author = re.sub(',','', author)
-                    if bylines[1] is not None :
-                        description = bylines[1]
-                        full_byline = self.tag_to_string(article)
-                        if full_byline.find('major U.S. newspapers') > 0 :
-                            description = "A summary of what's in the major U.S. newspapers."
-
-                if len(bylines) > 3 and author is not None:
-                    author += " | "
-                    for (i,substring) in enumerate(bylines[3:]) :
-                        #print "substring: %s" % substring.encode('cp1252')
-                        author += substring.strip()
-                        if i < len(bylines[3:]) :
-                            author += " | "
-
-                # Skip articles whose descriptions contain excluded keywords
-                if description is not None and len(self.excludedDescriptionKeywords):
-                    excluded = re.compile('|'.join(self.excludedDescriptionKeywords))
-                    found_excluded = excluded.search(description)
-                    if found_excluded :
-                        self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
-                        continue
-
-                # Skip articles whose title contain excluded keywords
-                if full_title is not None and len(self.excludedTitleKeywords):
-                    excluded = re.compile('|'.join(self.excludedTitleKeywords))
-                    #self.log("evaluating full_title: %s" % full_title)
-                    found_excluded = excluded.search(full_title)
-                    if found_excluded :
-                        self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
-                        continue
-
-                # Skip articles whose author contain excluded keywords
-                if author is not None and len(self.excludedAuthorKeywords):
-                    excluded = re.compile('|'.join(self.excludedAuthorKeywords))
-                    found_excluded = excluded.search(author)
-                    if found_excluded :
-                        self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0)))
-                        continue
-
-                skip_this_article = False
-                # Check to make sure we're not adding a duplicate
-                for article in articles[key] :
-                    if article['url'] == url :
-                        skip_this_article = True
-                        self.log("SKIPPING DUP %s" % url)
-                        break
-
-                if skip_this_article :
-                    continue
-
-                # Build the dictionary entry for this article
-                feed = key
-                if not articles.has_key(feed) :
-                    articles[feed] = []
-                articles[feed].append(dict(title=title, url=url, date=pubdate, description=description,
|
|
||||||
author=author, content=''))
|
|
||||||
#self.log("KEY %s" % feed)
|
|
||||||
#self.log("APPENDED %s" % url)
|
|
||||||
# Promote 'newspapers' to top
|
|
||||||
for (i,article) in enumerate(articles[feed]) :
|
|
||||||
if article['description'] is not None :
|
|
||||||
if article['description'].find('newspapers') > 0 :
|
|
||||||
articles[feed].insert(0,articles[feed].pop(i))
|
|
||||||
|
|
||||||
|
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
|
||||||
return ans
|
|
||||||
|
|
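The three keyword filters above share one idiom: collapsing a keyword list into a single alternation regex and rejecting any article that matches. A minimal standalone sketch of that pattern follows; the keyword values are illustrative, not taken from the recipe:

import re

excluded_keywords = ['Podcast', 'Video']  # hypothetical values
excluded = re.compile('|'.join(excluded_keywords))
for title in ["Today's Papers", 'Video: The Week in Review']:
    match = excluded.search(title)
    if match:
        # Mirrors the recipe's "skipping ... (keyword exclusion)" log line
        print('skipping %s (matched %s)' % (title, match.group(0)))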
    def print_version(self, url):
-       return url + 'pagenum/all/'
+       return url.replace('.html', '.single.html')

-   # Class methods
    def parse_index(self) :
-       if self.slate_complete:
-           sections = self.extract_named_sections()
-       else:
-           sections = self.extract_dated_sections()
-       section_list = self.extract_section_articles(sections)
-       return section_list
+       ans = []
+       for sectitle, url in (
+               ('News & Politics', '/articles/news_and_politics.html'),
+               ('Technology', '/articles/technology.html'),
+               ('Business', '/articles/business.html'),
+               ('Arts', '/articles/arts.html'),
+               ('Life', '/articles/life.html'),
+               ('Health & Science', '/articles/health_and_science.html'),
+               ('Sports', '/articles/sports.html'),
+               ('Double X', '/articles/double_x.html'),
+               ):
+           url = self.INDEX + url
+           self.log('Found section:', sectitle)
+           articles = self.slate_section_articles(self.index_to_soup(url))
+           if articles:
+               ans.append((sectitle, articles))
+       return ans
+
+   def slate_section_articles(self, soup):
+       cont = soup.find('div', id='most_read')
+       seen = set()
+       ans = []
+       for h4 in cont.findAll('h4'):
+           a = h4.find('a', href=True)
+           if a is None: continue
+           url = a['href']
+           if url.startswith('/'):
+               url = self.INDEX + url
+           if url in seen: continue
+           seen.add(url)
+           title = self.tag_to_string(a)
+           parent = h4.parent
+           h3 = parent.find('h3')
+           desc = ''
+           if h3 is not None:
+               desc = self.tag_to_string(h3)
+           a = parent.find('a', rel='author')
+           if a is not None:
+               a = self.tag_to_string(a)
+           art = {'title':title, 'description':desc, 'date':'', 'url':url}
+           if a:
+               art['author'] = a
+           self.log('\tFound article:', title, ' by ', a)
+           ans.append(art)
+       return ans
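For reference, a calibre recipe's parse_index() is expected to return a list of (section_title, list_of_article_dicts) tuples, which is exactly what the rewritten version above builds. An illustrative sketch of that shape, with invented values:

# Illustrative only: the structure parse_index() returns
ans = [
    ('News & Politics', [
        {'title': 'Example story',
         'url': 'http://www.slate.com/example.html',  # hypothetical URL
         'description': 'One-line teaser', 'date': '', 'author': 'A. Writer'},
    ]),
]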
    def get_masthead_url(self):
        masthead = 'http://img.slate.com/images/redesign2008/slate_logo.gif'
@@ -299,153 +93,4 @@ class Slate(BasicNewsRecipe):
            masthead = None
        return masthead
    def stripAnchors(self, soup):
        body = soup.find('div', attrs={'id':['article_body','content']})
        if body is not None:
            paras = body.findAll('p')
            if paras is not None:
                for para in paras:
                    aTags = para.findAll('a')
                    if aTags is not None:
                        for a in aTags:
                            if a.img is None:
                                #print repr(a.renderContents())
                                a.replaceWith(a.renderContents().decode('utf-8','replace'))
        return soup

    def preprocess_html(self, soup):

        # Remove 'grayPlus4.png' images
        imgs = soup.findAll('img')
        if imgs is not None:
            for img in imgs:
                if re.search("grayPlus4.png", str(img)):
                    img.extract()

        # Delete article based upon content keywords
        if len(self.excludedDescriptionKeywords):
            excluded = re.compile('|'.join(self.excludedContentKeywords))
            found_excluded = excluded.search(str(soup))
            if found_excluded:
                print "No allowed content found, removing article"
                raise Exception('Rejected article')

        # Articles from www.thebigmoney.com use different tagging for byline, dateline and body
        head = soup.find('head')
        if head.link is not None and re.search('www\.thebigmoney\.com', str(head)):
            byline = soup.find('div', attrs={'id':'byline'})
            if byline is not None:
                byline['class'] = byline['id']

            dateline = soup.find('div', attrs={'id':'dateline'})
            if dateline is not None:
                dateline['class'] = dateline['id']

            body = soup.find('div', attrs={'id':'content'})
            if body is not None:
                body['class'] = 'article_body'

            # Synthesize a department kicker
            h3Tag = Tag(soup, 'h3')
            emTag = Tag(soup, 'em')
            emTag.insert(0, NavigableString("the big money: Today's business press"))
            h3Tag.insert(0, emTag)
            soup.body.insert(0, h3Tag)

        # Strip anchors from HTML
        return self.stripAnchors(soup)

    def postprocess_html(self, soup, first_fetch):

        # Fix up dept_kicker as <h3><em>
        dept_kicker = soup.find('div', attrs={'class':'department_kicker'})
        if dept_kicker is not None:
            kicker_strings = self.tag_to_strings(dept_kicker)
            kicker = ''.join(kicker_strings[2:])
            kicker = re.sub('\.', '', kicker)
            h3Tag = Tag(soup, "h3")
            emTag = Tag(soup, "em")
            emTag.insert(0, NavigableString(kicker))
            h3Tag.insert(0, emTag)
            dept_kicker.replaceWith(h3Tag)
        else:
            self.log("No kicker--return null")
            return None

        # Fix up the concatenated byline and dateline
        byline = soup.find(True, attrs={'class':'byline'})
        if byline is not None:
            bylineTag = Tag(soup, 'div')
            bylineTag['class'] = 'byline'
            #bylineTag['height'] = '0em'
            bylineTag.insert(0, self.tag_to_string(byline))
            byline.replaceWith(bylineTag)

        dateline = soup.find(True, attrs={'class':'dateline'})
        if dateline is not None:
            datelineTag = Tag(soup, 'div')
            datelineTag['class'] = 'dateline'
            #datelineTag['margin-top'] = '0em'
            datelineTag.insert(0, self.tag_to_string(dateline))
            dateline.replaceWith(datelineTag)

        # Change captions to italic, add <hr>
        for caption in soup.findAll(True, {'class':'caption'}):
            if caption is not None:
                emTag = Tag(soup, "em")
                emTag.insert(0, '<br />' + self.tag_to_string(caption))
                hrTag = Tag(soup, 'hr')
                emTag.insert(1, hrTag)
                caption.replaceWith(emTag)

        # Fix photos
        for photo in soup.findAll('span', attrs={'class':'imagewrapper'}):
            if photo.a is not None and photo.a.img is not None:
                divTag = Tag(soup, 'div')
                divTag['class'] = 'imagewrapper'
                divTag.insert(0, photo.a.img)
                photo.replaceWith(divTag)

        return soup

    def postprocess_book(self, oeb, opts, log):

        def extract_byline(href):
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            byline = soup.find(True, attrs={'class':'byline'})
            if byline is not None:
                return self.tag_to_string(byline, use_alt=False)
            else:
                return None

        def extract_description(href):
            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
            paragraphs = soup.findAll('p')
            for p in paragraphs:
                if self.tag_to_string(p, use_alt=False).startswith('By ') or \
                   self.tag_to_string(p, use_alt=False).startswith('Posted '):
                    continue
                comment = p.find(text=lambda text: isinstance(text, Comment))
                if comment is not None:
                    continue
                else:
                    return self.tag_to_string(p, use_alt=False)[:self.summary_length] + '...'

            return None

        # Method entry point here
        # Single section toc looks different than multi-section tocs
        if oeb.toc.depth() == 2:
            for article in oeb.toc:
                if article.author is None:
                    article.author = extract_byline(article.href)
                if article.description is None:
                    article.description = extract_description(article.href)
        elif oeb.toc.depth() == 3:
            for section in oeb.toc:
                for article in section:
                    if article.author is None:
                        article.author = extract_byline(article.href)
                    if article.description is None:
                        article.description = extract_description(article.href)
17  recipes/wow.recipe  Normal file
@@ -0,0 +1,17 @@
from calibre.web.feeds.news import BasicNewsRecipe

class WoW(BasicNewsRecipe):
    title = u'WoW Insider'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 1 #days
    max_articles_per_feed = 25
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('WoW',
         'http://wow.joystiq.com/rss.xml')
    ]
@@ -224,6 +224,9 @@ try:
 except:
     try:
         HOST=get_ip_address('wlan0')
     except:
-        HOST='192.168.1.2'
+        try:
+            HOST=get_ip_address('ppp0')
+        except:
+            HOST='192.168.1.2'
@@ -336,7 +336,7 @@ class Build(Command):
        oinc = ['/Fo'+obj] if iswindows else ['-o', obj]
        cmd = [compiler] + cflags + ext.cflags + einc + sinc + oinc
        self.info(' '.join(cmd))
-       subprocess.check_call(cmd)
+       self.check_call(cmd)

        dest = self.dest(ext)
        elib = self.lib_dirs_to_ldflags(ext.lib_dirs)
@@ -350,18 +350,32 @@ class Build(Command):
        else:
            cmd += objects + ext.extra_objs + ['-o', dest] + ldflags + ext.ldflags + elib + xlib
        self.info('\n\n', ' '.join(cmd), '\n\n')
-       subprocess.check_call(cmd)
+       self.check_call(cmd)
        if iswindows:
            #manifest = dest+'.manifest'
            #cmd = [MT, '-manifest', manifest, '-outputresource:%s;2'%dest]
            #self.info(*cmd)
-           #subprocess.check_call(cmd)
+           #self.check_call(cmd)
            #os.remove(manifest)
            for x in ('.exp', '.lib'):
                x = os.path.splitext(dest)[0]+x
                if os.path.exists(x):
                    os.remove(x)

+   def check_call(self, *args, **kwargs):
+       """print cmdline if an error occurred
+
+       If something is missing (qmake e.g.) you get a non-informative error
+        self.check_call(qmc + [ext.name+'.pro'])
+       so you would have to look at the source to see the actual command.
+       """
+       try:
+           subprocess.check_call(*args, **kwargs)
+       except:
+           cmdline = ' '.join(['"%s"' % (arg) if ' ' in arg else arg for arg in args[0]])
+           print "Error while executing: %s\n" % (cmdline)
+           raise
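The new check_call() wrapper is behaviour-preserving on success; its only job is to echo the exact command line before re-raising, so a failure caused by a missing tool such as qmake becomes diagnosable. A rough standalone sketch of the same idea, with a hypothetical sample command:

import subprocess

def check_call(*args, **kwargs):
    # Print the command that failed, then re-raise the original error
    try:
        subprocess.check_call(*args, **kwargs)
    except:
        cmdline = ' '.join(['"%s"' % (arg) if ' ' in arg else arg for arg in args[0]])
        print('Error while executing: %s' % (cmdline))
        raise

check_call(['echo', 'hello world'])  # hypothetical usage; quoting kicks in for the spaced argument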
    def build_qt_objects(self, ext):
        obj_pat = 'release\\*.obj' if iswindows else '*.o'
        objects = glob.glob(obj_pat)
@@ -380,8 +394,8 @@ class Build(Command):
        qmc = [QMAKE, '-o', 'Makefile']
        if iswindows:
            qmc += ['-spec', 'win32-msvc2008']
-       subprocess.check_call(qmc + [ext.name+'.pro'])
-       subprocess.check_call([make, '-f', 'Makefile'])
+       self.check_call(qmc + [ext.name+'.pro'])
+       self.check_call([make, '-f', 'Makefile'])
        objects = glob.glob(obj_pat)
        return list(map(self.a, objects))
|
|||||||
cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\
|
cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\
|
||||||
pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf]
|
pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf]
|
||||||
self.info(' '.join(cmd))
|
self.info(' '.join(cmd))
|
||||||
subprocess.check_call(cmd)
|
self.check_call(cmd)
|
||||||
module = self.j(src_dir, self.b(dest))
|
module = self.j(src_dir, self.b(dest))
|
||||||
if self.newer(dest, [sbf]+qt_objects):
|
if self.newer(dest, [sbf]+qt_objects):
|
||||||
mf = self.j(src_dir, 'Makefile')
|
mf = self.j(src_dir, 'Makefile')
|
||||||
@ -417,7 +431,7 @@ class Build(Command):
|
|||||||
makefile.extra_include_dirs = ext.inc_dirs
|
makefile.extra_include_dirs = ext.inc_dirs
|
||||||
makefile.generate()
|
makefile.generate()
|
||||||
|
|
||||||
subprocess.check_call([make, '-f', mf], cwd=src_dir)
|
self.check_call([make, '-f', mf], cwd=src_dir)
|
||||||
shutil.copy2(module, dest)
|
shutil.copy2(module, dest)
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
@ -457,7 +471,7 @@ class BuildPDF2XML(Command):
|
|||||||
cmd += ['-I'+x for x in poppler_inc_dirs+magick_inc_dirs]
|
cmd += ['-I'+x for x in poppler_inc_dirs+magick_inc_dirs]
|
||||||
cmd += ['/Fo'+obj, src]
|
cmd += ['/Fo'+obj, src]
|
||||||
self.info(*cmd)
|
self.info(*cmd)
|
||||||
subprocess.check_call(cmd)
|
self.check_call(cmd)
|
||||||
objects.append(obj)
|
objects.append(obj)
|
||||||
|
|
||||||
if self.newer(dest, objects):
|
if self.newer(dest, objects):
|
||||||
@ -470,7 +484,7 @@ class BuildPDF2XML(Command):
|
|||||||
png_libs+magick_libs+poppler_libs+ft_libs+jpg_libs+pdfreflow_libs]
|
png_libs+magick_libs+poppler_libs+ft_libs+jpg_libs+pdfreflow_libs]
|
||||||
cmd += ['/OUT:'+dest] + objects
|
cmd += ['/OUT:'+dest] + objects
|
||||||
self.info(*cmd)
|
self.info(*cmd)
|
||||||
subprocess.check_call(cmd)
|
self.check_call(cmd)
|
||||||
|
|
||||||
self.info('Binary installed as', dest)
|
self.info('Binary installed as', dest)
|
||||||
|
|
||||||
|
@@ -20,17 +20,23 @@ for x in [
    EXCLUDES.extend(['--exclude', x])
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]

+def get_rsync_pw():
+   return open('/home/kovid/work/kde/conf/buildbot').read().partition(
+           ':')[-1].strip()
+
class Rsync(Command):

    description = 'Sync source tree from development machine'

    SYNC_CMD = ' '.join(BASE_RSYNC+SAFE_EXCLUDES+
-           ['rsync://{host}/work/{project}', '..'])
+           ['rsync://buildbot@{host}/work/{project}', '..'])

    def run(self, opts):
        cmd = self.SYNC_CMD.format(host=HOST, project=PROJECT)
+       env = dict(os.environ)
+       env['RSYNC_PASSWORD'] = get_rsync_pw()
        self.info(cmd)
-       subprocess.check_call(cmd, shell=True)
+       subprocess.check_call(cmd, shell=True, env=env)


class Push(Command):
@@ -81,7 +87,8 @@ class VMInstaller(Command):

    def get_build_script(self):
-       ans = '\n'.join(self.BUILD_PREFIX)+'\n\n'
+       rs = ['export RSYNC_PASSWORD=%s'%get_rsync_pw()]
+       ans = '\n'.join(self.BUILD_PREFIX + rs)+'\n\n'
        ans += ' && \\\n'.join(self.BUILD_RSYNC) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_CLEAN) + ' && \\\n'
        ans += ' && \\\n'.join(self.BUILD_BUILD) + ' && \\\n'
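Taken together, the two hunks above keep the rsync credential off the command line: the Rsync command injects RSYNC_PASSWORD into the child environment, and the VM build script exports the same variable. A small sketch of the environment approach, assuming a password file in the user:password format used above (the file path and host are hypothetical):

import os, subprocess

env = dict(os.environ)
# rsync consults RSYNC_PASSWORD for daemon authentication instead of prompting
env['RSYNC_PASSWORD'] = open('/path/to/password_file').read().partition(':')[-1].strip()
subprocess.check_call('rsync rsync://buildbot@example-host/work/project ..',
        shell=True, env=env)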
@@ -278,6 +278,8 @@ def get_proxies(debug=True):
            continue
        if proxy.startswith(key+'://'):
            proxy = proxy[len(key)+3:]
+           if key == 'https' and proxy.startswith('http://'):
+               proxy = proxy[7:]
            if proxy.endswith('/'):
                proxy = proxy[:-1]
            if len(proxy) > 4:
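The new branch handles environments where the https proxy variable is set to an http:// URL; after stripping the scheme, the remainder is treated as host:port like every other entry. A quick sketch of the normalization the loop performs, with sample values that are illustrative only:

proxies = {'http': 'http://proxy.example.com:3128/',
           'https': 'http://proxy.example.com:3128/'}  # sample values
for key, proxy in list(proxies.items()):
    if proxy.startswith(key + '://'):
        proxy = proxy[len(key) + 3:]
    if key == 'https' and proxy.startswith('http://'):
        proxy = proxy[7:]   # an https proxy given as an http:// URL
    if proxy.endswith('/'):
        proxy = proxy[:-1]
    proxies[key] = proxy
print(proxies)  # {'http': 'proxy.example.com:3128', 'https': 'proxy.example.com:3128'}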
@@ -502,6 +502,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
# }}}

from calibre.ebooks.comic.input import ComicInput
+from calibre.ebooks.djvu.input import DJVUInput
from calibre.ebooks.epub.input import EPUBInput
from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.html.input import HTMLInput
@@ -555,7 +556,8 @@ from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.kindle.driver import KINDLE, KINDLE2, KINDLE_DX
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
-from calibre.devices.prs505.driver import PRS505, PRST1
+from calibre.devices.prs505.driver import PRS505
+from calibre.devices.prst1.driver import PRST1
from calibre.devices.user_defined.driver import USER_DEFINED
from calibre.devices.android.driver import ANDROID, S60, WEBOS
from calibre.devices.nokia.driver import N770, N810, E71X, E52
@@ -599,6 +601,7 @@ plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]

plugins += [
    ComicInput,
+   DJVUInput,
    EPUBInput,
    FB2Input,
    HTMLInput,
@@ -1143,6 +1146,16 @@ class StoreAmazonDEKindleStore(StoreBase):
    formats = ['KINDLE']
    affiliate = True

+class StoreAmazonFRKindleStore(StoreBase):
+   name = 'Amazon FR Kindle'
+   author = 'Charles Haley'
+   description = u'Tous les ebooks Kindle'
+   actual_plugin = 'calibre.gui2.store.stores.amazon_fr_plugin:AmazonFRKindleStore'
+
+   headquarters = 'DE'
+   formats = ['KINDLE']
+   affiliate = True
+
class StoreAmazonUKKindleStore(StoreBase):
    name = 'Amazon UK Kindle'
    author = 'Charles Haley'
@@ -1520,6 +1533,7 @@ plugins += [
    StoreArchiveOrgStore,
    StoreAmazonKindleStore,
    StoreAmazonDEKindleStore,
+   StoreAmazonFRKindleStore,
    StoreAmazonUKKindleStore,
    StoreBaenWebScriptionStore,
    StoreBNStore,
@@ -4,7 +4,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

-import sys
from itertools import izip
from xml.sax.saxutils import escape

@@ -217,7 +217,7 @@ class DevicePlugin(Plugin):
        '''
        Unix version of :meth:`can_handle_windows`

-       :param device_info: Is a tupe of (vid, pid, bcd, manufacturer, product,
+       :param device_info: Is a tuple of (vid, pid, bcd, manufacturer, product,
        serial number)

        '''
@@ -414,7 +414,8 @@ class DevicePlugin(Plugin):
    @classmethod
    def config_widget(cls):
        '''
-       Should return a QWidget. The QWidget contains the settings for the device interface
+       Should return a QWidget. The QWidget contains the settings for the
+       device interface
        '''
        raise NotImplementedError()

@@ -429,8 +430,9 @@ class DevicePlugin(Plugin):
    @classmethod
    def settings(cls):
        '''
-       Should return an opts object. The opts object should have at least one attribute
-       `format_map` which is an ordered list of formats for the device.
+       Should return an opts object. The opts object should have at least one
+       attribute `format_map` which is an ordered list of formats for the
+       device.
        '''
        raise NotImplementedError()

@@ -516,3 +518,9 @@ class BookList(list):
        '''
        raise NotImplementedError()
+
+   def prepare_addable_books(self, paths):
+       '''
+       Given a list of paths, returns another list of paths. These paths
+       point to addable versions of the books.
+       '''
+       return paths
@@ -299,34 +299,3 @@ class PRS505(USBMS):
                    f.write(metadata.thumbnail[-1])
            debug_print('Cover uploaded to: %r'%cpath)

-class PRST1(USBMS):
-   name = 'SONY PRST1 and newer Device Interface'
-   gui_name = 'SONY Reader'
-   description = _('Communicate with Sony PRST1 and newer eBook readers')
-   author = 'Kovid Goyal'
-   supported_platforms = ['windows', 'osx', 'linux']
-
-   FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
-   VENDOR_ID = [0x054c]   #: SONY Vendor Id
-   PRODUCT_ID = [0x05c2]
-   BCD = [0x226]
-
-   VENDOR_NAME = 'SONY'
-   WINDOWS_MAIN_MEM = re.compile(
-           r'(PRS-T1&)'
-           )
-
-   THUMBNAIL_HEIGHT = 217
-   SCAN_FROM_ROOT = True
-   EBOOK_DIR_MAIN = __appname__
-
-   def windows_filter_pnp_id(self, pnp_id):
-       return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id
-
-   def get_carda_ebook_dir(self, for_upload=False):
-       if for_upload:
-           return __appname__
-       return self.EBOOK_DIR_CARD_A
src/calibre/devices/prst1/__init__.py
Normal file
7
src/calibre/devices/prst1/__init__.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
481  src/calibre/devices/prst1/driver.py  Normal file
@@ -0,0 +1,481 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Device driver for the SONY T1 devices
'''

import os, time, re
import sqlite3 as sqlite
from contextlib import closing

from calibre.devices.usbms.driver import USBMS, debug_print
from calibre.devices.usbms.device import USBDevice
from calibre.devices.usbms.books import CollectionsBookList
from calibre.devices.usbms.books import BookList
from calibre.ebooks.metadata import authors_to_sort_string
from calibre.constants import islinux

DBPATH = 'Sony_Reader/database/books.db'
THUMBPATH = 'Sony_Reader/database/cache/books/%s/thumbnail/main_thumbnail.jpg'

class ImageWrapper(object):
    def __init__(self, image_path):
        self.image_path = image_path

class PRST1(USBMS):
    name           = 'SONY PRST1 and newer Device Interface'
    gui_name       = 'SONY Reader'
    description    = _('Communicate with the PRST1 and newer SONY eBook readers')
    author         = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']
    path_sep = '/'
    booklist_class = CollectionsBookList

    FORMATS      = ['epub', 'pdf', 'txt']
    CAN_SET_METADATA = ['collections']
    CAN_DO_DEVICE_DB_PLUGBOARD = True

    VENDOR_ID    = [0x054c]   #: SONY Vendor Id
    PRODUCT_ID   = [0x05c2]
    BCD          = [0x226]

    VENDOR_NAME        = 'SONY'
    WINDOWS_MAIN_MEM   = re.compile(
            r'(PRS-T1&)'
            )
    WINDOWS_CARD_A_MEM = re.compile(
            r'(PRS-T1__SD&)'
            )
    MAIN_MEMORY_VOLUME_LABEL  = 'SONY Reader Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'SONY Reader Storage Card'

    THUMBNAIL_HEIGHT = 144
    SUPPORTS_SUB_DIRS = True
    SUPPORTS_USE_AUTHOR_SORT = True
    MUST_READ_METADATA = True
    EBOOK_DIR_MAIN = 'Sony_Reader/media/books'

    EXTRA_CUSTOMIZATION_MESSAGE = [
        _('Comma separated list of metadata fields '
            'to turn into collections on the device. Possibilities include: ')+\
                    'series, tags, authors',
        _('Upload separate cover thumbnails for books') +
            ':::'+_('Normally, the SONY readers get the cover image from the'
                ' ebook file itself. With this option, calibre will send a '
                'separate cover image to the reader, useful if you are '
                'sending DRMed books in which you cannot change the cover.'),
        _('Refresh separate covers when using automatic management') +
            ':::' +
            _('Set this option to have separate book covers uploaded '
                'every time you connect your device. Unset this option if '
                'you have so many books on the reader that performance is '
                'unacceptable.'),
        _('Preserve cover aspect ratio when building thumbnails') +
            ':::' +
            _('Set this option if you want the cover thumbnails to have '
                'the same aspect ratio (width to height) as the cover. '
                'Unset it if you want the thumbnail to be the maximum size, '
                'ignoring aspect ratio.'),
    ]
    EXTRA_CUSTOMIZATION_DEFAULT = [
                ', '.join(['series', 'tags']),
                True,
                False,
                True,
    ]

    OPT_COLLECTIONS    = 0
    OPT_UPLOAD_COVERS  = 1
    OPT_REFRESH_COVERS = 2
    OPT_PRESERVE_ASPECT_RATIO = 3

    plugboards = None
    plugboard_func = None

    def post_open_callback(self):
        # Set the thumbnail width to the theoretical max if the user has asked
        # that we do not preserve aspect ratio
        if not self.settings().extra_customization[self.OPT_PRESERVE_ASPECT_RATIO]:
            self.THUMBNAIL_WIDTH = 108

    def windows_filter_pnp_id(self, pnp_id):
        return '_LAUNCHER' in pnp_id or '_SETTING' in pnp_id

    def get_carda_ebook_dir(self, for_upload=False):
        if for_upload:
            return self.EBOOK_DIR_MAIN
        return self.EBOOK_DIR_CARD_A

    def get_main_ebook_dir(self, for_upload=False):
        if for_upload:
            return self.EBOOK_DIR_MAIN
        return ''

    def can_handle(self, devinfo, debug=False):
        if islinux:
            dev = USBDevice(devinfo)
            main, carda, cardb = self.find_device_nodes(detected_device=dev)
            if main is None and carda is None and cardb is None:
                if debug:
                    print ('\tPRS-T1: Appears to be in non data mode'
                            ' or was ejected, ignoring')
                return False
        return True

    def books(self, oncard=None, end_session=True):
        dummy_bl = BookList(None, None, None)

        if (
                (oncard == 'carda' and not self._card_a_prefix) or
                (oncard and oncard != 'carda')
            ):
            self.report_progress(1.0, _('Getting list of books on device...'))
            return dummy_bl

        prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix

        # Let parent driver get the books
        self.booklist_class.rebuild_collections = self.rebuild_collections
        bl = USBMS.books(self, oncard=oncard, end_session=end_session)

        dbpath = self.normalize_path(prefix + DBPATH)
        debug_print("SQLite DB Path: " + dbpath)

        with closing(sqlite.connect(dbpath)) as connection:
            # Replace undecodable characters in the db instead of erroring out
            connection.text_factory = lambda x: unicode(x, "utf-8", "replace")

            cursor = connection.cursor()
            # Query collections
            query = '''
                SELECT books._id, collection.title
                    FROM collections
                    LEFT OUTER JOIN books
                    LEFT OUTER JOIN collection
                    WHERE collections.content_id = books._id AND
                    collections.collection_id = collection._id
                '''
            cursor.execute(query)

            bl_collections = {}
            for i, row in enumerate(cursor):
                bl_collections.setdefault(row[0], [])
                bl_collections[row[0]].append(row[1])

            for idx, book in enumerate(bl):
                query = 'SELECT _id, thumbnail FROM books WHERE file_path = ?'
                t = (book.lpath,)
                cursor.execute(query, t)

                for i, row in enumerate(cursor):
                    book.device_collections = bl_collections.get(row[0], None)
                    thumbnail = row[1]
                    if thumbnail is not None:
                        thumbnail = self.normalize_path(prefix + thumbnail)
                        book.thumbnail = ImageWrapper(thumbnail)

            cursor.close()

        return bl

    def set_plugboards(self, plugboards, pb_func):
        self.plugboards = plugboards
        self.plugboard_func = pb_func

    def sync_booklists(self, booklists, end_session=True):
        debug_print('PRST1: starting sync_booklists')

        opts = self.settings()
        if opts.extra_customization:
            collections = [x.strip() for x in
                    opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
        else:
            collections = []
        debug_print('PRST1: collection fields:', collections)

        if booklists[0] is not None:
            self.update_device_database(booklists[0], collections, None)
        if booklists[1] is not None:
            self.update_device_database(booklists[1], collections, 'carda')

        USBMS.sync_booklists(self, booklists, end_session=end_session)
        debug_print('PRST1: finished sync_booklists')

    def update_device_database(self, booklist, collections_attributes, oncard):
        debug_print('PRST1: starting update_device_database')

        plugboard = None
        if self.plugboard_func:
            plugboard = self.plugboard_func(self.__class__.__name__,
                    'device_db', self.plugboards)
            debug_print("PRST1: Using Plugboard", plugboard)

        prefix = self._card_a_prefix if oncard == 'carda' else self._main_prefix
        if prefix is None:
            # Reader has no sd card inserted
            return
        source_id = 1 if oncard == 'carda' else 0

        dbpath = self.normalize_path(prefix + DBPATH)
        debug_print("SQLite DB Path: " + dbpath)

        collections = booklist.get_collections(collections_attributes)

        with closing(sqlite.connect(dbpath)) as connection:
            self.update_device_books(connection, booklist, source_id, plugboard)
            self.update_device_collections(connection, booklist, collections, source_id)

        debug_print('PRST1: finished update_device_database')

    def update_device_books(self, connection, booklist, source_id, plugboard):
        opts = self.settings()
        upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
        refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]

        cursor = connection.cursor()

        # Get existing books
        query = 'SELECT file_path, _id FROM books'
        cursor.execute(query)

        db_books = {}
        for i, row in enumerate(cursor):
            lpath = row[0].replace('\\', '/')
            db_books[lpath] = row[1]

        for book in booklist:
            # Run through plugboard if needed
            if plugboard is not None:
                newmi = book.deepcopy_metadata()
                newmi.template_to_attribute(book, plugboard)
            else:
                newmi = book

            # Get Metadata We Want
            lpath = book.lpath
            try:
                if opts.use_author_sort:
                    if newmi.author_sort:
                        author = newmi.author_sort
                    else:
                        author = authors_to_sort_string(newmi.authors)
                else:
                    author = newmi.authors[0]
            except:
                author = _('Unknown')
            title = newmi.title or _('Unknown')

            # Get modified date
            modified_date = os.path.getmtime(book.path)
            time_offset = time.altzone if time.daylight else time.timezone
            modified_date = (modified_date - time_offset) * 1000

            if lpath not in db_books:
                query = '''
                    INSERT INTO books
                    (title, author, source_id, added_date, modified_date,
                    file_path, file_name, file_size, mime_type, corrupted,
                    prevent_delete)
                    values (?,?,?,?,?,?,?,?,?,0,0)
                    '''
                t = (title, author, source_id, int(time.time() * 1000),
                        modified_date, lpath,
                        os.path.basename(lpath), book.size, book.mime)
                cursor.execute(query, t)
                book.bookId = cursor.lastrowid
                if upload_covers:
                    self.upload_book_cover(connection, book, source_id)
                debug_print('Inserted New Book: ' + book.title)
            else:
                query = '''
                    UPDATE books
                    SET title = ?, author = ?, modified_date = ?, file_size = ?
                    WHERE file_path = ?
                    '''
                t = (title, author, modified_date, book.size, lpath)
                cursor.execute(query, t)
                book.bookId = db_books[lpath]
                if refresh_covers:
                    self.upload_book_cover(connection, book, source_id)
                db_books[lpath] = None

        for book, bookId in db_books.items():
            if bookId is not None:
                # Remove From Collections
                query = 'DELETE FROM collections WHERE content_id = ?'
                t = (bookId,)
                cursor.execute(query, t)
                # Remove from Books
                query = 'DELETE FROM books where _id = ?'
                t = (bookId,)
                cursor.execute(query, t)
                debug_print('Deleted Book:' + book)

        connection.commit()
        cursor.close()

    def update_device_collections(self, connection, booklist, collections,
            source_id):
        cursor = connection.cursor()

        if collections:
            # Get existing collections
            query = 'SELECT _id, title FROM collection'
            cursor.execute(query)

            db_collections = {}
            for i, row in enumerate(cursor):
                db_collections[row[1]] = row[0]

            for collection, books in collections.items():
                if collection not in db_collections:
                    query = 'INSERT INTO collection (title, source_id) VALUES (?,?)'
                    t = (collection, source_id)
                    cursor.execute(query, t)
                    db_collections[collection] = cursor.lastrowid
                    debug_print('Inserted New Collection: ' + collection)

                # Get existing books in collection
                query = '''
                    SELECT books.file_path, content_id
                    FROM collections
                    LEFT OUTER JOIN books
                    WHERE collection_id = ? AND books._id = collections.content_id
                    '''
                t = (db_collections[collection],)
                cursor.execute(query, t)

                db_books = {}
                for i, row in enumerate(cursor):
                    db_books[row[0]] = row[1]

                for idx, book in enumerate(books):
                    if collection not in book.device_collections:
                        book.device_collections.append(collection)
                    if db_books.get(book.lpath, None) is None:
                        query = '''
                            INSERT INTO collections (collection_id, content_id,
                            added_order) values (?,?,?)
                            '''
                        t = (db_collections[collection], book.bookId, idx)
                        cursor.execute(query, t)
                        debug_print('Inserted Book Into Collection: ' +
                                book.title + ' -> ' + collection)
                    else:
                        query = '''
                            UPDATE collections
                            SET added_order = ?
                            WHERE content_id = ? AND collection_id = ?
                            '''
                        t = (idx, book.bookId, db_collections[collection])
                        cursor.execute(query, t)

                    db_books[book.lpath] = None

                for bookPath, bookId in db_books.items():
                    if bookId is not None:
                        query = ('DELETE FROM collections '
                                'WHERE content_id = ? AND collection_id = ? ')
                        t = (bookId, db_collections[collection],)
                        cursor.execute(query, t)
                        debug_print('Deleted Book From Collection: ' + bookPath
                                + ' -> ' + collection)

                db_collections[collection] = None

            for collection, collectionId in db_collections.items():
                if collectionId is not None:
                    # Remove Books from Collection
                    query = ('DELETE FROM collections '
                            'WHERE collection_id = ?')
                    t = (collectionId,)
                    cursor.execute(query, t)
                    # Remove Collection
                    query = ('DELETE FROM collection '
                            'WHERE _id = ?')
                    t = (collectionId,)
                    cursor.execute(query, t)
                    debug_print('Deleted Collection: ' + collection)

        connection.commit()
        cursor.close()

    def rebuild_collections(self, booklist, oncard):
        debug_print('PRST1: starting rebuild_collections')

        opts = self.settings()
        if opts.extra_customization:
            collections = [x.strip() for x in
                    opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
        else:
            collections = []
        debug_print('PRST1: collection fields:', collections)

        self.update_device_database(booklist, collections, oncard)

        debug_print('PRS-T1: finished rebuild_collections')

    def upload_cover(self, path, filename, metadata, filepath):
        debug_print('PRS-T1: uploading cover')

        if filepath.startswith(self._main_prefix):
            prefix = self._main_prefix
            source_id = 0
        else:
            prefix = self._card_a_prefix
            source_id = 1

        metadata.lpath = filepath.partition(prefix)[2]
        metadata.lpath = metadata.lpath.replace('\\', '/')
        dbpath = self.normalize_path(prefix + DBPATH)
        debug_print("SQLite DB Path: " + dbpath)

        with closing(sqlite.connect(dbpath)) as connection:
            cursor = connection.cursor()

            query = 'SELECT _id FROM books WHERE file_path = ?'
            t = (metadata.lpath,)
            cursor.execute(query, t)

            for i, row in enumerate(cursor):
                metadata.bookId = row[0]

            cursor.close()

            if getattr(metadata, 'bookId', None) is not None:
                debug_print('PRS-T1: refreshing cover for book being sent')
                self.upload_book_cover(connection, metadata, source_id)

        debug_print('PRS-T1: done uploading cover')

    def upload_book_cover(self, connection, book, source_id):
        debug_print('PRST1: Uploading/Refreshing Cover for ' + book.title)
        if not book.thumbnail or not book.thumbnail[-1]:
            return
        cursor = connection.cursor()

        thumbnail_path = THUMBPATH%book.bookId

        prefix = self._main_prefix if source_id is 0 else self._card_a_prefix
        thumbnail_file_path = os.path.join(prefix, *thumbnail_path.split('/'))
        thumbnail_dir_path = os.path.dirname(thumbnail_file_path)
        if not os.path.exists(thumbnail_dir_path):
            os.makedirs(thumbnail_dir_path)

        with open(thumbnail_file_path, 'wb') as f:
            f.write(book.thumbnail[-1])

        query = 'UPDATE books SET thumbnail = ? WHERE _id = ?'
        t = (thumbnail_path, book.bookId,)
        cursor.execute(query, t)

        connection.commit()
        cursor.close()
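One subtlety in update_device_books() above: the Reader's books table stores timestamps as milliseconds since the epoch shifted to local time, so the driver subtracts the UTC offset before multiplying by 1000. A standalone sketch of that conversion, using a hypothetical file path:

import os, time

path = '/tmp/example.epub'  # hypothetical file
open(path, 'wb').close()

mtime = os.path.getmtime(path)                      # seconds since epoch, UTC
offset = time.altzone if time.daylight else time.timezone
modified_date = (mtime - offset) * 1000             # local-time milliseconds, as the db expects
print(int(modified_date))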
@@ -483,7 +483,7 @@ class Device(DeviceConfig, DevicePlugin):
        self._card_a_prefix = get_card_prefix('carda')
        self._card_b_prefix = get_card_prefix('cardb')

-   def find_device_nodes(self):
+   def find_device_nodes(self, detected_device=None):

        def walk(base):
            base = os.path.abspath(os.path.realpath(base))
@@ -507,8 +507,11 @@ class Device(DeviceConfig, DevicePlugin):
        d, j = os.path.dirname, os.path.join
        usb_dir = None

+       if detected_device is None:
+           detected_device = self.detected_device
+
        def test(val, attr):
-           q = getattr(self.detected_device, attr)
+           q = getattr(detected_device, attr)
            return q == val

        for x, isfile in walk('/sys/devices'):
@@ -596,6 +599,8 @@ class Device(DeviceConfig, DevicePlugin):
            label = self.STORAGE_CARD2_VOLUME_LABEL
        if not label:
            label = self.STORAGE_CARD_VOLUME_LABEL + ' 2'
+       if not label:
+           label = 'E-book Reader (%s)'%type
        extra = 0
        while True:
            q = ' (%d)'%extra if extra else ''
12  src/calibre/ebooks/djvu/__init__.py  Normal file
@@ -0,0 +1,12 @@
#!/usr/bin/env python
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <anthon@mnt.org>'
__docformat__ = 'restructuredtext en'

'''
Used for DJVU input
'''
146  src/calibre/ebooks/djvu/djvu.py  Normal file
@@ -0,0 +1,146 @@
#! /usr/bin/env python
# coding: utf-8
from __future__ import (unicode_literals, division, absolute_import,
        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'

# this code is based on:
# Lizardtech DjVu Reference
# DjVu v3
# November 2005

import sys
import struct
from cStringIO import StringIO

from .djvubzzdec import BZZDecoder

class DjvuChunk(object):
    def __init__(self, buf, start, end, align=True, bigendian=True,
            inclheader=False, verbose=0):
        self.subtype = None
        self._subchunks = []
        self.buf = buf
        pos = start + 4
        self.type = buf[start:pos]
        self.align = align  # whether to align to word (2-byte) boundaries
        self.headersize = 0 if inclheader else 8
        if bigendian:
            self.strflag = b'>'
        else:
            self.strflag = b'<'
        oldpos, pos = pos, pos+4
        self.size = struct.unpack(self.strflag+b'L', buf[oldpos:pos])[0]
        self.dataend = pos + self.size - (8 if inclheader else 0)
        if self.type == b'FORM':
            oldpos, pos = pos, pos+4
            #print oldpos, pos
            self.subtype = buf[oldpos:pos]
            #self.headersize += 4
        self.datastart = pos
        if verbose > 0:
            print ('found', self.type, self.subtype, pos, self.size)
        if self.type in b'FORM'.split():
            if verbose > 0:
                print ('processing substuff %d %d (%x)' % (pos, self.dataend,
                    self.dataend))
            numchunks = 0
            while pos < self.dataend:
                x = DjvuChunk(buf, pos, start+self.size, verbose=verbose)
                numchunks += 1
                self._subchunks.append(x)
                newpos = pos + x.size + x.headersize + (1 if (x.size % 2) else 0)
                if verbose > 0:
                    print ('newpos %d %d (%x, %x) %d' % (newpos, self.dataend,
                        newpos, self.dataend, x.headersize))
                pos = newpos
            if verbose > 0:
                print ('  end of chunk %d (%x)' % (pos, pos))

    def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100):
        if out:
            out.write(b'  ' * indent)
            out.write(b'%s%s [%d]\n' % (self.type,
                b':' + self.subtype if self.subtype else b'', self.size))
        if txtout and self.type == b'TXTz':
            inbuf = StringIO(self.buf[self.datastart: self.dataend])
            outbuf = StringIO()
            decoder = BZZDecoder(inbuf, outbuf)
            while True:
                xxres = decoder.convert(1024 * 1024)
                if not xxres:
                    break
            res = outbuf.getvalue()
            l = 0
            for x in res[:3]:
                l <<= 8
                l += ord(x)
            if verbose > 0 and out:
                print >> out, l
            txtout.write(res[3:3+l])
            txtout.write(b'\n\f')
        if txtout and self.type == b'TXTa':
            res = self.buf[self.datastart: self.dataend]
            l = 0
            for x in res[:3]:
                l <<= 8
                l += ord(x)
            if verbose > 0 and out:
                print >> out, l
            txtout.write(res[3:3+l])
            txtout.write(b'\n\f')
        if indent >= maxlevel:
            return
        for schunk in self._subchunks:
            schunk.dump(verbose=verbose, indent=indent+1, out=out, txtout=txtout)

class DJVUFile(object):
    def __init__(self, instream, verbose=0):
        self.instream = instream
        buf = self.instream.read(4)
        assert(buf == b'AT&T')
        buf = self.instream.read()
        self.dc = DjvuChunk(buf, 0, len(buf), verbose=verbose)

    def get_text(self, outfile=None):
        self.dc.dump(txtout=outfile)

    def dump(self, outfile=None, maxlevel=0):
        self.dc.dump(out=outfile, maxlevel=maxlevel)

def main():
    from ruamel.util.program import Program
    class DJVUDecoder(Program):
        def __init__(self):
            Program.__init__(self)

        def parser_setup(self):
            Program.parser_setup(self)
            #self._argparser.add_argument('--combine', '-c', action=CountAction, const=1, nargs=0)
            #self._argparser.add_argument('--combine', '-c', type=int, default=1)
            #self._argparser.add_argument('--segments', '-s', action='append', nargs='+')
            #self._argparser.add_argument('--force', '-f', action='store_true')
            #self._argparser.add_argument('classname')
            self._argparser.add_argument('--text', '-t', action='store_true')
            self._argparser.add_argument('--dump', type=int, default=0)
            self._argparser.add_argument('file', nargs='+')

        def run(self):
            if self._args.verbose > 1:  # can be negative with --quiet
                print (self._args.file)
            x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose)
            if self._args.text:
                print (x.get_text(sys.stdout))
            if self._args.dump:
                x.dump(sys.stdout, maxlevel=self._args.dump)
            return 0

    tt = DJVUDecoder()
    res = tt.result
    if res != 0:
        print (res)

if __name__ == '__main__':
    main()
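In DjvuChunk.dump() above, both TXT chunk flavours (TXTz after BZZ decompression, TXTa as-is) start with a 3-byte big-endian length followed by that many bytes of text; the loop over res[:3] builds that 24-bit integer by hand. The same decode, sketched standalone with an invented payload:

import struct

payload = b'\x00\x00\x05hello trailing-bytes'   # illustrative TXT chunk body
length = 0
for ch in bytearray(payload[:3]):   # 3-byte big-endian length prefix
    length = (length << 8) | ch
# Equivalent to zero-padding to 4 bytes and unpacking as an unsigned int
assert length == struct.unpack('>I', b'\x00' + payload[:3])[0]
print(payload[3:3 + length])        # b'hello'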
746
src/calibre/ebooks/djvu/djvubzzdec.py
Normal file
746
src/calibre/ebooks/djvu/djvubzzdec.py
Normal file
@ -0,0 +1,746 @@
|
|||||||
|
#! /usr/bin/env python
# coding: utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'
#__docformat__ = 'restructuredtext en'

# Copyright (C) 2011 Anthon van der Neut, Ruamel bvba
# Adapted from Leon Bottou's djvulibre C++ code,
# ( ZPCodec.{cpp,h} and BSByteStream.{cpp,h} )
# that code was first converted to C removing any dependencies on the DJVU libre
# framework for ByteStream, making it into a ctypes callable shared object
# then to python, and remade into a class
original_copyright_notice = '''
//C- -------------------------------------------------------------------
//C- DjVuLibre-3.5
//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
//C- Copyright (c) 2001 AT&T
//C-
//C- This software is subject to, and may be distributed under, the
//C- GNU General Public License, either Version 2 of the license,
//C- or (at your option) any later version. The license should have
//C- accompanied the software or you may obtain a copy of the license
//C- from the Free Software Foundation at http://www.fsf.org .
//C-
//C- This program is distributed in the hope that it will be useful,
//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//C- GNU General Public License for more details.
//C-
//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
//C- Lizardtech Software. Lizardtech Software has authorized us to
//C- replace the original DjVu(r) Reference Library notice by the following
//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
//C-
//C- ------------------------------------------------------------------
//C- | DjVu (r) Reference Library (v. 3.5)
//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
//C- | The DjVu Reference Library is protected by U.S. Pat. No.
//C- | 6,058,214 and patents pending.
//C- |
//C- | This software is subject to, and may be distributed under, the
//C- | GNU General Public License, either Version 2 of the license,
//C- | or (at your option) any later version. The license should have
//C- | accompanied the software or you may obtain a copy of the license
//C- | from the Free Software Foundation at http://www.fsf.org .
//C- |
//C- | The computer code originally released by LizardTech under this
//C- | license and unmodified by other parties is deemed "the LIZARDTECH
//C- | ORIGINAL CODE."  Subject to any third party intellectual property
//C- | claims, LizardTech grants recipient a worldwide, royalty-free,
//C- | non-exclusive license to make, use, sell, or otherwise dispose of
//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
//C- | General Public License.   This grant only confers the right to
//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
//C- | the extent such infringement is reasonably necessary to enable
//C- | recipient to make, have made, practice, sell, or otherwise dispose
//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
//C- | any greater extent that may be necessary to utilize further
//C- | modifications or combinations.
//C- |
//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
//C- +------------------------------------------------------------------
//
// $Id: BSByteStream.cpp,v 1.9 2007/03/25 20:48:29 leonb Exp $
// $Name: release_3_5_23 $
'''


MAXBLOCK = 4096
FREQMAX = 4
CTXIDS = 3
MAXLEN = 1024 ** 2


# Exception classes used by this module.
class BZZDecoderError(Exception):
    """This exception is raised when BZZDecode runs into trouble
    """
    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        return "BZZDecoderError: %s" % (self.msg)


# This table has been designed for the ZPCoder
# * by running the following command in file 'zptable.sn':
# * (fast-crude (steady-mat 0.0035  0.0002) 260)))
default_ztable = [  # {{{
    (0x8000, 0x0000, 84, 145),    # 000: p=0.500000 (    0,    0)
    (0x8000, 0x0000, 3, 4),       # 001: p=0.500000 (    0,    0)
    (0x8000, 0x0000, 4, 3),       # 002: p=0.500000 (    0,    0)
    (0x6bbd, 0x10a5, 5, 1),       # 003: p=0.465226 (    0,    0)
    (0x6bbd, 0x10a5, 6, 2),       # 004: p=0.465226 (    0,    0)
    (0x5d45, 0x1f28, 7, 3),       # 005: p=0.430708 (    0,    0)
    (0x5d45, 0x1f28, 8, 4),       # 006: p=0.430708 (    0,    0)
    (0x51b9, 0x2bd3, 9, 5),       # 007: p=0.396718 (    0,    0)
    (0x51b9, 0x2bd3, 10, 6),      # 008: p=0.396718 (    0,    0)
    (0x4813, 0x36e3, 11, 7),      # 009: p=0.363535 (    0,    0)
    (0x4813, 0x36e3, 12, 8),      # 010: p=0.363535 (    0,    0)
    (0x3fd5, 0x408c, 13, 9),      # 011: p=0.331418 (    0,    0)
    (0x3fd5, 0x408c, 14, 10),     # 012: p=0.331418 (    0,    0)
    (0x38b1, 0x48fd, 15, 11),     # 013: p=0.300585 (    0,    0)
    (0x38b1, 0x48fd, 16, 12),     # 014: p=0.300585 (    0,    0)
    (0x3275, 0x505d, 17, 13),     # 015: p=0.271213 (    0,    0)
    (0x3275, 0x505d, 18, 14),     # 016: p=0.271213 (    0,    0)
    (0x2cfd, 0x56d0, 19, 15),     # 017: p=0.243438 (    0,    0)
    (0x2cfd, 0x56d0, 20, 16),     # 018: p=0.243438 (    0,    0)
    (0x2825, 0x5c71, 21, 17),     # 019: p=0.217391 (    0,    0)
    (0x2825, 0x5c71, 22, 18),     # 020: p=0.217391 (    0,    0)
    (0x23ab, 0x615b, 23, 19),     # 021: p=0.193150 (    0,    0)
    (0x23ab, 0x615b, 24, 20),     # 022: p=0.193150 (    0,    0)
    (0x1f87, 0x65a5, 25, 21),     # 023: p=0.170728 (    0,    0)
    (0x1f87, 0x65a5, 26, 22),     # 024: p=0.170728 (    0,    0)
    (0x1bbb, 0x6962, 27, 23),     # 025: p=0.150158 (    0,    0)
    (0x1bbb, 0x6962, 28, 24),     # 026: p=0.150158 (    0,    0)
    (0x1845, 0x6ca2, 29, 25),     # 027: p=0.131418 (    0,    0)
    (0x1845, 0x6ca2, 30, 26),     # 028: p=0.131418 (    0,    0)
    (0x1523, 0x6f74, 31, 27),     # 029: p=0.114460 (    0,    0)
    (0x1523, 0x6f74, 32, 28),     # 030: p=0.114460 (    0,    0)
    (0x1253, 0x71e6, 33, 29),     # 031: p=0.099230 (    0,    0)
    (0x1253, 0x71e6, 34, 30),     # 032: p=0.099230 (    0,    0)
    (0x0fcf, 0x7404, 35, 31),     # 033: p=0.085611 (    0,    0)
    (0x0fcf, 0x7404, 36, 32),     # 034: p=0.085611 (    0,    0)
    (0x0d95, 0x75d6, 37, 33),     # 035: p=0.073550 (    0,    0)
    (0x0d95, 0x75d6, 38, 34),     # 036: p=0.073550 (    0,    0)
    (0x0b9d, 0x7768, 39, 35),     # 037: p=0.062888 (    0,    0)
    (0x0b9d, 0x7768, 40, 36),     # 038: p=0.062888 (    0,    0)
    (0x09e3, 0x78c2, 41, 37),     # 039: p=0.053539 (    0,    0)
    (0x09e3, 0x78c2, 42, 38),     # 040: p=0.053539 (    0,    0)
    (0x0861, 0x79ea, 43, 39),     # 041: p=0.045365 (    0,    0)
    (0x0861, 0x79ea, 44, 40),     # 042: p=0.045365 (    0,    0)
    (0x0711, 0x7ae7, 45, 41),     # 043: p=0.038272 (    0,    0)
    (0x0711, 0x7ae7, 46, 42),     # 044: p=0.038272 (    0,    0)
    (0x05f1, 0x7bbe, 47, 43),     # 045: p=0.032174 (    0,    0)
    (0x05f1, 0x7bbe, 48, 44),     # 046: p=0.032174 (    0,    0)
    (0x04f9, 0x7c75, 49, 45),     # 047: p=0.026928 (    0,    0)
    (0x04f9, 0x7c75, 50, 46),     # 048: p=0.026928 (    0,    0)
    (0x0425, 0x7d0f, 51, 47),     # 049: p=0.022444 (    0,    0)
    (0x0425, 0x7d0f, 52, 48),     # 050: p=0.022444 (    0,    0)
    (0x0371, 0x7d91, 53, 49),     # 051: p=0.018636 (    0,    0)
    (0x0371, 0x7d91, 54, 50),     # 052: p=0.018636 (    0,    0)
    (0x02d9, 0x7dfe, 55, 51),     # 053: p=0.015421 (    0,    0)
    (0x02d9, 0x7dfe, 56, 52),     # 054: p=0.015421 (    0,    0)
    (0x0259, 0x7e5a, 57, 53),     # 055: p=0.012713 (    0,    0)
    (0x0259, 0x7e5a, 58, 54),     # 056: p=0.012713 (    0,    0)
    (0x01ed, 0x7ea6, 59, 55),     # 057: p=0.010419 (    0,    0)
    (0x01ed, 0x7ea6, 60, 56),     # 058: p=0.010419 (    0,    0)
    (0x0193, 0x7ee6, 61, 57),     # 059: p=0.008525 (    0,    0)
    (0x0193, 0x7ee6, 62, 58),     # 060: p=0.008525 (    0,    0)
    (0x0149, 0x7f1a, 63, 59),     # 061: p=0.006959 (    0,    0)
    (0x0149, 0x7f1a, 64, 60),     # 062: p=0.006959 (    0,    0)
    (0x010b, 0x7f45, 65, 61),     # 063: p=0.005648 (    0,    0)
    (0x010b, 0x7f45, 66, 62),     # 064: p=0.005648 (    0,    0)
    (0x00d5, 0x7f6b, 67, 63),     # 065: p=0.004506 (    0,    0)
    (0x00d5, 0x7f6b, 68, 64),     # 066: p=0.004506 (    0,    0)
    (0x00a5, 0x7f8d, 69, 65),     # 067: p=0.003480 (    0,    0)
    (0x00a5, 0x7f8d, 70, 66),     # 068: p=0.003480 (    0,    0)
    (0x007b, 0x7faa, 71, 67),     # 069: p=0.002602 (    0,    0)
    (0x007b, 0x7faa, 72, 68),     # 070: p=0.002602 (    0,    0)
    (0x0057, 0x7fc3, 73, 69),     # 071: p=0.001843 (    0,    0)
    (0x0057, 0x7fc3, 74, 70),     # 072: p=0.001843 (    0,    0)
    (0x003b, 0x7fd7, 75, 71),     # 073: p=0.001248 (    0,    0)
    (0x003b, 0x7fd7, 76, 72),     # 074: p=0.001248 (    0,    0)
    (0x0023, 0x7fe7, 77, 73),     # 075: p=0.000749 (    0,    0)
    (0x0023, 0x7fe7, 78, 74),     # 076: p=0.000749 (    0,    0)
    (0x0013, 0x7ff2, 79, 75),     # 077: p=0.000402 (    0,    0)
    (0x0013, 0x7ff2, 80, 76),     # 078: p=0.000402 (    0,    0)
    (0x0007, 0x7ffa, 81, 77),     # 079: p=0.000153 (    0,    0)
    (0x0007, 0x7ffa, 82, 78),     # 080: p=0.000153 (    0,    0)
    (0x0001, 0x7fff, 81, 79),     # 081: p=0.000027 (    0,    0)
    (0x0001, 0x7fff, 82, 80),     # 082: p=0.000027 (    0,    0)
    (0x5695, 0x0000, 9, 85),      # 083: p=0.411764 (    2,    3)
    (0x24ee, 0x0000, 86, 226),    # 084: p=0.199988 (    1,    0)
    (0x8000, 0x0000, 5, 6),       # 085: p=0.500000 (    3,    3)
    (0x0d30, 0x0000, 88, 176),    # 086: p=0.071422 (    4,    0)
    (0x481a, 0x0000, 89, 143),    # 087: p=0.363634 (    1,    2)
    (0x0481, 0x0000, 90, 138),    # 088: p=0.024388 (   13,    0)
    (0x3579, 0x0000, 91, 141),    # 089: p=0.285711 (    1,    3)
    (0x017a, 0x0000, 92, 112),    # 090: p=0.007999 (   41,    0)
    (0x24ef, 0x0000, 93, 135),    # 091: p=0.199997 (    1,    5)
    (0x007b, 0x0000, 94, 104),    # 092: p=0.002611 (  127,    0)
    (0x1978, 0x0000, 95, 133),    # 093: p=0.137929 (    1,    8)
    (0x0028, 0x0000, 96, 100),    # 094: p=0.000849 (  392,    0)
    (0x10ca, 0x0000, 97, 129),    # 095: p=0.090907 (    1,   13)
    (0x000d, 0x0000, 82, 98),     # 096: p=0.000276 ( 1208,    0)
    (0x0b5d, 0x0000, 99, 127),    # 097: p=0.061537 (    1,   20)
    (0x0034, 0x0000, 76, 72),     # 098: p=0.001102 ( 1208,    1)
    (0x078a, 0x0000, 101, 125),   # 099: p=0.040815 (    1,   31)
    (0x00a0, 0x0000, 70, 102),    # 100: p=0.003387 (  392,    1)
    (0x050f, 0x0000, 103, 123),   # 101: p=0.027397 (    1,   47)
    (0x0117, 0x0000, 66, 60),     # 102: p=0.005912 (  392,    2)
    (0x0358, 0x0000, 105, 121),   # 103: p=0.018099 (    1,   72)
    (0x01ea, 0x0000, 106, 110),   # 104: p=0.010362 (  127,    1)
    (0x0234, 0x0000, 107, 119),   # 105: p=0.011940 (    1,  110)
    (0x0144, 0x0000, 66, 108),    # 106: p=0.006849 (  193,    1)
    (0x0173, 0x0000, 109, 117),   # 107: p=0.007858 (    1,  168)
    (0x0234, 0x0000, 60, 54),     # 108: p=0.011925 (  193,    2)
    (0x00f5, 0x0000, 111, 115),   # 109: p=0.005175 (    1,  256)
    (0x0353, 0x0000, 56, 48),     # 110: p=0.017995 (  127,    2)
    (0x00a1, 0x0000, 69, 113),    # 111: p=0.003413 (    1,  389)
    (0x05c5, 0x0000, 114, 134),   # 112: p=0.031249 (   41,    1)
    (0x011a, 0x0000, 65, 59),     # 113: p=0.005957 (    2,  389)
    (0x03cf, 0x0000, 116, 132),   # 114: p=0.020618 (   63,    1)
    (0x01aa, 0x0000, 61, 55),     # 115: p=0.009020 (    2,  256)
    (0x0285, 0x0000, 118, 130),   # 116: p=0.013652 (   96,    1)
    (0x0286, 0x0000, 57, 51),     # 117: p=0.013672 (    2,  168)
    (0x01ab, 0x0000, 120, 128),   # 118: p=0.009029 (  146,    1)
    (0x03d3, 0x0000, 53, 47),     # 119: p=0.020710 (    2,  110)
    (0x011a, 0x0000, 122, 126),   # 120: p=0.005961 (  222,    1)
    (0x05c5, 0x0000, 49, 41),     # 121: p=0.031250 (    2,   72)
    (0x00ba, 0x0000, 124, 62),    # 122: p=0.003925 (  338,    1)
    (0x08ad, 0x0000, 43, 37),     # 123: p=0.046979 (    2,   47)
    (0x007a, 0x0000, 72, 66),     # 124: p=0.002586 (  514,    1)
    (0x0ccc, 0x0000, 39, 31),     # 125: p=0.069306 (    2,   31)
    (0x01eb, 0x0000, 60, 54),     # 126: p=0.010386 (  222,    2)
    (0x1302, 0x0000, 33, 25),     # 127: p=0.102940 (    2,   20)
    (0x02e6, 0x0000, 56, 50),     # 128: p=0.015695 (  146,    2)
    (0x1b81, 0x0000, 29, 131),    # 129: p=0.148935 (    2,   13)
    (0x045e, 0x0000, 52, 46),     # 130: p=0.023648 (   96,    2)
    (0x24ef, 0x0000, 23, 17),     # 131: p=0.199999 (    3,   13)
    (0x0690, 0x0000, 48, 40),     # 132: p=0.035533 (   63,    2)
    (0x2865, 0x0000, 23, 15),     # 133: p=0.218748 (    2,    8)
    (0x09de, 0x0000, 42, 136),    # 134: p=0.053434 (   41,    2)
    (0x3987, 0x0000, 137, 7),     # 135: p=0.304346 (    2,    5)
    (0x0dc8, 0x0000, 38, 32),     # 136: p=0.074626 (   41,    3)
    (0x2c99, 0x0000, 21, 139),    # 137: p=0.241378 (    2,    7)
    (0x10ca, 0x0000, 140, 172),   # 138: p=0.090907 (   13,    1)
    (0x3b5f, 0x0000, 15, 9),      # 139: p=0.312499 (    3,    7)
    (0x0b5d, 0x0000, 142, 170),   # 140: p=0.061537 (   20,    1)
    (0x5695, 0x0000, 9, 85),      # 141: p=0.411764 (    2,    3)
    (0x078a, 0x0000, 144, 168),   # 142: p=0.040815 (   31,    1)
    (0x8000, 0x0000, 141, 248),   # 143: p=0.500000 (    2,    2)
    (0x050f, 0x0000, 146, 166),   # 144: p=0.027397 (   47,    1)
    (0x24ee, 0x0000, 147, 247),   # 145: p=0.199988 (    0,    1)
    (0x0358, 0x0000, 148, 164),   # 146: p=0.018099 (   72,    1)
    (0x0d30, 0x0000, 149, 197),   # 147: p=0.071422 (    0,    4)
    (0x0234, 0x0000, 150, 162),   # 148: p=0.011940 (  110,    1)
    (0x0481, 0x0000, 151, 95),    # 149: p=0.024388 (    0,   13)
    (0x0173, 0x0000, 152, 160),   # 150: p=0.007858 (  168,    1)
    (0x017a, 0x0000, 153, 173),   # 151: p=0.007999 (    0,   41)
    (0x00f5, 0x0000, 154, 158),   # 152: p=0.005175 (  256,    1)
    (0x007b, 0x0000, 155, 165),   # 153: p=0.002611 (    0,  127)
    (0x00a1, 0x0000, 70, 156),    # 154: p=0.003413 (  389,    1)
    (0x0028, 0x0000, 157, 161),   # 155: p=0.000849 (    0,  392)
    (0x011a, 0x0000, 66, 60),     # 156: p=0.005957 (  389,    2)
    (0x000d, 0x0000, 81, 159),    # 157: p=0.000276 (    0, 1208)
    (0x01aa, 0x0000, 62, 56),     # 158: p=0.009020 (  256,    2)
    (0x0034, 0x0000, 75, 71),     # 159: p=0.001102 (    1, 1208)
    (0x0286, 0x0000, 58, 52),     # 160: p=0.013672 (  168,    2)
    (0x00a0, 0x0000, 69, 163),    # 161: p=0.003387 (    1,  392)
    (0x03d3, 0x0000, 54, 48),     # 162: p=0.020710 (  110,    2)
    (0x0117, 0x0000, 65, 59),     # 163: p=0.005912 (    2,  392)
    (0x05c5, 0x0000, 50, 42),     # 164: p=0.031250 (   72,    2)
    (0x01ea, 0x0000, 167, 171),   # 165: p=0.010362 (    1,  127)
    (0x08ad, 0x0000, 44, 38),     # 166: p=0.046979 (   47,    2)
    (0x0144, 0x0000, 65, 169),    # 167: p=0.006849 (    1,  193)
    (0x0ccc, 0x0000, 40, 32),     # 168: p=0.069306 (   31,    2)
    (0x0234, 0x0000, 59, 53),     # 169: p=0.011925 (    2,  193)
    (0x1302, 0x0000, 34, 26),     # 170: p=0.102940 (   20,    2)
    (0x0353, 0x0000, 55, 47),     # 171: p=0.017995 (    2,  127)
    (0x1b81, 0x0000, 30, 174),    # 172: p=0.148935 (   13,    2)
    (0x05c5, 0x0000, 175, 193),   # 173: p=0.031249 (    1,   41)
    (0x24ef, 0x0000, 24, 18),     # 174: p=0.199999 (   13,    3)
    (0x03cf, 0x0000, 177, 191),   # 175: p=0.020618 (    1,   63)
    (0x2b74, 0x0000, 178, 222),   # 176: p=0.235291 (    4,    1)
    (0x0285, 0x0000, 179, 189),   # 177: p=0.013652 (    1,   96)
    (0x201d, 0x0000, 180, 218),   # 178: p=0.173910 (    6,    1)
    (0x01ab, 0x0000, 181, 187),   # 179: p=0.009029 (    1,  146)
    (0x1715, 0x0000, 182, 216),   # 180: p=0.124998 (    9,    1)
    (0x011a, 0x0000, 183, 185),   # 181: p=0.005961 (    1,  222)
    (0x0fb7, 0x0000, 184, 214),   # 182: p=0.085105 (   14,    1)
    (0x00ba, 0x0000, 69, 61),     # 183: p=0.003925 (    1,  338)
    (0x0a67, 0x0000, 186, 212),   # 184: p=0.056337 (   22,    1)
    (0x01eb, 0x0000, 59, 53),     # 185: p=0.010386 (    2,  222)
    (0x06e7, 0x0000, 188, 210),   # 186: p=0.037382 (   34,    1)
    (0x02e6, 0x0000, 55, 49),     # 187: p=0.015695 (    2,  146)
    (0x0496, 0x0000, 190, 208),   # 188: p=0.024844 (   52,    1)
    (0x045e, 0x0000, 51, 45),     # 189: p=0.023648 (    2,   96)
    (0x030d, 0x0000, 192, 206),   # 190: p=0.016529 (   79,    1)
    (0x0690, 0x0000, 47, 39),     # 191: p=0.035533 (    2,   63)
    (0x0206, 0x0000, 194, 204),   # 192: p=0.010959 (  120,    1)
    (0x09de, 0x0000, 41, 195),    # 193: p=0.053434 (    2,   41)
    (0x0155, 0x0000, 196, 202),   # 194: p=0.007220 (  183,    1)
    (0x0dc8, 0x0000, 37, 31),     # 195: p=0.074626 (    3,   41)
    (0x00e1, 0x0000, 198, 200),   # 196: p=0.004750 (  279,    1)
    (0x2b74, 0x0000, 199, 243),   # 197: p=0.235291 (    1,    4)
    (0x0094, 0x0000, 72, 64),     # 198: p=0.003132 (  424,    1)
    (0x201d, 0x0000, 201, 239),   # 199: p=0.173910 (    1,    6)
    (0x0188, 0x0000, 62, 56),     # 200: p=0.008284 (  279,    2)
    (0x1715, 0x0000, 203, 237),   # 201: p=0.124998 (    1,    9)
    (0x0252, 0x0000, 58, 52),     # 202: p=0.012567 (  183,    2)
    (0x0fb7, 0x0000, 205, 235),   # 203: p=0.085105 (    1,   14)
    (0x0383, 0x0000, 54, 48),     # 204: p=0.019021 (  120,    2)
    (0x0a67, 0x0000, 207, 233),   # 205: p=0.056337 (    1,   22)
    (0x0547, 0x0000, 50, 44),     # 206: p=0.028571 (   79,    2)
    (0x06e7, 0x0000, 209, 231),   # 207: p=0.037382 (    1,   34)
    (0x07e2, 0x0000, 46, 38),     # 208: p=0.042682 (   52,    2)
    (0x0496, 0x0000, 211, 229),   # 209: p=0.024844 (    1,   52)
    (0x0bc0, 0x0000, 40, 34),     # 210: p=0.063636 (   34,    2)
    (0x030d, 0x0000, 213, 227),   # 211: p=0.016529 (    1,   79)
    (0x1178, 0x0000, 36, 28),     # 212: p=0.094593 (   22,    2)
    (0x0206, 0x0000, 215, 225),   # 213: p=0.010959 (    1,  120)
    (0x19da, 0x0000, 30, 22),     # 214: p=0.139999 (   14,    2)
    (0x0155, 0x0000, 217, 223),   # 215: p=0.007220 (    1,  183)
    (0x24ef, 0x0000, 26, 16),     # 216: p=0.199998 (    9,    2)
    (0x00e1, 0x0000, 219, 221),   # 217: p=0.004750 (    1,  279)
    (0x320e, 0x0000, 20, 220),    # 218: p=0.269229 (    6,    2)
    (0x0094, 0x0000, 71, 63),     # 219: p=0.003132 (    1,  424)
    (0x432a, 0x0000, 14, 8),      # 220: p=0.344827 (    6,    3)
    (0x0188, 0x0000, 61, 55),     # 221: p=0.008284 (    2,  279)
    (0x447d, 0x0000, 14, 224),    # 222: p=0.349998 (    4,    2)
    (0x0252, 0x0000, 57, 51),     # 223: p=0.012567 (    2,  183)
    (0x5ece, 0x0000, 8, 2),       # 224: p=0.434782 (    4,    3)
    (0x0383, 0x0000, 53, 47),     # 225: p=0.019021 (    2,  120)
    (0x8000, 0x0000, 228, 87),    # 226: p=0.500000 (    1,    1)
    (0x0547, 0x0000, 49, 43),     # 227: p=0.028571 (    2,   79)
    (0x481a, 0x0000, 230, 246),   # 228: p=0.363634 (    2,    1)
    (0x07e2, 0x0000, 45, 37),     # 229: p=0.042682 (    2,   52)
    (0x3579, 0x0000, 232, 244),   # 230: p=0.285711 (    3,    1)
    (0x0bc0, 0x0000, 39, 33),     # 231: p=0.063636 (    2,   34)
    (0x24ef, 0x0000, 234, 238),   # 232: p=0.199997 (    5,    1)
    (0x1178, 0x0000, 35, 27),     # 233: p=0.094593 (    2,   22)
    (0x1978, 0x0000, 138, 236),   # 234: p=0.137929 (    8,    1)
    (0x19da, 0x0000, 29, 21),     # 235: p=0.139999 (    2,   14)
    (0x2865, 0x0000, 24, 16),     # 236: p=0.218748 (    8,    2)
    (0x24ef, 0x0000, 25, 15),     # 237: p=0.199998 (    2,    9)
    (0x3987, 0x0000, 240, 8),     # 238: p=0.304346 (    5,    2)
    (0x320e, 0x0000, 19, 241),    # 239: p=0.269229 (    2,    6)
    (0x2c99, 0x0000, 22, 242),    # 240: p=0.241378 (    7,    2)
    (0x432a, 0x0000, 13, 7),      # 241: p=0.344827 (    3,    6)
    (0x3b5f, 0x0000, 16, 10),     # 242: p=0.312499 (    7,    3)
    (0x447d, 0x0000, 13, 245),    # 243: p=0.349998 (    2,    4)
    (0x5695, 0x0000, 10, 2),      # 244: p=0.411764 (    3,    2)
    (0x5ece, 0x0000, 7, 1),       # 245: p=0.434782 (    3,    4)
    (0x8000, 0x0000, 244, 83),    # 246: p=0.500000 (    2,    2)
    (0x8000, 0x0000, 249, 250),   # 247: p=0.500000 (    1,    1)
    (0x5695, 0x0000, 10, 2),      # 248: p=0.411764 (    3,    2)
    (0x481a, 0x0000, 89, 143),    # 249: p=0.363634 (    1,    2)
    (0x481a, 0x0000, 230, 246),   # 250: p=0.363634 (    2,    1)
    (0, 0, 0, 0),
    (0, 0, 0, 0),
    (0, 0, 0, 0),
    (0, 0, 0, 0),
    (0, 0, 0, 0),
]


xmtf = (
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
    )
# }}}
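# Editorial aside, not part of the commit: in each default_ztable row the
# four columns are the probability increment p (added to the interval size
# in zpcodec_decode below), the MPS-adaptation threshold m, and the 'up'/'dn'
# successor states followed after an MPS/LPS adaptation event in decode_sub.
# A quick property check, since every transition must itself land on a
# table row:
for _i, (_p, _m, _up, _dn) in enumerate(default_ztable):
    assert 0 <= _up < len(default_ztable), (_i, _up)
    assert 0 <= _dn < len(default_ztable), (_i, _dn)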


def chr3(l):
    return bytes(bytearray(l))


class BZZDecoder():
    def __init__(self, infile, outfile):
        self.instream = infile
        self.outf = outfile
        self.ieof = False
        self.bptr = None
        self.xsize = None
        self.outbuf = [0] * (MAXBLOCK * 1024)
        self.byte = None
        self.scount = 0
        self.delay = 25
        self.a = 0
        self.code = 0
        self.bufint = 0
        self.ctx = [0] * 300
        # table
        self.p = [0] * 256
        self.m = [0] * 256
        self.up = [0] * 256
        self.dn = [0] * 256
        # machine independent ffz
        self.ffzt = [0] * 256

        # Create machine independent ffz table
        for i in range(256):
            j = i
            while(j & 0x80):
                self.ffzt[i] += 1
                j <<= 1
        # Initialize table
        self.newtable(default_ztable)
        # Codebit counter
        # Read first 16 bits of code
        if not self.read_byte():
            self.byte = 0xff
        self.code = (self.byte << 8)
        if not self.read_byte():
            self.byte = 0xff
        self.code = self.code | self.byte
        # Preload buffer
        self.preload()
        # Compute initial fence
        self.fence = self.code
        if self.code >= 0x8000:
            self.fence = 0x7fff

    def convert(self, sz):
        if self.ieof:
            return 0
        copied = 0
        while sz > 0 and not (self.ieof):
            # Decode if needed
            if not self.xsize:
                self.bptr = 0
                if not self.decode(): # input block size set in decode
                    self.xsize = 1
                    self.ieof = True
                self.xsize -= 1

            # Compute remaining (renamed from 'bytes' so the builtin is not shadowed)
            nbytes = self.xsize
            if nbytes > sz:
                nbytes = sz
            # Transfer: pass a list slice so chr3() builds a proper byte
            # string (passing a bare int would make bytearray() allocate a
            # zero-filled buffer of that length instead)
            if nbytes:
                self.outf.write(chr3(self.outbuf[self.bptr:self.bptr + nbytes]))
            self.xsize -= nbytes
            self.bptr += nbytes
            sz -= nbytes
            copied += nbytes
            # offset += nbytes; // for tell()
        return copied

    def preload(self):
        while self.scount <= 24:
            if self.read_byte() < 1:
                self.byte = 0xff
                # C's `if (--delay < 1)`: `--x` is a no-op double negation
                # in python, so decrement explicitly
                self.delay -= 1
                if self.delay < 1:
                    raise BZZDecoderError("BiteStream EOF")
            self.bufint = (self.bufint << 8) | self.byte
            self.scount += 8

    def newtable(self, table):
        for i in range(256):
            self.p[i] = table[i][0]
            self.m[i] = table[i][1]
            self.up[i] = table[i][2]
            self.dn[i] = table[i][3]
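    # Editorial overview (not part of the commit): decode() inverts the
    # BSByteStream stages in order. It ZP-decodes the 24-bit block size and
    # up to two "estimation speed" bits, then the quasi-MTF symbol stream,
    # and finally undoes the blocksort (Burrows-Wheeler style) transform in
    # place in outbuf.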
    def decode(self):
        outbuf = self.outbuf
        # Decode block size
        self.xsize = self.decode_raw(24)
        if not self.xsize:
            return 0
        if self.xsize > MAXBLOCK * 1024: # 4MB (4096 * 1024) is max block
            raise BZZDecoderError("BiteStream.corrupt")
        # Decode Estimation Speed
        fshift = 0
        if self.zpcodec_decoder():
            fshift += 1
            if self.zpcodec_decoder():
                fshift += 1
        # Prepare Quasi MTF
        mtf = list(xmtf) # unsigned chars
        freq = [0] * FREQMAX
        fadd = 4
        # Decode
        mtfno = 3
        markerpos = -1
        for i in range(self.xsize):
            ctxid = CTXIDS - 1
            if ctxid > mtfno:
                ctxid = mtfno
            cx = self.ctx
            if self.zpcodec_decode(cx, ctxid):
                mtfno = 0
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, ctxid + CTXIDS):
                mtfno = 1
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS):
                mtfno = 2 + self.decode_binary(cx, 2*CTXIDS + 1, 1)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 2):
                mtfno = 4 + self.decode_binary(cx, 2*CTXIDS + 2 + 1, 2)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 6):
                mtfno = 8 + self.decode_binary(cx, 2*CTXIDS + 6 + 1, 3)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 14):
                mtfno = 16 + self.decode_binary(cx, 2*CTXIDS + 14 + 1, 4)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 30):
                mtfno = 32 + self.decode_binary(cx, 2*CTXIDS + 30 + 1, 5)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 62):
                mtfno = 64 + self.decode_binary(cx, 2*CTXIDS + 62 + 1, 6)
                outbuf[i] = mtf[mtfno]
            elif self.zpcodec_decode(cx, 2*CTXIDS + 126):
                mtfno = 128 + self.decode_binary(cx, 2*CTXIDS + 126 + 1, 7)
                outbuf[i] = mtf[mtfno]
            else:
                mtfno = 256 # EOB
                outbuf[i] = 0
                markerpos = i
                continue

            # Rotate mtf according to empirical frequencies (new!)
            # :rotate label
            # Adjust frequencies for overflow
            fadd = fadd + (fadd >> fshift)
            if fadd > 0x10000000:
                fadd >>= 24
                freq[0] >>= 24
                freq[1] >>= 24
                freq[2] >>= 24
                freq[3] >>= 24
                for k in range(4, FREQMAX):
                    freq[k] = freq[k] >> 24
            # Relocate new char according to new freq
            fc = fadd
            if mtfno < FREQMAX:
                fc += freq[mtfno]
            k = mtfno
            while (k >= FREQMAX):
                mtf[k] = mtf[k - 1]
                k -= 1
            while (k > 0 and fc >= freq[k - 1]):
                mtf[k] = mtf[k - 1]
                freq[k] = freq[k - 1]
                k -= 1
            mtf[k] = outbuf[i]
            freq[k] = fc
        #///////////////////////////////
        #//////// Reconstruct the string

        if markerpos < 1 or markerpos >= self.xsize:
            raise BZZDecoderError("BiteStream.corrupt")
        # Allocate pointers
        posn = [0] * self.xsize
        # Prepare count buffer
        count = [0] * 256
        # Fill count buffer
        for i in range(markerpos):
            c = outbuf[i]
            posn[i] = (c << 24) | (count[c] & 0xffffff)
            count[c] += 1
        for i in range(markerpos + 1, self.xsize):
            c = outbuf[i]
            posn[i] = (c << 24) | (count[c] & 0xffffff)
            count[c] += 1
        # Compute sorted char positions
        last = 1
        for i in range(256):
            tmp = count[i]
            count[i] = last
            last += tmp
        # Undo the sort transform
        i = 0
        last = self.xsize - 1
        while last > 0:
            n = posn[i]
            c = (posn[i] >> 24)
            last -= 1
            outbuf[last] = c
            i = count[c] + (n & 0xffffff)
        # Free and check
        if i != markerpos:
            raise BZZDecoderError("BiteStream.corrupt")
        return self.xsize

    def decode_raw(self, bits):
        n = 1
        m = (1 << bits)
        while n < m:
            b = self.zpcodec_decoder()
            n = (n << 1) | b
        return n - m
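    # Editorial note (not in the commit): decode_raw(bits) reads a plain
    # integer MSB-first. n starts at 1 and shifts each decoded bit in, so
    # after `bits` rounds n == (1 << bits) + value, and n - m strips the
    # sentinel bit. E.g. for bits 1,0,1: n goes 1, 0b11, 0b110, 0b1101, and
    # 0b1101 - 0b1000 == 5. decode_binary() below is the same walk, except
    # each node of the implicit binary tree gets its own adaptive context.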
    def decode_binary(self, ctx, index, bits):
        n = 1
        m = (1 << bits)
        while n < m:
            b = self.zpcodec_decode(ctx, index + n - 1)
            n = (n << 1) | b
        return n - m

    def zpcodec_decoder(self):
        return self.decode_sub_simple(0, 0x8000 + (self.a >> 1))

    def decode_sub_simple(self, mps, z):
        # Test MPS/LPS
        if z > self.code:
            # LPS branch
            z = 0x10000 - z
            self.a += z
            self.code = self.code + z
            # LPS renormalization
            shift = self.ffz()
            self.scount -= shift
            self.a = self.a << shift
            self.a &= 0xffff
            self.code = (self.code << shift) | ((self.bufint >> self.scount) & ((1 << shift) - 1))
            self.code &= 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            result = mps ^ 1
        else:
            # MPS renormalization
            self.scount -= 1
            self.a = (z << 1) & 0xffff
            self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1))
            self.code &= 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            result = mps
        return result

    def decode_sub(self, ctx, index, z):
        # Save bit
        bit = (ctx[index] & 1)
        # Avoid interval reversion
        d = 0x6000 + ((z + self.a) >> 2)
        if z > d:
            z = d
        # Test MPS/LPS
        if z > self.code:
            # LPS branch
            z = 0x10000 - z
            self.a += z
            self.code = self.code + z
            # LPS adaptation
            ctx[index] = self.dn[ctx[index]]
            # LPS renormalization
            shift = self.ffz()
            self.scount -= shift
            self.a = (self.a << shift) & 0xffff
            self.code = ((self.code << shift) | ((self.bufint >> self.scount) & ((1 << shift) - 1))) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            return bit ^ 1
        else:
            # MPS adaptation
            if self.a >= self.m[ctx[index]]:
                ctx[index] = self.up[ctx[index]]
            # MPS renormalization
            self.scount -= 1
            self.a = (z << 1) & 0xffff
            self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1)) & 0xffff
            if self.scount < 16:
                self.preload()
            # Adjust fence
            self.fence = self.code
            if self.code >= 0x8000:
                self.fence = 0x7fff
            return bit

    def zpcodec_decode(self, ctx, index):
        z = self.a + self.p[ctx[index]]
        if z <= self.fence:
            self.a = z
            res = (ctx[index] & 1)
        else:
            res = self.decode_sub(ctx, index, z)
        return res

    def read_byte(self):
        res = 0
        if self.instream:
            ires = self.instream.read(1)
            res = len(ires)
            if res:
                self.byte = ord(ires[0])
        else:
            raise NotImplementedError
        return res

    def ffz(self):
        x = self.a
        if (x >= 0xff00):
            return (self.ffzt[x & 0xff] + 8)
        else:
            return (self.ffzt[(x >> 8) & 0xff])


### for testing
def main():
    import sys
    infile = file(sys.argv[1], "rb")
    outfile = file(sys.argv[2], "wb")
    dec = BZZDecoder(infile, outfile)
    while True:
        res = dec.convert(1024 * 1024)
        if not res:
            break


if __name__ == "__main__":
    main()
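An editorial aside before the next file (not part of the commit): the decoder above is self-contained, so a raw BZZ stream can be unpacked without the rest of the DJVU machinery, mirroring the test main() above. The file names here are placeholders:

    from calibre.ebooks.djvu.djvubzzdec import BZZDecoder

    with open('chunk.bzz', 'rb') as inf, open('chunk.raw', 'wb') as outf:
        dec = BZZDecoder(inf, outf)      # the constructor eagerly reads the 16-bit ZP seed
        while dec.convert(1024 * 1024):  # returns 0 once the last block is written
            pass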
87
src/calibre/ebooks/djvu/input.py
Normal file
@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, Anthon van der Neut <anthon@mnt.org>'
__docformat__ = 'restructuredtext en'

import os
from subprocess import Popen, PIPE
from cStringIO import StringIO

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.txt.processor import convert_basic


class DJVUInput(InputFormatPlugin):

    name = 'DJVU Input'
    author = 'Anthon van der Neut'
    description = 'Convert OCR-ed DJVU files (.djvu) to HTML'
    file_types = set(['djvu', 'djv'])

    options = set([
        OptionRecommendation(name='use_djvutxt', recommended_value=True,
            help=_('Try to use the djvutxt program and fall back to pure '
                   'python implementation if it fails or is not available')),
    ])

    def convert(self, stream, options, file_ext, log, accelerators):
        stdout = StringIO()
        ppdjvu = True
        # using djvutxt is MUCH faster, should make it an option
        if options.use_djvutxt and os.path.exists('/usr/bin/djvutxt'):
            from calibre.ptempfile import PersistentTemporaryFile
            try:
                fp = PersistentTemporaryFile(suffix='.djvu', prefix='djv_input')
                filename = fp._name
                fp.write(stream.read())
                fp.close()
                cmd = ['djvutxt', filename]
                stdout.write(Popen(cmd, stdout=PIPE, close_fds=True).communicate()[0])
                os.remove(filename)
                ppdjvu = False
            except:
                stream.seek(0) # retry with the pure python converter
        if ppdjvu:
            from .djvu import DJVUFile
            x = DJVUFile(stream)
            x.get_text(stdout)

        html = convert_basic(stdout.getvalue().replace(b"\n", b' ').replace(
            b'\037', b'\n\n'))
        # Run the HTMLized text through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'
        base = os.getcwdu()
        if file_ext != 'txtz' and hasattr(stream, 'name'):
            base = os.path.dirname(stream.name)
        fname = os.path.join(base, 'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            fname = 'index%d.html'%c
        htmlfile = open(fname, 'wb')
        with htmlfile:
            htmlfile.write(html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion.
        with open(htmlfile.name, 'rb') as f:
            oeb = html_input.convert(f, options, 'html', log,
                    {})
        options.debug_pipeline = odi
        os.remove(htmlfile.name)

        # Set metadata from file.
        from calibre.customize.ui import get_file_type_metadata
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        return oeb
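An editorial aside, not part of the diff: assuming the commit also registers DJVUInput in calibre's builtin plugin list, the same lookup the plugin itself uses for the HTML stage a few lines above should then resolve the new input format, along these lines:

    from calibre.customize.ui import plugin_for_input_format

    djvu_input = plugin_for_input_format('djvu')
    print(djvu_input.name)  # expected: 'DJVU Input'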
@ -246,6 +246,7 @@ class CSSFlattener(object):
                     cssdict['font-size'] = '%.1fpt'%font_size
                 del node.attrib['size']
             if 'face' in node.attrib:
+                cssdict['font-family'] = node.attrib['face']
                 del node.attrib['face']
             if 'color' in node.attrib:
                 cssdict['color'] = node.attrib['color']
@ -397,6 +397,7 @@ class AddAction(InterfaceAction):
             d = error_dialog(self.gui, _('Add to library'), _('No book files found'))
             d.exec_()
             return
+        paths = self.gui.device_manager.device.prepare_addable_books(paths)
         from calibre.gui2.add import Adder
         self.__adder_func = partial(self._add_from_device_adder, on_card=None,
                                     model=view.model())
24
src/calibre/gui2/convert/djvu_input.py
Normal file
@ -0,0 +1,24 @@
# coding: utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'


from calibre.gui2.convert.djvu_input_ui import Ui_Form
from calibre.gui2.convert import Widget


class PluginWidget(Widget, Ui_Form):

    TITLE = _('DJVU Input')
    HELP = _('Options specific to')+' DJVU '+_('input')
    COMMIT_NAME = 'djvu_input'
    ICON = I('mimetypes/djvu.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent,
                ['use_djvutxt', ])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
28
src/calibre/gui2/convert/djvu_input.ui
Normal file
@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
 <class>Form</class>
 <widget class="QWidget" name="Form">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>400</width>
    <height>300</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Form</string>
  </property>
  <layout class="QVBoxLayout" name="verticalLayout">
   <item>
    <widget class="QCheckBox" name="opt_use_djvutxt">
     <property name="text">
      <string>Use &amp;djvutxt, if available, for faster processing</string>
     </property>
    </widget>
   </item>
  </layout>
 </widget>
 <resources/>
 <connections/>
</ui>
@ -538,14 +538,20 @@ class CoversModel(QAbstractListModel): # {{{
             current_cover = QPixmap(I('default_cover.png'))

         self.blank = QPixmap(I('blank.png')).scaled(150, 200)
+        self.cc = current_cover
+        self.reset_covers(do_reset=False)
+
-        self.covers = [self.get_item(_('Current cover'), current_cover)]
+    def reset_covers(self, do_reset=True):
+        self.covers = [self.get_item(_('Current cover'), self.cc)]
         self.plugin_map = {}
         for i, plugin in enumerate(metadata_plugins(['cover'])):
             self.covers.append((plugin.name+'\n'+_('Searching...'),
                 QVariant(self.blank), None, True))
             self.plugin_map[plugin] = i+1

+        if do_reset:
+            self.reset()
+
     def get_item(self, src, pmap, waiting=False):
         sz = '%dx%d'%(pmap.width(), pmap.height())
         text = QVariant(src + '\n' + sz)
@ -654,6 +660,9 @@ class CoversView(QListView): # {{{
         self.select(0)
         self.delegate.start_animation()

+    def reset_covers(self):
+        self.m.reset_covers()
+
     def clear_failed(self):
         plugin = self.m.plugin_for_index(self.currentIndex())
         self.m.clear_failed()
@ -683,12 +692,18 @@ class CoversWidget(QWidget): # {{{
         l.addWidget(self.covers_view, 1, 0)
         self.continue_processing = True

+    def reset_covers(self):
+        self.covers_view.reset_covers()
+
     def start(self, book, current_cover, title, authors):
+        self.continue_processing = True
+        self.abort.clear()
         self.book, self.current_cover = book, current_cover
         self.title, self.authors = title, authors
         self.log('Starting cover download for:', book.title)
         self.log('Query:', title, authors, self.book.identifiers)
-        self.msg.setText('<p>'+_('Downloading covers for <b>%s</b>, please wait...')%book.title)
+        self.msg.setText('<p>'+
+                _('Downloading covers for <b>%s</b>, please wait...')%book.title)
         self.covers_view.start()

         self.worker = CoverWorker(self.log, self.abort, self.title,
@ -726,8 +741,9 @@ class CoversWidget(QWidget): # {{{
         if num < 2:
             txt = _('Could not find any covers for <b>%s</b>')%self.book.title
         else:
-            txt = _('Found <b>%(num)d</b> covers of %(title)s. Pick the one you like'
-                    ' best.')%dict(num=num-1, title=self.title)
+            txt = _('Found <b>%(num)d</b> covers of %(title)s. '
+                    'Pick the one you like best.')%dict(num=num-1,
+                            title=self.title)
         self.msg.setText(txt)

         self.finished.emit()
@ -832,10 +848,14 @@ class FullFetch(QDialog): # {{{
         self.next_button.clicked.connect(self.next_clicked)
         self.ok_button = self.bb.button(self.bb.Ok)
         self.ok_button.clicked.connect(self.ok_clicked)
+        self.prev_button = self.bb.addButton(_('Back'), self.bb.ActionRole)
+        self.prev_button.setIcon(QIcon(I('back.png')))
+        self.prev_button.clicked.connect(self.back_clicked)
         self.log_button = self.bb.addButton(_('View log'), self.bb.ActionRole)
         self.log_button.clicked.connect(self.view_log)
         self.log_button.setIcon(QIcon(I('debug.png')))
         self.ok_button.setVisible(False)
+        self.prev_button.setVisible(False)

         self.identify_widget = IdentifyWidget(self.log, self)
         self.identify_widget.rejected.connect(self.reject)
@ -857,12 +877,21 @@ class FullFetch(QDialog): # {{{
     def book_selected(self, book):
         self.next_button.setVisible(False)
         self.ok_button.setVisible(True)
+        self.prev_button.setVisible(True)
         self.book = book
         self.stack.setCurrentIndex(1)
         self.log('\n\n')
         self.covers_widget.start(book, self.current_cover,
                 self.title, self.authors)

+    def back_clicked(self):
+        self.next_button.setVisible(True)
+        self.ok_button.setVisible(False)
+        self.prev_button.setVisible(False)
+        self.stack.setCurrentIndex(0)
+        self.covers_widget.cancel()
+        self.covers_widget.reset_covers()
+
     def accept(self):
         # Prevent the usual dialog accept mechanisms from working
         pass
@ -58,7 +58,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         self.device_to_formats_map = {}
         for device in device_plugins():
             n = device_name_for_plugboards(device)
-            self.device_to_formats_map[n] = set(device.FORMATS)
+            self.device_to_formats_map[n] = set(device.settings().format_map)
             if getattr(device, 'CAN_DO_DEVICE_DB_PLUGBOARD', False):
                 self.device_to_formats_map[n].add('device_db')
             if n not in self.devices:
@ -206,7 +206,7 @@
     <item>
      <widget class="QCheckBox" name="opt_autolaunch_server">
       <property name="text">
-       <string>Run server &amp;automatically on startup</string>
+       <string>Run server &amp;automatically when calibre starts</string>
       </property>
      </widget>
     </item>
@ -37,6 +37,7 @@ class SearchRestrictionMixin(object):
         search = unicode(search)
         if not search:
             self.search_restriction.setCurrentIndex(0)
+            self._apply_search_restriction('')
         else:
             s = '*' + search
             if self.search_restriction.count() > 1:
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import urllib
 from contextlib import closing

 from lxml import html
@ -37,27 +36,16 @@ class AmazonDEKindleStore(StorePlugin):

     def search(self, query, max_results=10, timeout=60):
         search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()

         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))

-            # Amazon has two results pages.
-            # 20110725: seems that is_shot is gone.
-            # is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # # Horizontal grid of books.
-            # if is_shot:
-            #     data_xpath = '//div[contains(@class, "result")]'
-            #     format_xpath = './/div[@class="productTitle"]/text()'
-            #     cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # # Vertical list of books.
-            # else:
             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
             cover_xpath = './/img[@class="productImage"]/@src'
-            # end is_shot else

             for data in doc.xpath(data_xpath):
                 if counter <= 0:
@ -80,11 +68,9 @@ class AmazonDEKindleStore(StorePlugin):
                 title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                 price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))

-                # if is_shot:
-                #     author = format.split(' von ')[-1]
-                # else:
                 author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('von ')[-1]
+                if author.startswith('von '):
+                    author = author[4:]

                 counter -= 1
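A worked example of the new query encoding used in these store plugins (an editorial aside; Python 2 string semantics, as in the diff). It hand-rolls latin-1 percent-escapes, which is also why the response body is now decoded as latin-1:

    query = u'b\xf6ll krieg'                       # u'böll krieg'
    q = query.encode('ascii', 'backslashreplace')  # -> 'b\\xf6ll krieg'
    q = q.replace('%', '%25')                      # escape any literal '%' first
    q = q.replace('\\x', '%')                      # '\\xf6' becomes '%f6'
    q = q.replace(' ', '+')                        # -> 'b%f6ll+krieg'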
82
src/calibre/gui2/store/stores/amazon_fr_plugin.py
Normal file
@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-

from __future__ import (unicode_literals, division, absolute_import, print_function)

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

from contextlib import closing

from lxml import html

from PyQt4.Qt import QUrl

from calibre import browser
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.search_result import SearchResult


class AmazonFRKindleStore(StorePlugin):
    '''
    For comments on the implementation, please see amazon_plugin.py
    '''

    def open(self, parent=None, detail_item=None, external=False):
        aff_id = {'tag': 'charhale-21'}
        store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id

        if detail_item:
            aff_id['asin'] = detail_item
            store_link = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' % aff_id
        open_url(QUrl(store_link))

    def search(self, query, max_results=10, timeout=60):
        search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords='
        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read().decode('latin-1', 'replace'))

            data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
            format_xpath = './/span[@class="format"]/text()'
            cover_xpath = './/img[@class="productImage"]/@src'

            for data in doc.xpath(data_xpath):
                if counter <= 0:
                    break

                # Even though we are searching digital-text only Amazon will still
                # put in results for non Kindle books (author pages). So we need
                # to explicitly check if the item is a Kindle book and ignore it
                # if it isn't.
                format = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format.lower():
                    continue

                # We must have an asin otherwise we can't easily reference the
                # book later.
                asin = ''.join(data.xpath("@name"))

                cover_url = ''.join(data.xpath(cover_xpath))

                title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
                author = unicode(''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()')))
                if author.startswith('de '):
                    author = author[3:]

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.formats = 'Kindle'
                s.drm = SearchResult.DRM_UNKNOWN

                yield s
@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'

 import random
 import re
-import urllib
 from contextlib import closing

 from lxml import html
@ -122,12 +121,12 @@ class AmazonKindleStore(StorePlugin):
         open_url(QUrl(store_link))
 
     def search(self, query, max_results=10, timeout=60):
-        url = self.search_url + urllib.quote_plus(query)
+        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()
 
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
 
             # Amazon has two results pages.
             is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
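A brief aside on the second change, as a standalone sketch: latin-1 assigns a character to every possible byte value, so decoding with it can never raise, and lxml then receives unicode instead of having to guess the page encoding itself:

    # Sketch only (Python 2): latin-1 decodes any byte string without errors,
    # one character per byte, so parsing stays deterministic even when the
    # page declares no reliable charset.
    raw = 'r\xe9sultats \xff'            # arbitrary bytes, not valid UTF-8
    text = raw.decode('latin-1', 'replace')
    print(text)                          # u'résultats ÿ', never raises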
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-import urllib
 from contextlib import closing
 
 from lxml import html
@ -34,27 +33,16 @@ class AmazonUKKindleStore(StorePlugin):
 
     def search(self, query, max_results=10, timeout=60):
         search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords='
-        url = search_url + urllib.quote_plus(query)
+        url = search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
         br = browser()
 
         counter = max_results
         with closing(br.open(url, timeout=timeout)) as f:
-            doc = html.fromstring(f.read())
+            doc = html.fromstring(f.read().decode('latin-1', 'replace'))
 
-            # Amazon has two results pages.
-            # 20110725: seems that is_shot is gone.
-            # is_shot = doc.xpath('boolean(//div[@id="shotgunMainResults"])')
-            # # Horizontal grid of books.
-            # if is_shot:
-            #     data_xpath = '//div[contains(@class, "result")]'
-            #     format_xpath = './/div[@class="productTitle"]/text()'
-            #     cover_xpath = './/div[@class="productTitle"]//img/@src'
-            # # Vertical list of books.
-            # else:
             data_xpath = '//div[contains(@class, "result") and contains(@class, "product")]'
             format_xpath = './/span[@class="format"]/text()'
             cover_xpath = './/img[@class="productImage"]/@src'
-            # end is_shot else
 
             for data in doc.xpath(data_xpath):
                 if counter <= 0:
@ -77,11 +65,9 @@ class AmazonUKKindleStore(StorePlugin):
                 title = ''.join(data.xpath('.//div[@class="title"]/a/text()'))
                 price = ''.join(data.xpath('.//div[@class="newPrice"]/span/text()'))
 
-                # if is_shot:
-                #     author = format.split(' von ')[-1]
-                # else:
                 author = ''.join(data.xpath('.//div[@class="title"]/span[@class="ptBrand"]/text()'))
-                author = author.split('by ')[-1]
+                if author.startswith('by '):
+                    author = author[3:]
 
                 counter -= 1
 
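A quick illustration (the author string is hypothetical) of the bug the last change fixes: split('by ') matches anywhere in the string, so it mangles authors whose names merely contain that substring, while startswith() only strips a genuine prefix:

    author = 'Toby Young'
    print(author.split('by ')[-1])   # -> 'Young'  (wrong: matched inside 'Toby')

    if author.startswith('by '):     # corrected logic: strip only a leading 'by '
        author = author[3:]
    print(author)                    # -> 'Toby Young'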
@ -47,6 +47,9 @@ def get_parser(usage):
 def get_db(dbpath, options):
     if options.library_path is not None:
         dbpath = options.library_path
+    if dbpath is None:
+        raise ValueError('No saved library path, either run the GUI or use the'
+                ' --with-library option')
     dbpath = os.path.abspath(dbpath)
     return LibraryDatabase2(dbpath)
 
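A short standalone sketch (not calibre code; the helper name is hypothetical) of why the added guard matters: without it a None path reaches os.path.abspath() and, on Python 2, dies with an unhelpful AttributeError, whereas the new ValueError tells the user exactly what to do:

    import os

    def get_db_path(dbpath):
        # Fail fast with an actionable message instead of letting
        # os.path.abspath(None) raise a confusing AttributeError.
        if dbpath is None:
            raise ValueError('No saved library path, either run the GUI or use the'
                             ' --with-library option')
        return os.path.abspath(dbpath)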
@ -133,7 +133,7 @@ class Rule(object): # {{{
             'lt': ('1', '', ''),
             'gt': ('', '', '1')
         }[action]
-        return "cmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), %s, '%s', '%s', '%s')" % (col,
+        return "strcmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), '%s', '%s', '%s', '%s')" % (col,
                 val, lt, eq, gt)
 
     def multiple_condition(self, col, action, val, sep):
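For context, a sketch (the 'pubdate' column and cutoff value are assumptions) of the template text the corrected line emits. strcmp compares strings, which suits a yyyy-MM-dd formatted date, whereas cmp expects numbers; note the fix also quotes the value:

    # Sketch only: shows the generated template expression.
    col, val, lt, eq, gt = 'pubdate', '2011-11-01', '1', '', ''
    tmpl = "strcmp(format_date(raw_field('%s'), 'yyyy-MM-dd'), '%s', '%s', '%s', '%s')" % (
            col, val, lt, eq, gt)
    print(tmpl)
    # -> strcmp(format_date(raw_field('pubdate'), 'yyyy-MM-dd'), '2011-11-01', '1', '', '')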
@ -266,7 +266,7 @@ The following functions are available in addition to those described in single-f
 * ``has_cover()`` -- return ``Yes`` if the book has a cover, otherwise return the empty string
 * ``not(value)`` -- returns the string "1" if the value is empty, otherwise returns the empty string. This function works well with test or first_non_empty. You can have as many values as you want.
 * ``list_difference(list1, list2, separator)`` -- return a list made by removing from `list1` any item found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
-* ``list_equals(list1, sep1, list2, sep2, yes_val, no_val) -- return `yes_val` if list1 and list2 contain the same items, otherwise return `no_val`. The items are determined by splitting each list using the appropriate separator character (`sep1` or `sep2`). The order of items in the lists is not relevant. The compare is case insensitive.
+* ``list_equals(list1, sep1, list2, sep2, yes_val, no_val)`` -- return `yes_val` if `list1` and `list2` contain the same items, otherwise return `no_val`. The items are determined by splitting each list using the appropriate separator character (`sep1` or `sep2`). The order of items in the lists is not relevant. The compare is case insensitive.
 * ``list_intersection(list1, list2, separator)`` -- return a list made by removing from `list1` any item not found in `list2`, using a case-insensitive compare. The items in `list1` and `list2` are separated by separator, as are the items in the returned list.
 * ``list_sort(list, direction, separator)`` -- return list sorted using a case-insensitive sort. If `direction` is zero, the list is sorted ascending, otherwise descending. The list items are separated by separator, as are the items in the returned list.
 * ``list_union(list1, list2, separator)`` -- return a list made by merging the items in list1 and list2, removing duplicate items using a case-insensitive compare. If items differ in case, the one in list1 is used. The items in list1 and list2 are separated by separator, as are the items in the returned list.
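As a usage illustration of the corrected signature (the custom column ``#genre`` and its values are hypothetical, not from this commit), ``list_equals`` is called like any other function in template program mode:

    program: list_equals(field('#genre'), ',', 'Fantasy,SF', ',', 'same', 'different')

This returns ``same`` when ``#genre`` holds Fantasy and SF in any order and any case, and ``different`` otherwise.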
@ -18,11 +18,13 @@ def create_mail(from_, to, subject, text=None, attachment_data=None,
     assert text or attachment_data
 
     from email.mime.multipart import MIMEMultipart
+    from email.utils import formatdate
 
     outer = MIMEMultipart()
     outer['Subject'] = subject
     outer['To'] = to
     outer['From'] = from_
+    outer['Date'] = formatdate(localtime=True)
     outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
 
     if text is not None: