GwR initial release of Catalog features

2025-07-09 03:04:10 -04:00 · 2010-01-21 09:31:42 -07:00 · 2010-01-21 09:31:42 -07:00 · 3024d37142
commit 3024d37142
parent 59a5e1296a c290fc198c
33 changed files with 1141 additions and 251 deletions
--- a/resources/images/news/joop.png
+++ b/resources/images/news/joop.png
--- a/resources/images/news/kitsapun.png
+++ b/resources/images/news/kitsapun.png
--- a/resources/images/news/nrcnext.png
+++ b/resources/images/news/nrcnext.png
--- a/resources/quick_start.epub
+++ b/resources/quick_start.epub
--- a/resources/recipes/amspec.recipe
+++ b/resources/recipes/amspec.recipe
@ -1,7 +1,5 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 spectator.org
 '''
@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheAmericanSpectator(BasicNewsRecipe):
    title                 = 'The American Spectator'
    __author__            = 'Darko Miletic'
    language = 'en'
    description           = 'News from USA'
    category              = 'news, politics, USA, world'
    publisher             = 'The American Spectator'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    language              = 'en'
    INDEX                 = 'http://spectator.org'
-    html2lrf_options = [
+    conversion_options = {  
-                             '--comment'       , description
+                             'comments'        : description
-                           , '--category'      , 'news, politics, USA'
+                            ,'tags'            : category
-                           , '--publisher'     , title
+                            ,'language'        : language
-                         ]
+                            ,'publisher'       : publisher
                         }
    keep_only_tags   = [
                             dict(name='div', attrs={'class':'post inner'})
@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
    remove_tags     = [
                             dict(name='object')
-                            ,dict(name='div', attrs={'class':'col3'         })
+                            ,dict(name='div', attrs={'class':['col3','post-options','social']})
-                            ,dict(name='div', attrs={'class':'post-options' })
+                            ,dict(name='p'  , attrs={'class':['letter-editor','meta']})
                            ,dict(name='p'  , attrs={'class':'letter-editor'})
                            ,dict(name='div', attrs={'class':'social'       })
                        ]
-    feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
+    feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
    def get_cover_url(self):
        cover_url = None
@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
    def print_version(self, url):
        """Return the address of the printer-friendly page for an article URL."""
        suffix = '/print'
        return url + suffix
    def get_article_url(self, article):
        """Return the feed entry's ``guid`` value, or None when it has none."""
        guid = article.get('guid')
        return guid
--- a/resources/recipes/drivelry.recipe
+++ b/resources/recipes/drivelry.recipe
@ -0,0 +1,41 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class drivelrycom(BasicNewsRecipe):
    """Recipe for the drivelry.com blog, fetched through its FeedBurner feed."""

    title          = u'drivelry.com'
    language       = 'en'
    description    = 'A blog by Mike Abrahams'
    __author__     = 'Krittika Goyal'
    oldest_article = 60 #days
    max_articles_per_feed = 25
    # The stylesheet switch used by the other recipes is ``no_stylesheets``;
    # the previous ``remove_stylesheets`` name is not an option they use.
    no_stylesheets = True
    remove_tags_after  = dict(name='div', attrs={'id':'bookmark'})
    remove_tags = [
       dict(name='iframe'),
       dict(name='div', attrs={'class':['sidebar']}),
       dict(name='div', attrs={'id':['bookmark']}),
    ]
    feeds          = [
        ('drivelry.com', 'http://feeds.feedburner.com/drivelry'),
    ]

    def preprocess_html(self, soup):
        """Return a minimal page holding only the ``div#main`` story.

        The story is moved in front of a fixed donation paragraph inside a
        freshly built document.  When the page has no ``div#main`` the
        incoming soup is returned untouched instead of inserting ``None``.
        """
        story = soup.find(name='div', attrs={'id':'main'})
        if story is None:
            return soup
        fresh = BeautifulSoup('''
 <html><head><title>t</title></head><body>
 <p>To donate to this blog: <a href="http://www.drivelry.com/thank-you/">click here</a></p>
 </body></html>
 ''')
        body = fresh.find(name='body')
        body.insert(0, story)
        return fresh
--- a/resources/recipes/fokkeensukke.recipe
+++ b/resources/recipes/fokkeensukke.recipe
@ -1,23 +1,29 @@
 #!/usr/bin/python
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 class FokkeEnSukkeRecipe(BasicNewsRecipe) :
    __license__   = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
-    description = u'Popular Dutch daily cartoon Fokke en Sukke'
+    country = 'NL'
    version = 2
    title = u'Fokke en Sukke'
-    no_stylesheets = True
+    publisher = u'Reid, Geleijnse & Van Tol'
-    # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
+    category = u'News, Cartoons'
-    # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
+    description = u'Popular Dutch daily cartoon Fokke en Sukke'
    template_css = ''
    INDEX = u'http://foksuk.nl'
-    # This cover is not as nice as it could be, needs some work
+    conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
-    #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
+
    no_stylesheets = True
    extra_css = '''
                    body{font-family: verdana, arial, helvetica, geneva, sans-serif ; margin: 0em; padding: 0em;}
                    div.title {text-align: center; margin-bottom: 1em;}
                    '''
    INDEX = u'http://foksuk.nl'
    cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
    keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
@ -31,15 +37,14 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
        links = index.findAll('a')
        maxIndex = len(links) - 1
        articles = []
-        for i in range(len(links)) :
+        for i in range(1, len(links)) :
-            # The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
+            # There can be more than one cartoon for a given day (currently either one or two).
-            if i == 0 :
+            # If there's only one, there is just a link with the dayname.
-                continue
+            # If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>.
-
+            # In that case we're interested in the last two.
            # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
            # If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
            if links[i].renderContents() in dayNames :
-                # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
+                # If the link is not in daynames, we processed it already, but if it is, let's see
                # if the next one has '1' as content
                if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
                    # Got you! Add it to the list
                    article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url'  : self.INDEX + links[i + 1]['href'], 'description' : ''}
@ -59,29 +64,31 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
        return [[week, articles]]
    def preprocess_html(self, soup) :
        # This method is called for every page, be it cartoon or TOC. We need to process each in their own way
        cartoon = soup.find('div', attrs={'class' : 'cartoon'})
-        if cartoon :
+
            # It is a cartoon. Extract the title.
        title = ''
        img = soup.find('img', attrs = {'alt' : True})
        if img :
            title = img['alt']
-            # Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
+        tag = Tag(soup, 'div', [('class', 'title')])
            tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
        tag.insert(0, title)
        cartoon.insert(0, tag)
-            # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
+        # We only want the cartoon, so throw out the index
            # and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
        select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
        if select :
            select.extract()
-            return cartoon
+        freshSoup = self.getFreshSoup(soup)
-        else :
+        freshSoup.body.append(cartoon)
-            # It is a TOC. Just return the whole lot.
+
-            return soup
+        return freshSoup
    def getFreshSoup(self, oldSoup):
        """Return an empty HTML skeleton that carries over oldSoup's title text.

        The skeleton is ``<html><head><title></title></head><body></body></html>``.
        The title is copied only when oldSoup has both a head and a title, so
        a page without a ``<head>`` no longer fails on the ``.title`` lookup.
        """
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        oldHead = oldSoup.head
        oldTitle = oldHead.title if oldHead is not None else None
        if oldTitle:
            freshSoup.head.title.append(self.tag_to_string(oldTitle))
        return freshSoup
--- a/resources/recipes/ftd.recipe
+++ b/resources/recipes/ftd.recipe
@ -15,7 +15,7 @@ class FTDe(BasicNewsRecipe):
    __author__ = 'Oliver Niesner'
    use_embedded_content   = False
    timefmt = ' [%d %b %Y]'
-    language = 'de'
+    language = _('German')
    max_articles_per_feed = 40
    no_stylesheets = True
@ -28,8 +28,13 @@ class FTDe(BasicNewsRecipe):
 		   dict(id='ADS_Top'),
 		   dict(id='spinner'),
 		   dict(id='ftd-contentad'),
 		   dict(id='ftd-promo'),
 		   dict(id='nava-50009007-1-0'),
 		   dict(id='navli-50009007-1-0'),
 		   dict(id='Box5000534-0-0-0'),
 		   dict(id='ExpV-1-0-0-1'),
 		   dict(id='ExpV-1-0-0-0'),
 		   dict(id='PollExpV-2-0-0-0'),
 		   dict(id='starRating'),
 		   dict(id='saveRating'),
 		   dict(id='yLayer'),
@ -44,14 +49,19 @@ class FTDe(BasicNewsRecipe):
 		   dict(name='ul', attrs={'class':'nav'}),
 		   dict(name='p', attrs={'class':'articleOptionHead'}),
 		   dict(name='p', attrs={'class':'articleOptionFoot'}),
 		   dict(name='p', attrs={'class':'moreInfo'}),
 		   dict(name='div', attrs={'class':'chartBox'}),
 		   dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
 		   dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
 		   dict(name='div', attrs={'class':'box boxNavTabs'}),
 		   dict(name='div', attrs={'class':'boxMMRgtLow'}),
 		   dict(name='span', attrs={'class':'vote_455857'}),
 		   dict(name='div', attrs={'class':'relatedhalb'}),
 		   dict(name='div', attrs={'class':'box boxListScrollOutline'}),
 		   dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
 		   dict(name='div', attrs={'class':'box boxTeaser'}),
 		   dict(name='div', attrs={'class':'tagCloud'}),
 		   dict(name='div', attrs={'class':'pollView'}),
 		   dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
 		   dict(name='div', attrs={'class':'ftdHpNav'}),
 		   dict(name='div', attrs={'class':'ftdHead'}),
@ -67,9 +77,10 @@ class FTDe(BasicNewsRecipe):
 		   dict(name='div', attrs={'class':'wertungoben'}),
 		   dict(name='div', attrs={'class':'artikelfuss'}),
 		   dict(name='a', attrs={'class':'rating'}),
 		   dict(name='a', attrs={'href':'#rt'}),
 		   dict(name='div', attrs={'class':'articleOptionFootFrame'}),
 		   dict(name='div', attrs={'class':'artikelsplitfaq'})]
-    remove_tags_after = [dict(name='a', attrs={'class':'more'})]
+    #remove_tags_after = [dict(name='a', attrs={'class':'more'})]
    feeds =  [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), 
 	       ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
@ -86,4 +97,4 @@ class FTDe(BasicNewsRecipe):
    def print_version(self, url):
-        return url + '?mode=print'
+        return url.replace('.html', '.html?mode=print')
--- a/resources/recipes/greader_uber.recipe
+++ b/resources/recipes/greader_uber.recipe
@ -0,0 +1,38 @@
 import urllib, re, mechanize
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre import __appname__
 class GoogleReaderUber(BasicNewsRecipe):
    """Recipe that downloads the unread items of a Google Reader account."""

    title   = 'Google Reader Uber'
    description = 'This recipe downloads all unread feeds from your Google Reader account.'
    needs_subscription = True
    __author__ = 'rollercoaster, davec'
    base_url = 'http://www.google.com/reader/atom/'
    oldest_article = 365
    max_articles_per_feed = 250
    # Query string appended to every feed URL; built once from max_articles_per_feed.
    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
    use_embedded_content = True

    def get_browser(self):
        """Return a browser, logged in to Google when credentials are set.

        With a username and password, posts them to ClientLogin, reads the
        ``SID`` token from the response and returns a mechanize opener whose
        cookie jar holds that token.  Without credentials the plain browser
        is returned.

        Raises ValueError when the login response carries no ``SID=`` token.
        """
        # NOTE(review): called on the class without an instance, as in the
        # original; confirm get_browser supports that in the targeted calibre.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
                                        ('service', 'reader'), ('source', __appname__)])
            response = br.open('https://www.google.com/accounts/ClientLogin', request)
            match = re.search(r'SID=(\S*)', response.read())
            if match is None:
                raise ValueError('Google ClientLogin response did not contain a SID token')
            sid = match.group(1)
            cookies = mechanize.CookieJar()
            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
            cookies.set_cookie(mechanize.Cookie(None, 'SID', sid, None, False, '.google.com', True, True, '/', True, False, None, True, '', '', None))
        return br

    def get_feeds(self):
        """Return ``(title, url)`` pairs built from the account's tag list.

        Each title is the last path segment of the tag id; each URL is
        base_url + the quoted tag id + get_options.
        """
        feeds = []
        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
        for tag in soup.findAll(True, attrs={'name':['id']}):
            url = tag.contents[0].replace('broadcast','reading-list')
            # Text after the last '/' (the whole id when it has no slash).
            label = url.rsplit('/', 1)[-1]
            feeds.append((label,
                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
        return feeds
--- a/resources/recipes/joop.recipe
+++ b/resources/recipes/joop.recipe
@ -0,0 +1,91 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag
 import re
 class JoopRecipe(BasicNewsRecipe):
    """Recipe for joop.nl; the index is built from the section lists in the page footer."""

    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    country = 'NL'
    version = 1

    title = u'Joop'
    publisher = u'Vara'
    category = u'News, Politics, Discussion'
    description = u'Political blog from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name = 'div', attrs = {'class': 'author_head clearfix photo'}),
        dict(name = 'h2', attrs = {'class': 'columnhead smallline'}),
        dict(name = 'div', attrs = {'class': re.compile('article.*')}),
    ]

    extra_css = '''
                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
                img {margin-right: 0.4em;}
                h3 {font-size: medium; font-style: italic; font-weight: normal;}
                h2 {font-size: xx-large; font-weight: bold}
                sub {color: #666666; font-size: x-small; font-weight: normal;}
                div.joop_byline {font-size: large}
                div.joop_byline_job {font-size: small; color: #696969;}
                div.joop_date {font-size: x-small; font-style: italic; margin-top: 0.6em}
                '''

    INDEX = 'http://www.joop.nl'

    conversion_options = {'comments': description, 'tags': category, 'language': language,
                          'publisher': publisher}

    def parse_index(self):
        """Return ``(section, articles)`` pairs scraped from the footer of INDEX.

        Every section name yields an entry, with an empty article list when
        its heading or link list is not found.  A missing footer div or a
        list item without a usable link is skipped rather than raising.
        """
        sections = ['Politiek', 'Wereld', 'Economie', 'Groen', 'Media', 'Leven', 'Show', 'Opinies']
        soup = self.index_to_soup(self.INDEX)
        answer = []

        footer = soup.find('div', attrs = {'id': 'footer'})
        for section in sections:
            articles = []
            h2 = None
            if footer is not None:
                h2 = footer.find(lambda tag: tag.name == 'h2' and tag.renderContents() == section)
            if h2:
                ul = h2.findNextSibling('ul', 'linklist')
                if ul:
                    for li in ul.findAll('li'):
                        link = li.a
                        if link is None or not link.get('href'):
                            continue
                        title = self.tag_to_string(link)
                        url = self.INDEX + link['href']
                        articles.append({'title': title, 'date': None, 'url': url, 'description': ''})
            answer.append((section, articles))

        return answer

    def preprocess_html(self, soup):
        """Rewrite the author header and the date heading as styled divs.

        The first h2 becomes ``div.joop_byline`` (its span ``div.joop_byline_job``)
        and replaces the author header div.  The ``columnhead smallline`` h2 is
        replaced by ``div.joop_date`` holding the text of its info span; the
        div is left empty when that text is missing.
        """
        header = soup.find('div', 'author_head clearfix photo')
        if header:
            h2 = soup.find('h2')
            if h2:
                h2.name = 'div'
                h2['class'] = 'joop_byline'
                span = h2.find('span')
                if span:
                    span.name = 'div'
                    span['class'] = 'joop_byline_job'
                header.replaceWith(h2)

        h2 = soup.find('h2', attrs = {'class': 'columnhead smallline'})
        if h2:
            txt = None
            span = h2.find('span', 'info')
            if span:
                txt = span.find(text = True)
            date_div = Tag(soup, 'div', attrs = [('class', 'joop_date')])
            # Appending None would fail inside the tag, so only add real text.
            if txt is not None:
                date_div.append(txt)
            h2.replaceWith(date_div)

        return soup
--- a/resources/recipes/kitsapun.recipe
+++ b/resources/recipes/kitsapun.recipe
@ -0,0 +1,44 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.kitsapsun.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class Kitsapsun(BasicNewsRecipe):
    """Recipe for the Kitsap Sun, read from the site's headline RSS feeds."""

    # Descriptive metadata
    title       = 'Kitsap Sun'
    __author__  = 'Darko Miletic'
    description = 'News from Kitsap County'
    publisher   = 'Scripps Interactive Newspapers Group'
    category    = 'news, Kitsap county, USA'
    language    = 'en'

    # Download settings
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [dict(name='div', attrs={'id':['story_meta','story_content']})]
    remove_tags = [dict(name=['object','link','embed','form','iframe'])]

    feeds = [
        (u'News', u'http://www.kitsapsun.com/rss/headlines/news/'),
        (u'Business', u'http://www.kitsapsun.com/rss/headlines/business/'),
        (u'Communities', u'http://www.kitsapsun.com/rss/headlines/communities/'),
        (u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/'),
        (u'Lifestyles', u'http://www.kitsapsun.com/rss/headlines/lifestyles/'),
    ]

    def print_version(self, url):
        """Return the print URL: everything before the last '/' plus '/?print=1'."""
        head = url.rpartition('/')[0]
        return head + '/?print=1'
--- a/resources/recipes/ledevoir.recipe
+++ b/resources/recipes/ledevoir.recipe
@ -0,0 +1,79 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __author__    = 'Lorenzo Vigentini'
 __copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
 __version__     = 'v1.01'
 __date__        = '14, January 2010'
 __description__   = 'Canadian Paper '
 '''
 http://www.ledevoir.com/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class ledevoir(BasicNewsRecipe):
    """Recipe for Le Devoir (ledevoir.com), read from the site's RSS feeds."""

    author        = 'Lorenzo Vigentini'
    # The other recipes publish the author as ``__author__``; expose it under
    # that name too and keep ``author`` for anything that already reads it.
    __author__    = author
    description   = 'Canadian Paper'
    cover_url      = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
    title          = u'Le Devoir'
    publisher      = 'leDevoir.com'
    category       = 'News, finance, economy, politics'
    language       = 'fr'
    encoding       = 'utf-8'
    timefmt        = '[%a, %d %b, %Y]'
    max_articles_per_feed = 50
    use_embedded_content  = False
    # NOTE(review): confirm 'recursion' is the option name the framework reads.
    recursion             = 10
    remove_javascript     = True
    no_stylesheets        = True
    keep_only_tags  = [
                        dict(name='div', attrs={'id':'article'}),
                        dict(name='ul', attrs={'id':'ariane'})
                    ]
    remove_tags     = [
                        dict(name='div', attrs={'id':'dialog'}),
                        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
                        dict(name='ul', attrs={'class':'mots_cles'}),
                        dict(name='a', attrs={'class':'haut'}),
                        dict(name='h5', attrs={'class':'interesse_actions'})
                    ]
    feeds          = [
                       (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
                       (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
                       (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
                       (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
                       (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
                       (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
                     ]
    extra_css = '''
                h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
                h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
                .specs {line-height:1em;margin:1px 0;}
                .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
                .specs span.auteur a,
                .specs span.auteur span {text-transform:uppercase;color:#787878;}
                .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
                ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
                ul#ariane li {display:inline;}
                ul#ariane a {color:#2E2E2E;text-decoration:underline;}
                .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
                .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
                '''
--- a/resources/recipes/national_post.recipe
+++ b/resources/recipes/national_post.recipe
@ -70,11 +70,28 @@ class NYTimes(BasicNewsRecipe):
                feeds.append((current_section, current_articles))
            return feeds
    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'class':'triline'})
-        #td = heading.findParent(name='td')
+        page2_link = soup.find('p','pagenav')
-        #td.extract()
+        if page2_link:
            atag = page2_link.find('a',href=True)
            if atag:
                page2_url = atag['href']
                if page2_url.startswith('story'):
                         page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
                elif page2_url.startswith( '/todays-paper/story.html'):
                    page2_url = 'http://www.nationalpost.com/'+page2_url
                page2_soup = self.index_to_soup(page2_url)
                if page2_soup:
                    page2_content = page2_soup.find('div','story-content')
                    if page2_content:
                        full_story = BeautifulSoup('<div></div>')
                        full_story.insert(0,story)
                        full_story.insert(1,page2_content)
                        story = full_story
        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
--- a/resources/recipes/ncrnext.recipe
+++ b/resources/recipes/ncrnext.recipe
@ -1,29 +1,38 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 class NrcNextRecipe(BasicNewsRecipe):
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    version = 1
    language = 'nl'
-    description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
+    country = 'NL'
    version = 2
    title = u'nrcnext'
    publisher = u'NRC Media'
    category = u'News, Opinion, the Netherlands'
    description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
    conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
    no_stylesheets = True
-    template_css = ''
+    remove_javascript = True
    # I want to do some special processing on the articles. I could not solve it with the 'extra_css' property . So we do it the hard way.
    keep_only_tags = [dict(name='div', attrs={'id' : 'main'})]
    # If that's overkill for you comment out the previous line and uncomment the next. Then get rid of the preprocess_html() method.
    #keep_only_tags = [dict(name='div', attrs={'class' : 'post'}), dict(name='div', attrs={'class' : 'vlag'}) ]
-    remove_tags = [dict(name = 'div', attrs = {'class' : 'meta'}),
+    remove_tags = []
-                          dict(name = 'div', attrs = {'class' : 'datumlabel'}),
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'meta'}))
-                          dict(name = 'ul', attrs = {'class' : 'cats single'}),
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'datumlabel'}))
-                          dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}),
+    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats single'}))
-                          dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})]
+    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}))
    remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats rubrieken'}))
-    use_embedded_content = False
+    extra_css = '''
                body {font-family: verdana, arial, helvetica, geneva, sans-serif; text-align: left;}
                p.wp-caption-text {font-size: x-small; color: #666666;}
                h2.sub_title {font-size: medium; color: #696969;}
                h2.vlag {font-size: small; font-weight: bold;}
                '''
    def parse_index(self) :
        # Use the website as an index. Their RSS feeds can be out of date.
@ -44,10 +53,11 @@ class NrcNextRecipe(BasicNewsRecipe):
                # Find the links to the actual articles and remember the location they're pointing to and the title
                a = post.find('a', attrs={'rel' : 'bookmark'})
                href = a['href']
-                title = a.renderContents()
+                title = self.tag_to_string(a)
                if index == 'columnisten' :
-                    # In this feed/page articles can be written by more than one author. It is nice to see their names in the titles.
+                    # In this feed/page articles can be written by more than one author.
                    # It is nice to see their names in the titles.
                    flag = post.find('h2', attrs = {'class' : 'vlag'})
                    author = flag.contents[0].renderContents()
                    completeTitle = u''.join([author, u': ', title])
@ -71,44 +81,46 @@ class NrcNextRecipe(BasicNewsRecipe):
        return answer
    def preprocess_html(self, soup) :
        # This method is called for every page, be it article or TOC. We need to process each in their own way
        if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}):
            # It's an article, find the interesting part
            tag = soup.find('div', attrs = {'class' : 'post'})
            if tag:
-                # And replace any links with their text, so they don't show up underlined on my reader.
+                h2 = tag.find('h2', 'vlag')
-                for link in tag.findAll('a') :
+                if h2:
-                    link.replaceWith(link.renderContents())
+                    new_h2 = Tag(soup, 'h2', attrs = [('class', 'vlag')])
                    new_h2.append(self.tag_to_string(h2))
                    h2.replaceWith(new_h2)
                else:
                    h2 = tag.find('h2')
                    if h2:
                        new_h2 = Tag(soup, 'h2', attrs = [('class', 'sub_title')])
                        new_h2.append(self.tag_to_string(h2))
                        h2.replaceWith(new_h2)
-                # Slows down my Sony reader; feel free to comment out
+                h1 = tag.find('h1')
                if h1:
                    new_h1 = Tag(soup, 'h1')
                    new_h1.append(self.tag_to_string(h1))
                    h1.replaceWith(new_h1)
                # Slows down my reader.
                for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}):
                    movie.extract()
                for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}):
                    movie.extract()
                for iframe in tag.findAll('iframe') :
                    iframe.extract()
-                homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
+                fresh_soup = self.getFreshSoup(soup)
-                body = homeMadeSoup.find('body')
+                fresh_soup.body.append(tag)
                body.append(tag)
-                return homeMadeSoup
+                return fresh_soup
            else:
                # This should never happen and other famous last words...
                return soup
        else :
            # It's a TOC, return the whole lot.
            return soup
    def postproces_html(self, soup) :
        """Remove <img> and <iframe> tags whose ``src`` starts with 'http://'.

        The soup is modified in place and nothing is returned.  Tags that
        have no ``src`` attribute are left alone instead of raising KeyError.

        NOTE(review): the name is spelled 'postproces_html' (single 's'),
        unlike preprocess_html; confirm the framework calls it under this name.
        """
        # Should not happen, but it does. Slows down my Sony eReader
        for img in soup.findAll('img') :
            if img.get('src', '').startswith('http://') :
                img.extract()
        # Happens for some movies which we are not able to view anyway
        for iframe in soup.findAll('iframe') :
            if iframe.get('src', '').startswith('http://') :
                iframe.extract()
    def getFreshSoup(self, oldSoup):
        """Return an empty HTML skeleton that carries over oldSoup's title text.

        The skeleton is ``<html><head><title></title></head><body></body></html>``.
        The title is copied only when oldSoup has both a head and a title, so
        a page without a ``<head>`` no longer fails on the ``.title`` lookup.
        """
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        oldHead = oldSoup.head
        oldTitle = oldHead.title if oldHead is not None else None
        if oldTitle:
            freshSoup.head.title.append(self.tag_to_string(oldTitle))
        return freshSoup
--- a/resources/recipes/yementimes.recipe
+++ b/resources/recipes/yementimes.recipe
@ -0,0 +1,125 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 class YemenTimesRecipe(BasicNewsRecipe):
    '''
    Recipe for the Yemen Times (yementimes.com).

    The section pages listed in ``feeds`` are scraped by ``parse_index``;
    each article page is then rebuilt by ``preprocess_html`` into a fresh
    document holding the headline, lead image, date, byline and story.
    '''
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_YE'
    country = 'YE'
    version = 1
    title = u'Yemen Times'
    publisher = u'yementimes.com'
    category = u'News, Opinion, Yemen'
    description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
                                                      'class': 'DMAIN2'}))
    remove_attributes = ['style']
    INDEX = 'http://www.yementimes.com/'
    # (section title, section page url) pairs walked by parse_index()
    feeds = []
    feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
    feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
    feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
    feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
    feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
    feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
    feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
    feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
    feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
    feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
    feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
    feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
    feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))
    extra_css = '''
                body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
                div.yemen_byline {font-size: medium; font-weight: bold;}
                div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
                .yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
                '''
    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher, 'linearize_tables': True}
    def get_browser(self):
        '''Return the stock browser with gzip handling switched on.'''
        br = BasicNewsRecipe.get_browser()
        br.set_handle_gzip(True)
        return br
    def parse_index(self):
        '''
        Scrape every section page in ``feeds``.

        Returns a list of ``(section title, articles)`` tuples, where each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys. A section whose page lacks the expected
        ``div.newsbox`` / following table is skipped rather than aborting
        the whole run with an AttributeError.
        '''
        answer = []
        for feed_title, feed in self.feeds:
            soup = self.index_to_soup(feed)
            newsbox = soup.find('div', 'newsbox')
            if newsbox is None:
                continue
            main = newsbox.findNextSibling('table')
            if main is None:
                continue
            articles = []
            for li in main.findAll('li'):
                # List items without a link carry no article to fetch
                if li.a is None:
                    continue
                title = self.tag_to_string(li.a)
                url = self.INDEX + li.a['href']
                articles.append({'title': title, 'date': None, 'url': url, 'description': '<br/>&nbsp;'})
            answer.append((feed_title, articles))
        return answer
    def preprocess_html(self, soup):
        '''
        Rebuild an article page as a fresh, minimal document.

        Moves, in order, the headline (turned into an ``h1``), the first
        image of the ``DVTOP`` div that follows the headline, the
        publication date, the byline and the story body from ``soup``
        into a new soup, and returns that new soup.
        '''
        freshSoup = self.getFreshSoup(soup)
        headline = soup.find('div', attrs = {'id': 'DVMTIT'})
        if headline:
            div = headline.findNext('div', attrs = {'id': 'DVTOP'})
            img = None
            if div:
                img = div.find('img')
            headline.name = 'h1'
            freshSoup.body.append(headline)
            if img is not None:
                freshSoup.body.append(img)
        byline = soup.find('div', attrs = {'id': 'DVTIT'})
        if byline:
            # Must be bound even when the byline has no <span>; the test
            # below used to raise UnboundLocalError in that case.
            date = None
            date_el = byline.find('span')
            if date_el:
                pub_date = self.tag_to_string(date_el)
                date = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
                date.append(pub_date)
                # Take the date out so it is not repeated in the byline text
                date_el.extract()
            raw = '<br/>'.join(['%s' % (part) for part in byline.findAll(text = True)])
            author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')
            if date is not None:
                freshSoup.body.append(date)
            freshSoup.body.append(author)
        story = soup.find('div', attrs = {'id': 'DVDET'})
        if story:
            # Tables wrapping an image get the caption style from extra_css
            for table in story.findAll('table'):
                if table.find('img'):
                    table['class'] = 'yemen_caption'
            freshSoup.body.append(story)
        return freshSoup
    def getFreshSoup(self, oldSoup):
        '''
        Build an empty html/head/title/body document and copy the title
        text of ``oldSoup`` into it.

        The title is left empty when ``oldSoup`` has no ``<head>`` or no
        ``<title>``.
        '''
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        oldHead = oldSoup.head
        if oldHead is not None and oldHead.title:
            freshSoup.head.title.append(self.tag_to_string(oldHead.title))
        return freshSoup
--- a/src/calibre/customize/init.py
+++ b/src/calibre/customize/init.py
@ -2,11 +2,12 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import os, sys, tempfile, zipfile
+import atexit, os, shutil, sys, tempfile, zipfile
 from calibre.constants import numeric_version
 from calibre.ptempfile import PersistentTemporaryFile
 class Plugin(object):
    '''
    A calibre plugin. Useful members include:
@ -231,6 +232,8 @@ class CatalogPlugin(Plugin):
    A plugin that implements a catalog generator.
    '''
    resources_path = None
    #: Output file type for which this plugin should be run
    #: For example: 'epub' or 'xml'
    file_types = set([])
@ -249,22 +252,18 @@ class CatalogPlugin(Plugin):
    cli_options = []
    def cleanup(self, path):
        '''
        Best-effort removal of the directory tree at ``path``.

        Does nothing when ``path`` does not exist. Ordinary errors are
        deliberately ignored, but SystemExit and KeyboardInterrupt are no
        longer swallowed.
        '''
        import os, shutil
        try:
            if os.path.exists(path):
                shutil.rmtree(path)
        except Exception:
            # Failing to delete temporary resources must never be fatal
            pass
    def search_sort_db(self, db, opts):
-        if opts.search_text:
+
        # If declared, --ids overrides any declared search criteria
        if not opts.ids and opts.search_text:
            db.search(opts.search_text)
        if opts.sort_by:
            # 2nd arg = ascending
            db.sort(opts.sort_by, True)
-        return db.get_data_as_dict()
+        return db.get_data_as_dict(ids=opts.ids)
    def get_output_fields(self, opts):
        # Return a list of requested fields, with opts.sort_by first
@ -280,7 +279,9 @@ class CatalogPlugin(Plugin):
            fields = list(all_fields & requested_fields)
        else:
            fields = list(all_fields)
        fields.sort()
        if opts.sort_by:
            fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
        return fields
@ -291,35 +292,27 @@ class CatalogPlugin(Plugin):
        Tab will be dynamically generated and added to the Catalog Options dialog in 
        calibre.gui2.dialogs.catalog.py:Catalog
        '''
        import atexit
        from calibre.customize.builtins import plugins as builtin_plugins
        from calibre.customize.ui import config
        from calibre.ptempfile import PersistentTemporaryDirectory
-        if type(self) in builtin_plugins:
+        if not type(self) in builtin_plugins and \
-            print "%s: Built-in Catalog plugin, no init necessary" % self.name
+           not self.name in config['disabled_plugins']:
        else:
            print "%s: User-added plugin" % self.name
            print " Copying .ui and .py resources from %s to tmpdir" % self.plugin_path
            # Generate a list of resource files to extract from the zipped plugin
            # Copy to tmpdir/calibre_plugin_resources
            files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
            print " files_to_copy: %s" % files_to_copy
            resources = zipfile.ZipFile(self.plugin_path,'r')
-            temp_resources_path = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
+                        
            if self.resources_path is None:
                self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')
            for file in files_to_copy:
                try:
-                    resources.extract(file, temp_resources_path)
+                    resources.extract(file, self.resources_path)
                    print " %s extracted to %s" % (file, temp_resources_path)
                except:
-                    print " %s not found in %s" % (file, os.path.basename(self.plugin_path))
+                    print " customize:__init__.initialize(): %s not found in %s" % (file, os.path.basename(self.plugin_path))
                    continue
            resources.close()                
-            # Register temp_resources_path for deletion when calibre exits
+    def run(self, path_to_output, opts, db, ids):
            atexit.register(self.cleanup, temp_resources_path)
    def run(self, path_to_output, opts, db):
        '''
        Run the plugin. Must be implemented in subclasses.
        It should generate the catalog in the format specified
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -14,6 +14,7 @@ Windows PNP strings:
 2W00000&1', 3, u'G:\\')
 '''
 import re
 from calibre.devices.usbms.driver import USBMS
@ -108,6 +109,7 @@ class POCKETBOOK360(EB600):
    OSX_MAIN_MEM   = 'Philips Mass Storge Media'
    OSX_CARD_A_MEM = 'Philips Mass Storge Media'
    OSX_MAIN_MEM_VOL_PAT = re.compile(r'/Pocket')
    @classmethod
    def can_handle(cls, dev, debug=False):
--- a/src/calibre/ebooks/metadata/cli.py
+++ b/src/calibre/ebooks/metadata/cli.py
@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())
    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
--- a/src/calibre/ebooks/metadata/rtf.py
+++ b/src/calibre/ebooks/metadata/rtf.py
@ -25,12 +25,14 @@ def get_document_info(stream):
    while not found:
        prefix = block[-6:]
        block = prefix + stream.read(block_size)
        actual_block_size = len(block) - len(prefix)
        if len(block) == len(prefix):
            break
        idx = block.find(r'{\info')
        if idx >= 0:
            found = True
-            stream.seek(stream.tell() - block_size + idx - len(prefix))
+            pos = stream.tell() - actual_block_size + idx - len(prefix)
            stream.seek(pos)
        else:
            if block.find(r'\sect') > -1:
                break
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@ -90,7 +90,10 @@ class DetectStructure(object):
                    mark = etree.Element(XHTML('div'), style=page_break_after)
                else: # chapter_mark == 'both':
                    mark = etree.Element(XHTML('hr'), style=page_break_before)
                try:
                    elem.addprevious(mark)
                except TypeError:
                    self.log.exception('Failed to mark chapter')
    def create_level_based_toc(self):
        if self.opts.level1_toc is None:
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@ -20,6 +20,10 @@ class Font(object):
 class Column(object):
    # A column contains an element if the element bulges out to
    # the left or the right by at most HFUZZ*col width.
    HFUZZ = 0.2
    def __init__(self):
        self.left = self.right = self.top = self.bottom = 0
        self.width = self.height = 0
@ -41,6 +45,10 @@ class Column(object):
        for x in self.elements:
            yield x
    def contains(self, elem):
        '''
        Return True if ``elem`` lies horizontally inside this column,
        allowing it to stick out by up to HFUZZ*width on either side.
        '''
        if not (elem.left > self.left - self.HFUZZ*self.width):
            return False
        return elem.right < self.right + self.HFUZZ*self.width
 class Element(object):
    def __eq__(self, other):
@ -238,11 +246,10 @@ class Page(object):
        return columns
    def find_elements_in_row_of(self, x):
-        interval = Interval(x.top - self.YFUZZ * self.average_text_height,
+        interval = Interval(x.top,
                x.top + self.YFUZZ*(1+self.average_text_height))
        h_interval = Interval(x.left, x.right)
-        m = max(0, x.idx-15)
+        for y in self.elements[x.idx:x.idx+15]:
        for y in self.elements[m:x.idx+15]:
            if y is not x:
                y_interval = Interval(y.top, y.bottom)
                x_interval = Interval(y.left, y.right)
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -169,6 +169,21 @@ class RTFInput(InputFormatPlugin):
        with open('styles.css', 'ab') as f:
            f.write(css)
    def preprocess(self, fname):
        '''
        Rewrite the RTF file ``fname`` through the tokenizer/token parser
        so that its unicode sequences are converted.

        On success the result is written to ``preprocessed.rtf`` (relative
        to the current directory) and that name is returned. On any
        failure the error is logged and the original ``fname`` is
        returned, so the caller continues with the unprocessed file.
        '''
        self.log('\tPreprocessing to convert unicode characters')
        try:
            with open(fname, 'rb') as f:
                data = f.read()
            from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
            tokenizer = RtfTokenizer(data)
            tokens = RtfTokenParser(tokenizer.tokens)
            data = tokens.toRTF()
            out_name = 'preprocessed.rtf'
            with open(out_name, 'wb') as f:
                f.write(data)
            # Switch to the new file only once it has been written in full
            fname = out_name
        except:
            # Deliberately broad: preprocessing is optional and any failure
            # must fall back to the original file.
            self.log.exception(
            'Failed to preprocess RTF to convert unicode sequences, ignoring...')
        return fname
    def convert(self, stream, options, file_ext, log,
                accelerators):
@ -177,8 +192,9 @@ class RTFInput(InputFormatPlugin):
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.log = log
        self.log('Converting RTF to XML...')
        fname = self.preprocess(stream.name)
        try:
-            xml = self.generate_xml(stream.name)
+            xml = self.generate_xml(fname)
        except RtfInvalidCodeException:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.'))
--- a/src/calibre/ebooks/rtf/preprocess.py
+++ b/src/calibre/ebooks/rtf/preprocess.py
@ -0,0 +1,344 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2010, Gerendi Sandor Attila'
 __docformat__ = 'restructuredtext en'
 """
 RTF tokenizer and token parser. v.1.0 (1/17/2010)
 Author: Gerendi Sandor Attila
 At this point this will tokenize a RTF file then rebuild it from the tokens.
 In the process the UTF8 tokens are altered to be supported by the RTF2XML and also remain RTF specification compliant.
 """
 class tokenDelimitatorStart():
    '''Token for the ``{`` that opens an RTF group.'''
    def __init__(self):
        pass
    def __repr__(self):
        return '{'
    def toRTF(self):
        # Serialised form of the token
        return b'{'
 class tokenDelimitatorEnd():
    '''Token for the ``}`` that closes an RTF group.'''
    def __init__(self):
        pass
    def __repr__(self):
        return '}'
    def toRTF(self):
        # Serialised form of the token
        return b'}'
 class tokenControlWord():
    '''
    A control word without numeric argument.

    ``name`` is stored and emitted verbatim, followed by ``separator``.
    '''
    def __init__(self, name, separator = ''):
        self.name, self.separator = name, separator
    def toRTF(self):
        word = self.name
        return word + self.separator
    def __repr__(self):
        # Same text as the serialised form
        return self.toRTF()
 class tokenControlWordWithNumericArgument():
    '''
    A control word followed by a numeric argument.

    Serialised as ``name``, then ``repr(argument)``, then ``separator``.
    '''
    def __init__(self, name, argument, separator = ''):
        self.name, self.argument = name, argument
        self.separator = separator
    def toRTF(self):
        number = repr(self.argument)
        return self.name + number + self.separator
    def __repr__(self):
        # Same text as the serialised form
        return self.toRTF()
 class tokenControlSymbol():
    '''A control symbol; ``name`` is stored and emitted verbatim.'''
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        # Same text as the serialised form
        return self.toRTF()
    def toRTF(self):
        return self.name
 class tokenData():
    '''A run of literal document text, emitted unchanged.'''
    def __init__(self, data):
        self.data = data
    def __repr__(self):
        # Same text as the serialised form
        return self.toRTF()
    def toRTF(self):
        return self.data
 class tokenBinN():
    '''
    A ``\\bin`` control word together with its data.

    Serialised as ``\\bin``, the length of ``data``, ``separator`` and
    then ``data`` itself.
    '''
    def __init__(self, data, separator = ''):
        self.data, self.separator = data, separator
    def toRTF(self):
        header = "\\bin" + repr(len(self.data))
        return header + self.separator + self.data
    def __repr__(self):
        # Same text as the serialised form
        return self.toRTF()
 class token8bitChar():
    '''A ``\\'`` escape; ``data`` is emitted right after the ``\\'``.'''
    def __init__(self, data):
        self.data = data
    def __repr__(self):
        # Same text as the serialised form
        return self.toRTF()
    def toRTF(self):
        prefix = "\\'"
        return prefix + self.data
 class tokenUnicode():
    '''
    A ``\\u`` unicode character together with its fallback tokens.

    ``data`` is the numeric argument of ``\\u``, ``current_ucn`` the
    number of fallback characters in effect and ``eqList`` the fallback
    tokens; each fallback token must provide ``toRTF()``.
    '''
    def __init__(self, data, separator = '', current_ucn = 1, eqList = None):
        self.data = data
        self.separator = separator
        self.current_ucn = current_ucn
        # A fresh list per instance; a shared ``[]`` default would leak
        # fallback tokens from one instance into all the others.
        if eqList is None:
            eqList = []
        self.eqList = eqList
    def toRTF(self):
        '''
        Serialise as ``\\uN `` followed by at most ``current_ucn`` fallback
        tokens. When fewer fallback tokens are available than
        ``current_ucn`` asks for, a ``\\uc`` control word announcing the
        real count is emitted in front.
        '''
        result = '\\u' + repr(self.data) + ' '
        ucn = self.current_ucn
        if len(self.eqList) < ucn:
            ucn = len(self.eqList)
            result =  tokenControlWordWithNumericArgument('\\uc', ucn).toRTF() + result
        # The counter was never advanced before, so every fallback token
        # was written out no matter what ucn said.
        for i, eq in enumerate(self.eqList):
            if i >= ucn:
                break
            result = result + eq.toRTF()
        return result
    def __repr__(self):
        return '\\u' + repr(self.data)
 def isAsciiLetter(value):
    '''Return True if ``value`` compares within 'a'..'z' or 'A'..'Z'.'''
    return ('a' <= value <= 'z') or ('A' <= value <= 'Z')
 def isDigit(value):
    '''Return True if ``value`` compares within '0'..'9'.'''
    return '0' <= value <= '9'
 def isChar(value, char):
    '''Return the result of comparing ``value`` with ``char`` for equality.'''
    same = (value == char)
    return same
 def isString(buffer, string):
    '''Return the result of comparing ``buffer`` with ``string`` for equality.'''
    same = (buffer == string)
    return same
 class RtfTokenParser():
    '''
    Post-processes the token list produced by the tokenizer.

    On construction ``process()`` merges each ``\\'`` control symbol with
    the two characters that follow it into a ``token8bitChar``, then
    ``processUnicode()`` turns each ``\\u`` control word into a
    ``tokenUnicode`` and drops its fallback characters. The result is
    available as ``tokens`` and, serialised, through ``toRTF()``.

    Malformed input raises ``ValueError``.
    '''
    def __init__(self, tokens):
        self.tokens = tokens
        self.process()
        self.processUnicode()
    def process(self):
        '''Replace ``\\'`` + data pairs by ``token8bitChar`` tokens.'''
        i = 0
        newTokens = []
        while i < len(self.tokens):
            token = self.tokens[i]
            if isinstance(token, tokenControlSymbol) and isString(token.name, "\\'"):
                i = i + 1
                # A trailing \' used to fail with a bare IndexError here
                if i >= len(self.tokens) or not isinstance(self.tokens[i], tokenData):
                    raise ValueError('Error: token8bitChar without data.')
                data = self.tokens[i].data
                if len(data) < 2:
                    raise ValueError('Error: token8bitChar without data.')
                # The first two characters belong to the escape, the rest
                # stays ordinary text.
                newTokens.append(token8bitChar(data[0:2]))
                if len(data) > 2:
                    newTokens.append(tokenData(data[2:]))
                i = i + 1
                continue
            newTokens.append(token)
            i = i + 1
        self.tokens = list(newTokens)
    def _skipReplacement(self, start, ucn):
        '''
        Skip the fallback characters that follow a ``\\u`` token.

        Walks the tokens from index ``start`` until ``ucn`` characters
        have been skipped or a group delimiter is met. Returns
        ``(next index, leftover)``, where ``leftover`` is a ``tokenData``
        holding the unskipped remainder of a partly consumed text run, or
        None.
        '''
        i = start
        j = 0
        partialData = None
        while (i < len(self.tokens)) and (j < ucn):
            token = self.tokens[i]
            if isinstance(token, (tokenDelimitatorStart, tokenDelimitatorEnd)):
                break
            if isinstance(token, tokenData):
                missing = ucn - j
                if len(token.data) >= missing:
                    # This text run completes the fallback; keep its tail
                    if len(token.data) > missing:
                        partialData = tokenData(token.data[missing:])
                    i = i + 1
                    break
                j = j + len(token.data)
                i = i + 1
                continue
            if isinstance(token, (token8bitChar, tokenBinN)):
                # Each of these counts as one fallback character
                i = i + 1
                j = j + 1
                continue
            raise ValueError('Error: incorect utf replacement.')
        return i, partialData
    def processUnicode(self):
        '''
        Replace ``\\u`` control words by ``tokenUnicode`` tokens.

        Tracks the ``\\uc`` value per group: it is inherited when a group
        opens and restored when the group closes.
        '''
        i = 0
        newTokens = []
        ucNbStack = [1]
        while i < len(self.tokens):
            token = self.tokens[i]
            if isinstance(token, tokenDelimitatorStart):
                ucNbStack.append(ucNbStack[len(ucNbStack) - 1])
                newTokens.append(token)
                i = i + 1
                continue
            if isinstance(token, tokenDelimitatorEnd):
                ucNbStack.pop()
                newTokens.append(token)
                i = i + 1
                continue
            if isinstance(token, tokenControlWordWithNumericArgument):
                if isString(token.name, '\\uc'):
                    ucNbStack[len(ucNbStack) - 1] = token.argument
                    newTokens.append(token)
                    i = i + 1
                    continue
                if isString(token.name, '\\u'):
                    ucn = ucNbStack[len(ucNbStack) - 1]
                    i, partialData = self._skipReplacement(i + 1, ucn)
                    #calibre rtf2xml does not support utfreplace
                    newTokens.append(tokenUnicode(token.argument, token.separator, ucn, []))
                    if partialData is not None:
                        newTokens.append(partialData)
                    continue
            newTokens.append(token)
            i = i + 1
        self.tokens = list(newTokens)
    def toRTF(self):
        '''Return the concatenated serialised form of all tokens.'''
        return "".join([token.toRTF() for token in self.tokens])
 class RtfTokenizer():
    '''
    Splits RTF source into a flat token list.

    The list is built on construction and available as ``tokens``;
    ``toRTF()`` serialises it again. Malformed input raises
    ``ValueError``.
    '''
    def __init__(self, rtfData):
        self.tokens = []
        self.rtfData = rtfData
        self.tokenize()
    def _emitData(self, start, end):
        '''Append the pending literal text ``rtfData[start:end]``, if any.'''
        if start > -1:
            self.tokens.append(tokenData(self.rtfData[start : end]))
    def tokenize(self):
        '''
        Fill ``self.tokens`` from ``self.rtfData``.

        Group delimiters, control words (with or without numeric
        argument), ``\\bin`` data and control symbols become their own
        tokens; everything in between becomes ``tokenData``.
        '''
        # NOTE(review): a leading '-' on a numeric argument is tested for
        # but never consumed, so negative arguments end up as a bare
        # control word followed by literal text. Left as it was.
        i = 0
        lastDataStart = -1
        size = len(self.rtfData)
        while i < size:
            if isChar(self.rtfData[i], '{'):
                self._emitData(lastDataStart, i)
                lastDataStart = -1
                self.tokens.append(tokenDelimitatorStart())
                i = i + 1
                continue
            if isChar(self.rtfData[i], '}'):
                self._emitData(lastDataStart, i)
                lastDataStart = -1
                self.tokens.append(tokenDelimitatorEnd())
                i = i + 1
                continue
            if isChar(self.rtfData[i], '\\'):
                if i + 1 >= size:
                    raise ValueError('Error: Control character found at the end of the document.')
                self._emitData(lastDataStart, i)
                lastDataStart = -1
                tokenStart = i
                i = i + 1
                #Control Words
                if isAsciiLetter(self.rtfData[i]):
                    #consume <ASCII Letter Sequence>
                    consumed = False
                    while i < size:
                        if not isAsciiLetter(self.rtfData[i]):
                            tokenEnd = i
                            consumed = True
                            break
                        i = i + 1
                    if not consumed:
                        raise ValueError('Error (at:%d): Control Word without end.'%(tokenStart))
                    #we have numeric argument before delimiter
                    if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
                        #consume the numeric argument
                        consumed = False
                        l = 0
                        while i < size:
                            if not isDigit(self.rtfData[i]):
                                consumed = True
                                break
                            l = l + 1
                            i = i + 1
                            if l > 10 :
                                # Formatting with a list raised TypeError
                                # instead of this message
                                raise ValueError('Error (at:%d): Too many digits in control word numeric argument.'%(tokenStart))
                        if not consumed:
                            raise ValueError('Error (at:%d): Control Word without numeric argument end.'%(tokenStart))
                    separator = ''
                    if isChar(self.rtfData[i], ' '):
                        separator = ' '
                    controlWord = self.rtfData[tokenStart: tokenEnd]
                    if tokenEnd < i:
                        value = int(self.rtfData[tokenEnd: i])
                        if isString(controlWord, "\\bin"):
                            # The payload starts after the delimiter and is
                            # exactly `value` long; the old slice began at
                            # the backslash and so cut the payload short.
                            if separator:
                                i = i + 1
                            if i + value > size:
                                raise ValueError('Error (at:%d): \\bin data runs past the end of the document.'%(tokenStart))
                            self.tokens.append(tokenBinN(self.rtfData[i : i + value], separator))
                            i = i + value
                            # Whatever follows the payload is ordinary
                            # input, not a delimiter to discard
                            continue
                        else:
                            self.tokens.append(tokenControlWordWithNumericArgument(controlWord, value, separator))
                    else:
                        self.tokens.append(tokenControlWord(controlWord, separator))
                    #space delimiter, we should discard it
                    if self.rtfData[i] == ' ':
                        i = i + 1
                #Control Symbol
                else:
                    self.tokens.append(tokenControlSymbol(self.rtfData[tokenStart : i + 1]))
                    i = i + 1
                continue
            if lastDataStart < 0:
                lastDataStart = i
            i = i + 1
        # Text after the last delimiter or control word used to be dropped
        self._emitData(lastDataStart, size)
    def toRTF(self):
        '''Return the concatenated serialised form of all tokens.'''
        return "".join([token.toRTF() for token in self.tokens])
 if __name__ == "__main__":
    # Command line use: tokenize the given RTF file, convert its unicode
    # sequences and write the result back over the same file.
    import sys
    if len(sys.argv) < 2:
        # "%prog" is an optparse placeholder and was printed literally
        print ("Usage: %s rtfFileToConvert" % sys.argv[0])
        sys.exit(1)
    with open(sys.argv[1], 'rb') as f:
        data = f.read()
    tokenizer = RtfTokenizer(data)
    parsedTokens = RtfTokenParser(tokenizer.tokens)
    data = parsedTokens.toRTF()
    # Written in binary mode to match the binary read above
    with open(sys.argv[1], 'wb') as f:
        f.write(data)
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -10,11 +10,12 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
 ORG_NAME = 'KovidsBrain'
 APP_UID  = 'libprs500'
 from calibre import islinux, iswindows, isosx
-from calibre.utils.config import Config, ConfigProxy, dynamic
+from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
 from calibre.utils.localization import set_qt_translator
 from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
 from calibre.ebooks.metadata import MetaInformation
 gprefs = JSONConfig('gui')
 NONE = QVariant() #: Null value to return from the data function of item models
--- a/src/calibre/gui2/convert/gui_conversion.py
+++ b/src/calibre/gui2/convert/gui_conversion.py
@ -4,10 +4,14 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 from optparse import OptionParser
 from calibre.customize.conversion import OptionRecommendation, DummyReporter
 from calibre.ebooks.conversion.plumber import Plumber
-# ?from calibre.library.catalog import Catalog
+from calibre.customize.ui import plugin_for_catalog_format
 from calibre.utils.logging import Log
 from calibre.gui2 import choose_dir, Application
 def gui_convert(input, output, recommendations, notification=DummyReporter(),
        abort_after_input_dump=False, log=None):
@ -21,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),
    plumber.run()
-def gui_catalog(fmt, title, dbspec, ids, out_file_name,
+def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options,
        notification=DummyReporter(), log=None):
    if log is None:
        log = Log()
@ -33,19 +37,25 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name,
    else: # To be implemented in the future
        pass
-    # Implement the interface to the catalog generating code here
+    # Create a minimal OptionParser that we can append to
-    #db
+    parser = OptionParser()
-    log("gui2.convert.gui_conversion:gui_catalog()")
+    args = []
-    log("fmt: %s" % fmt)
+    parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
-    log("title: %s" % title)
+    opts, args = parser.parse_args()
-    log("dbspec: %s" % dbspec)
+
-    log("ids: %s" % ids)
+    # Populate opts
-    log("out_file_name: %s" % out_file_name)
+    opts.ids = ids
    opts.search_text = None
    opts.sort_by = None
    # Extract the option dictionary to comma-separated lists
    for option in fmt_options:
        setattr(opts,option, ','.join(fmt_options[option]))
    # Fetch and run the plugin for fmt
    plugin = plugin_for_catalog_format(fmt)
    plugin.run(out_file_name, opts, db)
    # This needs to call the .run() method of the plugin associated with fmt
    # Needs to set up options before the call
    # catalog = Catalog(out_file_name, options, dbspec)
    # Can I call library.cli:catalog_option_parser()?
--- a/src/calibre/gui2/dialogs/catalog.py
+++ b/src/calibre/gui2/dialogs/catalog.py
@ -12,15 +12,18 @@ from PyQt4.Qt import QDialog, QWidget
 from calibre.customize.ui import config
 from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
-from calibre.gui2 import dynamic
+from calibre.gui2 import gprefs, dynamic
 from calibre.customize.ui import available_catalog_formats, catalog_plugins
 from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
 class Catalog(QDialog, Ui_Dialog):
    ''' Catalog Dialog builder'''
    widgets = []
    def __init__(self, parent, dbspec, ids):
        import re, cStringIO
        from calibre import prints as info
        from calibre.gui2 import dynamic
        from PyQt4.uic import compileUi
        QDialog.__init__(self, parent)
@ -42,6 +45,7 @@ class Catalog(QDialog, Ui_Dialog):
        self.fmts = []
        from calibre.customize.builtins import plugins as builtin_plugins
        from calibre.customize import CatalogPlugin
        for plugin in catalog_plugins():
            if plugin.name in config['disabled_plugins']:
@ -49,38 +53,30 @@ class Catalog(QDialog, Ui_Dialog):
            name = plugin.name.lower().replace(' ', '_')
            if type(plugin) in builtin_plugins:
-                info("Adding tab for builtin Catalog plugin %s" % plugin.name)                
+                #info("Adding widget for builtin Catalog plugin %s" % plugin.name)                
                try:
                    catalog_widget = __import__('calibre.gui2.catalog.'+name,
                            fromlist=[1])
                    pw = catalog_widget.PluginWidget()
-                    pw.initialize()
+                    pw.initialize(name)
                    pw.ICON = I('forward.svg')    
-                    page = self.tabs.addTab(pw,pw.TITLE)
+                    self.widgets.append(pw)
-                    [self.fmts.append([file_type, pw.sync_enabled]) for file_type in plugin.file_types]
+                    [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]                    
                    info("\tSupported formats: %s" % plugin.file_types)
                    info("\tsync_enabled: %s" % pw.sync_enabled)
                except ImportError:
                    info("ImportError with %s" % name)
                    continue
            else:
-                # Test to see if .ui and .py files exist in tmpdir/calibre_plugin_resources
+                # Load dynamic tab
-                form = os.path.join(tempfile.gettempdir(),
+                form = os.path.join(plugin.resources_path,'%s.ui' % name)
-                                    'calibre_plugin_resources','%s.ui' % name)
+                klass = os.path.join(plugin.resources_path,'%s.py' % name)
-                klass = os.path.join(tempfile.gettempdir(),
+                compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name)
                                  'calibre_plugin_resources','%s.py' % name)
                compiled_form = os.path.join(tempfile.gettempdir(),
                                  'calibre_plugin_resources','%s_ui.py' % name)
                plugin_resources = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')        
                if os.path.exists(form) and os.path.exists(klass):
-                    info("Adding tab for user-installed Catalog plugin %s" % plugin.name)
+                    #info("Adding widget for user-installed Catalog plugin %s" % plugin.name)
-                    # Compile the form provided in plugin.zip
+                    # Compile the .ui form provided in plugin.zip
-                    if not os.path.exists(compiled_form) or \
+                    if not os.path.exists(compiled_form):
-                       os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
+                        # info('\tCompiling form', form)
                        info('\tCompiling form', form)
                        buf = cStringIO.StringIO()
                        compileUi(form, buf)
                        dat = buf.getvalue()
@ -88,35 +84,41 @@ class Catalog(QDialog, Ui_Dialog):
                                         re.DOTALL).sub(r'_("\1")', dat)
                        open(compiled_form, 'wb').write(dat)
-                    # Import the Catalog class from the dynamic .py file
+                    # Import the dynamic PluginWidget() from .py file provided in plugin.zip
                    try:
-                        sys.path.insert(0, plugin_resources)
+                        sys.path.insert(0, plugin.resources_path)
                        catalog_widget = __import__(name, fromlist=[1])
-                        dpw = catalog_widget.PluginWidget()
+                        pw = catalog_widget.PluginWidget()
-                        dpw.initialize()
+                        pw.initialize(name)
-                        dpw.ICON = I('forward.svg')    
+                        pw.ICON = I('forward.svg')    
-                        page = self.tabs.addTab(dpw, dpw.TITLE)
+                        self.widgets.append(pw)                        
-                        [self.fmts.append([file_type, dpw.sync_enabled]) for file_type in plugin.file_types]
+                        [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
                        info("\tSupported formats: %s" % plugin.file_types)
                        info("\tsync_enabled: %s" % dpw.sync_enabled)
                    except ImportError:
                        info("ImportError with %s" % name)
                        continue
                    finally:
-                        sys.path.remove(plugin_resources)
+                        sys.path.remove(plugin.resources_path)
                else:
                    info("No dynamic tab resources found for %s" % name)
        self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE))
        for pw in self.widgets:
            page = self.tabs.addTab(pw,pw.TITLE)
        # Generate a sorted list of installed catalog formats/sync_enabled pairs
-        # Generate a parallel list of sync_enabled[True|False]ß
+        fmts = sorted([x[0] for x in self.fmts])
-        self.fmts = sorted([x[0].upper() for x in self.fmts])
+
        self.sync_enabled_formats = []
        for fmt in self.fmts:
            if fmt[1]:
                self.sync_enabled_formats.append(fmt[0])
        # Callback when format changes
        self.format.currentIndexChanged.connect(self.format_changed)
        # Add the installed catalog format list to the format QComboBox
-        self.format.addItems(self.fmts)
+        self.format.addItems(fmts)
        pref = dynamic.get('catalog_preferred_format', 'CSV')
        idx = self.format.findText(pref)
@ -127,9 +129,8 @@ class Catalog(QDialog, Ui_Dialog):
            self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
    def format_changed(self, idx):
        # Slot connected to self.format.currentIndexChanged: enables the sync
        # control only when the newly selected format is sync-capable.
        # NOTE(review): the print below looks like leftover debug output.
        print "format_changed(idx): idx: %d" % idx
        # Text of the currently selected entry in the format combo box.
        cf = unicode(self.format.currentText())
        # The '+' line replaces the hard-coded format tuple with the list
        # collected in __init__ from each plugin widget's sync_enabled flag.
-        if cf in ('EPUB', 'MOBI'):
+        if cf in self.sync_enabled_formats:
            self.sync.setEnabled(True)
        else:
            self.sync.setDisabled(True)
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -239,23 +239,35 @@ def fetch_scheduled_recipe(arg):
 def generate_catalog(parent, dbspec, ids):
    # Show the Catalog dialog and, if it is accepted, assemble the pieces
    # needed to run the 'gui_catalog' job.
    #
    # Returns None when the dialog is not accepted; otherwise a tuple of
    # (function name, args list, job description, output file name,
    # sync flag, catalog title).
    from calibre.gui2.dialogs.catalog import Catalog
-    # Build the Catalog dialog
+    # Build the Catalog dialog in gui2.dialogs.catalog
    d = Catalog(parent, dbspec, ids)
    if d.exec_() != d.Accepted:
        return None
    # Create the output file
    out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
    # Retrieve plugin options
    # Only tabs whose title contains the chosen format name are considered.
    # Each d.fmts entry is [FORMAT, sync_enabled, plugin widget]; the widget's
    # options() supplies the per-format options. If nothing matches,
    # fmt_options stays an empty dict.
    fmt_options = {}
    for x in range(d.tabs.count()):
        if str(d.tabs.tabText(x)).find(str(d.catalog_format)) > -1:
            for fmt in d.fmts:
                if fmt[0] == d.catalog_format:
                    fmt_options = fmt[2].options()
                    # print "gui2.tools:generate_catalog(): options for %s: %s" % (fmt[0], fmt_options)
    # Positional arguments for the job function; only the temporary file's
    # name (not the open file object) is passed along.
    args = [
        d.catalog_format,
        d.catalog_title,
        dbspec,
        ids,
        out.name,
        fmt_options
        ]
    # Close our handle before returning; the file name is reused by the job.
    out.close()
    # This calls gui2.convert.gui_conversion:gui_catalog()
    return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
            d.catalog_title
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 '''The main GUI'''
-import os, sys, textwrap, collections, time
+import atexit, os, shutil, sys, tempfile, textwrap, collections, time
 from xml.parsers.expat import ExpatError
 from Queue import Queue, Empty
 from threading import Thread
@ -31,7 +31,7 @@ from calibre.utils.ipc.server import Server
 from calibre.gui2 import warning_dialog, choose_files, error_dialog, \
                            question_dialog,\
                           pixmap_to_data, choose_dir, \
-                           Dispatcher, \
+                           Dispatcher, gprefs, \
                           available_height, \
                           max_available_height, config, info_dialog, \
                           available_width, GetMetadata
@ -357,7 +357,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        cm.addAction(_('Bulk convert'))
        cm.addSeparator()
        ac = cm.addAction(
-                _('Create catalog of the books in your calibre library'))
+                _('Create catalog of books in your calibre library'))
        ac.triggered.connect(self.generate_catalog)
        self.action_convert.setMenu(cm)
        self._convert_single_hook = partial(self.convert_ebook, bulk=False)
@ -518,7 +518,21 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        self.connect(self.library_view.model(), SIGNAL('count_changed(int)'),
                     self.tags_view.recount)
        self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear)
        if not gprefs.get('quick_start_guide_added', False):
            from calibre.ebooks.metadata import MetaInformation
            mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
            mi.author_sort = 'Schember, John'
            mi.comments = "A guide to get you up an running with calibre"
            mi.publisher = 'calibre'
            self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
                    [mi])
            gprefs['quick_start_guide_added'] = True
            self.library_view.model().books_added(1)
            if hasattr(self, 'db_images'):
                self.db_images.reset()
        self.library_view.model().count_changed()
        ########################### Cover Flow ################################
        self.cover_flow = None
        if CoverFlow is not None:
@ -1008,7 +1022,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            return
        self._add_books(books, to_device)
    def _add_books(self, paths, to_device, on_card=None):
        if on_card is None:
            on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None
@ -1348,24 +1361,29 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
    def generate_catalog(self):    
        # Collect the book ids to catalog, run the catalog dialog via
        # gui2.tools:generate_catalog(), and queue the resulting job.
        rows = self.library_view.selectionModel().selectedRows()
        # NOTE(review): with the '+' condition, a selection of exactly one row
        # is also replaced by every row in the library view.
-        if not rows:
+        if not rows or len(rows) < 2:
            rows = xrange(self.library_view.model().rowCount(QModelIndex()))
        # Translate view rows into database ids.
        ids = map(self.library_view.model().id, rows)
        dbspec = None
        if not ids:
            return error_dialog(self, _('No books selected'),
                    _('No books selected to generate catalog for'),
                    show=True)
-        # calibre.gui2.tools:generate_catalog()
+
        # Calling gui2.tools:generate_catalog()
        ret = generate_catalog(self, dbspec, ids)
        # None is returned when the dialog was not accepted.
        if ret is None:
            return
        func, args, desc, out, sync, title = ret
        # Recover the upper-cased format from the output file's extension.
        fmt = os.path.splitext(out)[1][1:].upper()
        job = self.job_manager.run_job(
                Dispatcher(self.catalog_generated), func, args=args,
                    description=desc)
        # Attach details to the job object so the catalog_generated callback
        # can read them back.
        job.catalog_file_path = out
        job.fmt = fmt
        job.catalog_sync, job.catalog_title = sync, title        
        self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
@ -1380,7 +1398,12 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            dynamic.set('catalogs_to_be_synced', sync)
        self.status_bar.showMessage(_('Catalog generated.'), 3000)
        self.sync_catalogs()
-
+		if job.fmt in ['CSV','XML']:
 			export_dir = choose_dir(self, 'Export Catalog Directory', 
 										          'Select destination for %s.%s' % (job.catalog_title, job.fmt.lower()))
 			if export_dir:
 				destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
 				shutil.copyfile(job.catalog_file_path, destination)
    ############################### Fetch news #################################
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -40,10 +40,9 @@ class CSV_XML(CatalogPlugin):
        from calibre.utils.logging import Log
        log = Log()
        self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
        # Same extension extraction using rpartition; this assignment overrides the rfind-based one above
        self.fmt = path_to_output.rpartition('.')[2]
-        if opts.verbose:
+        
        if False and opts.verbose:
            log("%s:run" % self.name)
            log(" path_to_output: %s" % path_to_output)
            log(" Output format: %s" % self.fmt)
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -644,6 +644,10 @@ def catalog_option_parser(args):
    output, fmt = validate_command_line(parser, args, log)
    # Add options common to all catalog plugins
    parser.add_option('-i', '--ids', default=None, dest='ids',
                      help=_("Comma-separated list of database IDs to catalog.\n"
                      "If declared, --search is ignored.\n"
                             "Default: all"))
    parser.add_option('-s', '--search', default=None, dest='search_text',
                      help=_("Filter the results by the search query. "
                          "For the format of the search query, please see "
@ -656,31 +660,6 @@ def catalog_option_parser(args):
    # Add options specific to fmt plugin
    plugin = add_plugin_parser_options(fmt, parser, log)
    # Merge options from GUI Preferences
    '''
    # Placeholder sample code until we implement GUI preferences
    from calibre.library.save_to_disk import config
    c = config()
    for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
        opt = c.get_option(pref)
        switch = '--dont-'+pref.replace('_', '-')
        parser.add_option(switch, default=True, action='store_false',
                help=opt.help+' '+_('Specifying this switch will turn '
                    'this behavior off.'), dest=pref)
    for pref in ['timefmt', 'template', 'formats']:
        opt = c.get_option(pref)
        switch = '--'+pref
        parser.add_option(switch, default=opt.default,
                help=opt.help, dest=pref)
    for pref in ('replace_whitespace', 'to_lowercase'):
        opt = c.get_option(pref)
        switch = '--'+pref.replace('_', '-')
        parser.add_option(switch, default=False, action='store_true',
                help=opt.help)
    '''
    return parser, plugin, log
 def command_catalog(args, dbpath):
@ -693,6 +672,9 @@ def command_catalog(args, dbpath):
        return 1
    if opts.verbose:
        log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
    if opts.ids:
        opts.ids = [int(id) for id in opts.ids.split(',')]    
    with plugin:
        plugin.run(args[1], opts, get_db(dbpath, opts))
    return 0
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
        for i in iter(self):
            yield i[x]
-    def get_data_as_dict(self, prefix=None, authors_as_string=False):
+    def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
        '''
        Return all metadata stored in the database as a dict. Includes paths to
        the cover and each format.
        :param prefix: The prefix for all paths. By default, the prefix is the absolute path
        to the library folder.
        :param ids: Set of ids to return the data for. If None return data for
        all entries in database.
        '''
        if prefix is None:
            prefix = self.library_path
@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase):
        data = []
        for record in self.data:
            if record is None: continue
            db_id = record[FIELD_MAP['id']]
            if ids is not None and db_id not in ids:
                continue
            x = {}
            for field in FIELDS:
                x[field] = record[FIELD_MAP[field]]
            data.append(x)
-            x['id'] = record[FIELD_MAP['id']]
+            x['id'] = db_id
            x['formats'] = []
            if not x['authors']:
                x['authors'] = _('Unknown')
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Manage application-wide preferences.
 '''
-import os, re, cPickle, textwrap, traceback, plistlib
+import os, re, cPickle, textwrap, traceback, plistlib, json
 from copy import deepcopy
 from functools import partial
 from optparse import OptionParser as _OptionParser
@ -564,23 +564,31 @@ class XMLConfig(dict):
    data types.
    '''
    EXTENSION = '.plist'
    def __init__(self, rel_path_to_cf_file):
        # rel_path_to_cf_file is a '/'-separated path, resolved relative to
        # config_dir.
        dict.__init__(self)
        # Split on '/' and re-join with os.path.join so the stored path uses
        # the platform's separator.
        self.file_path = os.path.join(config_dir,
                *(rel_path_to_cf_file.split('/')))
        self.file_path = os.path.abspath(self.file_path)
        # Append the extension when the caller left it off. The '+' lines use
        # the class attribute EXTENSION instead of the literal '.plist', so a
        # subclass can supply its own suffix.
-        if not self.file_path.endswith('.plist'):
+        if not self.file_path.endswith(self.EXTENSION):
-            self.file_path += '.plist'
+            self.file_path += self.EXTENSION
        # refresh() reads and parses the file if it already exists.
        self.refresh()
    def raw_to_object(self, raw):
        '''
        Parse the raw contents read from the config file.

        :param raw: The non-empty string read from the file by refresh().
        :return: The object produced by parsing ``raw`` as a plist.
        '''
        return plistlib.readPlistFromString(raw)
    def to_raw(self):
        '''
        Serialize this mapping to the plist string that is written to the
        config file.
        '''
        return plistlib.writePlistToString(self)
    def refresh(self):
        d = {}
        if os.path.exists(self.file_path):
            with ExclusiveFile(self.file_path) as f:
                raw = f.read()
                try:
-                    d = plistlib.readPlistFromString(raw) if raw.strip() else {}
+                    d = self.raw_to_object(raw) if raw.strip() else {}
                except SystemError:
                    pass
                except:
@ -618,11 +626,21 @@ class XMLConfig(dict):
            if not os.path.exists(dpath):
                os.makedirs(dpath, mode=CONFIG_DIR_MODE)
            with ExclusiveFile(self.file_path) as f:
-                raw = plistlib.writePlistToString(self)
+                raw = self.to_raw()
                f.seek(0)
                f.truncate()
                f.write(raw)
 class JSONConfig(XMLConfig):
    EXTENSION = '.json'
    def raw_to_object(self, raw):
        return json.loads(raw.decode('utf-8'))
    def to_raw(self):
        return json.dumps(self, indent=2)
 def _prefs():
    c = Config('global', 'calibre wide preferences')
--- a/src/calibre/utils/localization.py
+++ b/src/calibre/utils/localization.py
@ -104,6 +104,7 @@ _extra_lang_codes = {
        'en_CY' : _('English (Cyprus)'),
        'en_PK' : _('English (Pakistan)'),
        'en_SG' : _('English (Singapore)'),
        'en_YE' : _('English (Yemen)'),
        'de_AT' : _('German (AT)'),
        'nl'    : _('Dutch (NL)'),
        'nl_BE' : _('Dutch (BE)'),