Mirror of https://github.com/kovidgoyal/calibre.git

commit 3024d37142
GwR initial release of Catalog features

New binary files:
    resources/images/news/joop.png     (395 B)
    resources/images/news/kitsapun.png (2.3 KiB)
    resources/images/news/nrcnext.png  (1.7 KiB)
    resources/quick_start.epub
@@ -1,7 +1,5 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
spectator.org
'''

@@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheAmericanSpectator(BasicNewsRecipe):
    title = 'The American Spectator'
    __author__ = 'Darko Miletic'
    language = 'en'

    description = 'News from USA'
    category = 'news, politics, USA, world'
    publisher = 'The American Spectator'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'en'
    INDEX = 'http://spectator.org'

    html2lrf_options = [
                          '--comment' , description
                        , '--category' , 'news, politics, USA'
                        , '--publisher' , title
                        ]
    conversion_options = {
                          'comments' : description
                         ,'tags' : category
                         ,'language' : language
                         ,'publisher' : publisher
                         }

    keep_only_tags = [
                       dict(name='div', attrs={'class':'post inner'})

@@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):

    remove_tags = [
                    dict(name='object')
                   ,dict(name='div', attrs={'class':'col3' })
                   ,dict(name='div', attrs={'class':'post-options' })
                   ,dict(name='p' , attrs={'class':'letter-editor'})
                   ,dict(name='div', attrs={'class':'social' })
                   ,dict(name='div', attrs={'class':['col3','post-options','social']})
                   ,dict(name='p' , attrs={'class':['letter-editor','meta']})
                  ]

    feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
    feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]

    def get_cover_url(self):
        cover_url = None

@@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):

    def print_version(self, url):
        return url + '/print'

    def get_article_url(self, article):
        return article.get('guid', None)
resources/recipes/drivelry.recipe (new file, 41 lines)

@@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class drivelrycom(BasicNewsRecipe):
    title = u'drivelry.com'
    language = 'en'
    description = 'A blog by Mike Abrahams'
    __author__ = 'Krittika Goyal'
    oldest_article = 60 #days
    max_articles_per_feed = 25
    #encoding = 'latin1'

    remove_stylesheets = True
    #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
    remove_tags_after = dict(name='div', attrs={'id':'bookmark'})
    remove_tags = [
       dict(name='iframe'),
       dict(name='div', attrs={'class':['sidebar']}),
       dict(name='div', attrs={'id':['bookmark']}),
       #dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}),
       #dict(name='ul', attrs={'class':'articleTools'}),
    ]

    feeds = [
        ('drivelry.com',
         'http://feeds.feedburner.com/drivelry'),

    ]

    def preprocess_html(self, soup):
        story = soup.find(name='div', attrs={'id':'main'})
        #td = heading.findParent(name='td')
        #td.extract()
        soup = BeautifulSoup('''
            <html><head><title>t</title></head><body>
            <p>To donate to this blog: <a href="http://www.drivelry.com/thank-you/">click here</a></p>
            </body></html>
            ''')
        body = soup.find(name='body')
        body.insert(0, story)
        return soup
@ -1,23 +1,29 @@
|
||||
#!/usr/bin/python
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
|
||||
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
language = 'nl'
|
||||
description = u'Popular Dutch daily cartoon Fokke en Sukke'
|
||||
country = 'NL'
|
||||
version = 2
|
||||
|
||||
title = u'Fokke en Sukke'
|
||||
no_stylesheets = True
|
||||
# For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
|
||||
# with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
|
||||
template_css = ''
|
||||
INDEX = u'http://foksuk.nl'
|
||||
publisher = u'Reid, Geleijnse & Van Tol'
|
||||
category = u'News, Cartoons'
|
||||
description = u'Popular Dutch daily cartoon Fokke en Sukke'
|
||||
|
||||
# This cover is not as nice as it could be, needs some work
|
||||
#cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
|
||||
conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
|
||||
|
||||
no_stylesheets = True
|
||||
extra_css = '''
|
||||
body{font-family: verdana, arial, helvetica, geneva, sans-serif ; margin: 0em; padding: 0em;}
|
||||
div.title {text-align: center; margin-bottom: 1em;}
|
||||
'''
|
||||
|
||||
INDEX = u'http://foksuk.nl'
|
||||
cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
|
||||
|
||||
@ -31,15 +37,14 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
||||
links = index.findAll('a')
|
||||
maxIndex = len(links) - 1
|
||||
articles = []
|
||||
for i in range(len(links)) :
|
||||
# The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
|
||||
if i == 0 :
|
||||
continue
|
||||
|
||||
# There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
|
||||
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
|
||||
for i in range(1, len(links)) :
|
||||
# There can be more than one cartoon for a given day (currently either one or two).
|
||||
# If there's only one, there is just a link with the dayname.
|
||||
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>.
|
||||
# In that case we're interested in the last two.
|
||||
if links[i].renderContents() in dayNames :
|
||||
# If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
|
||||
# If the link is not in daynames, we processed it already, but if it is, let's see
|
||||
# if the next one has '1' as content
|
||||
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
|
||||
# Got you! Add it to the list
|
||||
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
|
||||
@ -59,29 +64,31 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
||||
return [[week, articles]]
|
||||
|
||||
def preprocess_html(self, soup) :
|
||||
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way
|
||||
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
|
||||
if cartoon :
|
||||
# It is a cartoon. Extract the title.
|
||||
title = ''
|
||||
img = soup.find('img', attrs = {'alt' : True})
|
||||
if img :
|
||||
title = img['alt']
|
||||
|
||||
# Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
|
||||
tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
|
||||
tag.insert(0, title)
|
||||
cartoon.insert(0, tag)
|
||||
title = ''
|
||||
img = soup.find('img', attrs = {'alt' : True})
|
||||
if img :
|
||||
title = img['alt']
|
||||
|
||||
# I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
|
||||
# and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
|
||||
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
|
||||
if select :
|
||||
select.extract()
|
||||
tag = Tag(soup, 'div', [('class', 'title')])
|
||||
tag.insert(0, title)
|
||||
cartoon.insert(0, tag)
|
||||
|
||||
return cartoon
|
||||
else :
|
||||
# It is a TOC. Just return the whole lot.
|
||||
return soup
|
||||
# We only want the cartoon, so throw out the index
|
||||
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
|
||||
if select :
|
||||
select.extract()
|
||||
|
||||
freshSoup = self.getFreshSoup(soup)
|
||||
freshSoup.body.append(cartoon)
|
||||
|
||||
return freshSoup
|
||||
|
||||
def getFreshSoup(self, oldSoup):
|
||||
freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
|
||||
if oldSoup.head.title:
|
||||
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
|
||||
return freshSoup
|
||||
|
||||
|
||||
|
@ -15,7 +15,7 @@ class FTDe(BasicNewsRecipe):
|
||||
__author__ = 'Oliver Niesner'
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
language = 'de'
|
||||
language = _('German')
|
||||
max_articles_per_feed = 40
|
||||
no_stylesheets = True
|
||||
|
||||
@ -28,8 +28,13 @@ class FTDe(BasicNewsRecipe):
|
||||
dict(id='ADS_Top'),
|
||||
dict(id='spinner'),
|
||||
dict(id='ftd-contentad'),
|
||||
dict(id='ftd-promo'),
|
||||
dict(id='nava-50009007-1-0'),
|
||||
dict(id='navli-50009007-1-0'),
|
||||
dict(id='Box5000534-0-0-0'),
|
||||
dict(id='ExpV-1-0-0-1'),
|
||||
dict(id='ExpV-1-0-0-0'),
|
||||
dict(id='PollExpV-2-0-0-0'),
|
||||
dict(id='starRating'),
|
||||
dict(id='saveRating'),
|
||||
dict(id='yLayer'),
|
||||
@ -44,14 +49,19 @@ class FTDe(BasicNewsRecipe):
|
||||
dict(name='ul', attrs={'class':'nav'}),
|
||||
dict(name='p', attrs={'class':'articleOptionHead'}),
|
||||
dict(name='p', attrs={'class':'articleOptionFoot'}),
|
||||
dict(name='p', attrs={'class':'moreInfo'}),
|
||||
dict(name='div', attrs={'class':'chartBox'}),
|
||||
dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
|
||||
dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
|
||||
dict(name='div', attrs={'class':'box boxNavTabs '}),
|
||||
dict(name='div', attrs={'class':'box boxNavTabs'}),
|
||||
dict(name='div', attrs={'class':'boxMMRgtLow'}),
|
||||
dict(name='span', attrs={'class':'vote_455857'}),
|
||||
dict(name='div', attrs={'class':'relatedhalb'}),
|
||||
dict(name='div', attrs={'class':'box boxListScrollOutline'}),
|
||||
dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
|
||||
dict(name='div', attrs={'class':'box boxTeaser'}),
|
||||
dict(name='div', attrs={'class':'tagCloud'}),
|
||||
dict(name='div', attrs={'class':'pollView'}),
|
||||
dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
|
||||
dict(name='div', attrs={'class':'ftdHpNav'}),
|
||||
dict(name='div', attrs={'class':'ftdHead'}),
|
||||
@ -67,9 +77,10 @@ class FTDe(BasicNewsRecipe):
|
||||
dict(name='div', attrs={'class':'wertungoben'}),
|
||||
dict(name='div', attrs={'class':'artikelfuss'}),
|
||||
dict(name='a', attrs={'class':'rating'}),
|
||||
dict(name='a', attrs={'href':'#rt'}),
|
||||
dict(name='div', attrs={'class':'articleOptionFootFrame'}),
|
||||
dict(name='div', attrs={'class':'artikelsplitfaq'})]
|
||||
remove_tags_after = [dict(name='a', attrs={'class':'more'})]
|
||||
#remove_tags_after = [dict(name='a', attrs={'class':'more'})]
|
||||
|
||||
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
|
||||
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
|
||||
@ -86,4 +97,4 @@ class FTDe(BasicNewsRecipe):
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?mode=print'
|
||||
return url.replace('.html', '.html?mode=print')
|
||||
|
resources/recipes/greader_uber.recipe (new file, 38 lines)

@@ -0,0 +1,38 @@
import urllib, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre import __appname__

class GoogleReaderUber(BasicNewsRecipe):
    title = 'Google Reader Uber'
    description = 'This recipe downloads all unread feedsfrom your Google Reader account.'
    needs_subscription = True
    __author__ = 'rollercoaster, davec'
    base_url = 'http://www.google.com/reader/atom/'
    oldest_article = 365
    max_articles_per_feed = 250
    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
    use_embedded_content = True

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()

        if self.username is not None and self.password is not None:
            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
                                        ('service', 'reader'), ('source', __appname__)])
            response = br.open('https://www.google.com/accounts/ClientLogin', request)
            sid = re.search('SID=(\S*)', response.read()).group(1)

            cookies = mechanize.CookieJar()
            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
            cookies.set_cookie(mechanize.Cookie(None, 'SID', sid, None, False, '.google.com', True, True, '/', True, False, None, True, '', '', None))
        return br


    def get_feeds(self):
        feeds = []
        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
        for id in soup.findAll(True, attrs={'name':['id']}):
            url = id.contents[0].replace('broadcast','reading-list')
            feeds.append((re.search('/([^/]*)$', url).group(1),
                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
        return feeds
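For reference, a sketch of the Atom URL get_feeds() ends up requesting for one label; the label name is invented for illustration, and the query string comes from get_options above.

# Sketch only (not part of the recipe): the feed URL built for a label 'news'.
import urllib

base_url    = 'http://www.google.com/reader/atom/'
tag         = 'user/-/label/news'                       # invented label id
get_options = '?n=250&xt=user/-/state/com.google/read'
print base_url + urllib.quote(tag) + get_options
# http://www.google.com/reader/atom/user/-/label/news?n=250&xt=user/-/state/com.google/read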
resources/recipes/joop.recipe (new file, 91 lines)

@@ -0,0 +1,91 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
import re
|
||||
|
||||
class JoopRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
language = 'nl'
|
||||
country = 'NL'
|
||||
version = 1
|
||||
|
||||
title = u'Joop'
|
||||
publisher = u'Vara'
|
||||
category = u'News, Politics, Discussion'
|
||||
description = u'Political blog from the Netherlands'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'author_head clearfix photo'}))
|
||||
keep_only_tags.append(dict(name = 'h2', attrs = {'class': 'columnhead smallline'}))
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class': re.compile('article.*')}))
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
|
||||
img {margin-right: 0.4em;}
|
||||
h3 {font-size: medium; font-style: italic; font-weight: normal;}
|
||||
h2 {font-size: xx-large; font-weight: bold}
|
||||
sub {color: #666666; font-size: x-small; font-weight: normal;}
|
||||
div.joop_byline {font-size: large}
|
||||
div.joop_byline_job {font-size: small; color: #696969;}
|
||||
div.joop_date {font-size: x-small; font-style: italic; margin-top: 0.6em}
|
||||
'''
|
||||
|
||||
INDEX = 'http://www.joop.nl'
|
||||
|
||||
conversion_options = {'comments': description, 'tags': category, 'language': language,
|
||||
'publisher': publisher}
|
||||
|
||||
def parse_index(self):
|
||||
sections = ['Politiek', 'Wereld', 'Economie', 'Groen', 'Media', 'Leven', 'Show', 'Opinies']
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
answer = []
|
||||
|
||||
div = soup.find('div', attrs = {'id': 'footer'})
|
||||
for section in sections:
|
||||
articles = []
|
||||
h2 = div.find(lambda tag: tag.name == 'h2' and tag.renderContents() == section)
|
||||
if h2:
|
||||
ul = h2.findNextSibling('ul', 'linklist')
|
||||
if ul:
|
||||
for li in ul.findAll('li'):
|
||||
title = self.tag_to_string(li.a)
|
||||
url = self.INDEX + li.a['href']
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': ''})
|
||||
|
||||
answer.append((section, articles))
|
||||
|
||||
return answer
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
div = soup.find('div', 'author_head clearfix photo')
|
||||
if div:
|
||||
h2 = soup.find('h2')
|
||||
if h2:
|
||||
h2.name = 'div'
|
||||
h2['class'] = 'joop_byline'
|
||||
span = h2.find('span')
|
||||
if span:
|
||||
span.name = 'div'
|
||||
span['class'] = 'joop_byline_job'
|
||||
div.replaceWith(h2)
|
||||
|
||||
h2 = soup.find('h2', attrs = {'class': 'columnhead smallline'})
|
||||
if h2:
|
||||
txt = None
|
||||
span = h2.find('span', 'info')
|
||||
if span:
|
||||
txt = span.find(text = True)
|
||||
div = Tag(soup, 'div', attrs = [('class', 'joop_date')])
|
||||
div.append(txt)
|
||||
h2.replaceWith(div)
|
||||
|
||||
return soup
|
||||
|
||||
|
resources/recipes/kitsapun.recipe (new file, 44 lines)

@@ -0,0 +1,44 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.kitsapun.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Kitsapsun(BasicNewsRecipe):
|
||||
title = 'Kitsap Sun'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Kitsap County'
|
||||
publisher = 'Scripps Interactive Newspapers Group'
|
||||
category = 'news, Kitsap county, USA'
|
||||
language = 'en'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
|
||||
conversion_options = {
|
||||
'comments' : description
|
||||
,'tags' : category
|
||||
,'language' : language
|
||||
,'publisher': publisher
|
||||
}
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':['story_meta','story_content']})]
|
||||
|
||||
remove_tags = [dict(name=['object','link','embed','form','iframe'])]
|
||||
|
||||
feeds = [
|
||||
(u'News' , u'http://www.kitsapsun.com/rss/headlines/news/' )
|
||||
,(u'Business' , u'http://www.kitsapsun.com/rss/headlines/business/' )
|
||||
,(u'Communities' , u'http://www.kitsapsun.com/rss/headlines/communities/' )
|
||||
,(u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/')
|
||||
,(u'Lifestyles' , u'http://www.kitsapsun.com/rss/headlines/lifestyles/' )
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.rpartition('/')[0] + '/?print=1'
|
resources/recipes/ledevoir.recipe (new file, 79 lines)

@@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
__version__ = 'v1.01'
|
||||
__date__ = '14, January 2010'
|
||||
__description__ = 'Canadian Paper '
|
||||
|
||||
'''
|
||||
http://www.ledevoir.com/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ledevoir(BasicNewsRecipe):
|
||||
author = 'Lorenzo Vigentini'
|
||||
description = 'Canadian Paper'
|
||||
|
||||
cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
|
||||
title = u'Le Devoir'
|
||||
publisher = 'leDevoir.com'
|
||||
category = 'News, finance, economy, politics'
|
||||
|
||||
language = 'fr'
|
||||
encoding = 'utf-8'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
|
||||
max_articles_per_feed = 50
|
||||
use_embedded_content = False
|
||||
recursion = 10
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'article'}),
|
||||
dict(name='ul', attrs={'id':'ariane'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'dialog'}),
|
||||
dict(name='div', attrs={'class':['interesse_actions','reactions']}),
|
||||
dict(name='ul', attrs={'class':'mots_cles'}),
|
||||
dict(name='a', attrs={'class':'haut'}),
|
||||
dict(name='h5', attrs={'class':'interesse_actions'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
|
||||
(u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
|
||||
(u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
|
||||
(u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
|
||||
(u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
|
||||
(u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
|
||||
(u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
|
||||
(u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
|
||||
(u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
|
||||
(u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
|
||||
(u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
|
||||
(u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
|
||||
h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
|
||||
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
|
||||
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
|
||||
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
|
||||
.specs {line-height:1em;margin:1px 0;}
|
||||
.specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
|
||||
.specs span.auteur a,
|
||||
.specs span.auteur span {text-transform:uppercase;color:#787878;}
|
||||
.specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
|
||||
ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
|
||||
ul#ariane li {display:inline;}
|
||||
ul#ariane a {color:#2E2E2E;text-decoration:underline;}
|
||||
.credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
|
||||
.texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
|
||||
'''
|
@ -70,11 +70,28 @@ class NYTimes(BasicNewsRecipe):
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
return feeds
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
story = soup.find(name='div', attrs={'class':'triline'})
|
||||
#td = heading.findParent(name='td')
|
||||
#td.extract()
|
||||
page2_link = soup.find('p','pagenav')
|
||||
if page2_link:
|
||||
atag = page2_link.find('a',href=True)
|
||||
if atag:
|
||||
page2_url = atag['href']
|
||||
if page2_url.startswith('story'):
|
||||
page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
|
||||
elif page2_url.startswith( '/todays-paper/story.html'):
|
||||
page2_url = 'http://www.nationalpost.com/'+page2_url
|
||||
page2_soup = self.index_to_soup(page2_url)
|
||||
if page2_soup:
|
||||
page2_content = page2_soup.find('div','story-content')
|
||||
if page2_content:
|
||||
full_story = BeautifulSoup('<div></div>')
|
||||
full_story.insert(0,story)
|
||||
full_story.insert(1,page2_content)
|
||||
story = full_story
|
||||
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
|
||||
body = soup.find(name='body')
|
||||
body.insert(0, story)
|
||||
return soup
|
||||
|
||||
|
@ -1,29 +1,38 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class NrcNextRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
version = 1
|
||||
language = 'nl'
|
||||
country = 'NL'
|
||||
version = 2
|
||||
|
||||
title = u'nrcnext'
|
||||
publisher = u'NRC Media'
|
||||
category = u'News, Opinion, the Netherlands'
|
||||
description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
|
||||
title = u'nrcnext'
|
||||
|
||||
conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
|
||||
|
||||
no_stylesheets = True
|
||||
template_css = ''
|
||||
remove_javascript = True
|
||||
|
||||
# I want to do some special processing on the articles. I could not solve it with the 'extra_css' property . So we do it the hard way.
|
||||
keep_only_tags = [dict(name='div', attrs={'id' : 'main'})]
|
||||
# If that's overkill for you comment out the previous line and uncomment the next. Then get rid of the preprocess_html() method.
|
||||
#keep_only_tags = [dict(name='div', attrs={'class' : 'post'}), dict(name='div', attrs={'class' : 'vlag'}) ]
|
||||
|
||||
remove_tags = [dict(name = 'div', attrs = {'class' : 'meta'}),
|
||||
dict(name = 'div', attrs = {'class' : 'datumlabel'}),
|
||||
dict(name = 'ul', attrs = {'class' : 'cats single'}),
|
||||
dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}),
|
||||
dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})]
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'meta'}))
|
||||
remove_tags.append(dict(name = 'div', attrs = {'class' : 'datumlabel'}))
|
||||
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats single'}))
|
||||
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}))
|
||||
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats rubrieken'}))
|
||||
|
||||
use_embedded_content = False
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif; text-align: left;}
|
||||
p.wp-caption-text {font-size: x-small; color: #666666;}
|
||||
h2.sub_title {font-size: medium; color: #696969;}
|
||||
h2.vlag {font-size: small; font-weight: bold;}
|
||||
'''
|
||||
|
||||
def parse_index(self) :
|
||||
# Use the wesbite as an index. Their RSS feeds can be out of date.
|
||||
@ -44,10 +53,11 @@ class NrcNextRecipe(BasicNewsRecipe):
|
||||
# Find the links to the actual articles and rember the location they're pointing to and the title
|
||||
a = post.find('a', attrs={'rel' : 'bookmark'})
|
||||
href = a['href']
|
||||
title = a.renderContents()
|
||||
title = self.tag_to_string(a)
|
||||
|
||||
if index == 'columnisten' :
|
||||
# In this feed/page articles can be written by more than one author. It is nice to see their names in the titles.
|
||||
# In this feed/page articles can be written by more than one author.
|
||||
# It is nice to see their names in the titles.
|
||||
flag = post.find('h2', attrs = {'class' : 'vlag'})
|
||||
author = flag.contents[0].renderContents()
|
||||
completeTitle = u''.join([author, u': ', title])
|
||||
@ -71,44 +81,46 @@ class NrcNextRecipe(BasicNewsRecipe):
|
||||
return answer
|
||||
|
||||
def preprocess_html(self, soup) :
|
||||
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way
|
||||
if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}) :
|
||||
# It's an article, find the interesting part
|
||||
if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}):
|
||||
tag = soup.find('div', attrs = {'class' : 'post'})
|
||||
if tag :
|
||||
# And replace any links with their text, so they don't show up underlined on my reader.
|
||||
for link in tag.findAll('a') :
|
||||
link.replaceWith(link.renderContents())
|
||||
if tag:
|
||||
h2 = tag.find('h2', 'vlag')
|
||||
if h2:
|
||||
new_h2 = Tag(soup, 'h2', attrs = [('class', 'vlag')])
|
||||
new_h2.append(self.tag_to_string(h2))
|
||||
h2.replaceWith(new_h2)
|
||||
else:
|
||||
h2 = tag.find('h2')
|
||||
if h2:
|
||||
new_h2 = Tag(soup, 'h2', attrs = [('class', 'sub_title')])
|
||||
new_h2.append(self.tag_to_string(h2))
|
||||
h2.replaceWith(new_h2)
|
||||
|
||||
# Slows down my Sony reader; feel free to comment out
|
||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}) :
|
||||
h1 = tag.find('h1')
|
||||
if h1:
|
||||
new_h1 = Tag(soup, 'h1')
|
||||
new_h1.append(self.tag_to_string(h1))
|
||||
h1.replaceWith(new_h1)
|
||||
|
||||
# Slows down my reader.
|
||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}):
|
||||
movie.extract()
|
||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}) :
|
||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}):
|
||||
movie.extract()
|
||||
for iframe in tag.findAll('iframe') :
|
||||
iframe.extract()
|
||||
|
||||
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
|
||||
body = homeMadeSoup.find('body')
|
||||
body.append(tag)
|
||||
fresh_soup = self.getFreshSoup(soup)
|
||||
fresh_soup.body.append(tag)
|
||||
|
||||
return homeMadeSoup
|
||||
else :
|
||||
return fresh_soup
|
||||
else:
|
||||
# This should never happen and other famous last words...
|
||||
return soup
|
||||
else :
|
||||
# It's a TOC, return the whole lot.
|
||||
return soup
|
||||
|
||||
def postproces_html(self, soup) :
|
||||
# Should not happen, but it does. Slows down my Sony eReader
|
||||
for img in soup.findAll('img') :
|
||||
if img['src'].startswith('http://') :
|
||||
img.extract()
|
||||
|
||||
# Happens for some movies which we are not able to view anyway
|
||||
for iframe in soup.findAll('iframe') :
|
||||
if iframe['src'].startswith('http://') :
|
||||
iframe.extract()
|
||||
|
||||
|
||||
|
||||
def getFreshSoup(self, oldSoup):
|
||||
freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
|
||||
if oldSoup.head.title:
|
||||
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
|
||||
return freshSoup
|
||||
|
||||
|
resources/recipes/yementimes.recipe (new file, 125 lines)

@@ -0,0 +1,125 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||
|
||||
class YemenTimesRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
language = 'en_YE'
|
||||
country = 'YE'
|
||||
version = 1
|
||||
|
||||
title = u'Yemen Times'
|
||||
publisher = u'yementimes.com'
|
||||
category = u'News, Opinion, Yemen'
|
||||
description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
|
||||
'class': 'DMAIN2'}))
|
||||
remove_attributes = ['style']
|
||||
|
||||
INDEX = 'http://www.yementimes.com/'
|
||||
feeds = []
|
||||
feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
|
||||
feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
|
||||
feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
|
||||
feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
|
||||
feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
|
||||
feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
|
||||
feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
|
||||
feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
|
||||
feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
|
||||
feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
|
||||
feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
|
||||
feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
|
||||
feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))
|
||||
|
||||
extra_css = '''
|
||||
body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
|
||||
div.yemen_byline {font-size: medium; font-weight: bold;}
|
||||
div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
|
||||
.yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
|
||||
'''
|
||||
|
||||
conversion_options = {'comments': description, 'tags': category, 'language': 'en',
|
||||
'publisher': publisher, 'linearize_tables': True}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br.set_handle_gzip(True)
|
||||
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
answer = []
|
||||
for feed_title, feed in self.feeds:
|
||||
soup = self.index_to_soup(feed)
|
||||
|
||||
newsbox = soup.find('div', 'newsbox')
|
||||
main = newsbox.findNextSibling('table')
|
||||
|
||||
articles = []
|
||||
for li in main.findAll('li'):
|
||||
title = self.tag_to_string(li.a)
|
||||
url = self.INDEX + li.a['href']
|
||||
articles.append({'title': title, 'date': None, 'url': url, 'description': '<br/> '})
|
||||
|
||||
answer.append((feed_title, articles))
|
||||
|
||||
return answer
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
freshSoup = self.getFreshSoup(soup)
|
||||
|
||||
headline = soup.find('div', attrs = {'id': 'DVMTIT'})
|
||||
if headline:
|
||||
div = headline.findNext('div', attrs = {'id': 'DVTOP'})
|
||||
img = None
|
||||
if div:
|
||||
img = div.find('img')
|
||||
|
||||
headline.name = 'h1'
|
||||
freshSoup.body.append(headline)
|
||||
if img is not None:
|
||||
freshSoup.body.append(img)
|
||||
|
||||
byline = soup.find('div', attrs = {'id': 'DVTIT'})
|
||||
if byline:
|
||||
date_el = byline.find('span')
|
||||
if date_el:
|
||||
pub_date = self.tag_to_string(date_el)
|
||||
date = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
|
||||
date.append(pub_date)
|
||||
date_el.extract()
|
||||
|
||||
raw = '<br/>'.join(['%s' % (part) for part in byline.findAll(text = True)])
|
||||
author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')
|
||||
|
||||
if date is not None:
|
||||
freshSoup.body.append(date)
|
||||
freshSoup.body.append(author)
|
||||
|
||||
story = soup.find('div', attrs = {'id': 'DVDET'})
|
||||
if story:
|
||||
for table in story.findAll('table'):
|
||||
if table.find('img'):
|
||||
table['class'] = 'yemen_caption'
|
||||
|
||||
freshSoup.body.append(story)
|
||||
|
||||
return freshSoup
|
||||
|
||||
def getFreshSoup(self, oldSoup):
|
||||
freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
|
||||
if oldSoup.head.title:
|
||||
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
|
||||
return freshSoup
|
@@ -2,11 +2,12 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

import os, sys, tempfile, zipfile
import atexit, os, shutil, sys, tempfile, zipfile

from calibre.constants import numeric_version
from calibre.ptempfile import PersistentTemporaryFile


class Plugin(object):
    '''
    A calibre plugin. Useful members include:

@@ -231,6 +232,8 @@
    A plugin that implements a catalog generator.
    '''

    resources_path = None

    #: Output file type for which this plugin should be run
    #: For example: 'epub' or 'xml'
    file_types = set([])

@@ -249,22 +252,18 @@

    cli_options = []

    def cleanup(self, path):
        try:
            import os, shutil
            if os.path.exists(path):
                shutil.rmtree(path)
        except:
            pass

    def search_sort_db(self, db, opts):
        if opts.search_text:

        # If declared, --ids overrides any declared search criteria
        if not opts.ids and opts.search_text:
            db.search(opts.search_text)

        if opts.sort_by:
            # 2nd arg = ascending
            db.sort(opts.sort_by, True)

        return db.get_data_as_dict()
        return db.get_data_as_dict(ids=opts.ids)

    def get_output_fields(self, opts):
        # Return a list of requested fields, with opts.sort_by first

@@ -280,8 +279,10 @@ class CatalogPlugin(Plugin):
            fields = list(all_fields & requested_fields)
        else:
            fields = list(all_fields)

        fields.sort()
        fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
        if opts.sort_by:
            fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
        return fields

    def initialize(self):

@@ -291,35 +292,27 @@
        Tab will be dynamically generated and added to the Catalog Options dialog in
        calibre.gui2.dialogs.catalog.py:Catalog
        '''
        import atexit
        from calibre.customize.builtins import plugins as builtin_plugins
        from calibre.customize.ui import config
        from calibre.ptempfile import PersistentTemporaryDirectory

        if type(self) in builtin_plugins:
            print "%s: Built-in Catalog plugin, no init necessary" % self.name
        else:
            print "%s: User-added plugin" % self.name
            print " Copying .ui and .py resources from %s to tmpdir" % self.plugin_path

        # Generate a list of resource files to extract from the zipped plugin
        # Copy to tmpdir/calibre_plugin_resources
        if not type(self) in builtin_plugins and \
           not self.name in config['disabled_plugins']:
            files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
            print " files_to_copy: %s" % files_to_copy
            resources = zipfile.ZipFile(self.plugin_path,'r')
            temp_resources_path = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')

            if self.resources_path is None:
                self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')

            for file in files_to_copy:
                try:
                    resources.extract(file, temp_resources_path)
                    print " %s extracted to %s" % (file, temp_resources_path)
                    resources.extract(file, self.resources_path)
                except:
                    print " %s not found in %s" % (file, os.path.basename(self.plugin_path))
                    print " customize:__init__.initialize(): %s not found in %s" % (file, os.path.basename(self.plugin_path))
                    continue
            resources.close()

            # Register temp_resources_path for deletion when calibre exits
            atexit.register(self.cleanup, temp_resources_path)


    def run(self, path_to_output, opts, db):
    def run(self, path_to_output, opts, db, ids):
        '''
        Run the plugin. Must be implemented in subclasses.
        It should generate the catalog in the format specified
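For orientation, a minimal sketch of a catalog plugin written against the CatalogPlugin hooks shown above: run() now receives ids, search_sort_db() honours opts.ids, and get_output_fields() only reorders when opts.sort_by is set. The class name, the 'csv' output format, and the assumption that opts already carries ids/search_text/sort_by and a fields value (as the callers set up elsewhere in this commit) are illustrative, not part of the commit.

# Minimal sketch, not part of this commit; names and output format are invented.
from calibre.customize import CatalogPlugin

class ExampleCatalog(CatalogPlugin):
    name                = 'Example catalog generator'   # hypothetical
    description         = 'Dump selected fields of the library to a text file'
    author              = 'example'
    version             = (1, 0, 0)
    supported_platforms = ['windows', 'osx', 'linux']
    file_types          = set(['csv'])                   # hypothetical output format

    def run(self, path_to_output, opts, db, ids):
        # ids arrives from the caller (GUI selection or --ids); the filtering
        # itself happens via opts.ids -> db.get_data_as_dict(ids=opts.ids).
        records = self.search_sort_db(db, opts)
        fields  = self.get_output_fields(opts)
        # Simplified output: one comma-separated line per record.
        with open(path_to_output, 'w') as f:
            f.write(','.join(fields) + '\n')
            for record in records:
                f.write(','.join(unicode(record.get(field, '')) for field in fields) + '\n')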
@@ -14,6 +14,7 @@ Windows PNP strings:
                2W00000&1', 3, u'G:\\')

'''
import re

from calibre.devices.usbms.driver import USBMS

@@ -108,6 +109,7 @@ class POCKETBOOK360(EB600):

    OSX_MAIN_MEM = 'Philips Mass Storge Media'
    OSX_CARD_A_MEM = 'Philips Mass Storge Media'
    OSX_MAIN_MEM_VOL_PAT = re.compile(r'/Pocket')

    @classmethod
    def can_handle(cls, dev, debug=False):
@@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
        mi.title_sort = title_sort(opts.title)
    if getattr(opts, 'tags', None) is not None:
        mi.tags = [t.strip() for t in opts.tags.split(',')]
    if getattr(opts, 'series', None) is not None:
        mi.series = opts.series.strip()
    if getattr(opts, 'series_index', None) is not None:
        mi.series_index = float(opts.series_index.strip())

    if getattr(opts, 'cover', None) is not None:
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
@@ -25,12 +25,14 @@ def get_document_info(stream):
    while not found:
        prefix = block[-6:]
        block = prefix + stream.read(block_size)
        actual_block_size = len(block) - len(prefix)
        if len(block) == len(prefix):
            break
        idx = block.find(r'{\info')
        if idx >= 0:
            found = True
            stream.seek(stream.tell() - block_size + idx - len(prefix))
            pos = stream.tell() - actual_block_size + idx - len(prefix)
            stream.seek(pos)
        else:
            if block.find(r'\sect') > -1:
                break
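The corrected line computes the absolute offset of a match inside a block that was read with a six-byte overlap from the previous block. The same arithmetic in isolation, with function and variable names invented for illustration:

# Standalone sketch of the overlapping-block scan used above (names invented).
def find_marker(stream, marker, block_size=4096, overlap=6):
    prefix = b''
    while True:
        chunk = stream.read(block_size)
        if not chunk:
            return -1
        block = prefix + chunk                  # overlap catches markers spanning a boundary
        idx = block.find(marker)
        if idx >= 0:
            # stream.tell() sits at the end of `chunk`; step back over the
            # part of `block` that comes after the match.
            return stream.tell() - (len(block) - idx)
        prefix = block[-overlap:]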
@@ -90,7 +90,10 @@ class DetectStructure(object):
                mark = etree.Element(XHTML('div'), style=page_break_after)
            else: # chapter_mark == 'both':
                mark = etree.Element(XHTML('hr'), style=page_break_before)
            elem.addprevious(mark)
            try:
                elem.addprevious(mark)
            except TypeError:
                self.log.exception('Failed to mark chapter')

    def create_level_based_toc(self):
        if self.opts.level1_toc is None:
@@ -20,6 +20,10 @@ class Font(object):

class Column(object):

    # A column contains an element is the element bulges out to
    # the left or the right by at most HFUZZ*col width.
    HFUZZ = 0.2

    def __init__(self):
        self.left = self.right = self.top = self.bottom = 0
        self.width = self.height = 0

@@ -41,6 +45,10 @@ class Column(object):
        for x in self.elements:
            yield x

    def contains(self, elem):
        return elem.left > self.left - self.HFUZZ*self.width and \
               elem.right < self.right + self.HFUZZ*self.width

class Element(object):

    def __eq__(self, other):

@@ -238,11 +246,10 @@ class Page(object):
        return columns

    def find_elements_in_row_of(self, x):
        interval = Interval(x.top - self.YFUZZ * self.average_text_height,
        interval = Interval(x.top,
                x.top + self.YFUZZ*(1+self.average_text_height))
        h_interval = Interval(x.left, x.right)
        m = max(0, x.idx-15)
        for y in self.elements[m:x.idx+15]:
        for y in self.elements[x.idx:x.idx+15]:
            if y is not x:
                y_interval = Interval(y.top, y.bottom)
                x_interval = Interval(y.left, y.right)
@@ -169,6 +169,21 @@ class RTFInput(InputFormatPlugin):
        with open('styles.css', 'ab') as f:
            f.write(css)

    def preprocess(self, fname):
        self.log('\tPreprocessing to convert unicode characters')
        try:
            data = open(fname, 'rb').read()
            from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
            tokenizer = RtfTokenizer(data)
            tokens = RtfTokenParser(tokenizer.tokens)
            data = tokens.toRTF()
            fname = 'preprocessed.rtf'
            with open(fname, 'wb') as f:
                f.write(data)
        except:
            self.log.exception(
                'Failed to preprocess RTF to convert unicode sequences, ignoring...')
        return fname

    def convert(self, stream, options, file_ext, log,
                accelerators):

@@ -177,8 +192,9 @@ class RTFInput(InputFormatPlugin):
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.log = log
        self.log('Converting RTF to XML...')
        fname = self.preprocess(stream.name)
        try:
            xml = self.generate_xml(stream.name)
            xml = self.generate_xml(fname)
        except RtfInvalidCodeException:
            raise ValueError(_('This RTF file has a feature calibre does not '
                'support. Convert it to HTML first and then try it.'))
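The same round trip can be exercised by hand on a single file, mirroring the __main__ block of the preprocess module added below ('sample.rtf' is a placeholder path):

# Mirrors what preprocess() above does, runnable on its own.
from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser

data   = open('sample.rtf', 'rb').read()
tokens = RtfTokenParser(RtfTokenizer(data).tokens)   # tokenize, then rewrite \u sequences
open('preprocessed.rtf', 'wb').write(tokens.toRTF())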
src/calibre/ebooks/rtf/preprocess.py (new file, 344 lines)

@@ -0,0 +1,344 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Gerendi Sandor Attila'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
"""
|
||||
RTF tokenizer and token parser. v.1.0 (1/17/2010)
|
||||
Author: Gerendi Sandor Attila
|
||||
|
||||
At this point this will tokenize a RTF file then rebuild it from the tokens.
|
||||
In the process the UTF8 tokens are altered to be supported by the RTF2XML and also remain RTF specification compilant.
|
||||
"""
|
||||
|
||||
class tokenDelimitatorStart():
|
||||
def __init__(self):
|
||||
pass
|
||||
def toRTF(self):
|
||||
return b'{'
|
||||
def __repr__(self):
|
||||
return '{'
|
||||
|
||||
class tokenDelimitatorEnd():
|
||||
def __init__(self):
|
||||
pass
|
||||
def toRTF(self):
|
||||
return b'}'
|
||||
def __repr__(self):
|
||||
return '}'
|
||||
|
||||
class tokenControlWord():
|
||||
def __init__(self, name, separator = ''):
|
||||
self.name = name
|
||||
self.separator = separator
|
||||
def toRTF(self):
|
||||
return self.name + self.separator
|
||||
def __repr__(self):
|
||||
return self.name + self.separator
|
||||
|
||||
class tokenControlWordWithNumericArgument():
|
||||
def __init__(self, name, argument, separator = ''):
|
||||
self.name = name
|
||||
self.argument = argument
|
||||
self.separator = separator
|
||||
def toRTF(self):
|
||||
return self.name + repr(self.argument) + self.separator
|
||||
def __repr__(self):
|
||||
return self.name + repr(self.argument) + self.separator
|
||||
|
||||
class tokenControlSymbol():
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
def toRTF(self):
|
||||
return self.name
|
||||
def __repr__(self):
|
||||
return self.name
|
||||
|
||||
class tokenData():
|
||||
def __init__(self, data):
|
||||
self.data = data
|
||||
def toRTF(self):
|
||||
return self.data
|
||||
def __repr__(self):
|
||||
return self.data
|
||||
|
||||
class tokenBinN():
|
||||
def __init__(self, data, separator = ''):
|
||||
self.data = data
|
||||
self.separator = separator
|
||||
def toRTF(self):
|
||||
return "\\bin" + repr(len(self.data)) + self.separator + self.data
|
||||
def __repr__(self):
|
||||
return "\\bin" + repr(len(self.data)) + self.separator + self.data
|
||||
|
||||
class token8bitChar():
|
||||
def __init__(self, data):
|
||||
self.data = data
|
||||
def toRTF(self):
|
||||
return "\\'" + self.data
|
||||
def __repr__(self):
|
||||
return "\\'" + self.data
|
||||
|
||||
class tokenUnicode():
|
||||
def __init__(self, data, separator = '', current_ucn = 1, eqList = []):
|
||||
self.data = data
|
||||
self.separator = separator
|
||||
self.current_ucn = current_ucn
|
||||
self.eqList = eqList
|
||||
def toRTF(self):
|
||||
result = '\\u' + repr(self.data) + ' '
|
||||
ucn = self.current_ucn
|
||||
if len(self.eqList) < ucn:
|
||||
ucn = len(self.eqList)
|
||||
result = tokenControlWordWithNumericArgument('\\uc', ucn).toRTF() + result
|
||||
i = 0
|
||||
for eq in self.eqList:
|
||||
if i >= ucn:
|
||||
break
|
||||
result = result + eq.toRTF()
|
||||
return result
|
||||
def __repr__(self):
|
||||
return '\\u' + repr(self.data)
|
||||
|
||||
|
||||
def isAsciiLetter(value):
|
||||
return ((value >= 'a') and (value <= 'z')) or ((value >= 'A') and (value <= 'Z'))
|
||||
|
||||
def isDigit(value):
|
||||
return (value >= '0') and (value <= '9')
|
||||
|
||||
def isChar(value, char):
|
||||
return value == char
|
||||
|
||||
def isString(buffer, string):
|
||||
return buffer == string
|
||||
|
||||
|
||||
class RtfTokenParser():
|
||||
def __init__(self, tokens):
|
||||
self.tokens = tokens
|
||||
self.process()
|
||||
self.processUnicode()
|
||||
|
||||
def process(self):
|
||||
i = 0
|
||||
newTokens = []
|
||||
while i < len(self.tokens):
|
||||
if isinstance(self.tokens[i], tokenControlSymbol):
|
||||
if isString(self.tokens[i].name, "\\'"):
|
||||
i = i + 1
|
||||
if not isinstance(self.tokens[i], tokenData):
|
||||
raise BaseException('Error: token8bitChar without data.')
|
||||
if len(self.tokens[i].data) < 2:
|
||||
raise BaseException('Error: token8bitChar without data.')
|
||||
newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
|
||||
if len(self.tokens[i].data) > 2:
|
||||
newTokens.append(tokenData(self.tokens[i].data[2:]))
|
||||
i = i + 1
|
||||
continue
|
||||
|
||||
newTokens.append(self.tokens[i])
|
||||
i = i + 1
|
||||
|
||||
self.tokens = list(newTokens)
|
||||
|
||||
def processUnicode(self):
|
||||
i = 0
|
||||
newTokens = []
|
||||
ucNbStack = [1]
|
||||
while i < len(self.tokens):
|
||||
if isinstance(self.tokens[i], tokenDelimitatorStart):
|
||||
ucNbStack.append(ucNbStack[len(ucNbStack) - 1])
|
||||
newTokens.append(self.tokens[i])
|
||||
i = i + 1
|
||||
continue
|
||||
if isinstance(self.tokens[i], tokenDelimitatorEnd):
|
||||
ucNbStack.pop()
|
||||
newTokens.append(self.tokens[i])
|
||||
i = i + 1
|
||||
continue
|
||||
if isinstance(self.tokens[i], tokenControlWordWithNumericArgument):
|
||||
if isString(self.tokens[i].name, '\\uc'):
|
||||
ucNbStack[len(ucNbStack) - 1] = self.tokens[i].argument
|
||||
newTokens.append(self.tokens[i])
|
||||
i = i + 1
|
||||
continue
|
||||
if isString(self.tokens[i].name, '\\u'):
|
||||
x = i
|
||||
j = 0
|
||||
i = i + 1
|
||||
replace = []
|
||||
partialData = None
|
||||
ucn = ucNbStack[len(ucNbStack) - 1]
|
||||
while (i < len(self.tokens)) and (j < ucn):
|
||||
if isinstance(self.tokens[i], tokenDelimitatorStart):
|
||||
break
|
||||
if isinstance(self.tokens[i], tokenDelimitatorEnd):
|
||||
break
|
||||
if isinstance(self.tokens[i], tokenData):
|
||||
if len(self.tokens[i].data) >= ucn - j:
|
||||
replace.append(tokenData(self.tokens[i].data[0 : ucn - j]))
|
||||
if len(self.tokens[i].data) > ucn - j:
|
||||
partialData = tokenData(self.tokens[i].data[ucn - j:])
|
||||
i = i + 1
|
||||
break
|
||||
else:
|
||||
replace.append(self.tokens[i])
|
||||
j = j + len(self.tokens[i].data)
|
||||
i = i + 1
|
||||
continue
|
||||
if isinstance(self.tokens[i], token8bitChar) or isinstance(self.tokens[i], tokenBinN):
|
||||
replace.append(self.tokens[i])
|
||||
i = i + 1
|
||||
j = j + 1
|
||||
continue
|
||||
raise BaseException('Error: incorect utf replacement.')
|
||||
|
||||
#calibre rtf2xml does not support utfreplace
|
||||
replace = []
|
||||
|
||||
newTokens.append(tokenUnicode(self.tokens[x].argument, self.tokens[x].separator, ucNbStack[len(ucNbStack) - 1], replace))
|
||||
if partialData != None:
|
||||
newTokens.append(partialData)
|
||||
continue
|
||||
|
||||
newTokens.append(self.tokens[i])
|
||||
i = i + 1
|
||||
|
||||
self.tokens = list(newTokens)
|
||||
|
||||
|
||||
def toRTF(self):
|
||||
result = []
|
||||
for token in self.tokens:
|
||||
result.append(token.toRTF())
|
||||
return "".join(result)
|
||||
|
||||
|
||||
class RtfTokenizer():
|
||||
def __init__(self, rtfData):
|
||||
self.rtfData = []
|
||||
self.tokens = []
|
||||
self.rtfData = rtfData
|
||||
self.tokenize()
|
||||
|
||||
def tokenize(self):
|
||||
i = 0
|
||||
lastDataStart = -1
|
||||
while i < len(self.rtfData):
|
||||
|
||||
if isChar(self.rtfData[i], '{'):
|
||||
if lastDataStart > -1:
|
||||
self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
|
||||
lastDataStart = -1
|
||||
self.tokens.append(tokenDelimitatorStart())
|
||||
i = i + 1
|
||||
continue
|
||||
|
||||
if isChar(self.rtfData[i], '}'):
|
||||
if lastDataStart > -1:
|
||||
self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
|
||||
lastDataStart = -1
|
||||
self.tokens.append(tokenDelimitatorEnd())
|
||||
i = i + 1
|
||||
continue
|
||||
|
||||
if isChar(self.rtfData[i], '\\'):
|
||||
if i + 1 >= len(self.rtfData):
|
||||
raise BaseException('Error: Control character found at the end of the document.')
|
||||
|
||||
if lastDataStart > -1:
|
||||
self.tokens.append(tokenData(self.rtfData[lastDataStart : i]))
|
||||
lastDataStart = -1
|
||||
|
||||
tokenStart = i
|
||||
i = i + 1
|
||||
|
||||
#Control Words
|
||||
if isAsciiLetter(self.rtfData[i]):
|
||||
#consume <ASCII Letter Sequence>
|
||||
consumed = False
|
||||
while i < len(self.rtfData):
|
||||
if not isAsciiLetter(self.rtfData[i]):
|
||||
tokenEnd = i
|
||||
consumed = True
|
||||
break
|
||||
i = i + 1
|
||||
|
||||
if not consumed:
|
||||
raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
|
||||
|
||||
#we have numeric argument before delimiter
|
||||
if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
|
||||
#consume the numeric argument
|
||||
consumed = False
|
||||
l = 0
|
||||
while i < len(self.rtfData):
|
||||
if not isDigit(self.rtfData[i]):
|
||||
consumed = True
|
||||
break
|
||||
l = l + 1
|
||||
i = i + 1
|
||||
if l > 10 :
|
||||
raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%[tokenStart])
|
||||
|
||||
if not consumed:
|
||||
raise BaseException('Error (at:%d): Control Word without numeric argument end.'%[tokenStart])
|
||||
|
||||
separator = ''
|
||||
if isChar(self.rtfData[i], ' '):
|
||||
separator = ' '
|
||||
|
||||
controlWord = self.rtfData[tokenStart: tokenEnd]
|
||||
if tokenEnd < i:
|
||||
value = int(self.rtfData[tokenEnd: i])
|
||||
if isString(controlWord, "\\bin"):
|
||||
i = i + value
|
||||
self.tokens.append(tokenBinN(self.rtfData[tokenStart:i], separator))
|
||||
else:
|
||||
self.tokens.append(tokenControlWordWithNumericArgument(controlWord, value, separator))
|
||||
else:
|
||||
self.tokens.append(tokenControlWord(controlWord, separator))
|
||||
#space delimiter, we should discard it
|
||||
if self.rtfData[i] == ' ':
|
||||
i = i + 1
|
||||
|
||||
#Control Symbol
|
||||
else:
|
||||
self.tokens.append(tokenControlSymbol(self.rtfData[tokenStart : i + 1]))
|
||||
i = i + 1
|
||||
continue
|
||||
|
||||
if lastDataStart < 0:
|
||||
lastDataStart = i
|
||||
i = i + 1
|
||||
|
||||
def toRTF(self):
|
||||
result = []
|
||||
for token in self.tokens:
|
||||
result.append(token.toRTF())
|
||||
return "".join(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
if len(sys.argv) < 2:
|
||||
print ("Usage %prog rtfFileToConvert")
|
||||
sys.exit()
|
||||
f = open(sys.argv[1], 'rb')
|
||||
data = f.read()
|
||||
f.close()
|
||||
|
||||
tokenizer = RtfTokenizer(data)
|
||||
parsedTokens = RtfTokenParser(tokenizer.tokens)
|
||||
|
||||
data = parsedTokens.toRTF()
|
||||
|
||||
f = open(sys.argv[1], 'w')
|
||||
f.write(data)
|
||||
f.close()
|
||||
|
||||
|
@@ -10,11 +10,12 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500'
from calibre import islinux, iswindows, isosx
from calibre.utils.config import Config, ConfigProxy, dynamic
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.utils.localization import set_qt_translator
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
from calibre.ebooks.metadata import MetaInformation

gprefs = JSONConfig('gui')

NONE = QVariant() #: Null value to return from the data function of item models
@ -4,10 +4,14 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import os
from optparse import OptionParser

from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.conversion.plumber import Plumber
# ?from calibre.library.catalog import Catalog
from calibre.customize.ui import plugin_for_catalog_format
from calibre.utils.logging import Log
from calibre.gui2 import choose_dir, Application

def gui_convert(input, output, recommendations, notification=DummyReporter(),
        abort_after_input_dump=False, log=None):
@ -21,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),

    plumber.run()

def gui_catalog(fmt, title, dbspec, ids, out_file_name,
def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options,
        notification=DummyReporter(), log=None):
    if log is None:
        log = Log()
@ -33,19 +37,25 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name,
    else: # To be implemented in the future
        pass

    # Implement the interface to the catalog generating code here
    #db
    log("gui2.convert.gui_conversion:gui_catalog()")
    log("fmt: %s" % fmt)
    log("title: %s" % title)
    log("dbspec: %s" % dbspec)
    log("ids: %s" % ids)
    log("out_file_name: %s" % out_file_name)
    # Create a minimal OptionParser that we can append to
    parser = OptionParser()
    args = []
    parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
    opts, args = parser.parse_args()

    # Populate opts
    opts.ids = ids
    opts.search_text = None
    opts.sort_by = None

    # Extract the option dictionary to comma-separated lists
    for option in fmt_options:
        setattr(opts, option, ','.join(fmt_options[option]))

    # Fetch and run the plugin for fmt
    plugin = plugin_for_catalog_format(fmt)
    plugin.run(out_file_name, opts, db)

    # This needs to call the .run() method of the plugin associated with fmt
    # Needs to set up options before the call
    # catalog = Catalog(out_file_name, options, dbspec)
    # Can I call library.cli:catalog_option_parser()?
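For reference, fmt_options arrives from the catalog dialog as a dict whose values are lists of strings (that is what the ','.join above expects), and the loop flattens each list into a comma-separated string on the opts namespace. A minimal standalone sketch of that flattening (the option names below are invented for illustration):

from optparse import OptionParser

parser = OptionParser()
parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
opts, args = parser.parse_args([])   # empty list so sys.argv is left alone

# Hypothetical plugin options; the real names depend on the selected catalog plugin.
fmt_options = {'fields': ['title', 'authors', 'tags'], 'note_tag': ['*']}
for option in fmt_options:
    setattr(opts, option, ','.join(fmt_options[option]))

print(opts.fields)     # 'title,authors,tags'
print(opts.note_tag)   # '*'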
@ -12,15 +12,18 @@ from PyQt4.Qt import QDialog, QWidget

from calibre.customize.ui import config
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
from calibre.gui2 import dynamic
from calibre.gui2 import gprefs, dynamic
from calibre.customize.ui import available_catalog_formats, catalog_plugins
from calibre.gui2.catalog.catalog_csv_xml import PluginWidget

class Catalog(QDialog, Ui_Dialog):
    ''' Catalog Dialog builder'''
    widgets = []

    def __init__(self, parent, dbspec, ids):
        import re, cStringIO
        from calibre import prints as info
        from calibre.gui2 import dynamic
        from PyQt4.uic import compileUi

        QDialog.__init__(self, parent)
@ -42,6 +45,7 @@ class Catalog(QDialog, Ui_Dialog):
        self.fmts = []

        from calibre.customize.builtins import plugins as builtin_plugins
        from calibre.customize import CatalogPlugin

        for plugin in catalog_plugins():
            if plugin.name in config['disabled_plugins']:
@ -49,38 +53,30 @@ class Catalog(QDialog, Ui_Dialog):

            name = plugin.name.lower().replace(' ', '_')
            if type(plugin) in builtin_plugins:
                info("Adding tab for builtin Catalog plugin %s" % plugin.name)
                #info("Adding widget for builtin Catalog plugin %s" % plugin.name)
                try:
                    catalog_widget = __import__('calibre.gui2.catalog.'+name,
                            fromlist=[1])
                    pw = catalog_widget.PluginWidget()
                    pw.initialize()
                    pw.initialize(name)
                    pw.ICON = I('forward.svg')
                    page = self.tabs.addTab(pw,pw.TITLE)
                    [self.fmts.append([file_type, pw.sync_enabled]) for file_type in plugin.file_types]
                    info("\tSupported formats: %s" % plugin.file_types)
                    info("\tsync_enabled: %s" % pw.sync_enabled)

                    self.widgets.append(pw)
                    [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
                except ImportError:
                    info("ImportError with %s" % name)
                    continue
            else:
                # Test to see if .ui and .py files exist in tmpdir/calibre_plugin_resources
                form = os.path.join(tempfile.gettempdir(),
                        'calibre_plugin_resources','%s.ui' % name)
                klass = os.path.join(tempfile.gettempdir(),
                        'calibre_plugin_resources','%s.py' % name)
                compiled_form = os.path.join(tempfile.gettempdir(),
                        'calibre_plugin_resources','%s_ui.py' % name)
                plugin_resources = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
                # Load dynamic tab
                form = os.path.join(plugin.resources_path,'%s.ui' % name)
                klass = os.path.join(plugin.resources_path,'%s.py' % name)
                compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name)

                if os.path.exists(form) and os.path.exists(klass):
                    info("Adding tab for user-installed Catalog plugin %s" % plugin.name)
                    #info("Adding widget for user-installed Catalog plugin %s" % plugin.name)

                    # Compile the form provided in plugin.zip
                    if not os.path.exists(compiled_form) or \
                            os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
                        info('\tCompiling form', form)
                    # Compile the .ui form provided in plugin.zip
                    if not os.path.exists(compiled_form):
                        # info('\tCompiling form', form)
                        buf = cStringIO.StringIO()
                        compileUi(form, buf)
                        dat = buf.getvalue()
@ -88,35 +84,41 @@ class Catalog(QDialog, Ui_Dialog):
                                re.DOTALL).sub(r'_("\1")', dat)
                        open(compiled_form, 'wb').write(dat)

                    # Import the Catalog class from the dynamic .py file
                    # Import the dynamic PluginWidget() from .py file provided in plugin.zip
                    try:
                        sys.path.insert(0, plugin_resources)
                        sys.path.insert(0, plugin.resources_path)
                        catalog_widget = __import__(name, fromlist=[1])
                        dpw = catalog_widget.PluginWidget()
                        dpw.initialize()
                        dpw.ICON = I('forward.svg')
                        page = self.tabs.addTab(dpw, dpw.TITLE)
                        [self.fmts.append([file_type, dpw.sync_enabled]) for file_type in plugin.file_types]
                        info("\tSupported formats: %s" % plugin.file_types)
                        info("\tsync_enabled: %s" % dpw.sync_enabled)
                        pw = catalog_widget.PluginWidget()
                        pw.initialize(name)
                        pw.ICON = I('forward.svg')
                        self.widgets.append(pw)
                        [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
                    except ImportError:
                        info("ImportError with %s" % name)
                        continue
                    finally:
                        sys.path.remove(plugin_resources)
                        sys.path.remove(plugin.resources_path)

                else:
                    info("No dynamic tab resources found for %s" % name)

        self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE))
        for pw in self.widgets:
            page = self.tabs.addTab(pw,pw.TITLE)

        # Generate a sorted list of installed catalog formats/sync_enabled pairs
        # Generate a parallel list of sync_enabled [True|False]
        self.fmts = sorted([x[0].upper() for x in self.fmts])
        fmts = sorted([x[0] for x in self.fmts])

        self.sync_enabled_formats = []
        for fmt in self.fmts:
            if fmt[1]:
                self.sync_enabled_formats.append(fmt[0])

        # Callback when format changes
        self.format.currentIndexChanged.connect(self.format_changed)

        # Add the installed catalog format list to the format QComboBox
        self.format.addItems(self.fmts)
        self.format.addItems(fmts)

        pref = dynamic.get('catalog_preferred_format', 'CSV')
        idx = self.format.findText(pref)
@ -127,9 +129,8 @@ class Catalog(QDialog, Ui_Dialog):
        self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))

    def format_changed(self, idx):
        print "format_changed(idx): idx: %d" % idx
        cf = unicode(self.format.currentText())
        if cf in ('EPUB', 'MOBI'):
        if cf in self.sync_enabled_formats:
            self.sync.setEnabled(True)
        else:
            self.sync.setDisabled(True)
@ -239,23 +239,35 @@ def fetch_scheduled_recipe(arg):
def generate_catalog(parent, dbspec, ids):
    from calibre.gui2.dialogs.catalog import Catalog

    # Build the Catalog dialog
    # Build the Catalog dialog in gui2.dialogs.catalog
    d = Catalog(parent, dbspec, ids)

    if d.exec_() != d.Accepted:
        return None

    # Create the output file
    out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())

    # Retrieve plugin options
    fmt_options = {}
    for x in range(d.tabs.count()):
        if str(d.tabs.tabText(x)).find(str(d.catalog_format)) > -1:
    for fmt in d.fmts:
        if fmt[0] == d.catalog_format:
            fmt_options = fmt[2].options()
            # print "gui2.tools:generate_catalog(): options for %s: %s" % (fmt[0], fmt_options)

    args = [
        d.catalog_format,
        d.catalog_title,
        dbspec,
        ids,
        out.name,
        fmt_options
        ]
    out.close()

    # This calls gui2.convert.gui_conversion:gui_catalog()
    return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
        d.catalog_title
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'

'''The main GUI'''

import os, sys, textwrap, collections, time
import atexit, os, shutil, sys, tempfile, textwrap, collections, time
from xml.parsers.expat import ExpatError
from Queue import Queue, Empty
from threading import Thread
@ -31,7 +31,7 @@ from calibre.utils.ipc.server import Server
from calibre.gui2 import warning_dialog, choose_files, error_dialog, \
                         question_dialog,\
                         pixmap_to_data, choose_dir, \
                         Dispatcher, \
                         Dispatcher, gprefs, \
                         available_height, \
                         max_available_height, config, info_dialog, \
                         available_width, GetMetadata
@ -357,7 +357,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        cm.addAction(_('Bulk convert'))
        cm.addSeparator()
        ac = cm.addAction(
                _('Create catalog of the books in your calibre library'))
                _('Create catalog of books in your calibre library'))
        ac.triggered.connect(self.generate_catalog)
        self.action_convert.setMenu(cm)
        self._convert_single_hook = partial(self.convert_ebook, bulk=False)
@ -518,7 +518,21 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        self.connect(self.library_view.model(), SIGNAL('count_changed(int)'),
                self.tags_view.recount)
        self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear)
        if not gprefs.get('quick_start_guide_added', False):
            from calibre.ebooks.metadata import MetaInformation
            mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
            mi.author_sort = 'Schember, John'
            mi.comments = "A guide to get you up and running with calibre"
            mi.publisher = 'calibre'
            self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
                    [mi])
            gprefs['quick_start_guide_added'] = True
            self.library_view.model().books_added(1)
            if hasattr(self, 'db_images'):
                self.db_images.reset()

        self.library_view.model().count_changed()

        ########################### Cover Flow ################################
        self.cover_flow = None
        if CoverFlow is not None:
@ -1008,7 +1022,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            return
        self._add_books(books, to_device)


    def _add_books(self, paths, to_device, on_card=None):
        if on_card is None:
            on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None
@ -1348,24 +1361,29 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):

    def generate_catalog(self):
        rows = self.library_view.selectionModel().selectedRows()
        if not rows:
        if not rows or len(rows) < 2:
            rows = xrange(self.library_view.model().rowCount(QModelIndex()))
        ids = map(self.library_view.model().id, rows)

        dbspec = None
        if not ids:
            return error_dialog(self, _('No books selected'),
                    _('No books selected to generate catalog for'),
                    show=True)
        # calibre.gui2.tools:generate_catalog()

        # Calling gui2.tools:generate_catalog()
        ret = generate_catalog(self, dbspec, ids)
        if ret is None:
            return

        func, args, desc, out, sync, title = ret

        fmt = os.path.splitext(out)[1][1:].upper()
        job = self.job_manager.run_job(
                Dispatcher(self.catalog_generated), func, args=args,
                description=desc)
        job.catalog_file_path = out
        job.fmt = fmt
        job.catalog_sync, job.catalog_title = sync, title
        self.status_bar.showMessage(_('Generating %s catalog...')%fmt)

@ -1380,7 +1398,12 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            dynamic.set('catalogs_to_be_synced', sync)
        self.status_bar.showMessage(_('Catalog generated.'), 3000)
        self.sync_catalogs()

        if job.fmt in ['CSV','XML']:
            export_dir = choose_dir(self, 'Export Catalog Directory',
                    'Select destination for %s.%s' % (job.catalog_title, job.fmt.lower()))
            if export_dir:
                destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
                shutil.copyfile(job.catalog_file_path, destination)

    ############################### Fetch news #################################
@ -40,10 +40,9 @@ class CSV_XML(CatalogPlugin):
        from calibre.utils.logging import Log

        log = Log()
        self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
        # Update to .partition
        self.fmt = path_to_output.rpartition('.')[2]
        if opts.verbose:

        if False and opts.verbose:
            log("%s:run" % self.name)
            log(" path_to_output: %s" % path_to_output)
            log(" Output format: %s" % self.fmt)
@ -644,6 +644,10 @@ def catalog_option_parser(args):
    output, fmt = validate_command_line(parser, args, log)

    # Add options common to all catalog plugins
    parser.add_option('-i', '--ids', default=None, dest='ids',
                      help=_("Comma-separated list of database IDs to catalog.\n"
                             "If declared, --search is ignored.\n"
                             "Default: all"))
    parser.add_option('-s', '--search', default=None, dest='search_text',
                      help=_("Filter the results by the search query. "
                             "For the format of the search query, please see "
@ -656,31 +660,6 @@ def catalog_option_parser(args):
    # Add options specific to fmt plugin
    plugin = add_plugin_parser_options(fmt, parser, log)

    # Merge options from GUI Preferences
    '''
    # Placeholder sample code until we implement GUI preferences
    from calibre.library.save_to_disk import config
    c = config()
    for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
        opt = c.get_option(pref)
        switch = '--dont-'+pref.replace('_', '-')
        parser.add_option(switch, default=True, action='store_false',
                          help=opt.help+' '+_('Specifying this switch will turn '
                          'this behavior off.'), dest=pref)

    for pref in ['timefmt', 'template', 'formats']:
        opt = c.get_option(pref)
        switch = '--'+pref
        parser.add_option(switch, default=opt.default,
                          help=opt.help, dest=pref)

    for pref in ('replace_whitespace', 'to_lowercase'):
        opt = c.get_option(pref)
        switch = '--'+pref.replace('_', '-')
        parser.add_option(switch, default=False, action='store_true',
                          help=opt.help)
    '''

    return parser, plugin, log

def command_catalog(args, dbpath):
@ -693,6 +672,9 @@ def command_catalog(args, dbpath):
        return 1
    if opts.verbose:
        log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
    if opts.ids:
        opts.ids = [int(id) for id in opts.ids.split(',')]

    with plugin:
        plugin.run(args[1], opts, get_db(dbpath, opts))
    return 0
@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
        for i in iter(self):
            yield i[x]

    def get_data_as_dict(self, prefix=None, authors_as_string=False):
    def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
        '''
        Return all metadata stored in the database as a dict. Includes paths to
        the cover and each format.

        :param prefix: The prefix for all paths. By default, the prefix is the absolute path
                       to the library folder.
        :param ids: Set of ids to return the data for. If None, return data for
                    all entries in the database.
        '''
        if prefix is None:
            prefix = self.library_path
@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase):
        data = []
        for record in self.data:
            if record is None: continue
            db_id = record[FIELD_MAP['id']]
            if ids is not None and db_id not in ids:
                continue
            x = {}
            for field in FIELDS:
                x[field] = record[FIELD_MAP[field]]
            data.append(x)
            x['id'] = record[FIELD_MAP['id']]
            x['id'] = db_id
            x['formats'] = []
            if not x['authors']:
                x['authors'] = _('Unknown')
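For context, the new ids parameter is what allows the catalog code to dump only the selected books rather than the whole library. A hypothetical call (the variable names are invented; db stands for a LibraryDatabase2 instance):

selected_ids = set([3, 7, 42])     # ids picked elsewhere, e.g. from the library view selection
rows = db.get_data_as_dict(ids=selected_ids, authors_as_string=True)
for row in rows:
    print("%s: %s" % (row['id'], row['title']))   # each row is a plain dict of metadata fields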
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
'''
Manage application-wide preferences.
'''
import os, re, cPickle, textwrap, traceback, plistlib
import os, re, cPickle, textwrap, traceback, plistlib, json
from copy import deepcopy
from functools import partial
from optparse import OptionParser as _OptionParser
@ -564,23 +564,31 @@ class XMLConfig(dict):
    data types.
    '''

    EXTENSION = '.plist'

    def __init__(self, rel_path_to_cf_file):
        dict.__init__(self)
        self.file_path = os.path.join(config_dir,
                *(rel_path_to_cf_file.split('/')))
        self.file_path = os.path.abspath(self.file_path)
        if not self.file_path.endswith('.plist'):
            self.file_path += '.plist'
        if not self.file_path.endswith(self.EXTENSION):
            self.file_path += self.EXTENSION

        self.refresh()

    def raw_to_object(self, raw):
        return plistlib.readPlistFromString(raw)

    def to_raw(self):
        return plistlib.writePlistToString(self)

    def refresh(self):
        d = {}
        if os.path.exists(self.file_path):
            with ExclusiveFile(self.file_path) as f:
                raw = f.read()
                try:
                    d = plistlib.readPlistFromString(raw) if raw.strip() else {}
                    d = self.raw_to_object(raw) if raw.strip() else {}
                except SystemError:
                    pass
                except:
@ -618,11 +626,21 @@ class XMLConfig(dict):
        if not os.path.exists(dpath):
            os.makedirs(dpath, mode=CONFIG_DIR_MODE)
        with ExclusiveFile(self.file_path) as f:
            raw = plistlib.writePlistToString(self)
            raw = self.to_raw()
            f.seek(0)
            f.truncate()
            f.write(raw)

class JSONConfig(XMLConfig):

    EXTENSION = '.json'

    def raw_to_object(self, raw):
        return json.loads(raw.decode('utf-8'))

    def to_raw(self):
        return json.dumps(self, indent=2)


def _prefs():
    c = Config('global', 'calibre wide preferences')
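For reference, the new JSONConfig behaves like the existing XMLConfig, a dict persisted under the calibre config directory, except that it is serialized as JSON; gprefs = JSONConfig('gui') earlier in this commit is the GUI-wide instance. A minimal usage sketch (the 'example' name is invented, and persistence on assignment is assumed to follow the existing XMLConfig behaviour):

from calibre.utils.config import JSONConfig

prefs = JSONConfig('example')                  # stored as <config_dir>/example.json
if not prefs.get('quick_start_guide_added', False):
    prefs['quick_start_guide_added'] = True    # written back to disk by the base class machinery
print(prefs.file_path)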
@ -104,6 +104,7 @@ _extra_lang_codes = {
        'en_CY' : _('English (Cyprus)'),
        'en_PK' : _('English (Pakistan)'),
        'en_SG' : _('English (Singapore)'),
        'en_YE' : _('English (Yemen)'),
        'de_AT' : _('German (AT)'),
        'nl' : _('Dutch (NL)'),
        'nl_BE' : _('Dutch (BE)'),