GwR edits to support Catalog GUI plugin implementation

2025-07-09 03:04:10 -04:00 · 2010-01-19 08:57:26 -07:00 · 2010-01-19 08:57:26 -07:00 · 59a5e1296a
commit 59a5e1296a
parent 9e30796443 3715fd26b2
37 changed files with 2117 additions and 286 deletions
--- a/resources/images/news/msnbc.png
+++ b/resources/images/news/msnbc.png
--- a/resources/recipes/blic.recipe
+++ b/resources/recipes/blic.recipe
@ -1,7 +1,6 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 blic.rs
 '''
@ -12,54 +11,33 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Blic(BasicNewsRecipe):
    title                 = 'Blic'
    __author__            = 'Darko Miletic'
-    description           = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
+    description           = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
    publisher             = 'RINGIER d.o.o.'
    category              = 'news, politics, Serbia'
    delay                 = 1
    oldest_article        = 2
    max_articles_per_feed = 100
-    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False
    language              = 'sr'

-    lang                  = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '

    conversion_options = {
                          'comment'  : description
                        , 'tags'     : category
                        , 'publisher': publisher
-                        , 'language'         : lang
+                        , 'language' : language
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags     = [dict(name='div', attrs={'class':'single_news'})]
+    remove_tags_before = dict(name='div', attrs={'id':'article_info'})

-    feeds              = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
+    feeds              = [(u'Danasnje Vesti', u'http://www.blic.rs/rss/danasnje-vesti')]

    remove_tags        = [dict(name=['object','link'])]

    def print_version(self, url):
-        rest_url = url.partition('?')[2]
-        return u'http://www.blic.rs/_print.php?' + rest_url
-
-    def preprocess_html(self, soup):
-        attribs = [  'style','font','valign'
-                    ,'colspan','width','height'
-                    ,'rowspan','summary','align'
-                    ,'cellspacing','cellpadding'
-                    ,'frames','rules','border'
-                  ]
-        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
-            item.name = 'div'
-            for attrib in attribs:
-                if item.has_key(attrib):
-                   del item[attrib]
-        return self.adeify_images(soup)
-
-    def get_article_url(self, article):
-        raw = article.get('link',  None)
-        return raw.replace('.co.yu','.rs')
+        return url + '/print'

--- a/resources/recipes/cio.recipe
+++ b/resources/recipes/cio.recipe
@ -0,0 +1,111 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'CIO is the leading information brand for today s busy chief information officer. '
+
+'''
+http://www.cio.co.uk/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+class cio(BasicNewsRecipe):
+    __author__    = 'Lorenzo Vigentini'
+    description   = 'CIO is the leading information brand for today\'s busy chief information officer.'
+    cover_url     = 'http://media.cio.co.uk/graphics/shared/cio-logo.gif'
+
+    title          = 'CIO '
+    publisher      = 'IDG Communication'
+    category       = 'IT, technology, business, industry'
+
+    language       = 'en'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 7
+    max_articles_per_feed = 10
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    temp_files = []
+    articles_are_obfuscated = True
+
+    def get_obfuscated_article(self, url):
+        '''
+        Download the print view of the article at ``url`` into a temporary
+        file and return the path of that file.
+
+        The article page is opened first; the first link on it whose URL
+        matches the print-view pattern is then followed and its response
+        body is what gets saved.
+        '''
+        br = self.get_browser()
+        br.open(url)
+        response = br.follow_link(url_regex='&print&intcmp=ROSATT2$', nr = 0)
+        html = response.read()
+        # Keep the new file in a local instead of re-reading
+        # self.temp_files[-1]: temp_files is a class-level list, so any
+        # append made between those lookups would make [-1] a different file.
+        tmp = PersistentTemporaryFile('_fa.html')
+        # The file object is still recorded in temp_files, as before.
+        self.temp_files.append(tmp)
+        try:
+            tmp.write(html)
+        finally:
+            # Close the handle even when the write fails.
+            tmp.close()
+        return tmp.name
+
+    keep_only_tags     = [
+                            dict(name='div', attrs={'id':'mainContent'})
+                        ]
+
+    feeds          = [
+                        (u'News', u'http://www.cio.co.uk/rss/feeds/cio-news.xml'),
+                        (u'Debate', u'http://www.cio.co.uk/rss/feeds/cio-debate.xml'),
+                        (u'Analysis', u'http://www.cio.co.uk/rss/feeds/cio-analysis.xml'),
+                        (u'Opinion', u'http://www.cio.co.uk/rss/feeds/cio-opinion.xml'),
+                        (u'In-Depth', u'http://www.cio.co.uk/rss/feeds/cio-in-depth.xml'),
+                        (u'Change management', u'http://www.cio.co.uk/rss/feeds/cio-change-management-management.xml'),
+                        (u'Regulatory compliance', u'http://www.cio.co.uk/rss/feeds/cio-regulatory-compliance-management.xml'),
+                        (u'Business strategy', u'http://www.cio.co.uk/rss/feeds/cio-business-strategy-management.xml'),
+                        (u'Technology', u'http://www.cio.co.uk/rss/feeds/cio-technology-management.xml'),
+                        (u'Security', u'http://www.cio.co.uk/rss/feeds/cio-security-management.xml'),
+                        (u'Soft skills', u'http://www.cio.co.uk/rss/feeds/cio-soft-skills-management.xml'),
+                        (u'The CIO career', u'http://www.cio.co.uk/rss/feeds/cio-cio-career-management.xml'),
+                        (u'Budgets', u'http://www.cio.co.uk/rss/feeds/cio-budgets-management.xml'),
+                        (u'Supplier management', u'http://www.cio.co.uk/rss/feeds/cio-supplier-management-management.xml'),
+                        (u'Board politics', u'http://www.cio.co.uk/rss/feeds/cio-board-politics-management.xml'),
+                        (u'Enterprise software', u'http://www.cio.co.uk/rss/feeds/cio-enterprise-software-technology.xml'),
+                        (u'Mobile and wireless', u'http://www.cio.co.uk/rss/feeds/cio-mobile-wireless-technology.xml'),
+                        (u'Security', u'http://www.cio.co.uk/rss/feeds/cio-security-technology.xml'),
+                        (u'Storage', u'http://www.cio.co.uk/rss/feeds/cio-storage-technology.xml'),
+                        (u'Desktop and client', u'http://www.cio.co.uk/rss/feeds/cio-desktop-client-technology.xml'),
+                        (u'Outsourcing', u'http://www.cio.co.uk/rss/feeds/cio-outsourcing-technology.xml'),
+                        (u'Internet and e-commerce', u'http://www.cio.co.uk/rss/feeds/cio-internet-technology.xml'),
+                        (u'Database management', u'http://www.cio.co.uk/rss/feeds/cio-database-management-technology.xml'),
+                        (u'Communications and networking ', u'http://www.cio.co.uk/rss/feeds/cio-communication-networking-technology.xml'),
+                        (u'Grid computing', u'http://www.cio.co.uk/rss/feeds/cio-grid-computing-cloud-technology.xml'),
+                        (u'Enterprise search', u'http://www.cio.co.uk/rss/feeds/cio-enterprise-search-technology.xml'),
+                        (u'CRM ', u'http://www.cio.co.uk/rss/feeds/cio-crm-technology.xml'),
+                        (u'Ade McCormack ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-ade-mccormack.xml'),
+                        (u'Andy Hayler ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-andy-hayler.xml'),
+                        (u'CEB ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-ceb.xml'),
+                        (u'CIO Staff ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-cio-staff.xml'),
+                        (u'Dave Pepperell ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-dave-pepperell.xml'),
+                        (u'Elliot Limb ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-elliot-limb.xml'),
+                        (u'Freeform Dynamics ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-freeform-dynamics.xml'),
+                        (u'Giles Nelson ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-giles-nelson.xml'),
+                        (u'Mark Chillingworth ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-mark-chillingworth.xml'),
+                        (u'Martin Veitch ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-martin-veitch.xml'),
+                        (u'Mike Altendorf ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-mike-altendorf.xml'),
+                        (u'Richard Steel ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-richard-steel.xml'),
+                        (u'Richard Sykes ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-richard-sykes.xml'),
+                        (u'Rob Llewellyn ', u'http://www.cio.co.uk/rss/feeds/cio-opinion-rob-llewellyn.xml'),
+                        (u'Free thinking ', u'http://www.cio.co.uk/rss/feeds/cio-blog-free-thinking.xml'),
+                        (u'Leading CIOs ', u'http://www.cio.co.uk/rss/feeds/cio-blog-leading-cios.xml'),
+                        (u'CIO News View ', u'http://www.cio.co.uk/rss/feeds/cio-blog-cio-news-view.xml'),
+                        (u'CIO Blog ', u'http://www.cio.co.uk/rss/feeds/cio-blog-cio-blog.xml'),
+                        (u'Transformation CIO ', u'http://www.cio.co.uk/rss/feeds/cio-blog-transformation-cio.xml')
+                    ]
+
+    extra_css = '''
+                h1 {color:#FF2222;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
+                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                img {align:left;}
+                '''
--- a/resources/recipes/computer_active.recipe
+++ b/resources/recipes/computer_active.recipe
@ -0,0 +1,91 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
+
+'''
+http://www.computeractive.co.uk/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class computeractive(BasicNewsRecipe):
+    __author__    = 'Lorenzo Vigentini'
+    description   = 'Computeractive publishes new downloads, reviews, news stories, step-by-step guides and answers to PC problems every day.'
+    cover_url     = 'http://images.pcworld.com/images/common/header/header-logo.gif'
+
+    title          = 'Computer act!ve'
+    publisher      = 'Incisive media'
+    category       = 'PC, video, computing, product reviews, editing, cameras, production'
+
+    language       = 'en'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 7
+    max_articles_per_feed = 25
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    keep_only_tags     = [
+                            dict(name='div', attrs={'id':'main'})
+                        ]
+
+    remove_tags        = [
+                            dict(name='div', attrs={'id':['seeAlsoTags','commentsModule','relatedArticles','mainLeft','mainRight']}),
+                            dict(name='div', attrs={'class':['buyIt','detailMpu']}),
+                            dict(name='a', attrs={'class':'largerImage'})
+                        ]
+
+    feeds          = [
+                       (u'General content', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/all'),
+                       (u'News', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/news'),
+                       (u'Downloads', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/downloads'),
+                       (u'Hardware', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/hardware'),
+                       (u'Software', u'http://feeds.computeractive.co.uk/rss/latest/computeractive/software'),
+                       (u'Competitions', u'http://www.v3.co.uk/feeds/rss20/personal-technology/competitions')
+                     ]
+
+
+    extra_css = '''
+                h1 {font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+                h2 {font-family:Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
+                h3 {color:#333333;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:14px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .author {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                p {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
+                .breadcrumbs {margin:0 0 0.6em 0;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:8px;}
+                #breadcrumbsLeft {width:360px; }
+                .breadcrumbs ul {color:#999; display:inline; margin:1em 0 0 0; padding:0; list-style:none; }
+                .breadcrumbs li { display:inline; }
+                .breadcrumbs a:link, .breadcrumbs a:visited { color:#999; text-decoration:none; }
+                .breadcrumbs a:hover, .breadcrumbs a:active { color:#999; text-decoration:underline; }
+                #postHeader #reviewDetails { padding-left: 0px; }
+                #reviewDetails { float:left; margin:0 0 0 10px; padding:0; width:574px; border-top:1px dotted #0071BC; }
+        	#reviewDetails div { margin:0; padding:0; }
+		#reviewDetailsLeft { float:left; width:334px; margin:0 10px 0 0; padding:0; }
+		#reviewDetailsRight { float:right; width:230px; margin:0; padding:0; }
+    		#reviewDetails div h2 { font-size:1.2em; float:none; margin:0.5em 0 0.5em 0; padding:0; }
+		#reviewDetails #verdict { width:334px; float:left; margin:0; padding:0; }
+		#reviewDetails #ratings, #reviewDetails #price { width:230px; float:left; margin:0; padding:0; }
+		#reviewDetails #ratings img { border:0; margin:0; padding:0; }
+		#verdict p strong { width:334px; float:left; margin:0 0 0.25em; padding:0; }
+		#verdict ul { width:334px; float:left; margin:0; padding:0; }
+		#verdict li { width:334px; float:left; list-style:none; clear:left; margin:0 4px 0.3em 0px; padding:0 0 0 12px;}
+		html > body #verdict li { width:322px; }
+                #post { margin-bottom:2em; clear:both; }
+                #post .content p { margin:1em 0; line-height:1.5em; }
+                #post p a:link { color:#005599; text-decoration:none; font-weight:bold; }
+                #post p a:hover, #post p a:active { color:#cc0000; text-decoration:underline; }
+                #post p a:visited { color:#003366; text-decoration:none; font-weight:bold; }
+                #postHeader .author { font-weight:normal; margin:1em 8px 0.25em 0; }
+                #postHeader .postMetaData { color:#666; margin:0 8px 0 0; }
+                '''
+
--- a/resources/recipes/digital_arts.recipe
+++ b/resources/recipes/digital_arts.recipe
@ -0,0 +1,76 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'Digital Arts - comprehensive coverage of the art of graphic design, 3D, animation, video, effects, web and interactive design, in print and online.'
+
+'''
+http://media.digitalartsonline.co.uk/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+temp_files = []
+articles_are_obfuscated = True
+
+class digiArts(BasicNewsRecipe):
+    __author__    = 'Lorenzo Vigentini'
+    description   = 'Digital Arts - comprehensive coverage of the art of graphic design, 3D, animation, video, effects, web and interactive design, in print and online.'
+    cover_url     = 'http://media.digitalartsonline.co.uk/graphics/logo_digital_arts.gif'
+
+    title          = 'Digital Arts Magazine  '
+    publisher      = 'IDG Communication'
+    category       = 'Multimedia, photo, video, computing, product reviews, editing, cameras, production'
+
+    language       = 'en'
+    encoding       = 'cp1252'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 30
+    max_articles_per_feed = 100
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    def get_obfuscated_article(self, url):
+        # Open url + '&print', follow a link from that page, write the
+        # response body to a temporary file and return the file's path.
+        br = self.get_browser()
+        br.open(url+'&print')
+
+        # NOTE(review): url is passed positionally together with nr; if br
+        # is a mechanize browser, follow_link expects either a Link object
+        # or keyword filters, not both - confirm this call works as intended.
+        response = br.follow_link(url, nr = 0)
+        html = response.read()
+
+        # NOTE(review): temp_files (and articles_are_obfuscated) are assigned
+        # at module level in this file, not on the class, so self.temp_files
+        # only resolves if the base class provides it - confirm.
+        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+        self.temp_files[-1].write(html)
+        self.temp_files[-1].close()
+        # The file object stays in self.temp_files; only its path is returned.
+        return self.temp_files[-1].name
+
+    keep_only_tags     = [
+                            dict(name='div', attrs={'id':['articleHeader','articleContent']})
+                        ]
+
+    remove_tags        = [
+                            dict(name='div', attrs={'class':['submissionBar','mpuContainer']}),
+                            dict(name='div', attrs={'id':['articleSidebar','articleFooter']})
+                        ]
+    remove_tags_after   = [
+                            dict(name='p', attrs={'id':'articlePageList'})
+                        ]
+    feeds          = [
+                       (u'Content', u'http://rss.feedsportal.com/c/662/f/8410/index.rss')
+                    ]
+
+    extra_css = '''
+                h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
+                h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .author {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                img {align:left;}
+                '''
--- a/resources/recipes/kidney.recipe
+++ b/resources/recipes/kidney.recipe
@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class JASN(BasicNewsRecipe):
+    title          = u'Journal of the American Society of Nephrology'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 31 #days
+    max_articles_per_feed = 25
+    needs_subscription = True
+
+    INDEX = 'http://jasn.asnjournals.org/current.shtml'
+    no_stylesheets = True
+    remove_tags_before = dict(name='h2')
+    #remove_tags_after  = dict(name='th', attrs={'align':'left'})
+    remove_tags = [
+       dict(name='iframe'),
+       #dict(name='div', attrs={'class':'related-articles'}),
+       dict(name='td', attrs={'id':['jasnFooter']}),
+       dict(name='table', attrs={'id':"jasnNavBar"}),
+       dict(name='table', attrs={'class':'content_box_outer_table'}),
+       dict(name='th', attrs={'align':'left'})
+    ]
+
+
+
+    #TO LOGIN
+    def get_browser(self):
+        '''
+        Return a browser that has been signed in to the JASN site.
+
+        The page at ``INDEX`` is fetched and its parsed soup is stored on
+        ``self.kidney_toc_soup``. Inside the ``tocTable`` element, the link
+        around the first text containing '[Full&nbsp;Text]' is opened to
+        reach the ``UserSignIn`` form, which is submitted with
+        ``self.username`` and ``self.password``.
+
+        Raises ValueError when the sign-in response does not contain
+        'Sign Out'.
+        '''
+        # get_browser is invoked through the base class here, so the
+        # instance has to be passed explicitly.
+        br = BasicNewsRecipe.get_browser(self)
+        self.kidney_toc_soup = BeautifulSoup(br.open(self.INDEX).read())
+        toc = self.kidney_toc_soup.find(id='tocTable')
+        t = toc.find(text=lambda x: x and '[Full&nbsp;Text]' in x)
+        a = t.findParent('a', href=True)
+        url = a.get('href')
+        if url.startswith('/'):
+            # Site-relative link: prefix the journal host.
+            url = 'http://jasn.asnjournals.org'+url
+        br.open(url)
+        br.select_form(name='UserSignIn')
+        br['username'] = self.username
+        br['code'] = self.password
+        response = br.submit()
+        raw = response.read()
+        # A signed-in page is recognised by its 'Sign Out' text.
+        if 'Sign Out' not in raw:
+            raise ValueError('Failed to log in, is your account expired?')
+        return br
+
+    feeds          = [
+        ('JASN',
+        'http://jasn.asnjournals.org/rss/current.xml'),
+    ]
+
+
+
+    def preprocess_html(self, soup):
+        '''
+        Replace each '[in this window]' link's enclosing table with the
+        image found on the page that link points to.
+
+        For every text node containing '[in this window]', the enclosing
+        ``<a>`` is looked up, its target page is fetched with
+        ``index_to_soup`` and the first ``<img>`` whose ``src`` starts with
+        '/content/' is moved into the article in place of the ``<table>``
+        that surrounds the link. Links without an ``href``, without a
+        matching image, or without an enclosing anchor/table are left
+        untouched.
+
+        Returns the modified ``soup``.
+        '''
+        for text in soup.findAll(text=lambda x: x and '[in this window]' in x):
+            a = text.findParent('a')
+            if a is None:
+                # Marker text that is not inside a link: nothing to follow.
+                continue
+            url = a.get('href', None)
+            if not url:
+                continue
+            if url.startswith('/'):
+                # url already begins with '/', so the host is prefixed
+                # without a trailing slash.
+                url = 'http://jasn.asnjournals.org'+url
+            # Look the image up for every link, so img is always bound to
+            # the result for this link and never left over from an earlier
+            # iteration.
+            isoup = self.index_to_soup(url)
+            img = isoup.find('img', src=lambda x: x and
+                    x.startswith('/content/'))
+            if img is None:
+                continue
+            table = a.findParent('table')
+            if table is None:
+                continue
+            # Detach the image from the fetched page before inserting it.
+            img.extract()
+            table.replaceWith(img)
+        return soup
+
+
+
--- a/resources/recipes/mac_video.recipe
+++ b/resources/recipes/mac_video.recipe
@ -0,0 +1,82 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'MacVideo is an independent journal not affiliated with Apple Computer, It is a publication of IDG Communication focusing on video production and editing.'
+
+'''
+http://www.macvideo.tv/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+temp_files = []
+articles_are_obfuscated = True
+
+class macVideo(BasicNewsRecipe):
+    __author__    = 'Lorenzo Vigentini'
+    description   = 'MacVideo is an independent journal not affiliated with Apple Computer, It is a publication of IDG Communication focusing on video production and editing.'
+    cover_url     = 'http://www.macvideo.tv/images/shared/macvideo-logo.jpg'
+
+    title          = 'MacVideo '
+    publisher      = 'IDG Communication'
+    category       = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
+
+    language       = 'en'
+    encoding       = 'cp1252'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 30
+    max_articles_per_feed = 25
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    def get_obfuscated_article(self, url):
+        # Open url + '&print', follow a link from that page, write the
+        # response body to a temporary file and return the file's path.
+        br = self.get_browser()
+        br.open(url+'&print')
+
+        # NOTE(review): url is passed positionally together with nr; if br
+        # is a mechanize browser, follow_link expects either a Link object
+        # or keyword filters, not both - confirm this call works as intended.
+        response = br.follow_link(url, nr = 0)
+        html = response.read()
+
+        # NOTE(review): temp_files (and articles_are_obfuscated) are assigned
+        # at module level in this file, not on the class, so self.temp_files
+        # only resolves if the base class provides it - confirm.
+        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+        self.temp_files[-1].write(html)
+        self.temp_files[-1].close()
+        # The file object stays in self.temp_files; only its path is returned.
+        return self.temp_files[-1].name
+
+    keep_only_tags     = [
+                            dict(name='div', attrs={'id':'mainContent'})
+                        ]
+
+    remove_tags        = [
+                            dict(name='div', attrs={'class':['submissionBar','mpuContainer']}),
+                            dict(name='p', attrs={'class':'articlePag'}),
+                            dict(name='ul', attrs={'id':'articleIconsList'})
+                        ]
+
+    feeds          = [
+                       (u'News', u'http://www.macvideo.tv/rss/feeds/macvideo-news.xml'),
+                       (u'Reviews', u'http://www.macvideo.tv/rss/feeds/macvideo-reviews.xml'),
+                       (u'Interviews', u'http://www.macvideo.tv/rss/feeds/macvideo-features-interviews.xml'),
+                       (u'Features', u'http://www.macvideo.tv/rss/feeds/macvideo-features-features.xml'),
+                       (u'Rick Young', u'http://www.macvideo.tv/rss/feeds/blog100140.xml'),
+                       (u'Matt Davis', u'http://www.macvideo.tv/rss/feeds/blog101658.xml'),
+                       (u'Adrian Miskelly', u'http://www.macvideo.tv/rss/feeds/blog101750.xml')
+                     ]
+
+    extra_css = '''
+                h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
+                h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .author {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                p {font-family:Arial,Helvetica,sans-serif; font-size:10px;}
+                img {align:left;}
+                '''
--- a/resources/recipes/mac_world.recipe
+++ b/resources/recipes/mac_world.recipe
@ -0,0 +1,94 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'Macworld is an independent journal not affiliated with Apple Computer.'
+
+'''
+http://www.macworld.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+# NOTE(review): these two names are module-level globals, not attributes of the
+# macWorld class defined below, while macWorld.get_obfuscated_article() appends
+# to self.temp_files. Presumably both were meant to live in the class body, as
+# mac_world_uk.recipe does -- confirm before relying on them.
+temp_files = []
+articles_are_obfuscated = True
+
+class macWorld(BasicNewsRecipe):
+    # Feed recipe for macworld.com: metadata, the article fetch helper, the
+    # page clean-up rules, the feed list and the extra CSS, in that order.
+    #
+    # NOTE(review): `temp_files` and `articles_are_obfuscated` are assigned at
+    # module level above this class, not in the class body, so they are not
+    # attributes of this class -- confirm whether that is intended.
+    __author__    = 'Lorenzo Vigentini'
+    description   = 'Macworld is an independent journal not affiliated with Apple Computer.'
+    cover_url     = 'http://images.macworld.com/images/templates/v4/mw-logo.gif'
+
+    title          = 'Mac World '
+    publisher      = 'IDG Communication'
+    category       = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
+
+    language       = 'en'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 7
+    max_articles_per_feed = 20
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    def get_obfuscated_article(self, url):
+        '''
+        Fetch the '&print' variant of `url`, store the downloaded HTML in a
+        persistent temporary file and return the path of that file.
+        '''
+        br = self.get_browser()
+        # Read the response of open() directly. Passing the URL string to
+        # follow_link() together with nr=0 mixes a positional link with
+        # find_link keywords, which a mechanize-style browser rejects.
+        # NOTE(review): assumes get_browser() returns such a browser -- confirm.
+        response = br.open(url+'&print')
+        html = response.read()
+
+        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+        self.temp_files[-1].write(html)
+        self.temp_files[-1].close()
+        return self.temp_files[-1].name
+
+    # Only the element with id "article" is kept from each page.
+    keep_only_tags     = [
+                            dict(name='div', attrs={'id':'article'})
+                        ]
+
+    # Elements stripped from the kept content.
+    remove_tags        = [
+                            dict(name='div', attrs={'class':['toolBar','mac_tags','toolBar  btmTools','textAds']}),
+                            dict(name='p', attrs={'class':'breadcrumbs'}),
+                            dict(name='div', attrs={'id':['breadcrumb','sidebar','comments']})
+                        ]
+
+    # (section title, feed URL) pairs.
+    feeds          = [
+                       (u'MacWorld Headlines', u'http://rss.macworld.com/macworld/news'),
+                       (u'How-To', u'http://rss.macworld.com/macworld/howto'),
+                       (u'Security', u'http://rss.macworld.com/macworld/topics/security'),
+                       (u'MAC IT', u'http://rss.macworld.com/macworld/topics/mac_it'),
+                       (u'Business Mac', u'http://rss.macworld.com/macworld/topics/business_mac'),
+                       (u'Reviews', u'http://rss.macworld.com/macworld/reviews'),
+                       (u'Products: Mac', u'http://rss.macworld.com/macworld/products/mac'),
+                       (u'Products: iPod', u'http://rss.macworld.com/macworld/products/ipod'),
+                       (u'Products: iPhone', u'http://rss.macworld.com/macworld/products/iphone'),
+                       (u'Products: Software', u'http://rss.macworld.com/macworld/products/mac/software'),
+                       (u'OSX Hints', u'http://rss.macworld.com/macworld/weblogs/macosxhints'),
+                       (u'Mac Gems', u'http://rss.macworld.com/macworld/weblogs/macgems'),
+                       (u'Mac 911', u'http://rss.macworld.com/macworld/weblogs/mac911'),
+                       (u'Game Room', u'http://rss.macworld.com/macworld/topics/games'),
+                       (u'Editors notes', u'http://rss.macworld.com/macworld/weblogs/editors'),
+                       (u'Creative notes', u'http://rss.macworld.com/macworld/weblogs/creative'),
+                       (u'Playlist', u'http://rss.macworld.com/macworld/weblogs/ipodblog'),
+                       (u'Mobile', u'http://rss.macworld.com/macworld/weblogs/mobile'),
+                       (u'From the lab', u'http://rss.macworld.com/macworld/weblogs/macworldlab'),
+                       (u'MacUser', u'http://rss.macworld.com/macworld/weblogs/macuser')
+                     ]
+
+    extra_css = '''
+                h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
+                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                img {align:left;}
+                '''
--- a/resources/recipes/mac_world_uk.recipe
+++ b/resources/recipes/mac_world_uk.recipe
@ -0,0 +1,91 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'Macworld is a publication of IDG Communication in the UK specifically on the Apple Mac.'
+
+'''
+http://www.macworld.co.uk/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+class pcMag(BasicNewsRecipe):
+    # Recipe for the macworld.co.uk feeds. get_obfuscated_article() saves the
+    # page behind each article's '&print' link to a temporary file.
+    # NOTE(review): the class name is the same as in pc_mag.recipe and looks
+    # like a copy-paste leftover -- confirm nothing depends on it before renaming.
+    title = 'Mac World UK '
+    __author__ = 'Lorenzo Vigentini'
+    description = 'Macworld is a publication of IDG Communication in the UK specifically on the Apple Mac.'
+    publisher = 'IDG Communication'
+    category = 'Apple, Mac, computing, product reviews, UK'
+    cover_url = 'http://media.macworld.co.uk/images/masthead.jpg'
+    language = 'en_GB'
+    timefmt = '[%a, %d %b, %Y]'
+
+    max_articles_per_feed = 25
+    oldest_article = 15
+    recursion = 10
+    use_embedded_content = False
+    no_stylesheets = True
+    remove_javascript = True
+
+    articles_are_obfuscated = True
+    temp_files = []
+
+    def get_obfuscated_article(self, url):
+        '''
+        Open `url`, follow the first link matching the regex '&print$', write
+        the fetched HTML to a persistent temporary file and return its path.
+        '''
+        browser = self.get_browser()
+        browser.open(url)
+        page = browser.follow_link(url_regex='&print$', nr = 0).read()
+
+        # Register the file before writing, as the recipe has always done.
+        tmp = PersistentTemporaryFile('_fa.html')
+        self.temp_files.append(tmp)
+        tmp.write(page)
+        tmp.close()
+        return tmp.name
+
+    # (section title, feed URL) pairs.
+    feeds = [
+        (u'MacWorld Headlines', u'http://www.macworld.co.uk/rss/macworld.xml'),
+        (u'Reviews', u'http://www.macworld.co.uk/rss/reviews.xml'),
+        (u'Masterclass', u'http://www.macworld.co.uk/rss/masterclasses.xml'),
+        (u'MacWorld Team', u'http://www.macworld.co.uk/rss/blog8.xml'),
+        (u'Andy Ihnatko', u'http://www.macworld.co.uk/rss/blog7.xml'),
+        (u'Andy Penfold', u'http://www.macworld.co.uk/rss/blog11.xml'),
+        (u'Jonny Evans', u'http://www.macworld.co.uk/rss/blog1.xml'),
+        (u'Karen Haslam', u'http://www.macworld.co.uk/rss/blog4.xml'),
+        (u'Mark Hattersley', u'http://www.macworld.co.uk/rss/blog2.xml'),
+        (u'Nick Spence', u'http://www.macworld.co.uk/rss/blog12.xml'),
+        (u'Simon Iary', u'http://www.macworld.co.uk/rss/blog3.xml'),
+    ]
+
+    # Page clean-up rules: what to keep, what to drop, and where to cut off.
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'id': 'wrapper'}},
+    ]
+    remove_tags = [
+        {'name': 'div', 'attrs': {'class': 'bannerContainer'}},
+        {'name': 'p', 'attrs': {'class': 'breadcrumbs'}},
+        {'name': 'ul', 'attrs': {'id': 'articleIconsList'}},
+    ]
+    remove_tags_after = [
+        {'name': 'p', 'attrs': {'id': 'articlePageList'}},
+    ]
+
+    extra_css = '''
+                h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
+                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
+                p.authorCredit {-x-system-font:none;font-family:Arial,sans-serif;font-size:10pt;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.1em;}
+                p.date {font-size:10pt;margin-bottom:0;}
+                img {align:left;}
+                '''
+
+
+
--- a/resources/recipes/pc_advisor.recipe
+++ b/resources/recipes/pc_advisor.recipe
@ -0,0 +1,87 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'PC Advisor delivers expert advice you can trust to business and home PC users who want to buy the best-value equipment and make the most out of the equipment they already own.'
+
+'''
+http://www.pcadvisor.co.uk/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class pcAdvisor(BasicNewsRecipe):
+    # Feed recipe for pcadvisor.co.uk: metadata, page clean-up rules, the
+    # article URL hook, the feed list and the extra CSS, in that order.
+    __author__    = 'Lorenzo Vigentini'
+    description   = 'PC Advisor delivers expert advice you can trust to business and home PC users who want to buy the best-value equipment and make the most out of the equipment they already own.'
+
+    cover_url      = 'http://media.pcadvisor.co.uk/images/spacer.gif'
+    title          = 'Pc Advisor '
+    publisher      = 'IDG Communication'
+    category       = 'PC, computing, product reviews, UK'
+
+    language       = 'en'
+    encoding       = 'cp1252'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 15
+    max_articles_per_feed = 25
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets = True
+
+    # Only the element with id "articlecontent" is kept from each page.
+    keep_only_tags     = [
+                            dict(name='div', attrs={'id':'articlecontent'})
+                        ]
+
+    # Elements stripped from the kept content. The inline-style rule used to
+    # list the same style string twice; one entry matches the same elements.
+    remove_tags        = [
+                            dict(name='div', attrs={'id':['crosssitesignup','submitarticle','dontPrint','commentsForm','userReviewFormContainer','reevooContainerId']}),
+                            dict(name='div', attrs={'class':'mpu'}),
+                            dict(name='p', attrs={'id':'articlePageList'}),
+                            dict(name='div', attrs={'style':['margin: 0pt 10px 5px;']}),
+                            dict(name='p', attrs={'class':'dontPrint'}),
+                            dict(name='h2', attrs={'class':'sectionTitle'}),
+                            dict(name='a', attrs={'title':'Subscribe to PC Advisor'}),
+                            dict(name='a', attrs={'name':'revooContent'}),
+                            {'name':['form','script','link']}
+                        ]
+
+    remove_tags_after = [
+                            dict(name='p', attrs={'id':'crosssitesignup'})
+                        ]
+
+    def get_article_url(self, article):
+        '''
+        Return the feed entry's 'guid' value as the article URL, or None when
+        the entry has no 'guid'.
+        '''
+        return article.get('guid',  None)
+
+    # (section title, feed URL) pairs.
+    feeds          = [
+                       (u'News Headlines', u'http://www.pcadvisor.co.uk/rss/feeds/pcanews.xml'),
+                       (u'Reviews', u'http://www.pcadvisor.co.uk/rss/feeds/pcareviews.xml'),
+                       (u'New Products', u'http://www.pcadvisor.co.uk/rss/feeds/blog18.xml'),
+                       (u'PC Advisor Blog', u'http://www.pcadvisor.co.uk/rss/feeds/blog4.xml'),
+                       (u'PC Security', u'http://www.pcadvisor.co.uk/rss/feeds/pca-security.xml'),
+                       (u'Laptops', u'http://www.pcadvisor.co.uk/rss/feeds/pca-laptop.xml'),
+                       (u'Green Computing', u'http://www.pcadvisor.co.uk/rss/feeds/pca-green-computing.xml'),
+                       (u'Internet and broadband', u'http://www.pcadvisor.co.uk/rss/feeds/pca-internet.xml'),
+                       (u'Phones and PDAs', u'http://www.pcadvisor.co.uk/rss/feeds/pca-phones.xml'),
+                       (u'Software', u'http://www.pcadvisor.co.uk/rss/feeds/pca-software.xml'),
+                       (u'Small Business', u'http://www.pcadvisor.co.uk/rss/feeds/pca-small-business.xml'),
+                       (u'Photo and video', u'http://www.pcadvisor.co.uk/rss/feeds/pca-photo-video.xml'),
+                       (u'Mac News', u'http://www.pcadvisor.co.uk/rss/feeds/pca-mac.xml'),
+                       (u'Linux', u'http://www.pcadvisor.co.uk/rss/feeds/pca-linux.xml'),
+                       (u'WiFi and Networking', u'http://www.pcadvisor.co.uk/rss/feeds/pca-networking.xml'),
+                       (u'Gadgets', u'http://www.pcadvisor.co.uk/rss/feeds/pca-gadgets.xml')
+                     ]
+
+    extra_css = '''
+                h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
+                h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .author {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                p {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
+                '''
--- a/resources/recipes/pc_mag.recipe
+++ b/resources/recipes/pc_mag.recipe
@ -0,0 +1,56 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '13, January 2010'
+__description__ = 'PCMag (www.pcmag.com) delivers authoritative, labs-based comparative reviews of computing and Internet products to highly engaged technology buyers.'
+
+'''
+http://www.pcmag.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class pcMag(BasicNewsRecipe):
+    # Recipe for the PC Magazine / Ziff Davis feeds listed in `feeds`.
+    title = 'PC Magazine'
+    __author__ = 'Lorenzo Vigentini'
+    description = 'PCMag (www.pcmag.com) delivers authoritative, labs-based comparative reviews of computing and Internet products to highly engaged technology buyers.'
+    publisher = 'Ziff Davis Media'
+    category = 'PC, computing, product reviews'
+    cover_url = 'http://www.pcmag.com/images/bg-logo-sharp.2.gif'
+
+    language = 'en'
+    encoding = 'cp1252'
+    timefmt = '[%a, %d %b, %Y]'
+
+    max_articles_per_feed = 25
+    oldest_article = 15
+    recursion = 10
+    use_embedded_content = False
+    no_stylesheets = True
+    remove_javascript = True
+
+    # (section title, feed URL) pairs.
+    # NOTE(review): some titles do not obviously match their file names (e.g.
+    # 'Breaking News' -> pcmagtips.xml) -- verify the pairing against the site.
+    feeds = [
+        (u'Tech Commentary from the Editors of PC Magazine', u'http://rssnewsapps.ziffdavis.com/PCMAG_commentary.xml'),
+        (u'PC Magazine Breaking News', u'http://rssnewsapps.ziffdavis.com/pcmagtips.xml'),
+        (u'PC Magazine Tips and Solutions', u'http://rssnewsapps.ziffdavis.com/pcmagofficetips.xml'),
+        (u'PC Magazine Small Business', u'http://blogs.pcmag.com/atwork/index.xml'),
+        (u'PC Magazine Security Watch', u'http://feeds.ziffdavis.com/ziffdavis/securitywatch?format=xml'),
+        (u'PC Magazine: the Official John C. Dvorak RSS Feed', u'http://rssnewsapps.ziffdavis.com/PCMAG_dvorak.xml'),
+        (u'PC Magazine Editor-in-Chief Lance Ulanoff', u'http://rssnewsapps.ziffdavis.com/pcmagulanoff.xml'),
+        (u'Michael Millers Forward Thinking from PCMag.com', u'http://feeds.ziffdavis.com/ziffdavis/pcmag-miller?format=xml'),
+        (u'Technology News from Ziff Davis', u'http://rssnewsapps.ziffdavis.com/pcmagbreakingnews.xml'),
+    ]
+
+    # Page clean-up rules: keep the article container, then drop these ids.
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'id': 'articleContent'}},
+    ]
+    remove_tags = [
+        {'name': 'div', 'attrs': {'id': ['microAd','intellitxt','articleDeckTalkback','inlineDigg','underArticleLinks','w_talkback']}},
+        {'name': 'span', 'attrs': {'id': ['highlights_content','yahooBuzzBadge-48558872521263350499378']}},
+    ]
+
--- a/resources/recipes/pc_world.recipe
+++ b/resources/recipes/pc_world.recipe
@ -0,0 +1,105 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'PC World and Macworld consistently deliver editorial excellence through award-winning content and trusted product reviews.'
+
+'''
+http://www.pcworld.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+# NOTE(review): these two names are module-level globals, not attributes of the
+# pcWorld class defined below, while pcWorld.get_obfuscated_article() appends
+# to self.temp_files. Presumably both were meant to live in the class body, as
+# tech_world.recipe does -- confirm before relying on them.
+temp_files = []
+articles_are_obfuscated = True
+
+class pcWorld(BasicNewsRecipe):
+    # Feed recipe for pcworld.com: metadata, the article fetch helper, the
+    # page clean-up rules, the feed list and the extra CSS, in that order.
+    #
+    # NOTE(review): `temp_files` and `articles_are_obfuscated` are assigned at
+    # module level above this class, not in the class body, so they are not
+    # attributes of this class -- confirm whether that is intended.
+    __author__    = 'Lorenzo Vigentini'
+    description   = 'PC World and Macworld consistently deliver editorial excellence through award-winning content and trusted product reviews.'
+    cover_url     = 'http://images.pcworld.com/images/common/header/header-logo.gif'
+
+    title          = 'PCWorld '
+    publisher      = 'IDG Communication'
+    category       = 'PC, video, computing, product reviews, editing, cameras, production'
+
+    language       = 'en'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 7
+    max_articles_per_feed = 20
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    def get_obfuscated_article(self, url):
+        '''
+        Fetch the '&print' variant of `url`, store the downloaded HTML in a
+        persistent temporary file and return the path of that file.
+        '''
+        br = self.get_browser()
+        # Read the response of open() directly. Passing the URL string to
+        # follow_link() together with nr=0 mixes a positional link with
+        # find_link keywords, which a mechanize-style browser rejects.
+        # NOTE(review): assumes get_browser() returns such a browser -- confirm.
+        response = br.open(url+'&print')
+        html = response.read()
+
+        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+        self.temp_files[-1].write(html)
+        self.temp_files[-1].close()
+        return self.temp_files[-1].name
+
+    # Only elements with class "article" are kept from each page.
+    keep_only_tags     = [
+                            dict(name='div', attrs={'class':'article'})
+                        ]
+
+    # Elements stripped from the kept content.
+    remove_tags        = [
+                            dict(name='div', attrs={'class':['toolBar','mac_tags','toolBar  btmTools','recommend longRecommend','recommend shortRecommend','textAds']}),
+                            dict(name='div', attrs={'id':['sidebar','comments','mac_tags']}),
+                            dict(name='ul', attrs={'class':'tools'}),
+                            dict(name='li', attrs={'class':'sub'})
+                        ]
+
+    # (section title, feed URL) pairs.
+    feeds          = [
+                       (u'PCWorld Headlines', u'http://feeds.pcworld.com/pcworld/latestnews'),
+                       (u'How-To', u'http://feeds.pcworld.com/pcworld/update/howto'),
+                       (u'Today@PCWorld', u'http://feeds.pcworld.com/pcworld/blogs/todayatpcw'),
+                       (u'Reviews', u'http://feeds.pcworld.com/pcworld/update/reviews'),
+                       (u'Most Popular Downloads', u'http://feeds.pcworld.com/pcworld/downloads/monthly'),
+                       (u'Answer Lines', u'http://feeds.pcworld.com/pcworld/blogs/answer_line'),
+                       (u'Digital Focus', u'http://feeds.pcworld.com/pcworld/blogs/digital_focus'),
+                       (u'Download this', u'http://feeds.pcworld.com/pcworld/blogs/download_this/'),
+                       (u'Game on', u'http://feeds.pcworld.com/pcworld/blogs/game_on'),
+                       (u'Geek tech', u'http://feeds.pcworld.com/pcworld/blogs/geektech/'),
+                       (u'Hassle free PC', u'http://feeds.pcworld.com/pcworld/blogs/hassle-free_pc'),
+                       (u'Mobile computing', u'http://feeds.pcworld.com/pcworld/blogs/mobile_computing'),
+                       (u'Security alert', u'http://feeds.pcworld.com/pcworld/blogs/security_alert/'),
+                       (u'BizFeed', u'http://feeds.pcworld.com/pcworld/businesscenter/bizfeed/'),
+                       (u'The Cost Cutter', u'http://feeds.pcworld.com/pcworld/businesscenter/cost_cutter/'),
+                       (u'Linux line', u'http://feeds.pcworld.com/pcworld/businesscenter/linuxline/'),
+                       (u'Net Work', u'http://feeds.pcworld.com/pcworld/businesscenter/network/'),
+                       (u'Peer-to-Peer', u'http://feeds.pcworld.com/pcworld/businesscenter/peertopeer/'),
+                       (u'Tech inciter', u'http://feeds.pcworld.com/pcworld/businesscenter/tech_inciter/'),
+                       (u'Gadgets and gear', u'http://feeds.pcworld.com/pcworld/update/gadgets'),
+                       (u'Home Entertainment', u'http://feeds.pcworld.com/pcworld/update/home-entertainment'),
+                       (u'Mobile Devices', u'http://feeds.pcworld.com/pcworld/update/mobile-devices')
+                     ]
+
+    extra_css = '''
+                h1 {color:#FF0000;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
+                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                img {align:left;}
+                #breadcrumb {}
+                #breadcrumb ul {padding:0;margin:2px 0 0 0;}
+                #breadcrumb li {list-style:none;display:inline;padding:0;}
+                #breadcrumb li big {padding-right:2px;}
+                #articleHead {border-top:1px solid #CCC;padding-top:5px;clear:both;margin-bottom:10px;}
+                #articleHead h1 {font-size:25px;line-height:28px;margin:10px 0px 2px;padding:0px;}
+                #articleHead h2 {font-size:14px;line-height:16px;margin:0px 0px 6px;padding:0px;}
+                #articleHead p {font-size:15px;font-weight:bold;margin:0px;padding:0px;}
+                #articleHead .date {color:#999;margin:0px 0px 20px;padding:0px;}
+                '''
--- a/resources/recipes/tech_world.recipe
+++ b/resources/recipes/tech_world.recipe
@ -0,0 +1,92 @@
+#!/usr/bin/env  python
+__license__     = 'GPL v3'
+__author__      = 'Lorenzo Vigentini'
+__copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__ = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'
+
+'''
+http://www.techworld.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+class techworld(BasicNewsRecipe):
+    # Feed recipe for techworld.com: metadata, the article fetch helper, the
+    # page clean-up rules, the feed list and the extra CSS, in that order.
+    __author__     = 'Lorenzo Vigentini'
+    description   = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'
+    cover_url     = 'http://www.techworld.com/graphics/header/site_logo.jpg'
+
+    title          = 'TechWorld'
+    publisher      = 'IDG Communication'
+    category       = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
+
+    language       = 'en'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    oldest_article        = 7
+    max_articles_per_feed = 15
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    temp_files = []
+    articles_are_obfuscated = True
+
+    def get_obfuscated_article(self, url):
+        '''
+        Open `url`, follow the first link whose URL matches
+        '?getDynamicPage&print' at its end, write the fetched HTML to a
+        persistent temporary file and return the path of that file.
+        '''
+        br = self.get_browser()
+        br.open(url)
+        # The '?' must be escaped: as a bare leading quantifier it makes the
+        # pattern invalid ("nothing to repeat") when it is compiled.
+        response = br.follow_link(url_regex=r'\?getDynamicPage&print$', nr = 0)
+        html = response.read()
+        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
+        self.temp_files[-1].write(html)
+        self.temp_files[-1].close()
+        return self.temp_files[-1].name
+
+    # Elements kept from each page.
+    keep_only_tags     = [
+                            dict(name='div', attrs={'id':'articleBody'}),
+                            dict(name='h2', attrs={'class':'blogTitle'}),
+                            dict(name='h3', attrs={'class':'blogger'}),
+                        ]
+
+    # Elements stripped from the kept content.
+    remove_tags        = [
+                            dict(name='div', attrs={'class':['submissionBar','mpuContainer']}),
+                            dict(name='div', attrs={'id':['breadcrumb','mainContentSidebar','articleIconsList','loginSubscribeBoxout']}),
+                            dict(name='ul', attrs={'class':'articleIconsList'})
+                        ]
+    remove_tags_after   = [
+                            dict(name='div', attrs={'id':'articleFooter'})
+                        ]
+
+    # (section title, feed URL) pairs.
+    feeds          = [
+                       (u'News', u'http://www.techworld.com/rss/feeds/techworld-news.xml'),
+                       (u'How-Tos', u'http://www.techworld.com/rss/feeds/techworld-how-tos.xml'),
+                       (u'Reviews', u'http://www.techworld.com/rss/feeds/techworld-reviews.xml'),
+                       (u'Features', u'http://www.techworld.com/rss/feeds/techworld-features.xml'),
+                       (u'Storage', u'http://www.techworld.com/rss/feeds/techworld-storage.xml'),
+                       (u'Applications', u'http://www.techworld.com/rss/feeds/techworld-applications.xml'),
+                       (u'Virtualization', u'http://www.techworld.com/rss/feeds/techworld-virtualisation.xml'),
+                       (u'Personal Tech', u'http://www.techworld.com/rss/feeds/techworld-personal-tech.xml'),
+                       (u'Green IT', u'http://www.techworld.com/rss/feeds/techworld-green-it.xml'),
+                       (u'Security', u'http://www.techworld.com/rss/feeds/techworld-security.xml'),
+                       (u'Operating Systems', u'http://www.techworld.com/rss/feeds/techworld-operating-systems.xml'),
+                       (u'Networking', u'http://www.techworld.com/rss/feeds/techworld-networking.xml'),
+                       (u'Mobile and Wireless', u'http://www.techworld.com/rss/feeds/techworld-mobile-wireless.xml'),
+                       (u'Data Centre', u'http://www.techworld.com/rss/feeds/techworld-data-centre.xml'),
+                       (u'SME', u'http://www.techworld.com/rss/feeds/techworld-sme.xml'),
+                       (u'TechWorld Blogs', u'http://blogs.techworld.com/atom.xml')
+                    ]
+
+    extra_css = '''
+                h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
+                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                img {align:left;}
+                '''
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@ -0,0 +1,261 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+online.wsj.com
+'''
+import re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class WSJ(BasicNewsRecipe):
+    '''
+    Recipe that collects the free content of the Wall Street Journal by
+    scraping the section pages of online.wsj.com (see parse_index()).
+    '''
+    # formatting adapted from original recipe by Kovid Goyal and Sujata Raman
+    title          = u'Wall Street Journal (free)'
+    __author__     = 'Nick Redding'
+    language = 'en'
+    description = ('All the free content from the Wall Street Journal (business'
+            ', financial and political news)')
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css   = '''h1{font-size:large; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;}
+                    h2{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
+                    .subhead{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
+                    .insettipUnit {font-family:Arial,Sans-serif;font-size:xx-small;}
+                    .targetCaption{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
+                    .article{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
+                    .tagline { font-size:xx-small;}
+                    .dateStamp {font-family:Arial,Helvetica,sans-serif;}
+                    h3{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
+                    .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small; list-style-type: none;}
+                    .metadataType-articleCredits {list-style-type: none;}
+                    h6{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic;}
+                    .paperLocation{font-size:xx-small;}'''
+
+    remove_tags_before = dict(name='h1')
+    remove_tags =   [   dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
+                                 "articleTabs_tab_interactive","articleTabs_tab_video",
+                                 "articleTabs_tab_map","articleTabs_tab_slideshow"]),
+                        {'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
+                                  'insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', 'tooltip',
+                                  'adSummary', 'nav-inline','insetFullBracket']},
+                        dict(rel='shortcut icon'),
+                    ]
+    remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
+
+
+    def preprocess_html(self,soup):
+        # This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
+        ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
+        if ultag:
+            a = ultag.h3
+            if a:
+                ultag.replaceWith(a)
+        return soup
+
+    def parse_index(self):
+        '''
+        Scrape the selected section pages and return a list of
+        (section title, [article dict, ...]) tuples, in the order the
+        sections are listed below. Sections that yielded no articles are
+        left out of the result.
+        '''
+
+        articles = {}
+
+        def parse_index_page(page_name,page_title,omit_paid_content):
+            # Collect the articles found on mainurl+page_name into
+            # articles[page_title]
+
+            def article_title(tag):
+                atag = tag.find('h2') # title is usually in an h2 tag
+                if not atag: # if not, get text from the a tag
+                    atag = tag.find('a',href=True)
+                    if not atag:
+                        return ''
+                    t = self.tag_to_string(atag,False)
+                    if t == '':
+                        # sometimes the title is in the second a tag
+                        atag.extract()
+                        atag = tag.find('a',href=True)
+                        if not atag:
+                            return ''
+                        return self.tag_to_string(atag,False)
+                    return t
+                return self.tag_to_string(atag,False)
+
+            def article_author(tag):
+                atag = tag.find('strong') # author is usually in a strong tag
+                if not atag:
+                    atag = tag.find('h4') # if not, look for an h4 tag
+                    if not atag:
+                        return ''
+                return self.tag_to_string(atag,False)
+
+            def article_summary(tag):
+                atag = tag.find('p')
+                if not atag:
+                    return ''
+                subtag = atag.strong
+                if subtag:
+                    subtag.extract()
+                return self.tag_to_string(atag,False)
+
+            def article_url(tag):
+                atag = tag.find('a',href=True)
+                if not atag:
+                    return ''
+                # drop the query string
+                url = re.sub(r'\?.*', '', atag['href'])
+                return url
+
+            def handle_section_name(tag):
+                # turns a tag into a section name with special processing
+                # for What's News, U.S., World & U.S. and World
+                s = self.tag_to_string(tag,False)
+                if ("What" in s) and ("News" in s):
+                    s = "What's News"
+                elif (s == "U.S.") or (s == "World & U.S.") or (s == "World"):
+                    s = s + " News"
+                return s
+
+            mainurl = 'http://online.wsj.com'
+            pageurl = mainurl+page_name
+            #self.log("Page url %s" % pageurl)
+            soup = self.index_to_soup(pageurl)
+            # Find each instance of div with class including "headlineSummary"
+            for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
+
+                # divtag contains all article data as ul's and li's
+                # first, check if there is an h3 tag which provides a section name
+                stag = divtag.find('h3')
+                if stag:
+                    # use .get(): the parent tag may carry no class attribute
+                    # at all, in which case ['class'] would raise KeyError
+                    if stag.parent.get('class') == 'dynamic':
+                        # a carousel of articles is too complex to extract a section name
+                        # for each article, so we'll just call the section "Carousel"
+                        section_name = 'Carousel'
+                    else:
+                        section_name = handle_section_name(stag)
+                else:
+                    section_name = "What's News"
+                #self.log("div Section %s" % section_name)
+                # find each top-level ul in the div
+                # we don't restrict to class = newsItem because the section_name
+                # sometimes changes via a ul tag inside the div
+                for ultag in divtag.findAll('ul',recursive=False):
+                    stag = ultag.find('h3')
+                    if stag:
+                        if stag.parent.name == 'ul':
+                            # section name has changed
+                            section_name = handle_section_name(stag)
+                            #self.log("ul Section %s" % section_name)
+                            # delete the h3 tag so it doesn't get in the way
+                            stag.extract()
+                    # find each top level li in the ul
+                    for litag in ultag.findAll('li',recursive=False):
+                        stag = litag.find('h3')
+                        if stag:
+                            # section name has changed
+                            section_name = handle_section_name(stag)
+                            #self.log("li Section %s" % section_name)
+                            # delete the h3 tag so it doesn't get in the way
+                            stag.extract()
+                        # if there is a ul tag inside the li it is superfluous;
+                        # it is probably a list of related articles
+                        utag = litag.find('ul')
+                        if utag:
+                            utag.extract()
+                        # now skip paid subscriber articles if desired
+                        subscriber_tag = litag.find(text="Subscriber Content")
+                        if subscriber_tag:
+                            if omit_paid_content:
+                                continue
+                            # delete the tip div so it doesn't get in the way
+                            tiptag = litag.find("div", { "class" : "tipTargetBox" })
+                            if tiptag:
+                                tiptag.extract()
+                        h1tag = litag.h1
+                        # if there's an h1 tag, its parent is a div which should replace
+                        # the li tag for the analysis
+                        if h1tag:
+                            litag = h1tag.parent
+                        h5tag = litag.h5
+                        if h5tag:
+                            # section name has changed
+                            section_name = self.tag_to_string(h5tag,False)
+                            #self.log("h5 Section %s" % section_name)
+                            # delete the h5 tag so it doesn't get in the way
+                            h5tag.extract()
+                        url = article_url(litag)
+                        if url == '':
+                            continue
+                        if url.startswith("/article"):
+                            url = mainurl+url
+                        if not url.startswith("http"):
+                            continue
+                        if not url.endswith(".html"):
+                            continue
+                        if 'video' in url:
+                            continue
+                        title = article_title(litag)
+                        if title == '':
+                            continue
+                        #self.log("URL %s" % url)
+                        #self.log("Title %s" % title)
+                        pubdate = ''
+                        #self.log("Date %s" % pubdate)
+                        # the section name is folded into the author field
+                        author = article_author(litag)
+                        if author == '':
+                            author = section_name
+                        elif author == section_name:
+                            author = ''
+                        else:
+                            author = section_name+': '+author
+                        #if not author == '':
+                        #    self.log("Author %s" % author)
+                        description = article_summary(litag)
+                        #if not description == '':
+                        #    self.log("Description %s" % description)
+                        articles.setdefault(page_title, []).append(
+                            dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        # customization notes: delete sections you are not interested in
+        # set omit_paid_content to False if you want the paid content article previews
+        sectionlist = ['Front Page','Commentary','World News','US News','Business','Markets',
+                       'Technology','Personal Finance','Life & Style','Real Estate','Careers','Small Business']
+        omit_paid_content = True
+
+        # (section title, page path) in the order the sections should appear
+        section_pages = [
+            ('Front Page',       '/home-page'),
+            ('Commentary',       '/public/page/news-opinion-commentary.html'),
+            ('World News',       '/public/page/news-global-world.html'),
+            ('US News',          '/public/page/news-world-business.html'),
+            ('Business',         '/public/page/news-business-us.html'),
+            ('Markets',          '/public/page/news-financial-markets-stock.html'),
+            ('Technology',       '/public/page/news-tech-technology.html'),
+            ('Personal Finance', '/public/page/news-personal-finance.html'),
+            ('Life & Style',     '/public/page/news-lifestyle-arts-entertainment.html'),
+            ('Real Estate',      '/public/page/news-real-estate-homes.html'),
+            ('Careers',          '/public/page/news-career-jobs.html'),
+            ('Small Business',   '/public/page/news-small-business-marketing.html'),
+        ]
+
+        ans = []
+        for section, page in section_pages:
+            if section in sectionlist:
+                parse_index_page(page, section, omit_paid_content)
+                ans.append(section)
+
+        # only report sections for which at least one article was found
+        return [(key, articles[key]) for key in ans if key in articles]
--- a/src/calibre/customize/init.py
+++ b/src/calibre/customize/init.py
@ -2,10 +2,10 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import sys
+import os, sys, tempfile, zipfile

-from calibre.ptempfile import PersistentTemporaryFile
 from calibre.constants import numeric_version
+from calibre.ptempfile import PersistentTemporaryFile

 class Plugin(object):
    '''
@ -249,6 +249,14 @@ class CatalogPlugin(Plugin):

    cli_options = []
    
+    def cleanup(self, path):
+        '''
+        Best-effort removal of the directory tree at ``path``.
+
+        A path that does not exist is ignored, and any error raised while
+        checking or deleting it is swallowed, so this never raises an
+        ordinary exception.
+        '''
+        import os, shutil
+        try:
+            if os.path.exists(path):
+                shutil.rmtree(path)
+        except Exception:
+            # Deliberately best-effort; unlike a bare except this does not
+            # swallow KeyboardInterrupt or SystemExit
+            pass
+
    def search_sort_db(self, db, opts):
        if opts.search_text:
            db.search(opts.search_text)
@ -276,6 +284,41 @@ class CatalogPlugin(Plugin):
        fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
        return fields

+    def initialize(self):
+        '''
+        If plugin is not a built-in, copy the plugin's .ui and .py files from
+        the zip file to $TMPDIR/calibre_plugin_resources.
+        The tab will be dynamically generated and added to the Catalog Options
+        dialog in calibre.gui2.dialogs.catalog.py:Catalog
+
+        The extraction directory is registered for deletion at interpreter
+        exit. Missing or unextractable resources are reported but not fatal.
+        '''
+        import atexit
+        from calibre.customize.builtins import plugins as builtin_plugins
+
+        if type(self) in builtin_plugins:
+            print "%s: Built-in Catalog plugin, no init necessary" % self.name
+            return
+
+        print "%s: User-added plugin" % self.name
+        print " Copying .ui and .py resources from %s to tmpdir" % self.plugin_path
+
+        # Generate a list of resource files to extract from the zipped plugin.
+        # The Catalog dialog looks the resources up under
+        # plugin.name.lower().replace(' ', '_'), so build the same stem here
+        stem = self.name.lower().replace(' ', '_')
+        files_to_copy = ["%s.%s" % (stem, ext) for ext in ["ui","py"]]
+        print " files_to_copy: %s" % files_to_copy
+
+        # Copy to tmpdir/calibre_plugin_resources
+        temp_resources_path = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
+        resources = zipfile.ZipFile(self.plugin_path,'r')
+        try:
+            for member in files_to_copy:
+                try:
+                    resources.extract(member, temp_resources_path)
+                    print " %s extracted to %s" % (member, temp_resources_path)
+                except KeyError:
+                    # ZipFile raises KeyError for a member that is not in the archive
+                    print " %s not found in %s" % (member, os.path.basename(self.plugin_path))
+                except Exception as err:
+                    # stay best-effort, but do not misreport the failure as "not found"
+                    print " %s could not be extracted: %s" % (member, err)
+        finally:
+            # close the archive even if something unexpected propagates
+            resources.close()
+
+        # Register temp_resources_path for deletion when calibre exits
+        atexit.register(self.cleanup, temp_resources_path)
+
+            
    def run(self, path_to_output, opts, db):
        '''
        Run the plugin. Must be implemented in subclasses.
--- a/src/calibre/devices/interface.py
+++ b/src/calibre/devices/interface.py
@ -43,6 +43,15 @@ class DevicePlugin(Plugin):
    #: Icon for this device
    icon = I('reader.svg')

+    @classmethod
+    def get_gui_name(cls):
+        '''
+        Return the name to show for this device in the GUI: the class's
+        ``gui_name`` attribute when it has one, otherwise the class name.
+        '''
+        # Every class has __name__, so it is always available as the fallback
+        return getattr(cls, 'gui_name', cls.__name__)
+
+
    def test_bcd_windows(self, device_id, bcd):
        if bcd is None or len(bcd) == 0:
            return True
--- a/src/calibre/devices/prs500/driver.py
+++ b/src/calibre/devices/prs500/driver.py
@ -95,6 +95,7 @@ class PRS500(DeviceConfig, DevicePlugin):
    PRODUCT_ID   = 0x029b #: Product Id for the PRS-500
    BCD          = [0x100]
    PRODUCT_NAME = 'PRS-500'
+    gui_name     = PRODUCT_NAME
    VENDOR_NAME  = 'SONY'
    INTERFACE_ID = 0      #: The interface we use to talk to the device
    BULK_IN_EP   = 0x81   #: Endpoint for Bulk reads
@ -114,10 +115,6 @@ class PRS500(DeviceConfig, DevicePlugin):
    SUPPORTS_SUB_DIRS = False
    MUST_READ_METADATA = True

-    @classmethod
-    def get_gui_name(cls):
-        return 'PRS-500'
-
    def log_packet(self, packet, header, stream=sys.stderr):
        """
        Log C{packet} to stream C{stream}.
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -22,7 +22,7 @@ from calibre import __appname__
 class PRS505(CLI, Device):

    name           = 'PRS-300/505 Device Interface'
-    gui_name       = 'SONY Pocket Edition'
+    gui_name       = 'SONY Reader'
    description    = _('Communicate with the Sony PRS-300/505/500 eBook reader.')
    author         = 'Kovid Goyal and John Schember'
    supported_platforms = ['windows', 'osx', 'linux']
@ -95,7 +95,7 @@ class PRS505(CLI, Device):
                self._card_b_prefix = None

    def get_device_information(self, end_session=True):
-        return (self.__class__.__name__, '', '', '')
+        return (self.gui_name, '', '', '')

    def books(self, oncard=None, end_session=True):
        if oncard == 'carda' and not self._card_a_prefix:
@ -214,7 +214,7 @@ class PRS700(PRS505):
    name           = 'PRS-600/700/900 Device Interface'
    description    = _('Communicate with the Sony PRS-600/700/900 eBook reader.')
    author         = 'Kovid Goyal and John Schember'
-    gui_name       = 'SONY Touch/Daily edition'
+    gui_name       = 'SONY Reader'
    supported_platforms = ['windows', 'osx', 'linux']

    BCD          = [0x31a]
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@ -98,13 +98,6 @@ class Device(DeviceConfig, DevicePlugin):
            self.detected_device = None
        self.set_progress_reporter(report_progress)

-    @classmethod
-    def get_gui_name(cls):
-        x = getattr(cls, 'gui_name', None)
-        if x is None:
-            x = cls.__name__
-        return x
-
    def set_progress_reporter(self, report_progress):
        self.report_progress = report_progress
        self.report_progress = report_progress
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -35,7 +35,7 @@ class USBMS(CLI, Device):

    def get_device_information(self, end_session=True):
        self.report_progress(1.0, _('Get device information...'))
-        return (self.__class__.__name__, '', '', '')
+        return (self.get_gui_name(), '', '', '')

    def books(self, oncard=None, end_session=True):
        from calibre.ebooks.metadata.meta import path_to_ext
--- a/src/calibre/ebooks/metadata/pml.py
+++ b/src/calibre/ebooks/metadata/pml.py
@ -16,6 +16,7 @@ import re
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
+from calibre import prepare_string_for_xml

 def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object """
@ -42,21 +43,21 @@ def get_metadata(stream, extract_cover=True):
    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        m = re.search(r'TITLE="(.*?)"', comment)
        if m:
-            mi.title = m.group(1).strip().decode('cp1252', 'replace')
+            mi.title = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'AUTHOR="(.*?)"', comment)
        if m:
            if mi.authors == [_('Unknown')]:
                mi.authors = []
-            mi.authors.append(m.group(1).strip().decode('cp1252', 'replace'))
+            mi.authors.append(re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace'))))
        m = re.search(r'PUBLISHER="(.*?)"', comment)
        if m:
-            mi.publisher = m.group(1).strip().decode('cp1252', 'replace')
+            mi.publisher = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'COPYRIGHT="(.*?)"', comment)
        if m:
-            mi.rights = m.group(1).strip().decode('cp1252', 'replace')
+            mi.rights = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))
        m = re.search(r'ISBN="(.*?)"', comment)
        if m:
-            mi.isbn = m.group(1).strip().decode('cp1252', 'replace')
+            mi.isbn = re.sub('[\x00-\x1f]', '', prepare_string_for_xml(m.group(1).strip().decode('cp1252', 'replace')))

    return mi

--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -421,16 +421,16 @@ class DirContainer(object):
            return f.read()

    def write(self, path, data):
-        path = os.path.join(self.rootdir, path)
+        path = os.path.join(self.rootdir, urlunquote(path))
        dir = os.path.dirname(path)
        if not os.path.isdir(dir):
            os.makedirs(dir)
-        with open(urlunquote(path), 'wb') as f:
+        with open(path, 'wb') as f:
            return f.write(data)

    def exists(self, path):
-        path = os.path.join(self.rootdir, path)
-        return os.path.isfile(urlunquote(path))
+        path = os.path.join(self.rootdir, urlunquote(path))
+        return os.path.isfile(path)

    def namelist(self):
        names = []
--- a/src/calibre/ebooks/oeb/writer.py
+++ b/src/calibre/ebooks/oeb/writer.py
@ -62,6 +62,7 @@ class OEBWriter(object):
        output = DirContainer(path, oeb.log)
        for item in oeb.manifest.values():
            output.write(item.href, str(item))
+
        if version == 1:
            metadata = oeb.to_opf1()
        elif version == 2:
--- a/src/calibre/gui2/convert/gui_conversion.py
+++ b/src/calibre/gui2/convert/gui_conversion.py
@ -4,9 +4,10 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-from calibre.ebooks.conversion.plumber import Plumber
-from calibre.utils.logging import Log
 from calibre.customize.conversion import OptionRecommendation, DummyReporter
+from calibre.ebooks.conversion.plumber import Plumber
+# ?from calibre.library.catalog import Catalog
+from calibre.utils.logging import Log

 def gui_convert(input, output, recommendations, notification=DummyReporter(),
        abort_after_input_dump=False, log=None):
@ -20,3 +21,34 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),

    plumber.run()

+def gui_catalog(fmt, title, dbspec, ids, out_file_name,
+        notification=DummyReporter(), log=None):
+    '''
+    Placeholder for catalog generation from the GUI.
+
+    For now this only opens the library database (when dbspec is None) and
+    logs the arguments it was called with; no catalog is produced yet.
+    '''
+    log = Log() if log is None else log
+
+    if dbspec is None:
+        from calibre.library.database2 import LibraryDatabase2
+        from calibre.utils.config import prefs
+        db = LibraryDatabase2(prefs['library_path'])
+    # A dbspec other than None is to be implemented in the future
+
+    # Implement the interface to the catalog generating code here
+    #db
+    log("gui2.convert.gui_conversion:gui_catalog()")
+    for label, value in (('fmt', fmt), ('title', title), ('dbspec', dbspec),
+                         ('ids', ids), ('out_file_name', out_file_name)):
+        log((label + ": %s") % value)
+
+    # This needs to call the .run() method of the plugin associated with fmt
+    # Needs to set up options before the call
+    # catalog = Catalog(out_file_name, options, dbspec)
+    # Can I call library.cli:catalog_option_parser()?
+    
+
+
+
+
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -676,6 +676,65 @@ class DeviceGUI(object):
            self.status_bar.showMessage(_('Sent news to')+' '+\
                    ', '.join(sent_mails),  3000)

+    def sync_catalogs(self, send_ids=None, do_auto_convert=True):
+        '''
+        Upload catalogs to the connected device; does nothing when no device
+        is connected.
+
+        :param send_ids: ids of the books to send. When None, the ids stored
+                         under 'catalogs_to_be_synced' in ``dynamic`` are used.
+        :param do_auto_convert: when True, books without a suitable format are
+                         left out of the upload and the user is offered an
+                         automatic conversion for them instead.
+        '''
+        if self.device_connected:
+            settings = self.device_manager.device.settings()
+            ids = list(dynamic.get('catalogs_to_be_synced', set([]))) if send_ids is None else send_ids
+            # drop ids that are no longer present in the library database
+            ids = [id for id in ids if self.library_view.model().db.has_id(id)]
+            files, _auto_ids = self.library_view.model().get_preferred_formats_from_ids(
+                                ids, settings.format_map,
+                                exclude_auto=do_auto_convert)
+            # auto collects the ids that could be converted to a format the device accepts
+            auto = []
+            if do_auto_convert and _auto_ids:
+                for id in _auto_ids:
+                    dbfmts = self.library_view.model().db.formats(id, index_is_id=True)
+                    formats = [] if dbfmts is None else \
+                        [f.lower() for f in dbfmts.split(',')]
+                    if set(formats).intersection(available_input_formats()) \
+                            and set(settings.format_map).intersection(available_output_formats()):
+                        auto.append(id)
+            if auto:
+                # pick the first entry of the device's format_map that is also an output format
+                format = None
+                for fmt in settings.format_map:
+                    if fmt in list(set(settings.format_map).intersection(set(available_output_formats()))):
+                        format = fmt
+                        break
+                if format is not None:
+                    autos = [self.library_view.model().db.title(id, index_is_id=True) for id in auto]
+                    autos = '\n'.join('%s'%i for i in autos)
+                    if question_dialog(self, _('No suitable formats'),
+                        _('Auto convert the following books before uploading to '
+                            'the device?'), det_msg=autos):
+                        self.auto_convert_catalogs(auto, format)
+            files = [f for f in files if f is not None]
+            if not files:
+                # nothing to send: clear the pending set and stop
+                dynamic.set('catalogs_to_be_synced', set([]))
+                return
+            # NOTE(review): metadata is fetched for every id, while files has had
+            # its None entries removed above, so the two lists may differ in
+            # length -- confirm what upload_books expects
+            metadata = self.library_view.model().metadata_for(ids)
+            names = []
+            for mi in metadata:
+                prefix = ascii_filename(mi.title)
+                if not isinstance(prefix, unicode):
+                    prefix = prefix.decode(preferred_encoding, 'replace')
+                prefix = ascii_filename(prefix)
+                # NOTE(review): id and f are not set by this loop; they are the
+                # values left behind by the earlier loops/list comprehensions
+                # (Python 2 leaks comprehension variables), so every name gets
+                # the same id and file extension -- confirm this is intended
+                names.append('%s_%d%s'%(prefix, id,
+                    os.path.splitext(f.name)[1]))
+                if mi.cover and os.access(mi.cover, os.R_OK):
+                    mi.thumbnail = self.cover_to_thumbnail(open(mi.cover,
+                        'rb').read())
+            dynamic.set('catalogs_to_be_synced', set([]))
+            if files:
+                remove = []
+                # map each free-space figure to its storage location and send to
+                # the location keyed by the largest figure
+                # NOTE(review): equal free-space values collide as dict keys
+                space = { self.location_view.model().free[0] : None,
+                    self.location_view.model().free[1] : 'carda',
+                    self.location_view.model().free[2] : 'cardb' }
+                on_card = space.get(sorted(space.keys(), reverse=True)[0], None)
+                self.upload_books(files, names, metadata,
+                        on_card=on_card,
+                        memory=[[f.name for f in files], remove])
+                self.status_bar.showMessage(_('Sending catalogs to device.'), 5000)
+
+

    def sync_news(self, send_ids=None, do_auto_convert=True):
        if self.device_connected:
--- a/src/calibre/gui2/dialogs/catalog.py
+++ b/src/calibre/gui2/dialogs/catalog.py
@ -0,0 +1,145 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os, shutil, sys, tempfile
+
+from PyQt4.Qt import QDialog, QWidget
+
+from calibre.customize.ui import config
+from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
+from calibre.gui2 import dynamic
+from calibre.customize.ui import available_catalog_formats, catalog_plugins
+from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
+
+class Catalog(QDialog, Ui_Dialog):
+    '''
+    Dialog for choosing the format, title and sync-to-device option of a
+    catalog. One options tab is added for every enabled catalog plugin that
+    provides a PluginWidget.
+
+    After accept(), the choices are available as catalog_format,
+    catalog_title and catalog_sync.
+    '''
+
+    def __init__(self, parent, dbspec, ids):
+        import re, cStringIO
+        from calibre import prints as info
+        from PyQt4.uic import compileUi
+
+        QDialog.__init__(self, parent)
+
+        # Run the dialog setup generated from catalog.ui
+        self.setupUi(self)
+        self.dbspec, self.ids = dbspec, ids
+
+        # Display the number of books we've been passed
+        self.count.setText(unicode(self.count.text()).format(len(ids)))
+
+        # Display the last-used title
+        self.title.setText(dynamic.get('catalog_last_used_title',
+            _('My Books')))
+
+        # GwR *** Add option tabs for built-in formats
+        # This code models #69 in calibre/gui2/dialogs/config/__init__.py
+
+        # [file_type, sync_enabled] pairs collected while the tabs are added
+        self.fmts = []
+
+        from calibre.customize.builtins import plugins as builtin_plugins
+
+        plugin_resources = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
+
+        for plugin in catalog_plugins():
+            if plugin.name in config['disabled_plugins']:
+                continue
+
+            name = plugin.name.lower().replace(' ', '_')
+            if type(plugin) in builtin_plugins:
+                info("Adding tab for builtin Catalog plugin %s" % plugin.name)
+                try:
+                    catalog_widget = __import__('calibre.gui2.catalog.'+name,
+                            fromlist=[1])
+                    self._add_plugin_tab(catalog_widget, plugin, info)
+                except ImportError:
+                    info("ImportError with %s" % name)
+                continue
+
+            # Test to see if .ui and .py files exist in tmpdir/calibre_plugin_resources
+            form = os.path.join(plugin_resources, '%s.ui' % name)
+            klass = os.path.join(plugin_resources, '%s.py' % name)
+            compiled_form = os.path.join(plugin_resources, '%s_ui.py' % name)
+
+            if not (os.path.exists(form) and os.path.exists(klass)):
+                info("No dynamic tab resources found for %s" % name)
+                continue
+
+            info("Adding tab for user-installed Catalog plugin %s" % plugin.name)
+
+            # Compile the form provided in plugin.zip, unless an up to date
+            # compiled copy already exists
+            if not os.path.exists(compiled_form) or \
+               os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
+                info('\tCompiling form', form)
+                buf = cStringIO.StringIO()
+                compileUi(form, buf)
+                dat = buf.getvalue()
+                # route the generated translate() calls through _()
+                dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)',
+                                 re.DOTALL).sub(r'_("\1")', dat)
+                # close the file deterministically so the import below sees
+                # the complete module
+                with open(compiled_form, 'wb') as f:
+                    f.write(dat)
+
+            # Import the Catalog class from the dynamic .py file
+            sys.path.insert(0, plugin_resources)
+            try:
+                catalog_widget = __import__(name, fromlist=[1])
+                self._add_plugin_tab(catalog_widget, plugin, info)
+            except ImportError:
+                info("ImportError with %s" % name)
+            finally:
+                sys.path.remove(plugin_resources)
+
+        # Reduce the collected pairs to a sorted list of upper-cased format
+        # names (the sync_enabled flags are dropped here)
+        self.fmts = sorted([x[0].upper() for x in self.fmts])
+
+        # Callback when format changes
+        self.format.currentIndexChanged.connect(self.format_changed)
+
+        # Add the installed catalog format list to the format QComboBox
+        self.format.addItems(self.fmts)
+
+        pref = dynamic.get('catalog_preferred_format', 'CSV')
+        idx = self.format.findText(pref)
+        if idx > -1:
+            self.format.setCurrentIndex(idx)
+
+        if self.sync.isEnabled():
+            self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
+
+    def _add_plugin_tab(self, catalog_widget, plugin, info):
+        # Instantiate the module's PluginWidget, add it as a tab and record
+        # the file types supported by the plugin
+        pw = catalog_widget.PluginWidget()
+        pw.initialize()
+        pw.ICON = I('forward.svg')
+        self.tabs.addTab(pw, pw.TITLE)
+        self.fmts.extend([file_type, pw.sync_enabled] for file_type in plugin.file_types)
+        info("\tSupported formats: %s" % plugin.file_types)
+        info("\tsync_enabled: %s" % pw.sync_enabled)
+
+    def format_changed(self, idx):
+        # Syncing to the device is only offered for EPUB and MOBI catalogs
+        print "format_changed(idx): idx: %d" % idx
+        cf = unicode(self.format.currentText())
+        if cf in ('EPUB', 'MOBI'):
+            self.sync.setEnabled(True)
+        else:
+            self.sync.setDisabled(True)
+            self.sync.setChecked(False)
+
+    def accept(self):
+        # Remember the choices for next time and expose them as attributes
+        self.catalog_format = unicode(self.format.currentText())
+        dynamic.set('catalog_preferred_format', self.catalog_format)
+        self.catalog_title = unicode(self.title.text())
+        dynamic.set('catalog_last_used_title', self.catalog_title)
+        self.catalog_sync = bool(self.sync.isChecked())
+        dynamic.set('catalog_sync_to_device', self.catalog_sync)
+        QDialog.accept(self)
--- a/src/calibre/gui2/dialogs/catalog.ui
+++ b/src/calibre/gui2/dialogs/catalog.ui
@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Dialog</class>
+ <widget class="QDialog" name="Dialog">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>611</width>
+    <height>514</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Generate catalog</string>
+  </property>
+  <property name="windowIcon">
+   <iconset>
+    <normaloff>:/images/library.png</normaloff>:/images/library.png</iconset>
+  </property>
+  <widget class="QDialogButtonBox" name="buttonBox">
+   <property name="geometry">
+    <rect>
+     <x>430</x>
+     <y>470</y>
+     <width>164</width>
+     <height>32</height>
+    </rect>
+   </property>
+   <property name="orientation">
+    <enum>Qt::Horizontal</enum>
+   </property>
+   <property name="standardButtons">
+    <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+   </property>
+  </widget>
+  <widget class="QTabWidget" name="tabs">
+   <property name="geometry">
+    <rect>
+     <x>12</x>
+     <y>39</y>
+     <width>579</width>
+     <height>411</height>
+    </rect>
+   </property>
+   <property name="currentIndex">
+    <number>0</number>
+   </property>
+   <widget class="QWidget" name="tab">
+    <attribute name="title">
+     <string>Catalog options</string>
+    </attribute>
+    <layout class="QGridLayout" name="gridLayout_2">
+     <item row="0" column="0">
+      <widget class="QLabel" name="label">
+       <property name="text">
+        <string>Catalog &amp;format:</string>
+       </property>
+       <property name="buddy">
+        <cstring>format</cstring>
+       </property>
+      </widget>
+     </item>
+     <item row="0" column="2">
+      <widget class="QComboBox" name="format"/>
+     </item>
+     <item row="1" column="0">
+      <widget class="QLabel" name="label_2">
+       <property name="text">
+        <string>Catalog &amp;title (existing catalog with the same title will be replaced):</string>
+       </property>
+       <property name="wordWrap">
+        <bool>true</bool>
+       </property>
+       <property name="buddy">
+        <cstring>title</cstring>
+       </property>
+      </widget>
+     </item>
+     <item row="1" column="2">
+      <widget class="QLineEdit" name="title"/>
+     </item>
+     <item row="3" column="0">
+      <widget class="QCheckBox" name="sync">
+       <property name="text">
+        <string>&amp;Send catalog to device automatically</string>
+       </property>
+      </widget>
+     </item>
+     <item row="2" column="1">
+      <spacer name="verticalSpacer">
+       <property name="orientation">
+        <enum>Qt::Vertical</enum>
+       </property>
+       <property name="sizeHint" stdset="0">
+        <size>
+         <width>20</width>
+         <height>299</height>
+        </size>
+       </property>
+      </spacer>
+     </item>
+    </layout>
+   </widget>
+  </widget>
+  <widget class="QLabel" name="count">
+   <property name="geometry">
+    <rect>
+     <x>12</x>
+     <y>12</y>
+     <width>205</width>
+     <height>17</height>
+    </rect>
+   </property>
+   <property name="font">
+    <font>
+     <weight>75</weight>
+     <bold>true</bold>
+    </font>
+   </property>
+   <property name="text">
+    <string>Generate catalog for {0} books</string>
+   </property>
+  </widget>
+ </widget>
+ <resources>
+  <include location="../../../work/calibre/resources/images.qrc"/>
+ </resources>
+ <connections>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>accepted()</signal>
+   <receiver>Dialog</receiver>
+   <slot>accept()</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>248</x>
+     <y>254</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>157</x>
+     <y>274</y>
+    </hint>
+   </hints>
+  </connection>
+  <connection>
+   <sender>buttonBox</sender>
+   <signal>rejected()</signal>
+   <receiver>Dialog</receiver>
+   <slot>reject()</slot>
+   <hints>
+    <hint type="sourcelabel">
+     <x>316</x>
+     <y>260</y>
+    </hint>
+    <hint type="destinationlabel">
+     <x>286</x>
+     <y>274</y>
+    </hint>
+   </hints>
+  </connection>
+ </connections>
+</ui>
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@ -232,6 +232,11 @@ class BooksModel(QAbstractTableModel):
        self.count_changed()
        return ret

+    def add_catalog(self, path, title):
+        '''
+        Add a catalog to the underlying database and emit count_changed.
+
+        :param path: passed through unchanged to self.db.add_catalog
+        :param title: passed through unchanged to self.db.add_catalog
+        :return: whatever self.db.add_catalog returns
+        '''
+        result = self.db.add_catalog(path, title)
+        self.count_changed()
+        return result
+
    def count_changed(self, *args):
        self.emit(SIGNAL('count_changed(int)'), self.db.count())

--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -236,6 +236,29 @@ def fetch_scheduled_recipe(arg):

    return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt]

+def generate_catalog(parent, dbspec, ids):
+    '''
+    Show the Catalog dialog and, if it is accepted, describe the job that
+    will build the catalog.
+
+    :param parent: passed to the Catalog dialog
+    :param dbspec: passed to the dialog and forwarded in the job arguments
+    :param ids: passed to the dialog and forwarded in the job arguments
+    :return: None if the dialog was not accepted, otherwise the tuple
+             (job function name, job args, job description, output path,
+             sync flag, catalog title)
+    '''
+    from calibre.gui2.dialogs.catalog import Catalog
+
+    dialog = Catalog(parent, dbspec, ids)
+    if dialog.exec_() != dialog.Accepted:
+        return None
+
+    fmt, title = dialog.catalog_format, dialog.catalog_title
+
+    # Only the name of the temporary file is needed here; it is closed
+    # again right away and its path handed to the job.
+    tmp = PersistentTemporaryFile(suffix='_catalog_out.'+fmt.lower())
+    out_path = tmp.name
+    tmp.close()
+
+    args = [fmt, title, dbspec, ids, out_path]
+    return 'gui_catalog', args, _('Generate catalog'), out_path, \
+            dialog.catalog_sync, title
+
 def convert_existing(parent, db, book_ids, output_format):
    already_converted_ids = []
    already_converted_titles = []
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -48,7 +48,7 @@ from calibre.gui2.jobs import JobManager, JobsDialog
 from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
 from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
 from calibre.gui2.tools import convert_single_ebook, convert_bulk_ebook, \
-    fetch_scheduled_recipe
+    fetch_scheduled_recipe, generate_catalog
 from calibre.gui2.dialogs.config import ConfigDialog
 from calibre.gui2.dialogs.search import SearchDialog
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
@ -355,6 +355,10 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        cm = QMenu()
        cm.addAction(_('Convert individually'))
        cm.addAction(_('Bulk convert'))
+        cm.addSeparator()
+        ac = cm.addAction(
+                _('Create catalog of the books in your calibre library'))
+        ac.triggered.connect(self.generate_catalog)
        self.action_convert.setMenu(cm)
        self._convert_single_hook = partial(self.convert_ebook, bulk=False)
        QObject.connect(cm.actions()[0],
@ -894,6 +898,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            view.resizeRowsToContents()
            view.resize_on_select = not view.isVisible()
        self.sync_news()
+        self.sync_catalogs()
    ############################################################################


@ -1339,6 +1344,44 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):

    ############################################################################

+    ############################### Generate catalog ###########################
+
+    def generate_catalog(self):
+        '''
+        Ask the user for catalog options and queue a job that generates a
+        catalog for the selected rows, or for every row of the library view
+        when nothing is selected. Shows an error dialog if that yields no
+        book ids.
+        '''
+        view_model = self.library_view.model()
+        rows = self.library_view.selectionModel().selectedRows()
+        if not rows:
+            rows = xrange(view_model.rowCount(QModelIndex()))
+        ids = map(view_model.id, rows)
+        if not ids:
+            return error_dialog(self, _('No books selected'),
+                    _('No books selected to generate catalog for'),
+                    show=True)
+
+        dbspec = None
+        # This is the module-level generate_catalog() imported from
+        # calibre.gui2.tools, not this method.
+        ret = generate_catalog(self, dbspec, ids)
+        if ret is None:
+            return
+        func, args, desc, out, sync, title = ret
+
+        job = self.job_manager.run_job(
+                Dispatcher(self.catalog_generated), func, args=args,
+                    description=desc)
+        # catalog_generated() reads these attributes back off the job
+        job.catalog_file_path, job.catalog_sync, job.catalog_title = \
+                out, sync, title
+
+        fmt = os.path.splitext(out)[1][1:].upper()
+        self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
+
+    def catalog_generated(self, job):
+        '''
+        Callback for a finished catalog job: add the generated file to the
+        library and, when the job asked for it, record the catalog in the
+        'catalogs_to_be_synced' set before calling sync_catalogs().
+
+        :param job: the finished job; carries catalog_file_path,
+                    catalog_title and catalog_sync set by generate_catalog()
+        '''
+        if job.failed:
+            return self.job_exception(job)
+
+        model = self.library_view.model()
+        book_id = model.add_catalog(job.catalog_file_path, job.catalog_title)
+        model.reset()
+
+        if job.catalog_sync:
+            pending = dynamic.get('catalogs_to_be_synced', set([]))
+            pending.add(book_id)
+            dynamic.set('catalogs_to_be_synced', pending)
+
+        self.status_bar.showMessage(_('Catalog generated.'), 3000)
+        self.sync_catalogs()
+
+
    ############################### Fetch news #################################

    def download_scheduled_recipe(self, arg):
@ -1398,6 +1441,17 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        self.queue_convert_jobs(jobs, changed, bad, rows, previous,
                self.book_auto_converted_news)

+    def auto_convert_catalogs(self, book_ids, format):
+        '''
+        Build conversion jobs for `book_ids` to `format` and queue them with
+        book_auto_converted_catalogs as the completion callback. Returns
+        without queuing anything when no jobs were created.
+        '''
+        view = self.library_view
+        previous = view.currentIndex()
+        rows = [index.row() for index in view.selectionModel().selectedRows()]
+        jobs, changed, bad = convert_single_ebook(
+                self, view.model().db, book_ids, True, format)
+        if jobs == []:
+            return
+        self.queue_convert_jobs(jobs, changed, bad, rows, previous,
+                self.book_auto_converted_catalogs)
+
+
+
    def get_books_for_conversion(self):
        rows = [r.row() for r in \
                self.library_view.selectionModel().selectedRows()]
@ -1463,6 +1517,11 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        self.book_converted(job)
        self.sync_news(send_ids=[book_id], do_auto_convert=False)

+    def book_auto_converted_catalogs(self, job):
+        '''
+        Completion callback for catalog auto-conversion jobs: finish the
+        conversion via book_converted(), then send the converted book with
+        sync_catalogs() without triggering another auto-convert.
+
+        :param job: the finished conversion job, a key of self.conversion_jobs
+        '''
+        # Read the book id before book_converted() pops the entry. Only the
+        # third field is needed, so index it rather than unpacking the whole
+        # entry (book_converted() slices [:3] on the same entries).
+        book_id = self.conversion_jobs[job][2]
+        self.book_converted(job)
+        self.sync_catalogs(send_ids=[book_id], do_auto_convert=False)
+
    def book_converted(self, job):
        temp_files, fmt, book_id = self.conversion_jobs.pop(job)[:3]
        try:
--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@ -532,6 +532,7 @@ class LibraryPage(QWizardPage, LibraryUI):
        for item in items:
            self.language.addItem(item[1], QVariant(item[0]))
        self.language.blockSignals(False)
+        prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())

    def change_language(self, idx):
        prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -41,6 +41,8 @@ class CSV_XML(CatalogPlugin):

        log = Log()
        self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
+        # Same result as the slice above, written with str.rpartition
+        self.fmt = path_to_output.rpartition('.')[2]
        if opts.verbose:
            log("%s:run" % self.name)
            log(" path_to_output: %s" % path_to_output)
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -645,7 +645,9 @@ def catalog_option_parser(args):

    # Add options common to all catalog plugins
    parser.add_option('-s', '--search', default=None, dest='search_text',
-                      help=_("Filter the results by the search query.  For the format of the search query, please see the search-related documentation in the User Manual.\n"+
+                      help=_("Filter the results by the search query. "
+                          "For the format of the search query, please see "
+                          "the search-related documentation in the User Manual.\n"
                      "Default: no filtering"))
    parser.add_option('-v','--verbose', default=False, action='store_true',
                      dest='verbose',
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -1407,6 +1407,36 @@ class LibraryDatabase2(LibraryDatabase):
        if notify:
            self.notify('metadata', [id])

+    def add_catalog(self, path, title):
+        '''
+        Store the catalog file at `path` as a format of a book record that
+        matches `title` and carries the 'Catalog' tag, creating that record
+        (author 'calibre') when no match exists.
+
+        :param path: path of the catalog file; its extension, lower-cased,
+                     is used as the format name
+        :param title: title to look up, and to give a newly created record
+        :return: database id of the record the format was added to
+        '''
+        # NOTE(review): the extension is taken from `path` before the
+        # file-like check below, so passing an object with read() looks
+        # unsupported in practice - confirm.
+        fmt = os.path.splitext(path)[1][1:].lower()
+        opened_here = not hasattr(path, 'read')
+        stream = open(path, 'rb') if opened_here else path
+        try:
+            stream.seek(0)
+            matches = self.data.get_matches('title', title)
+            if matches:
+                tag_matches = self.data.get_matches('tags', _('Catalog'))
+                matches = matches.intersection(tag_matches)
+            db_id = None
+            if matches:
+                db_id = list(matches)[0]
+            if db_id is None:
+                obj = self.conn.execute('INSERT INTO books(title, author_sort) VALUES (?, ?)',
+                                    (title, 'calibre'))
+                db_id = obj.lastrowid
+                self.data.books_added([db_id], self)
+                self.set_path(db_id, index_is_id=True)
+                self.conn.commit()
+                mi = MetaInformation(title, ['calibre'])
+                mi.tags = [_('Catalog')]
+                self.set_metadata(db_id, mi)
+
+            self.add_format(db_id, fmt, stream, index_is_id=True)
+        finally:
+            # Close only a file opened here, and do so even when one of the
+            # database calls above raises.
+            if opened_here:
+                stream.close()
+        self.conn.commit()
+        self.data.refresh_ids(self, [db_id]) # Needed to update format list and size
+        return db_id
+
+
    def add_news(self, path, arg):
        format = os.path.splitext(path)[1][1:].lower()
        stream = path if hasattr(path, 'read') else open(path, 'rb')
--- a/src/calibre/translations/calibre.pot
+++ b/src/calibre/translations/calibre.pot
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
@ -2245,7 +2245,7 @@ msgstr ""
 "kommando ...\n"
 "\n"
 "Kommandot kan vara något av följande:\n"
-"[%% kommandon]\n"
+"[%%commands]\n"
 "\n"
 "Använd %prog kommando --help för att få mer information om ett visst "
 "kommando\n"
@ -6213,7 +6213,7 @@ msgstr "<p> För hjälp se: <a href=\"%s\"> Användarhandbok </ a> <br>"
 #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:224
 msgid "<b>%s</b>: %s by <b>Kovid Goyal %%(version)s</b><br>%%(device)s</p>"
 msgstr ""
-"<b>%s </ b>:%s av <b> Kovid Goyal%% (version) s </ b> <br>%% (enhet) s </ p>"
+"<b>%s </ b>:%s av <b> Kovid Goyal %%(version)s </ b> <br>%%(device)s </ p>"

 #: /home/kovid/work/calibre/src/calibre/gui2/ui.py:247
 msgid "Edit metadata individually"
@ -7828,7 +7828,7 @@ msgid ""
 msgstr ""
 "%%prog kommando [alternativ] [argument]\n"
 "\n"
-"%% PROG är kommandoradsgränssnitt till calibres bokdatabasen.\n"
+"%%prog är kommandoradsgränssnitt till calibres bokdatabasen.\n"
 "\n"
 "kommando är en av:\n"
 "  %s\n"
--- a/src/calibre/utils/ipc/worker.py
+++ b/src/calibre/utils/ipc/worker.py
@ -27,6 +27,9 @@ PARALLEL_FUNCS = {
      'gui_convert'     :
        ('calibre.gui2.convert.gui_conversion', 'gui_convert', 'notification'),

+      'gui_catalog'     :
+        ('calibre.gui2.convert.gui_conversion', 'gui_catalog', 'notification'),
+
      'move_library'     :
        ('calibre.library.move', 'move_library', 'notification'),