Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00

Commit d96542418a: Sync to trunk.
87	resources/recipes/fokkeensukke.recipe	Normal file
@@ -0,0 +1,87 @@
#!/usr/bin/python

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


class FokkeEnSukkeRecipe(BasicNewsRecipe) :
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    description = u'Popular Dutch daily cartoon Fokke en Sukke'

    title = u'Fokke en Sukke'
    no_stylesheets = True
    # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
    # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
    template_css = ''
    INDEX = u'http://foksuk.nl'

    # This cover is not as nice as it could be, needs some work
    #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'

    keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]

    def parse_index(self) :
        # A list with daynames as they _can_ appear in the index
        dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag']
        soup = self.index_to_soup(self.INDEX)

        # Find the links for the various cartoons for this week and loop through them
        index = soup.find('div', attrs={'class' : 'selectcartoon'})
        links = index.findAll('a')
        maxIndex = len(links) - 1
        articles = []
        for i in range(len(links)) :
            # The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
            if i == 0 :
                continue

            # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
            # If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
            if links[i].renderContents() in dayNames :
                # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
                if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
                    # Got you! Add it to the list
                    article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
                    articles.append(article)
                    # If there is a '1', there should be a '2' as well, but better safe than sorry
                    if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') :
                        # Got you! Add it to the list
                        article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url' : self.INDEX + links[i + 2]['href'], 'description' : ''}
                        articles.append(article)
                else :
                    # There is only one cartoon for this day. Add it to the list.
                    article = {'title' : links[i].renderContents(), 'date' : u'', 'url' : self.INDEX + links[i]['href'], 'description' : ''}
                    articles.append(article)
        # Might as well use the weeknumber as title
        week = index.find('span', attrs={'class' : 'week'}).renderContents()

        return [[week, articles]]

    def preprocess_html(self, soup) :
        # This method is called for every page, be it cartoon or TOC. We need to process each in its own way
        cartoon = soup.find('div', attrs={'class' : 'cartoon'})
        if cartoon :
            # It is a cartoon. Extract the title.
            title = ''
            img = soup.find('img', attrs = {'alt' : True})
            if img :
                title = img['alt']

            # Using 'extra_css' displays it in the <body> and not in the <head>. See the comment at the top of this class. Setting the style this way solves that.
            tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
            tag.insert(0, title)
            cartoon.insert(0, tag)

            # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
            # and calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
            select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
            if select :
                select.extract()

            return cartoon
        else :
            # It is a TOC. Just return the whole lot.
            return soup
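
The Tag-insertion trick in preprocess_html above is worth isolating. A minimal sketch, assuming calibre's bundled BeautifulSoup 3 API (the sample markup below is hypothetical):

    # Build a styled <div>, give it the image's alt text as content, and
    # prepend it to the cartoon container -- BS3-era Tag() constructor.
    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

    soup = BeautifulSoup('<div class="cartoon"><img alt="De titel" src="x.gif"/></div>')
    cartoon = soup.find('div', attrs={'class' : 'cartoon'})
    tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
    tag.insert(0, soup.find('img')['alt'])  # strings become NavigableStrings in BS3
    cartoon.insert(0, tag)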
@@ -43,97 +43,45 @@ class Guardian(BasicNewsRecipe):
        #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
    '''

    def find_sections(self):
        soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
        # find cover pic
        img = soup.find('img', attrs={'alt':'Guardian digital edition'})
        if img is not None:
            self.cover_url = img['src']
        # end find cover pic

        idx = soup.find('div', id='book-index')
        for s in idx.findAll('strong', attrs={'class':'book'}):
            a = s.find('a', href=True)
            yield (self.tag_to_string(a), a['href'])

    def find_articles(self, url):
        soup = self.index_to_soup(url)
        div = soup.find('div', attrs={'class':'book-index'})
        for ul in div.findAll('ul', attrs={'class':'trailblock'}):
            for li in ul.findAll('li'):
                a = li.find(href=True)
                if not a:
                    continue
                title = self.tag_to_string(a)
                url = a['href']
                if not title or not url:
                    continue
                tt = li.find('div', attrs={'class':'trailtext'})
                if tt is not None:
                    for da in tt.findAll('a'): da.extract()
                desc = self.tag_to_string(tt).strip()
                yield {
                    'title': title, 'url':url, 'description':desc,
                    'date' : strftime('%a, %d %b'),
                }

    def parse_index(self):

        soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
        # find cover pic
        img = soup.find('img', attrs={'alt':'Guardian digital edition'})
        if img is None: return None
        else:
            self.cover_url = img['src']
        # end find cover pic
        sections = []
        ans = []
        for li in soup.findAll('li'):
            section = ''
            articles = []

            if li.a and li.a.has_key('href'):
                url = li.a['href']
                if 'mainsection' in url:
                    section = self.tag_to_string(url)
                    i = len(section)

                    index1 = section.rfind('/',0,i)
                    section = section[index1+1:i]
                    sections.append(section)

                    #find the articles in the Main Section start
                    soup = self.index_to_soup(url)
                    date = strftime('%a, %d %b')
                    descl = []

                    for desclist in soup.findAll(name='div',attrs={'class':"trailtext"}):
                        descl.append(self.tag_to_string(desclist).strip())

                    t = -1
                    for tag in soup.findAll('h3'):
                        t = t+1

                        for a in tag.findAll('a'):

                            if t < len(descl):
                                desc = descl[t]
                            else:
                                desc = ''
                            if a and a.has_key('href'):
                                url2 = a['href']
                            else:
                                url2 = ''
                            title = self.tag_to_string(a)

                            if len(articles) == 0: #First article
                                articles.append({
                                    'title':title,
                                    'date':date,
                                    'url':url2,
                                    'description':desc,
                                    })
                            else:
                                #eliminate duplicates start
                                if {'title':title,'date':date,'url':url2,'description':desc} in articles :
                                    url2 = ''
                                #eliminate duplicates end
                                else:
                                    if 'http://jobs.guardian.co.uk/' in url2:
                                        url2 = ''
                                    else:
                                        articles.append({
                                            'title':title,
                                            'date':date,
                                            'url':url2,
                                            'description':desc,
                                            })
                    #find the articles in the Main Section end
                    ans.append(articles)

            else:
                url = ''

        titles = map(self.find_title, sections)
        ans1 = list(zip(titles, ans))

        return ans1[2:]

    def find_title(self, section):
        d = {'topstories':'Top Stories', 'international':'International', 'editorialsandreply':'Editorials and Reply',
             'commentanddebate':'Comment and Debate','uknews':'UK News','saturday':'Saturday','sunday':'Sunday',
             'reviews':'Reviews', 'obituaries':'Obituaries'}

        return d.get(section, section)

        feeds = []
        for title, href in self.find_sections():
            feeds.append((title, list(self.find_articles(href))))
        return feeds

    def preprocess_html(self, soup):

110	resources/recipes/hbr.recipe	Normal file
@@ -0,0 +1,110 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re

class HBR(BasicNewsRecipe):

    title = 'Harvard Business Review'
    description = 'To subscribe go to http://hbr.harvardbusiness.org'
    needs_subscription = True
    __author__ = 'Kovid Goyal and Sujata Raman'
    timefmt = ' [%B %Y]'
    language = 'en'
    no_stylesheets = True

    LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
    INDEX = 'http://hbr.harvardbusiness.org/current'

    keep_only_tags = [dict(name='div', id='content')]
    remove_tags = [
            dict(id=['articleDate', 'subscriptionModule', 'errorArea',
                'feedbackForm', 'relatedModule', 'articleSecondaryModule',
                'contentRight', 'summaryLink']),
            dict(name='form'),
    ]

    extra_css = '''
        a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
        .article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
        h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
        h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
        #articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
        #summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
    '''

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOGIN_URL)
        br.select_form(nr=0)
        br['ssousername'] = self.username
        br['password'] = self.password
        raw = br.submit().read()
        if 'My Account' not in raw:
            raise Exception('Failed to login, are you sure your username and password are correct?')
        self.logout_url = None
        link = br.find_link(text='(sign out)')
        if link:
            self.logout_url = link.absolute_url
        return br

    def cleanup(self):
        if self.logout_url is not None:
            self.browser.open(self.logout_url)

    def map_url(self, url):
        if url.endswith('/ar/1'):
            return url[:-1]+'pr'

    def get_features(self, soup):
        div = soup.find('div', id='issueFeatures')
        for li in div.findAll('li'):
            a = li.find('a', href=True)
            url = 'http://hbr.harvardbusiness.org'+a['href']
            url = self.map_url(url)
            if not url:
                continue
            title = self.tag_to_string(a)
            p = li.find('p')
            desc = ''
            if p is not None:
                desc = self.tag_to_string(p)
            yield {'title':title, 'url':url, 'description':desc}

    def get_departments(self, soup):
        div = soup.find('div', id='issueDepartmentsContent')
        for h4 in div.findAll('h4'):
            feed = self.tag_to_string(h4)
            articles = []
            ul = h4.findNextSibling('ul')
            for li in ul.findAll('li'):
                a = li.find('a', href=True)
                url = 'http://hbr.harvardbusiness.org'+a['href']
                url = self.map_url(url)
                if not url:
                    continue
                title = self.tag_to_string(a)
                p = li.find('p')
                desc = ''
                if p is not None:
                    desc = self.tag_to_string(p)
                articles.append({'title':title, 'url':url, 'description':desc})
            yield [feed, articles]

    def parse_index(self):
        soup = self.index_to_soup(self.INDEX)
        feeds = []
        feeds.append(('Features', list(self.get_features(soup))))
        feeds.extend(self.get_departments(soup))
        return feeds

    def get_cover_url(self):
        cover_url = None
        index = 'http://hbr.harvardbusiness.org/current'
        soup = self.index_to_soup(index)
        link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)

        if link_item:
            cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']

        return cover_url
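
The subscription handling above follows the standard mechanize form-login pattern. A standalone sketch of that pattern (the URL, field names, and success marker below are hypothetical; HBR's real ones are in the recipe):

    import mechanize

    br = mechanize.Browser()
    br.open('http://example.com/login')  # hypothetical login page
    br.select_form(nr=0)                 # pick the first <form> on the page
    br['username'] = 'me'                # field names depend on the site
    br['password'] = 'secret'
    raw = br.submit().read()             # follow the POST and read the result
    if 'My Account' not in raw:          # sniff the response for a logged-in marker
        raise Exception('Failed to login, check username and password')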
@@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
class KellogInsight(BasicNewsRecipe):

    title = 'Kellog Insight'
    __author__ = 'Kovid Goyal'
    __author__ = 'Kovid Goyal and Sujata Raman'
    description = 'Articles from the Kellog School of Management'
    no_stylesheets = True
    encoding = 'utf-8'
    language = 'en'

    oldest_article = 60
    remove_tags_before = {'name':'h1'}
    remove_tags_after = {'class':'col-two-text'}

    keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]

    remove_tags = [dict(name='div', attrs={'class':'col-three'})]

    feeds = [('Articles',
        'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
    extra_css = '''
        h1{font-family:arial; font-size:medium; color:#333333;}
        .col-one{font-family:arial; font-size:xx-small;}
        .col-two{font-family:arial; font-size:x-small; }
        h2{font-family:arial; font-size:small; color:#666666;}
        h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
        h4{color:#660000;font-family:arial; font-size:x-small;}
        .col-two-text{font-family:arial; font-size:x-small; color:#333333;}
    '''

    feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]

    def get_article_url(self, article):
        # Get only article not blog links
@@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
            return link
        self.log('Skipping non-article', link)
        return None

    def preprocess_html(self, soup):

        for tag in soup.findAll(name=['span']):
            tag.nextSibling.name = 'h4'

        return soup
@@ -14,7 +14,7 @@ class NewScientist(BasicNewsRecipe):
    description = 'Science news and science articles from New Scientist.'
    language = 'en'
    publisher = 'New Scientist'
    category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software, sex'
    category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
    delay = 3
    oldest_article = 7
    max_articles_per_feed = 100
@@ -3,50 +3,55 @@ __license__ = 'GPL v3'
'''
philly.com/inquirer/
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe

class Philly(BasicNewsRecipe):

    title = 'Philadelphia Inquirer'
    __author__ = 'RadikalDissent'
    __author__ = 'RadikalDissent and Sujata Raman'
    language = 'en'
    description = 'Daily news from the Philadelphia Inquirer'
    no_stylesheets = True
    use_embedded_content = False
    oldest_article = 1
    max_articles_per_feed = 25

    extra_css = '''
        .byline {font-size: small; color: grey; font-style:italic; }
        .lastline {font-size: small; color: grey; font-style:italic;}
        .contact {font-size: small; color: grey;}
        .contact p {font-size: small; color: grey;}
        h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
        h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .byline {font-size: small; color: #666666; font-style:italic; }
        .lastline {font-size: small; color: #666666; font-style:italic;}
        .contact {font-size: small; color: #666666;}
        .contact p {font-size: small; color: #666666;}
        #photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
        .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
        #photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
        .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
        .article_timestamp{font-size:x-small; color:#666666;}
        a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
    '''
    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<body.*<h1>', lambda match: '<body><h1>'),
            (r'<font size="2" face="Arial">', lambda match: '<div class="contact"><font class="contact">'),
            (r'<font face="Arial" size="2">', lambda match: '<div class="contact"><font class="contact">')
        ]
    ]

    keep_only_tags = [
        dict(name='h1'),
        dict(name='p', attrs={'class':['byline','lastline']}),
        dict(name='div', attrs={'class':'body-content'}),
        ]
        dict(name='div', attrs={'class':'story-content'}),
        dict(name='div', attrs={'id': 'contentinside'})
        ]

    remove_tags = [
        dict(name='hr'),
        dict(name='p', attrs={'class':'buzzBadge'}),
        dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
        dict(name='dl', attrs={'class':'relatedlist'}),
        dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
        dict(name='a', attrs={'class': ['headlineonly','bl']}),
        dict(name='img', attrs={'class':'img_noborder'})
        ]
    def print_version(self, url):
        return url + '?viewAll=y'
    # def print_version(self, url):
    #     return url + '?viewAll=y'


    feeds = [
        ('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
        ('Business', 'http://www.philly.com/inq_business.rss'),
        ('News', 'http://www.philly.com/inquirer/news/index.rss'),
        #('News', 'http://www.philly.com/inquirer/news/index.rss'),
        ('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
        ('Local', 'http://www.philly.com/inquirer_local.rss'),
        ('Health', 'http://www.philly.com/inquirer_health_science.rss'),
@@ -54,3 +59,27 @@ class Philly(BasicNewsRecipe):
        ('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
        ('Sports', 'http://www.philly.com/inquirer_sports.rss')
        ]

    def get_article_url(self, article):
        ans = article.link

        try:
            self.log('Looking for full story link in', ans)
            soup = self.index_to_soup(ans)
            x = soup.find(text="View All")

            if x is not None:
                ans = ans + '?viewAll=y'
                self.log('Found full story link', ans)
        except:
            pass
        return ans

    def postprocess_html(self, soup, first):

        for tag in soup.findAll(name='div', attrs={'class':"container_ate_qandatitle"}):
            tag.extract()
        for tag in soup.findAll(name='br'):
            tag.extract()

        return soup
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

class Sciencenews(BasicNewsRecipe):
    title = u'ScienceNews'
    __author__ = u'Darko Miletic'
    __author__ = u'Darko Miletic and Sujata Raman'
    description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
    oldest_article = 30
    language = 'en'
@@ -19,11 +19,43 @@ class Sciencenews(BasicNewsRecipe):
    use_embedded_content = False
    timefmt = ' [%A, %d %B, %Y]'

    extra_css = '''
        .content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
        .content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
        .content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
        .content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
        .exclusive{color:#FF0000 ;}
        .anonymous{color:#14487E ;}
        .content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
        .description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
        .credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
    '''

    keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
    remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
    remove_tags = [
        dict(name='ul', attrs={'id':'content_functions_bottom'})
        ,dict(name='div', attrs={'id':'content_functions_top'})
        ,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
        ,dict(name='img', attrs={'class':'icon'})
        ,dict(name='div', attrs={'class': 'embiggen'})
        ]

    feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.sciencenews.org/view/home'
        soup = self.index_to_soup(index)
        link_item = soup.find(name = 'img', alt = "issue")
        print link_item
        if link_item:
            cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'

        return cover_url

    def preprocess_html(self, soup):

        for tag in soup.findAll(name=['span']):
            tag.name = 'div'

        return soup
@@ -6,51 +6,86 @@ __docformat__ = 'restructuredtext en'
'''
smh.com.au
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class SMH(BasicNewsRecipe):

    title = 'Sydney Morning Herald'
    description = 'Business News, World News and Breaking News in Australia'
    __author__ = 'Kovid Goyal'
    __author__ = 'Kovid Goyal and Sujata Raman'
    language = 'en_AU'

    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    no_javascript = True

    timefmt = ' [%A, %d %B, %Y]'
    encoding = 'utf-8'

    keep_only_tags = [dict(name='div', attrs ={'id':'content'})]
    remove_tags = [
        dict(name='div', attrs={'align' :'right'}),
        dict(name='p', attrs={'class' :'comments'}),
        dict(name='a', attrs={'class' :['more-photos','performerpromo']}),
        dict(name='img', attrs={'alt' :'aap'}),
        dict(name='div', attrs ={'id':['googleAds','moreGoogleAds','comments','footer','sidebar','austereopuff','adSpotIsland']}),
        dict(name='div', attrs ={'class':['article-links','wof','articleTools top','cN-multimediaGroup cfix','articleTools bottom']}),
        dict(name='div', attrs ={'class':['clear','adSpot-textboxgr1','adSpot-textBox','articleTools-c3 cfix','articleExtras-bottom','span-16 last']}),
        dict(name='div', attrs ={'class':[ 'sidebar span-5','cT-socialCommenting','cN-linkList','cN-topicSelector','cT-storyTools cfix','cT-imageMultimedia']}) ,
        dict(name='iframe'),
        ]

    extra_css = '''
        h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
        .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
        .articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
        .cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
        .source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
        #content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        .pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        #bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
        .featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        #idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
        body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
    '''


    feeds = [
        ('Top Stories', 'http://feeds.smh.com.au/rssheadlines/top.xml'),
        ('National', 'http://feeds.smh.com.au/rssheadlines/national.xml'),
        ('World', 'http://feeds.smh.com.au/rssheadlines/world.xml'),
        ('Business', 'http://www.smh.com.au/rssheadlines/business.xml'),
        ('National Times', 'http://www.smh.com.au/rssheadlines/opinion/article/rss.xml'),
        ('Entertainment', 'http://feeds.smh.com.au/rssheadlines/entertainment.xml'),
        ('Technology', 'http://feeds.smh.com.au/rssheadlines/technology.xml'),
        ('Sport', 'http://feeds.smh.com.au/rssheadlines/sport.xml'),
    ]

    def preprocess_html(self, soup):
        bod = soup.find('bod')
        if bod is not None:
            bod.tag = 'div'
            p = soup.find(id='content')
            bod.extract()
            p.insert(len(p), bod)
        return soup

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        br.set_handle_refresh(False)
        return br

    def parse_index(self):

        soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read())

        feeds, articles = [], []
        feed = None
    def get_article_url(self, article):
        url = article.link
        if 'media' in url:
            url = ''
        return url


        for tag in soup.findAll(['h3', 'a']):
            if tag.name == 'h3':
                if articles:
                    feeds.append((feed, articles))
                    articles = []
                feed = self.tag_to_string(tag)
            elif feed is not None and tag.has_key('href') and tag['href'].strip():
                url = tag['href'].strip()
                if url.startswith('/'):
                    url = 'http://www.smh.com.au' + url
                title = self.tag_to_string(tag)
                articles.append({
                    'title': title,
                    'url' : url,
                    'date' : strftime('%a, %d %b'),
                    'description' : '',
                    'content' : '',
                    })

        return feeds

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.23'
__version__ = '0.6.24'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

import re
@@ -101,8 +101,6 @@ def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
        plugin.site_customization = customization.get(plugin.name, None)
        if plugin.name == 'IsbnDB' and isbndb_key is not None:
            plugin.site_customization = isbndb_key
        if not plugin.is_ok():
            continue
        yield plugin

def get_isbndb_key():
@@ -92,3 +92,8 @@ class POCKETBOOK360(EB600):

    VENDOR_NAME = 'PHILIPS'
    WINDOWS_MAIN_MEM = 'MASS_STORGE'

    OSX_MAIN_MEM = 'Philips Mass Storge Media'
    OSX_CARD_A_MEM = 'Philips Mass Storge Media'
@@ -66,14 +66,24 @@ class USBMS(CLI, Device):
                match = fnmatch.filter(files, '*.%s' % (book_type))
                for i, filename in enumerate(match):
                    self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
                    bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
                    try:
                        bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
                    except: # Probably a filename encoding error
                        import traceback
                        traceback.print_exc()
                        continue
        else:
            path = os.path.join(prefix, ebook_dir)
            paths = os.listdir(path)
            for i, filename in enumerate(paths):
                self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
                if path_to_ext(filename) in self.FORMATS:
                    bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
                    try:
                        bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
                    except: # Probably a file name encoding error
                        import traceback
                        traceback.print_exc()
                        continue

        self.report_progress(1.0, _('Getting list of books on device...'))
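
The change above wraps each book_from_path call so that a single undecodable filename no longer aborts the whole device scan. The same defensive pattern in isolation (collect and process are hypothetical names):

    import traceback

    def collect(items, process):
        results = []
        for item in items:
            try:
                results.append(process(item))
            except Exception:          # e.g. a filename encoding error
                traceback.print_exc()  # log it, keep scanning
                continue
        return results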
@@ -9,9 +9,11 @@ from threading import Thread

from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log

from calibre.ebooks.metadata import MetaInformation
from calibre.customize import Plugin

metadata_config = None

class MetadataSource(Plugin):

    author = 'Kovid Goyal'
@@ -23,11 +25,17 @@ class MetadataSource(Plugin):
    #: tags/rating/reviews/etc.
    metadata_type = 'basic'

    #: If not None, the customization dialog will allow for string
    #: based customization as well the default customization. The
    #: string customization will be saved in the site_customization
    #: member.
    string_customization_help = None

    type = _('Metadata download')

    def __call__(self, title, author, publisher, isbn, verbose, log=None,
            extra=None):
        self.worker = Thread(target=self.fetch)
        self.worker = Thread(target=self._fetch)
        self.worker.daemon = True
        self.title = title
        self.verbose = verbose
@@ -39,23 +47,87 @@ class MetadataSource(Plugin):
        self.exception, self.tb, self.results = None, None, []
        self.worker.start()

    def _fetch(self):
        try:
            self.fetch()
            if self.results:
                c = self.config_store().get(self.name, {})
                res = self.results
                if isinstance(res, MetaInformation):
                    res = [res]
                for mi in res:
                    if not c.get('rating', True):
                        mi.rating = None
                    if not c.get('comments', True):
                        mi.comments = None
                    if not c.get('tags', True):
                        mi.tags = []

        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()

    def fetch(self):
        '''
        All the actual work is done here.
        '''
        raise NotImplementedError

    def is_ok(self):
        '''
        Used to check if the plugin has been correctly customized.
        For example: The isbndb plugin checks to see if the site_customization
        has been set with an isbndb.com access key.
        '''
        return True

    def join(self):
        return self.worker.join()

    def is_customizable(self):
        return True

    def config_store(self):
        global metadata_config
        if metadata_config is None:
            from calibre.utils.config import XMLConfig
            metadata_config = XMLConfig('plugins/metadata_download')
        return metadata_config

    def config_widget(self):
        from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
                QCheckBox
        from calibre.customize.ui import config
        w = QWidget()
        w._layout = QVBoxLayout(w)
        w.setLayout(w._layout)
        if self.string_customization_help is not None:
            w._sc_label = QLabel(self.string_customization_help, w)
            w._layout.addWidget(w._sc_label)
            customization = config['plugin_customization']
            def_sc = customization.get(self.name, '')
            if not def_sc:
                def_sc = ''
            w._sc = QLineEdit(def_sc, w)
            w._layout.addWidget(w._sc)
            w._sc_label.setWordWrap(True)
            w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
                    | Qt.LinksAccessibleByKeyboard)
            w._sc_label.setOpenExternalLinks(True)
        c = self.config_store()
        c = c.get(self.name, {})
        for x, l in {'rating':_('ratings'), 'tags':_('tags'),
                'comments':_('description/reviews')}.items():
            cb = QCheckBox(_('Download %s from %s')%(l,
                self.name))
            setattr(w, '_'+x, cb)
            cb.setChecked(c.get(x, True))
            w._layout.addWidget(cb)
        return w

    def save_settings(self, w):
        dl_settings = {}
        for x in ('rating', 'tags', 'comments'):
            dl_settings[x] = getattr(w, '_'+x).isChecked()
        c = self.config_store()
        c.set(self.name, dl_settings)
        if hasattr(w, '_sc'):
            sc = unicode(w._sc.text()).strip()
            from calibre.customize.ui import customize_plugin
            customize_plugin(self, sc)


class GoogleBooks(MetadataSource):

@@ -102,14 +174,11 @@ class ISBNDB(MetadataSource):
            self.exception = e
            self.tb = traceback.format_exc()

    def customization_help(self, gui=False):
    @property
    def string_customization_help(self):
        ans = _('To use isbndb.com you must sign up for a %sfree account%s '
                'and enter your access key below.')
        if gui:
            ans = '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
        else:
            ans = ans.replace('%s', '')
        return ans
        return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')

class Amazon(MetadataSource):

@@ -191,7 +260,7 @@ def get_social_metadata(mi, verbose=0):
            comments.add(dmi.comments)
    if ratings:
        rating = sum(ratings)/float(len(ratings))
        if mi.rating is None:
        if mi.rating is None or mi.rating < 0.1:
            mi.rating = rating
        else:
            mi.rating = (mi.rating + rating)/2.0
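
The key refactor above is that the worker thread now targets a private _fetch wrapper, which calls the overridable fetch(), post-processes the results, and records any exception for the GUI thread to inspect after join(). A minimal sketch of that pattern (Source is a hypothetical stand-in for MetadataSource):

    import traceback
    from threading import Thread

    class Source(object):
        def __call__(self):
            self.exception = self.tb = None
            self.worker = Thread(target=self._fetch)  # target the wrapper, not fetch itself
            self.worker.daemon = True
            self.worker.start()

        def _fetch(self):
            try:
                self.fetch()                # subclass hook does the real work
            except Exception, e:            # Python 2 syntax, as in the diff
                self.exception = e          # stash for the caller thread
                self.tb = traceback.format_exc()

        def fetch(self):
            raise NotImplementedError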
@@ -3,6 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files'''

import re
from functools import partial

from calibre import prints
@@ -11,10 +12,16 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_

pdfreflow, pdfreflow_error = plugins['pdfreflow']

_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')

def get_metadata(stream, cover=True):
    if pdfreflow is None:
        raise RuntimeError(pdfreflow_error)
    info = pdfreflow.get_metadata(stream.read(), cover)
    raw = stream.read()
    isbn = _isbn_pat.search(raw)
    if isbn is not None:
        isbn = isbn.group(1).replace('-', '').replace(' ', '')
    info = pdfreflow.get_metadata(raw, cover)
    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
@@ -22,6 +29,8 @@ def get_metadata(stream, cover=True):
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)
    if isbn is not None:
        mi.isbn = isbn

    creator = info.get('Creator', None)
    if creator:
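
A quick check of the ISBN pattern added above (the sample string is made up):

    import re

    _isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')
    m = _isbn_pat.search('First edition. ISBN: 0-306-40615-2. Printed in ...')
    if m is not None:
        isbn = m.group(1).replace('-', '').replace(' ', '')
        assert isbn == '0306406152'  # hyphens stripped, ready for mi.isbn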
@@ -777,7 +777,7 @@ class Manifest(object):


        # Remove DOCTYPE declaration as it messes up parsing
        # Inparticular it causes tostring to insert xmlns
        # In particular, it causes tostring to insert xmlns
        # declarations, which messes up the coercing logic
        idx = data.find('<html')
        if idx > -1:
@@ -1746,9 +1746,20 @@ class OEBBook(object):
            return d.replace('\r\n', '\n').replace('\r', '\n')
        if isinstance(data, unicode):
            return fix_data(data)
        if data[:2] in ('\xff\xfe', '\xfe\xff'):
        bom_enc = None
        if data[:4] in ('\0\0\xfe\xff', '\xff\xfe\0\0'):
            bom_enc = {'\0\0\xfe\xff':'utf-32-be',
                       '\xff\xfe\0\0':'utf-32-le'}[data[:4]]
            data = data[4:]
        elif data[:2] in ('\xff\xfe', '\xfe\xff'):
            bom_enc = {'\xff\xfe':'utf-16-le', '\xfe\xff':'utf-16-be'}[data[:2]]
            data = data[2:]
        elif data[:3] == '\xef\xbb\xbf':
            bom_enc = 'utf-8'
            data = data[3:]
        if bom_enc is not None:
            try:
                return fix_data(data.decode('utf-16'))
                return fix_data(data.decode(bom_enc))
            except UnicodeDecodeError:
                pass
        if self.input_encoding is not None:
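
The ordering of those checks matters: the UTF-32-LE BOM ('\xff\xfe\0\0') begins with the UTF-16-LE BOM bytes ('\xff\xfe'), so the four-byte signatures must be tested before the two-byte ones. The same logic as a standalone helper (hypothetical name; Python 2 byte strings, as in the code above):

    def sniff_bom(data):
        # Returns (encoding_or_None, data_with_bom_stripped).
        if data[:4] in ('\0\0\xfe\xff', '\xff\xfe\0\0'):   # UTF-32 first!
            return ({'\0\0\xfe\xff': 'utf-32-be',
                     '\xff\xfe\0\0': 'utf-32-le'}[data[:4]], data[4:])
        if data[:2] in ('\xff\xfe', '\xfe\xff'):           # then UTF-16
            return ({'\xff\xfe': 'utf-16-le',
                     '\xfe\xff': 'utf-16-be'}[data[:2]], data[2:])
        if data[:3] == '\xef\xbb\xbf':                     # UTF-8 BOM
            return ('utf-8', data[3:])
        return (None, data)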
@@ -31,6 +31,13 @@ class BulkConfig(Config):

        self.input_label.hide()
        self.input_formats.hide()
        self.opt_individual_saved_settings.setVisible(True)
        self.opt_individual_saved_settings.setChecked(True)
        self.opt_individual_saved_settings.setToolTip(_('For '
            'settings that cannot be specified in this dialog, use the '
            'values saved in a previous conversion (if they exist) instead '
            'of using the defaults specified in the Preferences'))


        self.connect(self.output_formats, SIGNAL('currentIndexChanged(QString)'),
                self.setup_pipeline)
@@ -116,6 +116,7 @@ class Config(ResizableDialog, Ui_Dialog):
    def __init__(self, parent, db, book_id,
            preferred_input_format=None, preferred_output_format=None):
        ResizableDialog.__init__(self, parent)
        self.opt_individual_saved_settings.setVisible(False)
        self.db, self.book_id = db, book_id

        self.setup_input_output_formats(self.db, self.book_id, preferred_input_format,
@@ -33,6 +33,13 @@
   <item>
    <widget class="QComboBox" name="input_formats"/>
   </item>
   <item>
    <widget class="QCheckBox" name="opt_individual_saved_settings">
     <property name="text">
      <string>Use &amp;saved conversion settings for individual books</string>
     </property>
    </widget>
   </item>
   <item>
    <spacer name="horizontalSpacer">
     <property name="orientation">
@@ -109,7 +116,7 @@
       <x>0</x>
       <y>0</y>
       <width>810</width>
       <height>492</height>
       <height>489</height>
      </rect>
     </property>
     <layout class="QVBoxLayout" name="verticalLayout_3">
@@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.QtCore import SIGNAL, QObject
from PyQt4.QtGui import QDialog

from calibre.gui2 import qstring_to_unicode
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
@@ -86,7 +85,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):

    def sync(self):
        for id in self.ids:
            au = qstring_to_unicode(self.authors.text())
            au = unicode(self.authors.text())
            if au:
                au = string_to_authors(au)
                self.db.set_authors(id, au, notify=False)
@@ -97,28 +96,39 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
                x = authors_to_sort_string(aut)
                if x:
                    self.db.set_author_sort(id, x, notify=False)
            aus = qstring_to_unicode(self.author_sort.text())
            aus = unicode(self.author_sort.text())
            if aus and self.author_sort.isEnabled():
                self.db.set_author_sort(id, aus, notify=False)
            if self.write_rating:
                self.db.set_rating(id, 2*self.rating.value(), notify=False)
            pub = qstring_to_unicode(self.publisher.text())
            pub = unicode(self.publisher.text())
            if pub:
                self.db.set_publisher(id, pub, notify=False)
            remove_tags = qstring_to_unicode(self.remove_tags.text()).strip()
            remove_tags = unicode(self.remove_tags.text()).strip()
            if remove_tags:
                remove_tags = [i.strip() for i in remove_tags.split(',')]
                self.db.unapply_tags(id, remove_tags, notify=False)
            tags = qstring_to_unicode(self.tags.text()).strip()
            tags = unicode(self.tags.text()).strip()
            if tags:
                tags = map(lambda x: x.strip(), tags.split(','))
                self.db.set_tags(id, tags, append=True, notify=False)
            if self.write_series:
                self.db.set_series(id, qstring_to_unicode(self.series.currentText()), notify=False)
                self.db.set_series(id, unicode(self.series.currentText()), notify=False)

            if self.remove_format.currentIndex() > -1:
                self.db.remove_format(id, unicode(self.remove_format.currentText()), index_is_id=True, notify=False)

            if self.swap_title_and_author.isChecked():
                title = self.db.title(id, index_is_id=True)
                aum = self.db.authors(id, index_is_id=True)
                if aum:
                    aum = [a.strip().replace('|', ',') for a in aum.split(',')]
                    new_title = authors_to_string(aum)
                    self.db.set_title(id, new_title)
                if title:
                    new_authors = string_to_authors(title)
                    self.db.set_authors(id, new_authors)

        self.changed = True

    def series_changed(self):
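
The new swap feature relies on calibre's two existing converters, so the data survives the round trip through a display string. A sketch (the sample values below are hypothetical):

    from calibre.ebooks.metadata import string_to_authors, authors_to_string

    # A record whose title and author fields were entered the wrong way round:
    title = 'John Doe'        # actually the author
    authors = ['Some Book']   # actually the title

    new_title = authors_to_string(authors)   # -> 'Some Book'
    new_authors = string_to_authors(title)   # -> ['John Doe']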
@@ -7,7 +7,7 @@
    <x>0</x>
    <y>0</y>
    <width>495</width>
    <height>387</height>
    <height>456</height>
   </rect>
  </property>
  <property name="windowTitle">
@@ -230,6 +230,13 @@
      </property>
     </widget>
    </item>
    <item row="9" column="0" colspan="2">
     <widget class="QCheckBox" name="swap_title_and_author">
      <property name="text">
       <string>&amp;Swap title and author</string>
      </property>
     </widget>
    </item>
   </layout>
  </widget>
 </item>
@@ -552,6 +552,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                warning_dialog(self, _('There were errors'),
                        _('There were errors downloading social metadata'),
                        det_msg=det, show=True)
        else:
            book.tags = []
        self.title.setText(book.title)
        self.authors.setText(authors_to_string(book.authors))
        if book.author_sort: self.author_sort.setText(book.author_sort)
@@ -68,6 +68,7 @@ class LibraryDelegate(QItemDelegate):
        self.drawFocus(painter, option, option.rect)
        try:
            painter.setRenderHint(QPainter.Antialiasing)
            painter.setClipRect(option.rect)
            y = option.rect.center().y()-self.SIZE/2.
            x = option.rect.right() - self.SIZE
            painter.setPen(self.PEN)
@@ -213,19 +213,18 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
                self.device_manager.umount_device)

        ####################### Vanity ########################
        self.vanity_template = _('<p>For help visit <a href="http://%s.'
                'kovidgoyal.net/user_manual">%s.kovidgoyal.net</a>'
                '<br>')%(__appname__, __appname__)
        self.vanity_template = _('<p>For help see the: <a href="%s">User Manual</a>'
                '<br>')%'http://calibre.kovidgoyal.net/user_manual'
        self.vanity_template += _('<b>%s</b>: %s by <b>Kovid Goyal '
            '%%(version)s</b><br>%%(device)s</p>')%(__appname__, __version__)
        self.latest_version = ' '
        self.vanity.setText(self.vanity_template%dict(version=' ', device=' '))
        self.device_info = ' '
        if not opts.no_update_check:
            self.update_checker = CheckForUpdates()
            self.update_checker = CheckForUpdates(self)
            QObject.connect(self.update_checker,
                    SIGNAL('update_found(PyQt_PyObject)'), self.update_found)
            self.update_checker.start()
            self.update_checker.start(2000)
        ####################### Status Bar #####################
        self.status_bar = StatusBar(self.jobs_dialog, self.system_tray_icon)
        self.setStatusBar(self.status_bar)
@@ -246,6 +245,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        md.addAction(_('Download metadata and covers'))
        md.addAction(_('Download only metadata'))
        md.addAction(_('Download only covers'))
        md.addAction(_('Download only social metadata'))
        self.metadata_menu = md
        self.add_menu = QMenu()
        self.add_menu.addAction(_('Add books from a single directory'))
@@ -288,7 +288,10 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
                set_metadata=False)
        QObject.connect(md.actions()[6], SIGNAL('triggered(bool)'),
                self.__em5__)

        self.__em6__ = partial(self.download_metadata, covers=False,
                set_metadata=False, set_social_metadata=True)
        QObject.connect(md.actions()[7], SIGNAL('triggered(bool)'),
                self.__em6__)


        self.save_menu = QMenu()
@@ -1027,7 +1030,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):

    ############################### Edit metadata ##############################

    def download_metadata(self, checked, covers=True, set_metadata=True):
    def download_metadata(self, checked, covers=True, set_metadata=True,
            set_social_metadata=None):
        rows = self.library_view.selectionModel().selectedRows()
        previous = self.library_view.currentIndex()
        if not rows or len(rows) == 0:
@@ -1037,12 +1041,19 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
            return
        db = self.library_view.model().db
        ids = [db.id(row.row()) for row in rows]
        if set_social_metadata is None:
            get_social_metadata = config['get_social_metadata']
        else:
            get_social_metadata = set_social_metadata
        from calibre.gui2.metadata import DownloadMetadata
        self._download_book_metadata = DownloadMetadata(db, ids,
                get_covers=covers, set_metadata=set_metadata,
                get_social_metadata=config['get_social_metadata'])
                get_social_metadata=get_social_metadata)
        self._download_book_metadata.start()
        x = _('covers') if covers and not set_metadata else _('metadata')
        if set_social_metadata is not None and set_social_metadata:
            x = _('social metadata')
        else:
            x = _('covers') if covers and not set_metadata else _('metadata')
        self.progress_indicator.start(
                _('Downloading %s for %d book(s)')%(x, len(ids)))
        self._book_metadata_download_check = QTimer(self)
@@ -1744,6 +1755,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
        if write_settings:
            self.write_settings()
        self.check_messages_timer.stop()
        self.update_checker.stop()
        self.listener.close()
        self.job_manager.server.close()
        while self.spare_servers:
@@ -60,6 +60,7 @@ class DownloadMetadata(Thread):
        self.worker = Worker()
        for id in ids:
            self.metadata[id] = db.get_metadata(id, index_is_id=True)
            self.metadata[id].rating = None

    def run(self):
        self.exception = self.tb = None
@@ -100,15 +101,28 @@ class DownloadMetadata(Thread):
                mi.smart_update(fmi)
            if mi.isbn and self.get_social_metadata:
                self.social_metadata_exceptions = get_social_metadata(mi)
                if mi.rating:
                    mi.rating *= 2
            if not self.get_social_metadata:
                mi.tags = []
        else:
            self.failures[id] = (mi.title,
                    _('No matches found for this book'))
        self.commit_covers()

        self.commit_covers(True)
        if self.set_metadata:
            for id in self.fetched_metadata:
                self.db.set_metadata(id, self.metadata[id])
        for id in self.fetched_metadata:
            mi = self.metadata[id]
            if self.set_metadata:
                self.db.set_metadata(id, mi)
            if not self.set_metadata and self.get_social_metadata:
                if mi.rating:
                    self.db.set_rating(id, mi.rating)
                if mi.tags:
                    self.db.set_tags(id, mi.tags)
                if mi.comments:
                    self.db.set_comment(id, mi.comments)

        self.updated = set(self.fetched_metadata)

@@ -47,7 +47,10 @@ class TagsView(QTreeView):
        ci = self.currentIndex()
        if not ci.isValid():
            ci = self.indexAt(QPoint(10, 10))
        self.model().refresh()
        try:
            self.model().refresh()
        except: # Database connection could be closed if an integrity check is happening
            pass
        if ci.isValid():
            self.scrollTo(ci, QTreeView.PositionAtTop)
@@ -111,17 +111,21 @@ def convert_bulk_ebook(parent, queue, db, book_ids, out_format=None, args=[]):
    user_recs = cPickle.loads(d.recommendations)

    book_ids = convert_existing(parent, db, book_ids, output_format)
    return QueueBulk(parent, book_ids, output_format, queue, db, user_recs, args)
    use_saved_single_settings = d.opt_individual_saved_settings.isChecked()
    return QueueBulk(parent, book_ids, output_format, queue, db, user_recs,
            args, use_saved_single_settings=use_saved_single_settings)

class QueueBulk(QProgressDialog):

    def __init__(self, parent, book_ids, output_format, queue, db, user_recs, args):
    def __init__(self, parent, book_ids, output_format, queue, db, user_recs,
            args, use_saved_single_settings=True):
        QProgressDialog.__init__(self, '',
                QString(), 0, len(book_ids), parent)
        self.setWindowTitle(_('Queueing books for bulk conversion'))
        self.book_ids, self.output_format, self.queue, self.db, self.args, self.user_recs = \
                book_ids, output_format, queue, db, args, user_recs
        self.parent = parent
        self.use_saved_single_settings = use_saved_single_settings
        self.i, self.bad, self.jobs, self.changed = 0, [], [], False
        self.timer = QTimer(self)
        self.connect(self.timer, SIGNAL('timeout()'), self.do_book)
@@ -149,11 +153,12 @@ class QueueBulk(QProgressDialog):

        combined_recs = GuiRecommendations()
        default_recs = load_defaults('%s_input' % input_format)
        specific_recs = load_specifics(self.db, book_id)
        for key in default_recs:
            combined_recs[key] = default_recs[key]
        for key in specific_recs:
            combined_recs[key] = specific_recs[key]
        if self.use_saved_single_settings:
            specific_recs = load_specifics(self.db, book_id)
            for key in specific_recs:
                combined_recs[key] = specific_recs[key]
        for item in self.user_recs:
            combined_recs[item[0]] = item[1]
        save_specifics(self.db, book_id, combined_recs)
@@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

import traceback

from PyQt4.QtCore import QThread, SIGNAL
from PyQt4.QtCore import QObject, SIGNAL, QTimer
import mechanize

from calibre.constants import __version__, iswindows, isosx
@@ -11,9 +11,21 @@ from calibre import browser

URL = 'http://status.calibre-ebook.com/latest'

class CheckForUpdates(QThread):
class CheckForUpdates(QObject):

    def __init__(self, parent):
        QObject.__init__(self, parent)
        self.timer = QTimer(self)
        self.first = True
        self.connect(self.timer, SIGNAL('timeout()'), self)
        self.start = self.timer.start
        self.stop = self.timer.stop

    def __call__(self):
        if self.first:
            self.timer.setInterval(1000*24*60*60)
            self.first = False

    def run(self):
        try:
            br = browser()
            req = mechanize.Request(URL)
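
The refactor above replaces a dedicated QThread with a QTimer firing in the GUI event loop; the object itself is the slot, because any Python callable (including a QObject with __call__) can be connected with the old-style connect. Reduced to its skeleton (PeriodicTask is a hypothetical name; PyQt4-era API, as in the diff):

    from PyQt4.QtCore import QObject, QTimer, SIGNAL

    class PeriodicTask(QObject):
        def __init__(self, parent=None):
            QObject.__init__(self, parent)
            self.timer = QTimer(self)
            self.connect(self.timer, SIGNAL('timeout()'), self)  # self is the slot
            self.start = self.timer.start  # expose the timer's start()/stop(),
            self.stop = self.timer.stop    # mirroring the old QThread interface

        def __call__(self):
            pass  # do the periodic work here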
@@ -9,7 +9,6 @@ Command line interface to the calibre database.

import sys, os, cStringIO
from textwrap import TextWrapper
from urllib import quote

from calibre import terminal_controller, preferred_encoding, prints
from calibre.utils.config import OptionParser, prefs
@@ -48,10 +47,10 @@ XML_TEMPLATE = '''\
        <comments>${record['comments']}</comments>
        <series py:if="record['series']" index="${record['series_index']}">${record['series']}</series>
        <isbn>${record['isbn']}</isbn>
        <cover py:if="record['cover']">${record['cover']}</cover>
        <cover py:if="record['cover']">${record['cover'].replace(os.sep, '/')}</cover>
        <formats py:if="record['formats']">
            <py:for each="path in record['formats']">
                <format>${path}</format>
                <format>${path.replace(os.sep, '/')}</format>
            </py:for>
        </formats>
    </record>
@@ -78,9 +77,9 @@ STANZA_TEMPLATE='''\
    <id>urn:calibre:${record['uuid']}</id>
    <author><name>${record['author_sort']}</name></author>
    <updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%SZ')}</updated>
    <link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/')).replace('http%3A', 'http:')}" />
    <link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
    <link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
    <link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/'))}"/>
    <link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
    <link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
    <content type="xhtml">
        <div xmlns="http://www.w3.org/1999/xhtml">
        <py:for each="f in ('authors', 'publisher', 'rating', 'tags', 'series', 'isbn')">
@@ -186,8 +185,10 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
        return o.getvalue()
    elif output_format == 'xml':
        template = MarkupTemplate(XML_TEMPLATE)
        return template.generate(data=data).render('xml')
        return template.generate(data=data, os=os).render('xml')
    elif output_format == 'stanza':
        def quote(raw):
            return raw.replace('"', r'\"')
        data = [i for i in data if i.has_key('fmt_epub')]
        for x in data:
            if isinstance(x['fmt_epub'], unicode):
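
The template changes pass os into the template namespace so stored paths can be normalized to forward slashes: on Windows, os.sep is a backslash, which is not valid inside an href. The operation in isolation (the path below is hypothetical):

    import os

    path = os.path.join('library', 'An Author', 'A Book (42)', 'cover.jpg')
    href = path.replace(os.sep, '/')  # 'library/An Author/A Book (42)/cover.jpg' on any OS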
@@ -115,7 +115,7 @@ class PostInstall:
        self.info('Creating symlinks...')
        for exe in scripts.keys():
            dest = os.path.join(self.opts.staging_bindir, exe)
            if os.path.exists(dest):
            if os.path.lexists(dest):
                os.unlink(dest)
            tgt = os.path.join(getattr(sys, 'frozen_path'), exe)
            self.info('\tSymlinking %s to %s'%(tgt, dest))
@@ -421,7 +421,7 @@ button in the individual book conversion dialog.
When you Bulk Convert a set of books, settings are taken in the following order:

  * From the defaults set in Preferences->Conversion
  * From the saved conversion settings for each book being converted (if any)
  * From the saved conversion settings for each book being converted (if any). This can be turned off by the option in the top left corner of the Bulk Conversion dialog.
  * From the settings set in the Bulk conversion dialog

Note that the final settings for each book in a Bulk Conversion will be saved and re-used if the book is converted again. Since the
@@ -81,7 +81,7 @@ Device Integration

What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, PocketBook 360, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.

How can I help get my device supported in |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -108,7 +108,7 @@ Metadata download plugins
 .. class:: calibre.ebooks.metadata.fetch.MetadataSource
 
     Represents a source to query for metadata. Subclasses must implement
-    at least the fetch method and optionally the is_ok method.
+    at least the fetch method.
 
     When :meth:`fetch` is called, the `self` object will have the following
     useful attributes (each of which may be None)::
@@ -124,8 +124,9 @@ Metadata download plugins
 
 .. automember:: calibre.ebooks.metadata.fetch.MetadataSource.metadata_type
 
+.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.string_customization_help
+
 .. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.fetch
 
 .. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.is_ok
 
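Since subclasses now only need to implement fetch(), a hedged sketch of what a minimal metadata plugin might look like. The class name and the my_lookup() helper are invented for illustration, and the results/exception attributes reflect how fetch() implementations typically report back, not documented API.

import traceback
from calibre.ebooks.metadata.fetch import MetadataSource

class ExampleSource(MetadataSource):          # hypothetical plugin

    name = 'Example'                          # hypothetical

    def fetch(self):
        try:
            # self.title, self.book_author, self.publisher, self.isbn
            # may each be None, per the docstring above
            self.results = my_lookup(self.title, self.book_author)  # hypothetical helper
        except Exception, e:
            self.exception = e
            self.tb = traceback.format_exc()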
File diff suppressed because it is too large (13 files)
@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Manage application-wide preferences.
 '''
-import os, re, cPickle, textwrap, traceback
+import os, re, cPickle, textwrap, traceback, plistlib
 from copy import deepcopy
 from functools import partial
 from optparse import OptionParser as _OptionParser
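The newly imported plistlib is a standard-library module. A minimal round-trip showing the two calls that the XMLConfig class added further down builds on:

import plistlib

raw = plistlib.writePlistToString({'answer': 42})   # dict -> XML plist string
print plistlib.readPlistFromString(raw)             # -> {'answer': 42}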
@@ -34,9 +34,11 @@ else:
 
 plugin_dir = os.path.join(config_dir, 'plugins')
 
+CONFIG_DIR_MODE = 0700
+
 def make_config_dir():
     if not os.path.exists(plugin_dir):
-        os.makedirs(plugin_dir, mode=448) # 0700 == 448
+        os.makedirs(plugin_dir, mode=CONFIG_DIR_MODE)
 
 def check_config_write_access():
     return os.access(config_dir, os.W_OK) and os.access(config_dir, os.X_OK)
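A note on the new constant: 0700 is a Python 2 octal literal (read, write and execute for the owner, no access for group or others), the same value the old call spelled as decimal 448.

assert 0700 == 448          # same number, octal vs. decimal spelling
assert 0700 == 7 * 64       # owner bits only: rwx for the owner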
@@ -552,6 +554,72 @@ class DynamicConfig(dict):
 
 dynamic = DynamicConfig()
 
+class XMLConfig(dict):
+
+    '''
+    Similar to :class:`DynamicConfig`, except that it uses an XML storage
+    backend instead of a pickle file.
+
+    See `http://docs.python.org/dev/library/plistlib.html`_ for the supported
+    data types.
+    '''
+
+    def __init__(self, rel_path_to_cf_file):
+        dict.__init__(self)
+        self.file_path = os.path.join(config_dir,
+                *(rel_path_to_cf_file.split('/')))
+        self.file_path = os.path.abspath(self.file_path)
+        if not self.file_path.endswith('.plist'):
+            self.file_path += '.plist'
+
+        self.refresh()
+
+    def refresh(self):
+        d = {}
+        if os.path.exists(self.file_path):
+            with ExclusiveFile(self.file_path) as f:
+                raw = f.read()
+                try:
+                    d = plistlib.readPlistFromString(raw) if raw.strip() else {}
+                except SystemError:
+                    pass
+                except:
+                    import traceback
+                    traceback.print_exc()
+                    d = {}
+        self.clear()
+        self.update(d)
+
+    def __getitem__(self, key):
+        try:
+            ans = dict.__getitem__(self, key)
+            if isinstance(ans, plistlib.Data):
+                ans = ans.data
+            return ans
+        except KeyError:
+            return None
+
+    def __setitem__(self, key, val):
+        if isinstance(val, (bytes, str)):
+            val = plistlib.Data(val)
+        dict.__setitem__(self, key, val)
+        self.commit()
+
+    def set(self, key, val):
+        self.__setitem__(key, val)
+
+    def commit(self):
+        if hasattr(self, 'file_path') and self.file_path:
+            dpath = os.path.dirname(self.file_path)
+            if not os.path.exists(dpath):
+                os.makedirs(dpath, mode=CONFIG_DIR_MODE)
+            with ExclusiveFile(self.file_path) as f:
+                raw = plistlib.writePlistToString(self)
+                f.seek(0)
+                f.truncate()
+                f.write(raw)
+
+
 def _prefs():
     c = Config('global', 'calibre wide preferences')
     c.add_opt('database_path',
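A usage sketch for the new class, assuming a made-up 'example/prefs' relative path; the backing file ends up under config_dir as example/prefs.plist.

from calibre.utils.config import XMLConfig

prefs = XMLConfig('example/prefs')   # hypothetical relative path
prefs['remember_position'] = True    # __setitem__ commits to disk at once
print prefs['remember_position']     # -> True
print prefs['no_such_key']           # -> None instead of raising KeyError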
@@ -56,6 +56,8 @@ def _quoteattr(data, entities={}):
     the optional entities parameter.  The keys and values must all be
     strings; each key will be replaced with its corresponding value.
     """
+    entities['\n']='&#10;'
+    entities['\r']='&#13;'
     data = _escape(data, entities)
     if '"' in data:
         if "'" in data:
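Why the two new lines: an XML parser normalizes a literal newline inside an attribute value to a space, so a round-trip would silently change the value, whereas the character reference &#10; survives. A self-contained sketch of the same idea using the stdlib escape():

from xml.sax.saxutils import escape

def quoteattr_sketch(data):
    # same idea as the change above: map CR/LF to character references
    entities = {'\n': '&#10;', '\r': '&#13;'}
    return '"%s"' % escape(data, entities)

print quoteattr_sketch('line one\nline two')   # "line one&#10;line two"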
@@ -17,7 +17,7 @@
 #
 # Contributor(s):
 #
-TOOLSVERSION = u"ODFPY/0.9.1dev"
+TOOLSVERSION = u"ODFPY/0.9.2dev"
 
 ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
 DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@@ -185,7 +185,7 @@ class OpenDocument:
         if self.fontfacedecls.hasChildNodes():
             self.fontfacedecls.toXml(1, xml)
         a = AutomaticStyles()
-        stylelist = self._used_auto_styles([self.styles, self.body])
+        stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
         if len(stylelist) > 0:
             a.write_open_tag(1, xml)
             for s in stylelist:
@@ -233,9 +233,11 @@ class OpenDocument:
             for styleref in ( (DRAWNS,u'style-name'),
                     (DRAWNS,u'text-style-name'),
                     (PRESENTATIONNS,u'style-name'),
-                    (STYLENS,u'style-name'),
+                    (STYLENS,u'data-style-name'),
+                    (STYLENS,u'list-style-name'),
+                    (STYLENS,u'page-layout-name'),
+                    (STYLENS,u'style-name'),
                     (TABLENS,u'default-cell-style-name'),
                     (TABLENS,u'style-name'),
                     (TEXTNS,u'style-name') ):
                 if e.getAttrNS(styleref[0],styleref[1]):
@@ -50,3 +50,5 @@ def Radialgradient(**args):
 def Stop(**args):
     return Element(qname = (SVGNS,'stop'), **args)
 
+def Title(**args):
+    return Element(qname = (SVGNS,'title'), **args)
@@ -446,6 +446,9 @@ def SequenceRef(**args):
 def SheetName(**args):
     return Element(qname = (TEXTNS,'sheet-name'), **args)
 
+def SoftPageBreak(**args):
+    return Element(qname = (TEXTNS,'soft-page-break'), **args)
+
 def SortKey(**args):
     return Element(qname = (TEXTNS,'sort-key'), **args)
 
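A small usage sketch for the new element factory via odfpy's document API. Soft page breaks are normally written by office applications rather than inserted by hand, so this is purely illustrative; the output filename is a placeholder.

from odf.opendocument import OpenDocumentText
from odf.text import P, SoftPageBreak

doc = OpenDocumentText()
doc.text.addElement(P(text=u'First page'))
doc.text.addElement(SoftPageBreak())        # emits <text:soft-page-break/>
doc.text.addElement(P(text=u'Second page'))
doc.save('twopages.odt')                    # hypothetical output filename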
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
 #
 # This is free software. You may redistribute it under the terms
 # of the Apache license and the GNU General Public License Version
@@ -22,16 +22,11 @@
 """Class to show and manipulate user fields in odf documents."""
 
 import sys
-import time
 import zipfile
 
-import xml.sax
-import xml.sax.handler
-import xml.sax.saxutils
-
-from odf.namespaces import OFFICENS, TEXTNS
-
-from cStringIO import StringIO
+from odf.text import UserFieldDecl
+from odf.namespaces import OFFICENS
+from odf.opendocument import load
 
 OUTENCODING = "utf-8"
 
@@ -64,6 +59,26 @@ class UserFields(object):
         """
         self.src_file = src
         self.dest_file = dest
+        self.document = None
+
+    def loaddoc(self):
+        if isinstance(self.src_file, basestring):
+            # src_file is a filename, check if it is a zip-file
+            if not zipfile.is_zipfile(self.src_file):
+                raise TypeError("%s is no odt file." % self.src_file)
+        elif self.src_file is None:
+            # use stdin if no file given
+            self.src_file = sys.stdin
+
+        self.document = load(self.src_file)
+
+    def savedoc(self):
+        # write output
+        if self.dest_file is None:
+            # use stdout if no filename given
+            self.document.save('-')
+        else:
+            self.document.save(self.dest_file)
 
     def list_fields(self):
         """List (extract) all known user-fields.
@@ -81,15 +96,21 @@ class UserFields(object):
         Returns list of tuples (<field name>, <field type>, <value>).
 
         """
+        self.loaddoc()
         found_fields = []
-        def _callback(field_name, value_type, value, attrs):
+        all_fields = self.document.getElementsByType(UserFieldDecl)
+        for f in all_fields:
+            value_type = f.getAttribute('valuetype')
+            if value_type == 'string':
+                value = f.getAttribute('stringvalue')
+            else:
+                value = f.getAttribute('value')
+            field_name = f.getAttribute('name')
 
             if field_names is None or field_name in field_names:
                 found_fields.append((field_name.encode(OUTENCODING),
                                      value_type.encode(OUTENCODING),
                                      value.encode(OUTENCODING)))
-            return attrs
 
-        self._content_handler(_callback)
         return found_fields
 
     def list_values(self, field_names):
@@ -133,199 +154,16 @@ class UserFields(object):
         Returns None
 
         """
-        def _callback(field_name, value_type, value, attrs):
-            if field_name in data:
-                valattr = VALUE_TYPES.get(value_type)
-                attrs = dict(attrs.items())
-                # Take advantage that startElementNS can take a normal
-                # dict as attrs
-                attrs[valattr] = data[field_name]
-            return attrs
-        self._content_handler(_callback, write_file=True)
-
-    def _content_handler(self, callback_func, write_file=False):
-        """Handle the content using the callback function and write result if
-        necessary.
-
-        callback_func ... function called for each field found in odf document
-          signature: field_name ... name of current field
-                     value_type ... type of current field
-                     value ... value of current field
-                     attrs ... tuple of attrs of current field
-          returns: tuple or dict of attrs
-        write_file ... boolean telling wether write result to file
-
-        """
-        class DevNull(object):
-            """IO-object which behaves like /dev/null."""
-            def write(self, str):
-                pass
-
-        # get input
-        if isinstance(self.src_file, basestring):
-            # src_file is a filename, check if it is a zip-file
-            if not zipfile.is_zipfile(self.src_file):
-                raise TypeError("%s is no odt file." % self.src_file)
-        elif self.src_file is None:
-            # use stdin if no file given
-            self.src_file = sys.stdin
-
-        zin = zipfile.ZipFile(self.src_file, 'r')
-        content_xml = zin.read('content.xml')
-
-        # prepare output
-        if write_file:
-            output_io = StringIO()
-            if self.dest_file is None:
-                # use stdout if no filename given
-                self.dest_file = sys.stdout
-            zout = zipfile.ZipFile(self.dest_file, 'w')
-        else:
-            output_io = DevNull()
-
-
-        # parse input
-        odfs = ODFContentParser(callback_func, output_io)
-        parser = xml.sax.make_parser()
-        parser.setFeature(xml.sax.handler.feature_namespaces, 1)
-        parser.setContentHandler(odfs)
-        parser.parse(StringIO(content_xml))
-
-        # write output
-        if write_file:
-            # Loop through the input zipfile and copy the content to
-            # the output until we get to the content.xml. Then
-            # substitute.
-            for zinfo in zin.infolist():
-                if zinfo.filename == "content.xml":
-                    # Write meta
-                    zi = zipfile.ZipInfo("content.xml", time.localtime()[:6])
-                    zi.compress_type = zipfile.ZIP_DEFLATED
-                    zout.writestr(zi, odfs.content())
+        self.loaddoc()
+        all_fields = self.document.getElementsByType(UserFieldDecl)
+        for f in all_fields:
+            field_name = f.getAttribute('name')
+            if data.has_key(field_name):
+                value_type = f.getAttribute('valuetype')
+                value = data.get(field_name)
+                if value_type == 'string':
+                    f.setAttribute('stringvalue', value)
                 else:
-                    payload = zin.read(zinfo.filename)
-                    zout.writestr(zinfo, payload)
-            zout.close()
-            zin.close()
+                    f.setAttribute('value', value)
+        self.savedoc()
 
 
-class ODFContentParser(xml.sax.saxutils.XMLGenerator):
-
-    def __init__(self, callback_func, out=None, encoding=OUTENCODING):
-        """Constructor.
-
-        callback_func ... function called for each field found in odf document
-          signature: field_name ... name of current field
-                     value_type ... type of current field
-                     value ... value of current field
-                     attrs ... tuple of attrs of current field
-          returns: tuple or dict of attrs
-        out ... file like object for output
-        encoding ... encoding for output
-
-        """
-        self._callback_func = callback_func
-        xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding)
-
-    def _qname(self, name):
-        """Builds a qualified name from a (ns_url, localname) pair"""
-        if name[0]:
-            if name[0] == u'http://www.w3.org/XML/1998/namespace':
-                return u'xml' + ":" + name[1]
-            # The name is in a non-empty namespace
-            prefix = self._current_context[name[0]]
-            if prefix:
-                # If it is not the default namespace, prepend the prefix
-                return prefix + ":" + name[1]
-        # Return the unqualified name
-        return name[1]
-
-    def startElementNS(self, name, qname, attrs):
-        if name == (TEXTNS, u'user-field-decl'):
-            field_name = attrs.get((TEXTNS, u'name'))
-            value_type = attrs.get((OFFICENS, u'value-type'))
-            if value_type == 'string':
-                value = attrs.get((OFFICENS, u'string-value'))
-            else:
-                value = attrs.get((OFFICENS, u'value'))
-
-            attrs = self._callback_func(field_name, value_type, value, attrs)
-
-        self._startElementNS(name, qname, attrs)
-
-    def _startElementNS(self, name, qname, attrs):
-        # copy of xml.sax.saxutils.XMLGenerator.startElementNS
-        # necessary because we have to provide our own writeattr
-        # function which is called by this method
-        if name[0] is None:
-            name = name[1]
-        elif self._current_context[name[0]] is None:
-            # default namespace
-            name = name[1]
-        else:
-            name = self._current_context[name[0]] + ":" + name[1]
-        self._out.write('<' + name)
-
-        for k,v in self._undeclared_ns_maps:
-            if k is None:
-                self._out.write(' xmlns="%s"' % (v or ''))
-            else:
-                self._out.write(' xmlns:%s="%s"' % (k,v))
-        self._undeclared_ns_maps = []
-
-        for (name, value) in attrs.items():
-            if name[0] is None:
-                name = name[1]
-            elif self._current_context[name[0]] is None:
-                # default namespace
-                #If an attribute has a nsuri but not a prefix, we must
-                #create a prefix and add a nsdecl
-                prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
-                self._generated_prefix_ctr = self._generated_prefix_ctr + 1
-                name = prefix + ':' + name[1]
-                self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0])))
-                self._current_context[name[0]] = prefix
-            else:
-                name = self._current_context[name[0]] + ":" + name[1]
-            self._out.write(' %s=' % name)
-            writeattr(self._out, value)
-        self._out.write('>')
-
-    def content(self):
-        return self._out.getvalue()
-
-
-ATTR_ENTITIES = {
-    '\n': '&#10;'  # convert newlines into entities inside attributes
-}
-
-
-def writetext(stream, text, entities={}):
-    text = xml.sax.saxutils.escape(text, entities)
-    try:
-        stream.write(text)
-    except UnicodeError:
-        for c in text:
-            try:
-                stream.write(c)
-            except UnicodeError:
-                stream.write(u"&#%d;" % ord(c))
-
-def writeattr(stream, text):
-    # copied from xml.sax.saxutils.writeattr added support for an
-    # additional entity mapping
-    countdouble = text.count('"')
-    entities = ATTR_ENTITIES.copy()
-    if countdouble:
-        countsingle = text.count("'")
-        if countdouble <= countsingle:
-            entities['"'] = "&quot;"
-            quote = '"'
-        else:
-            entities["'"] = "&apos;"
-            quote = "'"
-    else:
-        quote = '"'
-    stream.write(quote)
-    writetext(stream, text, entities)
-    stream.write(quote)
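With the SAX machinery gone, the UserFields workflow reduces to load, modify, save. A usage sketch with placeholder filenames and field names:

from odf.userfield import UserFields

uf = UserFields('template.odt', 'filled.odt')   # placeholder filenames
print uf.list_fields()                          # names of declared user fields
uf.update({'customer': u'ACME Corp.'})          # loaddoc(), setAttribute(), savedoc()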