Sync to trunk.

John Schember 2009-11-21 21:22:28 -05:00
commit d96542418a
52 changed files with 5724 additions and 4549 deletions

View File

@ -0,0 +1,87 @@
#!/usr/bin/python
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'nl'
description = u'Popular Dutch daily cartoon Fokke en Sukke'
title = u'Fokke en Sukke'
no_stylesheets = True
# For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
# with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
template_css = ''
INDEX = u'http://foksuk.nl'
# This cover is not as nice as it could be, needs some work
#cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
def parse_index(self) :
# A list with daynames as they _can_ appear in the index
dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag']
soup = self.index_to_soup(self.INDEX)
# Find the links for the various cartoons for this week and loop through them
index = soup.find('div', attrs={'class' : 'selectcartoon'})
links = index.findAll('a')
maxIndex = len(links) - 1
articles = []
for i in range(len(links)) :
# The first link does not interest us, as it points to no cartoon, so skip it (range(1, len(links)) would avoid this check).
if i == 0 :
continue
# There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
if links[i].renderContents() in dayNames :
# If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
# Got you! Add it to the list
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
articles.append(article)
# If there is a '1', there should be a '2' as well, but better safe than sorry
if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') :
# Got you! Add it to the list
article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url' : self.INDEX + links[i + 2]['href'], 'description' : ''}
articles.append(article)
else :
# There is only one cartoon for this day. Add it to the list.
article = {'title' : links[i].renderContents(), 'date' : u'', 'url' : self.INDEX + links[i]['href'], 'description' : ''}
articles.append(article)
# Might as well use the week number as the title
week = index.find('span', attrs={'class' : 'week'}).renderContents()
return [[week, articles]]
def preprocess_html(self, soup) :
# This method is called for every page, be it cartoon or TOC. We need to process each in its own way
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
if cartoon :
# It is a cartoon. Extract the title.
title = ''
img = soup.find('img', attrs = {'alt' : True})
if img :
title = img['alt']
# Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
tag.insert(0, title)
cartoon.insert(0, tag)
# I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
# and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
if select :
select.extract()
return cartoon
else :
# It is a TOC. Just return the whole lot.
return soup
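For readers not familiar with calibre recipes, the value that parse_index() above assembles is a list of (feed title, article list) pairs, with each article a small dict. A minimal sketch of that shape (illustrative only, with hypothetical URLs; not part of the commit):

# Sketch of the structure parse_index() returns for this recipe.
week = u'week 47'                               # feed title, taken from the 'week' span
articles = [
    {'title': u'maandag',   'date': u'', 'url': u'http://foksuk.nl/...', 'description': ''},
    {'title': u'dinsdag 1', 'date': u'', 'url': u'http://foksuk.nl/...', 'description': ''},
    {'title': u'dinsdag 2', 'date': u'', 'url': u'http://foksuk.nl/...', 'description': ''},
]
index = [[week, articles]]                      # what parse_index() hands back to calibre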

View File

@ -43,97 +43,45 @@ class Guardian(BasicNewsRecipe):
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
''' '''
def find_sections(self):
soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
# find cover pic
img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'})
if img is not None:
self.cover_url = img['src']
# end find cover pic
idx = soup.find('div', id='book-index')
for s in idx.findAll('strong', attrs={'class':'book'}):
a = s.find('a', href=True)
yield (self.tag_to_string(a), a['href'])
def find_articles(self, url):
soup = self.index_to_soup(url)
div = soup.find('div', attrs={'class':'book-index'})
for ul in div.findAll('ul', attrs={'class':'trailblock'}):
for li in ul.findAll('li'):
a = li.find(href=True)
if not a:
continue
title = self.tag_to_string(a)
url = a['href']
if not title or not url:
continue
tt = li.find('div', attrs={'class':'trailtext'})
if tt is not None:
for da in tt.findAll('a'): da.extract()
desc = self.tag_to_string(tt).strip()
yield {
'title': title, 'url':url, 'description':desc,
'date' : strftime('%a, %d %b'),
}
def parse_index(self): def parse_index(self):
feeds = []
soup = self.index_to_soup('http://www.guardian.co.uk/theguardian') for title, href in self.find_sections():
# find cover pic feeds.append((title, list(self.find_articles(href))))
img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'}) return feeds
if img is None: return None
else:
self.cover_url = img['src']
# end find cover pic
sections = []
ans = []
for li in soup.findAll( 'li'):
section = ''
articles = []
if li.a and li.a.has_key('href'):
url = li.a['href']
if 'mainsection' in url:
section = self.tag_to_string(url)
i = len(section)
index1 = section.rfind('/',0,i)
section = section[index1+1:i]
sections.append(section)
#find the articles in the Main Section start
soup = self.index_to_soup(url)
date = strftime('%a, %d %b')
descl = []
for desclist in soup.findAll(name='div',attrs={'class':"trailtext"}):
descl.append(self.tag_to_string(desclist).strip())
t = -1
for tag in soup.findAll('h3'):
t = t+1
for a in tag.findAll('a'):
if t < len(descl):
desc = descl[t]
else:
desc = ''
if a and a.has_key('href'):
url2 = a['href']
else:
url2 =''
title = self.tag_to_string(a)
if len(articles) == 0: #First article
articles.append({
'title':title,
'date':date,
'url':url2,
'description':desc,
})
else:
#eliminate duplicates start
if {'title':title,'date':date,'url':url2,'description':desc} in articles :
url2 = ''
#eliminate duplicates end
else:
if 'http://jobs.guardian.co.uk/' in url2:
url2 = ''
else:
articles.append({
'title':title,
'date':date,
'url':url2,
'description':desc,
})
#find the articles in the Main Section end
ans.append( articles)
else:
url =''
titles = map(self.find_title, sections)
ans1 = list(zip(titles,ans))
return ans1[2:]
def find_title(self, section):
d = {'topstories':'Top Stories', 'international':'International', 'editorialsandreply':'Editorials and Reply',
'commentanddebate':'Comment and Debate','uknews':'UK News','saturday':'Saturday','sunday':'Sunday',
'reviews':'Reviews', 'obituaries':'Obituaries'}
return d.get(section, section)
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -0,0 +1,110 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class HBR(BasicNewsRecipe):
title = 'Harvard Business Review'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal and Sujata Raman'
timefmt = ' [%B %Y]'
language = 'en'
no_stylesheets = True
LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
INDEX = 'http://hbr.harvardbusiness.org/current'
keep_only_tags = [dict(name='div', id='content')]
remove_tags = [
dict(id=['articleDate', 'subscriptionModule', 'errorArea',
'feedbackForm', 'relatedModule', 'articleSecondaryModule',
'contentRight', 'summaryLink']),
dict(name='form'),
]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOGIN_URL)
br.select_form(nr=0)
br['ssousername'] = self.username
br['password'] = self.password
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='(sign out)')
if link:
self.logout_url = link.absolute_url
return br
def cleanup(self):
if self.logout_url is not None:
self.browser.open(self.logout_url)
def map_url(self, url):
if url.endswith('/ar/1'):
return url[:-1]+'pr'
def get_features(self, soup):
div = soup.find('div', id='issueFeatures')
for li in div.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
title = self.tag_to_string(a)
p = li.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
yield {'title':title, 'url':url, 'description':desc}
def get_departments(self, soup):
div = soup.find('div', id='issueDepartmentsContent')
for h4 in div.findAll('h4'):
feed = self.tag_to_string(h4)
articles = []
ul = h4.findNextSibling('ul')
for li in ul.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
title = self.tag_to_string(a)
p = li.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
articles.append({'title':title, 'url':url, 'description':desc})
yield [feed, articles]
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
feeds = []
feeds.append(('Features', list(self.get_features(soup))))
feeds.extend(self.get_departments(soup))
return feeds
def get_cover_url(self):
cover_url = None
index = 'http://hbr.harvardbusiness.org/current'
soup = self.index_to_soup(index)
link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
if link_item:
cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
return cover_url
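As a side note, map_url() above returns the print-friendly URL only for links ending in '/ar/1'; anything else falls through to None and is skipped by get_features() and get_departments(). A tiny illustration with a made-up article URL (not part of the commit):

def map_url(url):
    # Same logic as HBR.map_url: rewrite .../ar/1 to the print version .../ar/pr
    if url.endswith('/ar/1'):
        return url[:-1] + 'pr'

print(map_url('http://hbr.harvardbusiness.org/2009/11/example-article/ar/1'))
# -> http://hbr.harvardbusiness.org/2009/11/example-article/ar/pr
print(map_url('http://hbr.harvardbusiness.org/current'))
# -> None, so the calling code skips this link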

View File

@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
class KellogInsight(BasicNewsRecipe): class KellogInsight(BasicNewsRecipe):
title = 'Kellog Insight' title = 'Kellog Insight'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
description = 'Articles from the Kellog School of Management' description = 'Articles from the Kellog School of Management'
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
language = 'en' language = 'en'
oldest_article = 60 oldest_article = 60
remove_tags_before = {'name':'h1'}
remove_tags_after = {'class':'col-two-text'}
keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]
remove_tags = [dict(name='div', attrs={'class':'col-three'})]
feeds = [('Articles', extra_css = '''
'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')] h1{font-family:arial; font-size:medium; color:#333333;}
.col-one{font-family:arial; font-size:xx-small;}
.col-two{font-family:arial; font-size:x-small; }
h2{font-family:arial; font-size:small; color:#666666;}
h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
h4{color:#660000;font-family:arial; font-size:x-small;}
.col-two-text{font-family:arial; font-size:x-small; color:#333333;}
'''
feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
def get_article_url(self, article): def get_article_url(self, article):
# Get only article not blog links # Get only article not blog links
@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
return link return link
self.log('Skipping non-article', link) self.log('Skipping non-article', link)
return None return None
def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.nextSibling.name = 'h4'
return soup

View File

@ -14,7 +14,7 @@ class NewScientist(BasicNewsRecipe):
description = 'Science news and science articles from New Scientist.' description = 'Science news and science articles from New Scientist.'
language = 'en' language = 'en'
publisher = 'New Scientist' publisher = 'New Scientist'
category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software, sex' category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
delay = 3 delay = 3
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -3,50 +3,55 @@ __license__ = 'GPL v3'
''' '''
philly.com/inquirer/ philly.com/inquirer/
''' '''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Philly(BasicNewsRecipe): class Philly(BasicNewsRecipe):
title = 'Philadelphia Inquirer' title = 'Philadelphia Inquirer'
__author__ = 'RadikalDissent' __author__ = 'RadikalDissent and Sujata Raman'
language = 'en' language = 'en'
description = 'Daily news from the Philadelphia Inquirer' description = 'Daily news from the Philadelphia Inquirer'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 25 max_articles_per_feed = 25
extra_css = ''' extra_css = '''
.byline {font-size: small; color: grey; font-style:italic; } h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
.lastline {font-size: small; color: grey; font-style:italic;} h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.contact {font-size: small; color: grey;} .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.contact p {font-size: small; color: grey;} .byline {font-size: small; color: #666666; font-style:italic; }
.lastline {font-size: small; color: #666666; font-style:italic;}
.contact {font-size: small; color: #666666;}
.contact p {font-size: small; color: #666666;}
#photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
.photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
#photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.article_timestamp{font-size:x-small; color:#666666;}
a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
''' '''
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<body.*<h1>', lambda match: '<body><h1>'),
(r'<font size="2" face="Arial">', lambda match: '<div class="contact"><font class="contact">'),
(r'<font face="Arial" size="2">', lambda match: '<div class="contact"><font class="contact">')
]
]
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), dict(name='div', attrs={'class':'story-content'}),
dict(name='p', attrs={'class':['byline','lastline']}), dict(name='div', attrs={'id': 'contentinside'})
dict(name='div', attrs={'class':'body-content'}), ]
]
remove_tags = [ remove_tags = [
dict(name='hr'), dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
dict(name='p', attrs={'class':'buzzBadge'}), dict(name='dl', attrs={'class':'relatedlist'}),
dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
dict(name='a', attrs={'class': ['headlineonly','bl']}),
dict(name='img', attrs={'class':'img_noborder'})
] ]
def print_version(self, url): # def print_version(self, url):
return url + '?viewAll=y' # return url + '?viewAll=y'
feeds = [ feeds = [
('Front Page', 'http://www.philly.com/inquirer_front_page.rss'), ('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
('Business', 'http://www.philly.com/inq_business.rss'), ('Business', 'http://www.philly.com/inq_business.rss'),
('News', 'http://www.philly.com/inquirer/news/index.rss'), #('News', 'http://www.philly.com/inquirer/news/index.rss'),
('Nation', 'http://www.philly.com/inq_news_world_us.rss'), ('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
('Local', 'http://www.philly.com/inquirer_local.rss'), ('Local', 'http://www.philly.com/inquirer_local.rss'),
('Health', 'http://www.philly.com/inquirer_health_science.rss'), ('Health', 'http://www.philly.com/inquirer_health_science.rss'),
@ -54,3 +59,27 @@ class Philly(BasicNewsRecipe):
('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'), ('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
('Sports', 'http://www.philly.com/inquirer_sports.rss') ('Sports', 'http://www.philly.com/inquirer_sports.rss')
] ]
def get_article_url(self, article):
ans = article.link
try:
self.log('Looking for full story link in', ans)
soup = self.index_to_soup(ans)
x = soup.find(text="View All")
if x is not None:
ans = ans + '?viewAll=y'
self.log('Found full story link', ans)
except:
pass
return ans
def postprocess_html(self, soup,first):
for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}):
tag.extract()
for tag in soup.findAll(name='br'):
tag.extract()
return soup

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Sciencenews(BasicNewsRecipe): class Sciencenews(BasicNewsRecipe):
title = u'ScienceNews' title = u'ScienceNews'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic and Sujata Raman'
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News." description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
oldest_article = 30 oldest_article = 30
language = 'en' language = 'en'
@ -17,13 +17,45 @@ class Sciencenews(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]' timefmt = ' [%A, %d %B, %Y]'
extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ] keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'}) remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
remove_tags = [ remove_tags = [
dict(name='ul', attrs={'id':'content_functions_bottom'}) dict(name='ul', attrs={'id':'content_functions_bottom'})
,dict(name='div', attrs={'id':'content_functions_top'}) ,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
,dict(name='img', attrs={'class':'icon'})
,dict(name='div', attrs={'class': 'embiggen'})
] ]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')] feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
def get_cover_url(self):
cover_url = None
index = 'http://www.sciencenews.org/view/home'
soup = self.index_to_soup(index)
link_item = soup.find(name = 'img',alt = "issue")
print link_item
if link_item:
cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
return cover_url
def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.name = 'div'
return soup

View File

@ -6,51 +6,86 @@ __docformat__ = 'restructuredtext en'
''' '''
smh.com.au smh.com.au
''' '''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class SMH(BasicNewsRecipe): class SMH(BasicNewsRecipe):
title = 'Sydney Morning Herald' title = 'Sydney Morning Herald'
description = 'Business News, World News and Breaking News in Australia' description = 'Business News, World News and Breaking News in Australia'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
language = 'en_AU' language = 'en_AU'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
no_javascript = True
timefmt = ' [%A, %d %B, %Y]'
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs ={'id':'content'})]
remove_tags = [
dict(name='div', attrs={'align' :'right'}),
dict(name='p', attrs={'class' :'comments'}),
dict(name='a', attrs={'class' :['more-photos','performerpromo']}),
dict(name='img', attrs={'alt' :'aap'}),
dict(name='div', attrs ={'id':['googleAds','moreGoogleAds','comments','footer','sidebar','austereopuff','adSpotIsland']}),
dict(name='div', attrs ={'class':['article-links','wof','articleTools top','cN-multimediaGroup cfix','articleTools bottom']}),
dict(name='div', attrs ={'class':['clear','adSpot-textboxgr1','adSpot-textBox','articleTools-c3 cfix','articleExtras-bottom','span-16 last']}),
dict(name='div', attrs ={'class':[ 'sidebar span-5','cT-socialCommenting','cN-linkList','cN-topicSelector','cT-storyTools cfix','cT-imageMultimedia']}) ,
dict(name='iframe'),
]
extra_css = '''
h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
.cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
.cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
.source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
#content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
.pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
#bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
#idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
'''
feeds = [
('Top Stories', 'http://feeds.smh.com.au/rssheadlines/top.xml'),
('National', 'http://feeds.smh.com.au/rssheadlines/national.xml'),
('World', 'http://feeds.smh.com.au/rssheadlines/world.xml'),
('Business', 'http://www.smh.com.au/rssheadlines/business.xml'),
('National Times', 'http://www.smh.com.au/rssheadlines/opinion/article/rss.xml'),
('Entertainment', 'http://feeds.smh.com.au/rssheadlines/entertainment.xml'),
('Technology', 'http://feeds.smh.com.au/rssheadlines/technology.xml'),
('Sport', 'http://feeds.smh.com.au/rssheadlines/sport.xml'),
]
def preprocess_html(self, soup):
bod = soup.find('bod')
if bod is not None:
bod.tag = 'div'
p = soup.find(id='content')
bod.extract()
p.insert(len(p), bod)
return soup
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
br.set_handle_refresh(False) br.set_handle_refresh(False)
return br return br
def parse_index(self): def get_article_url(self, article):
url = article.link
soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read()) if 'media' in url:
url = ''
feeds, articles = [], [] return url
feed = None
for tag in soup.findAll(['h3', 'a']):
if tag.name == 'h3':
if articles:
feeds.append((feed, articles))
articles = []
feed = self.tag_to_string(tag)
elif feed is not None and tag.has_key('href') and tag['href'].strip():
url = tag['href'].strip()
if url.startswith('/'):
url = 'http://www.smh.com.au' + url
title = self.tag_to_string(tag)
articles.append({
'title': title,
'url' : url,
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
return feeds

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.6.23' __version__ = '0.6.24'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re

View File

@ -101,8 +101,6 @@ def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
plugin.site_customization = customization.get(plugin.name, None) plugin.site_customization = customization.get(plugin.name, None)
if plugin.name == 'IsbnDB' and isbndb_key is not None: if plugin.name == 'IsbnDB' and isbndb_key is not None:
plugin.site_customization = isbndb_key plugin.site_customization = isbndb_key
if not plugin.is_ok():
continue
yield plugin yield plugin
def get_isbndb_key(): def get_isbndb_key():

View File

@ -92,3 +92,8 @@ class POCKETBOOK360(EB600):
VENDOR_NAME = 'PHILIPS' VENDOR_NAME = 'PHILIPS'
WINDOWS_MAIN_MEM = 'MASS_STORGE' WINDOWS_MAIN_MEM = 'MASS_STORGE'
OSX_MAIN_MEM = 'Philips Mass Storge Media'
OSX_CARD_A_MEM = 'Philips Mass Storge Media'

View File

@ -66,14 +66,24 @@ class USBMS(CLI, Device):
match = fnmatch.filter(files, '*.%s' % (book_type)) match = fnmatch.filter(files, '*.%s' % (book_type))
for i, filename in enumerate(match): for i, filename in enumerate(match):
self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...')) self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
bl.append(self.__class__.book_from_path(os.path.join(path, filename))) try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a filename encoding error
import traceback
traceback.print_exc()
continue
else: else:
path = os.path.join(prefix, ebook_dir) path = os.path.join(prefix, ebook_dir)
paths = os.listdir(path) paths = os.listdir(path)
for i, filename in enumerate(paths): for i, filename in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...')) self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
if path_to_ext(filename) in self.FORMATS: if path_to_ext(filename) in self.FORMATS:
bl.append(self.__class__.book_from_path(os.path.join(path, filename))) try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a file name encoding error
import traceback
traceback.print_exc()
continue
self.report_progress(1.0, _('Getting list of books on device...')) self.report_progress(1.0, _('Getting list of books on device...'))

View File

@ -9,9 +9,11 @@ from threading import Thread
from calibre import prints from calibre import prints
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.ebooks.metadata import MetaInformation
from calibre.customize import Plugin from calibre.customize import Plugin
metadata_config = None
class MetadataSource(Plugin): class MetadataSource(Plugin):
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -23,11 +25,17 @@ class MetadataSource(Plugin):
#: tags/rating/reviews/etc. #: tags/rating/reviews/etc.
metadata_type = 'basic' metadata_type = 'basic'
#: If not None, the customization dialog will allow for string
#: based customization as well the default customization. The
#: string customization will be saved in the site_customization
#: member.
string_customization_help = None
type = _('Metadata download') type = _('Metadata download')
def __call__(self, title, author, publisher, isbn, verbose, log=None, def __call__(self, title, author, publisher, isbn, verbose, log=None,
extra=None): extra=None):
self.worker = Thread(target=self.fetch) self.worker = Thread(target=self._fetch)
self.worker.daemon = True self.worker.daemon = True
self.title = title self.title = title
self.verbose = verbose self.verbose = verbose
@ -39,23 +47,87 @@ class MetadataSource(Plugin):
self.exception, self.tb, self.results = None, None, [] self.exception, self.tb, self.results = None, None, []
self.worker.start() self.worker.start()
def _fetch(self):
try:
self.fetch()
if self.results:
c = self.config_store().get(self.name, {})
res = self.results
if isinstance(res, MetaInformation):
res = [res]
for mi in res:
if not c.get('rating', True):
mi.rating = None
if not c.get('comments', True):
mi.comments = None
if not c.get('tags', True):
mi.tags = []
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
def fetch(self): def fetch(self):
''' '''
All the actual work is done here. All the actual work is done here.
''' '''
raise NotImplementedError raise NotImplementedError
def is_ok(self):
'''
Used to check if the plugin has been correctly customized.
For example: The isbndb plugin checks to see if the site_customization
has been set with an isbndb.com access key.
'''
return True
def join(self): def join(self):
return self.worker.join() return self.worker.join()
def is_customizable(self):
return True
def config_store(self):
global metadata_config
if metadata_config is None:
from calibre.utils.config import XMLConfig
metadata_config = XMLConfig('plugins/metadata_download')
return metadata_config
def config_widget(self):
from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
QCheckBox
from calibre.customize.ui import config
w = QWidget()
w._layout = QVBoxLayout(w)
w.setLayout(w._layout)
if self.string_customization_help is not None:
w._sc_label = QLabel(self.string_customization_help, w)
w._layout.addWidget(w._sc_label)
customization = config['plugin_customization']
def_sc = customization.get(self.name, '')
if not def_sc:
def_sc = ''
w._sc = QLineEdit(def_sc, w)
w._layout.addWidget(w._sc)
w._sc_label.setWordWrap(True)
w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
| Qt.LinksAccessibleByKeyboard)
w._sc_label.setOpenExternalLinks(True)
c = self.config_store()
c = c.get(self.name, {})
for x, l in {'rating':_('ratings'), 'tags':_('tags'),
'comments':_('description/reviews')}.items():
cb = QCheckBox(_('Download %s from %s')%(l,
self.name))
setattr(w, '_'+x, cb)
cb.setChecked(c.get(x, True))
w._layout.addWidget(cb)
return w
def save_settings(self, w):
dl_settings = {}
for x in ('rating', 'tags', 'comments'):
dl_settings[x] = getattr(w, '_'+x).isChecked()
c = self.config_store()
c.set(self.name, dl_settings)
if hasattr(w, '_sc'):
sc = unicode(w._sc.text()).strip()
from calibre.customize.ui import customize_plugin
customize_plugin(self, sc)
class GoogleBooks(MetadataSource): class GoogleBooks(MetadataSource):
@ -102,14 +174,11 @@ class ISBNDB(MetadataSource):
self.exception = e self.exception = e
self.tb = traceback.format_exc() self.tb = traceback.format_exc()
def customization_help(self, gui=False): @property
def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s ' ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.') 'and enter your access key below.')
if gui: return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
ans = '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
else:
ans = ans.replace('%s', '')
return ans
class Amazon(MetadataSource): class Amazon(MetadataSource):
@ -191,7 +260,7 @@ def get_social_metadata(mi, verbose=0):
comments.add(dmi.comments) comments.add(dmi.comments)
if ratings: if ratings:
rating = sum(ratings)/float(len(ratings)) rating = sum(ratings)/float(len(ratings))
if mi.rating is None: if mi.rating is None or mi.rating < 0.1:
mi.rating = rating mi.rating = rating
else: else:
mi.rating = (mi.rating + rating)/2.0 mi.rating = (mi.rating + rating)/2.0
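To see how the new hooks fit together, here is a minimal, hypothetical MetadataSource subclass (not part of the commit; the import path is assumed to match the file being patched). fetch() fills self.results, and the new _fetch() wrapper then strips ratings, comments and tags according to the per-plugin settings saved by config_widget()/save_settings():

# Hypothetical example plugin; assumes these classes live in calibre.ebooks.metadata.fetch
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.fetch import MetadataSource

class ExampleSource(MetadataSource):
    name = 'Example Source'           # used as the key into the plugin config store
    metadata_type = 'basic'
    string_customization_help = None  # no string customization in this sketch

    def fetch(self):
        # All the real work happens here; _fetch() applies the
        # rating/comments/tags download preferences afterwards.
        mi = MetaInformation(self.title, ['Unknown'])
        mi.tags = ['example']
        self.results = [mi]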

View File

@ -3,6 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files''' '''Read meta information from PDF files'''
import re
from functools import partial from functools import partial
from calibre import prints from calibre import prints
@ -11,10 +12,16 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_
pdfreflow, pdfreflow_error = plugins['pdfreflow'] pdfreflow, pdfreflow_error = plugins['pdfreflow']
_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')
def get_metadata(stream, cover=True): def get_metadata(stream, cover=True):
if pdfreflow is None: if pdfreflow is None:
raise RuntimeError(pdfreflow_error) raise RuntimeError(pdfreflow_error)
info = pdfreflow.get_metadata(stream.read(), cover) raw = stream.read()
isbn = _isbn_pat.search(raw)
if isbn is not None:
isbn = isbn.group(1).replace('-', '').replace(' ', '')
info = pdfreflow.get_metadata(raw, cover)
title = info.get('Title', None) title = info.get('Title', None)
au = info.get('Author', None) au = info.get('Author', None)
if au is None: if au is None:
@ -22,6 +29,8 @@ def get_metadata(stream, cover=True):
else: else:
au = string_to_authors(au) au = string_to_authors(au)
mi = MetaInformation(title, au) mi = MetaInformation(title, au)
if isbn is not None:
mi.isbn = isbn
creator = info.get('Creator', None) creator = info.get('Creator', None)
if creator: if creator:
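A quick illustration of what the new _isbn_pat extraction does (illustrative only; the sample text is made up and uses the well-known example ISBN 0-306-40615-2):

import re

_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')

raw = 'First edition, 2009.  ISBN: 0-306-40615-2  Printed on acid-free paper.'
m = _isbn_pat.search(raw)
if m is not None:
    isbn = m.group(1).replace('-', '').replace(' ', '')
    print(isbn)   # -> 0306406152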

View File

@ -777,7 +777,7 @@ class Manifest(object):
# Remove DOCTYPE declaration as it messes up parsing # Remove DOCTYPE declaration as it messes up parsing
# Inparticular it causes tostring to insert xmlns # In particular, it causes tostring to insert xmlns
# declarations, which messes up the coercing logic # declarations, which messes up the coercing logic
idx = data.find('<html') idx = data.find('<html')
if idx > -1: if idx > -1:
@ -1746,9 +1746,20 @@ class OEBBook(object):
return d.replace('\r\n', '\n').replace('\r', '\n') return d.replace('\r\n', '\n').replace('\r', '\n')
if isinstance(data, unicode): if isinstance(data, unicode):
return fix_data(data) return fix_data(data)
if data[:2] in ('\xff\xfe', '\xfe\xff'): bom_enc = None
if data[:4] in ('\0\0\xfe\xff', '\xff\xfe\0\0'):
bom_enc = {'\0\0\xfe\xff':'utf-32-be',
'\xff\xfe\0\0':'utf-32-le'}[data[:4]]
data = data[4:]
elif data[:2] in ('\xff\xfe', '\xfe\xff'):
bom_enc = {'\xff\xfe':'utf-16-le', '\xfe\xff':'utf-16-be'}[data[:2]]
data = data[2:]
elif data[:3] == '\xef\xbb\xbf':
bom_enc = 'utf-8'
data = data[3:]
if bom_enc is not None:
try: try:
return fix_data(data.decode('utf-16')) return fix_data(data.decode(bom_enc))
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
if self.input_encoding is not None: if self.input_encoding is not None:
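For reference, a small standalone sketch (not part of the commit) of the BOM sniffing the hunk above introduces, fed a hand-built UTF-16-LE byte string:

# UTF-16-LE BOM followed by the two characters 'Hi'.
data = b'\xff\xfe' + u'Hi'.encode('utf-16-le')

bom_enc = None
if data[:4] in (b'\x00\x00\xfe\xff', b'\xff\xfe\x00\x00'):
    bom_enc = {b'\x00\x00\xfe\xff': 'utf-32-be', b'\xff\xfe\x00\x00': 'utf-32-le'}[data[:4]]
    data = data[4:]
elif data[:2] in (b'\xff\xfe', b'\xfe\xff'):
    bom_enc = {b'\xff\xfe': 'utf-16-le', b'\xfe\xff': 'utf-16-be'}[data[:2]]
    data = data[2:]
elif data[:3] == b'\xef\xbb\xbf':
    bom_enc = 'utf-8'
    data = data[3:]

print(bom_enc)               # -> utf-16-le
print(data.decode(bom_enc))  # -> Hi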

View File

@ -31,6 +31,13 @@ class BulkConfig(Config):
self.input_label.hide() self.input_label.hide()
self.input_formats.hide() self.input_formats.hide()
self.opt_individual_saved_settings.setVisible(True)
self.opt_individual_saved_settings.setChecked(True)
self.opt_individual_saved_settings.setToolTip(_('For '
'settings that cannot be specified in this dialog, use the '
'values saved in a previous conversion (if they exist) instead '
'of using the defaults specified in the Preferences'))
self.connect(self.output_formats, SIGNAL('currentIndexChanged(QString)'), self.connect(self.output_formats, SIGNAL('currentIndexChanged(QString)'),
self.setup_pipeline) self.setup_pipeline)

View File

@ -116,6 +116,7 @@ class Config(ResizableDialog, Ui_Dialog):
def __init__(self, parent, db, book_id, def __init__(self, parent, db, book_id,
preferred_input_format=None, preferred_output_format=None): preferred_input_format=None, preferred_output_format=None):
ResizableDialog.__init__(self, parent) ResizableDialog.__init__(self, parent)
self.opt_individual_saved_settings.setVisible(False)
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
self.setup_input_output_formats(self.db, self.book_id, preferred_input_format, self.setup_input_output_formats(self.db, self.book_id, preferred_input_format,

View File

@ -33,6 +33,13 @@
<item> <item>
<widget class="QComboBox" name="input_formats"/> <widget class="QComboBox" name="input_formats"/>
</item> </item>
<item>
<widget class="QCheckBox" name="opt_individual_saved_settings">
<property name="text">
<string>Use &amp;saved conversion settings for individual books</string>
</property>
</widget>
</item>
<item> <item>
<spacer name="horizontalSpacer"> <spacer name="horizontalSpacer">
<property name="orientation"> <property name="orientation">
@ -109,7 +116,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>810</width> <width>810</width>
<height>492</height> <height>489</height>
</rect> </rect>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout_3"> <layout class="QVBoxLayout" name="verticalLayout_3">

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.QtCore import SIGNAL, QObject from PyQt4.QtCore import SIGNAL, QObject
from PyQt4.QtGui import QDialog from PyQt4.QtGui import QDialog
from calibre.gui2 import qstring_to_unicode
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
@ -86,7 +85,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def sync(self): def sync(self):
for id in self.ids: for id in self.ids:
au = qstring_to_unicode(self.authors.text()) au = unicode(self.authors.text())
if au: if au:
au = string_to_authors(au) au = string_to_authors(au)
self.db.set_authors(id, au, notify=False) self.db.set_authors(id, au, notify=False)
@ -97,28 +96,39 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
x = authors_to_sort_string(aut) x = authors_to_sort_string(aut)
if x: if x:
self.db.set_author_sort(id, x, notify=False) self.db.set_author_sort(id, x, notify=False)
aus = qstring_to_unicode(self.author_sort.text()) aus = unicode(self.author_sort.text())
if aus and self.author_sort.isEnabled(): if aus and self.author_sort.isEnabled():
self.db.set_author_sort(id, aus, notify=False) self.db.set_author_sort(id, aus, notify=False)
if self.write_rating: if self.write_rating:
self.db.set_rating(id, 2*self.rating.value(), notify=False) self.db.set_rating(id, 2*self.rating.value(), notify=False)
pub = qstring_to_unicode(self.publisher.text()) pub = unicode(self.publisher.text())
if pub: if pub:
self.db.set_publisher(id, pub, notify=False) self.db.set_publisher(id, pub, notify=False)
remove_tags = qstring_to_unicode(self.remove_tags.text()).strip() remove_tags = unicode(self.remove_tags.text()).strip()
if remove_tags: if remove_tags:
remove_tags = [i.strip() for i in remove_tags.split(',')] remove_tags = [i.strip() for i in remove_tags.split(',')]
self.db.unapply_tags(id, remove_tags, notify=False) self.db.unapply_tags(id, remove_tags, notify=False)
tags = qstring_to_unicode(self.tags.text()).strip() tags = unicode(self.tags.text()).strip()
if tags: if tags:
tags = map(lambda x: x.strip(), tags.split(',')) tags = map(lambda x: x.strip(), tags.split(','))
self.db.set_tags(id, tags, append=True, notify=False) self.db.set_tags(id, tags, append=True, notify=False)
if self.write_series: if self.write_series:
self.db.set_series(id, qstring_to_unicode(self.series.currentText()), notify=False) self.db.set_series(id, unicode(self.series.currentText()), notify=False)
if self.remove_format.currentIndex() > -1: if self.remove_format.currentIndex() > -1:
self.db.remove_format(id, unicode(self.remove_format.currentText()), index_is_id=True, notify=False) self.db.remove_format(id, unicode(self.remove_format.currentText()), index_is_id=True, notify=False)
if self.swap_title_and_author.isChecked():
title = self.db.title(id, index_is_id=True)
aum = self.db.authors(id, index_is_id=True)
if aum:
aum = [a.strip().replace('|', ',') for a in aum.split(',')]
new_title = authors_to_string(aum)
self.db.set_title(id, new_title)
if title:
new_authors = string_to_authors(title)
self.db.set_authors(id, new_authors)
self.changed = True self.changed = True
def series_changed(self): def series_changed(self):

View File

@ -7,7 +7,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>495</width> <width>495</width>
<height>387</height> <height>456</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -230,6 +230,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="9" column="0" colspan="2">
<widget class="QCheckBox" name="swap_title_and_author">
<property name="text">
<string>&amp;Swap title and author</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

View File

@ -552,6 +552,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
warning_dialog(self, _('There were errors'), warning_dialog(self, _('There were errors'),
_('There were errors downloading social metadata'), _('There were errors downloading social metadata'),
det_msg=det, show=True) det_msg=det, show=True)
else:
book.tags = []
self.title.setText(book.title) self.title.setText(book.title)
self.authors.setText(authors_to_string(book.authors)) self.authors.setText(authors_to_string(book.authors))
if book.author_sort: self.author_sort.setText(book.author_sort) if book.author_sort: self.author_sort.setText(book.author_sort)

View File

@ -68,6 +68,7 @@ class LibraryDelegate(QItemDelegate):
self.drawFocus(painter, option, option.rect) self.drawFocus(painter, option, option.rect)
try: try:
painter.setRenderHint(QPainter.Antialiasing) painter.setRenderHint(QPainter.Antialiasing)
painter.setClipRect(option.rect)
y = option.rect.center().y()-self.SIZE/2. y = option.rect.center().y()-self.SIZE/2.
x = option.rect.right() - self.SIZE x = option.rect.right() - self.SIZE
painter.setPen(self.PEN) painter.setPen(self.PEN)

View File

@ -213,19 +213,18 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.device_manager.umount_device) self.device_manager.umount_device)
####################### Vanity ######################## ####################### Vanity ########################
self.vanity_template = _('<p>For help visit <a href="http://%s.' self.vanity_template = _('<p>For help see the: <a href="%s">User Manual</a>'
'kovidgoyal.net/user_manual">%s.kovidgoyal.net</a>' '<br>')%'http://calibre.kovidgoyal.net/user_manual'
'<br>')%(__appname__, __appname__)
self.vanity_template += _('<b>%s</b>: %s by <b>Kovid Goyal ' self.vanity_template += _('<b>%s</b>: %s by <b>Kovid Goyal '
'%%(version)s</b><br>%%(device)s</p>')%(__appname__, __version__) '%%(version)s</b><br>%%(device)s</p>')%(__appname__, __version__)
self.latest_version = ' ' self.latest_version = ' '
self.vanity.setText(self.vanity_template%dict(version=' ', device=' ')) self.vanity.setText(self.vanity_template%dict(version=' ', device=' '))
self.device_info = ' ' self.device_info = ' '
if not opts.no_update_check: if not opts.no_update_check:
self.update_checker = CheckForUpdates() self.update_checker = CheckForUpdates(self)
QObject.connect(self.update_checker, QObject.connect(self.update_checker,
SIGNAL('update_found(PyQt_PyObject)'), self.update_found) SIGNAL('update_found(PyQt_PyObject)'), self.update_found)
self.update_checker.start() self.update_checker.start(2000)
####################### Status Bar ##################### ####################### Status Bar #####################
self.status_bar = StatusBar(self.jobs_dialog, self.system_tray_icon) self.status_bar = StatusBar(self.jobs_dialog, self.system_tray_icon)
self.setStatusBar(self.status_bar) self.setStatusBar(self.status_bar)
@ -246,6 +245,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
md.addAction(_('Download metadata and covers')) md.addAction(_('Download metadata and covers'))
md.addAction(_('Download only metadata')) md.addAction(_('Download only metadata'))
md.addAction(_('Download only covers')) md.addAction(_('Download only covers'))
md.addAction(_('Download only social metadata'))
self.metadata_menu = md self.metadata_menu = md
self.add_menu = QMenu() self.add_menu = QMenu()
self.add_menu.addAction(_('Add books from a single directory')) self.add_menu.addAction(_('Add books from a single directory'))
@ -288,7 +288,10 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
set_metadata=False) set_metadata=False)
QObject.connect(md.actions()[6], SIGNAL('triggered(bool)'), QObject.connect(md.actions()[6], SIGNAL('triggered(bool)'),
self.__em5__) self.__em5__)
self.__em6__ = partial(self.download_metadata, covers=False,
set_metadata=False, set_social_metadata=True)
QObject.connect(md.actions()[7], SIGNAL('triggered(bool)'),
self.__em6__)
self.save_menu = QMenu() self.save_menu = QMenu()
@ -1027,7 +1030,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
############################### Edit metadata ############################## ############################### Edit metadata ##############################
def download_metadata(self, checked, covers=True, set_metadata=True): def download_metadata(self, checked, covers=True, set_metadata=True,
set_social_metadata=None):
rows = self.library_view.selectionModel().selectedRows() rows = self.library_view.selectionModel().selectedRows()
previous = self.library_view.currentIndex() previous = self.library_view.currentIndex()
if not rows or len(rows) == 0: if not rows or len(rows) == 0:
@ -1037,12 +1041,19 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
return return
db = self.library_view.model().db db = self.library_view.model().db
ids = [db.id(row.row()) for row in rows] ids = [db.id(row.row()) for row in rows]
if set_social_metadata is None:
get_social_metadata = config['get_social_metadata']
else:
get_social_metadata = set_social_metadata
from calibre.gui2.metadata import DownloadMetadata from calibre.gui2.metadata import DownloadMetadata
self._download_book_metadata = DownloadMetadata(db, ids, self._download_book_metadata = DownloadMetadata(db, ids,
get_covers=covers, set_metadata=set_metadata, get_covers=covers, set_metadata=set_metadata,
get_social_metadata=config['get_social_metadata']) get_social_metadata=get_social_metadata)
self._download_book_metadata.start() self._download_book_metadata.start()
x = _('covers') if covers and not set_metadata else _('metadata') if set_social_metadata is not None and set_social_metadata:
x = _('social metadata')
else:
x = _('covers') if covers and not set_metadata else _('metadata')
self.progress_indicator.start( self.progress_indicator.start(
_('Downloading %s for %d book(s)')%(x, len(ids))) _('Downloading %s for %d book(s)')%(x, len(ids)))
self._book_metadata_download_check = QTimer(self) self._book_metadata_download_check = QTimer(self)
@ -1744,6 +1755,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if write_settings: if write_settings:
self.write_settings() self.write_settings()
self.check_messages_timer.stop() self.check_messages_timer.stop()
self.update_checker.stop()
self.listener.close() self.listener.close()
self.job_manager.server.close() self.job_manager.server.close()
while self.spare_servers: while self.spare_servers:

View File

@ -60,6 +60,7 @@ class DownloadMetadata(Thread):
self.worker = Worker() self.worker = Worker()
for id in ids: for id in ids:
self.metadata[id] = db.get_metadata(id, index_is_id=True) self.metadata[id] = db.get_metadata(id, index_is_id=True)
self.metadata[id].rating = None
def run(self): def run(self):
self.exception = self.tb = None self.exception = self.tb = None
@ -100,15 +101,28 @@ class DownloadMetadata(Thread):
mi.smart_update(fmi) mi.smart_update(fmi)
if mi.isbn and self.get_social_metadata: if mi.isbn and self.get_social_metadata:
self.social_metadata_exceptions = get_social_metadata(mi) self.social_metadata_exceptions = get_social_metadata(mi)
if mi.rating:
mi.rating *= 2
if not self.get_social_metadata:
mi.tags = []
else: else:
self.failures[id] = (mi.title, self.failures[id] = (mi.title,
_('No matches found for this book')) _('No matches found for this book'))
self.commit_covers() self.commit_covers()
self.commit_covers(True) self.commit_covers(True)
if self.set_metadata: for id in self.fetched_metadata:
for id in self.fetched_metadata: mi = self.metadata[id]
self.db.set_metadata(id, self.metadata[id]) if self.set_metadata:
self.db.set_metadata(id, mi)
if not self.set_metadata and self.get_social_metadata:
if mi.rating:
self.db.set_rating(id, mi.rating)
if mi.tags:
self.db.set_tags(id, mi.tags)
if mi.comments:
self.db.set_comment(id, mi.comments)
self.updated = set(self.fetched_metadata) self.updated = set(self.fetched_metadata)

View File

@ -47,7 +47,10 @@ class TagsView(QTreeView):
ci = self.currentIndex() ci = self.currentIndex()
if not ci.isValid(): if not ci.isValid():
ci = self.indexAt(QPoint(10, 10)) ci = self.indexAt(QPoint(10, 10))
self.model().refresh() try:
self.model().refresh()
except: #Database connection could be closed if an integrity check is happening
pass
if ci.isValid(): if ci.isValid():
self.scrollTo(ci, QTreeView.PositionAtTop) self.scrollTo(ci, QTreeView.PositionAtTop)

View File

@ -111,17 +111,21 @@ def convert_bulk_ebook(parent, queue, db, book_ids, out_format=None, args=[]):
user_recs = cPickle.loads(d.recommendations) user_recs = cPickle.loads(d.recommendations)
book_ids = convert_existing(parent, db, book_ids, output_format) book_ids = convert_existing(parent, db, book_ids, output_format)
return QueueBulk(parent, book_ids, output_format, queue, db, user_recs, args) use_saved_single_settings = d.opt_individual_saved_settings.isChecked()
return QueueBulk(parent, book_ids, output_format, queue, db, user_recs,
args, use_saved_single_settings=use_saved_single_settings)
class QueueBulk(QProgressDialog): class QueueBulk(QProgressDialog):
def __init__(self, parent, book_ids, output_format, queue, db, user_recs, args): def __init__(self, parent, book_ids, output_format, queue, db, user_recs,
args, use_saved_single_settings=True):
QProgressDialog.__init__(self, '', QProgressDialog.__init__(self, '',
QString(), 0, len(book_ids), parent) QString(), 0, len(book_ids), parent)
self.setWindowTitle(_('Queueing books for bulk conversion')) self.setWindowTitle(_('Queueing books for bulk conversion'))
self.book_ids, self.output_format, self.queue, self.db, self.args, self.user_recs = \ self.book_ids, self.output_format, self.queue, self.db, self.args, self.user_recs = \
book_ids, output_format, queue, db, args, user_recs book_ids, output_format, queue, db, args, user_recs
self.parent = parent self.parent = parent
self.use_saved_single_settings = use_saved_single_settings
self.i, self.bad, self.jobs, self.changed = 0, [], [], False self.i, self.bad, self.jobs, self.changed = 0, [], [], False
self.timer = QTimer(self) self.timer = QTimer(self)
self.connect(self.timer, SIGNAL('timeout()'), self.do_book) self.connect(self.timer, SIGNAL('timeout()'), self.do_book)
@ -149,11 +153,12 @@ class QueueBulk(QProgressDialog):
combined_recs = GuiRecommendations() combined_recs = GuiRecommendations()
default_recs = load_defaults('%s_input' % input_format) default_recs = load_defaults('%s_input' % input_format)
specific_recs = load_specifics(self.db, book_id)
for key in default_recs: for key in default_recs:
combined_recs[key] = default_recs[key] combined_recs[key] = default_recs[key]
for key in specific_recs: if self.use_saved_single_settings:
combined_recs[key] = specific_recs[key] specific_recs = load_specifics(self.db, book_id)
for key in specific_recs:
combined_recs[key] = specific_recs[key]
for item in self.user_recs: for item in self.user_recs:
combined_recs[item[0]] = item[1] combined_recs[item[0]] = item[1]
save_specifics(self.db, book_id, combined_recs) save_specifics(self.db, book_id, combined_recs)

View File

@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import traceback import traceback
from PyQt4.QtCore import QThread, SIGNAL from PyQt4.QtCore import QObject, SIGNAL, QTimer
import mechanize import mechanize
from calibre.constants import __version__, iswindows, isosx from calibre.constants import __version__, iswindows, isosx
@ -11,9 +11,21 @@ from calibre import browser
URL = 'http://status.calibre-ebook.com/latest' URL = 'http://status.calibre-ebook.com/latest'
class CheckForUpdates(QThread): class CheckForUpdates(QObject):
def __init__(self, parent):
QObject.__init__(self, parent)
self.timer = QTimer(self)
self.first = True
self.connect(self.timer, SIGNAL('timeout()'), self)
self.start = self.timer.start
self.stop = self.timer.stop
def __call__(self):
if self.first:
self.timer.setInterval(1000*24*60*60)
self.first = False
def run(self):
try: try:
br = browser() br = browser()
req = mechanize.Request(URL) req = mechanize.Request(URL)

View File

@ -9,7 +9,6 @@ Command line interface to the calibre database.

import sys, os, cStringIO
from textwrap import TextWrapper
-from urllib import quote

from calibre import terminal_controller, preferred_encoding, prints
from calibre.utils.config import OptionParser, prefs
@ -48,10 +47,10 @@ XML_TEMPLATE = '''\
<comments>${record['comments']}</comments>
<series py:if="record['series']" index="${record['series_index']}">${record['series']}</series>
<isbn>${record['isbn']}</isbn>
-<cover py:if="record['cover']">${record['cover']}</cover>
+<cover py:if="record['cover']">${record['cover'].replace(os.sep, '/')}</cover>
<formats py:if="record['formats']">
<py:for each="path in record['formats']">
-<format>${path}</format>
+<format>${path.replace(os.sep, '/')}</format>
</py:for>
</formats>
</record>
@ -78,9 +77,9 @@ STANZA_TEMPLATE='''\
<id>urn:calibre:${record['uuid']}</id>
<author><name>${record['author_sort']}</name></author>
<updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%SZ')}</updated>
-<link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/')).replace('http%3A', 'http:')}" />
-<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
-<link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
+<link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/'))}"/>
+<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
+<link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<py:for each="f in ('authors', 'publisher', 'rating', 'tags', 'series', 'isbn')">
@ -186,8 +185,10 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
        return o.getvalue()
    elif output_format == 'xml':
        template = MarkupTemplate(XML_TEMPLATE)
-        return template.generate(data=data).render('xml')
+        return template.generate(data=data, os=os).render('xml')
    elif output_format == 'stanza':
+        def quote(raw):
+            return raw.replace('"', r'\"')
        data = [i for i in data if i.has_key('fmt_epub')]
        for x in data:
            if isinstance(x['fmt_epub'], unicode):

View File

@ -115,7 +115,7 @@ class PostInstall:
        self.info('Creating symlinks...')
        for exe in scripts.keys():
            dest = os.path.join(self.opts.staging_bindir, exe)
-            if os.path.exists(dest):
+            if os.path.lexists(dest):
                os.unlink(dest)
            tgt = os.path.join(getattr(sys, 'frozen_path'), exe)
            self.info('\tSymlinking %s to %s'%(tgt, dest))
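The switch from os.path.exists() to os.path.lexists() matters when the existing symlink is dangling: exists() follows the link and returns False if the target is gone, so the stale link would never be removed, while lexists() checks the link itself. A small standalone illustration (temporary paths, not calibre's install locations):

import os, tempfile

d = tempfile.mkdtemp()
link = os.path.join(d, 'ebook-viewer')
os.symlink(os.path.join(d, 'missing-target'), link)   # dangling symlink

print os.path.exists(link)    # False: follows the link to a target that is not there
print os.path.lexists(link)   # True: the link itself exists

if os.path.lexists(link):     # the check the hunk above switches to
    os.unlink(link)           # now the stale link really gets removed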

View File

@ -421,7 +421,7 @@ button in the individual book conversion dialog.

When you Bulk Convert a set of books, settings are taken in the following order:

* From the defaults set in Preferences->Conversion

-* From the saved conversion settings for each book being converted (if any)
+* From the saved conversion settings for each book being converted (if any). This can be turned off by the option in the top left corner of the Bulk Conversion dialog.

* From the settings set in the Bulk conversion dialog

Note that the final settings for each book in a Bulk Conversion will be saved and re-used if the book is converted again. Since the

View File

@ -81,7 +81,7 @@ Device Integration

What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
+At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, PocketBook 360, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.

How can I help get my device supported in |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -108,7 +108,7 @@ Metadata download plugins

.. class:: calibre.ebooks.metadata.fetch.MetadataSource

    Represents a source to query for metadata. Subclasses must implement
-    at least the fetch method and optionally the is_ok method.
+    at least the fetch method.

    When :meth:`fetch` is called, the `self` object will have the following
    useful attributes (each of which may be None)::
@ -124,8 +124,9 @@ Metadata download plugins

.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.metadata_type

+.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.string_customization_help

.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.fetch

-.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.is_ok
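For orientation, a rough sketch of what a minimal metadata source plugin might look like. The query attributes (self.title, self.book_author) and the convention of reporting results via self.results and errors via self.exception are assumptions based on the surrounding documentation, not something this diff shows:

from calibre.ebooks.metadata.fetch import MetadataSource

class ExampleMetadataSource(MetadataSource):  # hypothetical plugin

    name        = 'Example metadata source'   # assumed Plugin-style attributes
    description = 'Queries a made-up web service for book metadata'

    def fetch(self):
        try:
            # Assumed: the search terms are available as attributes on self
            query = self.title or self.book_author or ''
            # ... query the remote service and build MetaInformation objects ...
            self.results = []        # assumed result-reporting convention
        except Exception, e:
            self.exception = e       # assumed error-reporting convention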

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
'''
Manage application-wide preferences.
'''
-import os, re, cPickle, textwrap, traceback
+import os, re, cPickle, textwrap, traceback, plistlib
from copy import deepcopy
from functools import partial
from optparse import OptionParser as _OptionParser
@ -34,9 +34,11 @@ else:

plugin_dir = os.path.join(config_dir, 'plugins')

+CONFIG_DIR_MODE = 0700
+
def make_config_dir():
    if not os.path.exists(plugin_dir):
-        os.makedirs(plugin_dir, mode=448) # 0700 == 448
+        os.makedirs(plugin_dir, mode=CONFIG_DIR_MODE)

def check_config_write_access():
    return os.access(config_dir, os.W_OK) and os.access(config_dir, os.X_OK)
@ -552,6 +554,72 @@ class DynamicConfig(dict):

dynamic = DynamicConfig()

+class XMLConfig(dict):
+
+    '''
+    Similar to :class:`DynamicConfig`, except that it uses an XML storage
+    backend instead of a pickle file.
+
+    See `http://docs.python.org/dev/library/plistlib.html`_ for the supported
+    data types.
+    '''
+
+    def __init__(self, rel_path_to_cf_file):
+        dict.__init__(self)
+        self.file_path = os.path.join(config_dir,
+                *(rel_path_to_cf_file.split('/')))
+        self.file_path = os.path.abspath(self.file_path)
+        if not self.file_path.endswith('.plist'):
+            self.file_path += '.plist'
+        self.refresh()
+
+    def refresh(self):
+        d = {}
+        if os.path.exists(self.file_path):
+            with ExclusiveFile(self.file_path) as f:
+                raw = f.read()
+                try:
+                    d = plistlib.readPlistFromString(raw) if raw.strip() else {}
+                except SystemError:
+                    pass
+                except:
+                    import traceback
+                    traceback.print_exc()
+                    d = {}
+        self.clear()
+        self.update(d)
+
+    def __getitem__(self, key):
+        try:
+            ans = dict.__getitem__(self, key)
+            if isinstance(ans, plistlib.Data):
+                ans = ans.data
+            return ans
+        except KeyError:
+            return None
+
+    def __setitem__(self, key, val):
+        if isinstance(val, (bytes, str)):
+            val = plistlib.Data(val)
+        dict.__setitem__(self, key, val)
+        self.commit()
+
+    def set(self, key, val):
+        self.__setitem__(key, val)
+
+    def commit(self):
+        if hasattr(self, 'file_path') and self.file_path:
+            dpath = os.path.dirname(self.file_path)
+            if not os.path.exists(dpath):
+                os.makedirs(dpath, mode=CONFIG_DIR_MODE)
+            with ExclusiveFile(self.file_path) as f:
+                raw = plistlib.writePlistToString(self)
+                f.seek(0)
+                f.truncate()
+                f.write(raw)
+
def _prefs():
    c = Config('global', 'calibre wide preferences')
    c.add_opt('database_path',
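The new XMLConfig behaves like a dict that persists every assignment to a plist file under the configuration directory and returns None for missing keys. A short usage sketch (the 'example/settings' path is illustrative only):

from calibre.utils.config import XMLConfig

c = XMLConfig('example/settings')   # stored as <config_dir>/example/settings.plist
c['window_size'] = [800, 600]       # committed to disk on every assignment
c['last_profile'] = u'default'      # byte strings would be wrapped in plistlib.Data
print c['window_size']              # [800, 600]
print c['missing_key']              # None rather than a KeyError

c2 = XMLConfig('example/settings')  # a second instance re-reads the same file
print c2['last_profile']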

View File

@ -56,6 +56,8 @@ def _quoteattr(data, entities={}):
    the optional entities parameter. The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
+    entities['\n']='&#10;'
+    entities['\r']='&#12;'
    data = _escape(data, entities)
    if '"' in data:
        if "'" in data:

View File

@ -17,7 +17,7 @@
#
# Contributor(s):
#

-TOOLSVERSION = u"ODFPY/0.9.1dev"
+TOOLSVERSION = u"ODFPY/0.9.2dev"

ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"

View File

@ -185,7 +185,7 @@ class OpenDocument:
        if self.fontfacedecls.hasChildNodes():
            self.fontfacedecls.toXml(1, xml)
        a = AutomaticStyles()
-        stylelist = self._used_auto_styles([self.styles, self.body])
+        stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
        if len(stylelist) > 0:
            a.write_open_tag(1, xml)
            for s in stylelist:
@ -233,9 +233,11 @@ class OpenDocument:
        for styleref in ( (DRAWNS,u'style-name'),
                          (DRAWNS,u'text-style-name'),
                          (PRESENTATIONNS,u'style-name'),
-                          (STYLENS,u'style-name'),
+                          (STYLENS,u'data-style-name'),
                          (STYLENS,u'list-style-name'),
                          (STYLENS,u'page-layout-name'),
+                          (STYLENS,u'style-name'),
+                          (TABLENS,u'default-cell-style-name'),
                          (TABLENS,u'style-name'),
                          (TEXTNS,u'style-name') ):
            if e.getAttrNS(styleref[0],styleref[1]):

View File

@ -50,3 +50,5 @@ def Radialgradient(**args):
def Stop(**args):
    return Element(qname = (SVGNS,'stop'), **args)

+def Title(**args):
+    return Element(qname = (SVGNS,'title'), **args)

View File

@ -446,6 +446,9 @@ def SequenceRef(**args):
def SheetName(**args):
    return Element(qname = (TEXTNS,'sheet-name'), **args)

+def SoftPageBreak(**args):
+    return Element(qname = (TEXTNS,'soft-page-break'), **args)
+
def SortKey(**args):
    return Element(qname = (TEXTNS,'sort-key'), **args)
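Both additions follow odfpy's factory-function pattern: each returns an Element bound to a namespace-qualified tag. A tiny sketch that only constructs the new elements and inspects their qualified names (nothing document-specific is assumed):

from odf.text import SoftPageBreak
from odf.svg import Title

print SoftPageBreak().qname   # the (namespace, localname) pair passed to Element
print Title().qname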

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@ -22,16 +22,11 @@
"""Class to show and manipulate user fields in odf documents."""

import sys
-import time
import zipfile
-import xml.sax
-import xml.sax.handler
-import xml.sax.saxutils
-from odf.namespaces import OFFICENS, TEXTNS
-from cStringIO import StringIO
+from odf.text import UserFieldDecl
+from odf.namespaces import OFFICENS
+from odf.opendocument import load

OUTENCODING = "utf-8"
@ -60,16 +55,36 @@
        src ... source document name, file like object or None for stdin
        dest ... destination document name, file like object or None for stdout

        """
        self.src_file = src
        self.dest_file = dest
+        self.document = None
+
+    def loaddoc(self):
+        if isinstance(self.src_file, basestring):
+            # src_file is a filename, check if it is a zip-file
+            if not zipfile.is_zipfile(self.src_file):
+                raise TypeError("%s is no odt file." % self.src_file)
+        elif self.src_file is None:
+            # use stdin if no file given
+            self.src_file = sys.stdin
+        self.document = load(self.src_file)
+
+    def savedoc(self):
+        # write output
+        if self.dest_file is None:
+            # use stdout if no filename given
+            self.document.save('-')
+        else:
+            self.document.save(self.dest_file)

    def list_fields(self):
        """List (extract) all known user-fields.

        Returns list of user-field names.

        """
        return [x[0] for x in self.list_fields_and_values()]
@ -81,15 +96,21 @@
        Returns list of tuples (<field name>, <field type>, <value>).

        """
+        self.loaddoc()
        found_fields = []
-        def _callback(field_name, value_type, value, attrs):
+        all_fields = self.document.getElementsByType(UserFieldDecl)
+        for f in all_fields:
+            value_type = f.getAttribute('valuetype')
+            if value_type == 'string':
+                value = f.getAttribute('stringvalue')
+            else:
+                value = f.getAttribute('value')
+            field_name = f.getAttribute('name')
            if field_names is None or field_name in field_names:
                found_fields.append((field_name.encode(OUTENCODING),
                                     value_type.encode(OUTENCODING),
                                     value.encode(OUTENCODING)))
-            return attrs
-        self._content_handler(_callback)
        return found_fields

    def list_values(self, field_names):
@ -133,199 +154,16 @@
        Returns None

        """
+        self.loaddoc()
+        all_fields = self.document.getElementsByType(UserFieldDecl)
+        for f in all_fields:
+            field_name = f.getAttribute('name')
+            if data.has_key(field_name):
+                value_type = f.getAttribute('valuetype')
+                value = data.get(field_name)
+                if value_type == 'string':
+                    f.setAttribute('stringvalue', value)
+                else:
+                    f.setAttribute('value', value)
+        self.savedoc()
def _callback(field_name, value_type, value, attrs):
if field_name in data:
valattr = VALUE_TYPES.get(value_type)
attrs = dict(attrs.items())
# Take advantage that startElementNS can take a normal
# dict as attrs
attrs[valattr] = data[field_name]
return attrs
self._content_handler(_callback, write_file=True)
def _content_handler(self, callback_func, write_file=False):
"""Handle the content using the callback function and write result if
necessary.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
write_file ... boolean telling wether write result to file
"""
class DevNull(object):
"""IO-object which behaves like /dev/null."""
def write(self, str):
pass
# get input
if isinstance(self.src_file, basestring):
# src_file is a filename, check if it is a zip-file
if not zipfile.is_zipfile(self.src_file):
raise TypeError("%s is no odt file." % self.src_file)
elif self.src_file is None:
# use stdin if no file given
self.src_file = sys.stdin
zin = zipfile.ZipFile(self.src_file, 'r')
content_xml = zin.read('content.xml')
# prepare output
if write_file:
output_io = StringIO()
if self.dest_file is None:
# use stdout if no filename given
self.dest_file = sys.stdout
zout = zipfile.ZipFile(self.dest_file, 'w')
else:
output_io = DevNull()
# parse input
odfs = ODFContentParser(callback_func, output_io)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
parser.setContentHandler(odfs)
parser.parse(StringIO(content_xml))
# write output
if write_file:
# Loop through the input zipfile and copy the content to
# the output until we get to the content.xml. Then
# substitute.
for zinfo in zin.infolist():
if zinfo.filename == "content.xml":
# Write meta
zi = zipfile.ZipInfo("content.xml", time.localtime()[:6])
zi.compress_type = zipfile.ZIP_DEFLATED
zout.writestr(zi, odfs.content())
else:
payload = zin.read(zinfo.filename)
zout.writestr(zinfo, payload)
zout.close()
zin.close()
class ODFContentParser(xml.sax.saxutils.XMLGenerator):
def __init__(self, callback_func, out=None, encoding=OUTENCODING):
"""Constructor.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
out ... file like object for output
encoding ... encoding for output
"""
self._callback_func = callback_func
xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding)
def _qname(self, name):
"""Builds a qualified name from a (ns_url, localname) pair"""
if name[0]:
if name[0] == u'http://www.w3.org/XML/1998/namespace':
return u'xml' + ":" + name[1]
# The name is in a non-empty namespace
prefix = self._current_context[name[0]]
if prefix:
# If it is not the default namespace, prepend the prefix
return prefix + ":" + name[1]
# Return the unqualified name
return name[1]
def startElementNS(self, name, qname, attrs):
if name == (TEXTNS, u'user-field-decl'):
field_name = attrs.get((TEXTNS, u'name'))
value_type = attrs.get((OFFICENS, u'value-type'))
if value_type == 'string':
value = attrs.get((OFFICENS, u'string-value'))
else:
value = attrs.get((OFFICENS, u'value'))
attrs = self._callback_func(field_name, value_type, value, attrs)
self._startElementNS(name, qname, attrs)
def _startElementNS(self, name, qname, attrs):
# copy of xml.sax.saxutils.XMLGenerator.startElementNS
# necessary because we have to provide our own writeattr
# function which is called by this method
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
name = name[1]
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write('<' + name)
for k,v in self._undeclared_ns_maps:
if k is None:
self._out.write(' xmlns="%s"' % (v or ''))
else:
self._out.write(' xmlns:%s="%s"' % (k,v))
self._undeclared_ns_maps = []
for (name, value) in attrs.items():
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
#If an attribute has a nsuri but not a prefix, we must
#create a prefix and add a nsdecl
prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
self._generated_prefix_ctr = self._generated_prefix_ctr + 1
name = prefix + ':' + name[1]
self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0])))
self._current_context[name[0]] = prefix
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write(' %s=' % name)
writeattr(self._out, value)
self._out.write('>')
def content(self):
return self._out.getvalue()
ATTR_ENTITIES = {
'\n': '&#x0a;' # convert newlines into entities inside attributes
}
def writetext(stream, text, entities={}):
text = xml.sax.saxutils.escape(text, entities)
try:
stream.write(text)
except UnicodeError:
for c in text:
try:
stream.write(c)
except UnicodeError:
stream.write(u"&#%d;" % ord(c))
def writeattr(stream, text):
# copied from xml.sax.saxutils.writeattr added support for an
# additional entity mapping
countdouble = text.count('"')
entities = ATTR_ENTITIES.copy()
if countdouble:
countsingle = text.count("'")
if countdouble <= countsingle:
entities['"'] = "&quot;"
quote = '"'
else:
entities["'"] = "&apos;"
quote = "'"
else:
quote = '"'
stream.write(quote)
writetext(stream, text, entities)
stream.write(quote)
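After this refactor UserFields loads the document through odf.opendocument.load and writes it back with save, instead of streaming content.xml through a SAX filter. A short usage sketch (the file names are examples, and the last hunk is assumed to be the body of UserFields.update, as the surrounding code suggests):

from odf.userfield import UserFields

fields = UserFields('template.odt', 'filled.odt')
print fields.list_fields()                    # names of all user-field declarations
print fields.list_fields_and_values(None)     # (name, value-type, value) tuples
fields.update({'customer': 'ACME Corp.'})     # set a value and write filled.odt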