Merge from trunk

2026-03-19 07:57:53 -04:00 · 2011-03-11 07:48:01 +00:00 · 2011-03-11 07:48:01 +00:00 · e2917dcda7
commit e2917dcda7
parent 8831eb5443 e45fd25914
24 changed files with 435 additions and 69 deletions
--- a/resources/images/news/publika.png
+++ b/resources/images/news/publika.png
--- a/resources/recipes/golem_de.recipe
+++ b/resources/recipes/golem_de.recipe
@ -1,17 +1,83 @@
-from calibre.web.feeds.news import BasicNewsRecipe
+#!/usr/bin/env  python

-class AdvancedUserRecipe1257093338(BasicNewsRecipe):
+from calibre.web.feeds.news import BasicNewsRecipe
+class golem_ger(BasicNewsRecipe):
    title          = u'Golem.de'
    language = 'de'
    __author__ = 'Kovid Goyal'
    oldest_article = 7
    max_articles_per_feed = 100
+    language              = 'de'
+    lang                  = 'de-DE'
+    no_stylesheets        = True
+    encoding              = 'iso-8859-1'
+    recursions = 1
+    match_regexps = [r'http://www.golem.de/.*.html']

-    feeds          = [(u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0')]
+    keep_only_tags     = [
+                               dict(name='h1', attrs={'class':'artikelhead'}),
+                               dict(name='p', attrs={'class':'teaser'}),
+                               dict(name='div', attrs={'class':'artikeltext'}),
+                               dict(name='h2', attrs={'id':'artikelhead'}),
+                            ]

-    def print_version(self, url):
-        murxb = url.rfind('/') + 1
-        murxc = url[murxb :-5]
-        murxa = 'http://www.golem.de/' + 'print.php?a=' + murxc
-        return murxa

+
+    remove_tags = [
+                    dict(name='div', attrs={'id':['similarContent','topContentWrapper','storycarousel','aboveFootPromo','comments','toolbar','breadcrumbs','commentlink','sidebar','rightColumn']}),
+                    dict(name='div', attrs={'class':['gg_embeddedSubText','gg_embeddedIndex gg_solid','gg_toOldGallery','golemGallery']}),
+                    dict(name='img', attrs={'class':['gg_embedded','gg_embeddedIconRight gg_embeddedIconFS gg_cursorpointer']}),
+                    dict(name='td', attrs={'class':['xsmall']}),
+                    ]
+
+
+    # remove_tags_after  = [
+      #                      dict(name='div', attrs={'id':['contentad2']})
+       #                 ]
+
+
+    feeds          = [
+                      (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
+                      (u'Audio/Video', u'http://rss.golem.de/rss.php?tp=av&feed=RSS2.0'),
+                      (u'Foto', u'http://rss.golem.de/rss.php?tp=foto&feed=RSS2.0'),
+                      (u'Games', u'http://rss.golem.de/rss.php?tp=games&feed=RSS2.0'),
+                      (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
+                      (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=ATOM1.0'),
+                      (u'Internet', u'http://rss.golem.de/rss.php?tp=inet&feed=RSS1.0'),
+                      (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=ATOM1.0'),
+                      (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
+                      (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
+                      (u'Wirtschaft', u'http://rss.golem.de/rss.php?tp=wirtschaft&feed=RSS2.0'),
+                      (u'Hardware', u'http://rss.golem.de/rss.php?r=hw&feed=RSS2.0'),
+                      (u'Software', u'http://rss.golem.de/rss.php?r=sw&feed=RSS2.0'),
+                      (u'Networld', u'http://rss.golem.de/rss.php?r=nw&feed=RSS2.0'),
+                      (u'Entertainment', u'http://rss.golem.de/rss.php?r=et&feed=RSS2.0'),
+                      (u'TK', u'http://rss.golem.de/rss.php?r=tk&feed=RSS2.0'),
+                      (u'E-Commerce', u'http://rss.golem.de/rss.php?r=ec&feed=RSS2.0'),
+                      (u'Unternehmen/Maerkte', u'http://rss.golem.de/rss.php?r=wi&feed=RSS2.0')
+                      ]
+
+
+
+
+    feeds          = [
+                      (u'Golem.de', u'http://rss.golem.de/rss.php?feed=ATOM1.0'),
+                      (u'Mobil', u'http://rss.golem.de/rss.php?tp=mc&feed=RSS2.0'),
+                      (u'OSS', u'http://rss.golem.de/rss.php?tp=oss&feed=RSS2.0'),
+                      (u'Politik/Recht', u'http://rss.golem.de/rss.php?tp=pol&feed=RSS2.0'),
+                      (u'Desktop-Applikationen', u'http://rss.golem.de/rss.php?tp=apps&feed=RSS2.0'),
+                      (u'Software-Entwicklung', u'http://rss.golem.de/rss.php?tp=dev&feed=RSS2.0'),
+                      ]
+
+
+    extra_css = '''
+                h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:30px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;margin-bottom:2 em;}
+                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:x-small; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal; line-height:5px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
+                .teaser {font-style:italic;font-size:12pt;margin-bottom:15pt;}
+                .xsmall{font-style:italic;font-size:x-small;}
+                .td{font-style:italic;font-size:x-small;}
+                img {align:left;}
+                '''
--- a/resources/recipes/gulli.recipe
+++ b/resources/recipes/gulli.recipe
@ -11,6 +11,26 @@ class AdvancedUserRecipe1259599587(BasicNewsRecipe):

    feeds          = [(u'gulli:news', u'http://ticker.gulli.com/rss/')]

-    remove_tags = [{'class' : ['addthis_button', 'BreadCrumb']}, {'id' : ['plista0']}]
+    remove_tags = [dict(name='div', attrs={'class':['FloatL','_forumBox']})]

-    keep_only_tags = [dict(name='div', attrs={'class':'inside'})]
+    keep_only_tags = [dict(name='div', attrs={'id':['_contentLeft']})]
+
+    remove_tags_after  = [dict(name='div', attrs={'class':['_bookmark']})]
+
+
+
+
+
+    extra_css = '''
+                h1 {color:#008852;font-family:Arial,Helvetica,sans-serif; font-size:25px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:22px; }
+                h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
+                .byline {color:#666;margin-bottom:0;font-size:12px}
+                .blockquote {color:#030303;font-style:italic;padding-left:15px;}
+                img {align:center;}
+                .li {list-style-type: none}
+                '''
--- a/resources/recipes/jbpress.recipe
+++ b/resources/recipes/jbpress.recipe
@ -0,0 +1,42 @@
+import urllib2
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class JBPress(BasicNewsRecipe):
+    title          = u'JBPress'
+    language = 'ja'
+    description = u'Japan Business Press New articles (using small print version)'
+    __author__	= 'Ado Nishimura'
+    needs_subscription = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_tags_before = dict(id='wrapper')
+    no_stylesheets         = True
+
+    feeds = [('JBPress new article', 'http://feed.ismedia.jp/rss/jbpress/all.rdf')]
+
+
+    def get_cover_url(self):
+        return 'http://www.jbpress.co.jp/common/images/v1/jpn/common/logo.gif'
+
+    def get_browser(self):
+        html = '''<form action="https://jbpress.ismedia.jp/auth/dologin/http://jbpress.ismedia.jp/articles/print/5549" method="post">
+<input id="login" name="login" type="text"/>
+<input id="password" name="password" type="password"/>
+<input id="rememberme" name="rememberme" type="checkbox"/>
+</form>
+'''
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('http://jbpress.ismedia.jp/articles/print/5549')
+            response = br.response()
+            response.set_data(html)
+            br.set_response(response)
+            br.select_form(nr=0)
+            br["login"]   = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def print_version(self, url):
+        url = urllib2.urlopen(url).geturl() # resolve redirect.
+        return url.replace('/-/', '/print/')
--- a/resources/recipes/lanacion.recipe
+++ b/resources/recipes/lanacion.recipe
@ -17,6 +17,7 @@ class Lanacion(BasicNewsRecipe):
    use_embedded_content  = False
    no_stylesheets        = True
    language              = 'es_AR'
+    delay                 = 14
    publication_type      = 'newspaper'
    remove_empty_feeds    = True
    masthead_url          = 'http://www.lanacion.com.ar/_ui/desktop/imgs/layout/logos/ln341x47.gif'
@ -25,7 +26,7 @@ class Lanacion(BasicNewsRecipe):
                                h2{color: #626262; font-weight: normal; font-size: 1.1em}
                                body{font-family: Arial,sans-serif}
                                img{margin-top: 0.5em; margin-bottom: 0.2em; display: block}
-                                .notaFecha{color: #808080}
+                                .notaFecha{color: #808080; font-size: small}
                                .notaEpigrafe{font-size: x-small}
                                .topNota h1{font-family: Arial,sans-serif}
                            """
@ -38,7 +39,10 @@ class Lanacion(BasicNewsRecipe):
                        , 'language' : language
                        }

-    keep_only_tags = [dict(name='div', attrs={'id':'content'})]
+    keep_only_tags = [  
+                        dict(name='div', attrs={'class':['topNota','itemHeader','nota','itemBody']})
+                       ,dict(name='div', attrs={'id':'content'})
+                     ]
    
    remove_tags = [
                     dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
@ -52,8 +56,7 @@ class Lanacion(BasicNewsRecipe):
    remove_attributes = ['height','width','visible','onclick','data-count','name']

    feeds          = [
-                         (u'Ultimas Noticias'     , u'http://servicios.lanacion.com.ar/herramientas/rss/origen=2'         )
-                        ,(u'Politica'             , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30'  )
+                         (u'Politica'             , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=30'  )
                        ,(u'Deportes'             , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=131' )
                        ,(u'Economia'             , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=272' )
                        ,(u'Informacion General'  , u'http://servicios.lanacion.com.ar/herramientas/rss/categoria_id=21'  )
@ -81,17 +84,12 @@ class Lanacion(BasicNewsRecipe):
                     ]

                     
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser()
-        br.set_debug_redirects(True)
-        br.set_debug_responses(True)
-        br.set_debug_http(True)
-        return br
-
    def get_article_url(self, article):
        link = BasicNewsRecipe.get_article_url(self,article)
        if link.startswith('http://blogs.lanacion') and not link.endswith('/'):
-           return None
+           return self.browser.open_novisit(link).geturl()
+        if link.rfind('galeria=') > 0:
+           return None        
        return link
        
    def preprocess_html(self, soup):
--- a/resources/recipes/nbonline.recipe
+++ b/resources/recipes/nbonline.recipe
@ -0,0 +1,33 @@
+EMAILADDRESS = 'hoge@foobar.co.jp'
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NBOnline(BasicNewsRecipe):
+    title          = u'Nikkei Business Online'
+    language = 'ja'
+    description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
+    __author__	= 'Ado Nishimura'
+    needs_subscription = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_tags_before = dict(id='kanban')
+    remove_tags = [dict(name='div', id='footer')]
+
+    feeds          = [('Nikkei Business Online', 'http://business.nikkeibp.co.jp/rss/all_nbo.rdf')]
+
+    def get_cover_url(self):
+        return 'http://business.nikkeibp.co.jp/images/nbo/200804/parts/logo.gif'
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
+            br.select_form(name='loginActionForm')
+            br['email']   = EMAILADDRESS
+            br['userId']   = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
+    def print_version(self, url):
+        return url + '?ST=print'
--- a/resources/recipes/publika.recipe
+++ b/resources/recipes/publika.recipe
@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+publika.md
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Publika(BasicNewsRecipe):
+    title                 = u'Publika'
+    __author__            = u'Silviu Cotoar\u0103'
+    description           = u'\u015etiri din Moldova'
+    publisher             = u'Publika'
+    oldest_article        = 25
+    language              = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    category              = 'Ziare,Stiri,Moldova'
+    encoding              = 'utf-8'
+    cover_url             = 'http://assets.publika.md/images/logo.jpg'
+
+    conversion_options = {
+                             'comments'   : description
+                            ,'tags'       : category
+                            ,'language'   : language
+                            ,'publisher'  : publisher
+                         }
+
+    keep_only_tags = [
+                          dict(name='div', attrs={'id':'colLeft'})
+                     ]
+
+    remove_tags = [
+                          dict(name='div', attrs={'class':['articleInfo']})
+                        , dict(name='div', attrs={'class':['articleRelated']})
+                        , dict(name='div', attrs={'class':['roundedBox socialSharing']})
+                        , dict(name='div', attrs={'class':['comment clearfix']})
+                  ]
+
+    remove_tags_after = [
+                              dict(name='div', attrs={'class':['roundedBox socialSharing']})
+                            , dict(name='div', attrs={'class':['comment clearfix']})
+                        ]
+
+    feeds          = [
+                        (u'Feeds', u'http://rss.publika.md/stiri.xml')
+                     ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -72,7 +72,7 @@ class FB2MLizer(object):

    def clean_text(self, text):
        # Condense empty paragraphs into a line break. 
-        text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<p><empty-line /></p>', text)
+        text = re.sub(r'(?miu)(<p>\s*</p>\s*){3,}', '<empty-line />', text)
        # Remove empty paragraphs.
        text = re.sub(r'(?miu)<p>\s*</p>', '', text)
        # Clean up pargraph endings.
@ -101,9 +101,6 @@ class FB2MLizer(object):

    def fb2_header(self):
        metadata = {}
-        metadata['author_first'] = u''
-        metadata['author_middle'] = u''
-        metadata['author_last'] = u''
        metadata['title'] = self.oeb_book.metadata.title[0].value
        metadata['appname'] = __appname__
        metadata['version'] = __version__
@ -115,16 +112,36 @@ class FB2MLizer(object):
        metadata['id'] = None
        metadata['cover'] = self.get_cover()

-        author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
-        if len(author_parts) == 1:
-            metadata['author_last'] = author_parts[0]
-        elif len(author_parts) == 2:
-            metadata['author_first'] = author_parts[0]
-            metadata['author_last'] = author_parts[1]
-        else:
-            metadata['author_first'] = author_parts[0]
-            metadata['author_middle'] = ' '.join(author_parts[1:-2])
-            metadata['author_last'] = author_parts[-1]
+        metadata['author'] = u''
+        for auth in self.oeb_book.metadata.creator:
+            author_first = u''
+            author_middle = u''
+            author_last = u''
+            author_parts = auth.value.split(' ')
+            if len(author_parts) == 1:
+                author_last = author_parts[0]
+            elif len(author_parts) == 2:
+                author_first = author_parts[0]
+                author_last = author_parts[1]
+            else:
+                author_first = author_parts[0]
+                author_middle = ' '.join(author_parts[1:-1])
+                author_last = author_parts[-1]
+            metadata['author'] += '<author>'
+            metadata['author'] += '<first-name>%s</first-name>' % prepare_string_for_xml(author_first)
+            if author_middle:
+                metadata['author'] += '<middle-name>%s</middle-name>' % prepare_string_for_xml(author_middle)
+            metadata['author'] += '<last-name>%s</last-name>' % prepare_string_for_xml(author_last)
+            metadata['author'] += '</author>'
+        if not metadata['author']:
+            metadata['author'] = u'<author><first-name></first-name><last-name></last-name></author>'
+
+        metadata['sequence'] = u''
+        if self.oeb_book.metadata.series:
+            index = '1'
+            if self.oeb_book.metadata.series_index:
+                index = self.oeb_book.metadata.series_index[0]
+            metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (prepare_string_for_xml(u'%s' % self.oeb_book.metadata.series[0]), index)

        identifiers = self.oeb_book.metadata['identifier']
        for x in identifiers:
@ -136,28 +153,21 @@ class FB2MLizer(object):
            metadata['id'] = str(uuid.uuid4())

        for key, value in metadata.items():
-            if not key == 'cover':
+            if key not in ('author', 'cover', 'sequence'):
                metadata[key] = prepare_string_for_xml(value)

        return u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">' \
                '<description>' \
                    '<title-info>' \
                        '<genre>antique</genre>' \
-                        '<author>' \
-                            '<first-name>%(author_first)s</first-name>' \
-                            '<middle-name>%(author_middle)s</middle-name>' \
-                            '<last-name>%(author_last)s</last-name>' \
-                        '</author>' \
+                            '%(author)s' \
                        '<book-title>%(title)s</book-title>' \
                        '%(cover)s' \
                        '<lang>%(lang)s</lang>' \
+                        '%(sequence)s' \
                    '</title-info>' \
                    '<document-info>' \
-                        '<author>' \
-                            '<first-name></first-name>' \
-                            '<middle-name></middle-name>' \
-                            '<last-name></last-name>' \
-                        '</author>' \
+                        '%(author)s' \
                        '<program-used>%(appname)s %(version)s</program-used>' \
                        '<date>%(date)s</date>' \
                        '<id>%(id)s</id>' \
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@ -23,8 +23,9 @@ cover_url_cache = {}
 cache_lock = RLock()

 def find_asin(br, isbn):
-    q = 'http://www.amazon.com/s?field-keywords='+isbn
-    raw = br.open_novisit(q).read()
+    q = 'http://www.amazon.com/s/?search-alias=aps&field-keywords='+isbn
+    res = br.open_novisit(q)
+    raw = res.read()
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
            resolve_entities=True)[0]
    root = html.fromstring(raw)
@ -151,6 +152,8 @@ def get_metadata(br, asin, mi):
        root = soupparser.fromstring(raw)
    except:
        return False
+    if root.xpath('//*[@id="errorMessage"]'):
+        return False
    ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
    if ratings:
        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
@ -191,6 +194,7 @@ def main(args=sys.argv):
    tdir = tempfile.gettempdir()
    br = browser()
    for title, isbn in [
+            ('The Heroes', '9780316044981'), # Test find_asin
            ('Learning Python', '8324616489'), # Test xisbn
            ('Angels & Demons', '9781416580829'), # Test sophisticated comment formatting
            # Random tests
@ -207,8 +211,12 @@ def main(args=sys.argv):

        #import time
        #st = time.time()
-        print get_social_metadata(title, None, None, isbn)
+        mi = get_social_metadata(title, None, None, isbn)
+        if not mi.comments:
+            print 'Failed to download social metadata for', title
+            return 1
        #print '\n\n', time.time() - st, '\n\n'
+        print '\n'

    return 0

--- a/src/calibre/ebooks/metadata/covers.py
+++ b/src/calibre/ebooks/metadata/covers.py
@ -74,6 +74,8 @@ class HeadRequest(mechanize.Request):
 class OpenLibraryCovers(CoverDownload): # {{{
    'Download covers from openlibrary.org'

+    # See http://openlibrary.org/dev/docs/api/covers
+
    OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false'
    name = 'openlibrary.org covers'
    description = _('Download covers from openlibrary.org')
@ -82,7 +84,8 @@ class OpenLibraryCovers(CoverDownload): # {{{
    def has_cover(self, mi, ans, timeout=5.):
        if not mi.isbn:
            return False
-        br = browser()
+        from calibre.ebooks.metadata.library_thing import get_browser
+        br = get_browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(HeadRequest(self.OPENLIBRARY%mi.isbn), timeout=timeout)
@ -98,7 +101,8 @@ class OpenLibraryCovers(CoverDownload): # {{{
    def get_covers(self, mi, result_queue, abort, timeout=5.):
        if not mi.isbn:
            return
-        br = browser()
+        from calibre.ebooks.metadata.library_thing import get_browser
+        br = get_browser()
        try:
            ans = br.open(self.OPENLIBRARY%mi.isbn, timeout=timeout).read()
            result_queue.put((True, ans, 'jpg', self.name))
@ -137,6 +141,8 @@ class AmazonCovers(CoverDownload): # {{{
        br = browser()
        try:
            url = get_cover_url(mi.isbn, br)
+            if url is None:
+                raise ValueError('No cover found for ISBN: %s'%mi.isbn)
            cover_data = br.open_novisit(url).read()
            result_queue.put((True, cover_data, 'jpg', self.name))
        except Exception, e:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -908,6 +908,19 @@ class Manifest(object):
                pass
            data = first_pass(data)

+            if data.tag == 'HTML':
+                # Lower case all tag and attribute names
+                data.tag = data.tag.lower()
+                for x in data.iterdescendants():
+                    try:
+                        x.tag = x.tag.lower()
+                        for key, val in list(x.attrib.iteritems()):
+                            del x.attrib[key]
+                            key = key.lower()
+                            x.attrib[key] = val
+                    except:
+                        pass
+
            # Handle weird (non-HTML/fragment) files
            if barename(data.tag) != 'html':
                self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -12,7 +12,7 @@ from threading import Thread

 from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QDate, \
    QPixmap, QListWidgetItem, QDialog, pyqtSignal, QIcon, \
-    QPushButton
+    QPushButton, QKeySequence

 from calibre.gui2 import error_dialog, file_icon_provider, dynamic, \
                           choose_files, choose_images, ResizableDialog, \
@ -469,20 +469,22 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        self.ts_tooltips = (ok_tooltip, bad_tooltip)
        self.row_delta = 0
        if prev:
-            self.prev_button = QPushButton(QIcon(I('back.png')), _('Previous'),
+            self.prev_button = QPushButton(QIcon(I('back.png')), _('&Previous'),
                    self)
            self.button_box.addButton(self.prev_button, self.button_box.ActionRole)
            tip = _('Save changes and edit the metadata of %s')%prev
            self.prev_button.setToolTip(tip)
            self.prev_button.clicked.connect(partial(self.next_triggered,
                -1))
+            self.prev_button.setShortcut(QKeySequence('Alt+Left'))
        if next_:
-            self.next_button = QPushButton(QIcon(I('forward.png')), _('Next'),
+            self.next_button = QPushButton(QIcon(I('forward.png')), _('&Next'),
                    self)
            self.button_box.addButton(self.next_button, self.button_box.ActionRole)
            tip = _('Save changes and edit the metadata of %s')%next_
            self.next_button.setToolTip(tip)
            self.next_button.clicked.connect(partial(self.next_triggered, 1))
+            self.next_button.setShortcut(QKeySequence('Alt+Right'))

        self.splitter.setStretchFactor(100, 1)
        self.read_state()
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@ -11,7 +11,7 @@ from functools import partial
 from PyQt4.Qt import Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, \
        QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, \
        QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, \
-        QSizePolicy, QPalette, QFrame, QSize
+        QSizePolicy, QPalette, QFrame, QSize, QKeySequence

 from calibre.ebooks.metadata import authors_to_string, string_to_authors
 from calibre.gui2 import ResizableDialog, error_dialog, gprefs
@ -43,11 +43,14 @@ class MetadataSingleDialogBase(ResizableDialog):
                self)
        self.button_box.accepted.connect(self.accept)
        self.button_box.rejected.connect(self.reject)
-        self.next_button = QPushButton(QIcon(I('forward.png')), _('Next'),
+        self.next_button = QPushButton(QIcon(I('forward.png')), _('&Next'),
                self)
+        self.next_button.setShortcut(QKeySequence('Alt+Right'))
        self.next_button.clicked.connect(partial(self.do_one, delta=1))
-        self.prev_button = QPushButton(QIcon(I('back.png')), _('Previous'),
+        self.prev_button = QPushButton(QIcon(I('back.png')), _('&Previous'),
                self)
+        self.prev_button.setShortcut(QKeySequence('Alt+Left'))
+
        self.button_box.addButton(self.prev_button, self.button_box.ActionRole)
        self.button_box.addButton(self.next_button, self.button_box.ActionRole)
        self.prev_button.clicked.connect(partial(self.do_one, delta=-1))
@ -355,11 +358,13 @@ class MetadataSingleDialogBase(ResizableDialog):
            next_ = self.db.title(self.row_list[self.current_row+1])

        if next_ is not None:
-            tip = _('Save changes and edit the metadata of %s')%next_
+            tip = (_('Save changes and edit the metadata of %s')+
+                    ' [Alt+Right]')%next_
            self.next_button.setToolTip(tip)
        self.next_button.setVisible(next_ is not None)
        if prev is not None:
-            tip = _('Save changes and edit the metadata of %s')%prev
+            tip = (_('Save changes and edit the metadata of %s')+
+                    ' [Alt+Left]')%prev
            self.prev_button.setToolTip(tip)
        self.prev_button.setVisible(prev is not None)
        self(self.db.id(self.row_list[self.current_row]))
--- a/src/calibre/manual/images/sg_cc.jpg
+++ b/src/calibre/manual/images/sg_cc.jpg
--- a/src/calibre/manual/images/sg_genre.jpg
+++ b/src/calibre/manual/images/sg_genre.jpg
--- a/src/calibre/manual/images/sg_pref.jpg
+++ b/src/calibre/manual/images/sg_pref.jpg
--- a/src/calibre/manual/images/sg_restrict.jpg
+++ b/src/calibre/manual/images/sg_restrict.jpg
--- a/src/calibre/manual/images/sg_restrict2.jpg
+++ b/src/calibre/manual/images/sg_restrict2.jpg
--- a/src/calibre/manual/images/sg_search.jpg
+++ b/src/calibre/manual/images/sg_search.jpg
--- a/src/calibre/manual/images/sg_tb.jpg
+++ b/src/calibre/manual/images/sg_tb.jpg
--- a/src/calibre/manual/images/sg_tree.jpg
+++ b/src/calibre/manual/images/sg_tree.jpg
--- a/src/calibre/manual/sub_groups.rst
+++ b/src/calibre/manual/sub_groups.rst
@ -0,0 +1,108 @@
+
+.. include:: global.rst
+
+.. _subgroups-tutorial:
+
+Managing subgroups of books, for example "genre"
+==================================================
+
+Some people wish to organize the books in their library into subgroups, similar to subfolders. The most common wish is to create genre hierarchies, but there are many others. One user asked for a way to organize textbooks by subject and course number. Another wanted to keep track of gifts by subject and recipient. I will use the genre example for the rest of this tutorial.
+
+Before I go on, please note that I am not talking about folders on the hard disk. Subgroups are not file folders. Books will not be copied anywhere. Calibre's library file structure is not affected. Instead, I am talking about a way to display subgroups of books within a calibre library.
+
+.. contents::
+    :depth: 1
+    :local:
+
+.. |sgtree| image:: images/sg_tree.jpg
+    :class: float-left-img
+
+
+The commonly expressed requirements for subgroups such as genres are:
+
+    * A subgroup (e.g., a genre) must contain (point to) books, not categories of books. This is what distinguishes subgroups from user categories.
+    * A book can be in multiple subgroups (genres). This distinguishes subgroups from physical file folders.
+    * Subgroups (genres) must form a hierarchy; subgroups can contain subgroups.
+
+|sgtree| Tags give you the first two. If you tag a book with the genre then you can use the tag browser (or search) to find the books with that genre, giving you the first. Many books can have the same tag, giving you the second. The problem is that tags don't satisfy the third requirement. They don't provide a hierarchy.
+
+Calibre's new hierarchy feature gives you the third, the ability to see the genres in a 'tree' and the ability to easily search for books in genre or sub-genre. For example, assume that your genre structure is similar to the following::
+
+    Genre
+        . History
+        .. Japanese
+        .. Military
+        .. Roman
+        . Mysteries
+        .. English
+        .. Vampire
+        . Science Fiction
+        .. Alternate History
+        .. Military
+        .. Space Opera
+        . Thrillers
+        .. Crime
+        .. Horror
+        etc.
+
+By using the hierarchy feature, you can see these genres in the tag browser in a tree form. As you can see, in this example the outermost level (Genre) is a custom column. The genres themselves appear under that column. Genres containing sub-genres appear with a small triangle next to them. Clicking on that triangle will open the item and show the sub-genres, as you see with History and Science Fiction.
+
+Clicking on a genre will search for all books with that genre or children of that genre. For example, clicking on Science Fiction will give books in all three of its child genres: Alternate History, Military, and Space Opera. Clicking on Alternate History will give books in that genre, ignoring those in Military and Space Opera. Of course, a book can have multiple genres. If a book has both Space Opera and Military genres, then you see that book if you click on either genre. Searching is discussed in more detail below.
+
+Another thing you can see from the image is that the genre Military appears twice, once under History and once under Science Fiction. Because the genres are in a hierarchy, these are two separate genres. A book can be in one, the other, or (doubtfully in this case) both. For example, Winston Churchill's World War II books could be in "History.Military". David Weber's Honor Harrington books could be in "Science Fiction.Military", and in "Science Fiction.Space Opera" for that matter.
+
+Once a genre exists, that is, once it has been applied to at least one book, you can easily apply it to other books by dragging a book from the library view onto the genre you want the book to have. You can also apply genres in the metadata editors. More on this below.
+
+Setup
+----------------------------------------
+
+
+Your question by now might be "how did I set all of this up?". There are three steps: 1) create the custom column, 2) tell calibre that the new column is to be treated as a hierarchy, and 3) add genres.
+
+I created the custom column in the usual way, using Preferences -> Add your own columns. I used "genre" as the lookup name and "Genre" as the column heading. The column type is "Comma-separated text, like tags, shown in the tag browser." 
+
+.. image:: images/sg_cc.jpg
+    :align: center
+
+Then after restarting calibre, I told calibre that the column is to be treated as a hierarchy. I went to Preferences -> Look and Feel and entered the lookup name "#genre" into the "Categories with hierarchical items" box. I pressed Apply and was done with setting up.
+
+.. image:: images/sg_pref.jpg
+    :align: center
+
+At this point there are no genres. We are left with the last step: how to apply a genre to a book. A genre does not exist until it appears on at least one book. To apply a genre for the first time, we must go into some detail about what a genre looks like in the metadata for a book.
+
+A hierarchy of 'things' is built by creating an item consisting of phrases separated by periods. Continuing the Genre example, these items would be "History.Military", "Mysteries.Vampire", "Science Fiction.Space Opera", etc. Thus to create a new genre, you pick a book that should have that genre, edit its metadata, and enter the new genre into the column you created. Continuing my example, if I want to assign a new genre "Comics" with a sub-genre "Superheros" to a book, I would 'edit metadata' for that (comic) book, choose the Custom metadata tab, and then enter "Comics.Superheros" as shown in the following (ignore my other custom columns):
+
+.. image:: images/sg_genre.jpg
+    :align: center
+
+After I do the above, I see in the tag browser:
+
+.. image:: images/sg_tb.jpg
+    :align: center
+
+From here on, to apply this new genre to a book (a comic book, presumably), I can either drag the book onto the genre, or add it to the book using edit metadata in exactly the same way as I did above.
+
+Searching
+---------------
+
+.. image:: images/sg_search.jpg
+    :align: center
+
+The easiest way to search for genres is to use the tag browser, clicking on the genre you want to see. Clicking on a genre with children will show you books with that genre and all child genres. However, this might bring up a question. Just because a genre has children doesn't mean that it isn't a genre in its own right. For example, a book can have the genre "History" but not "History.Military". How do I search for books with only "History"?
+
+The tag browser search mechanism knows if an item has children. If it does, clicking on the item cycles through five searches instead of the normal three. The first is the normal green plus, which shows you books with that genre only. The second is new: a doubled plus (shown below), which shows you books with that genre and all sub-genres. The third is the normal red minus, which shows you books without that exact genre. The fourth is new: a doubled minus, which shows you books without that genre or sub-genres. The fifth is back to the beginning, no mark, meaning no search.
+
+Restrictions
+---------------
+
+If you search for a genre then create a saved search, you can use the 'restrict to' box to create a virtual library of books with that genre. This is most useful if you want to do other searches within the genre or to manage/update metadata. For this example I created a saved search named 'History.Japanese' by first clicking on the genre Japanese in the tag browser to get a search into the search box, entering History.Japanese into the saved search box, then pushing the "save search" button (the green box with the white plus, on the right-hand side).
+
+.. image:: images/sg_restrict.jpg
+    :align: center
+
+Once I have done that, then I can use this search as a restriction.
+
+.. image:: images/sg_restrict2.jpg
+    :align: center
+
--- a/src/calibre/manual/template_lang.rst
+++ b/src/calibre/manual/template_lang.rst
@ -129,17 +129,17 @@ The functions available are:
    * ``switch(pattern, value, pattern, value, ..., else_value)`` -- for each ``pattern, value`` pair, checks if the field matches the regular expression ``pattern`` and if so, returns that ``value``. If no ``pattern`` matches, then ``else_value`` is returned. You can have as many ``pattern, value`` pairs as you want.
    * ``lookup(pattern, field, pattern, field, ..., else_field)`` -- like switch, except the arguments are field (metadata) names, not text. The value of the appropriate field will be fetched and used. Note that because composite columns are fields, you can use this function in one composite field to use the value of some other composite field. This is extremely useful when constructing variable save paths (more later).
    * ``select(key)`` -- interpret the field as a comma-separated list of items, with the items being of the form "id:value". Find the pair with the id equal to key, and return the corresponding value. This function is particularly useful for extracting a value such as an isbn from the set of identifiers for a book.
-    * ``subitems(val, start_index, end_index)`` -- This function is used to break apart lists of tag-like hierarchical items such as genres. It interprets the value as a comma-separated list of tag-like items, where each item is a period-separated list. Returns a new list made by first finding all the period-separated tag-like items, then for each such item extracting the start_index`th to the `end_index`th components, then combining the results back together. The first component in a period-separated list has an index of zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples::
+    * ``subitems(val, start_index, end_index)`` -- This function is used to break apart lists of tag-like hierarchical items such as genres. It interprets the value as a comma-separated list of tag-like items, where each item is a period-separated list. Returns a new list made by first finding all the period-separated tag-like items, then for each such item extracting the `start_index` th to the `end_index` th components, then combining the results back together. The first component in a period-separated list has an index of zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples::

-    Assuming a #genre column containing "A.B.C":    
-        {#genre:subitems(0,1)} returns "A"
-        {#genre:subitems(0,2)} returns "A.B"
-        {#genre:subitems(1,0)} returns "B.C"
-    Assuming a #genre column containing "A.B.C, D.E":
-        {#genre:subitems(0,1)} returns "A, D"
-        {#genre:subitems(0,2)} returns "A.B, D.E"
+        Assuming a #genre column containing "A.B.C":    
+            {#genre:subitems(0,1)} returns "A"
+            {#genre:subitems(0,2)} returns "A.B"
+            {#genre:subitems(1,0)} returns "B.C"
+        Assuming a #genre column containing "A.B.C, D.E":
+            {#genre:subitems(0,1)} returns "A, D"
+            {#genre:subitems(0,2)} returns "A.B, D.E"

-    * ``sublist(val, start_index, end_index, separator)`` -- interpret the value as a list of items separated by `separator`, returning a new list made from the `start_index`th to the `end_index`th item. The first item is number zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples assuming that the tags column (which is comma-separated) contains "A, B ,C"::
+    * ``sublist(val, start_index, end_index, separator)`` -- interpret the value as a list of items separated by `separator`, returning a new list made from the `start_index` th to the `end_index` th item. The first item is number zero. If an index is negative, then it counts from the end of the list. As a special case, an end_index of zero is assumed to be the length of the list. Examples assuming that the tags column (which is comma-separated) contains "A, B ,C"::
    
        {tags:sublist(0,1,\,)} returns "A"
        {tags:sublist(-1,0,\,)} returns "C"
--- a/src/calibre/manual/tutorials.rst
+++ b/src/calibre/manual/tutorials.rst
@ -12,6 +12,7 @@ Here you will find tutorials to get you started using |app|'s more advanced feat
   :maxdepth: 1

   news
+   sub_groups
   xpath
   template_lang
   regexp