Merge from trunk

Sengian 2010-11-02 21:35:21 +01:00
commit f61daece95
21 changed files with 1267 additions and 405 deletions

View File

@@ -25,15 +25,15 @@ class Fudzilla(BasicNewsRecipe):
remove_tags_before = dict(name='div', attrs={'class':['padding']})
remove_tags = [dict(name='td', attrs={'class':['left','right']}),
dict(name='div', attrs={'id':['toolbar','buttons']}),
dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
dict(name='span', attrs={'class':['pathway']}),
dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
dict(name='table', attrs={'class':['headlines']}),
dict(name='div', attrs={'id':['toolbar','buttons']}),
dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
dict(name='span', attrs={'class':['pathway']}),
dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
dict(name='table', attrs={'class':['headlines']}),
]
feeds = [
(u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
(u'Posts', u'http://www.fudzilla.com/?format=feed')
]
preprocess_regexps = [

View File

@@ -5,62 +5,59 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import re
import time
from calibre import entity_to_unicode
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
Comment, BeautifulStoneSoup
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
title = 'New York Times Top Stories'
__author__ = 'GRiker'
language = 'en'
requires_version = (0, 7, 5)
description = 'Top Stories from the New York Times'
# set headlinesOnly to True for the headlines-only version
headlinesOnly = True
# List of sections typically included in Top Stories. Use a keyword from the
# right column in the excludeSectionKeywords[] list to skip downloading that section
sections = {
'arts' : 'Arts',
'business' : 'Business',
'diningwine' : 'Dining & Wine',
'editorials' : 'Editorials',
'health' : 'Health',
'magazine' : 'Magazine',
'mediaadvertising' : 'Media & Advertising',
'newyorkregion' : 'New York/Region',
'oped' : 'Op-Ed',
'politics' : 'Politics',
'science' : 'Science',
'sports' : 'Sports',
'technology' : 'Technology',
'topstories' : 'Top Stories',
'travel' : 'Travel',
'us' : 'U.S.',
'world' : 'World'
}
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
# includeSections = ['Politics','Sports']
#
# would cause only the Politics and Sports sections to be included.
# Add section keywords from the right column above to skip that section
# For example, to skip sections containing the word 'Sports' or 'Dining', use:
# excludeSectionKeywords = ['Sports', 'Dining']
# Fetch only Business and Technology
# excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
# Fetch only Top Stories
# excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
# By default, no sections are skipped.
excludeSectionKeywords = []
includeSections = [] # by default, all sections included
# excludeSections: List of sections to exclude. If empty, all sections found will be included.
# Otherwise, the sections named will be excluded. For example,
#
# excludeSections = ['Politics','Sports']
#
# would cause the Politics and Sports sections to be excluded. This parameter can be used
# in conjunction with includeSections, although in most cases using one or the other, but
# not both, is sufficient.
excludeSections = []
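# Illustrative only (not part of this commit): a tweaked copy of the recipe
# that fetches just Politics and Sports while dropping Op-Ed would set
#
#   includeSections = ['Politics','Sports']
#   excludeSections = ['Op-Ed']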
# one_picture_per_article specifies that calibre should only use the first image
# from an article (if one exists). If one_picture_per_article = True, the image
# will be moved to a location between the headline and the byline.
# If one_picture_per_article = False, all images from the article will be included
# and shown in their original location.
one_picture_per_article = True
# The maximum number of articles that will be downloaded
max_articles_per_feed = 40
max_articles_per_feed = 100
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
else:
title='New York Times'
description = 'Today\'s New York Times'
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
requires_version = (0, 7, 5)
timefmt = ''
needs_subscription = True
@@ -82,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
'entry-response module',
'icon enlargeThis',
'leftNavTabs',
'metaFootnote',
'module box nav',
'nextArticleLink',
'nextArticleLink clearfix',
@@ -89,12 +87,13 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule',
'side_tool',
'singleAd',
'subNavigation clearfix',
'subNavigation tabContent active',
'subNavigation tabContent active clearfix',
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
]}),
dict(id=[
'adxLeaderboard',
'adxSponLink',
'archive',
'articleExtras',
'articleInline',
@@ -105,87 +104,98 @@ class NYTimes(BasicNewsRecipe):
'footer',
'header',
'header_search',
'inlineBox',
'login',
'masthead',
'masthead-nav',
'memberTools',
'navigation',
'portfolioInline',
'readerReviews',
'readerReviewsCount',
'relatedArticles',
'relatedTopics',
'respond',
'side_search',
'side_index',
'side_tool',
'toolsRight',
]),
dict(name=['script', 'noscript', 'style'])]
dict(name=['script', 'noscript', 'style','form','hr'])]
no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \
.byline {font-family: monospace; \
text-align: left; \
margin-top: 0px; \
margin-bottom: 0px;}\n \
.dateline {font-size: small; \
margin-top: 0px; \
margin-bottom: 0px;}\n \
.timestamp {font-size: small; \
margin-top: 0px; \
margin-bottom: 0px;}\n \
.source {text-align: left;}\n \
.image {text-align: center;}\n \
.credit {text-align: right; \
font-size: small; \
margin-top: 0px; \
margin-bottom: 0px;}\n \
.articleBody {text-align: left;}\n \
.authorId {text-align: left; \
font-style: italic;}\n '
extra_css = '''
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { text-align: left; font-size: small; }
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
a:link {text-decoration: none; }
.articleBody { }
.authorId {text-align: left; }
.image {text-align: center;}
.source {text-align: left; }'''
def dump_ans(self, ans) :
def filter_ans(self, ans) :
total_article_count = 0
for section in ans :
idx = 0
idx_max = len(ans)-1
while idx <= idx_max:
if self.includeSections != []:
if ans[idx][0] not in self.includeSections:
print "SECTION NOT INCLUDED: ",ans[idx][0]
del ans[idx]
idx_max = idx_max-1
continue
if ans[idx][0] in self.excludeSections:
print "SECTION EXCLUDED: ",ans[idx][0]
del ans[idx]
idx_max = idx_max-1
continue
if self.verbose:
self.log("section %s: %d articles" % (section[0], len(section[1])) )
for article in section[1]:
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
for article in ans[idx][1]:
total_article_count += 1
if self.verbose:
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
article['url'].encode('cp1252','replace')))
idx = idx+1
self.log( "Queued %d articles" % total_article_count )
return ans
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","&#8216;",string)
fixed = re.sub("\x91","‘",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","&#8217;",fixed)
fixed = re.sub("\x92","’",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","&#8220;",fixed)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","&#8221;",fixed)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","&#8211;",fixed)
fixed = re.sub("\x96","–",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","&#8212;",fixed)
fixed = re.sub("\x97","—",fixed)
return fixed
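# Hypothetical before/after sketch (not part of the commit): with this change
# self.fixChars('It\x92s a \x93quoted\x94 phrase') yields the Unicode string
# u'It\u2019s a \u201cquoted\u201d phrase' directly, instead of text littered
# with numeric HTML entities.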
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
try:
br.open('http://www.nytimes.com/auth/login')
br.select_form(name='login')
br['USERID'] = self.username
br['PASSWORD'] = self.password
br.submit()
except:
self.log("\nFailed to login")
br.open('http://www.nytimes.com/auth/login')
br.select_form(name='login')
br['USERID'] = self.username
br['PASSWORD'] = self.password
raw = br.submit().read()
if 'Please try again' in raw:
raise Exception('Your username and password are incorrect')
return br
def skip_ad_pages(self, soup):
@@ -213,6 +223,9 @@ class NYTimes(BasicNewsRecipe):
cover = None
return cover
def short_title(self):
return self.title
def index_to_soup(self, url_or_raw, raw=False):
'''
OVERRIDE of class method
@@ -255,157 +268,184 @@ class NYTimes(BasicNewsRecipe):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&#38;'
massaged = re.sub("&","&#38;", massaged)
# Replace '&' with '&amp;'
massaged = re.sub("&","&amp;", massaged)
return self.fixChars(massaged)
else:
return description
def parse_index(self):
def parse_todays_index(self):
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
articles = {}
key = None
ans = []
url_list = []
def handle_article(div):
a = div.find('a', href=True)
if not a:
return
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
return
if not url.endswith(".html"):
return
if 'podcast' in url:
return
if '/video/' in url:
return
url += '?pagewanted=all'
if url in url_list:
return
url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
author = ''
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
else:
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
ans.append(feed)
articles[feed] = []
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
key = string.capwords(feed_title(div))
key = key.replace('Op-ed','Op-Ed')
key = key.replace('U.s.','U.S.')
elif div['class'] in ['story', 'story headline'] :
handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
handle_article(lidiv)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
def parse_headline_index(self):
articles = {}
ans = []
feed = key = 'All Top Stories'
articles[key] = []
ans.append(key)
self.log("Scanning 1 section ...")
url_list = []
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the outer table
table = soup.find('table')
previousTable = table
# Fetch the content table
content_table = soup.find('table',{'id':'content'})
if content_table is None:
self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
return None
# Find the deepest table containing the stories
while True :
table = table.find('table')
if table.find(text=re.compile('top stories start')) :
previousTable = table
continue
else :
table = previousTable
break
# Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
# There are multiple subtables, find the one containing the stories
for block in table.findAll('table') :
if block.find(text=re.compile('top stories start')) :
table = block
break
else :
continue
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
for div_sec in td_col.findAll('div',recursive=False):
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
section_name = re.sub(r'^ *$','',section_name)
if section_name == '':
continue
section_name=string.capwords(section_name)
if section_name == 'U.s.':
section_name = 'U.S.'
elif section_name == 'Op-ed':
section_name = 'Op-Ed'
pubdate = strftime('%a, %d %b')
# Again there are multiple subtables, find the one containing the stories
for storyblock in table.findAll('table') :
if storyblock.find(text=re.compile('top stories start')) :
break
else :
continue
skipThisSection = False
todays_article_count = 0
# Within this table are <font face="times new roman, times, san serif"> entries
self.log("Fetching feed Top Stories")
for tr in storyblock.findAllNext('tr'):
if tr.find('span') is not None :
sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
'times new roman,times, sans serif',
'times new roman, times, sans serif']})
section = None
bylines = []
descriptions = []
pubdate = None
# Get the Section title
for (x,i) in enumerate(sectionblock.contents) :
skipThisSection = False
# Extract the section title
if ('Comment' in str(i.__class__)) :
if 'start(name=' in i :
section = i[i.find('=')+1:-2]
if not self.sections.has_key(section) :
skipThisSection = True
search_div = div_sec
for next_tag in h6_sec_name.findNextSiblings(True):
if next_tag.__class__.__name__ == 'Tag':
if next_tag.name == 'div':
search_div = next_tag
break
# Check for excluded section
if len(self.excludeSectionKeywords):
key = self.sections[section]
excluded = re.compile('|'.join(self.excludeSectionKeywords))
if excluded.search(key) or articles.has_key(key):
skipThisSection = True
break
# Get the bylines and descriptions
if not skipThisSection :
lines = sectionblock.contents
contentStrings = []
for line in lines:
if not isinstance(line, Comment) and line.strip and line.strip() > "":
contentStrings.append(line.strip())
# Gather the byline/description pairs
bylines = []
descriptions = []
for contentString in contentStrings:
if contentString[0:3] == 'By ' and contentString[3].isupper() :
bylines.append(contentString)
# Get the articles
for h3_item in search_div.findAll('h3'):
byline = h3_item.h6
if byline is not None:
author = self.tag_to_string(byline,use_alt=False)
else:
descriptions.append(contentString)
# Fetch the article titles and URLs
articleCount = len(sectionblock.findAll('span'))
todays_article_count += articleCount
for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) :
a = span.find('a', href=True)
author = ''
a = h3_item.find('a', href=True)
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
continue
if not url.endswith(".html"):
continue
if 'podcast' in url:
continue
if 'video' in url:
continue
url += '?pagewanted=all'
if url in url_list:
continue
url_list.append(url)
self.log("URL %s" % url)
title = self.tag_to_string(a, use_alt=True).strip()
desc = h3_item.find('p')
if desc is not None:
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not articles.has_key(section_name):
ans.append(section_name)
articles[section_name] = []
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
title = self.tag_to_string(a, use_alt=True)
# prepend the section name
title = self.sections[section] + " &middot; " + title
if not isinstance(title, unicode):
title = title.decode('utf-8', 'replace')
# Allow for unattributed, undescribed entries "Editor's Note"
if i >= len(descriptions) :
description = None
else :
description = descriptions[i]
if len(bylines) == articleCount :
author = bylines[i]
else :
author = None
# Check for duplicates
duplicateFound = False
if len(articles[feed]) > 1:
for article in articles[feed] :
if url == article['url'] :
duplicateFound = True
break
if duplicateFound:
# Continue fetching, don't add this article
todays_article_count -= 1
continue
if not articles.has_key(feed):
articles[feed] = []
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author, content=''))
# self.log("Queuing %d articles from %s" % (todays_article_count, "Top Stories"))
ans = self.sort_index_by(ans, {'Top Stories':-1})
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
self.dump_ans(ans)
return ans
return self.filter_ans(ans)
def parse_index(self):
if self.headlinesOnly:
return self.parse_headline_index()
else:
return self.parse_todays_index()
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self, soup):
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
if tagline=='Op-Ed Columnist':
img_div = soup.find('div','inlineImage module')
if img_div:
img_div.extract()
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):
@@ -422,8 +462,9 @@ class NYTimes(BasicNewsRecipe):
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg after headline
cgFirst = soup.find(True, {'class':'columnGroup first'})
# Move firstImg before article body
#article_body = soup.find(True, {'id':'articleBody'})
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
@@ -443,30 +484,18 @@ class NYTimes(BasicNewsRecipe):
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
# Change class="kicker" to <h3>
kicker = soup.find(True, {'class':'kicker'})
if kicker and kicker.contents[0]:
h3Tag = Tag(soup, "h3")
h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
use_alt=False)))
kicker.replaceWith(h3Tag)
self.log(">>> No class:'columnGroup first' found <<<")
# Change captions to italic -1
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and caption.contents[0]:
emTag = Tag(soup, "em")
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
emTag.insert(0, c)
#hrTag = Tag(soup, 'hr')
#hrTag['class'] = 'caption_divider'
hrTag = Tag(soup, 'div')
hrTag['class'] = 'divider'
emTag.insert(1, hrTag)
caption.replaceWith(emTag)
cTag.insert(0, c)
caption.replaceWith(cTag)
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
@@ -506,17 +535,6 @@ class NYTimes(BasicNewsRecipe):
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
# Synthesize a section header
dsk = soup.find('meta', attrs={'name':'dsk'})
if dsk and dsk.has_key('content'):
hTag = Tag(soup,'h3')
hTag['class'] = 'section'
hTag.insert(0,NavigableString(dsk['content']))
articleTag = soup.find(True, attrs={'id':'article'})
if articleTag:
articleTag.insert(0,hTag)
# Add class="articleBody" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
@@ -532,11 +550,3 @@ class NYTimes(BasicNewsRecipe):
return soup
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
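For orientation, a standalone sketch of what strip_anchors does, assuming the BeautifulSoup 3 API used throughout these recipes and a hypothetical markup sample:
from calibre.ebooks.BeautifulSoup import BeautifulSoup
soup = BeautifulSoup('<p>See <a href="/story.html">this story</a>.</p>')
# replace every imageless <a> with its rendered contents, exactly as the
# method above does
for a in soup.findAll('a'):
    if a.img is None:
        a.replaceWith(a.renderContents().decode('cp1252','replace'))
print soup   # -> <p>See this story.</p>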

View File

@@ -5,52 +5,186 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
import string, re, time
from calibre import strftime
import re, string, time
from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
def decode(self, src):
enc = 'utf-8'
if 'iso-8859-1' in src:
enc = 'cp1252'
return src.decode(enc, 'ignore')
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
title = u'New York Times'
__author__ = 'Kovid Goyal/Nick Redding'
language = 'en'
requires_version = (0, 6, 36)
# set headlinesOnly to True for the headlines-only version
headlinesOnly = False
description = 'Daily news from the New York Times (subscription version)'
timefmt = ' [%b %d]'
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
# includeSections = ['Politics','Sports']
#
# would cause only the Politics and Sports sections to be included.
includeSections = [] # by default, all sections included
# excludeSections: List of sections to exclude. If empty, all sections found will be included.
# Otherwise, the sections named will be excluded. For example,
#
# excludeSections = ['Politics','Sports']
#
# would cause the Politics and Sports sections to be excluded. This parameter can be used
# in conjunction with includeSections, although in most cases using one or the other, but
# not both, is sufficient.
excludeSections = []
# one_picture_per_article specifies that calibre should only use the first image
# from an article (if one exists). If one_picture_per_article = True, the image
# will be moved to a location between the headline and the byline.
# If one_picture_per_article = False, all images from the article will be included
# and shown in their original location.
one_picture_per_article = True
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
else:
title='New York Times'
description = 'Today\'s New York Times'
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
requires_version = (0, 7, 5)
timefmt = ''
needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article')
remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool','nextArticleLink',
'nextArticleLink clearfix','columnGroup doubleRule','doubleRule','entry-meta',
'icon enlargeThis','columnGroup last','relatedSearchesModule']}),
dict({'class':re.compile('^subNavigation')}),
dict({'class':re.compile('^leaderboard')}),
dict({'class':re.compile('^module')}),
dict({'class':'metaFootnote'}),
dict(id=['inlineBox','footer', 'toolsRight', 'articleInline','login','masthead',
'navigation', 'archive', 'side_search', 'blog_sidebar','cCol','portfolioInline',
'side_tool', 'side_index','header','readerReviewsCount','readerReviews',
'relatedArticles', 'relatedTopics', 'adxSponLink']),
remove_tags = [dict(attrs={'class':[
'articleFooter',
'articleTools',
'columnGroup doubleRule',
'columnGroup singleRule',
'columnGroup last',
'columnGroup last',
'doubleRule',
'dottedLine',
'entry-meta',
'entry-response module',
'icon enlargeThis',
'leftNavTabs',
'metaFootnote',
'module box nav',
'nextArticleLink',
'nextArticleLink clearfix',
'post-tools',
'relatedSearchesModule',
'side_tool',
'singleAd',
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
]}),
dict(id=[
'adxLeaderboard',
'adxSponLink',
'archive',
'articleExtras',
'articleInline',
'blog_sidebar',
'businessSearchBar',
'cCol',
'entertainmentSearchBar',
'footer',
'header',
'header_search',
'inlineBox',
'login',
'masthead',
'masthead-nav',
'memberTools',
'navigation',
'portfolioInline',
'readerReviews',
'readerReviewsCount',
'relatedArticles',
'relatedTopics',
'respond',
'side_search',
'side_index',
'side_tool',
'toolsRight',
]),
dict(name=['script', 'noscript', 'style','form','hr'])]
encoding = decode
no_stylesheets = True
extra_css = '''
.articleHeadline { margin-top:0.5em; margin-bottom:0.25em; }
.credit { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { font-size: small; font-style:italic; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { font-size: small; }
.caption { font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
a:link {text-decoration: none; }'''
.timestamp { text-align: left; font-size: small; }
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
a:link {text-decoration: none; }
.articleBody { }
.authorId {text-align: left; }
.image {text-align: center;}
.source {text-align: left; }'''
def filter_ans(self, ans) :
total_article_count = 0
idx = 0
idx_max = len(ans)-1
while idx <= idx_max:
if self.includeSections != []:
if ans[idx][0] not in self.includeSections:
print "SECTION NOT INCLUDED: ",ans[idx][0]
del ans[idx]
idx_max = idx_max-1
continue
if ans[idx][0] in self.excludeSections:
print "SECTION EXCLUDED: ",ans[idx][0]
del ans[idx]
idx_max = idx_max-1
continue
if self.verbose:
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
for article in ans[idx][1]:
total_article_count += 1
if self.verbose:
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
article['url'].encode('cp1252','replace')))
idx = idx+1
self.log( "Queued %d articles" % total_article_count )
return ans
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","‘",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","’",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","–",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
return fixed
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@@ -60,22 +194,19 @@ class NYTimes(BasicNewsRecipe):
br['USERID'] = self.username
br['PASSWORD'] = self.password
raw = br.submit().read()
if 'Sorry, we could not find the combination you entered. Please try again.' in raw:
if 'Please try again' in raw:
raise Exception('Your username and password are incorrect')
#open('/t/log.html', 'wb').write(raw)
return br
def get_masthead_url(self):
masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
#masthead = 'http://members.cox.net/nickredding/nytlogo.gif'
br = BasicNewsRecipe.get_browser()
try:
br.open(masthead)
except:
self.log("\nMasthead unavailable")
masthead = None
return masthead
def skip_ad_pages(self, soup):
# Skip ad pages served before actual article
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
return self.index_to_soup(url, raw=True)
def get_cover_url(self):
cover = None
@@ -93,12 +224,57 @@ class NYTimes(BasicNewsRecipe):
return cover
def short_title(self):
return 'New York Times'
return self.title
def parse_index(self):
self.encoding = 'cp1252'
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
self.encoding = decode
def index_to_soup(self, url_or_raw, raw=False):
'''
OVERRIDE of class method
deals with various page encodings between index and articles
'''
def get_the_soup(docEncoding, url_or_raw, raw=False) :
if re.match(r'\w+://', url_or_raw):
f = self.browser.open(url_or_raw)
_raw = f.read()
f.close()
if not _raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else:
_raw = url_or_raw
if raw:
return _raw
if not isinstance(_raw, unicode) and self.encoding:
_raw = _raw.decode(docEncoding, 'replace')
massage = list(BeautifulSoup.MARKUP_MASSAGE)
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
return BeautifulSoup(_raw, markupMassage=massage)
# Entry point
print "index_to_soup()"
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
if docEncoding == '' :
docEncoding = self.encoding
if self.verbose > 2:
self.log( " document encoding: '%s'" % docEncoding)
if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw)
return soup
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&amp;'
massaged = re.sub("&","&amp;", massaged)
return self.fixChars(massaged)
else:
return description
def parse_todays_index(self):
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
@@ -119,12 +295,13 @@ class NYTimes(BasicNewsRecipe):
return
if 'podcast' in url:
return
if '/video/' in url:
return
url += '?pagewanted=all'
if url in url_list:
return
url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
#self.log("Title: %s" % title)
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
@@ -140,6 +317,7 @@ class NYTimes(BasicNewsRecipe):
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
ans.append(feed)
articles[feed] = []
articles[feed].append(
dict(title=title, url=url, date=pubdate,
@@ -147,46 +325,228 @@ class NYTimes(BasicNewsRecipe):
content=''))
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
# Find each instance of class="section-headline", class="story", class="story headline"
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
key = string.capwords(feed_title(div))
articles[key] = []
ans.append(key)
#self.log('Section: %s' % key)
key = key.replace('Op-ed','Op-Ed')
key = key.replace('U.s.','U.S.')
elif div['class'] in ['story', 'story headline'] :
handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
handle_article(lidiv)
# ans = self.sort_index_by(ans, {'The Front Page':-1,
# 'Dining In, Dining Out':1,
# 'Obituaries':2})
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
def parse_headline_index(self):
articles = {}
ans = []
url_list = []
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the content table
content_table = soup.find('table',{'id':'content'})
if content_table is None:
self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
return None
# Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
for div_sec in td_col.findAll('div',recursive=False):
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
section_name = re.sub(r'^ *$','',section_name)
if section_name == '':
continue
section_name=string.capwords(section_name)
if section_name == 'U.s.':
section_name = 'U.S.'
elif section_name == 'Op-ed':
section_name = 'Op-Ed'
pubdate = strftime('%a, %d %b')
search_div = div_sec
for next_tag in h6_sec_name.findNextSiblings(True):
if next_tag.__class__.__name__ == 'Tag':
if next_tag.name == 'div':
search_div = next_tag
break
# Get the articles
for h3_item in search_div.findAll('h3'):
byline = h3_item.h6
if byline is not None:
author = self.tag_to_string(byline,use_alt=False)
else:
author = ''
a = h3_item.find('a', href=True)
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
continue
if not url.endswith(".html"):
continue
if 'podcast' in url:
continue
if 'video' in url:
continue
url += '?pagewanted=all'
if url in url_list:
continue
url_list.append(url)
self.log("URL %s" % url)
title = self.tag_to_string(a, use_alt=True).strip()
desc = h3_item.find('p')
if desc is not None:
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not articles.has_key(section_name):
ans.append(section_name)
articles[section_name] = []
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
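# For orientation, the page structure this parser assumes (hypothetical
# sample, inferred from the lookups above): <table id="content"> holds
# <td id="...Column"> cells; each cell contains <h6 style="text-transform:
# uppercase">SECTION</h6> headings followed by a <div> of <h3> story items,
# where each <h3> carries an <a href>, an optional <h6> byline and a <p>
# summary.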
def parse_index(self):
if self.headlinesOnly:
return self.parse_headline_index()
else:
return self.parse_todays_index()
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
return ans
def preprocess_html(self, soup):
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag:
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
#self.log("FOUND KICKER %s" % tagline)
if tagline=='Op-Ed Columnist':
img_div = soup.find('div','inlineImage module')
#self.log("Searching for photo")
if img_div:
img_div.extract()
#self.log("Photo deleted")
refresh = soup.find('meta', {'http-equiv':'refresh'})
if refresh is None:
return soup
content = refresh.get('content').partition('=')[2]
raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace'))
return self.strip_anchors(soup)
def postprocess_html(self,soup, True):
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
#article_body = soup.find(True, {'id':'articleBody'})
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and caption.contents[0]:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
else:
# Blog entry - replace headline, remove <hr> tags
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
return soup

View File

@@ -6,22 +6,25 @@ Fetch Die Zeit.
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class ZeitDe(BasicNewsRecipe):
title = 'ZEIT Online'
description = 'ZEIT Online'
title = 'Zeit Online'
description = 'Zeit Online'
language = 'de'
lang = 'de_DE'
__author__ = 'Martin Pitt, Sujata Raman and Ingo Paschke'
use_embedded_content = False
__author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
max_articles_per_feed = 40
remove_empty_feeds = True
no_stylesheets = True
no_javascript = True
encoding = 'utf-8'
remove_tags = [
dict(name='iframe'),
dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
dict(name='div', attrs={'id':["place_5","place_4","comments"]})
]
keep_only_tags = [dict(id=['main'])]
feeds = [
('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
@@ -40,43 +43,15 @@ class ZeitDe(BasicNewsRecipe):
('Sport', 'http://newsfeed.zeit.de/sport/index'),
]
extra_css = '''
.supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
.excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:small;}
.title{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
.caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
.copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
.article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
.quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
.quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
.headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
.inline{float:left;margin-top:0;margin-right:15px;position:relative;width:180px; }
img.inline{float:none}
.intertitle{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small;font-weight:700}
.ebinfobox{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small;list-style-type:none;float:right;margin-top:0;border-left-style:solid;border-left-width:1px;padding-left:10px;}
.infobox {border-style: solid; border-width: 1px;padding:8px;}
.infobox dt {font-weight:700;}
'''
extra_css = '.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'
#filter_regexps = [r'ad.de.doubleclick.net/']
keep_only_tags = [
dict(name='div', attrs={'class':["article"]}) ,
dict(name='ul', attrs={'class':["tools"]}) ,
]
remove_tags = [
dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),
dict(name='div', attrs={'class':["pagination block","pagenav","inline link", "copyright"] }),
dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
dict(name='div', attrs={'id':["place_5","place_4","comments"]})
]
remove_attributes = ['style', 'font']
def get_article_url(self, article):
ans = article.get('link',None)
ans += "?page=all"
ans += "?page=all&print=true"
if 'video' in ans or 'quiz' in ans :
if 'video' in ans or 'quiz' in ans or 'blog' in ans :
ans = None
return ans
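# Hypothetical examples of the new behaviour (not part of the commit):
#   http://www.zeit.de/politik/foo -> http://www.zeit.de/politik/foo?page=all&print=true
#   http://www.zeit.de/video/bar   -> dropped, get_article_url returns None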
@@ -86,25 +61,3 @@ class ZeitDe(BasicNewsRecipe):
return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
except:
return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
title = soup.find('h2', attrs={'class':'title'})
if title is None:
print "no title"
return soup
info = Tag(soup,'ul',[('class','ebinfobox')])
tools = soup.find('ul', attrs={'class':'tools'})
#author = tools.find('li','author first')
for tag in ['author first', 'date', 'date first', 'author', 'source']:
line = tools.find('li', tag)
if line:
info.insert(0,line)
title.parent.insert(0,info)
tools.extract()
return soup

View File

@@ -0,0 +1,60 @@
body{
margin:0px;
padding: 0.5em;
background-color:#F6F3E9;
font-size:12px;
font-family:Arial, Helvetica, sans-serif;
}
.calibreMeta{
background-color:#39322B;
color:white;
padding:10px;
}
.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a{
color:white;
}
.calibreMeta h1{
margin:0px;
font-size:18px;
background-color:#39322B;
}
.calibreEbookContent{
padding:20px;
}
.calibreEbNav, .calibreEbNavTop{
clear:both;
background-color:#39322B;
color:white;
padding:10px;
text-align:center;
}
.calibreEbNavTop{
margin-bottom:20px;
}
.calibreEbNav a, .calibreEbNavTop a{
padding:0px 5px;
}
.calibreTocIndex{
line-height:18px;
}
.calibreToc{
float:left;
margin:20px;
width:300px;
background-color:#39322B;
color:white;
padding:10px;
}
.calibreEbookContent{
width:600px;
float:left;
}

View File

@@ -0,0 +1,74 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
${head_content}$
<link href="${cssLink}$" type="text/css" rel="stylesheet" />
</head>
<body>
<div class="calibreMeta">
<div class="calibreMetaTitle">
${pos1=1}$
${for title in meta.titles():}$
${if pos1:}$
<h1>
<a href="${tocUrl}$">${print title}$</a>
</h1>
${:else:}$
<div class="calibreMetaSubtitle">${print title}$</div>
${:endif}$
${pos1=0}$
${:endfor}$
</div>
<div class="calibreMetaAuthor">
${print ', '.join(meta.creators())}$
</div>
</div>
<div class="calibreMain">
<div class="calibreEbookContent">
${if prevLink or nextLink:}$
<div class="calibreEbNavTop">
${if prevLink:}$
<a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:else:}$
<a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:endif}$
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
${:endif}$
${ebookContent}$
</div>
${if has_toc:}$
<div class="calibreToc">
<h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
${print toc()}$
</div>
${:endif}$
<div class="calibreEbNav">
${if prevLink:}$
<a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:else:}$
<a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:endif}$
<a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,61 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
<link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
<title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>
${for item in meta:}$
<meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
${:endfor}$
<link href="${cssLink}$" type="text/css" rel="stylesheet" />
</head>
<body>
<div class="calibreMeta">
<div class="calibreMetaTitle">
${pos1=1}$
${for title in meta.titles():}$
${if pos1:}$
<h1>
<a href="${tocUrl}$">${print title}$</a>
</h1>
${:else:}$
<div class="calibreMetaSubtitle">${print title}$</div>
${:endif}$
${pos1=0}$
${:endfor}$
</div>
<div class="calibreMetaAuthor">
${print ', '.join(meta.creators()),}$
</div>
</div>
<div class="calibreMain">
<div class="calibreEbookContent">
${if has_toc:}$
<div class="calibreTocIndex">
<h2>${print _('Table of contents'),}$</h2>
${toc}$
</div>
${:else:}$
<h2>${print _('No table of contents present'),}$</h2>
<div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
${:endif}$
</div>
<div class="calibreEbNav">
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
</div>
</body>
</html>

View File

@@ -89,7 +89,7 @@ class Server(Command):
t = telnetlib.Telnet('localhost', 4242)
t.read_until("repl>")
t.write('BrowserReload();')
print t.read_until("repl>")
t.read_until("repl>")
t.close()
except:
print 'Failed to reload browser'

View File

@@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.tcr.output import TCROutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.html.output import HTMLOutput
from calibre.ebooks.snb.output import SNBOutput
from calibre.customize.profiles import input_profiles, output_profiles
@@ -525,6 +526,7 @@ plugins += [
RTFOutput,
TCROutput,
TXTOutput,
HTMLOutput,
SNBOutput,
]
# Order here matters. The first matched device is the one used.
@@ -893,4 +895,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
Email, Server, Plugins, Tweaks, Misc]
#}}}

View File

@@ -0,0 +1,33 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
class EasyMeta(object):
def __init__(self, meta):
self.meta = meta
def __iter__(self):
meta = self.meta
for item_name in meta.items:
for item in meta[item_name]:
if namespace(item.term) == DC11_NS:
yield { 'name': barename(item.term), 'value': item.value }
def __len__(self):
count = 0
for item in self:
count = count+1
return count
def titles(self):
for item in self.meta['title']:
yield item.value
def creators(self):
for item in self.meta['creator']:
yield item.value
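A minimal usage sketch for EasyMeta; oeb_book is a stand-in for the book object a conversion pipeline would supply:
from calibre.ebooks.html.meta import EasyMeta
meta = EasyMeta(oeb_book.metadata)
for item in meta:   # every Dublin Core 1.1 term in the book's metadata
    print '%s: %s' % (item['name'], item['value'])
print 'by', ', '.join(meta.creators())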

View File

@@ -0,0 +1,201 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'
import os, re, shutil
from os.path import dirname, abspath, relpath, exists
from lxml import etree
from templite import Templite
from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.zipfile import ZipFile
from urllib import unquote
from calibre.ebooks.html.meta import EasyMeta
class HTMLOutput(OutputFormatPlugin):
name = 'HTML Output'
author = 'Fabian Grassl'
file_type = 'zip'
options = set([
OptionRecommendation(name='template_css',
help=_('CSS file used for the output instead of the default file')),
OptionRecommendation(name='template_html_index',
help=_('Template used for generation of the html index file instead of the default file')),
OptionRecommendation(name='template_html',
help=_('Template used for the generation of the html contents of the book instead of the default file')),
OptionRecommendation(name='extract_to',
help=_('Extract the contents of the generated ZIP file to the directory of the generated ZIP file')
),
])
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
def generate_toc(self, oeb_book, ref_url, output_dir):
'''
Generate table of contents
'''
with CurrentDir(output_dir):
def build_node(current_node, parent=None):
if parent is None:
parent = etree.Element('ul')
elif len(current_node.nodes):
parent = element(parent, ('ul'))
for node in current_node.nodes:
point = element(parent, 'li')
href = relpath(abspath(unquote(node.href)), dirname(ref_url))
link = element(point, 'a', href=href)
title = node.title
if title:
title = re.sub(r'\s+', ' ', title)
link.text=title
build_node(node, point)
return parent
wrap = etree.Element('div')
wrap.append(build_node(oeb_book.toc))
return wrap
def generate_html_toc(self, oeb_book, ref_url, output_dir):
root = self.generate_toc(oeb_book, ref_url, output_dir)
return etree.tostring(root, pretty_print=True, encoding='utf-8',
xml_declaration=True)
def convert(self, oeb_book, output_path, input_plugin, opts, log):
# read template files
if opts.template_html_index is not None:
template_html_index_data = open(opts.template_html_index, 'rb').read()
else:
template_html_index_data = P('templates/html_export_default_index.tmpl', data=True)
if opts.template_html is not None:
template_html_data = open(opts.template_html, 'rb').read()
else:
template_html_data = P('templates/html_export_default.tmpl', data=True)
if opts.template_css is not None:
template_css_data = open(opts.template_css, 'rb').read()
else:
template_css_data = P('templates/html_export_default.css', data=True)
template_html_index_data = template_html_index_data.decode('utf-8')
template_html_data = template_html_data.decode('utf-8')
template_css_data = template_css_data.decode('utf-8')
self.log = log
self.opts = opts
meta = EasyMeta(oeb_book.metadata)
tempdir = PersistentTemporaryDirectory()
output_file = os.path.join(tempdir,
os.path.basename(re.sub(r'\.zip', '', output_path)+'.html'))
output_dir = re.sub(r'\.html', '', output_file)+'_files'
if not exists(output_dir):
os.makedirs(output_dir)
css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
with open(css_path, 'wb') as f:
f.write(template_css_data.encode('utf-8'))
with open(output_file, 'wb') as f:
html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
templite = Templite(template_html_index_data)
nextLink = oeb_book.spine[0].href
nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
cssLink = relpath(abspath(css_path), dirname(output_file))
tocUrl = relpath(output_file, dirname(output_file))
t = templite.render(has_toc=bool(oeb_book.toc.count()),
toc=html_toc, meta=meta, nextLink=nextLink,
tocUrl=tocUrl, cssLink=cssLink)
f.write(t)
with CurrentDir(output_dir):
for item in oeb_book.manifest:
path = abspath(unquote(item.href))
dir = dirname(path)
if not exists(dir):
os.makedirs(dir)
if item.spine_position is not None:
with open(path, 'wb') as f:
pass
else:
with open(path, 'wb') as f:
f.write(str(item))
item.unload_data_from_memory(memory=path)
for item in oeb_book.spine:
path = abspath(unquote(item.href))
dir = dirname(path)
root = item.data.getroottree()
# get & clean HTML <HEAD>-data
head = root.xpath('//h:head', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
head_content = etree.tostring(head, pretty_print=True, encoding='utf-8')
head_content = re.sub(r'\<\/?head.*\>', '', head_content)
head_content = re.sub(re.compile(r'\<style.*\/style\>', re.M|re.S), '', head_content)
# get & clean HTML <BODY>-data
body = root.xpath('//h:body', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8')
ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
# generate link to next page
if item.spine_position+1 < len(oeb_book.spine):
nextLink = oeb_book.spine[item.spine_position+1].href
nextLink = relpath(abspath(nextLink), dir)
else:
nextLink = None
# generate link to previous page
if item.spine_position > 0:
prevLink = oeb_book.spine[item.spine_position-1].href
prevLink = relpath(abspath(prevLink), dir)
else:
prevLink = None
cssLink = relpath(abspath(css_path), dir)
tocUrl = relpath(output_file, dir)
# render template
templite = Templite(template_html_data)
toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
t = templite.render(ebookContent=ebook_content,
prevLink=prevLink, nextLink=nextLink,
has_toc=bool(oeb_book.toc.count()), toc=toc,
tocUrl=tocUrl, head_content=head_content,
meta=meta, cssLink=cssLink)
# write html to file
with open(path, 'wb') as f:
f.write(t)
item.unload_data_from_memory(memory=path)
zfile = ZipFile(output_path, "w")
zfile.add_dir(output_dir)
if opts.extract_to:
if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to)
os.makedirs(opts.extract_to)
zfile.extractall(opts.extract_to)
self.log('Zip file extracted to', opts.extract_to)
zfile.close()
# cleanup temp dir
shutil.rmtree(tempdir)
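Since OutputFormatPlugin options surface as ebook-convert switches, a plausible invocation of this plugin looks like the line below; the flag spellings are inferred from the OptionRecommendation names above, so verify them against ebook-convert --help:
ebook-convert book.epub book.zip --template-css my.css --extract-to ./site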

View File

@@ -112,13 +112,12 @@ def get_metadata(br, asin, mi):
def main(args=sys.argv):
# Test xisbn
#print get_social_metadata('Learning Python', None, None, '8324616489')
#print
print get_social_metadata('Learning Python', None, None, '8324616489')
print
# Test sophisticated comment formatting
print get_social_metadata('Angels & Demons', None, None, '9781416580829')
print
return
# Random tests
print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')

View File

@@ -275,7 +275,15 @@ class MobiMLizer(object):
# <mbp:frame-set/> does not exist lalalala
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
or style['visibility'] == 'hidden':
return
id_ = elem.get('id', None)
if id_:
# Keep anchors so people can use display:none
# to generate hidden TOCs
elem.clear()
elem.text = None
elem.set('id', id_)
else:
return
tag = barename(elem.tag)
istate = copy.copy(istates[-1])
istate.rendered = False
@@ -406,6 +414,12 @@ class MobiMLizer(object):
parent = bstate.para if bstate.inline is None else bstate.inline
if parent is not None:
vtag = etree.SubElement(parent, XHTML(vtag))
# Add anchors
for child in vbstate.body:
if child is not vbstate.para:
vtag.append(child)
else:
break
for child in vbstate.para:
vtag.append(child)
return
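The net effect, sketched with hypothetical markup: a block such as <div style="display:none" id="hidden-toc">...</div> is no longer dropped outright. Its children are cleared, but an empty element carrying the original id survives, so internal links and NCX entries that target that id (a hidden table of contents, for instance) still resolve in the generated MOBI.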

View File

@@ -49,5 +49,3 @@ class OEBOutput(OutputFormatPlugin):
with open(path, 'wb') as f:
f.write(str(item))
item.unload_data_from_memory(memory=path)

View File

@@ -101,11 +101,12 @@ class SNBMLizer(object):
subitem = ''
bodyTree = trees[subitem].find(".//body")
for line in output.splitlines():
if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
pos = line.find(CALIBRE_SNB_PRE_TAG)
if pos == -1:
line = line.strip(u' \t\n\r\u3000')
else:
etree.SubElement(bodyTree, "text").text = \
etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
etree.CDATA(line[pos+len(CALIBRE_SNB_PRE_TAG):])
continue
if len(line) != 0:
if line.find(CALIBRE_SNB_IMG_TAG) == 0:

View File

@@ -35,7 +35,6 @@ class ViewAction(InterfaceAction):
self.qaction.setMenu(self.view_menu)
ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)
def location_selected(self, loc):
enabled = loc == 'library'
for action in list(self.view_menu.actions())[1:]:
@@ -134,6 +133,9 @@ class ViewAction(InterfaceAction):
rows = self.gui.current_view().selectionModel().selectedRows()
self._view_books(rows)
def view_triggered(self, index):
self._view_books([index])
def view_specific_book(self, index):
self._view_books([index])

View File

@@ -28,6 +28,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne
if log is None:
log = Log()
from calibre.library import db
from calibre.utils.config import prefs
prefs.refresh()
db = db()
db.catalog_plugin_on_device_temp_mapping = dbspec

View File

@@ -50,6 +50,8 @@ class BooksView(QTableView): # {{{
def __init__(self, parent, modelcls=BooksModel):
QTableView.__init__(self, parent)
self.setEditTriggers(self.SelectedClicked|self.EditKeyPressed)
self.drag_allowed = True
self.setDragEnabled(True)
self.setDragDropOverwriteMode(False)
@@ -98,6 +100,8 @@ class BooksView(QTableView): # {{{
self._model.about_to_be_sorted.connect(self.about_to_be_sorted)
self._model.sorting_done.connect(self.sorting_done)
self.doubleClicked.connect(parent.iactions['View'].view_triggered)
# Column Header Context Menu {{{
def column_header_context_handler(self, action=None, column=None):
if not action or not column:

View File

@@ -128,7 +128,7 @@ class ContentServer(object):
if want_mobile:
return self.mobile()
return self.browse_toplevel()
return self.browse_catalog()
def old(self, **kwargs):
return self.static('index.html').replace('{prefix}',

View File

@@ -338,6 +338,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
* - Keyboard Shortcut
- Action
* - :kbd:`F2 (Enter in OS X)`
- Edit the metadata of the currently selected field in the book list.
* - :kbd:`A`
- Add Books
* - :kbd:`C`

src/templite/__init__.py (new file)
View File

@@ -0,0 +1,87 @@
#!/usr/bin/env python
#
# Templite+
# A light-weight, fully functional, general purpose templating engine
#
# Copyright (c) 2009 joonis new media
# Author: Thimo Kraemer <thimo.kraemer@joonis.de>
#
# Based on Templite - Tomer Filiba
# http://code.activestate.com/recipes/496702/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
import sys, re
class Templite(object):
auto_emit = re.compile('(^[\'\"])|(^[a-zA-Z0-9_\[\]\'\"]+$)')
def __init__(self, template, start='${', end='}$'):
if len(start) != 2 or len(end) != 2:
raise ValueError('each delimiter must be two characters long')
delimiter = re.compile('%s(.*?)%s' % (re.escape(start), re.escape(end)), re.DOTALL)
offset = 0
tokens = []
for i, part in enumerate(delimiter.split(template)):
part = part.replace('\\'.join(list(start)), start)
part = part.replace('\\'.join(list(end)), end)
if i % 2 == 0:
if not part: continue
part = part.replace('\\', '\\\\').replace('"', '\\"')
part = '\t' * offset + 'emit("""%s""")' % part
else:
part = part.rstrip()
if not part: continue
if part.lstrip().startswith(':'):
if not offset:
raise SyntaxError('no block statement to terminate: ${%s}$' % part)
offset -= 1
part = part.lstrip()[1:]
if not part.endswith(':'): continue
elif self.auto_emit.match(part.lstrip()):
part = 'emit(%s)' % part.lstrip()
lines = part.splitlines()
margin = min(len(l) - len(l.lstrip()) for l in lines if l.strip())
part = '\n'.join('\t' * offset + l[margin:] for l in lines)
if part.endswith(':'):
offset += 1
tokens.append(part)
if offset:
raise SyntaxError('%i block statement(s) not terminated' % offset)
self.__code = compile('\n'.join(tokens), '<templite %r>' % template[:20], 'exec')
def render(self, __namespace=None, **kw):
"""
renders the template according to the given namespace.
__namespace - a dictionary serving as a namespace for evaluation
**kw - keyword arguments which are added to the namespace
"""
namespace = {}
if __namespace: namespace.update(__namespace)
if kw: namespace.update(kw)
namespace['emit'] = self.write
__stdout = sys.stdout
sys.stdout = self
self.__output = []
eval(self.__code, namespace)
sys.stdout = __stdout
return ''.join(self.__output)
def write(self, *args):
for a in args:
self.__output.append(str(a))
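For orientation, a small usage sketch of the engine; the template string is hypothetical, while the ${...}$ delimiters, the auto-emit rule and the block syntax come from the class above:
from templite import Templite
t = Templite('Hello ${name}$!${if polite:}$ ${print greeting,}$${:endif}$')
print t.render(name='World', polite=True, greeting='Nice to meet you.')
# -> Hello World! Nice to meet you.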