Merge from trunk

Author: Sengian
Date:   2010-11-02 21:35:21 +01:00
Commit: f61daece95
21 changed files with 1267 additions and 405 deletions


@@ -25,15 +25,15 @@ class Fudzilla(BasicNewsRecipe):
    remove_tags_before = dict(name='div', attrs={'class':['padding']})
    remove_tags = [dict(name='td', attrs={'class':['left','right']}),
                   dict(name='div', attrs={'id':['toolbar','buttons']}),
                   dict(name='div', attrs={'class':['artbannersxtd','back_button']}),
                   dict(name='span', attrs={'class':['pathway']}),
                   dict(name='th', attrs={'class':['pagenav_next','pagenav_prev']}),
                   dict(name='table', attrs={'class':['headlines']}),
                  ]
    feeds = [
-        (u'Posts', u'http://www.fudzilla.com/index.php?option=com_rss&feed=RSS2.0&no_html=1')
+        (u'Posts', u'http://www.fudzilla.com/?format=feed')
        ]
    preprocess_regexps = [


@@ -5,62 +5,59 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
-import re
-import time
-from calibre import entity_to_unicode
+import re, string, time
+from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, \
-    Comment, BeautifulStoneSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

class NYTimes(BasicNewsRecipe):

-    title = 'New York Times Top Stories'
-    __author__ = 'GRiker'
-    language = 'en'
-    requires_version = (0, 7, 5)
-    description = 'Top Stories from the New York Times'
+    # set headlinesOnly to True for the headlines-only version
+    headlinesOnly = True

-    # List of sections typically included in Top Stories. Use a keyword from the
-    # right column in the excludeSectionKeywords[] list to skip downloading that section
-    sections = {
-         'arts'             : 'Arts',
-         'business'         : 'Business',
-         'diningwine'       : 'Dining & Wine',
-         'editorials'       : 'Editorials',
-         'health'           : 'Health',
-         'magazine'         : 'Magazine',
-         'mediaadvertising' : 'Media & Advertising',
-         'newyorkregion'    : 'New York/Region',
-         'oped'             : 'Op-Ed',
-         'politics'         : 'Politics',
-         'science'          : 'Science',
-         'sports'           : 'Sports',
-         'technology'       : 'Technology',
-         'topstories'       : 'Top Stories',
-         'travel'           : 'Travel',
-         'us'               : 'U.S.',
-         'world'            : 'World'
-         }
-
-    # Add section keywords from the right column above to skip that section
-    # For example, to skip sections containing the word 'Sports' or 'Dining', use:
-    # excludeSectionKeywords = ['Sports', 'Dining']
-    # Fetch only Business and Technology
-    # excludeSectionKeywords = ['Arts','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Top Stories','Travel','U.S.','World']
-    # Fetch only Top Stories
-    # excludeSectionKeywords = ['Arts','Business','Dining','Editorials','Health','Magazine','Media','Region','Op-Ed','Politics','Science','Sports','Technology','Travel','U.S.','World']
-    # By default, no sections are skipped.
-    excludeSectionKeywords = []
+    # includeSections: List of sections to include. If empty, all sections found will be included.
+    # Otherwise, only the sections named will be included. For example,
+    #
+    #    includeSections = ['Politics','Sports']
+    #
+    # would cause only the Politics and Sports sections to be included.
+
+    includeSections = []  # by default, all sections included
+
+    # excludeSections: List of sections to exclude. If empty, all sections found will be included.
+    # Otherwise, the sections named will be excluded. For example,
+    #
+    #    excludeSections = ['Politics','Sports']
+    #
+    # would cause the Politics and Sports sections to be excluded. This parameter can be used
+    # in conjunction with includeSections although in most cases using one or the other, but
+    # not both, is sufficient.
+
+    excludeSections = []

    # one_picture_per_article specifies that calibre should only use the first image
    # from an article (if one exists). If one_picture_per_article = True, the image
    # will be moved to a location between the headline and the byline.
    # If one_picture_per_article = False, all images from the article will be included
    # and shown in their original location.
    one_picture_per_article = True

    # The maximum number of articles that will be downloaded
-    max_articles_per_feed = 40
+    max_articles_per_feed = 100

+    if headlinesOnly:
+        title = 'New York Times Headlines'
+        description = 'Headlines from the New York Times'
+    else:
+        title = 'New York Times'
+        description = 'Today\'s New York Times'
+
+    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
+    language = 'en'
+    requires_version = (0, 7, 5)

    timefmt = ''
    needs_subscription = True
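The includeSections/excludeSections pair replaces the old fixed sections map and the excludeSectionKeywords mechanism: filtering now happens on whatever section names are actually found at download time. A minimal sketch of how a user copy of the recipe might be customized (the section names here are illustrative, not a fixed list):

    # Hypothetical user customization of the recipe above: fetch only two
    # sections of the headlines edition and keep all inline images.
    class MyNYTimes(NYTimes):
        headlinesOnly = True
        includeSections = ['World', 'Politics']   # only these sections survive
        excludeSections = []                      # nothing extra to drop
        one_picture_per_article = False
        max_articles_per_feed = 50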
@@ -82,6 +79,7 @@ class NYTimes(BasicNewsRecipe):
            'entry-response module',
            'icon enlargeThis',
            'leftNavTabs',
+            'metaFootnote',
            'module box nav',
            'nextArticleLink',
            'nextArticleLink clearfix',
@@ -89,12 +87,13 @@ class NYTimes(BasicNewsRecipe):
            'relatedSearchesModule',
            'side_tool',
            'singleAd',
-            'subNavigation clearfix',
-            'subNavigation tabContent active',
-            'subNavigation tabContent active clearfix',
+            re.compile('^subNavigation'),
+            re.compile('^leaderboard'),
+            re.compile('^module'),
            ]}),
        dict(id=[
            'adxLeaderboard',
+            'adxSponLink',
            'archive',
            'articleExtras',
            'articleInline',
@@ -105,87 +104,98 @@ class NYTimes(BasicNewsRecipe):
            'footer',
            'header',
            'header_search',
+            'inlineBox',
            'login',
            'masthead',
            'masthead-nav',
            'memberTools',
            'navigation',
            'portfolioInline',
+            'readerReviews',
+            'readerReviewsCount',
            'relatedArticles',
+            'relatedTopics',
            'respond',
            'side_search',
            'side_index',
            'side_tool',
            'toolsRight',
            ]),
-        dict(name=['script', 'noscript', 'style'])]
+        dict(name=['script', 'noscript', 'style','form','hr'])]

    no_stylesheets = True
-    extra_css = '.headline {text-align: left;}\n \
-                 .byline {font-family: monospace; \
-                          text-align: left; \
-                          margin-top: 0px; \
-                          margin-bottom: 0px;}\n \
-                 .dateline {font-size: small; \
-                            margin-top: 0px; \
-                            margin-bottom: 0px;}\n \
-                 .timestamp {font-size: small; \
-                             margin-top: 0px; \
-                             margin-bottom: 0px;}\n \
-                 .source {text-align: left;}\n \
-                 .image {text-align: center;}\n \
-                 .credit {text-align: right; \
-                          font-size: small; \
-                          margin-top: 0px; \
-                          margin-bottom: 0px;}\n \
-                 .articleBody {text-align: left;}\n \
-                 .authorId {text-align: left; \
-                            font-style: italic;}\n '
+    extra_css = '''
+                .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
+                .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .timestamp { text-align: left; font-size: small; }
+                .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                a:link {text-decoration: none; }
+                .articleBody { }
+                .authorId {text-align: left; }
+                .image {text-align: center;}
+                .source {text-align: left; }'''
-    def dump_ans(self, ans) :
+    def filter_ans(self, ans) :
        total_article_count = 0
-        for section in ans :
+        idx = 0
+        idx_max = len(ans)-1
+        while idx <= idx_max:
+            if self.includeSections != []:
+                if ans[idx][0] not in self.includeSections:
+                    print "SECTION NOT INCLUDED: ",ans[idx][0]
+                    del ans[idx]
+                    idx_max = idx_max-1
+                    continue
+            if ans[idx][0] in self.excludeSections:
+                print "SECTION EXCLUDED: ",ans[idx][0]
+                del ans[idx]
+                idx_max = idx_max-1
+                continue
            if self.verbose:
-                self.log("section %s: %d articles" % (section[0], len(section[1])) )
-            for article in section[1]:
+                self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
+            for article in ans[idx][1]:
                total_article_count += 1
                if self.verbose:
                    self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
                          article['url'].encode('cp1252','replace')))
+            idx = idx+1
        self.log( "Queued %d articles" % total_article_count )
+        return ans
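filter_ans deletes non-matching sections from the (section, article-list) tuples in place, so the surviving index keeps its original order and the same list object is returned to the caller. The include/exclude rule itself, restated as a list-comprehension sketch (not the recipe's code, just an equivalent formulation):

    # Equivalent sketch of the filtering rule applied by filter_ans above:
    def filter_sections(ans, include, exclude):
        if include:
            ans = [sec for sec in ans if sec[0] in include]
        return [sec for sec in ans if sec[0] not in exclude]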
    def fixChars(self,string):
        # Replace lsquo (\x91)
-        fixed = re.sub("\x91","&#8216;",string)
+        fixed = re.sub("\x91","‘",string)
        # Replace rsquo (\x92)
-        fixed = re.sub("\x92","&#8217;",fixed)
+        fixed = re.sub("\x92","’",fixed)
        # Replace ldquo (\x93)
-        fixed = re.sub("\x93","&#8220;",fixed)
+        fixed = re.sub("\x93","“",fixed)
        # Replace rdquo (\x94)
-        fixed = re.sub("\x94","&#8221;",fixed)
+        fixed = re.sub("\x94","”",fixed)
        # Replace ndash (\x96)
-        fixed = re.sub("\x96","&#8211;",fixed)
+        fixed = re.sub("\x96","–",fixed)
        # Replace mdash (\x97)
-        fixed = re.sub("\x97","&#8212;",fixed)
+        fixed = re.sub("\x97","—",fixed)
        return fixed
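fixChars repairs stray Windows-1252 punctuation bytes (0x91-0x97) that survive in NYT markup, mapping each to its proper Unicode character; the new code emits the characters directly instead of HTML entities. The same table in dictionary form, as an equivalent sketch:

    # The cp1252 punctuation repaired by fixChars above, as a lookup table:
    CP1252_PUNCT = {
        u'\x91': u'\u2018',  # left single quotation mark
        u'\x92': u'\u2019',  # right single quotation mark
        u'\x93': u'\u201c',  # left double quotation mark
        u'\x94': u'\u201d',  # right double quotation mark
        u'\x96': u'\u2013',  # en dash
        u'\x97': u'\u2014',  # em dash
    }

    def fix_chars(text):
        for bad, good in CP1252_PUNCT.items():
            text = text.replace(bad, good)
        return text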
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
-            try:
-                br.open('http://www.nytimes.com/auth/login')
-                br.select_form(name='login')
-                br['USERID'] = self.username
-                br['PASSWORD'] = self.password
-                br.submit()
-            except:
-                self.log("\nFailed to login")
+            br.open('http://www.nytimes.com/auth/login')
+            br.select_form(name='login')
+            br['USERID'] = self.username
+            br['PASSWORD'] = self.password
+            raw = br.submit().read()
+            if 'Please try again' in raw:
+                raise Exception('Your username and password are incorrect')
        return br

    def skip_ad_pages(self, soup):
@@ -213,6 +223,9 @@ class NYTimes(BasicNewsRecipe):
            cover = None
        return cover

+    def short_title(self):
+        return self.title
+
    def index_to_soup(self, url_or_raw, raw=False):
        '''
        OVERRIDE of class method
@@ -255,157 +268,184 @@ class NYTimes(BasicNewsRecipe):
        # Kindle TOC descriptions won't render certain characters
        if description:
            massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&' with '&#38;'
            massaged = re.sub("&","&#38;", massaged)
            return self.fixChars(massaged)
        else:
            return description
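The round trip in massageNCXText is deliberate: BeautifulStoneSoup first collapses every HTML entity in the description to a literal character, and the one XML-unsafe survivor, '&', is then re-escaped so the Kindle NCX stays well-formed. For example (assumed input):

    # Assumed example of the entity round trip in massageNCXText above:
    from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
    desc = 'Law &amp; Order returns'
    flat = unicode(BeautifulStoneSoup(desc,
            convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # flat == u'Law & Order returns'
    # re.sub then yields u'Law &#38; Order returns'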
-    def parse_index(self):
+    def parse_todays_index(self):
def feed_title(div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
articles = {}
key = None
ans = []
url_list = []
def handle_article(div):
a = div.find('a', href=True)
if not a:
return
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
return
if not url.endswith(".html"):
return
if 'podcast' in url:
return
if '/video/' in url:
return
url += '?pagewanted=all'
if url in url_list:
return
url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)
author = ''
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
else:
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
ans.append(feed)
articles[feed] = []
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
key = string.capwords(feed_title(div))
key = key.replace('Op-ed','Op-Ed')
key = key.replace('U.s.','U.S.')
elif div['class'] in ['story', 'story headline'] :
handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
handle_article(lidiv)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
    def parse_headline_index(self):

        articles = {}
        ans = []
        url_list = []

        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')

-        feed = key = 'All Top Stories'
-        articles[key] = []
-        ans.append(key)
-        self.log("Scanning 1 section ...")
-
-        # Fetch the outer table
-        table = soup.find('table')
-        previousTable = table
-
-        # Find the deepest table containing the stories
-        while True :
-            table = table.find('table')
-            if table.find(text=re.compile('top stories start')) :
-                previousTable = table
-                continue
-            else :
-                table = previousTable
-                break
-
-        # There are multiple subtables, find the one containing the stories
-        for block in table.findAll('table') :
-            if block.find(text=re.compile('top stories start')) :
-                table = block
-                break
-            else :
-                continue
-
-        # Again there are multiple subtables, find the one containing the stories
-        for storyblock in table.findAll('table') :
-            if storyblock.find(text=re.compile('top stories start')) :
-                break
-            else :
-                continue
-
-        skipThisSection = False
-        todays_article_count = 0
-        # Within this table are <font face="times new roman, times, san serif"> entries
-        self.log("Fetching feed Top Stories")
-        for tr in storyblock.findAllNext('tr'):
-            if tr.find('span') is not None :
-
-                sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
-                                                            'times new roman,times, sans serif',
-                                                            'times new roman, times, sans serif']})
-                section = None
-                bylines = []
-                descriptions = []
-                pubdate = None
-
-                # Get the Section title
-                for (x,i) in enumerate(sectionblock.contents) :
-                    skipThisSection = False
-                    # Extract the section title
-                    if ('Comment' in str(i.__class__)) :
-                        if 'start(name=' in i :
-                            section = i[i.find('=')+1:-2]
-
-                            if not self.sections.has_key(section) :
-                                skipThisSection = True
-                                break
-
-                            # Check for excluded section
-                            if len(self.excludeSectionKeywords):
-                                key = self.sections[section]
-                                excluded = re.compile('|'.join(self.excludeSectionKeywords))
-                                if excluded.search(key) or articles.has_key(key):
-                                    skipThisSection = True
-                                    break
-
-                # Get the bylines and descriptions
-                if not skipThisSection :
-                    lines = sectionblock.contents
-                    contentStrings = []
-
-                    for line in lines:
-                        if not isinstance(line, Comment) and line.strip and line.strip() > "":
-                            contentStrings.append(line.strip())
-
-                    # Gather the byline/description pairs
-                    bylines = []
-                    descriptions = []
-                    for contentString in contentStrings:
-                        if contentString[0:3] == 'By ' and contentString[3].isupper() :
-                            bylines.append(contentString)
-                        else:
-                            descriptions.append(contentString)
-
-                    # Fetch the article titles and URLs
-                    articleCount = len(sectionblock.findAll('span'))
-                    todays_article_count += articleCount
-                    for (i,span) in enumerate(sectionblock.findAll(attrs={'class':'headlineWrapper'})) :
-                        a = span.find('a', href=True)
-                        url = re.sub(r'\?.*', '', a['href'])
-                        url += '?pagewanted=all'
-
-                        title = self.tag_to_string(a, use_alt=True)
-                        # prepend the section name
-                        title = self.sections[section] + " &middot; " + title
-
-                        if not isinstance(title, unicode):
-                            title = title.decode('utf-8', 'replace')
-
-                        # Allow for unattributed, undescribed entries "Editor's Note"
-                        if i >= len(descriptions) :
-                            description = None
-                        else :
-                            description = descriptions[i]
-
-                        if len(bylines) == articleCount :
-                            author = bylines[i]
-                        else :
-                            author = None
-
-                        # Check for duplicates
-                        duplicateFound = False
-                        if len(articles[feed]) > 1:
-                            for article in articles[feed] :
-                                if url == article['url'] :
-                                    duplicateFound = True
-                                    break
-
-                        if duplicateFound:
-                            # Continue fetching, don't add this article
-                            todays_article_count -= 1
-                            continue
-
-                        if not articles.has_key(feed):
-                            articles[feed] = []
-                        articles[feed].append(
-                            dict(title=title, url=url, date=pubdate,
-                                description=description, author=author, content=''))
-
-        # self.log("Queuing %d articles from %s" % (todays_article_count, "Top Stories"))
-        ans = self.sort_index_by(ans, {'Top Stories':-1})

        # Fetch the content table
        content_table = soup.find('table',{'id':'content'})
        if content_table is None:
            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
            return None

        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections

        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
            for div_sec in td_col.findAll('div',recursive=False):
                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
                    section_name = re.sub(r'^ *$','',section_name)
                    if section_name == '':
                        continue
                    section_name = string.capwords(section_name)
                    if section_name == 'U.s.':
                        section_name = 'U.S.'
                    elif section_name == 'Op-ed':
                        section_name = 'Op-Ed'
                    pubdate = strftime('%a, %d %b')

                    search_div = div_sec
                    for next_tag in h6_sec_name.findNextSiblings(True):
                        if next_tag.__class__.__name__ == 'Tag':
                            if next_tag.name == 'div':
                                search_div = next_tag
                            break

                    # Get the articles
                    for h3_item in search_div.findAll('h3'):
                        byline = h3_item.h6
                        if byline is not None:
                            author = self.tag_to_string(byline,use_alt=False)
                        else:
                            author = ''
                        a = h3_item.find('a', href=True)
                        if not a:
                            continue
                        url = re.sub(r'\?.*', '', a['href'])
                        if not url.startswith("http"):
                            continue
                        if not url.endswith(".html"):
                            continue
                        if 'podcast' in url:
                            continue
                        if 'video' in url:
                            continue
                        url += '?pagewanted=all'
                        if url in url_list:
                            continue
                        url_list.append(url)
                        self.log("URL %s" % url)
                        title = self.tag_to_string(a, use_alt=True).strip()
                        desc = h3_item.find('p')
                        if desc is not None:
                            description = self.tag_to_string(desc,use_alt=False)
                        else:
                            description = ''
                        if not articles.has_key(section_name):
                            ans.append(section_name)
                            articles[section_name] = []
                        articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))

        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
-        self.dump_ans(ans)
-        return ans
+        return self.filter_ans(ans)

    def parse_index(self):
        if self.headlinesOnly:
            return self.parse_headline_index()
        else:
            return self.parse_todays_index()

    def strip_anchors(self,soup):
        paras = soup.findAll(True)
        for para in paras:
            aTags = para.findAll('a')
            for a in aTags:
                if a.img is None:
                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
        return soup

    def preprocess_html(self, soup):

        kicker_tag = soup.find(attrs={'class':'kicker'})
        if kicker_tag: # remove Op-Ed author head shots
            tagline = self.tag_to_string(kicker_tag)
            if tagline == 'Op-Ed Columnist':
                img_div = soup.find('div','inlineImage module')
                if img_div:
                    img_div.extract()
        return self.strip_anchors(soup)

    def postprocess_html(self,soup, True):
@@ -422,8 +462,9 @@ class NYTimes(BasicNewsRecipe):
                firstImg = inlineImgs[0]
                for inlineImg in inlineImgs[1:]:
                    inlineImg.extract()
-                # Move firstImg after headline
-                cgFirst = soup.find(True, {'class':'columnGroup first'})
+                # Move firstImg before article body
+                #article_body = soup.find(True, {'id':'articleBody'})
+                cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
                if cgFirst:
                    # Strip all sibling NavigableStrings: noise
                    navstrings = cgFirst.findAll(text=True, recursive=False)
@@ -443,30 +484,18 @@ class NYTimes(BasicNewsRecipe):
                if headline_found:
                    cgFirst.insert(insertLoc,firstImg)
                else:
                    self.log(">>> No class:'columnGroup first' found <<<")

-        # Change class="kicker" to <h3>
-        kicker = soup.find(True, {'class':'kicker'})
-        if kicker and kicker.contents[0]:
-            h3Tag = Tag(soup, "h3")
-            h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker,
-                use_alt=False)))
-            kicker.replaceWith(h3Tag)
-
-        # Change captions to italic -1
+        # Change captions to italic
        for caption in soup.findAll(True, {'class':'caption'}) :
            if caption and caption.contents[0]:
-                emTag = Tag(soup, "em")
+                cTag = Tag(soup, "p", [("class", "caption")])
                c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
                mp_off = c.find("More Photos")
                if mp_off >= 0:
                    c = c[:mp_off]
-                emTag.insert(0, c)
-                #hrTag = Tag(soup, 'hr')
-                #hrTag['class'] = 'caption_divider'
-                hrTag = Tag(soup, 'div')
-                hrTag['class'] = 'divider'
-                emTag.insert(1, hrTag)
-                caption.replaceWith(emTag)
+                cTag.insert(0, c)
+                caption.replaceWith(cTag)

        # Change <nyt_headline> to <h2>
        h1 = soup.find('h1')
@@ -506,17 +535,6 @@ class NYTimes(BasicNewsRecipe):
                bTag.insert(0, subhead.contents[0])
                subhead.replaceWith(bTag)

-        # Synthesize a section header
-        dsk = soup.find('meta', attrs={'name':'dsk'})
-        if dsk and dsk.has_key('content'):
-            hTag = Tag(soup,'h3')
-            hTag['class'] = 'section'
-            hTag.insert(0,NavigableString(dsk['content']))
-            articleTag = soup.find(True, attrs={'id':'article'})
-            if articleTag:
-                articleTag.insert(0,hTag)
-
-        # Add class="articleBody" to <div> so we can format with CSS
        divTag = soup.find('div',attrs={'id':'articleBody'})
        if divTag:
            divTag['class'] = divTag['id']
@@ -532,11 +550,3 @@ class NYTimes(BasicNewsRecipe):
        return soup

-    def strip_anchors(self,soup):
-        paras = soup.findAll(True)
-        for para in paras:
-            aTags = para.findAll('a')
-            for a in aTags:
-                if a.img is None:
-                    a.replaceWith(a.renderContents().decode('cp1252','replace'))
-        return soup
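Either variant of this recipe can be exercised without the calibre GUI: ebook-convert accepts a .recipe file directly, e.g. ebook-convert nytimes.recipe out.epub --username ... --password ... (the credential flags are standard recipe-input options and are only consulted because needs_subscription is set); flipping headlinesOnly then selects which parse path runs.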


@@ -5,52 +5,186 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
nytimes.com
'''
-import string, re, time
-from calibre import strftime
+import re, string, time
+from calibre import entity_to_unicode, strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup

-def decode(self, src):
-    enc = 'utf-8'
-    if 'iso-8859-1' in src:
-        enc = 'cp1252'
-    return src.decode(enc, 'ignore')

class NYTimes(BasicNewsRecipe):

-    title = u'New York Times'
-    __author__ = 'Kovid Goyal/Nick Redding'
-    language = 'en'
-    requires_version = (0, 6, 36)
-    description = 'Daily news from the New York Times (subscription version)'
-    timefmt = ' [%b %d]'
+    # set headlinesOnly to True for the headlines-only version
+    headlinesOnly = False
+
+    # includeSections: List of sections to include. If empty, all sections found will be included.
+    # Otherwise, only the sections named will be included. For example,
+    #
+    #    includeSections = ['Politics','Sports']
+    #
+    # would cause only the Politics and Sports sections to be included.
includeSections = [] # by default, all sections included
# excludeSections: List of sections to exclude. If empty, all sections found will be included.
# Otherwise, the sections named will be excluded. For example,
#
# excludeSections = ['Politics','Sports']
#
# would cause the Politics and Sports sections to be excluded. This parameter can be used
    # in conjunction with includeSections although in most cases using one or the other, but
# not both, is sufficient.
excludeSections = []
# one_picture_per_article specifies that calibre should only use the first image
# from an article (if one exists). If one_picture_per_article = True, the image
# will be moved to a location between the headline and the byline.
# If one_picture_per_article = False, all images from the article will be included
# and shown in their original location.
one_picture_per_article = True
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
else:
title='New York Times'
description = 'Today\'s New York Times'
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
requires_version = (0, 7, 5)
timefmt = ''
    needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
    remove_tags_before = dict(id='article')
    remove_tags_after = dict(id='article')
-    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool','nextArticleLink',
-        'nextArticleLink clearfix','columnGroup doubleRule','doubleRule','entry-meta',
-        'icon enlargeThis','columnGroup last','relatedSearchesModule']}),
-        dict({'class':re.compile('^subNavigation')}),
-        dict({'class':re.compile('^leaderboard')}),
-        dict({'class':re.compile('^module')}),
-        dict({'class':'metaFootnote'}),
-        dict(id=['inlineBox','footer', 'toolsRight', 'articleInline','login','masthead',
-            'navigation', 'archive', 'side_search', 'blog_sidebar','cCol','portfolioInline',
-            'side_tool', 'side_index','header','readerReviewsCount','readerReviews',
-            'relatedArticles', 'relatedTopics', 'adxSponLink']),
+    remove_tags = [dict(attrs={'class':[
+        'articleFooter',
+        'articleTools',
+        'columnGroup doubleRule',
+        'columnGroup singleRule',
+        'columnGroup last',
+        'columnGroup last',
+        'doubleRule',
+        'dottedLine',
+        'entry-meta',
+        'entry-response module',
+        'icon enlargeThis',
+        'leftNavTabs',
+        'metaFootnote',
+        'module box nav',
+        'nextArticleLink',
+        'nextArticleLink clearfix',
+        'post-tools',
+        'relatedSearchesModule',
+        'side_tool',
+        'singleAd',
+        re.compile('^subNavigation'),
+        re.compile('^leaderboard'),
+        re.compile('^module'),
+        ]}),
+        dict(id=[
+            'adxLeaderboard',
+            'adxSponLink',
+            'archive',
+            'articleExtras',
+            'articleInline',
+            'blog_sidebar',
+            'businessSearchBar',
+            'cCol',
+            'entertainmentSearchBar',
+            'footer',
+            'header',
+            'header_search',
+            'inlineBox',
+            'login',
+            'masthead',
+            'masthead-nav',
+            'memberTools',
+            'navigation',
+            'portfolioInline',
+            'readerReviews',
+            'readerReviewsCount',
+            'relatedArticles',
+            'relatedTopics',
+            'respond',
+            'side_search',
+            'side_index',
+            'side_tool',
+            'toolsRight',
+            ]),
        dict(name=['script', 'noscript', 'style','form','hr'])]
-    encoding = decode

    no_stylesheets = True
    extra_css = '''
-                .articleHeadline { margin-top:0.5em; margin-bottom:0.25em; }
-                .credit { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                .byline { font-size: small; font-style:italic; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                .dateline { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
+                .credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                .dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
                .kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                .timestamp { font-size: small; }
-                .caption { font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
-                a:link {text-decoration: none; }'''
+                .timestamp { text-align: left; font-size: small; }
+                .caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
+                a:link {text-decoration: none; }
+                .articleBody { }
+                .authorId {text-align: left; }
+                .image {text-align: center;}
+                .source {text-align: left; }'''
def filter_ans(self, ans) :
total_article_count = 0
idx = 0
idx_max = len(ans)-1
while idx <= idx_max:
if self.includeSections != []:
if ans[idx][0] not in self.includeSections:
print "SECTION NOT INCLUDED: ",ans[idx][0]
del ans[idx]
idx_max = idx_max-1
continue
if ans[idx][0] in self.excludeSections:
print "SECTION EXCLUDED: ",ans[idx][0]
del ans[idx]
idx_max = idx_max-1
continue
if self.verbose:
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
for article in ans[idx][1]:
total_article_count += 1
if self.verbose:
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
article['url'].encode('cp1252','replace')))
idx = idx+1
self.log( "Queued %d articles" % total_article_count )
return ans
def fixChars(self,string):
# Replace lsquo (\x91)
        fixed = re.sub("\x91","‘",string)
# Replace rsquo (\x92)
        fixed = re.sub("\x92","’",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
        fixed = re.sub("\x96","–",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
return fixed
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()

@@ -60,22 +194,19 @@ class NYTimes(BasicNewsRecipe):
            br['USERID'] = self.username
            br['PASSWORD'] = self.password
            raw = br.submit().read()
-            if 'Sorry, we could not find the combination you entered. Please try again.' in raw:
+            if 'Please try again' in raw:
                raise Exception('Your username and password are incorrect')
-            #open('/t/log.html', 'wb').write(raw)
        return br

-    def get_masthead_url(self):
-        masthead = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
-        #masthead = 'http://members.cox.net/nickredding/nytlogo.gif'
-        br = BasicNewsRecipe.get_browser()
-        try:
-            br.open(masthead)
-        except:
-            self.log("\nMasthead unavailable")
-            masthead = None
-        return masthead
+    def skip_ad_pages(self, soup):
+        # Skip ad pages served before actual article
+        skip_tag = soup.find(True, {'name':'skip'})
+        if skip_tag is not None:
+            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
+            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
+            url += '?pagewanted=all'
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            return self.index_to_soup(url, raw=True)

    def get_cover_url(self):
        cover = None
@@ -93,12 +224,57 @@ class NYTimes(BasicNewsRecipe):
        return cover

    def short_title(self):
-        return 'New York Times'
+        return self.title

-    def parse_index(self):
-        self.encoding = 'cp1252'
-        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
-        self.encoding = decode
+    def index_to_soup(self, url_or_raw, raw=False):
+        '''
+        OVERRIDE of class method
+        deals with various page encodings between index and articles
+        '''
def get_the_soup(docEncoding, url_or_raw, raw=False) :
if re.match(r'\w+://', url_or_raw):
f = self.browser.open(url_or_raw)
_raw = f.read()
f.close()
if not _raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else:
_raw = url_or_raw
if raw:
return _raw
if not isinstance(_raw, unicode) and self.encoding:
_raw = _raw.decode(docEncoding, 'replace')
massage = list(BeautifulSoup.MARKUP_MASSAGE)
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
return BeautifulSoup(_raw, markupMassage=massage)
# Entry point
print "index_to_soup()"
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
if docEncoding == '' :
docEncoding = self.encoding
if self.verbose > 2:
self.log( " document encoding: '%s'" % docEncoding)
if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw)
return soup
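The override fetches the page once with the recipe's default encoding, reads the charset out of the page's own Content-Type meta tag, and refetches only if the two disagree. The string slicing operates on the rendered tag; with an illustrative input:

    # Illustrative input for the charset extraction in index_to_soup above:
    contentType = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
    docEncoding = contentType[contentType.find('charset=') + len('charset='):contentType.rfind('"')]
    # docEncoding == 'utf-8'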
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
            # Replace '&' with '&#38;'
            massaged = re.sub("&","&#38;", massaged)
return self.fixChars(massaged)
else:
return description
def parse_todays_index(self):
        def feed_title(div):
            return ''.join(div.findAll(text=True, recursive=True)).strip()
@@ -119,12 +295,13 @@ class NYTimes(BasicNewsRecipe):
                return
            if 'podcast' in url:
                return
+            if '/video/' in url:
+                return
            url += '?pagewanted=all'
            if url in url_list:
                return
            url_list.append(url)
            title = self.tag_to_string(a, use_alt=True).strip()
-            #self.log("Title: %s" % title)
            description = ''
            pubdate = strftime('%a, %d %b')
            summary = div.find(True, attrs={'class':'summary'})
@@ -140,6 +317,7 @@ class NYTimes(BasicNewsRecipe):
                    author = self.tag_to_string(authorAttribution, use_alt=False)
            feed = key if key is not None else 'Uncategorized'
            if not articles.has_key(feed):
+                ans.append(feed)
                articles[feed] = []
            articles[feed].append(
                dict(title=title, url=url, date=pubdate,
@@ -147,46 +325,228 @@ class NYTimes(BasicNewsRecipe):
                    content=''))

+        soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
+
-        # Find each instance of class="section-headline", class="story", class="story headline"
+        # Find each article
        for div in soup.findAll(True,
            attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):

            if div['class'] in ['section-headline','sectionHeader']:
                key = string.capwords(feed_title(div))
-                articles[key] = []
-                ans.append(key)
-                #self.log('Section: %s' % key)
+                key = key.replace('Op-ed','Op-Ed')
+                key = key.replace('U.s.','U.S.')
            elif div['class'] in ['story', 'story headline'] :
                handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                for lidiv in div.findAll('li'):
                    handle_article(lidiv)

-#        ans = self.sort_index_by(ans, {'The Front Page':-1,
-#                        'Dining In, Dining Out':1,
-#                        'Obituaries':2})
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return self.filter_ans(ans)
def parse_headline_index(self):
articles = {}
ans = []
url_list = []
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the content table
content_table = soup.find('table',{'id':'content'})
if content_table is None:
self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
return None
# Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
for div_sec in td_col.findAll('div',recursive=False):
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
section_name = re.sub(r'^ *$','',section_name)
if section_name == '':
continue
section_name=string.capwords(section_name)
if section_name == 'U.s.':
section_name = 'U.S.'
elif section_name == 'Op-ed':
section_name = 'Op-Ed'
pubdate = strftime('%a, %d %b')
search_div = div_sec
for next_tag in h6_sec_name.findNextSiblings(True):
if next_tag.__class__.__name__ == 'Tag':
if next_tag.name == 'div':
search_div = next_tag
break
# Get the articles
for h3_item in search_div.findAll('h3'):
byline = h3_item.h6
if byline is not None:
                            author = self.tag_to_string(byline,use_alt=False)
else:
author = ''
a = h3_item.find('a', href=True)
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
continue
if not url.endswith(".html"):
continue
if 'podcast' in url:
continue
if 'video' in url:
continue
url += '?pagewanted=all'
if url in url_list:
continue
url_list.append(url)
self.log("URL %s" % url)
title = self.tag_to_string(a, use_alt=True).strip()
desc = h3_item.find('p')
if desc is not None:
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not articles.has_key(section_name):
ans.append(section_name)
articles[section_name] = []
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
def parse_index(self):
if self.headlinesOnly:
return self.parse_headline_index()
else:
return self.parse_todays_index()
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
-        return ans

    def preprocess_html(self, soup):

        kicker_tag = soup.find(attrs={'class':'kicker'})
-        if kicker_tag:
+        if kicker_tag: # remove Op-Ed author head shots
            tagline = self.tag_to_string(kicker_tag)
-            #self.log("FOUND KICKER %s" % tagline)
            if tagline=='Op-Ed Columnist':
                img_div = soup.find('div','inlineImage module')
-                #self.log("Searching for photo")
                if img_div:
                    img_div.extract()
-                    #self.log("Photo deleted")
-        refresh = soup.find('meta', {'http-equiv':'refresh'})
-        if refresh is None:
-            return soup
-        content = refresh.get('content').partition('=')[2]
-        raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
-        return BeautifulSoup(raw.decode('cp1252', 'replace'))
+        return self.strip_anchors(soup)
def postprocess_html(self,soup, True):
if self.one_picture_per_article:
# Remove all images after first
largeImg = soup.find(True, {'class':'articleSpanImage'})
inlineImgs = soup.findAll(True, {'class':'inlineImage module'})
if largeImg:
for inlineImg in inlineImgs:
inlineImg.extract()
else:
if inlineImgs:
firstImg = inlineImgs[0]
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
#article_body = soup.find(True, {'id':'articleBody'})
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
navstrings = cgFirst.findAll(text=True, recursive=False)
[ns.extract() for ns in navstrings]
headline_found = False
tag = cgFirst.find(True)
insertLoc = 0
while True:
insertLoc += 1
if hasattr(tag,'class') and tag['class'] == 'articleHeadline':
headline_found = True
break
tag = tag.nextSibling
if not tag:
headline_found = False
break
if headline_found:
cgFirst.insert(insertLoc,firstImg)
else:
self.log(">>> No class:'columnGroup first' found <<<")
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and caption.contents[0]:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
else:
# Blog entry - replace headline, remove <hr> tags
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
return soup


@@ -6,22 +6,25 @@ Fetch Die Zeit.
'''
from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag

class ZeitDe(BasicNewsRecipe):

-    title = 'ZEIT Online'
-    description = 'ZEIT Online'
+    title = 'Zeit Online'
+    description = 'Zeit Online'
    language = 'de'
-    lang = 'de_DE'
-    __author__ = 'Martin Pitt, Sujata Raman and Ingo Paschke'
-    use_embedded_content = False
+    __author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
    max_articles_per_feed = 40
-    no_stylesheets = True
-    no_javascript = True
-    encoding = 'utf-8'
+    remove_empty_feeds = True
+
+    remove_tags = [
+        dict(name='iframe'),
+        dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
+        dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
+        dict(name='div', attrs={'id':["place_5","place_4","comments"]})
+    ]
+
+    keep_only_tags = [dict(id=['main'])]
    feeds = [
        ('Seite 1', 'http://newsfeed.zeit.de/index_xml'),
@@ -40,43 +43,15 @@ class ZeitDe(BasicNewsRecipe):
        ('Sport', 'http://newsfeed.zeit.de/sport/index'),
    ]

-    extra_css = '''
-        .supertitle{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
-        .excerpt{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:small;}
-        .title{font-family:Arial,Helvetica,sans-serif;font-size:large;clear:right;}
-        .caption{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
-        .copyright{color:#666666; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;}
-        .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
-        .quote{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
-        .quote .cite{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small}
-        .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
-        .inline{float:left;margin-top:0;margin-right:15px;position:relative;width:180px; }
-        img.inline{float:none}
-        .intertitle{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small;font-weight:700}
-        .ebinfobox{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:xx-small;list-style-type:none;float:right;margin-top:0;border-left-style:solid;border-left-width:1px;padding-left:10px;}
-        .infobox {border-style: solid; border-width: 1px;padding:8px;}
-        .infobox dt {font-weight:700;}
-    '''
+    extra_css = '.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'

    #filter_regexps = [r'ad.de.doubleclick.net/']

-    keep_only_tags = [
-        dict(name='div', attrs={'class':["article"]}) ,
-        dict(name='ul', attrs={'class':["tools"]}) ,
-    ]
-    remove_tags = [
-        dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),
-        dict(name='div', attrs={'class':["pagination block","pagenav","inline link", "copyright"] }),
-        dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
-        dict(name='div', attrs={'id':["place_5","place_4","comments"]})
-    ]
-    remove_attributes = ['style', 'font']

    def get_article_url(self, article):
        ans = article.get('link',None)
-        ans += "?page=all"
-        if 'video' in ans or 'quiz' in ans :
+        ans += "?page=all&print=true"
+        if 'video' in ans or 'quiz' in ans or 'blog' in ans :
            ans = None
        return ans
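get_article_url now rewrites every feed link to the single-page print view and drops items whose links point at videos, quizzes, or blogs. With an assumed feed entry:

    # Assumed feed item, to illustrate get_article_url above:
    article = {'link': 'http://www.zeit.de/politik/beispiel-artikel'}
    # returned URL: 'http://www.zeit.de/politik/beispiel-artikel?page=all&print=true'
    # a link containing 'video', 'quiz' or 'blog' yields None and is skipped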
@@ -86,25 +61,3 @@ class ZeitDe(BasicNewsRecipe):
            return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
        except:
            return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'

-    def preprocess_html(self, soup):
-        soup.html['xml:lang'] = self.lang
-        soup.html['lang'] = self.lang
-        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
-        soup.head.insert(0,mtag)
-        title = soup.find('h2', attrs={'class':'title'})
-        if title is None:
-            print "no title"
-            return soup
-        info = Tag(soup,'ul',[('class','ebinfobox')])
-        tools = soup.find('ul', attrs={'class':'tools'})
-        #author = tools.find('li','author first')
-        for tag in ['author first', 'date', 'date first', 'author', 'source']:
-            line = tools.find('li', tag)
-            if line:
-                info.insert(0,line)
-        title.parent.insert(0,info)
-        tools.extract()
-        return soup


@@ -0,0 +1,60 @@
body{
margin:0px;
padding: 0.5em;
background-color:#F6F3E9;
font-size:12px;
font-family:Arial, Helvetica, sans-serif;
}
.calibreMeta{
background-color:#39322B;
color:white;
padding:10px;
}
.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a{
color:white;
}
.calibreMeta h1{
margin:0px;
font-size:18px;
background-color:#39322B;
}
.calibreEbookContent{
padding:20px;
}
.calibreEbNav, .calibreEbNavTop{
clear:both;
background-color:#39322B;
color:white;
padding:10px;
text-align:center;
}
.calibreEbNavTop{
margin-bottom:20px;
}
.calibreEbNav a, .calibreEbNavTop a{
padding:0px 5px;
}
.calibreTocIndex{
line-height:18px;
}
.calibreToc{
float:left;
margin:20px;
width:300px;
background-color:#39322B;
color:white;
padding:10px;
}
.calibreEbookContent{
width:600px;
float:left;
}


@@ -0,0 +1,74 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
${head_content}$
<link href="${cssLink}$" type="text/css" rel="stylesheet" />
</head>
<body>
<div class="calibreMeta">
<div class="calibreMetaTitle">
${pos1=1}$
${for title in meta.titles():}$
${if pos1:}$
<h1>
<a href="${tocUrl}$">${print title}$</a>
</h1>
${:else:}$
<div class="calibreMetaSubtitle">${print title}$</div>
${:endif}$
${pos1=0}$
${:endfor}$
</div>
<div class="calibreMetaAuthor">
${print ', '.join(meta.creators())}$
</div>
</div>
<div class="calibreMain">
<div class="calibreEbookContent">
${if prevLink or nextLink:}$
<div class="calibreEbNavTop">
${if prevLink:}$
<a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:else:}$
<a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:endif}$
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
${:endif}$
${ebookContent}$
</div>
${if has_toc:}$
<div class="calibreToc">
<h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
${print toc()}$
</div>
${:endif}$
<div class="calibreEbNav">
${if prevLink:}$
<a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:else:}$
<a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:endif}$
<a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
</div>
</body>
</html>
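The ${...}$ markers in this template are Templite syntax: a block may hold a bare expression (emitted into the output), a statement such as ${pos1=0}$, or flow control written as ${if cond:}$ / ${:else:}$ / ${:endif}$ and ${for x in xs:}$ / ${:endfor}$, with ${print ...}$ writing to the output stream. A minimal rendering sketch, assuming the default delimiters of the bundled Templite:

    from templite import Templite

    # Minimal sketch of how the templates above are rendered:
    t = Templite(u'${if nextLink:}$<a href="${nextLink}$">next</a>${:else:}$end${:endif}$')
    html = t.render(nextLink=u'chapter2.html')
    # html == u'<a href="chapter2.html">next</a>'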


@@ -0,0 +1,61 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
<link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
<title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>
${for item in meta:}$
<meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
${:endfor}$
<link href="${cssLink}$" type="text/css" rel="stylesheet" />
</head>
<body>
<div class="calibreMeta">
<div class="calibreMetaTitle">
${pos1=1}$
${for title in meta.titles():}$
${if pos1:}$
<h1>
<a href="${tocUrl}$">${print title}$</a>
</h1>
${:else:}$
<div class="calibreMetaSubtitle">${print title}$</div>
${:endif}$
${pos1=0}$
${:endfor}$
</div>
<div class="calibreMetaAuthor">
${print ', '.join(meta.creators()),}$
</div>
</div>
<div class="calibreMain">
<div class="calibreEbookContent">
${if has_toc:}$
<div class="calibreTocIndex">
<h2>${print _('Table of contents'),}$</h2>
${toc}$
</div>
${:else:}$
<h2>${print _('No table of contents present'),}$</h2>
<div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
${:endif}$
</div>
<div class="calibreEbNav">
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
</div>
</body>
</html>


@@ -89,7 +89,7 @@ class Server(Command):
            t = telnetlib.Telnet('localhost', 4242)
            t.read_until("repl>")
            t.write('BrowserReload();')
-            print t.read_until("repl>")
+            t.read_until("repl>")
            t.close()
        except:
            print 'Failed to reload browser'


@@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.tcr.output import TCROutput
from calibre.ebooks.txt.output import TXTOutput
+from calibre.ebooks.html.output import HTMLOutput
from calibre.ebooks.snb.output import SNBOutput

from calibre.customize.profiles import input_profiles, output_profiles

@@ -525,6 +526,7 @@ plugins += [
    RTFOutput,
    TCROutput,
    TXTOutput,
+    HTMLOutput,
    SNBOutput,
]
# Order here matters. The first matched device is the one used.

@@ -893,4 +895,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
        Email, Server, Plugins, Tweaks, Misc]
#}}}


@@ -0,0 +1,33 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
class EasyMeta(object):
def __init__(self, meta):
self.meta = meta
def __iter__(self):
meta = self.meta
for item_name in meta.items:
for item in meta[item_name]:
if namespace(item.term) == DC11_NS:
yield { 'name': barename(item.term), 'value': item.value }
def __len__(self):
count = 0
for item in self:
count = count+1
return count
def titles(self):
for item in self.meta['title']:
yield item.value
def creators(self):
for item in self.meta['creator']:
yield item.value
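EasyMeta narrows the OEB metadata bag to the Dublin Core terms the templates care about: iteration yields name/value dicts, while titles() and creators() yield bare values. A usage sketch (oeb_book stands in for whatever the conversion pipeline provides):

    # Sketch of how the templates consume EasyMeta (see the output plugin below):
    meta = EasyMeta(oeb_book.metadata)
    main_title = ', '.join(meta.titles())     # feeds the <h1> in the page header
    authors = ', '.join(meta.creators())      # the calibreMetaAuthor block
    for item in meta:                         # every Dublin Core name/value pair
        print 'DC.%s = %s' % (item['name'], item['value'])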


@@ -0,0 +1,201 @@
from __future__ import with_statement

__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'

import os, re, shutil

from os.path import dirname, abspath, relpath, exists

from lxml import etree
from templite import Templite

from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.zipfile import ZipFile
from urllib import unquote
from calibre.ebooks.html.meta import EasyMeta

class HTMLOutput(OutputFormatPlugin):

    name = 'HTML Output'
    author = 'Fabian Grassl'
    file_type = 'zip'

    options = set([
        OptionRecommendation(name='template_css',
            help=_('CSS file used for the output instead of the default file')),
        OptionRecommendation(name='template_html_index',
            help=_('Template used for generation of the html index file instead of the default file')),
        OptionRecommendation(name='template_html',
            help=_('Template used for the generation of the html contents of the book instead of the default file')),
        OptionRecommendation(name='extract_to',
            help=_('Extract the contents of the generated ZIP file to the directory of the generated ZIP file')),
    ])

    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])

    def generate_toc(self, oeb_book, ref_url, output_dir):
        '''
        Generate a nested <ul> table of contents, with links relative to ref_url
        '''
        with CurrentDir(output_dir):
            def build_node(current_node, parent=None):
                if parent is None:
                    parent = etree.Element('ul')
                elif len(current_node.nodes):
                    parent = element(parent, 'ul')
                for node in current_node.nodes:
                    point = element(parent, 'li')
                    href = relpath(abspath(unquote(node.href)), dirname(ref_url))
                    link = element(point, 'a', href=href)
                    title = node.title
                    if title:
                        title = re.sub(r'\s+', ' ', title)
                    link.text = title
                    build_node(node, point)
                return parent
            wrap = etree.Element('div')
            wrap.append(build_node(oeb_book.toc))
            return wrap

    def generate_html_toc(self, oeb_book, ref_url, output_dir):
        root = self.generate_toc(oeb_book, ref_url, output_dir)
        return etree.tostring(root, pretty_print=True, encoding='utf-8',
                xml_declaration=True)

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        # read template files, falling back to the default resources
        if opts.template_html_index is not None:
            template_html_index_data = open(opts.template_html_index, 'rb').read()
        else:
            template_html_index_data = P('templates/html_export_default_index.tmpl', data=True)

        if opts.template_html is not None:
            template_html_data = open(opts.template_html, 'rb').read()
        else:
            template_html_data = P('templates/html_export_default.tmpl', data=True)

        if opts.template_css is not None:
            template_css_data = open(opts.template_css, 'rb').read()
        else:
            template_css_data = P('templates/html_export_default.css', data=True)

        template_html_index_data = template_html_index_data.decode('utf-8')
        template_html_data = template_html_data.decode('utf-8')
        template_css_data = template_css_data.decode('utf-8')

        self.log = log
        self.opts = opts
        meta = EasyMeta(oeb_book.metadata)

        tempdir = PersistentTemporaryDirectory()
        output_file = os.path.join(tempdir,
                os.path.basename(re.sub(r'\.zip', '', output_path)+'.html'))
        output_dir = re.sub(r'\.html', '', output_file)+'_files'

        if not exists(output_dir):
            os.makedirs(output_dir)

        css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
        with open(css_path, 'wb') as f:
            f.write(template_css_data.encode('utf-8'))

        with open(output_file, 'wb') as f:
            html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
            templite = Templite(template_html_index_data)
            nextLink = oeb_book.spine[0].href
            nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
            cssLink = relpath(abspath(css_path), dirname(output_file))
            tocUrl = relpath(output_file, dirname(output_file))
            t = templite.render(has_toc=bool(oeb_book.toc.count()),
                    toc=html_toc, meta=meta, nextLink=nextLink,
                    tocUrl=tocUrl, cssLink=cssLink)
            f.write(t)

        with CurrentDir(output_dir):
            for item in oeb_book.manifest:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                if not exists(dir):
                    os.makedirs(dir)
                if item.spine_position is not None:
                    # spine items are rendered through the template in the
                    # loop below; just create the empty file here
                    with open(path, 'wb') as f:
                        pass
                else:
                    with open(path, 'wb') as f:
                        f.write(str(item))
                    item.unload_data_from_memory(memory=path)

            for item in oeb_book.spine:
                path = abspath(unquote(item.href))
                dir = dirname(path)
                root = item.data.getroottree()

                # get & clean HTML <HEAD>-data
                head = root.xpath('//h:head', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                head_content = etree.tostring(head, pretty_print=True, encoding='utf-8')
                head_content = re.sub(r'\<\/?head.*\>', '', head_content)
                head_content = re.sub(re.compile(r'\<style.*\/style\>', re.M|re.S), '', head_content)

                # get & clean HTML <BODY>-data
                body = root.xpath('//h:body', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8')
                ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)

                # generate link to next page
                if item.spine_position+1 < len(oeb_book.spine):
                    nextLink = oeb_book.spine[item.spine_position+1].href
                    nextLink = relpath(abspath(nextLink), dir)
                else:
                    nextLink = None

                # generate link to previous page
                if item.spine_position > 0:
                    prevLink = oeb_book.spine[item.spine_position-1].href
                    prevLink = relpath(abspath(prevLink), dir)
                else:
                    prevLink = None

                cssLink = relpath(abspath(css_path), dir)
                tocUrl = relpath(output_file, dir)

                # render template
                templite = Templite(template_html_data)
                toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
                t = templite.render(ebookContent=ebook_content,
                        prevLink=prevLink, nextLink=nextLink,
                        has_toc=bool(oeb_book.toc.count()), toc=toc,
                        tocUrl=tocUrl, head_content=head_content,
                        meta=meta, cssLink=cssLink)

                # write html to file
                with open(path, 'wb') as f:
                    f.write(t)
                item.unload_data_from_memory(memory=path)

        zfile = ZipFile(output_path, "w")
        zfile.add_dir(output_dir)
        if opts.extract_to:
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            os.makedirs(opts.extract_to)
            zfile.extractall(opts.extract_to)
            self.log('Zip file extracted to', opts.extract_to)
        zfile.close()

        # cleanup temp dir
        shutil.rmtree(tempdir)
@@ -112,13 +112,12 @@ def get_metadata(br, asin, mi):

 def main(args=sys.argv):
     # Test xisbn
-    #print get_social_metadata('Learning Python', None, None, '8324616489')
-    #print
+    print get_social_metadata('Learning Python', None, None, '8324616489')
+    print

     # Test sophisticated comment formatting
     print get_social_metadata('Angels & Demons', None, None, '9781416580829')
     print
-    return

     # Random tests
     print get_social_metadata('Star Trek: Destiny: Mere Mortals', None, None, '9781416551720')
@@ -275,7 +275,15 @@ class MobiMLizer(object):
         # <mbp:frame-set/> does not exist lalalala
         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
-            return
+            id_ = elem.get('id', None)
+            if id_:
+                # Keep anchors so people can use display:none
+                # to generate hidden TOCs
+                elem.clear()
+                elem.text = None
+                elem.set('id', id_)
+            else:
+                return
         tag = barename(elem.tag)
         istate = copy.copy(istates[-1])
         istate.rendered = False
@@ -406,6 +414,12 @@ class MobiMLizer(object):
             parent = bstate.para if bstate.inline is None else bstate.inline
             if parent is not None:
                 vtag = etree.SubElement(parent, XHTML(vtag))
+                # Add anchors
+                for child in vbstate.body:
+                    if child is not vbstate.para:
+                        vtag.append(child)
+                    else:
+                        break
                 for child in vbstate.para:
                     vtag.append(child)
                 return
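For illustration, the kind of input the early-return fix is aimed at (the markup below is hypothetical, not from the commit):

    hidden_toc = '''<div style="display: none">
    <a id="toc"></a>
    <a id="chapter-1">Chapter 1</a>
    </div>'''
    # Before this change, MobiMLizer returned early on display:none and the
    # whole subtree vanished, so links to #chapter-1 broke in the MOBI output.
    # Now any element carrying an id is emptied but kept, so the anchors
    # still resolve while the content stays invisible.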
@@ -49,5 +49,3 @@ class OEBOutput(OutputFormatPlugin):
                 with open(path, 'wb') as f:
                     f.write(str(item))
                 item.unload_data_from_memory(memory=path)
-
-
@@ -101,11 +101,12 @@ class SNBMLizer(object):
             subitem = ''
         bodyTree = trees[subitem].find(".//body")
         for line in output.splitlines():
-            if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
+            pos = line.find(CALIBRE_SNB_PRE_TAG)
+            if pos == -1:
                 line = line.strip(u' \t\n\r\u3000')
             else:
                 etree.SubElement(bodyTree, "text").text = \
-                    etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
+                    etree.CDATA(line[pos+len(CALIBRE_SNB_PRE_TAG):])
                 continue
             if len(line) != 0:
                 if line.find(CALIBRE_SNB_IMG_TAG) == 0:
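In short, the old prefix test only matched the pre-tag marker at column zero; the new code finds it anywhere in the line. A two-line illustration (the marker value here is made up, only its role matters):

    CALIBRE_SNB_PRE_TAG = '<calibre_snb_pre>'          # illustrative value
    line = u'\u3000' + CALIBRE_SNB_PRE_TAG + u'text'   # marker not at position 0
    print line.find(CALIBRE_SNB_PRE_TAG) == 0          # False: old test skipped this line
    pos = line.find(CALIBRE_SNB_PRE_TAG)
    print line[pos + len(CALIBRE_SNB_PRE_TAG):]        # u'text': new code recovers the payload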
@@ -35,7 +35,6 @@ class ViewAction(InterfaceAction):
         self.qaction.setMenu(self.view_menu)
         ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)

-
     def location_selected(self, loc):
         enabled = loc == 'library'
         for action in list(self.view_menu.actions())[1:]:
@@ -134,6 +133,9 @@ class ViewAction(InterfaceAction):
         rows = self.gui.current_view().selectionModel().selectedRows()
         self._view_books(rows)

+    def view_triggered(self, index):
+        self._view_books([index])
+
     def view_specific_book(self, index):
         self._view_books([index])
@@ -28,6 +28,8 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options, conne
     if log is None:
         log = Log()
     from calibre.library import db
+    from calibre.utils.config import prefs
+    prefs.refresh()
     db = db()
     db.catalog_plugin_on_device_temp_mapping = dbspec

@@ -50,6 +50,8 @@ class BooksView(QTableView): # {{{

     def __init__(self, parent, modelcls=BooksModel):
         QTableView.__init__(self, parent)
+        self.setEditTriggers(self.SelectedClicked|self.EditKeyPressed)
+
         self.drag_allowed = True
         self.setDragEnabled(True)
         self.setDragDropOverwriteMode(False)
@@ -98,6 +100,8 @@ class BooksView(QTableView): # {{{
         self._model.about_to_be_sorted.connect(self.about_to_be_sorted)
         self._model.sorting_done.connect(self.sorting_done)

+        self.doubleClicked.connect(parent.iactions['View'].view_triggered)
+
     # Column Header Context Menu {{{
     def column_header_context_handler(self, action=None, column=None):
         if not action or not column:
@@ -128,7 +128,7 @@ class ContentServer(object):
         if want_mobile:
             return self.mobile()

-        return self.browse_toplevel()
+        return self.browse_catalog()

     def old(self, **kwargs):
         return self.static('index.html').replace('{prefix}',
@@ -338,6 +338,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes

     * - Keyboard Shortcut
       - Action
+    * - :kbd:`F2 (Enter in OS X)`
+      - Edit the metadata of the currently selected field in the book list.
     * - :kbd:`A`
       - Add Books
     * - :kbd:`C`

src/templite/__init__.py Normal file
@@ -0,0 +1,87 @@
#!/usr/bin/env python
#
#   Templite+
#   A light-weight, fully functional, general purpose templating engine
#
#   Copyright (c) 2009 joonis new media
#   Author: Thimo Kraemer <thimo.kraemer@joonis.de>
#
#   Based on Templite - Tomer Filiba
#   http://code.activestate.com/recipes/496702/
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#   MA 02110-1301, USA.
#

import sys, re

class Templite(object):

    auto_emit = re.compile('(^[\'\"])|(^[a-zA-Z0-9_\[\]\'\"]+$)')

    def __init__(self, template, start='${', end='}$'):
        if len(start) != 2 or len(end) != 2:
            raise ValueError('each delimiter must be two characters long')
        delimiter = re.compile('%s(.*?)%s' % (re.escape(start), re.escape(end)), re.DOTALL)
        offset = 0
        tokens = []
        for i, part in enumerate(delimiter.split(template)):
            part = part.replace('\\'.join(list(start)), start)
            part = part.replace('\\'.join(list(end)), end)
            if i % 2 == 0:
                # literal text: escape it and wrap it in an emit() call
                if not part: continue
                part = part.replace('\\', '\\\\').replace('"', '\\"')
                part = '\t' * offset + 'emit("""%s""")' % part
            else:
                # embedded statement or expression
                part = part.rstrip()
                if not part: continue
                if part.lstrip().startswith(':'):
                    if not offset:
                        raise SyntaxError('no block statement to terminate: ${%s}$' % part)
                    offset -= 1
                    part = part.lstrip()[1:]
                    if not part.endswith(':'): continue
                elif self.auto_emit.match(part.lstrip()):
                    part = 'emit(%s)' % part.lstrip()
                lines = part.splitlines()
                margin = min(len(l) - len(l.lstrip()) for l in lines if l.strip())
                part = '\n'.join('\t' * offset + l[margin:] for l in lines)
                if part.endswith(':'):
                    offset += 1
            tokens.append(part)
        if offset:
            raise SyntaxError('%i block statement(s) not terminated' % offset)
        self.__code = compile('\n'.join(tokens), '<templite %r>' % template[:20], 'exec')

    def render(self, __namespace=None, **kw):
        """
        renders the template according to the given namespace.
        __namespace - a dictionary serving as a namespace for evaluation
        **kw - keyword arguments which are added to the namespace
        """
        namespace = {}
        if __namespace: namespace.update(__namespace)
        if kw: namespace.update(kw)
        namespace['emit'] = self.write

        __stdout = sys.stdout
        sys.stdout = self
        self.__output = []
        eval(self.__code, namespace)
        sys.stdout = __stdout
        return ''.join(self.__output)

    def write(self, *args):
        for a in args:
            self.__output.append(str(a))
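A short usage sketch of the `${ }$` syntax this engine compiles (the template text is illustrative): block statements end with a colon and are closed by `${:}$`, and bare names are auto-emitted:

    from templite import Templite

    t = Templite('''<ul>
    ${for item in items:}$<li>${item}$</li>
    ${:}$</ul>''')
    print t.render(items=['one', 'two'])
    # -> <ul>, one <li> per entry, </ul>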