diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 215e5a65ce..d044be24b6 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -764,7 +764,25 @@ class Manifest(object):
             # Convert to Unicode and normalize line endings
             data = self.oeb.decode(data)
             data = self.oeb.html_preprocessor(data)
-            orig_data = data
+
+            # Remove DOCTYPE declaration as it messes up parsing
+            # In particular, it causes tostring to insert xmlns
+            # declarations, which messes up the coercing logic
+            idx = data.find('<html')
+            if idx > -1:
+                pre = data[:idx]
+                data = data[idx:]
+                if '<!DOCTYPE' in pre:
+                    user_entities = {}
+                    for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
+                        val = match.group(2)
+                        if val.startswith('"') and val.endswith('"'):
+                            val = val[1:-1]
+                        user_entities[match.group(1)] = val
+                    if user_entities:
+                        pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
+                        data = pat.sub(lambda m:user_entities[m.group(1)], data)
+
             # Try with more & more drastic measures to parse
             def first_pass(data):
                 try:
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 37252f17cd..0c2211e5c7 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -282,8 +282,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
 
         self.initialize_combos()
-
-        self.series_index.setValue(self.db.series_index(row))
+        si = self.db.series_index(row)
+        if si is None:
+            si = 1.0
+        self.series_index.setValue(si)
         QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.enable_series_index)
         QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.enable_series_index)
 
@@ -305,6 +307,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
 
     def deduce_author_sort(self):
        au = unicode(self.authors.text())
+       au = re.sub(r'\s+et al\.$', '', au)
        authors = string_to_authors(au)
        self.author_sort.setText(authors_to_sort_string(authors))
 
@@ -483,9 +486,17 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
 
     def accept(self):
-        if self.formats_changed:
-            self.sync_formats()
-        title = qstring_to_unicode(self.title.text())
+        try:
+            if self.formats_changed:
+                self.sync_formats()
+            title = unicode(self.title.text())
+        except IOError, err:
+            if err.errno == 13: # Permission denied
+                fname = err.filename if err.filename else 'file'
+                return error_dialog(self, _('Permission denied'),
+                        _('Could not open %s. Is it being used by another'
+                        ' program?')%fname, show=True)
+            raise
         self.db.set_title(self.id, title, notify=False)
         au = unicode(self.authors.text())
         if au:
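For reference, a minimal standalone sketch of the DOCTYPE/ENTITY handling added in the base.py hunk above. The function name, the re.escape() call, and the sample document are illustrative additions, not part of the patch:

    import re

    def strip_doctype_and_expand_entities(data):
        # Drop everything before <html> (the DOCTYPE plus any internal subset),
        # but first harvest user-defined <!ENTITY ...> declarations from that
        # prefix and expand the matching &name; references in the markup.
        idx = data.find('<html')
        if idx > -1:
            pre, data = data[:idx], data[idx:]
            if '<!DOCTYPE' in pre:
                user_entities = {}
                for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                    val = match.group(2)
                    if val.startswith('"') and val.endswith('"'):
                        val = val[1:-1]
                    user_entities[match.group(1)] = val
                if user_entities:
                    pat = re.compile(r'&(%s);' % '|'.join(map(re.escape, user_entities)))
                    data = pat.sub(lambda m: user_entities[m.group(1)], data)
        return data

    # Example:
    #   strip_doctype_and_expand_entities(
    #       '<!DOCTYPE book [ <!ENTITY prod "calibre"> ]><html><body>&prod;</body></html>')
    #   -> '<html><body>calibre</body></html>'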
diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index 14ca98f534..5bf1260df4 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -25,6 +25,8 @@ class Article(object):
                                     entity_to_unicode, self.title)
             except:
                 pass
+        if not isinstance(self.title, unicode):
+            self.title = self.title.decode('utf-8', 'replace')
         self.url = url
         self.author = author
         if author and not isinstance(author, unicode):
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 6ca0f8318f..88367ac63e 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -980,7 +980,7 @@ class BasicNewsRecipe(Recipe):
 
     def error_in_article_download(self, request, traceback):
         self.jobs_done += 1
-        self.log.error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
+        self.log.error(_(u'Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
         self.log.debug(traceback)
         self.log.debug('\n')
         self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title)
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 51f0000605..78d22fef00 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -15,7 +15,7 @@ recipe_modules = ['recipe_' + r for r in (
          'demorgen_be', 'de_standaard', 'ap', 'barrons', 'chr_mon', 'cnn', 'faznet',
          'jpost', 'jutarnji', 'nasa', 'reuters', 'spiegelde', 'wash_post', 'zeitde',
          'blic', 'novosti', 'danas', 'vreme', 'times_online', 'the_scotsman',
-         'nytimes_sub', 'security_watch', 'cyberpresse', 'st_petersburg_times',
+         'nytimes_sub', 'nytimes', 'security_watch', 'cyberpresse', 'st_petersburg_times',
          'clarin', 'financial_times', 'heise', 'le_monde', 'harpers', 'science_aas',
          'science_news', 'the_nation', 'lrb', 'harpers_full', 'liberation',
          'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
diff --git a/src/calibre/web/feeds/recipes/recipe_craigslist.py b/src/calibre/web/feeds/recipes/recipe_craigslist.py
new file mode 100644
index 0000000000..bc4fd79131
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_craigslist.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CraigsList(BasicNewsRecipe):
+    title          = u'craigslist - Best Of'
+    oldest_article = 365
+    max_articles_per_feed = 100
+    language = _('English')
+    __author__ = 'kiodane'
+
+    feeds = [(u'Best of craigslist',
+        u'http://www.craigslist.org/about/best/all/index.rss'), (u'Ann Arbor',
+        u'http://www.craigslist.org/about/best/aaa/index.rss'), (u'Asheville',
+        u'http://www.craigslist.org/about/best/ash/index.rss'), (u'Austin',
+        u'http://www.craigslist.org/about/best/aus/index.rss'), (u'Baltimore',
+        u'http://www.craigslist.org/about/best/bal/index.rss'), (u'Birmingham',
+        u'http://www.craigslist.org/about/best/bhm/index.rss'), (u'Boston',
+        u'http://www.craigslist.org/about/best/bos/index.rss'), (u'Vermont',
+        u'http://www.craigslist.org/about/best/brl/index.rss'), (u'Columbia',
+        u'http://www.craigslist.org/about/best/cae/index.rss'), (u'Charlotte',
+        u'http://www.craigslist.org/about/best/cha/index.rss'), (u'Chico',
+        u'http://www.craigslist.org/about/best/chc/index.rss'), (u'Chicago',
+        u'http://www.craigslist.org/about/best/chi/index.rss'), (u'Charleston',
+        u'http://www.craigslist.org/about/best/chs/index.rss'), (u'Cleveland',
+        u'http://www.craigslist.org/about/best/cle/index.rss'), (u'Calgary',
+        u'http://www.craigslist.org/about/best/clg/index.rss'),
+        (u'Colorado Springs', u'http://www.craigslist.org/about/best/cos/index.rss'),
+        (u'Dallas', u'http://www.craigslist.org/about/best/dal/index.rss'),
+        (u'Denver', u'http://www.craigslist.org/about/best/den/index.rss'),
+        (u'Detroit Metro', u'http://www.craigslist.org/about/best/det/index.rss'),
+        (u'Des Moines', u'http://www.craigslist.org/about/best/dsm/index.rss'),
+        (u'Eau Claire', u'http://www.craigslist.org/about/best/eau/index.rss'),
+        (u'Grand Rapids', u'http://www.craigslist.org/about/best/grr/index.rss'),
+        (u'Hawaii', u'http://www.craigslist.org/about/best/hnl/index.rss'),
+        (u'Jacksonville', u'http://www.craigslist.org/about/best/jax/index.rss'),
+        (u'Knoxville', u'http://www.craigslist.org/about/best/knx/index.rss'),
+        (u'Kansas City', u'http://www.craigslist.org/about/best/ksc/index.rss'),
+        (u'South Florida', u'http://www.craigslist.org/about/best/mia/index.rss'),
+        (u'Minneapolis', u'http://www.craigslist.org/about/best/min/index.rss'),
+        (u'Maine', u'http://www.craigslist.org/about/best/mne/index.rss'),
+        (u'Montreal', u'http://www.craigslist.org/about/best/mon/index.rss'),
+        (u'Nashville', u'http://www.craigslist.org/about/best/nsh/index.rss'),
+        (u'New York', u'http://www.craigslist.org/about/best/nyc/index.rss'),
+        (u'Orange County', u'http://www.craigslist.org/about/best/orc/index.rss'),
+        (u'Portland', u'http://www.craigslist.org/about/best/pdx/index.rss'),
+        (u'Phoenix', u'http://www.craigslist.org/about/best/phx/index.rss'),
+        (u'Pittsburgh', u'http://www.craigslist.org/about/best/pit/index.rss'),
+        (u'Rhode Island', u'http://www.craigslist.org/about/best/prv/index.rss'),
+        (u'Raleigh', u'http://www.craigslist.org/about/best/ral/index.rss'),
+        (u'Rochester', u'http://www.craigslist.org/about/best/rcs/index.rss'),
+        (u'San Antonio', u'http://www.craigslist.org/about/best/sat/index.rss'),
+        (u'Santa Barbara', u'http://www.craigslist.org/about/best/sba/index.rss'),
+        (u'San Diego', u'http://www.craigslist.org/about/best/sdo/index.rss'),
+        (u'Seattle-Tacoma', u'http://www.craigslist.org/about/best/sea/index.rss'),
+        (u'Sf Bay Area', u'http://www.craigslist.org/about/best/sfo/index.rss'),
+        (u'Salt Lake City', u'http://www.craigslist.org/about/best/slc/index.rss'),
+        (u'Spokane', u'http://www.craigslist.org/about/best/spk/index.rss'),
+        (u'St Louis', u'http://www.craigslist.org/about/best/stl/index.rss'),
+        (u'Sydney', u'http://www.craigslist.org/about/best/syd/index.rss'),
+        (u'Toronto', u'http://www.craigslist.org/about/best/tor/index.rss'),
+        (u'Vancouver BC', u'http://www.craigslist.org/about/best/van/index.rss'),
+        (u'Washington DC', u'http://www.craigslist.org/about/best/wdc/index.rss')]
+
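As a rough usage sketch: a feed recipe of this shape can be exercised outside the GUI by saving a trimmed copy to a .recipe file and converting it. The class name, file name, and single-feed selection below are hypothetical, and the ebook-convert --test invocation is the usual recipe-development workflow rather than anything introduced by this patch:

    from calibre.web.feeds.news import BasicNewsRecipe

    class CraigsListSmokeTest(BasicNewsRecipe):
        # Hypothetical, trimmed-down variant of the CraigsList recipe above,
        # limited to one feed and a few articles so a test build finishes quickly.
        title                 = u'craigslist - Best Of (smoke test)'
        oldest_article        = 30
        max_articles_per_feed = 5
        no_stylesheets        = True

        feeds = [(u'Best of craigslist',
                  u'http://www.craigslist.org/about/best/all/index.rss')]

    # Typical invocation (depends on the installed calibre version):
    #   ebook-convert craigslist_smoke_test.recipe .epub --test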
diff --git a/src/calibre/web/feeds/recipes/recipe_nytimes.py b/src/calibre/web/feeds/recipes/recipe_nytimes.py
index 9276ad667a..bd150bffcf 100644
--- a/src/calibre/web/feeds/recipes/recipe_nytimes.py
+++ b/src/calibre/web/feeds/recipes/recipe_nytimes.py
@@ -1,110 +1,241 @@
 #!/usr/bin/env python
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 '''
-mobile.nytimes.com
+nytimes.com
 '''
 import re
-from calibre.web.feeds.news import BasicNewsRecipe
-from lxml import html
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
 
-class NYTimesMobile(BasicNewsRecipe):
-
-    title = 'The New York Times'
-    __author__ = 'Kovid Goyal'
+class NYTimes(BasicNewsRecipe):
+
+    title       = 'NYTimes Top Stories'
+    __author__  = 'Greg Riker'
     language = _('English')
-    description = 'Daily news from the New York Times (mobile version)'
-    timefmt = ' [%a, %d %b, %Y]'
-    multithreaded_fetch = True
-    max_articles_per_feed = 15
+    description = 'Top Stories from the New York Times'
+    #max_articles_per_feed = 3
+    timefmt = ''
+    needs_subscription = False
+    remove_tags_before = dict(id='article')
+    remove_tags_after  = dict(id='article')
+    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink', 'clearfix']}),
+                   dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
+                   dict(name=['script', 'noscript', 'style'])]
+    encoding = 'cp1252'
     no_stylesheets = True
-    extra_css = '''
-        .h1 { font-size: x-large; font-weight: bold; font-family: sans-serif; text-align: left }
-        .h2 { font-size: large; font-weight: bold }
-        .credit { font-size: small }
-        .aut { font-weight: bold }
-        .bodycontent { font-family: serif }
-    '''
-
-    remove_tags = [
-        dict(name='div', attrs={'class':['banner center', 'greyBackBlackTop', 'c bB']}),
-        dict(name='a', href='/main')
-    ]
-    remove_tags_after = [
-        dict(name='a', attrs={'name': 'bottom'})
-    ]
-
-    def image_url_processor(self, baseurl, url):
-        return re.sub(r'(&|&amp;).*', '', url)
-
-    def get_browser(self):
-        return BasicNewsRecipe.get_browser(mobile_browser=True)
-
-    def download(self, for_lrf=False):
-        if for_lrf:
-            self.max_articles_per_feed = 10
-        return BasicNewsRecipe.download(self, for_lrf=for_lrf)
-
-    def process_section(self, href):
-        raw = self.index_to_soup('http://mobile.nytimes.com/section'+href[href.find('?'):], raw=True)
-        articles = []
-        while True:
-            root = html.fromstring(raw)
-            for art in self.find_articles(root):
-                append = True
-                for x in articles:
-                    if x['title'] == art['title']:
-                        append = False
-                        break
-                if append: articles.append(art)
-            more = root.xpath('//a[starts-with(@href, "section") and contains(text(), "MORE")]')
-            if not more:
-                break
-            href = more[0].get('href')
-            raw = self.index_to_soup('http://mobile.nytimes.com/section'+href[href.find('?'):], raw=True)
-        return articles
-
-
-    def find_articles(self, root):
-        for a in root.xpath('//a[@accesskey]'):
-            href = a.get('href')
-            if href.startswith('http://'):
-                url = href
-            else:
-                url = 'http://mobile.nytimes.com/article' + href[href.find('?'):]+'&single=1',
-            yield {
-                'title': a.text.strip(),
-                'date' : '',
-                'url'  : url,
-                'description': '',
-            }
-
-
+    #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
+    extra_css = '.headline {text-align:left;}\n\
+                .byline {font:monospace; margin-bottom:0px;}\n\
+                .source {align:left;}\n\
+                .credit {align:right;}\n'
+
+
+    flatPeriodical = True
+
     def parse_index(self):
-        raw = self.index_to_soup('http://mobile.nytimes.com', raw=True)
-        root = html.fromstring(raw)
-        feeds = [('Latest news', list(self.find_articles(root)))]
-
-        for a in root.xpath('//a[starts-with(@href, "section")]'):
-            title = a.text.replace('»', '').replace(u'\xbb', '').strip()
-            print 'Processing section:', title
-            articles = self.process_section(a.get('href'))
-            feeds.append((title, articles))
-
-        return feeds
-
-    def postprocess_html(self, soup, first_fetch):
-        for img in soup.findAll('img', width=True):
-            try:
-                width = int(img['width'].replace('px', ''))
-                if width < 5:
-                    img.extract()
-                    continue
-            except:
-                pass
-            del img['width']
-            del img['height']
-            del img.parent['style']
+        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
+
+        def feed_title(div):
+            return ''.join(div.findAll(text=True, recursive=False)).strip()
+
+        articles = {}
+
+        ans = []
+        if self.flatPeriodical :
+            feed = key = 'All Top Stories'
+            articles[key] = []
+            ans.append(key)
+        else :
+            key = None
+
+        sections = { 'topstories'   : 'Top Stories',
+                     'world'        : 'World',
+                     'us'           : 'U.S.',
+                     'politics'     : 'Politics',
+                     'business'     : 'Business',
+                     'technology'   : 'Technology',
+                     'sports'       : 'Sports',
+                     'arts'         : 'Arts',
+                     'newyorkregion': 'New York/Region',
+                     'travel'       : 'Travel',
+                     'editorials'   : 'Editorials',
+                     'oped'         : 'Op-Ed'
+                   }
+
+        #excludeSectionKeywords = ['World','U.S.', 'Politics','Business','Technology','Sports','Arts','New York','Travel', 'Editorials', 'Op-Ed']
+        excludeSectionKeywords = []
+
+        # Fetch the outer table
+        table = soup.find('table')
+        previousTable = table
+        contentTable = None
+
+        # Find the deepest table containing the stories
+        while True :
+            table = table.find('table')
+            if table.find(text=re.compile('top stories start')) :
+                if self.verbose > 2 : self.log( "*********** dropping one level deeper **************")
+                previousTable = table
+                continue
+            else :
+                if self.verbose > 2 : self.log( "found table with top stories")
+                table = previousTable
+                if self.verbose > 2 : self.log( "lowest table containing 'top stories start:\n%s" % table)
+                break
+
+        # There are multiple subtables, find the one containing the stories
+        for block in table.findAll('table') :
+            if block.find(text=re.compile('top stories start')) :
+                if self.verbose > 2 : self.log( "found subtable with top stories")
+                table = block
+                if self.verbose > 2 : self.log( "lowest subtable containing 'top stories start:\n%s" % table)
+                break
+            else :
+                if self.verbose > 2 : self.log( "trying next subtable")
+                continue
+
+        # Again there are multiple subtables, find the one containing the stories
+        for storyblock in table.findAll('table') :
+            if storyblock.find(text=re.compile('top stories start')) :
+                if self.verbose > 2 : self.log( "found subsubtable with top stories\n" )
+                # table = storyblock
+                if self.verbose > 2 : self.log( "\nlowest subsubtable containing 'top stories start:\n%s" % storyblock)
+                break
+            else :
+                if self.verbose > 2 : self.log( "trying next subsubtable")
+                continue
+
+        skipThisSection = False
+
+        # Within this table are entries
+        for tr in storyblock.findAllNext('tr'):
+            if tr.find('span') is not None :
+
+                sectionblock = tr.find(True, attrs={'face':['times new roman, times,sans serif',
+                                                            'times new roman,times, sans serif',
+                                                            'times new roman, times, sans serif']})
+                if self.verbose > 2 : self.log( "----------- new tr ----------------")
+                section = None
+                bylines = []
+                descriptions = []
+                pubdate = None
+
+                # Get the Section title
+                for (x,i) in enumerate(sectionblock.contents) :
+                    skipThisSection = False
+                    # Extract the section title
+                    if ('Comment' in str(i.__class__)) :
+                        if 'start(name=' in i :
+                            section = i[i.find('=')+1:-2]
+                            if self.verbose > 2 : self.log( "sectionTitle: %s" % sections[section])
+
+                            # Check for excluded section
+                            if len(excludeSectionKeywords):
+                                key = sections[section]
+                                excluded = re.compile('|'.join(excludeSectionKeywords))
+                                if excluded.search(key) or articles.has_key(key):
+                                    if self.verbose > 2 : self.log("Skipping section %s" % key)
+                                    skipThisSection = True
+                                    break
+
+                            if not self.flatPeriodical :
+                                articles[key] = []
+                                ans.append(key)
+
+                # Get the bylines and descriptions
+                if not skipThisSection :
+                    for (x,i) in enumerate(sectionblock.contents) :
+
+                        # Extract the bylines and descriptions
+                        if (i.string is not None) and \
+                           (i.string.strip() > "") and \
+                           not ('Comment' in str(i.__class__)) :
+
+                            contentString = i.strip().encode('utf-8')
+                            if contentString[0:3] == 'By ' :
+                                bylines.append(contentString)
+                            else :
+                                descriptions.append(contentString)
+
+                    # Fetch the article titles and URLs
+                    articleCount = len(sectionblock.findAll('span'))
+                    for (i,span) in enumerate(sectionblock.findAll('span')) :
+                        a = span.find('a', href=True)
+                        #if not a:
+                            #continue
+                        url = re.sub(r'\?.*', '', a['href'])
+                        url += '?pagewanted=all'
+                        title = self.tag_to_string(a, use_alt=True)
+                        if self.flatPeriodical :
+                            # prepend the section name
+                            title = sections[section] + " : " + title
+                        if not isinstance(title, unicode):
+                            title = title.decode('utf-8', 'replace')
+                        description = descriptions[i]
+                        if len(bylines) == articleCount :
+                            author = bylines[i]
+                        else :
+                            author = None
+
+
+                        if self.verbose > 2 : self.log( "      title: %s" % title)
+                        if self.verbose > 2 : self.log( "        url: %s" % url)
+                        if self.verbose > 2 : self.log( "     author: %s" % author)
+                        if self.verbose > 2 : self.log( "description: %s" % description)
+
+                        if not self.flatPeriodical :
+                            feed = key
+
+                        if not articles.has_key(feed):
+                            if self.verbose > 2 : self.log( "adding %s to articles[]" % feed)
+                            articles[feed] = []
+                        if self.verbose > 2 : self.log( "     adding: %s to articles[%s]\n" % (title, feed))
+                        articles[feed].append(
+                            dict(title=title, url=url, date=pubdate,
+                                 description=description, author=author, content=''))
+
+        ans = self.sort_index_by(ans, {'Top Stories':-1})
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        #sys.exit(1)
+
+        return ans
+
+    def postprocess_html(self, soup, first_fetch):
+        if self.verbose > 2 : self.log(" ********** recipe.postprocess_html ********** ")
+
+        # Change captions to italic -1
+        for caption in soup.findAll(True, {'class':'caption'}) :
+            emTag = Tag(soup, "em")
+            #emTag['class'] = "caption"
+            #emTag['font-size-adjust'] = "-1"
+            emTag.insert(0, caption.contents[0])
+            hrTag = Tag(soup, 'hr')
+            emTag.insert(1, hrTag)
+            caption.replaceWith(emTag)
+
+
+        # Change <nyt_headline> to <h2 class="headline">
+        headline = soup.div.div.div.div.div.h1.nyt_headline
+        tag = Tag(soup, "h2")
+        tag['class'] = "headline"
+        tag.insert(0, headline.contents[0])
+        soup.h1.replaceWith(tag)
+        return soup
+
+    def postprocess_book(self, oeb, opts, log) :
+        log( " ********** recipe.postprocess_book ********** ")
+        log( list(oeb.toc) )
+        log( "oeb: %s" % oeb.toc)
+        log( "opts: %s" % opts.verbose)
+        for sections in oeb.toc :
+            log( "section:")
+            for articleTOC in sections:
+                log( "      title: %s" % articleTOC.title)
+                log( "     author: %s" % articleTOC.author)
+                log( "description: %s" % articleTOC.description)
+                log( "       href: %s" % articleTOC.href)
+                log( "    content: %s" % oeb.manifest.hrefs[articleTOC.href])
+        return
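A standalone sketch of the headline re-tagging step performed in postprocess_html above. The helper name and the BeautifulSoup import are assumptions for illustration (the recipe itself only imports Tag); the idea is to pull the text out of the custom nyt_headline element and re-emit it as an h2 so the recipe's .headline CSS applies:

    from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

    def retag_nyt_headline(html):
        # Find the custom <nyt_headline> element, copy its text into a new
        # <h2 class="headline">, and swap that in for the page's lead <h1>.
        soup = BeautifulSoup(html)
        headline = soup.find('nyt_headline')
        if headline is not None and headline.contents and soup.h1 is not None:
            tag = Tag(soup, 'h2')
            tag['class'] = 'headline'
            tag.insert(0, headline.contents[0])
            soup.h1.replaceWith(tag)
        return soup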