From ef0af86b19f4477602ebefee16fc195fa27f7286 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 11:08:16 -0600
Subject: [PATCH 1/8] Update Newsweek recipe for new site

---
 resources/recipes/newsweek.recipe | 229 ++++++++----------------------
 1 file changed, 58 insertions(+), 171 deletions(-)

diff --git a/resources/recipes/newsweek.recipe b/resources/recipes/newsweek.recipe
index 7a53c23e45..73837c1872 100644
--- a/resources/recipes/newsweek.recipe
+++ b/resources/recipes/newsweek.recipe
@@ -1,189 +1,76 @@
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal '
-
-import re
-from calibre import strftime
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+import string
 from calibre.web.feeds.news import BasicNewsRecipe

 class Newsweek(BasicNewsRecipe):

-    title = 'Newsweek'
-    __author__ = 'Kovid Goyal and Sujata Raman'
+    __author__ = 'Kovid Goyal'
     description = 'Weekly news and current affairs in the US'
+    language = 'en'
+    encoding = 'utf-8'
     no_stylesheets = True
-    extra_css = '''
-            h1{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#383733;}
-            .deck{font-family:Georgia,sans-serif; color:#383733;}
-            .bylineDate{font-family:georgia ; color:#58544A; font-size:x-small;}
-            .authorInfo{font-family:arial,helvetica,sans-serif; color:#0066CC; font-size:x-small;}
-            .articleUpdated{font-family:arial,helvetica,sans-serif; color:#73726C; font-size:x-small;}
-            .issueDate{font-family:arial,helvetica,sans-serif; color:#73726C; font-size:x-small; font-style:italic;}
-            h5{font-family:arial,helvetica,sans-serif; color:#73726C; font-size:x-small;}
-            h6{font-family:arial,helvetica,sans-serif; color:#73726C; font-size:x-small;}
-            .story{font-family:georgia,sans-serif ;color:black;}
-            .photoCredit{color:#999999; font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
-            .photoCaption{color:#0A0A09;font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
-            .fwArticle{font-family:Arial,Helvetica,sans-serif;font-size:x-small;font-weight:bold;}
-            '''
+    BASE_URL = 'http://www.newsweek.com'
+    INDEX = BASE_URL+'/topics.html'
-    encoding = 'utf-8'
-    language = 'en'
+    keep_only_tags = dict(name='article', attrs={'class':'article-text'})
+    remove_tags = [dict(attrs={'data-dartad':True})]
+    remove_attributes = ['property']
-    remove_tags = [
-            {'class':['fwArticle noHr','fwArticle','hdlBulletItem','head-content','navbar','link', 'ad', 'sponsorLinksArticle', 'mm-content',
-                'inline-social-links-wrapper', 'email-article','ToolBox',
-                'inline-promo-link', 'sponsorship',
-                'inlineComponentRight',
-                'comments-and-social-links-wrapper', 'EmailArticleBlock']},
-            {'id' : ['footer', 'ticker-data', 'topTenVertical',
-                'digg-top-five', 'mesothorax', 'nw-comments', 'my-take-landing',
-                'ToolBox', 'EmailMain']},
-            {'class': re.compile('related-cloud')},
-            dict(name='li', attrs={'id':['slug_bigbox']})
-            ]
+    def postprocess_html(self, soup, first):
+        for tag in soup.findAll(name=['article', 'header']):
+            tag.name = 'div'
+        return soup
+
+    def newsweek_sections(self):
+        soup = self.index_to_soup(self.INDEX)
+        for a in soup.findAll('a', title='Primary tag', href=True):
+            yield (string.capitalize(self.tag_to_string(a)),
+                    self.BASE_URL+a['href'])
-    keep_only_tags = [{'class':['article HorizontalHeader',
-        'articlecontent','photoBox', 'article columnist first']}, ]
-    recursions = 1
-    match_regexps = [r'http://www.newsweek.com/id/\S+/page/\d+']
-    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
-
-    def find_title(self, section):
-        d = {'scope':'Scope', 'thetake':'The Take',
-                'features':'Features',
-                None:'Departments', 'culture':'Culture'}
-        ans = None
-        a = section.find('a', attrs={'name':True})
-        if a is not None:
-            ans = a['name']
-        return d.get(ans, ans)
-
-
-    def find_articles(self, section):
-        ans = []
-        for x in section.findAll('h5'):
-            title = ' '.join(x.findAll(text=True)).strip()
-            a = x.find('a')
-            if not a: continue
-            href = a['href']
-            ans.append({'title':title, 'url':href, 'description':'', 'date': strftime('%a, %d %b')})
-        if not ans:
-            for x in section.findAll('div', attrs={'class':'hdlItem'}):
-                a = x.find('a', href=True)
-                if not a : continue
-                title = ' '.join(a.findAll(text=True)).strip()
-                href = a['href']
-                if 'http://xtra.newsweek.com' in href: continue
-                ans.append({'title':title, 'url':href, 'description':'', 'date': strftime('%a, %d %b')})
-
-        #for x in ans:
-        #    x['url'] += '/output/print'
-        return ans
+    def newsweek_parse_section_page(self, soup):
+        for article in soup.findAll('article', about=True,
+                attrs={'class':'stream-item'}):
+            title = article.find(attrs={'property': 'dc:title'})
+            if title is None: continue
+            title = self.tag_to_string(title)
+            url = self.BASE_URL + article['about']
+            desc = ''
+            author = article.find(attrs={'property':'dc:creator'})
+            if author:
+                desc = u'by %s. '%self.tag_to_string(author)
+            p = article.find(attrs={'property':'dc:abstract'})
+            if p is not None:
+                for a in p.findAll('a'): a.extract()
+                desc += self.tag_to_string(p)
+            t = article.find('time', attrs={'property':'dc:created'})
+            date = ''
+            if t is not None:
+                date = u' [%s]'%self.tag_to_string(t)
+            self.log('\tFound article:', title, 'at', url)
+            self.log('\t\t', desc)
+            yield {'title':title, 'url':url, 'description':desc, 'date':date}

     def parse_index(self):
-        soup = self.get_current_issue()
-        if not soup:
-            raise RuntimeError('Unable to connect to newsweek.com. Try again later.')
-        sections = soup.findAll('div', attrs={'class':'featurewell'})
-        titles = map(self.find_title, sections)
-        articles = map(self.find_articles, sections)
-        ans = list(zip(titles, articles))
-        def fcmp(x, y):
-            tx, ty = x[0], y[0]
-            if tx == "Features": return cmp(1, 2)
-            if ty == "Features": return cmp(2, 1)
-            return cmp(tx, ty)
-        return sorted(ans, cmp=fcmp)
-
-    def ensure_html(self, soup):
-        root = soup.find(name=True)
-        if root.name == 'html': return soup
-        nsoup = BeautifulSoup('<html><head></head><body></body></html>')
-        nroot = nsoup.find(name='body')
-        for x in soup.contents:
-            if getattr(x, 'name', False):
-                x.extract()
-                nroot.insert(len(nroot), x)
-        return nsoup
-
-    def postprocess_html(self, soup, first_fetch):
-        if not first_fetch:
-            h1 = soup.find(id='headline')
-            if h1:
-                h1.extract()
-            div = soup.find(attrs={'class':'articleInfo'})
-            if div:
-                div.extract()
-        divs = list(soup.findAll('div', 'pagination'))
-        if not divs:
-            return self.ensure_html(soup)
-        for div in divs[1:]: div.extract()
-        all_a = divs[0].findAll('a', href=True)
-        divs[0]['style']="display:none"
-        if len(all_a) > 1:
-            all_a[-1].extract()
-        test = re.compile(self.match_regexps[0])
-        for a in soup.findAll('a', href=test):
-            if a not in all_a:
-                del a['href']
-        return self.ensure_html(soup)
-
-    def get_current_issue(self):
-        soup = self.index_to_soup('http://www.newsweek.com')
-        div = soup.find('div', attrs={'class':re.compile('more-from-mag')})
-        if div is None: return None
-        a = div.find('a')
-        if a is not None:
-            href = a['href'].split('#')[0]
-            return self.index_to_soup(href)
-
-    def get_cover_url(self):
-        cover_url = None
-        soup = self.index_to_soup('http://www.newsweek.com')
-        link_item = soup.find('div',attrs={'class':'cover-image'})
-        if link_item and link_item.a and link_item.a.img:
-            cover_url = link_item.a.img['src']
-        return cover_url
+        sections = []
+        for section, shref in self.newsweek_sections():
+            self.log('Processing section', section, shref)
+            articles = []
+            soups = [self.index_to_soup(shref)]
+            na = soups[0].find('a', rel='next')
+            if na:
+                soups.append(self.index_to_soup(self.BASE_URL+na['href']))
+            for soup in soups:
+                articles.extend(self.newsweek_parse_section_page(soup))
+                if self.test and len(articles) > 1:
+                    break
+            if articles:
+                sections.append((section, articles))
+            if self.test and len(sections) > 1:
+                break
+        return sections

-    def postprocess_book(self, oeb, opts, log) :
-
-        def extractByline(href) :
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
-            byline = soup.find(True,attrs={'class':'authorInfo'})
-            byline = self.tag_to_string(byline) if byline is not None else ''
-            issueDate = soup.find(True,attrs={'class':'issueDate'})
-            issueDate = self.tag_to_string(issueDate) if issueDate is not None else ''
-            issueDate = re.sub(',','', issueDate)
-            if byline > '' and issueDate > '' :
-                return byline + ' | ' + issueDate
-            else :
-                return byline + issueDate
-
-        def extractDescription(href) :
-            soup = BeautifulSoup(str(oeb.manifest.hrefs[href]))
-            description = soup.find(True,attrs={'name':'description'})
-            if description is not None and description.has_key('content'):
-                description = description['content']
-                if description.startswith('Newsweek magazine online plus') :
-                    description = soup.find(True, attrs={'class':'story'})
-                    firstPara = soup.find('p')
-                    description = self.tag_to_string(firstPara)
-            else :
-                description = soup.find(True, attrs={'class':'story'})
-                firstPara = soup.find('p')
-                description = self.tag_to_string(firstPara)
-            return description
-
-        for section in oeb.toc :
-            for article in section :
-                if article.author is None :
-                    article.author = extractByline(article.href)
-                if article.description is None :
-                    article.description = extractDescription(article.href)
-        return

From 721b48038e75a6992b849379b5f685458caa45b3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 16:07:40 -0600
Subject: [PATCH 2/8] Fix minor multiple location ondevice bug

---
 src/calibre/gui2/device.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index d3c2e4f10f..181d0c784b 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -1123,12 +1123,12 @@ class DeviceGUI(object):
             if cache:
                 if id in cache['db_ids']:
                     loc[i] = True
-                    break
+                    continue
                 if mi.authors and \
                         re.sub('(?u)\W|[_]', '', authors_to_string(mi.authors).lower()) \
                         in cache['authors']:
                     loc[i] = True
-                    break
+                    continue
         return loc

     def set_books_in_library(self, booklists, reset=False):

From a529cb0303f22329214012e280d1ff026a8942a7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 18:21:39 -0600
Subject: [PATCH 3/8] Fix #5662 (&#60; hexa entity problem)

---
 src/calibre/ebooks/conversion/preprocess.py | 10 ++++++++--
 src/calibre/ebooks/oeb/base.py              |  1 -
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 40c67453b2..7a7f362169 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -14,8 +14,14 @@ XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
 SVG_NS   = 'http://www.w3.org/2000/svg'
 XLINK_NS = 'http://www.w3.org/1999/xlink'

-convert_entities = functools.partial(entity_to_unicode, exceptions=['quot',
-    'apos', 'lt', 'gt', 'amp', '#60', '#62'])
+convert_entities = functools.partial(entity_to_unicode,
+        result_exceptions = {
+            u'<' : '&lt;',
+            u'>' : '&gt;',
+            u"'" : '&apos;',
+            u'"' : '&quot;',
+            u'&' : '&amp;',
+        })
 _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)

 LIGATURES = {
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 79f9f15248..76e2cef3bb 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -787,7 +787,6 @@ class Manifest(object):

         data = self.oeb.decode(data)
         data = self.oeb.html_preprocessor(data)
-
         # Remove DOCTYPE declaration as it messes up parsing
         # In particular, it causes tostring to insert xmlns
         # declarations, which messes up the coercing logic

From 900ff7204b12eb15d65df68b43a427ab38962d95 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 18:25:17 -0600
Subject: [PATCH 4/8] Fix #5654 (No Default Cover causes conversion error)

---
 src/calibre/ebooks/oeb/transforms/cover.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/calibre/ebooks/oeb/transforms/cover.py b/src/calibre/ebooks/oeb/transforms/cover.py
index ecdc1294ad..4d41ab14b4 100644
--- a/src/calibre/ebooks/oeb/transforms/cover.py
+++ b/src/calibre/ebooks/oeb/transforms/cover.py
@@ -136,6 +136,8 @@ class CoverManager(object):
             href = g['cover'].href
         else:
             href = self.default_cover()
+        if href is None:
+            return
         width, height = self.inspect_cover(href)
         if width is None or height is None:
             self.log.warning('Failed to read cover dimensions')

From 7213c1e4b61cb13ca40d01040461a08915be7573 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 18:40:12 -0600
Subject: [PATCH 5/8] Regex builder: Convert entities so people don't use them in building their regexes.
 Fixes #5549 (Not removing header/footer)

---
 src/calibre/gui2/convert/regex_builder.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/gui2/convert/regex_builder.py b/src/calibre/gui2/convert/regex_builder.py
index 58e1d1ae45..6fa0fa5fe4 100644
--- a/src/calibre/gui2/convert/regex_builder.py
+++ b/src/calibre/gui2/convert/regex_builder.py
@@ -14,6 +14,7 @@ from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
 from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
 from calibre.gui2 import error_dialog, choose_files
 from calibre.ebooks.oeb.iterator import EbookIterator
+from calibre.ebooks.conversion.preprocess import convert_entities
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog

 class RegexBuilder(QDialog, Ui_RegexBuilder):
@@ -87,8 +88,10 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
         self.iterator = EbookIterator(pathtoebook)
         self.iterator.__enter__(only_input_plugin=True)
         text = [u'']
+        ent_pat = re.compile(r'&(\S+?);')
         for path in self.iterator.spine:
             html = open(path, 'rb').read().decode('utf-8', 'replace')
+            html = ent_pat.sub(convert_entities, html)
             text.append(html)
         self.preview.setPlainText('\n---\n'.join(text))

From 7a737aa3a1d2829ac28df6cb4825f000cb9b8433 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 19:01:25 -0600
Subject: [PATCH 6/8] Fix warnings when compiling user manual

---
 src/calibre/customize/__init__.py |  6 +-----
 src/calibre/manual/conversion.rst |  6 +++---
 src/calibre/manual/faq.rst        |  2 +-
 src/calibre/web/feeds/news.py     |  6 +++---
 4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/calibre/customize/__init__.py b/src/calibre/customize/__init__.py
index 4eaaf3b90a..9a018231ef 100644
--- a/src/calibre/customize/__init__.py
+++ b/src/calibre/customize/__init__.py
@@ -29,7 +29,7 @@ class Plugin(object):
     '''

     #: List of platforms this plugin works on
-    #: For example: ``['windows', 'osx', 'linux']
+    #: For example: ``['windows', 'osx', 'linux']``
     supported_platforms = []

     #: The name of this plugin. You must set it something other
@@ -214,10 +214,8 @@ class MetadataReaderPlugin(Plugin):
         Return metadata for the file represented by stream (a file like
         object that supports reading). Raise an exception when there is an
         error with the input data.
-
         :param type: The type of file. Guaranteed to be one of the entries
                      in :attr:`file_types`.
-
         :return: A :class:`calibre.ebooks.metadata.MetaInformation` object
         '''
         return None
@@ -245,11 +243,9 @@ class MetadataWriterPlugin(Plugin):
         Set metadata for the file represented by stream (a file like
         object that supports reading). Raise an exception when there is an
         error with the input data.
-
         :param type: The type of file. Guaranteed to be one of the entries
                      in :attr:`file_types`.
         :param mi: A :class:`calibre.ebooks.metadata.MetaInformation` object
-
         '''
         pass

diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst
index ee148c79c7..cd8abd0493 100644
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@@ -453,7 +453,7 @@ as HTML and then convert the resulting HTML file with |app|. When saving as HTML
 There is a Word macro package that can automate the conversion of Word documents using |app|. It also makes
 generating the Table of Contents much simpler. It is called BookCreator and is available for free
-`here `_.
+at `mobileread `_.
 Convert TXT documents
 ~~~~~~~~~~~~~~~~~~~~~~

@@ -493,7 +493,7 @@ TXT input supports a number of options to differentiate how paragraphs are detec
     allows for basic formatting to be added to TXT documents, such as bold, italics, section headings, tables,
     lists, a Table of Contents, etc. Marking chapter headings with a leading # and setting the chapter XPath
     detection expression to "//h:h1" is the easiest way to have a proper table of contents generated from
     a TXT document.
-    You can learn more about the markdown syntax `here `_.
+    You can learn more about the markdown syntax at `daringfireball `_.

 Convert PDF documents
@@ -540,7 +540,7 @@ EPUB advanced formatting demo
 Various advanced formatting for EPUB files is demonstrated in this `demo file `_.
 The file was created from hand coded HTML using calibre and is meant to be used as a template for your own
 EPUB creation efforts.
-The source HTML it was created from is available `here `_. The settings used to create the
+The source HTML it was created from is available `demo.zip `_. The settings used to create the
 EPUB from the ZIP file are::

     ebook-convert demo.zip .epub -vv --authors "Kovid Goyal" --language en --level1-toc '//*[@class="title"]' --disable-font-rescaling --page-breaks-before / --no-default-epub-cover

diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index f7329fb54d..e606505194 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -133,7 +133,7 @@ Can I use the collections feature of the SONY reader?
 turned into a collection on the reader. Note that the PRS-500 does not support
 collections for books stored on the SD card. The PRS-505 does.

 How do I use |app| with my iPad/iPhone/iTouch?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 You can access your calibre library on a iPad/iPhone/iTouch over the air using the calibre content server.

diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 26b3ad0593..9faabb2615 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -146,7 +146,7 @@ class BasicNewsRecipe(Recipe):
     #: If True empty feeds are removed from the output.
     #: This option has no effect if parse_index is overriden in
     #: the sub class. It is meant only for recipes that return a list
-    #: of feeds using :member:`feeds` or :method:`get_feeds`.
+    #: of feeds using `feeds` or :method:`get_feeds`.
     remove_empty_feeds = False

     #: List of regular expressions that determines which links to follow
@@ -256,7 +256,7 @@ class BasicNewsRecipe(Recipe):
     #: The CSS that is used to styles the templates, i.e., the navigation bars and
     #: the Tables of Contents. Rather than overriding this variable, you should
-    #: use :member:`extra_css` in your recipe to customize look and feel.
+    #: use `extra_css` in your recipe to customize look and feel.
     template_css = u'''
             .article_date {
                 color: gray; font-family: monospace;
@@ -506,7 +506,7 @@ class BasicNewsRecipe(Recipe):
     def get_obfuscated_article(self, url):
         '''
-        If you set :member:`articles_are_obfuscated` this method is called with
+        If you set `articles_are_obfuscated` this method is called with
         every article URL. It should return the path to a file on the
         filesystem that contains the article HTML. That file is processed
         by the recursive HTML fetching engine, so it can contain links to
         pages/images on the web.
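Patches 3 and 5 share one mechanism: a regex finds anything shaped like an entity, entity_to_unicode() decodes it, and the result_exceptions table maps any decoded character that is XML-reserved back to a named entity, so the numeric spellings &#60; and &#x3c; normalize to &lt; exactly like the named form. The sketch below is a minimal, self-contained illustration of that idea in modern Python; it is not calibre's entity_to_unicode, and convert_entity is a hypothetical name used only here:

    import re
    from html.entities import name2codepoint

    # Decoded characters that must stay escaped in XML/HTML source.
    RESULT_EXCEPTIONS = {'<': '&lt;', '>': '&gt;', "'": '&apos;',
                         '"': '&quot;', '&': '&amp;'}

    def convert_entity(match):
        ent = match.group(1)
        if ent.startswith('#'):               # numeric: decimal or hex
            num = ent[1:]
            code = int(num[1:], 16) if num[:1] in 'xX' else int(num)
            result = chr(code)
        else:                                 # named entity
            cp = name2codepoint.get(ent)
            if cp is None:
                return match.group(0)         # unknown: leave untouched
            result = chr(cp)
        # Map XML-reserved results back to a named entity, whatever the
        # original spelling (&#60;, &#x3C; and &lt; all end up as &lt;).
        return RESULT_EXCEPTIONS.get(result, result)

    ent_pat = re.compile(r'&(\S+?);')         # same pattern as patch 5
    print(ent_pat.sub(convert_entity, 'a &#60; b &amp; c &#x3E; d &eacute;'))
    # -> a &lt; b &amp; c &gt; d é

Running the regex-builder preview through such a pass (patch 5) means users write their header/footer regexes against the same normalized text the conversion pipeline actually sees.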
From c2f655ad7188582a2709f035e7e46cb7ff82ad4b Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 19:24:01 -0600
Subject: [PATCH 7/8] When listing series, sort ignoring leading English
 prepositions. Fixes #5090 (Series Sort)

---
 src/calibre/library/caches.py         | 5 +++--
 src/calibre/library/database2.py      | 3 +++
 src/calibre/library/server/content.py | 4 ++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 93891ee92b..83c56c5395 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -17,7 +17,7 @@ from calibre.utils.config import tweaks
 from calibre.utils.date import parse_date, now, UNDEFINED_DATE
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.utils.pyparsing import ParseException
-# from calibre.library.field_metadata import FieldMetadata
+from calibre.ebooks.metadata import title_sort

 class CoverCache(QThread):
@@ -564,7 +564,8 @@ class ResultCache(SearchQueryParser):
     def seriescmp(self, x, y):
         sidx = self.FIELD_MAP['series']
         try:
-            ans = cmp(self._data[x][sidx].lower(), self._data[y][sidx].lower())
+            ans = cmp(title_sort(self._data[x][sidx].lower()),
+                      title_sort(self._data[y][sidx].lower()))
         except AttributeError: # Some entries may be None
             ans = cmp(self._data[x][sidx], self._data[y][sidx])
         if ans != 0: return ans
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 4107d327ce..f27a42beee 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -725,6 +725,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             categories[category] = [Tag(formatter(r[1]), count=r[2], id=r[0],
                                         icon=icon, tooltip = tooltip)
                                     for r in data if item_not_zero_func(r)]
+            if category == 'series':
+                categories[category].sort(cmp=lambda x,y:cmp(title_sort(x.name),
+                                                             title_sort(y.name)))

         # We delayed computing the standard formats category because it does not
         # use a view, but is computed dynamically
diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py
index 8638035c88..12bd786322 100644
--- a/src/calibre/library/server/content.py
+++ b/src/calibre/library/server/content.py
@@ -16,7 +16,7 @@ except ImportError:

 from calibre import fit_image, guess_type
 from calibre.utils.date import fromtimestamp
-
+from calibre.ebooks.metadata import title_sort

 class ContentServer(object):
@@ -67,7 +67,7 @@ class ContentServer(object):
     def seriescmp(self, x, y):
         si = self.db.FIELD_MAP['series']
         try:
-            ans = cmp(x[si].lower(), y[si].lower())
+            ans = cmp(title_sort(x[si].lower()), title_sort(y[si].lower()))
         except AttributeError: # Some entries may be None
             ans = cmp(x[si], y[si])
         if ans != 0: return ans

From f4bbf10ee348fb7f998c20301073c7fadf0dac99 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 19:49:53 -0600
Subject: [PATCH 8/8] LRF Input: Handle ampersands and other XML reserved
 characters correctly when converting LRF documents.
 Fixes #4923 (Ampersands in input text get lost in output)

---
 src/calibre/ebooks/lrf/objects.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/lrf/objects.py b/src/calibre/ebooks/lrf/objects.py
index 0045e679a3..8f69e94013 100644
--- a/src/calibre/ebooks/lrf/objects.py
+++ b/src/calibre/ebooks/lrf/objects.py
@@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal '
 import struct, array, zlib, cStringIO, collections, re

 from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
-from calibre import entity_to_unicode
+from calibre import entity_to_unicode, prepare_string_for_xml
 from calibre.ebooks.lrf.tags import Tag

 ruby_tags = {
@@ -870,7 +870,7 @@ class Text(LRFStream):
         open_containers = collections.deque()
         for c in self.content:
             if isinstance(c, basestring):
-                s += c
+                s += prepare_string_for_xml(c)
             elif c is None:
                 if open_containers:
                     p = open_containers.pop()
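Before this last fix, raw text runs from the LRF stream were concatenated straight into the generated XML, so a bare '&' or '<' produced ill-formed markup and the character was dropped or mangled by downstream parsers, which is exactly bug #4923. A rough standalone equivalent of the escaping that calibre's prepare_string_for_xml helper performs (illustrative only; escape_for_xml is a made-up name, not the calibre function):

    def escape_for_xml(raw):
        # Escape '&' first, otherwise the entities produced by the other
        # two substitutions would themselves be double-escaped.
        return raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    print(escape_for_xml('Dombey & Son <abridged>'))
    # -> Dombey &amp; Son &lt;abridged&gt;

Like patch 7's reuse of title_sort(), the fix reuses an existing helper rather than adding new escaping logic to the LRF code path.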