From 5804d188d9267e968582d3e0e0482368dd77a41f Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 11 Sep 2011 08:32:54 -0400 Subject: [PATCH 01/45] Fix for bug #846183: unhandled exception converting to PDF. --- src/calibre/ebooks/pdf/writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index ebe6533419..ac3708ff47 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -202,7 +202,7 @@ class PDFWriter(QObject): # {{{ inputPDF = PdfFileReader(item_stream) for page in inputPDF.pages: outPDF.addPage(page) - outPDF.write(self.out_stream) + outPDF.write(self.out_stream) finally: self._delete_tmpdir() self.loop.exit(0) From f9a358f77041052253ef643cafe82f4a60f12b09 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 11 Sep 2011 08:50:41 -0400 Subject: [PATCH 02/45] Fix PDF output on OSX: Force the use of OSX's internal PDF engine instead of using Qt's. --- src/calibre/ebooks/pdf/writer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index ac3708ff47..fe095ad441 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -11,6 +11,7 @@ Write content to PDF. import os import shutil +from calibre import isosx from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks.pdf.pageoptions import unit, paper_size, \ orientation @@ -164,6 +165,8 @@ class PDFWriter(QObject): # {{{ self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue))) printer = get_pdf_printer(self.opts) printer.setOutputFileName(item_path) + if isosx: + printer.setOutputFormat(QPrinter.NativeFormat) self.view.print_(printer) printer.abort() self._render_book() From be691bf8fcfb6eb5a5c3d08d00c8076a1a1eb412 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 11 Sep 2011 11:12:21 -0400 Subject: [PATCH 03/45] Add more places to set the PDF engine for OS X. --- src/calibre/ebooks/pdf/writer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index fe095ad441..05d874c9c3 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -165,6 +165,10 @@ class PDFWriter(QObject): # {{{ self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue))) printer = get_pdf_printer(self.opts) printer.setOutputFileName(item_path) + # We have to set the engine to Native on OS X after the call to set + # filename. Setting a filename with .pdf as the extension causes + # Qt to set the format to use Qt's PDF engine even if native was + # previously set on the printer. 
if isosx: printer.setOutputFormat(QPrinter.NativeFormat) self.view.print_(printer) @@ -182,6 +186,8 @@ class PDFWriter(QObject): # {{{ item_path = os.path.join(self.tmp_path, 'cover.pdf') printer = get_pdf_printer(self.opts) printer.setOutputFileName(item_path) + if isosx: + printer.setOutputFormat(QPrinter.NativeFormat) self.combine_queue.insert(0, item_path) p = QPixmap() p.loadFromData(self.cover_data) @@ -232,6 +238,8 @@ class ImagePDFWriter(object): def render_images(self, outpath, mi, items): printer = get_pdf_printer(self.opts, for_comic=True) printer.setOutputFileName(outpath) + if isosx: + printer.setOutputFormat(QPrinter.NativeFormat) printer.setDocName(mi.title) printer.setCreator(u'%s [%s]'%(__appname__, __version__)) # Seems to be no way to set author From bd58a50675d9382a47e7e88a1bd1cb8ac7cc9bfc Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 09:29:40 -0600 Subject: [PATCH 04/45] Improved Guardian/Observer --- recipes/guardian.recipe | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index 124820d0a1..05d6616ace 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -15,8 +15,10 @@ class Guardian(BasicNewsRecipe): title = u'The Guardian and The Observer' if date.today().weekday() == 6: base_url = "http://www.guardian.co.uk/theobserver" + cover_pic = 'Observer digital edition' else: base_url = "http://www.guardian.co.uk/theguardian" + cover_pic = 'Guardian digital edition' __author__ = 'Seabound and Sujata Raman' language = 'en_GB' @@ -79,7 +81,7 @@ class Guardian(BasicNewsRecipe): # soup = self.index_to_soup("http://www.guardian.co.uk/theobserver") soup = self.index_to_soup(self.base_url) # find cover pic - img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'}) + img = soup.find( 'img',attrs ={'alt':self.cover_pic}) if img is not None: self.cover_url = img['src'] # end find cover pic From f3d9c59cfb8bccce17a7b19d9d981731eca27295 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 09:44:41 -0600 Subject: [PATCH 05/45] Fix Business Week --- recipes/business_week.recipe | 168 +++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 78 deletions(-) diff --git a/recipes/business_week.recipe b/recipes/business_week.recipe index fcb28d1d3e..ca9078a112 100644 --- a/recipes/business_week.recipe +++ b/recipes/business_week.recipe @@ -1,93 +1,105 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' -__docformat__ = 'restructuredtext en' - +__copyright__ = '2008 Kovid Goyal kovid@kovidgoyal.net, 2010 Darko Miletic ' ''' -businessweek.com +www.businessweek.com ''' from calibre.web.feeds.news import BasicNewsRecipe class BusinessWeek(BasicNewsRecipe): - title = 'Business Week' - description = 'Business News, Stock Market and Financial Advice' - __author__ = 'ChuckEggDotCom and Sujata Raman' - language = 'en' + title = 'Business Week' + __author__ = 'Kovid Goyal and Darko Miletic' + description = 'Read the latest international business news & stock market news. Get updated company profiles, financial advice, global economy and technology news.' + publisher = 'Bloomberg L.P.' 
+ category = 'Business, business news, stock market, stock market news, financial advice, company profiles, financial advice, global economy, technology news' + oldest_article = 7 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en' + remove_empty_feeds = True + publication_type = 'magazine' + cover_url = 'http://images.businessweek.com/mz/covers/current_120x160.jpg' + masthead_url = 'http://assets.businessweek.com/images/bw-logo.png' + extra_css = """ + body{font-family: Helvetica,Arial,sans-serif } + img{margin-bottom: 0.4em; display:block} + .tagline{color: gray; font-style: italic} + .photoCredit{font-size: small; color: gray} + """ - oldest_article = 7 - max_articles_per_feed = 10 - no_stylesheets = True + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - recursions = 1 - match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*'] - extra_css = ''' - h1{font-family :Arial,Helvetica,sans-serif; font-size:large;} - .news_story_title{font-family :Arial,Helvetica,sans-serif; font-size:large;font-weight:bold;} - h2{font-family :Arial,Helvetica,sans-serif; font-size:medium;color:#666666;} - h3{text-transform:uppercase;font-family :Arial,Helvetica,sans-serif; font-size:large;font-weight:bold;} - h4{font-family :Arial,Helvetica,sans-serif; font-size:small;font-weight:bold;} - p{font-family :Arial,Helvetica,sans-serif; } - #lede600{font-size:x-small;} - #storybody{font-size:x-small;} - p{font-family :Arial,Helvetica,sans-serif;} - .strap{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#064599;} - .byline{font-family :Arial,Helvetica,sans-serif; font-size:x-small;} - .postedBy{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;} - .trackback{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;} - .date{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;} - .wrapper{font-family :Arial,Helvetica,sans-serif; font-size:x-small;} - .photoCredit{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;} - .tagline{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;} - .pageCount{color:#666666;font-family :Arial,Helvetica,sans-serif; font-size:x-small;} - .note{font-family :Arial,Helvetica,sans-serif; font-size:small;color:#666666;font-style:italic;} - .highlight{font-family :Arial,Helvetica,sans-serif; font-size:small;background-color:#FFF200;} - .annotation{font-family :Arial,Helvetica,sans-serif; font-size:x-small;color:#666666;} - ''' - - remove_tags = [ dict(name='div', attrs={'id':["log","feedback","footer","secondarynav","secondnavbar","header","email","bw2-header","column2","wrapper-bw2-footer","wrapper-mgh-footer","inset","commentForm","commentDisplay","bwExtras","bw2-umbrella","readerComments","leg","rightcol"]}), - dict(name='div', attrs={'class':["menu",'sponsorbox smallertext',"TopNavTile","graybottom leaderboard"]}), - dict(name='img', alt ="News"), - dict(name='td', width ="1"), - ] + remove_tags = [ + dict(attrs={'class':'inStory'}) + ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td']) + ,dict(attrs={'id':['inset','videoDisplay']}) + ] + keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody','article_body','articleBody']})] + remove_attributes = ['lang'] + match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*'] - feeds = [ - (u'Top Stories', 
u'http://www.businessweek.com/topStories/rss/topStories.rss'), - (u'Top News', u'http://www.businessweek.com/rss/bwdaily.rss'), - (u'Asia', u'http://www.businessweek.com/rss/asia.rss'), - (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'), - (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'), - (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'), - (u'Europe', u'http://www.businessweek.com/rss/europe.rss'), - (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'), - (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'), - (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'), - (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'), - (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'), - (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'), - (u'Technology', u'http://www.businessweek.com/rss/technology.rss'), - (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'), - (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'), - (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'), - (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'), - (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'), - (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'), - ] + + feeds = [ + (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'), + (u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ), + (u'Asia', u'http://www.businessweek.com/rss/asia.rss'), + (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'), + (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'), + (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'), + (u'Europe', u'http://www.businessweek.com/rss/europe.rss'), + (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'), + (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'), + (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'), + (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'), + (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'), + (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'), + (u'Technology', u'http://www.businessweek.com/rss/technology.rss'), + (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'), + (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'), + (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'), + (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'), + (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'), + (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'), + ] def get_article_url(self, article): - url = article.get('guid', None) + if 'podcasts' in url: + return None + if 'surveys' in url: + return None + if 'images' in url: + return None + if 'feedroom' in url: + return None + if '/magazine/toc/' in url: + return None + rurl, sep, rest = url.rpartition('?') + if rurl: + return rurl + return rest - if 'podcasts' in url or 'surveys' in url: - url = None - - return url - - def postprocess_html(self, soup, first): - - for tag in soup.findAll(name=['ul','li','table','td','tr','span']): - tag.name = 'div' - for tag in soup.findAll(name= 'div',attrs={ 'id':'pageNav'}): - tag.extract() - return soup + def print_version(self, url): + if '/news/' in url or '/blog/ in url': + return url + if '/magazine' in url: + rurl 
= url.replace('http://www.businessweek.com/','http://www.businessweek.com/printer/') + else: + rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/') + return rurl.replace('/investing/','/investor/') + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup From 9d199ec40af48b4603fde4572921734142eebe78 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 10:18:10 -0600 Subject: [PATCH 06/45] Dummy commit to record that PDF output regression in 0.8.18 was fixed. Fixes #846183 (unhandled exception converting to PDF) From 1411e439200c859764abac541ea676debe544503 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 10:19:25 -0600 Subject: [PATCH 07/45] ... --- recipes/business_week.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/business_week.recipe b/recipes/business_week.recipe index ca9078a112..fe98d9fa00 100644 --- a/recipes/business_week.recipe +++ b/recipes/business_week.recipe @@ -87,7 +87,7 @@ class BusinessWeek(BasicNewsRecipe): return rest def print_version(self, url): - if '/news/' in url or '/blog/ in url': + if '/news/' in url or '/blog/' in url: return url if '/magazine' in url: rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/printer/') From 2cd448687d263a9d926ad2b910b5ff16c1f419d8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 10:51:37 -0600 Subject: [PATCH 08/45] author_to_author_sort(): handle multiple suffixes --- resources/default_tweaks.py | 2 +- src/calibre/ebooks/metadata/__init__.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index ead9995eb3..f12121dd89 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -61,7 +61,7 @@ authors_completer_append_separator = False # selecting 'manage authors', and pressing 'Recalculate all author sort values'. # The author name suffixes are words that are ignored when they occur at the # end of an author name. The case of the suffix is ignored and trailing -# periods are automatically handled. +# periods are automatically handled. The same is true for prefixes. # The author name copy words are a set of words which if they occur in an # author name cause the automatically generated author sort string to be # identical to the author name. This means that the sort for a string like Acme diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index c3a229fe3c..07fae187ba 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -65,20 +65,27 @@ def author_to_author_sort(author, method=None): suffixes = set([x.lower() for x in tweaks['author_name_suffixes']]) suffixes |= set([x+u'.' 
for x in suffixes]) - last = tokens[-1].lower() - suffix = None - if last in suffixes: - suffix = tokens[-1] - tokens = tokens[:-1] + suffix = u'' + while True: + if not tokens: + return author + last = tokens[-1].lower() + if last in suffixes: + suffix = tokens[-1] + ' ' + suffix + tokens = tokens[:-1] + else: + break + suffix = suffix.strip() if method == u'comma' and u',' in u''.join(tokens): return author atokens = tokens[-1:] + tokens[:-1] + num_toks = len(atokens) if suffix: atokens.append(suffix) - if method != u'nocomma' and len(atokens) > 1: + if method != u'nocomma' and num_toks > 1: atokens[0] += u',' return u' '.join(atokens) From 400c68e20fc35426717d3dc9fca0808ea176522e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 14:00:51 -0600 Subject: [PATCH 09/45] Hindustan Times by Krittika Goyal --- recipes/hindustan_times.recipe | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 recipes/hindustan_times.recipe diff --git a/recipes/hindustan_times.recipe b/recipes/hindustan_times.recipe new file mode 100644 index 0000000000..f228757c70 --- /dev/null +++ b/recipes/hindustan_times.recipe @@ -0,0 +1,29 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class HindustanTimes(BasicNewsRecipe): + title = u'Hindustan Times' + language = 'en_IN' + __author__ = 'Krittika Goyal' + oldest_article = 1 #days + max_articles_per_feed = 25 + use_embedded_content = False + + no_stylesheets = True + auto_cleanup = True + + feeds = [ + ('News', + 'http://feeds.hindustantimes.com/HT-NewsSectionPage-Topstories'), + ('Views', + 'http://feeds.hindustantimes.com/HT-ViewsSectionpage-Topstories'), + ('Cricket', + 'http://feeds.hindustantimes.com/HT-Cricket-TopStories'), + ('Business', + 'http://feeds.hindustantimes.com/HT-BusinessSectionpage-TopStories'), + ('Entertainment', + 'http://feeds.hindustantimes.com/HT-HomePage-Entertainment'), + ('Lifestyle', + 'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'), +] + + From 8a7100b3386056c352926ba79236a4abd93452a9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 15:15:16 -0600 Subject: [PATCH 10/45] ... 
--- src/calibre/library/server/mobile.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/calibre/library/server/mobile.py b/src/calibre/library/server/mobile.py index 3ce96a2b49..0cb7a86126 100644 --- a/src/calibre/library/server/mobile.py +++ b/src/calibre/library/server/mobile.py @@ -277,12 +277,15 @@ class MobileServer(object): cherrypy.response.headers['Content-Type'] = 'text/html; charset=utf-8' cherrypy.response.headers['Last-Modified'] = self.last_modified(updated) - url_base = "/mobile?search=" + search+";order="+order+";sort="+sort+";num="+str(num) - return html.tostring(build_index(books, num, search, sort, order, + raw = html.tostring(build_index(books, num, search, sort, order, start, len(ids), url_base, CKEYS, self.opts.url_prefix), - encoding='utf-8', include_meta_content_type=True, + encoding='utf-8', pretty_print=True) + # tostring's include_meta_content_type is broken + raw = raw.replace('', '\n' + '') + return raw From d76c312c89c89a7990d247e0a9e81f9aa571b7c8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 15:49:03 -0600 Subject: [PATCH 11/45] Fix Inquirer.net --- recipes/inquirer_net.recipe | 45 +++++++++---------------------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/recipes/inquirer_net.recipe b/recipes/inquirer_net.recipe index 3a3d5b9e89..30f2519f8b 100644 --- a/recipes/inquirer_net.recipe +++ b/recipes/inquirer_net.recipe @@ -7,56 +7,33 @@ www.inquirer.net ''' from calibre.web.feeds.recipes import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class InquirerNet(BasicNewsRecipe): title = 'Inquirer.net' - __author__ = 'Darko Miletic' + __author__ = 'Krittika Goyal' description = 'News from Philipines' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - encoding = 'cp1252' + encoding = 'utf8' publisher = 'inquirer.net' category = 'news, politics, philipines' lang = 'en' language = 'en' - extra_css = ' .fontheadline{font-size: x-large} .fontsubheadline{font-size: large} .fontkick{font-size: medium}' + use_embedded_content = False - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - , '--ignore-tables' - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - - remove_tags = [dict(name=['object','link','script','iframe','form'])] + no_stylesheets = True + auto_cleanup = True feeds = [ - (u'Breaking news', u'http://services.inquirer.net/rss/breakingnews.xml' ) - ,(u'Top stories' , u'http://services.inquirer.net/rss/topstories.xml' ) - ,(u'Sports' , u'http://services.inquirer.net/rss/brk_breakingnews.xml' ) - ,(u'InfoTech' , u'http://services.inquirer.net/rss/infotech_tech.xml' ) - ,(u'InfoTech' , u'http://services.inquirer.net/rss/infotech_tech.xml' ) - ,(u'Business' , u'http://services.inquirer.net/rss/inq7money_breaking_news.xml' ) - ,(u'Editorial' , u'http://services.inquirer.net/rss/opinion_editorial.xml' ) - ,(u'Global Nation', u'http://services.inquirer.net/rss/globalnation_breakingnews.xml') + (u'Inquirer', u'http://www.inquirer.net/fullfeed') ] - def preprocess_html(self, soup): - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - for item in soup.findAll(style=True): - del item['style'] - return soup 
+ def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + br.set_handle_gzip(True) + return br + - def print_version(self, url): - rest, sep, art = url.rpartition('/view/') - art_id, sp, rrest = art.partition('/') - return 'http://services.inquirer.net/print/print.php?article_id=' + art_id From 7a3babf49eb10d4ca964212c395cda5f32e6673b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 15:53:10 -0600 Subject: [PATCH 12/45] India Today by Krittika Goyal --- recipes/india_today.recipe | 83 ++++----------------- src/calibre/web/feeds/recipes/collection.py | 2 +- 2 files changed, 17 insertions(+), 68 deletions(-) diff --git a/recipes/india_today.recipe b/recipes/india_today.recipe index 604a7f57ad..7b53fe3d65 100644 --- a/recipes/india_today.recipe +++ b/recipes/india_today.recipe @@ -1,76 +1,25 @@ + from calibre.web.feeds.news import BasicNewsRecipe class IndiaToday(BasicNewsRecipe): - - title = 'India Today' - __author__ = 'Kovid Goyal' - language = 'en_IN' - timefmt = ' [%d %m, %Y]' - - oldest_article = 700 - max_articles_per_feed = 10 + title = u'India Today' + language = 'en_IN' + __author__ = 'Krittika Goyal' + oldest_article = 15 #days + max_articles_per_feed = 25 no_stylesheets = True + auto_cleanup = True - remove_tags_before = dict(id='content_story_title') - remove_tags_after = dict(id='rightblockdiv') - remove_tags = [dict(id=['rightblockdiv', 'share_links'])] - - extra_css = '#content_story_title { font-size: 170%; font-weight: bold;}' - conversion_options = { 'linearize_tables': True } - - def it_get_index(self): - soup = self.index_to_soup('http://indiatoday.intoday.in/site/archive') - a = soup.find('a', href=lambda x: x and 'issueId=' in x) - url = 'http://indiatoday.intoday.in/site/'+a.get('href') - img = a.find('img') - self.cover_url = img.get('src') - return self.index_to_soup(url) - - def parse_index(self): - soup = self.it_get_index() - feeds, current_section, current_articles = [], None, [] - for x in soup.findAll(name=['h1', 'a']): - if x.name == 'h1': - if current_section and current_articles: - feeds.append((current_section, current_articles)) - current_section = self.tag_to_string(x) - current_articles = [] - self.log('\tFound section:', current_section) - elif x.name == 'a' and 'Story' in x.get('href', ''): - title = self.tag_to_string(x) - url = x.get('href') - url = url.replace(' ', '%20') - if not url.startswith('/'): - url = 'http://indiatoday.intoday.in/site/' + url - if title and url: - url += '?complete=1' - self.log('\tFound article:', title) - self.log('\t\t', url) - desc = '' - h3 = x.parent.findNextSibling('h3') - if h3 is not None: - desc = 'By ' + self.tag_to_string(h3) - h4 = h3.findNextSibling('h4') - if h4 is not None: - desc = self.tag_to_string(h4) + ' ' + desc - if desc: - self.log('\t\t', desc) - current_articles.append({'title':title, 'description':desc, - 'url':url, 'date':''}) - - if current_section and current_articles: - feeds.append((current_section, current_articles)) - - return feeds - - def postprocess_html(self, soup, first): - a = soup.find(text='Print') - if a is not None: - tr = a.findParent('tr') - if tr is not None: - tr.extract() - return soup + feeds = [ +('Latest News', 'http://indiatoday.intoday.in/rss/article.jsp?sid=4'), +('Cover Story', 'http://indiatoday.intoday.in/rss/article.jsp?sid=30'), +('Nation', 'http://indiatoday.intoday.in/rss/article.jsp?sid=36'), +('States', 'http://indiatoday.intoday.in/rss/article.jsp?sid=21'), +('Economy', 'http://indiatoday.intoday.in/rss/article.jsp?sid=34'), 
+('World', 'http://indiatoday.intoday.in/rss/article.jsp?sid=61'), +('Sport', 'http://indiatoday.intoday.in/rss/article.jsp?sid=41'), +] diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py index 13bae3a554..6b9c3a2129 100644 --- a/src/calibre/web/feeds/recipes/collection.py +++ b/src/calibre/web/feeds/recipes/collection.py @@ -22,7 +22,7 @@ E = ElementMaker(namespace=NS, nsmap={None:NS}) def iterate_over_builtin_recipe_files(): exclude = ['craigslist', 'iht', 'toronto_sun', - 'india_today', 'livemint'] + 'livemint'] d = os.path.dirname base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes') for f in os.listdir(base): From bd5ddfc7ed499b0a9278cf5d1b55ee258d5d0db2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 15:56:00 -0600 Subject: [PATCH 13/45] CIO Magazine by Julio Map --- recipes/cio_magazine.recipe | 128 ++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 recipes/cio_magazine.recipe diff --git a/recipes/cio_magazine.recipe b/recipes/cio_magazine.recipe new file mode 100644 index 0000000000..084a45ff93 --- /dev/null +++ b/recipes/cio_magazine.recipe @@ -0,0 +1,128 @@ +# Los primeros comentarios son las dificultades que he tenido con el Piton +# Cuando da error UTF8 revisa los comentarios (acentos). En notepad++ Search, Goto, posicion y lo ves. +# Editar con Notepad++ Si pone - donde no debe es que ha indentado mal... Edit - Blank operations - tab to space +# He entendido lo que significa el from... son paths dentro de pylib.zip... +# Con from importa solo un simbolo...con import,la libreria completa +from calibre.web.feeds.news import BasicNewsRecipe +# sys no hace falta... lo intente usar para escribir en stderr +from calibre import strftime +# Para convertir el tiempo del articulo +import string, re +# Para usar expresiones regulares +# Visto en pylib.zip... la primera letra es mayuscula +# Estas dos ultimas han sido un vago intento de establecer una cookie (no usado) + +class CIO_Magazine(BasicNewsRecipe): + title = 'CIO Magazine' + oldest_article = 14 + max_articles_per_feed = 100 + auto_cleanup = True + __author__ = 'Julio Map' + description = 'CIO is the leading information brand for today-s busy Chief information Officer - CIO Magazine bi-monthly ' + language = 'en' + encoding = 'utf8' + cover_url = 'http://www.cio.com/homepage/images/hp-cio-logo-linkedin.png' + + remove_tags_before = dict(name='div', attrs={'id':'container'}) +# Absolutamente innecesario... al final he visto un print_version (ver mas adelante) + +# Dentro de una revista dada... +# issue_details contiene el titulo y las secciones de este ejemplar +# DetailModule esta dentro de issue_details contiene las urls y resumenes +# Dentro de un articulo dado... +# Article-default-body contiene el texto. Pero como digo, he encontrado una print_version + + no_stylesheets = True + remove_javascript = True + + def print_version(self,url): + # A esta funcion le llama el sistema... 
no hay que llamarla uno mismo (porque seria llamada dos veces) + # Existe una version imprimible de los articulos cambiando + # http://www.cio.com/article// por + # http://www.cio.com/article/print/ que contiene todas las paginas dentro del div id=container + if url.startswith('/'): + url = 'http://www.cio.com'+url + segments = url.split('/') + printURL = '/'.join(segments[0:4]) + '/print/' + segments[4] +'#' + return printURL + + + def parse_index(self): + ########################################################################### + # This method should be implemented in recipes that parse a website + # instead of feeds to generate a list of articles. Typical uses are for + # news sources that have a Print Edition webpage that lists all the + # articles in the current print edition. If this function is implemented, + # it will be used in preference to BasicNewsRecipe.parse_feeds(). + # + # It must return a list. Each element of the list must be a 2-element + # tuple of the form ('feed title', list of articles). + # + # Each list of articles must contain dictionaries of the form: + # + # { + # 'title' : article title, + # 'url' : URL of print version, + # 'date' : The publication date of the article as a string, + # 'description' : A summary of the article + # 'content' : The full article (can be an empty string). This is used by FullContentProfile + # } + # + # For an example, see the recipe for downloading The Atlantic. + # In addition, you can add 'author' for the author of the article. + ############################################################################### + + # Primero buscamos cual es la ultima revista que se ha creado + soupinicial = self.index_to_soup('http://www.cio.com/magazine') + # Es el primer enlace que hay en el DIV con class content_body + a= soupinicial.find(True, attrs={'class':'content_body'}).find('a', href=True) + INDEX = re.sub(r'\?.*', '', a['href']) + # Como cio.com usa enlaces relativos, le anteponemos el domain name. + if INDEX.startswith('/'): # protegiendonos de que dejen de usarlos + INDEX = 'http://www.cio.com'+INDEX + # Y nos aseguramos en los logs que lo estamos haciendo bien + print ("INDEX en parse_index: ", INDEX) + + # Ya sabemos cual es la revista... procesemosla. + soup = self.index_to_soup(INDEX) + + articles = {} + key = None + feeds = [] + # Para empezar nos quedamos solo con dos DIV, 'heading' y ' issue_item' + # Del primero sacamos las categorias (key) y del segundo las urls y resumenes + for div in soup.findAll(True, + attrs={'class':['heading', 'issue_item']}): + + if div['class'] == 'heading': + key = string.capwords(self.tag_to_string(div.span)) + print ("Key: ",key) # Esto es para depurar + articles[key] = [] + feeds.append(key) + + elif div['class'] == 'issue_item': + a = div.find('a', href=True) + if not a: + continue + url = re.sub(r'\?.*', '', a['href']) + print("url: ",url) # Esto es para depurar + title = self.tag_to_string(a, use_alt=True).strip() # Ya para nota, quitar al final las dos ultimas palabras + pubdate = strftime('%a, %d %b') # No es la fecha de publicacion sino la de colecta + summary = div.find('p') # Dentro de la div 'issue_item' el unico parrafo que hay es el resumen + description = '' # Si hay summary la description sera el summary... 
si no, la dejamos en blanco + + if summary: + description = self.tag_to_string(summary, use_alt=False) + print ("Description = ", description) + + + feed = key if key is not None else 'Uncategorized' # Esto esta copiado del NY times + if not articles.has_key(feed): + articles[feed] = [] + if not 'podcasts' in url: + articles[feed].append( + dict(title=title, url=url, date=pubdate, + description=description, + content='')) + feeds = [(key, articles[key]) for key in feeds if articles.has_key(key)] + return feeds From fe21bf186f74a8b5e50278f2bb1917288b547dca Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 16:12:56 -0600 Subject: [PATCH 14/45] ... --- src/calibre/library/server/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index 26e4d3469e..d18bffc6a2 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -34,7 +34,7 @@ class DispatchController(object): # {{{ def __init__(self, prefix, wsgi=False): self.dispatcher = cherrypy.dispatch.RoutesDispatcher() self.funcs = [] - self.seen = set([]) + self.seen = set() self.prefix = prefix if prefix else '' if wsgi: self.prefix = '' From 7eadf1c5c14bb18160ef1751037430278f7ada4e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Sep 2011 20:29:04 -0600 Subject: [PATCH 15/45] ... --- src/cherrypy/lib/httpauth.py | 6 +++--- src/cherrypy/process/servers.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cherrypy/lib/httpauth.py b/src/cherrypy/lib/httpauth.py index 0b4743d668..f5d87d2b43 100644 --- a/src/cherrypy/lib/httpauth.py +++ b/src/cherrypy/lib/httpauth.py @@ -75,7 +75,7 @@ MD5_SESS = "MD5-sess" AUTH = "auth" AUTH_INT = "auth-int" -SUPPORTED_ALGORITHM = ('md5', MD5, MD5_SESS) +SUPPORTED_ALGORITHM = ('md5', MD5, MD5_SESS) # Changed by Kovid SUPPORTED_QOP = (AUTH, AUTH_INT) ################################################################################ @@ -83,7 +83,7 @@ SUPPORTED_QOP = (AUTH, AUTH_INT) # DIGEST_AUTH_ENCODERS = { MD5: lambda val: md5(val).hexdigest(), - 'md5': lambda val:md5(val).hexdigest(), + 'md5': lambda val:md5(val).hexdigest(), # Added by Kovid MD5_SESS: lambda val: md5(val).hexdigest(), # SHA: lambda val: sha(val).hexdigest(), } @@ -225,7 +225,7 @@ def _A1(params, password): algorithm = params.get ("algorithm", MD5) H = DIGEST_AUTH_ENCODERS[algorithm] - if algorithm in (MD5, 'md5'): + if algorithm in (MD5, 'md5'): # Changed by Kovid # If the "algorithm" directive's value is "MD5" or is # unspecified, then A1 is: # A1 = unq(username-value) ":" unq(realm-value) ":" passwd diff --git a/src/cherrypy/process/servers.py b/src/cherrypy/process/servers.py index 932d28d01f..da469bfad2 100644 --- a/src/cherrypy/process/servers.py +++ b/src/cherrypy/process/servers.py @@ -241,10 +241,10 @@ def wait_for_free_port(host, port): for trial in xrange(50): try: # we are expecting a free port, so reduce the timeout - check_port(host, port, timeout=0.2) + check_port(host, port, timeout=0.2) # Changed by Kovid except IOError: # Give the old server thread time to free the port. - time.sleep(0.2) + time.sleep(0.2) # Changed by Kovid else: return From 1ecfb81a0708b4f7027d5dde6f8b189f8e060933 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 12 Sep 2011 11:48:53 -0600 Subject: [PATCH 16/45] Keyboard shortcuts: Allow use of symbol keys like >,*,etc. 
Fixes #847378 (Error in shortcut-handler) --- src/calibre/gui2/keyboard.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/keyboard.py b/src/calibre/gui2/keyboard.py index 9b0b1d8f69..362a074304 100644 --- a/src/calibre/gui2/keyboard.py +++ b/src/calibre/gui2/keyboard.py @@ -443,7 +443,13 @@ class Editor(QFrame): # {{{ return QWidget.keyPressEvent(self, ev) button = getattr(self, 'button%d'%which) button.setStyleSheet('QPushButton { font-weight: normal}') - sequence = QKeySequence(code|(int(ev.modifiers())&~Qt.KeypadModifier)) + mods = int(ev.modifiers()) & ~Qt.KeypadModifier + txt = unicode(ev.text()) + if txt and txt.lower() == txt.upper(): + # We have a symbol like ! or > etc. In this case the value of code + # already includes Shift, so remove it + mods &= ~Qt.ShiftModifier + sequence = QKeySequence(code|mods) button.setText(sequence.toString(QKeySequence.NativeText)) self.capture = 0 dup_desc = self.dup_check(sequence) From 2bf6e7bed0600aa139ded457d28a9f9746a8994f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 12 Sep 2011 14:21:34 -0600 Subject: [PATCH 17/45] New MOBI writer: Change values of dictype and cdetype fields to be the same as for the old writer. Fixes #847766 (8.18 doesn't overwrite previous days newsfeeds for same publications) --- recipes/usatoday.recipe | 1 + src/calibre/ebooks/mobi/writer2/main.py | 26 ++- src/calibre/library/server/base.py | 6 + src/calibre/utils/browser.py | 4 + src/cherrypy/lib/sessions.py | 209 ++++++++++++------------ 5 files changed, 133 insertions(+), 113 deletions(-) diff --git a/recipes/usatoday.recipe b/recipes/usatoday.recipe index a4899b7187..18aeab2648 100644 --- a/recipes/usatoday.recipe +++ b/recipes/usatoday.recipe @@ -13,6 +13,7 @@ class USAToday(BasicNewsRecipe): title = 'USA Today' __author__ = 'Kovid Goyal' oldest_article = 1 + publication_type = 'newspaper' timefmt = '' max_articles_per_feed = 20 language = 'en' diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 7e748aac95..987d22afd3 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -61,6 +61,13 @@ class MobiWriter(object): def __call__(self, oeb, path_or_stream): self.log = oeb.log + pt = None + if oeb.metadata.publication_type: + x = unicode(oeb.metadata.publication_type[0]).split(':') + if len(x) > 1: + pt = x[1].lower() + self.publication_type = pt + if hasattr(path_or_stream, 'write'): return self.dump_stream(oeb, path_or_stream) with open(path_or_stream, 'w+b') as stream: @@ -351,7 +358,7 @@ class MobiWriter(object): elif self.indexer.is_periodical: # If you change this, remember to change the cdetype in the EXTH # header as well - bt = 0x103 + bt = {'newspaper':0x101}.get(self.publication_type, 0x103) record0.write(pack(b'>IIIII', 0xe8, bt, 65001, uid, 6)) @@ -525,15 +532,16 @@ class MobiWriter(object): nrecs += 1 # Write cdetype - if self.is_periodical: - # If you set the book type header field to 0x101 use NWPR here if - # you use 0x103 use MAGZ - data = b'MAGZ' + if not self.is_periodical: + exth.write(pack(b'>II', 501, 12)) + exth.write(b'EBOK') + nrecs += 1 else: - data = b'EBOK' - exth.write(pack(b'>II', 501, len(data)+8)) - exth.write(data) - nrecs += 1 + # Should be b'NWPR' for doc type of 0x101 and b'MAGZ' for doctype + # of 0x103 but the old writer didn't write them, and I dont know + # what it should be for type 0x102 (b'BLOG'?) 
so write nothing + # instead + pass # Add a publication date entry if oeb.metadata['date']: diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py index d18bffc6a2..9ffe1915f8 100644 --- a/src/calibre/library/server/base.py +++ b/src/calibre/library/server/base.py @@ -146,6 +146,11 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, self.config = {} self.is_running = False self.exception = None + self.config['/'] = { + 'tools.sessions.on' : True, + 'tools.sessions.timeout': 60, # Session times out after 60 minutes + } + if not wsgi: self.setup_loggers() cherrypy.engine.bonjour.subscribe() @@ -154,6 +159,7 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache, 'tools.gzip.mime_types': ['text/html', 'text/plain', 'text/xml', 'text/javascript', 'text/css'], } + if opts.password: self.config['/'] = { 'tools.digest_auth.on' : True, diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py index 6f8703ab49..430ced9fdd 100644 --- a/src/calibre/utils/browser.py +++ b/src/calibre/utils/browser.py @@ -28,6 +28,10 @@ class Browser(B): B.set_cookiejar(self, *args, **kwargs) self._clone_actions['set_cookiejar'] = ('set_cookiejar', args, kwargs) + @property + def cookiejar(self): + return self._clone_actions['set_cookiejar'][1][0] + def set_handle_redirect(self, *args, **kwargs): B.set_handle_redirect(self, *args, **kwargs) self._clone_actions['set_handle_redirect'] = ('set_handle_redirect', diff --git a/src/cherrypy/lib/sessions.py b/src/cherrypy/lib/sessions.py index f9b52d4e37..326e72c2b2 100644 --- a/src/cherrypy/lib/sessions.py +++ b/src/cherrypy/lib/sessions.py @@ -33,13 +33,13 @@ missing = object() class Session(object): """A CherryPy dict-like Session object (one per request).""" - + __metaclass__ = cherrypy._AttributeDocstrings - + _id = None id_observers = None id_observers__doc = "A list of callbacks to which to pass new id's." - + id__doc = "The current session ID." def _get_id(self): return self._id @@ -48,33 +48,33 @@ class Session(object): for o in self.id_observers: o(value) id = property(_get_id, _set_id, doc=id__doc) - + timeout = 60 timeout__doc = "Number of minutes after which to delete session data." - + locked = False locked__doc = """ If True, this session instance has exclusive read/write access to session data.""" - + loaded = False loaded__doc = """ If True, data has been retrieved from storage. This should happen automatically on the first attempt to access session data.""" - + clean_thread = None clean_thread__doc = "Class-level Monitor which calls self.clean_up." - + clean_freq = 5 clean_freq__doc = "The poll rate for expired session cleanup in minutes." - + def __init__(self, id=None, **kwargs): self.id_observers = [] self._data = {} - + for k, v in kwargs.iteritems(): setattr(self, k, v) - + if id is None: self.regenerate() else: @@ -84,30 +84,30 @@ class Session(object): # See http://www.cherrypy.org/ticket/709. self.id = None self.regenerate() - + def regenerate(self): """Replace the current session (with a new id).""" if self.id is not None: self.delete() - + old_session_was_locked = self.locked if old_session_was_locked: self.release_lock() - + self.id = None while self.id is None: self.id = self.generate_id() # Assert that the generated id is not already stored. 
if self._exists(): self.id = None - + if old_session_was_locked: self.acquire_lock() - + def clean_up(self): """Clean up expired sessions.""" pass - + try: os.urandom(20) except (AttributeError, NotImplementedError): @@ -119,7 +119,7 @@ class Session(object): def generate_id(self): """Return a new session id.""" return os.urandom(20).encode('hex') - + def save(self): """Save session data.""" try: @@ -129,12 +129,12 @@ class Session(object): t = datetime.timedelta(seconds = self.timeout * 60) expiration_time = datetime.datetime.now() + t self._save(expiration_time) - + finally: if self.locked: # Always release the lock if the user didn't release it self.release_lock() - + def load(self): """Copy stored session data into this session instance.""" data = self._load() @@ -145,7 +145,7 @@ class Session(object): else: self._data = data[0] self.loaded = True - + # Stick the clean_thread in the class, not the instance. # The instances are created and destroyed per-request. cls = self.__class__ @@ -157,23 +157,23 @@ class Session(object): t.subscribe() cls.clean_thread = t t.start() - + def delete(self): """Delete stored session data.""" self._delete() - + def __getitem__(self, key): if not self.loaded: self.load() return self._data[key] - + def __setitem__(self, key, value): if not self.loaded: self.load() self._data[key] = value - + def __delitem__(self, key): if not self.loaded: self.load() del self._data[key] - + def pop(self, key, default=missing): """Remove the specified key and return the corresponding value. If key is not found, default is returned if given, @@ -184,46 +184,46 @@ class Session(object): return self._data.pop(key) else: return self._data.pop(key, default) - + def __contains__(self, key): if not self.loaded: self.load() return key in self._data - + def has_key(self, key): """D.has_key(k) -> True if D has a key k, else False.""" if not self.loaded: self.load() return self._data.has_key(key) - + def get(self, key, default=None): """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" if not self.loaded: self.load() return self._data.get(key, default) - + def update(self, d): """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k].""" if not self.loaded: self.load() self._data.update(d) - + def setdefault(self, key, default=None): """D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D.""" if not self.loaded: self.load() return self._data.setdefault(key, default) - + def clear(self): """D.clear() -> None. Remove all items from D.""" if not self.loaded: self.load() self._data.clear() - + def keys(self): """D.keys() -> list of D's keys.""" if not self.loaded: self.load() return self._data.keys() - + def items(self): """D.items() -> list of D's (key, value) pairs, as 2-tuples.""" if not self.loaded: self.load() return self._data.items() - + def values(self): """D.values() -> list of D's values.""" if not self.loaded: self.load() @@ -231,11 +231,11 @@ class Session(object): class RamSession(Session): - + # Class-level objects. Don't rebind these! 
cache = {} locks = {} - + def clean_up(self): """Clean up expired sessions.""" now = datetime.datetime.now() @@ -249,29 +249,29 @@ class RamSession(Session): del self.locks[id] except KeyError: pass - + def _exists(self): return self.id in self.cache - + def _load(self): return self.cache.get(self.id) - + def _save(self, expiration_time): self.cache[self.id] = (self._data, expiration_time) - + def _delete(self): del self.cache[self.id] - + def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" self.locked = True self.locks.setdefault(self.id, threading.RLock()).acquire() - + def release_lock(self): """Release the lock on the currently-loaded session data.""" self.locks[self.id].release() self.locked = False - + def __len__(self): """Return the number of active sessions.""" return len(self.cache) @@ -279,32 +279,32 @@ class RamSession(Session): class FileSession(Session): """Implementation of the File backend for sessions - + storage_path: the folder where session data will be saved. Each session will be saved as pickle.dump(data, expiration_time) in its own file; the filename will be self.SESSION_PREFIX + self.id. """ - + SESSION_PREFIX = 'session-' LOCK_SUFFIX = '.lock' - + def __init__(self, id=None, **kwargs): # The 'storage_path' arg is required for file-based sessions. kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) Session.__init__(self, id=id, **kwargs) - + def setup(cls, **kwargs): """Set up the storage system for file-based sessions. - + This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ # The 'storage_path' arg is required for file-based sessions. kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) - + for k, v in kwargs.iteritems(): setattr(cls, k, v) - + # Warn if any lock files exist at startup. lockfiles = [fname for fname in os.listdir(cls.storage_path) if (fname.startswith(cls.SESSION_PREFIX) @@ -316,17 +316,17 @@ class FileSession(Session): "manually delete the lockfiles found at %r." 
% (len(lockfiles), plural, cls.storage_path)) setup = classmethod(setup) - + def _get_file_path(self): f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id) if not os.path.abspath(f).startswith(self.storage_path): raise cherrypy.HTTPError(400, "Invalid session id in cookie.") return f - + def _exists(self): path = self._get_file_path() return os.path.exists(path) - + def _load(self, path=None): if path is None: path = self._get_file_path() @@ -338,20 +338,20 @@ class FileSession(Session): f.close() except (IOError, EOFError): return None - + def _save(self, expiration_time): f = open(self._get_file_path(), "wb") try: pickle.dump((self._data, expiration_time), f) finally: f.close() - + def _delete(self): try: os.unlink(self._get_file_path()) except OSError: pass - + def acquire_lock(self, path=None): """Acquire an exclusive lock on the currently-loaded session data.""" if path is None: @@ -363,17 +363,17 @@ class FileSession(Session): except OSError: time.sleep(0.1) else: - os.close(lockfd) + os.close(lockfd) break self.locked = True - + def release_lock(self, path=None): """Release the lock on the currently-loaded session data.""" if path is None: path = self._get_file_path() os.unlink(path + self.LOCK_SUFFIX) self.locked = False - + def clean_up(self): """Clean up expired sessions.""" now = datetime.datetime.now() @@ -395,7 +395,7 @@ class FileSession(Session): os.unlink(path) finally: self.release_lock(path) - + def __len__(self): """Return the number of active sessions.""" return len([fname for fname in os.listdir(self.storage_path) @@ -412,38 +412,38 @@ class PostgresqlSession(Session): data text, expiration_time timestamp ) - + You must provide your own get_db function. """ - + def __init__(self, id=None, **kwargs): Session.__init__(self, id, **kwargs) self.cursor = self.db.cursor() - + def setup(cls, **kwargs): """Set up the storage system for Postgres-based sessions. - + This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). 
""" for k, v in kwargs.iteritems(): setattr(cls, k, v) - + self.db = self.get_db() setup = classmethod(setup) - + def __del__(self): if self.cursor: self.cursor.close() self.db.commit() - + def _exists(self): # Select session data from table self.cursor.execute('select data, expiration_time from session ' 'where id=%s', (self.id,)) rows = self.cursor.fetchall() return bool(rows) - + def _load(self): # Select session data from table self.cursor.execute('select data, expiration_time from session ' @@ -451,34 +451,34 @@ class PostgresqlSession(Session): rows = self.cursor.fetchall() if not rows: return None - + pickled_data, expiration_time = rows[0] data = pickle.loads(pickled_data) return data, expiration_time - + def _save(self, expiration_time): pickled_data = pickle.dumps(self._data) self.cursor.execute('update session set data = %s, ' 'expiration_time = %s where id = %s', (pickled_data, expiration_time, self.id)) - + def _delete(self): self.cursor.execute('delete from session where id=%s', (self.id,)) - + def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" # We use the "for update" clause to lock the row self.locked = True self.cursor.execute('select id from session where id=%s for update', (self.id,)) - + def release_lock(self): """Release the lock on the currently-loaded session data.""" # We just close the cursor and that will remove the lock # introduced by the "for update" clause self.cursor.close() self.locked = False - + def clean_up(self): """Clean up expired sessions.""" self.cursor.execute('delete from session where expiration_time < %s', @@ -486,43 +486,43 @@ class PostgresqlSession(Session): class MemcachedSession(Session): - + # The most popular memcached client for Python isn't thread-safe. # Wrap all .get and .set operations in a single lock. mc_lock = threading.RLock() - + # This is a seperate set of locks per session id. locks = {} - + servers = ['127.0.0.1:11211'] - + def setup(cls, **kwargs): """Set up the storage system for memcached-based sessions. - + This should only be called once per process; this will be done automatically when using sessions.init (as the built-in Tool does). """ for k, v in kwargs.iteritems(): setattr(cls, k, v) - + import memcache cls.cache = memcache.Client(cls.servers) setup = classmethod(setup) - + def _exists(self): self.mc_lock.acquire() try: return bool(self.cache.get(self.id)) finally: self.mc_lock.release() - + def _load(self): self.mc_lock.acquire() try: return self.cache.get(self.id) finally: self.mc_lock.release() - + def _save(self, expiration_time): # Send the expiration time as "Unix time" (seconds since 1/1/1970) td = int(time.mktime(expiration_time.timetuple())) @@ -532,20 +532,20 @@ class MemcachedSession(Session): raise AssertionError("Session data for id %r not set." 
% self.id) finally: self.mc_lock.release() - + def _delete(self): self.cache.delete(self.id) - + def acquire_lock(self): """Acquire an exclusive lock on the currently-loaded session data.""" self.locked = True self.locks.setdefault(self.id, threading.RLock()).acquire() - + def release_lock(self): """Release the lock on the currently-loaded session data.""" self.locks[self.id].release() self.locked = False - + def __len__(self): """Return the number of active sessions.""" raise NotImplementedError @@ -555,15 +555,15 @@ class MemcachedSession(Session): def save(): """Save any changed session data.""" - + if not hasattr(cherrypy.serving, "session"): return - + # Guard against running twice if hasattr(cherrypy.request, "_sessionsaved"): return cherrypy.request._sessionsaved = True - + if cherrypy.response.stream: # If the body is being streamed, we have to save the data # *after* the response has been written out @@ -589,7 +589,7 @@ close.priority = 90 def init(storage_type='ram', path=None, path_header=None, name='session_id', timeout=60, domain=None, secure=False, clean_freq=5, **kwargs): """Initialize session object (using cookies). - + storage_type: one of 'ram', 'file', 'postgresql'. This will be used to look up the corresponding class in cherrypy.lib.sessions globals. For example, 'file' will use the FileSession class. @@ -603,31 +603,31 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', secure: if False (the default) the cookie 'secure' value will not be set. If True, the cookie 'secure' value will be set (to 1). clean_freq (minutes): the poll rate for expired session cleanup. - + Any additional kwargs will be bound to the new Session instance, and may be specific to the storage type. See the subclass of Session you're using for more information. """ - + request = cherrypy.request - + # Guard against running twice if hasattr(request, "_session_init_flag"): return request._session_init_flag = True - + # Check if request came with a session ID id = None if name in request.cookie: id = request.cookie[name].value - + # Find the storage class and call setup (first time only). storage_class = storage_type.title() + 'Session' storage_class = globals()[storage_class] if not hasattr(cherrypy, "session"): if hasattr(storage_class, "setup"): storage_class.setup(**kwargs) - + # Create and attach a new Session instance to cherrypy.serving. # It will possess a reference to (and lock, and lazily load) # the requested session data. @@ -638,11 +638,11 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', """Update the cookie every time the session id changes.""" cherrypy.response.cookie[name] = id sess.id_observers.append(update_cookie) - + # Create cherrypy.session which will proxy to cherrypy.serving.session if not hasattr(cherrypy, "session"): cherrypy.session = cherrypy._ThreadLocalProxy('session') - + set_response_cookie(path=path, path_header=path_header, name=name, timeout=timeout, domain=domain, secure=secure) @@ -650,7 +650,7 @@ def init(storage_type='ram', path=None, path_header=None, name='session_id', def set_response_cookie(path=None, path_header=None, name='session_id', timeout=60, domain=None, secure=False): """Set a response cookie for the client. - + path: the 'path' value to stick in the response cookie metadata. path_header: if 'path' is None (the default), then the response cookie 'path' will be pulled from request.headers[path_header]. 
@@ -665,14 +665,15 @@ def set_response_cookie(path=None, path_header=None, name='session_id', cookie[name] = cherrypy.serving.session.id cookie[name]['path'] = (path or cherrypy.request.headers.get(path_header) or '/') - + # We'd like to use the "max-age" param as indicated in # http://www.faqs.org/rfcs/rfc2109.html but IE doesn't # save it to disk and the session is lost if people close # the browser. So we have to use the old "expires" ... sigh ... ## cookie[name]['max-age'] = timeout * 60 - if timeout: - cookie[name]['expires'] = http.HTTPDate(time.time() + (timeout * 60)) + if False and timeout: # Changed by Kovid, we want the user to have to + # re-authenticate on browser restart + cookie[name]['expires'] = http.HTTPDate(time.time() + timeout) if domain is not None: cookie[name]['domain'] = domain if secure: From 2de76958ce55857fe3f8aaff86932ac62fc02187 Mon Sep 17 00:00:00 2001 From: Timothy Legge Date: Mon, 12 Sep 2011 22:44:46 -0300 Subject: [PATCH 18/45] Kobo - Only process supported collections --- src/calibre/devices/kobo/driver.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 528057dad9..fa4796a5a9 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -653,6 +653,15 @@ class KOBO(USBMS): debug_print(' Commit: Set FavouritesIndex') def update_device_database_collections(self, booklists, collections_attributes, oncard): + # Only process categories in this list + supportedcategories = { + "Im_Reading":1, + "Read":2, + "Closed":3, + "Shortlist":4, + # "Preview":99, # Unsupported as we don't want to change it + } + # Define lists for the ReadStatus readstatuslist = { "Im_Reading":1, @@ -692,6 +701,7 @@ class KOBO(USBMS): # Process any collections that exist for category, books in collections.items(): + if category in supportedcategories: debug_print("Category: ", category, " id = ", readstatuslist.get(category)) for book in books: debug_print(' Title:', book.title, 'category: ', category) From be7114ba3acc8c650b48ff8881f40ae704b20c5f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 12 Sep 2011 21:21:21 -0600 Subject: [PATCH 19/45] ... 
--- recipes/businessworldin.recipe | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/recipes/businessworldin.recipe b/recipes/businessworldin.recipe index e44682d7e1..a4c774ccdb 100644 --- a/recipes/businessworldin.recipe +++ b/recipes/businessworldin.recipe @@ -4,6 +4,7 @@ __copyright__ = '2009-2010, Darko Miletic ' www.businessworld.in ''' +import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -15,7 +16,7 @@ class BusinessWorldMagazine(BasicNewsRecipe): category = 'news, politics, finances, India, Asia' delay = 1 no_stylesheets = True - INDEX = 'http://www.businessworld.in/bw/Magazine_Current_Issue' + INDEX = 'http://www.businessworld.in/businessworld/magazine_latest_issue.php' ROOT = 'http://www.businessworld.in' use_embedded_content = False encoding = 'utf-8' @@ -38,13 +39,17 @@ class BusinessWorldMagazine(BasicNewsRecipe): if litem == url: return True return False - - + + def parse_index(self): articles = [] linklist = [] - soup = self.index_to_soup(self.INDEX) - + br = self.browser + br.open(self.ROOT) + raw = br.open(br.click_link(text_regex=re.compile('Current.*Issue', + re.I))).read() + soup = self.index_to_soup(raw) + tough = soup.find('div', attrs={'id':'tough'}) if tough: for item in tough.findAll('h1'): @@ -63,7 +68,7 @@ class BusinessWorldMagazine(BasicNewsRecipe): ,'description':description }) linklist.append(url) - + for item in soup.findAll('div', attrs={'class':'nametitle'}): description = '' title_prefix = '' @@ -82,7 +87,7 @@ class BusinessWorldMagazine(BasicNewsRecipe): linklist.append(url) return [(soup.head.title.string, articles)] - + keep_only_tags = [dict(name='div', attrs={'id':'printwrapper'})] remove_tags = [dict(name=['object','link','meta','base','iframe','link','table'])] From 7198f843283f36686ad9d6bca4f87cf9986de25e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 12 Sep 2011 21:58:53 -0600 Subject: [PATCH 20/45] Fix Business World India. 
Fixes #848431 (businessworldin.recipe should be updated) --- recipes/businessworldin.recipe | 113 +++++++++++++-------------------- 1 file changed, 43 insertions(+), 70 deletions(-) diff --git a/recipes/businessworldin.recipe b/recipes/businessworldin.recipe index a4c774ccdb..cb5f443e9f 100644 --- a/recipes/businessworldin.recipe +++ b/recipes/businessworldin.recipe @@ -5,12 +5,11 @@ www.businessworld.in ''' import re -from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe class BusinessWorldMagazine(BasicNewsRecipe): title = 'Business World Magazine' - __author__ = 'Darko Miletic' + __author__ = 'Kovid Goyal' description = 'News from India' publisher = 'ABP Pvt Ltd Publication' category = 'news, politics, finances, India, Asia' @@ -18,86 +17,60 @@ class BusinessWorldMagazine(BasicNewsRecipe): no_stylesheets = True INDEX = 'http://www.businessworld.in/businessworld/magazine_latest_issue.php' ROOT = 'http://www.businessworld.in' - use_embedded_content = False encoding = 'utf-8' language = 'en_IN' - extra_css = """ - img{display: block; margin-bottom: 0.5em} - body{font-family: Arial,Helvetica,sans-serif} - h2{color: gray; display: block} - """ - - conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language - } - - def is_in_list(self,linklist,url): - for litem in linklist: - if litem == url: - return True - return False - + auto_cleanup = True def parse_index(self): - articles = [] - linklist = [] br = self.browser br.open(self.ROOT) raw = br.open(br.click_link(text_regex=re.compile('Current.*Issue', re.I))).read() soup = self.index_to_soup(raw) + mc = soup.find(attrs={'class':'mag_cover'}) + if mc is not None: + img = mc.find('img', src=True) + if img is not None: + self.cover_url = img['src'] + + feeds = [] + current_section = None + articles = [] + for tag in soup.findAll(['h3', 'h2']): + inner_a = tag.find('a') + if tag.name == 'h3' and inner_a is not None: + continue + if tag.name == 'h2' and (inner_a is None or current_section is + None): + continue + + if tag.name == 'h3': + if current_section is not None and articles: + feeds.append((current_section, articles)) + current_section = self.tag_to_string(tag) + self.log('Found section:', current_section) + articles = [] + elif tag.name == 'h2': + url = inner_a.get('href', None) + if url is None: continue + if url.startswith('/'): url = self.ROOT + url + title = self.tag_to_string(inner_a) + h1 = tag.findPreviousSibling('h1') + if h1 is not None: + title = self.tag_to_string(h1) + title + self.log('\tFound article:', title) + articles.append({'title':title, 'url':url, 'date':'', + 'description':''}) + + if current_section and articles: + feeds.append((current_section, articles)) + + return feeds + + - tough = soup.find('div', attrs={'id':'tough'}) - if tough: - for item in tough.findAll('h1'): - description = '' - title_prefix = '' - feed_link = item.find('a') - if feed_link and feed_link.has_key('href'): - url = self.ROOT + feed_link['href'] - if not self.is_in_list(linklist,url): - title = title_prefix + self.tag_to_string(feed_link) - date = strftime(self.timefmt) - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':description - }) - linklist.append(url) - for item in soup.findAll('div', attrs={'class':'nametitle'}): - description = '' - title_prefix = '' - feed_link = item.find('a') - if feed_link and feed_link.has_key('href'): - url = self.ROOT + feed_link['href'] - if not self.is_in_list(linklist,url): - title = 
title_prefix + self.tag_to_string(feed_link) - date = strftime(self.timefmt) - articles.append({ - 'title' :title - ,'date' :date - ,'url' :url - ,'description':description - }) - linklist.append(url) - return [(soup.head.title.string, articles)] - keep_only_tags = [dict(name='div', attrs={'id':'printwrapper'})] - remove_tags = [dict(name=['object','link','meta','base','iframe','link','table'])] - def print_version(self, url): - return url.replace('/bw/','/bw/storyContent/') - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('img',attrs={'class':'toughbor'}) - if cover_item: - cover_url = self.ROOT + cover_item['src'] - return cover_url From cf13881d0847e55c23eb3a9f53b6613227f83347 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 12 Sep 2011 23:39:02 -0600 Subject: [PATCH 21/45] ... --- src/calibre/ebooks/mobi/writer2/serializer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py index ed6df6698a..6bc597ccb4 100644 --- a/src/calibre/ebooks/mobi/writer2/serializer.py +++ b/src/calibre/ebooks/mobi/writer2/serializer.py @@ -160,7 +160,7 @@ class Serializer(object): buf.write(b'title="') self.serialize_text(ref.title, quot=True) buf.write(b'" ') - if ref.title == 'start': + if ref.title == 'start' or ref.type in ('start', 'other.start'): self._start_href = ref.href self.serialize_href(ref.href) # Space required or won't work, I kid you not @@ -348,8 +348,9 @@ class Serializer(object): ''' buf = self.buf id_offsets = self.id_offsets + start_href = getattr(self, '_start_href', None) for href, hoffs in self.href_offsets.items(): - is_start = (href and href == getattr(self, '_start_href', None)) + is_start = (href and href == start_href) # Iterate over all filepos items if href not in id_offsets: self.logger.warn('Hyperlink target %r not found' % href) From 636496f1dd88bee5d7536de7b456dc920e7cf415 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Sep 2011 00:11:43 -0600 Subject: [PATCH 22/45] Fixed People/US Magazine mashup --- recipes/people_us_mashup.recipe | 72 ++------------------------------- 1 file changed, 3 insertions(+), 69 deletions(-) diff --git a/recipes/people_us_mashup.recipe b/recipes/people_us_mashup.recipe index ed43e24e56..28c76d820c 100644 --- a/recipes/people_us_mashup.recipe +++ b/recipes/people_us_mashup.recipe @@ -14,54 +14,10 @@ class PeopleMag(BasicNewsRecipe): use_embedded_content = False oldest_article = 2 max_articles_per_feed = 50 + use_embedded_content = False - extra_css = ''' - h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;} - h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;} - .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;} - .byline {font-size: small; color: #666666; font-style:italic; } - .lastline {font-size: small; color: #666666; font-style:italic;} - .contact {font-size: small; color: #666666;} - .contact p {font-size: small; color: #666666;} - .photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;} - .photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;} - .article_timestamp{font-size:x-small; color:#666666;} - a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;} - ''' - - - keep_only_tags = [ - dict(name='div', attrs={'class': 'panel_news_article_main'}), - dict(name='div', attrs={'class':'article_content'}), - 
dict(name='div', attrs={'class': 'headline'}), - dict(name='div', attrs={'class': 'post'}), - dict(name='div', attrs={'class': 'packageheadlines'}), - dict(name='div', attrs={'class': 'snap_preview'}), - dict(name='div', attrs={'id': 'articlebody'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class':'share_comments'}), - dict(name='p', attrs={'class':'twitter_facebook'}), - dict(name='div', attrs={'class':'share_comments_bottom'}), - dict(name='h2', attrs={'id':'related_content'}), - dict(name='div', attrs={'class':'next_article'}), - dict(name='div', attrs={'class':'prev_article'}), - dict(name='ul', attrs={'id':'sharebar'}), - dict(name='div', attrs={'class':'sharelinkcont'}), - dict(name='div', attrs={'class':'categories'}), - dict(name='ul', attrs={'class':'categories'}), - dict(name='div', attrs={'class':'related_content'}), - dict(name='div', attrs={'id':'promo'}), - dict(name='div', attrs={'class':'linksWrapper'}), - dict(name='p', attrs={'class':'tag tvnews'}), - dict(name='p', attrs={'class':'tag movienews'}), - dict(name='p', attrs={'class':'tag musicnews'}), - dict(name='p', attrs={'class':'tag couples'}), - dict(name='p', attrs={'class':'tag gooddeeds'}), - dict(name='p', attrs={'class':'tag weddings'}), - dict(name='p', attrs={'class':'tag health'}) -] + no_stylesheets = True + auto_cleanup = True feeds = [ @@ -69,26 +25,4 @@ class PeopleMag(BasicNewsRecipe): ('US Headlines', 'http://www.usmagazine.com/celebrity_news/rss') ] - def get_article_url(self, article): - ans = article.link - try: - self.log('Looking for full story link in', ans) - soup = self.index_to_soup(ans) - x = soup.find(text="View All") - - if x is not None: - ans = ans + '?viewAll=y' - self.log('Found full story link', ans) - except: - pass - return ans - - def postprocess_html(self, soup,first): - - for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}): - tag.extract() - for tag in soup.findAll(name='br'): - tag.extract() - - return soup From 5168e3c18b60fd8816b79bf9f6a54c699242ca5b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Sep 2011 00:12:11 -0600 Subject: [PATCH 23/45] ... --- src/calibre/ebooks/readability/readability.py | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/calibre/ebooks/readability/readability.py b/src/calibre/ebooks/readability/readability.py index 7713584d14..8d4a23b338 100644 --- a/src/calibre/ebooks/readability/readability.py +++ b/src/calibre/ebooks/readability/readability.py @@ -484,30 +484,29 @@ class HashableElement(): def __getattr__(self, tag): return getattr(self.node, tag) +def option_parser(): + from calibre.utils.config import OptionParser + parser = OptionParser(usage='%prog: [options] file') + parser.add_option('-v', '--verbose', default=False, action='store_true', + dest='verbose', + help=_('Show detailed output information. 
Useful for debugging')) + + return parser + def main(): - import logging - from optparse import OptionParser - parser = OptionParser(usage="%prog: [options] [file]") - parser.add_option('-v', '--verbose', action='store_true') - parser.add_option('-u', '--url', help="use URL instead of a local file") - (options, args) = parser.parse_args() + from calibre.utils.logging import default_log + parser = option_parser() + options, args = parser.parse_args() - if not (len(args) == 1 or options.url): + if len(args) != 1: parser.print_help() - sys.exit(1) - logging.basicConfig(level=logging.INFO) + raise SystemExit(1) + + with open(args[0], 'rb') as f: + raw = f.read() - file = None - if options.url: - import urllib - file = urllib.urlopen(options.url) - else: - file = open(args[0], 'rt') enc = sys.__stdout__.encoding or 'utf-8' - try: - print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace') - finally: - file.close() + print Document(raw, default_log, debug=options.verbose).summary().encode(enc, 'replace') if __name__ == '__main__': main() From 41792efac3ed46d7b31159f70d598986f1c4d7b8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Sep 2011 09:42:50 -0600 Subject: [PATCH 24/45] Fix #848717 (preferences - behaviour - wrong label) --- src/calibre/gui2/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index ccd1dac1ad..0e123bee8b 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -142,7 +142,7 @@ def _config(): # {{{ c.add_opt('upload_news_to_device', default=True, help=_('Upload downloaded news to device')) c.add_opt('delete_news_from_library_on_upload', default=False, - help=_('Delete books from library after uploading to device')) + help=_('Delete news books from library after uploading to device')) c.add_opt('separate_cover_flow', default=False, help=_('Show the cover flow in a separate window instead of in the main calibre window')) c.add_opt('disable_tray_notification', default=False, From dbb2ede515f12b69e5f2de78c985ff6a62e925fa Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Sep 2011 09:47:47 -0600 Subject: [PATCH 25/45] ... --- src/calibre/ebooks/conversion/plumber.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 3e5313eb96..3d345b50f3 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -394,8 +394,9 @@ OptionRecommendation(name='insert_blank_line_size', OptionRecommendation(name='remove_first_image', recommended_value=False, level=OptionRecommendation.LOW, help=_('Remove the first image from the input ebook. Useful if the ' - 'first image in the source file is a cover and you are specifying ' - 'an external cover.' + 'input document has a cover image that is not identified as a cover. ' + 'In this case, if you set a cover in calibre, the output document will ' + 'end up with two cover images if you do not specify this option.' 
) ), @@ -1024,7 +1025,7 @@ OptionRecommendation(name='sr3_replace', self.output_plugin.file_type not in ('mobi', 'lrf'): from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables LinearizeTables()(self.oeb, self.opts) - + if self.opts.unsmarten_punctuation: from calibre.ebooks.oeb.transforms.unsmarten import UnsmartenPunctuation UnsmartenPunctuation()(self.oeb, self.opts) From 7abf29c5ba73cdbcd06cf75579139d1db669aa72 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Sep 2011 09:49:13 -0600 Subject: [PATCH 26/45] Fix #848900 (Updated recipe for The Japan Times) --- recipes/icons/japan_times.png | Bin 0 -> 1264 bytes recipes/japan_times.recipe | 67 ++++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 16 deletions(-) create mode 100644 recipes/icons/japan_times.png diff --git a/recipes/icons/japan_times.png b/recipes/icons/japan_times.png new file mode 100644 index 0000000000000000000000000000000000000000..1b2ac895725ec55d4328a964f6c3f70f4076f111 GIT binary patch literal 1264 zcmeIw|5MTj7zgkNycVOlqwIE@xy97zo6-l+^v-oV^y39ra_UOg4}NKDUXxZ%@6) zIOvRn?l_ReLr*;1j)yx5a5n+^6X4ISFhGa{sKPB83(i9|3b zf+Z2GN$@fWrWjyjzzhTI2)GdNFkyiSe%9(NvLM8QWj3s^0pEw*;vusAw1oo1cNmqQ z!`doKrKi3c_!4~~IHWE`-N(?NutHITDSp6IBJ7a}(}=LqI&Az3Hrs&BHDJz`W?9=M ztF+bF)jHqR>XJzx_y0aT(B)Bdc^@gg8l_LIR%^9dy7heEJ{UX0y%C&c2D5!RPI`^9}BDdflr$5pI{?<6iLjd_h0HxVRV! zg_eSNXlZ3Rgonf7f&z?~sBAAr!!LpcXex1TdU5vUP($E9?%H)Ybe zx!Rw{IkZfPGBDQhePfwp(-wP2mJ0RY#Vu{VBZKql{ZDrn8~aSvC6XtBK%U6ihmTwp zw13uod0psh+EiL#!CD|_Tu`a5Q#=(FsH39!!s((&e$mPF`!$Mh;@|JtYn120txscSyw3`B zy}hse$znyyQDN)Jw7uaa<-NW{-C!Y*9;a|!!R2`c1n)V5ymb!KAPKXE69%V3?kV;UX#7+JwZyw_#jEx zdA>HPoE~#3{XA8psnlZ@gEsJ>GKtL?E}J{^ZjeC4iR2b`)rQr#;$`vC+ZiR*{{h9F BuS5U< literal 0 HcmV?d00001 diff --git a/recipes/japan_times.recipe b/recipes/japan_times.recipe index bb83b16f1e..229d5e4035 100644 --- a/recipes/japan_times.recipe +++ b/recipes/japan_times.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2011, Darko Miletic ' ''' japantimes.co.jp ''' @@ -9,24 +7,61 @@ japantimes.co.jp from calibre.web.feeds.news import BasicNewsRecipe class JapanTimes(BasicNewsRecipe): - title = u'The Japan Times' + title = 'The Japan Times' __author__ = 'Darko Miletic' - description = 'News from Japan' - language = 'en' - - oldest_article = 7 - max_articles_per_feed = 100 + description = "Daily news and features on Japan from the most widely read English-language newspaper in Japan. Coverage includes national news, business news, sports news, commentary and features on living in Japan, entertainment, the arts, education and more." 
+ language = 'en_JP' + category = 'news, politics, japan' + publisher = 'The Japan Times' + oldest_article = 5 + max_articles_per_feed = 150 no_stylesheets = True use_embedded_content = False + encoding = 'utf8' + publication_type = 'newspaper' + masthead_url = 'http://search.japantimes.co.jp/images/header_title.gif' + extra_css = 'body{font-family: Geneva,Arial,Helvetica,sans-serif}' - keep_only_tags = [ dict(name='div', attrs={'id':'searchresult'}) ] - remove_tags_after = [ dict(name='div', attrs={'id':'mainbody' }) ] + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } + + + keep_only_tags = [dict(name='div', attrs={'id':'printresult'})] remove_tags = [ - dict(name='div' , attrs={'id':'ads' }) - ,dict(name='table', attrs={'width':470}) + dict(name=['iframe','meta','link','embed','object','base']) + ,dict(attrs={'id':'searchfooter'}) ] + feeds = [(u'The Japan Times', u'http://feeds.feedburner.com/japantimes')] + remove_attributes = ['border'] + def get_article_url(self, article): + rurl = BasicNewsRecipe.get_article_url(self, article) + return rurl.partition('?')[0] - feeds = [ - (u'The Japan Times', u'http://feedproxy.google.com/japantimes') - ] \ No newline at end of file + def print_version(self, url): + return url.replace('/cgi-bin/','/print/') + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + for item in soup.findAll('photo'): + item.name = 'div' + for item in soup.head.findAll('paragraph'): + item.extract() + for item in soup.findAll('wwfilename'): + item.extract() + for item in soup.findAll('jtcategory'): + item.extract() + for item in soup.findAll('nomooter'): + item.extract() + for item in soup.body.findAll('paragraph'): + item.name = 'p' + return soup From 2b315603771f56e57546523de5745794c8c8f138 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Sep 2011 10:04:43 -0600 Subject: [PATCH 27/45] ... 
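Editor's note (not part of the patch series): the reworked Japan Times recipe above relies on two small URL rewrites, stripping the feed tracking query string and mapping article pages to their printer-friendly form. A standalone sketch of the same string handling; the sample URL is made up for illustration only:

def get_article_url(feed_url):
    # Drop FeedBurner-style tracking parameters from feed links.
    return feed_url.partition('?')[0]

def print_version(url):
    # The printable page is served from /print/ instead of /cgi-bin/.
    return url.replace('/cgi-bin/', '/print/')

sample = 'http://search.japantimes.co.jp/cgi-bin/nn20110913a1.html?utm_source=feedburner'
clean = get_article_url(sample)
print(clean)                 # .../cgi-bin/nn20110913a1.html
print(print_version(clean))  # .../print/nn20110913a1.html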
---
 src/calibre/ebooks/mobi/writer2/serializer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py
index 6bc597ccb4..eeef720144 100644
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@@ -160,7 +160,9 @@ class Serializer(object):
             buf.write(b'title="')
             self.serialize_text(ref.title, quot=True)
             buf.write(b'" ')
-            if ref.title == 'start' or ref.type in ('start', 'other.start'):
+            if (ref.title.lower() == 'start' or
+                    (ref.type and ref.type.lower() in ('start',
+                        'other.start'))):
                 self._start_href = ref.href
             self.serialize_href(ref.href)
             # Space required or won't work, I kid you not

From 371db4901f7aadd4b46f60da79a04c717a6dea22 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 13 Sep 2011 10:28:47 -0600
Subject: [PATCH 28/45] Conversion: Remove paragraph spacing: If you set the
 indent size to 0, calibre will now leave the indents specified in the input
 document

---
 src/calibre/ebooks/conversion/plumber.py     | 4 +++-
 src/calibre/ebooks/oeb/transforms/flatcss.py | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 3d345b50f3..7f38106229 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -368,7 +368,9 @@ OptionRecommendation(name='remove_paragraph_spacing_indent_size',
         recommended_value=1.5, level=OptionRecommendation.LOW,
         help=_('When calibre removes blank lines between paragraphs, it automatically '
             'sets a paragraph indent, to ensure that paragraphs can be easily '
-            'distinguished. This option controls the width of that indent (in em).')
+            'distinguished. This option controls the width of that indent (in em). '
+            'If you set this value to 0, then the indent specified in the input '
+            'document is used.')
         ),

 OptionRecommendation(name='prefer_metadata_cover',
diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py
index 1493a647ae..078174218e 100644
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -320,9 +320,10 @@ class CSSFlattener(object):
             if self.context.insert_blank_line:
                 cssdict['margin-top'] = cssdict['margin-bottom'] = \
                     '%fem'%self.context.insert_blank_line_size
-            if (self.context.remove_paragraph_spacing and
+            indent_size = self.context.remove_paragraph_spacing_indent_size
+            if (self.context.remove_paragraph_spacing and indent_size != 0.0 and
                     cssdict.get('text-align', None) not in ('center', 'right')):
-                cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size
+                cssdict['text-indent'] = "%1.1fem" % indent_size
 
             if cssdict:
                 items = cssdict.items()

From 92fdad1ef3b4eee384ef5d69bc4ded66ffb72acc Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 13 Sep 2011 19:07:53 -0600
Subject: [PATCH 29/45] News download: Add an auto_cleanup_keep variable that
 allows recipe writers to tell the auto cleanup to never remove a specified
 element

---
 recipes/people_us_mashup.recipe               |  1 +
 src/calibre/ebooks/readability/readability.py | 65 ++++++++-----------
 src/calibre/web/feeds/news.py                 | 15 ++++-
 3 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/recipes/people_us_mashup.recipe b/recipes/people_us_mashup.recipe
index 28c76d820c..5d820bacc0 100644
--- a/recipes/people_us_mashup.recipe
+++ b/recipes/people_us_mashup.recipe
@@ -18,6 +18,7 @@ class PeopleMag(BasicNewsRecipe):
 
     no_stylesheets = True
     auto_cleanup = True
+    auto_cleanup_keep = '//div[@id="article-image"]'
 
 
     feeds = [
diff --git a/src/calibre/ebooks/readability/readability.py b/src/calibre/ebooks/readability/readability.py
index 8d4a23b338..028a4d6ede 100644
--- a/src/calibre/ebooks/readability/readability.py
+++ b/src/calibre/ebooks/readability/readability.py
@@ -1,3 +1,8 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
 import re, sys
 
 from collections import defaultdict
@@ -72,10 +77,15 @@ class Document:
             self.options[k] = v
         self.html = None
         self.log = log
+        self.keep_elements = set()
 
     def _html(self, force=False):
         if force or self.html is None:
             self.html = self._parse(self.input)
+            path = self.options['keep_elements']
+            if path is not None:
+                self.keep_elements = set(self.html.xpath(path))
+
         return self.html
 
     def _parse(self, input):
@@ -152,8 +162,9 @@ class Document:
             append = False
             if sibling is best_elem:
                 append = True
-            sibling_key = sibling #HashableElement(sibling)
-            if sibling_key in candidates and candidates[sibling_key]['content_score'] >= sibling_score_threshold:
+            if sibling in candidates and candidates[sibling]['content_score'] >= sibling_score_threshold:
+                append = True
+            if sibling in self.keep_elements:
                 append = True
 
             if sibling.tag == "p":
@@ -283,6 +294,8 @@ class Document:
 
     def remove_unlikely_candidates(self):
         for elem in self.html.iter():
+            if elem in self.keep_elements:
+                continue
             s = "%s %s" % (elem.get('class', ''), elem.get('id', ''))
             #self.debug(s)
             if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag != 'body':
@@ -337,7 +350,7 @@ class Document:
         allowed = {}
         # Conditionally clean <table>s, <ul>s, and <div>s
         for el in self.reverse_tags(node, "table", "ul", "div"):
-            if el in allowed:
+            if el in allowed or el in self.keep_elements:
                 continue
             weight = self.class_weight(el)
             if el in candidates:
@@ -450,46 +463,17 @@ class Document:
                         #self.debug("pname %s pweight %.3f" %(pname, pweight))
                         el.drop_tree()
 
-        for el in ([node] + [n for n in node.iter()]):
-            if not (self.options['attributes']):
-                #el.attrib = {} #FIXME:Checkout the effects of disabling this
-                pass
-
         return clean_attributes(tounicode(node))
 
-class HashableElement():
-    def __init__(self, node):
-        self.node = node
-        self._path = None
-
-    def _get_path(self):
-        if self._path is None:
-            reverse_path = []
-            node = self.node
-            while node is not None:
-                node_id = (node.tag, tuple(node.attrib.items()), node.text)
-                reverse_path.append(node_id)
-                node = node.getparent()
-            self._path = tuple(reverse_path)
-        return self._path
-    path = property(_get_path)
-
-    def __hash__(self):
-        return hash(self.path)
-
-    def __eq__(self, other):
-        return self.path == other.path
-
-    def __getattr__(self, tag):
-        return getattr(self.node, tag)
-
 def option_parser():
     from calibre.utils.config import OptionParser
     parser = OptionParser(usage='%prog: [options] file')
     parser.add_option('-v', '--verbose', default=False, action='store_true',
-            dest='verbose',
-            help=_('Show detailed output information. Useful for debugging'))
+            dest='verbose',
+            help='Show detailed output information. Useful for debugging')
+    parser.add_option('-k', '--keep-elements', default=None, action='store',
+            dest='keep_elements',
+            help='XPath specifying elements that should not be removed')
 
     return parser
 
@@ -506,7 +490,12 @@ def main():
         raw = f.read()
 
     enc = sys.__stdout__.encoding or 'utf-8'
-    print Document(raw, default_log, debug=options.verbose).summary().encode(enc, 'replace')
+    if options.verbose:
+        default_log.filter_level = default_log.DEBUG
+    print (Document(raw, default_log,
+            debug=options.verbose,
+            keep_elements=options.keep_elements).summary().encode(enc, 'replace'))
 
 if __name__ == '__main__':
     main()
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 436612af7e..b7efd611e0 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -144,6 +144,18 @@ class BasicNewsRecipe(Recipe):
     #: manually (though manual cleanup will always be superior).
     auto_cleanup = False
 
+    #: Specify elements that the auto cleanup algorithm should never remove
+    #: The syntax is an XPath expression. For example::
+    #:
+    #: auto_cleanup_keep = '//div[@id="article-image"]' will keep all divs with
+    #: id="article-image"
+    #: auto_cleanup_keep = '//*[@class="important"]' will keep all elements
+    #: with class="important"
+    #: auto_cleanup_keep = '//div[@id="article-image"]|//span[@class="important"]'
+    #: will keep all divs with id="article-image" and spans
+    #: with class="important"
+    auto_cleanup_keep = None
+
     #: Specify any extra :term:`CSS` that should be added to downloaded :term:`HTML` files
     #: It will be inserted into `