From f5f81cbe85827e01cfc936eacca38ad614f4f4d0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 19 Aug 2011 18:49:20 -0600 Subject: [PATCH 01/39] ... --- setup/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/install.py b/setup/install.py index 42df360b56..4194f7ed26 100644 --- a/setup/install.py +++ b/setup/install.py @@ -55,7 +55,7 @@ class Develop(Command): short_description = 'Setup a development environment for calibre' MODE = 0755 - sub_commands = ['build', 'resources', 'gui'] + sub_commands = ['build', 'resources', 'iso639', 'gui',] def add_postinstall_options(self, parser): parser.add_option('--make-errors-fatal', action='store_true', default=False, From 12072ac7d7d43f4289b126a5b53381d078e12d1b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 19 Aug 2011 19:05:00 -0600 Subject: [PATCH 02/39] ... --- src/calibre/utils/icu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index d5bef449c4..4daec9d553 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -35,7 +35,7 @@ def load_icu(): if _icu is None: print plugins['icu'][1] else: - if not _icu.ok: + if not getattr(_icu, 'ok', False): print 'icu not ok' _icu = None return _icu From ee3baf7dcf460586d991d69a42dbbc5b264653c9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 19 Aug 2011 19:29:41 -0600 Subject: [PATCH 03/39] Fix --clean-all --- setup/gui.py | 9 +++++---- setup/resources.py | 7 ++++++- setup/translations.py | 14 ++++++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/setup/gui.py b/setup/gui.py index 058a3f052f..912760ddf8 100644 --- a/setup/gui.py +++ b/setup/gui.py @@ -17,8 +17,8 @@ class GUI(Command): @classmethod def find_forms(cls): - from calibre.gui2 import find_forms - return find_forms(cls.SRC) + # We do not use the calibre function find_forms as + # mporting calibre.gui2 may not work forms = [] for root, _, files in os.walk(cls.PATH): for name in files: @@ -29,8 +29,9 @@ class GUI(Command): @classmethod def form_to_compiled_form(cls, form): - from calibre.gui2 import form_to_compiled_form - return form_to_compiled_form(form) + # We do not use the calibre function form_to_compiled_form as + # importing calibre.gui2 may not work + return form.rpartition('.')[0]+'_ui.py' def run(self, opts): self.build_forms() diff --git a/setup/resources.py b/setup/resources.py index 41068f78a0..ee72a98cb6 100644 --- a/setup/resources.py +++ b/setup/resources.py @@ -219,12 +219,17 @@ class Resources(Command): json.dump(function_dict, open(dest, 'wb'), indent=4) def clean(self): - for x in ('scripts', 'recipes', 'ebook-convert-complete'): + for x in ('scripts', 'ebook-convert-complete'): x = self.j(self.RESOURCES, x+'.pickle') if os.path.exists(x): os.remove(x) from setup.commands import kakasi kakasi.clean() + for x in ('builtin_recipes.xml', 'builtin_recipes.zip', + 'template-functions.json'): + x = self.j(self.RESOURCES, x) + if os.path.exists(x): + os.remove(x) diff --git a/setup/translations.py b/setup/translations.py index 2e8e6d52f3..3523272770 100644 --- a/setup/translations.py +++ b/setup/translations.py @@ -206,6 +206,10 @@ class Translations(POT): # {{{ for x in (i, j, d): if os.path.exists(x): os.remove(x) + zf = self.DEST + '.zip' + if os.path.exists(zf): + os.remove(zf) + # }}} class GetTranslations(Translations): @@ -273,13 +277,14 @@ class GetTranslations(Translations): class ISO639(Command): description = 'Compile translations for ISO 639 codes' + DEST = 
os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization', + 'iso639.pickle') def run(self, opts): src = self.j(self.d(self.SRC), 'setup', 'iso639.xml') if not os.path.exists(src): raise Exception(src + ' does not exist') - dest = self.j(self.d(self.SRC), 'resources', 'localization', - 'iso639.pickle') + dest = self.DEST if not self.newer(dest, src): self.info('Pickled code is up to date') return @@ -322,3 +327,8 @@ class ISO639(Command): '3to2':m3to2, '3bto3t':m3bto3t, 'name_map':nm} dump(x, open(dest, 'wb'), -1) + def clean(self): + if os.path.exists(self.DEST): + os.remove(self.DEST) + + From f27438b44a713713ee3e3cacc61ef0b319793efd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 19 Aug 2011 21:09:14 -0600 Subject: [PATCH 04/39] Fix HBR --- recipes/hbr.recipe | 85 +++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index 1152a48784..30cf54bf8d 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -13,6 +13,8 @@ class HBR(BasicNewsRecipe): no_stylesheets = True LOGIN_URL = 'http://hbr.org/login?request_url=/' + LOGOUT_URL = 'http://hbr.org/logout?request_url=/' + INDEX = 'http://hbr.org/archive-toc/BR' keep_only_tags = [dict(name='div', id='pageContainer')] @@ -34,6 +36,7 @@ class HBR(BasicNewsRecipe): def get_browser(self): br = BasicNewsRecipe.get_browser(self) + #''' br.open(self.LOGIN_URL) br.select_form(name='signin-form') br['signin-form:username'] = self.username @@ -42,9 +45,13 @@ class HBR(BasicNewsRecipe): if 'My Account' not in raw: raise Exception('Failed to login, are you sure your username and password are correct?') self.logout_url = None - link = br.find_link(text='Sign out') - if link: - self.logout_url = link.absolute_url + try: + link = br.find_link(text='Sign out') + if link: + self.logout_url = link.absolute_url + except: + self.logout_url = self.LOGOUT_URL + #''' return br def cleanup(self): @@ -57,6 +64,8 @@ class HBR(BasicNewsRecipe): def hbr_get_toc(self): + #return self.index_to_soup(open('/t/hbr.html').read()) + today = date.today() future = today + timedelta(days=30) for x in [x.strftime('%y%m') for x in (future, today)]: @@ -66,53 +75,43 @@ class HBR(BasicNewsRecipe): return soup raise Exception('Could not find current issue') - def hbr_parse_section(self, container, feeds): - current_section = None - current_articles = [] - for x in container.findAll(name=['li', 'h3', 'h4']): - if x.name in ['h3', 'h4'] and not x.findAll(True): - if current_section and current_articles: - feeds.append((current_section, current_articles)) - current_section = self.tag_to_string(x) - current_articles = [] - self.log('\tFound section:', current_section) - if x.name == 'li': - a = x.find('a', href=True) - if a is not None: - title = self.tag_to_string(a) - url = a.get('href') - if '/ar/' not in url: - continue - if url.startswith('/'): - url = 'http://hbr.org'+url - url = self.map_url(url) - p = x.find('p') - desc = '' - if p is not None: - desc = self.tag_to_string(p) - if not title or not url: - continue - self.log('\t\tFound article:', title) - self.log('\t\t\t', url) - self.log('\t\t\t', desc) - current_articles.append({'title':title, 'url':url, - 'description':desc, 'date':''}) - if current_section and current_articles: - feeds.append((current_section, current_articles)) - - - def hbr_parse_toc(self, soup): feeds = [] - features = soup.find(id='issueFeaturesContent') - self.hbr_parse_section(features, feeds) - departments = soup.find(id='issueDepartments') - 
self.hbr_parse_section(departments, feeds) + current_section = None + articles = [] + for x in soup.find(id='archiveToc').findAll(['h3', 'h4']): + if x.name == 'h3': + if current_section is not None and articles: + feeds.append((current_section, articles)) + current_section = self.tag_to_string(x).capitalize() + articles = [] + self.log('\tFound section:', current_section) + else: + a = x.find('a', href=True) + if a is None: continue + title = self.tag_to_string(a) + url = a['href'] + if '/ar/' not in url: + continue + if url.startswith('/'): + url = 'http://hbr.org' + url + url = self.map_url(url) + p = x.parent.find('p') + desc = '' + if p is not None: + desc = self.tag_to_string(p) + self.log('\t\tFound article:', title) + self.log('\t\t\t', url) + self.log('\t\t\t', desc) + + articles.append({'title':title, 'url':url, 'description':desc, + 'date':''}) return feeds def parse_index(self): soup = self.hbr_get_toc() + #open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8')) feeds = self.hbr_parse_toc(soup) return feeds From 7a78fb5e9ac8e17cc718000ab9d56afdcd6dd98a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 19 Aug 2011 21:18:29 -0600 Subject: [PATCH 05/39] Fix HBR Blogs --- recipes/hbr.recipe | 3 +- recipes/hbr_blogs.recipe | 148 ++++++--------------------------------- 2 files changed, 23 insertions(+), 128 deletions(-) diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index 30cf54bf8d..214ae14f33 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -36,6 +36,8 @@ class HBR(BasicNewsRecipe): def get_browser(self): br = BasicNewsRecipe.get_browser(self) + self.logout_url = None + #''' br.open(self.LOGIN_URL) br.select_form(name='signin-form') @@ -44,7 +46,6 @@ class HBR(BasicNewsRecipe): raw = br.submit().read() if 'My Account' not in raw: raise Exception('Failed to login, are you sure your username and password are correct?') - self.logout_url = None try: link = br.find_link(text='Sign out') if link: diff --git a/recipes/hbr_blogs.recipe b/recipes/hbr_blogs.recipe index acee567d8d..0ca205ab5c 100644 --- a/recipes/hbr_blogs.recipe +++ b/recipes/hbr_blogs.recipe @@ -11,28 +11,16 @@ class HBR(BasicNewsRecipe): no_stylesheets = True LOGIN_URL = 'http://hbr.org/login?request_url=/' + LOGOUT_URL = 'http://hbr.org/logout?request_url=/' + INDEX = 'http://hbr.org/current' - # - # Blog Stuff - # - - - INCLUDE_BLOGS = True - INCLUDE_ARTICLES = False - - # option-specific settings. 
- - if INCLUDE_BLOGS == True: - remove_tags_after = dict(id='articleBody') - remove_tags_before = dict(id='pageFeature') - feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')] - oldest_article = 30 - max_articles_per_feed = 100 - use_embedded_content = False - else: - timefmt = ' [%B %Y]' - + remove_tags_after = dict(id='articleBody') + remove_tags_before = dict(id='pageFeature') + feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')] + oldest_article = 30 + max_articles_per_feed = 100 + use_embedded_content = False keep_only_tags = [ dict(name='div', id='pageContainer') ] @@ -41,21 +29,15 @@ class HBR(BasicNewsRecipe): 'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn', 'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR', 'articleToolbarTop','articleToolbarBottom', 'articleToolbarRD', - 'mailingListTout', 'partnerCenter', 'pageFooter']), - dict(name='iframe')] + 'mailingListTout', 'partnerCenter', 'pageFooter', 'shareWidgetTop']), + dict(name=['iframe', 'style'])] - extra_css = ''' - a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; } - .article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;} - h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; } - h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; } - #articleBody{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;} - #summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;} - ''' -#------------------------------------------------------------------------------------------------- def get_browser(self): br = BasicNewsRecipe.get_browser(self) + self.logout_url = None + + #''' br.open(self.LOGIN_URL) br.select_form(name='signin-form') br['signin-form:username'] = self.username @@ -63,11 +45,15 @@ class HBR(BasicNewsRecipe): raw = br.submit().read() if 'My Account' not in raw: raise Exception('Failed to login, are you sure your username and password are correct?') - self.logout_url = None - link = br.find_link(text='Sign out') - if link: - self.logout_url = link.absolute_url + try: + link = br.find_link(text='Sign out') + if link: + self.logout_url = link.absolute_url + except: + self.logout_url = self.LOGOUT_URL + #''' return br + #------------------------------------------------------------------------------------------------- def cleanup(self): if self.logout_url is not None: @@ -76,99 +62,7 @@ class HBR(BasicNewsRecipe): def map_url(self, url): if url.endswith('/ar/1'): return url[:-1]+'pr' -#------------------------------------------------------------------------------------------------- - def hbr_get_toc(self): - soup = self.index_to_soup(self.INDEX) - url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href') - return self.index_to_soup('http://hbr.org'+url) - -#------------------------------------------------------------------------------------------------- - - def hbr_parse_section(self, container, feeds): - current_section = None - current_articles = [] - for x in container.findAll(name=['li', 'h3', 'h4']): - if x.name in ['h3', 'h4'] and not x.findAll(True): - if current_section and current_articles: - feeds.append((current_section, current_articles)) - current_section = self.tag_to_string(x) - current_articles = [] - self.log('\tFound section:', current_section) - if x.name == 'li': - a = x.find('a', href=True) - if a is not 
None: - title = self.tag_to_string(a) - url = a.get('href') - if '/ar/' not in url: - continue - if url.startswith('/'): - url = 'http://hbr.org'+url - url = self.map_url(url) - p = x.find('p') - desc = '' - if p is not None: - desc = self.tag_to_string(p) - if not title or not url: - continue - self.log('\t\tFound article:', title) - self.log('\t\t\t', url) - self.log('\t\t\t', desc) - current_articles.append({'title':title, 'url':url, - 'description':desc, 'date':''}) - if current_section and current_articles: - feeds.append((current_section, current_articles)) - -#------------------------------------------------------------------------------------------------- - - def hbr_parse_toc(self, soup): - feeds = [] - features = soup.find(id='issueFeaturesContent') - self.hbr_parse_section(features, feeds) - departments = soup.find(id='issueDepartments') - self.hbr_parse_section(departments, feeds) - return feeds -#------------------------------------------------------------------------------------------------- - def feed_to_index_append(self, feedObject, masterFeed): - # Loop thru the feed object and build the correct type of article list - for feed in feedObject: - # build the correct structure from the feed object - newArticles = [] - for article in feed.articles: - newArt = { - 'title' : article.title, - 'url' : article.url, - 'date' : article.date, - 'description' : article.text_summary - } - newArticles.append(newArt) - - # Append the earliest/latest dates of the feed to the feed title - startDate, endDate = self.get_feed_dates(feed, '%d-%b') - newFeedTitle = feed.title + ' (' + startDate + ' thru ' + endDate + ')' - - # append the newly-built list object to the index object passed in - # as masterFeed. - masterFeed.append( (newFeedTitle,newArticles) ) - -#------------------------------------------------------------------------------------------------- - def get_feed_dates(self, feedObject, dateMask): - startDate = feedObject.articles[len(feedObject.articles)-1].localtime.strftime(dateMask) - endDate = feedObject.articles[0].localtime.strftime(dateMask) - - return startDate, endDate - -#------------------------------------------------------------------------------------------------- - - def parse_index(self): - if self.INCLUDE_ARTICLES == True: - soup = self.hbr_get_toc() - feeds = self.hbr_parse_toc(soup) - else: - return BasicNewsRecipe.parse_index(self) - - return feeds -#------------------------------------------------------------------------------------------------- def get_cover_url(self): cover_url = None index = 'http://hbr.org/current' From 16af96badc84850fe2e707d0322783012626f304 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 19 Aug 2011 21:52:51 -0600 Subject: [PATCH 06/39] Brasil de Fato by Alex Mitrani --- recipes/brasil_de_fato.recipe | 31 +++++++++++++++++++++++++++++++ recipes/hbr_blogs.recipe | 2 +- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 recipes/brasil_de_fato.recipe diff --git a/recipes/brasil_de_fato.recipe b/recipes/brasil_de_fato.recipe new file mode 100644 index 0000000000..ba7636c703 --- /dev/null +++ b/recipes/brasil_de_fato.recipe @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class BrasilDeFato(BasicNewsRecipe): + news = True + title = u'Brasil de Fato' + __author__ = 'Alex Mitrani' + description = u'Uma visão popular do Brasil e do mundo.' 
+ publisher = u'SOCIEDADE EDITORIAL BRASIL DE FATO' + category = 'news, politics, Brazil, rss, Portuguese' + oldest_article = 10 + max_articles_per_feed = 100 + summary_length = 1000 + language = 'pt_BR' + + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg' + keep_only_tags = [dict(name='div', attrs={'id':'main'})] + remove_tags = [dict(name='div', attrs={'class':'links'})] + remove_tags_after = [dict(name='div', attrs={'class':'links'})] + + feeds = [(u'Nacional', u'http://www.brasildefato.com.br/rss_nacional') + ,(u'Internacional', u'http://www.brasildefato.com.br/rss_internacional') + ,(u'Entrevista', u'http://www.brasildefato.com.br/rss_entrevista') + ,(u'Cultura', u'http://www.brasildefato.com.br/rss_cultura') + ,(u'Análise', u'http://www.brasildefato.com.br/rss_analise') + ] diff --git a/recipes/hbr_blogs.recipe b/recipes/hbr_blogs.recipe index 0ca205ab5c..0deaef7a73 100644 --- a/recipes/hbr_blogs.recipe +++ b/recipes/hbr_blogs.recipe @@ -6,7 +6,7 @@ class HBR(BasicNewsRecipe): title = 'Harvard Business Review Blogs' description = 'To subscribe go to http://hbr.harvardbusiness.org' needs_subscription = True - __author__ = 'Kovid Goyal, enhanced by BrianG' + __author__ = 'Kovid Goyal' language = 'en' no_stylesheets = True From 039d2ae54f0bf8a3be4cbffa8a48ee602925f44c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 19 Aug 2011 21:57:04 -0600 Subject: [PATCH 07/39] Fluter by Armin Geller --- recipes/brasil_de_fato.recipe | 62 +++++++++++++++++------------------ recipes/fluter_de.recipe | 39 ++++++++++++++++++++++ 2 files changed, 70 insertions(+), 31 deletions(-) create mode 100644 recipes/fluter_de.recipe diff --git a/recipes/brasil_de_fato.recipe b/recipes/brasil_de_fato.recipe index ba7636c703..d060544ece 100644 --- a/recipes/brasil_de_fato.recipe +++ b/recipes/brasil_de_fato.recipe @@ -1,31 +1,31 @@ -# -*- coding: utf-8 -*- - -from calibre.web.feeds.news import BasicNewsRecipe - -class BrasilDeFato(BasicNewsRecipe): - news = True - title = u'Brasil de Fato' - __author__ = 'Alex Mitrani' - description = u'Uma visão popular do Brasil e do mundo.' - publisher = u'SOCIEDADE EDITORIAL BRASIL DE FATO' - category = 'news, politics, Brazil, rss, Portuguese' - oldest_article = 10 - max_articles_per_feed = 100 - summary_length = 1000 - language = 'pt_BR' - - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - remove_empty_feeds = True - masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg' - keep_only_tags = [dict(name='div', attrs={'id':'main'})] - remove_tags = [dict(name='div', attrs={'class':'links'})] - remove_tags_after = [dict(name='div', attrs={'class':'links'})] - - feeds = [(u'Nacional', u'http://www.brasildefato.com.br/rss_nacional') - ,(u'Internacional', u'http://www.brasildefato.com.br/rss_internacional') - ,(u'Entrevista', u'http://www.brasildefato.com.br/rss_entrevista') - ,(u'Cultura', u'http://www.brasildefato.com.br/rss_cultura') - ,(u'Análise', u'http://www.brasildefato.com.br/rss_analise') - ] +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class BrasilDeFato(BasicNewsRecipe): + news = True + title = u'Brasil de Fato' + __author__ = 'Alex Mitrani' + description = u'Uma visão popular do Brasil e do mundo.' 
+ publisher = u'SOCIEDADE EDITORIAL BRASIL DE FATO' + category = 'news, politics, Brazil, rss, Portuguese' + oldest_article = 10 + max_articles_per_feed = 100 + summary_length = 1000 + language = 'pt_BR' + + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg' + keep_only_tags = [dict(name='div', attrs={'id':'main'})] + remove_tags = [dict(name='div', attrs={'class':'links'})] + remove_tags_after = [dict(name='div', attrs={'class':'links'})] + + feeds = [(u'Nacional', u'http://www.brasildefato.com.br/rss_nacional') + ,(u'Internacional', u'http://www.brasildefato.com.br/rss_internacional') + ,(u'Entrevista', u'http://www.brasildefato.com.br/rss_entrevista') + ,(u'Cultura', u'http://www.brasildefato.com.br/rss_cultura') + ,(u'Análise', u'http://www.brasildefato.com.br/rss_analise') + ] diff --git a/recipes/fluter_de.recipe b/recipes/fluter_de.recipe new file mode 100644 index 0000000000..1f8576cf81 --- /dev/null +++ b/recipes/fluter_de.recipe @@ -0,0 +1,39 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch fluter.de +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1313693926(BasicNewsRecipe): + + title = u'Fluter' + description = 'fluter.de Magazin der Bundeszentrale für politische Bildung/bpb' + language = 'de' + encoding = 'UTF-8' + + __author__ = 'Armin Geller' # 2011-08-19 + + oldest_article = 7 + max_articles_per_feed = 50 + + + remove_tags = [ + dict(name='div', attrs={'id':["comments"]}), + dict(attrs={'class':['commentlink']}), + ] + + + keep_only_tags = [ + dict(name='div', attrs={'class':["grid_8 articleText"]}), + dict(name='div', attrs={'class':["articleTextInnerText"]}), + ] + + feeds = [ + (u'Inhalt:', u'http://www.fluter.de/de/?tpl=907'), + ] + + extra_css = '.cs_img {margin-right: 10pt;}' + From 9d7ae9090bb12b7146b5e5ca4a2286dd84784a05 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Aug 2011 11:45:04 -0600 Subject: [PATCH 08/39] Various Polish news sources by fenuks --- recipes/android_com_pl.recipe | 12 ++++++++++ recipes/bash_org_pl.recipe | 15 ++++++++++++ recipes/cd_action.recipe | 16 +++++++++++++ recipes/dobreprogamy.recipe | 21 ++++++++++++++++ recipes/film_web.recipe | 40 +++++++++++++++++++++++++++++++ recipes/gram_pl.recipe | 16 +++++++++++++ recipes/icons/android_com_pl.png | Bin 0 -> 1452 bytes recipes/icons/bash_org_pl.png | Bin 0 -> 391 bytes recipes/icons/cd_action.png | Bin 0 -> 972 bytes recipes/icons/dobreprogamy.png | Bin 0 -> 1127 bytes recipes/icons/film_web.png | Bin 0 -> 3433 bytes recipes/icons/gram_pl.png | Bin 0 -> 1101 bytes recipes/icons/niebezpiecznik.png | Bin 0 -> 795 bytes recipes/icons/wnp.png | Bin 0 -> 576 bytes recipes/niebezpiecznik.recipe | 16 +++++++++++++ recipes/wnp.recipe | 21 ++++++++++++++++ 16 files changed, 157 insertions(+) create mode 100644 recipes/android_com_pl.recipe create mode 100644 recipes/bash_org_pl.recipe create mode 100644 recipes/cd_action.recipe create mode 100644 recipes/dobreprogamy.recipe create mode 100644 recipes/film_web.recipe create mode 100644 recipes/gram_pl.recipe create mode 100644 recipes/icons/android_com_pl.png create mode 100644 recipes/icons/bash_org_pl.png create mode 100644 recipes/icons/cd_action.png create mode 100644 recipes/icons/dobreprogamy.png create mode 100644 recipes/icons/film_web.png create mode 100644 recipes/icons/gram_pl.png create mode 100644 
recipes/icons/niebezpiecznik.png create mode 100644 recipes/icons/wnp.png create mode 100644 recipes/niebezpiecznik.recipe create mode 100644 recipes/wnp.recipe diff --git a/recipes/android_com_pl.recipe b/recipes/android_com_pl.recipe new file mode 100644 index 0000000000..a44d5e560a --- /dev/null +++ b/recipes/android_com_pl.recipe @@ -0,0 +1,12 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class Android_com_pl(BasicNewsRecipe): + title = u'Android.com.pl' + __author__ = 'fenuks' + description = 'Android.com.pl - biggest polish Android site' + category = 'Android, mobile' + language = 'pl' + cover_url =u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png' + oldest_article = 8 + max_articles_per_feed = 100 + feeds = [(u'Android', u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss')] diff --git a/recipes/bash_org_pl.recipe b/recipes/bash_org_pl.recipe new file mode 100644 index 0000000000..037870ed6c --- /dev/null +++ b/recipes/bash_org_pl.recipe @@ -0,0 +1,15 @@ +from calibre.web.feeds.news import BasicNewsRecipe + + +class Bash_org_pl(BasicNewsRecipe): + title = u'Bash.org.pl' + __author__ = 'fenuks' + description = 'Bash.org.pl - funny quotations from IRC discussions' + category = 'funny quotations, humour' + language = 'pl' + oldest_article = 15 + cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png' + max_articles_per_feed = 100 + no_stylesheets= True + keep_only_tags= [dict(name='div', attrs={'class':'quote post-content post-body'})] + feeds = [(u'Cytaty', u'http://bash.org.pl/rss')] diff --git a/recipes/cd_action.recipe b/recipes/cd_action.recipe new file mode 100644 index 0000000000..b4cf6b326c --- /dev/null +++ b/recipes/cd_action.recipe @@ -0,0 +1,16 @@ +from calibre.web.feeds.news import BasicNewsRecipe + + +class CD_Action(BasicNewsRecipe): + title = u'CD-Action' + __author__ = 'fenuks' + description = 'cdaction.pl - polish magazine about games site' + category = 'games' + language = 'pl' + oldest_article = 8 + max_articles_per_feed = 100 + no_stylesheets= True + cover_url =u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG' + keep_only_tags= dict(id='news_content') + remove_tags_after= dict(name='div', attrs={'class':'tresc'}) + feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')] diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe new file mode 100644 index 0000000000..d9b2db591d --- /dev/null +++ b/recipes/dobreprogamy.recipe @@ -0,0 +1,21 @@ +from calibre.web.feeds.news import BasicNewsRecipe + + +class Dobreprogramy_pl(BasicNewsRecipe): + title = 'Dobreprogramy.pl' + __author__ = 'fenuks' + __licence__ ='GPL v3' + category = 'IT' + language = 'pl' + cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png' + description = u'AktualnoÅ›ci i blogi z dobreprogramy.pl' + encoding = 'utf-8' + no_stylesheets = True + language = 'pl' + extra_css = '.title {font-size:22px;}' + oldest_article = 8 + max_articles_per_feed = 100 + remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})] + keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})] + feeds = [(u'AktualnoÅ›ci', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), + ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe new file mode 100644 index 
0000000000..0061573742 --- /dev/null +++ b/recipes/film_web.recipe @@ -0,0 +1,40 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class Filmweb_pl(BasicNewsRecipe): + title = u'FilmWeb' + __author__ = 'fenuks' + description = 'FilmWeb - biggest polish movie site' + cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png' + category = 'movies' + language = 'pl' + oldest_article = 8 + max_articles_per_feed = 100 + no_stylesheets= True + extra_css = '.hdrBig {font-size:22px;}' + remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})] + keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})] + feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'), + (u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'), + (u'News / Festiwale, nagrody i przeglÄ…dy', u'http://www.filmweb.pl/feed/news/category/festival'), + (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'), + (u'News / Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'), + (u'News / Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'), + (u'News / Dystrybucja dvd / blu-ray', u'http://www.filmweb.pl/feed/news/category/video'), + (u'News / Dystrybucja kinowa', u'http://www.filmweb.pl/feed/news/category/cinema'), + (u'News / off', u'http://www.filmweb.pl/feed/news/category/off'), + (u'News / Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'), + (u'News / Organizacje branżowe', u'http://www.filmweb.pl/feed/news/category/organizations'), + (u'News / Internet', u'http://www.filmweb.pl/feed/news/category/internet'), + (u'News / Różne', u'http://www.filmweb.pl/feed/news/category/other'), + (u'News / Kino polskie', u'http://www.filmweb.pl/feed/news/category/polish.cinema'), + (u'News / Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'), + (u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'), + (u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')] + + def skip_ad_pages(self, soup): + skip_tag = soup.find('a', attrs={'class':'welcomeScreenButton'})['href'] + #self.log.warn(skip_tag) + if skip_tag is not None: + return self.index_to_soup(skip_tag, raw=True) + else: + None diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe new file mode 100644 index 0000000000..091c0bb1dc --- /dev/null +++ b/recipes/gram_pl.recipe @@ -0,0 +1,16 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class Gram_pl(BasicNewsRecipe): + title = u'Gram.pl' + __author__ = 'fenuks' + description = 'Gram.pl - site about computer games' + category = 'games' + language = 'pl' + oldest_article = 8 + max_articles_per_feed = 100 + no_stylesheets= True + cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' + remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])] + keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})] + feeds = [(u'gram.pl - informacje', 
u'http://www.gram.pl/feed_news.asp'), + (u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')] diff --git a/recipes/icons/android_com_pl.png b/recipes/icons/android_com_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..d68bac8810ebf42e8f553459a72a49bc6eb211e2 GIT binary patch literal 1452 zcmV;d1ylNoP)@ryq#2ilutS!lk&H@`=)#y<);-tFB^t^oquVMa#%gyP zlgYo(e!g!z-@fzx<(V^Q-g%yLp67F3P^;g*mB}<3oh~r2yEp-k+Z!6*z4P_0ub-Lm_O7j+nvzJWt0yNVk{dV1#w3#R z^5J1GFVJWf7y10IE~Amd>FqTb+}!&6jYd9yaL{BD2nGgBCO*HX$6(;`y1EPo4yU!% zXmoad{Mcx8bZl(Y>+S67>lYSiw3?bvp8#&%djB4vy!`cRfGbxfClP}7_N66}sJGW- z;_(ItjYgqxWW;0=i@Uq$=Y_(ao^RiT!r|f7RiW_dQ!LZgwzA^l($ZowvDsLL!Fcds zagk21t(~7osH*D22Y`x-nHhl6(pRqt0y;Yv7d<`u`%NZycdhpOcTdl$sb9Z*e1?Yf z`fb~qnx?1u{DA?zUL?|LS64kewA%G`S68)qWyQ(q;lrgR2IJAAPSgul^BOS`v%<`V0nl_lb$MHHoCTd3IJLijOZXwYQhcQ&UM&F3-py$>`{! zB09aSY;=^#ynTCimdV8CFc{b}zOOAri3R1|=V%duks zI1jTxXsA-@>{x7?_d*;O~F-tc6-6I(ZU6E-x-7Nq_(B zY=oksZr((wt!-_MMhgm3sQ?h#yBFZxxt10xYO}!Z-C0>|wp6NA;@Ojv2_h)y;zg3& zx392}Mhg!wDWTEA!c;0NtH8kAT!h48yp4myp+f))MR_?vD3#BinTcS}9)OULq9ST` zfJ>J~N6r0%2P-O^oc#PUGXVqw?6RllnKKx1y>)9+5`eF7dO9{LFApJ^thgAVfPmaw zf{2W)ti+q)E!k|GO-aeb1VCBY)D%Er;q&KaftZ-eN*2q*1Jh9`5MWZF5TihV#oXL* z$Q;gz6Bd8n+!7M-%$F|%SXqUI0VF0iHd+p?sAzl~prGKN4ZOUPl9N-$?Z-w6lwh1n~AwO|^K9pCk2Sz_Y)2@wc#9z{MptmdUiYkB+uP$7IID z0602Qh&xoDsu2nerQ-dsAr#_Z0rK*+T7b;Vo*wfhJ3B{3u~;@X`}YHE*bot6neqR% zt!;QX07+tjQUj^7Ds>E!5fBeSHAw>0Mps4OlFNg2^NZ{C*LV%P|Fo zI64dB4nfFd01O6}R4RvtFg4{hu5)q*2LZCOF!lNK-QD;aT3ZR?H}0bkxBQDP05UK!F)c7SEiy1vF)=zbIXW>gEig7ZFfgh*e<}a~03~!q zSaf7zbY(hiZ)9m^c>ppnF)%GKHZ3tZR53C-H8eUfFfA}PIxsN0_{}*000003HJE%?WEV*q9g? zwc?(5>F6&4x>vQtHKHUXu_V7#doc7+8V0W>y9UH2&eGBUO@w1jBrshJ%L)WG2B>gTe~DWM4ffJS=q literal 0 HcmV?d00001 diff --git a/recipes/icons/cd_action.png b/recipes/icons/cd_action.png new file mode 100644 index 0000000000000000000000000000000000000000..823e09a43effb9a62061d4dd6740cc03af68c161 GIT binary patch literal 972 zcmV;-12g=IP)4ODa(m$M@_B57mSD zBKZ*c&1dhee`5U~;m& z{N{$^6pD`zoAK`8o0WuR3dbBH9n3|JZ>-u?+D1FqtQ{ZIGGFt zbh=*v_!u5w5|G+%@901XCUJPUzTVeI;?3sP)+~nSjYf=bZ)ayQ4h9Vdne6lP@iE)9 zOp0VO9uI|}UWfq3?Vg!wXdo*hF;;6^8$~6PCKFA+zAi7*^yw+XU>uJ-912ASz;P;- zL_%T`iAZE;CpR@&1c~wahK4W(KrA*rjq%9|%gW^$z}}ulBNBN${rwmN#_L^K>FUZ& z4Z9xC(vngMacD1S1pv>_&(rk&{>X?(lmTpS4-8OLDz&&s)8VinkW>SeLL^rc35dm5 z7USz{gF!H~T1`@_xfTcn>~@U?!Ng(u03b2DyMu#&{$#tqxY*Vf3_2VPLmo`h=w!rVQK_n` z5O@~+3tk4m`}^8jVd2-8(^*;?jT((vr;#2b=R^W*w&CIGYM;+)?e0cd;lc?F1Q05UK!F)c7SEiy1vF)=zcFgi3gEig7ZFfddkdOZLD03~!qSaf7zbY(hiZ)9m^ uc>ppnF)%GKH!U(TR4_I=Gc-ChGA%GRIxsNnMc3K@0000m2L|p{dOueaK!7MM_DiS8aN`?fL4EmAPh`gC)x*2mO$Z{K7o6|LH zv^h16Vh}~rDwzpxZ=Ms~1VL@j&Bw>#ocH~HIE19h>(~AL`T2(rzj$FX<>qeQobG#n zB^tHac)XoEf9Gzu(a7ViTNjHtogz^z7L5)M3x)alfq>O26c!ZB%nS~SL>5aZWVh$$ zZrb$Uf;QW&TUl8dxOCPzn~JXy7> zqT=1VzP`-N*(CyjEn7M|Qm%MQenp>;%LU@`sj1S^>(^7R+-|3rv$OsF`}YX} zzI_`R$<79T{CN423WY*ub4CUg^mu5DM!UOdoSd{+)~`=86bi98Wdja}R!cizwKg>o zl1MBpJbOlseE&W)R9p-!F1lRDkJH>{tF8tTiQ!?noW@Ax%9U6wW#K=0^hhqx%KGqu z?!ci#v;(7~S}mzY%nY?zIxTubMfNdy=fcJYEPcr zy4C447*wj#Qd+Uc)7nZaotSv|5cvAl?Y?@Ij%aXDt)|Ap;k$Q%&z~a^qfwzq+rZ_^ zb#+BW&COcvsZ-mx1Ac!;2W@pM=5k%T2F%ZojI_1U9eDawr2@vsdwZLjT&_2742IRK zSFTLkz|EU-d0E-EZ4C`FSz#fd(NtF#6#;7X>C+MkaOu*C6ExT96pE4(prxg;v8Kjq zZEwGGr@h_b$jhs#$;&G!AVex9Wcl(GEL2qyqS4S{&?Vfx+h(Ie`}a#E)IbFTP%WWU z0u2nHzW&06`g(xRzkQp}2V^pzPbTZ>Nx{I8BZPEy5z^dD4dmoJe@_1askF9MDh19l zfcFfbj{y`G9z9AsMo3Ewy;oW3_g7Yi!zmazc8n1E6)Tm57>x%Hn#~6ebaskFYuAcI zz&-}>m;p30fISSLyj-O!FLye{Vv|WM-nA?3ojAkbaMTQk!@vN}GXRO?*J!F3@wkOi zT%5M>p9B~yR_Gb6tr~{ct7MqX-Hfube|_&Kz1upkb-jiS0000bbVXQnWMOn=I%9HW 
zVRU5xGB7bQEigDOGB8vzF*-CcIx{gXFg7|cFm!|_lmGw#C3HntbYx+4WjbwdWNBu3 t05UK!FfB1LEiyJ#F)=zaF*-FgEigAaFfcwEzL5X`002ovPDHLkV1f$d05$*s literal 0 HcmV?d00001 diff --git a/recipes/icons/film_web.png b/recipes/icons/film_web.png new file mode 100644 index 0000000000000000000000000000000000000000..3ddcdf1cdeaab29afa1af2962ac7cd93a4ba87aa GIT binary patch literal 3433 zcmV-v4VLnWP)-23N)k{|A2B9GMdDMy2qJ<6C3v8?AeVrG96p@L z<%v%gK|zk0Uv0XJ&g}eKi|jD-P2WFUDe$)WXO9(PELWMt1D0O^~Jw;?o^~4 zI)tC6rJTY>-6cSrlFyqeB$pfFaIS) zPVW8A_Z=8Tk&_!qnKA_p+Pzzwa_iPqN_Y1X3cxxFz;+72R}_HC$}L+$LIBFjR;*aH z3Lr1f&CSb8EY8R$jAwl&d!f10Ea06zfb`FMHyIITJrMp^QHRiSxyf5ys6_SAAXoW-`98QRCI)*BI*Yp{AGp@qc}TjP>PHHP660W0dS=B z^q5e(yB#PJ30vt*N`HO&G%M@UrNQ;lqf@6+QUGq>Mu%i%n3`Hz*3}(6=7zVJb4l|AOO|ZcReLAa5Uxd<1Z)x@f5PMKmVMRBvp|p zG?a{g?wq^3mlwyy$J^UGJ9l@Nl+2nnZyvyd2XS#HPDtA~G#oe(9ZeiDX_BpNXJ>RY z*&56d1_o~(5t!6WO);50d9sHh-Pu^eP5@C+%a^ZQ+1iTzFJJ!Yrx`Q6yh=)nid(*g^ZS6O#BEyE^%BQ8Rppc0Wq&k@buz=Fs zj01;;q8c0XlM(ydkg{Y6<_PI{O-;Ch)zw%95)*B0ot(sC?3bU9x_&+C%$diJPn+iM zo|sr%?Be3#A(7m_&m3{^;E^Mws`&XWTLzGvOd_JT7W>Vb#TI0cB)4!O?rCY6;;SzO z0JDEv8}99p5VrUJJhEbAXH%r(H8g~WM@Ev+4+;tn2FS^oI@QfhA~}3`?%V|nm;*dL zQCayLKmNoCCns0e#KapntgRg#Ij*wO$H&hPptRJ^&e@raqpq%}$8pio3m5wNB_!D`fCsc0P8%*VMv#hgiu^u@C!zbLWd3=in?{H9i_W_ zGX-EPg(S0(5OOZ4tNZdx;t-K&!-g$e0E&xkY)B-WJn7&tb!t))j%#KnyS2uTPfsTy zpP!$bD=)9E4v?EWa-={Y)zs7p6RfS9oA>Se=%dApzxaY}$v<)f8t&$%OX=xZO#%3f z0pG*jOx%kDor>)ARUoSw|=;#>C8<=j-e3os%OF=<2q%=I5i~ zOtZ7Kv+aFs3CmI%p69rgG0q~{(;NAt$q5zWXHr9EIcjDDCZQDYdm|KzTXV_kaI;3I$+31z;wHZ4rQp6sZI8P5^*g1E8=F zCsLAB?BU+ttSn>W2@`sI3knn!wX`_y#*Gmp$asAH^;gpV#f$K8awpiilSRVW zvy&&=+dDWE6euZaX|=TQ`=z94Xz1%pB(mofJG;Jl} zN==Pqby>~L)zTsk+_Pux+I8z#e_Px1bSosi~+= zPJQLCzMf6kjl$}CR1^yp{)vj!cdY9(W}pLFTlo&avdTj5%o)_hi`fb=1K8iWxk90# zVMRr0sk}TIAgkA}qao;k%a^gAhsW8oHa3oqW5*U23Isx-NW|}#lA^6GbwGML>Xa#c z5l~gdCX5EL2nY*f2b_WGJMQVAAUR5RH{Su6OS-!;`^UxM_1Mh~w}hVFxpS75wze7? 
z*RJX78yg!L6%`2tIyyIRmX+be`I?tU9K>-E5$o4)*l_I{&d=NXmtU-`?CniVii=fM zwYBT&`2A8-wY2p0U*iBYAUm5)cqoPAu)LL&{DUH`@3MD=S+mgahY$INGsT?1aY9OS zGoCvtE3x{Nl@%9jXz1u%zkcI}y1GzUQgZXAl9HNQXJ>i2f&xiie9g?Xw6wMsi@*9R z_;=2o!+E*8XJ?NaH-5aeby=B;in{u1sudNAie!Rssz;Cd`1ty^wV?rr4xw6E6&C8~7#J*CgzY2}e!tVF$BiSo zNOmhaIu5~=2@ZZ&LzuGi_NMKtsuUDRhz_QCc}7O2rW_X*78n>5R9!7w&vkWg-BME% z3S(mU{mRQ#R7m+vNU*bWa_Z{B_Hl8j*49I8DK<7(4qIB@*5EM|u^8V2oB+Nrn;JJowhc9 zfBZkW6OhPx@&x-uM4+0Q_GNPg1&oB#r{B(Sj@v}Je;>zTXZ9^yP*<%&$3#afQ93&K zyYndau2n_F0LTvQafJ3M=a{WCLB*Q`MuGp4FaQ&U^}^UtxLM8Y4Zv~=`nBO`!_ zh>aUJZ6a4K!meE`XYiAHpCgo&u?)t<3}iT#O?I}gug88{x1zEa80o_UJ3nB=Y}mjb zPb5-SCKG0wlQU_Ol~r#q_G5M4-~a2c$!*lgsJdEHQ%5H#=8q@uU?I+u0G_o3?Gh1;gjs;U|q>hMc;nAX3b}xEm%-fLk!=u=eOU8;jdot`!UVPAipOt z2NoC0CQCy@{sQ%bg$5p2yu9#;eD^MY;<%@Kd-2wI;sn0^u?L6Y!?FG=DPhC}1Yp1Z z$*{hj7}Q_O%MA^UjXOJU-!?EXGOGI>;ux`bp!Sz9J2;T{gV4}z+d@KWYGgxx@?_Kz zBZdqH3k?=380z!q7RkCt#s|^hT z0Wti-1!rd$7qK`jj6AU1y^HhSzaQ1vnI-u_4gYgT2n2YUad)3YDJ;Yl?&%pUSq`T6 z@2jekZ2hKMTdS)}p1M*}X3w58ht>6)H&OTQMRj#$Ij!Gbe>X?40UaIZQH~rjraXCq z4jFi*2hvB61Ol?61F1-)uWw*LlB1&|3Gssm$%_=btX;f_%HAZ|rHb8F|AJqNSdKtt zPg6^mE~F$UbP;J_V%dLrlGP&$r&?VudJPegX{{kao?Z8{{frPYYyp9 zyS4xT03~!qSaf7zbY(hYa%Ew3WdJfTF)=MLI4v?TR539+G%`9eG%YYTIxsK+A+oLj z001R)MObuXVRU6WZEs|0W_bWIFfchSFf%POGgL4+Ix{&sG&e0UHaajcFe4^y00000 LNkvXXu0mjfe(H17 literal 0 HcmV?d00001 diff --git a/recipes/icons/gram_pl.png b/recipes/icons/gram_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..0a87f28825b077d3fbf9f0f1e298c10239071712 GIT binary patch literal 1101 zcmV-T1hV^yP)qPA4t2{(~q{Gm4@t^D?6gg{VL$HS!YGjLtZ> zW*lioYVzrO-?y)WSy4p4&BNim=R41PJI64-k;yDraFjWA>>^WHd7g1N%9%e`JL7O% zU>uI)%!w0OOm?<@uVG;_GHx&p4fxO8XPTQOVXM_BWQvMDGQbxG_{7llzr8S(no$5V zGoqTUOi77R#FUgwGQeksR08Aip`qbnV084-rK&1mV4$q*^l8%4(vv5F*RSpN@^ZlM zr_6viZ!Rk+zN)Ia8b~B26qJY<(1~ql4jeExGd5d@F&)R_?-W31=jzpK*8MXX~R!qB^dnB`wWj0b;R!1yw*2nqq)$%)WibCdOu)m=LF_lmvRcckbN1YaZO% zN~0qgUbzB6zb@3I)VQF($Z6>fX9#L&7&8$x98+6T?$N24l2xZb)s_&_!ugY z1ipQH^JZP0X`rKH*|O!!f$8a(Vs5X4!5#$}FpRV`GO&2@k|jViDiug1QYmU{fcc4m zF$PE`+Xp33&2^w%cq08R+e$^JL9`X~4r!=5Yn!^SxBi7OYuATi|pS6s%kcJb!-p z@R1|5e$~~qZm(WdD1h$noja*gdS+a%yu5+}N=On5hb19xK-T}efnB?_fnZRkG#HF1 z0JrfxJhVE=w&J!7^& z2l#w`1w9y5(1%9WX?(m>LE5gMIG2mA1A%7>Dz!Wpjmj+f{c^1}$mkaA-D}9tVa19z z#^XsQWmv)t5Q#`7bZaOhx*v_jRVb z+r_+p?_|8*A64$p)t|?CJax>|r(2jkdyL;>wWc$pT00000NkvXXu0mjfPN(ne literal 0 HcmV?d00001 diff --git a/recipes/icons/niebezpiecznik.png b/recipes/icons/niebezpiecznik.png new file mode 100644 index 0000000000000000000000000000000000000000..4188d91d3646c53624267c08378ed6cae2355cbe GIT binary patch literal 795 zcmY+ANlX&~7=`~bEu{^GwsfOA?Myq;j<~lnjRY%{QsUNymLNe~hznlajRy@u z3@QgTDQ=0&$+U1$BNFwZh>=!Jidq@H7!#x77OS2-y!XA!cX>yB4Rs=+S_l9lFXL{^ zQ~W1INWO7rX%he^=BxL6EEe2~6F6ZbY<8R7VRtwk4yVm#M=^^TwHPZ*CNqXvte6GI zaht`88;mB6R&O$+W)#yKDlp7yG@9lbWG7Z|2i&|`gWw#4==8>YiwaSFd4XdtH3(M77wMuo`%}FpYyBU6T0~?2l_0Z}N z7?t7!biwu21uX=3AB`xKs?I9DNnZSG3w-H%U$4yXg6%4wO2d!Y&N*T21>3X=`4C#&Vcpbb9!&nQ>KEh_emsuCtvr^ zylV8xTKWTlu&=qP>Di;niOI=AA@03QvMl@lokpcrmk*sfb9!yCk`1;~ORSd5%eT94E(!s-r>Tu8DB<1$l*);d=)GmfMmKh%# zrBKwyT00nQl(?9%;B`n);erRZJaB1q23V$Ey zZx+g#o(7&^fI}Sf-%!W^0()J9FN6=D_UoLedZ;%+Mnmo?=!HyWK?i*c7DJr z`TO4eM?BImn3dkVd2?X@{ztruZ?j4sKYA1w8+-NY)nC_dzOQfp_xJDTRcn6z`t`!m z_o1rsePOjnj0(Eu#@9Bip8<3zV@Z%-FoVOh8)+a;lDE4HN87!rRX`4Bfk$L9koEv$ zx0Bg+K*ml_7sn8Z%dY3Ii!~Vtuv}2p5$kFR@LiO^Tz#|QD(|?Yc}*3G8`PZ{C-#=1Ka3hk^paX12LROlxzE zEpgcA@cFf_i~2XK6PC$y+F#sL&06u?wEE7pe`Oc=0vXO4Ys|hLy*8iGF2PA?Qt!O9 zTA(LXOI#yLQW8s2t&)pUffR$0fuW(UfrYM-L5QKDm5GU!p@pu2nU#TotHAm?6b-rg 
pDVb@NPz|QKM#doq7FLEvR)(e!4Z5pRLx36>JYD@<);T3K0RT3m;0*u( literal 0 HcmV?d00001 diff --git a/recipes/niebezpiecznik.recipe b/recipes/niebezpiecznik.recipe new file mode 100644 index 0000000000..b33a0a3513 --- /dev/null +++ b/recipes/niebezpiecznik.recipe @@ -0,0 +1,16 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class Niebezpiecznik_pl(BasicNewsRecipe): + title = u'Niebezpiecznik.pl' + __author__ = 'fenuks' + description = 'Niebezpiecznik.pl' + category = 'hacking, IT' + language = 'pl' + oldest_article = 8 + max_articles_per_feed = 100 + no_stylesheets = True + cover_url =u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png' + remove_tags=[dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})] + keep_only_tags= [dict(name='div', attrs={'class':['title', 'entry']})] + feeds = [(u'WiadomoÅ›ci', u'http://feeds.feedburner.com/niebezpiecznik/'), + ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')] diff --git a/recipes/wnp.recipe b/recipes/wnp.recipe new file mode 100644 index 0000000000..e53e4cc66b --- /dev/null +++ b/recipes/wnp.recipe @@ -0,0 +1,21 @@ +from calibre.web.feeds.news import BasicNewsRecipe + + +class AdvancedUserRecipe1312886443(BasicNewsRecipe): + title = u'WNP' + cover_url= 'http://k.wnp.pl/images/wnpLogo.gif' + __author__ = 'fenuks' + description = u'Wirtualny Nowy PrzemysÅ‚' + category = 'economy' + language = 'pl' + oldest_article = 8 + max_articles_per_feed = 100 + no_stylesheets= True + keep_only_tags = dict(name='div', attrs={'id':'contentText'}) + feeds = [(u'WiadomoÅ›ci gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'), + (u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'), + (u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'), + (u'Serwis Hutnictwo', u'http://www.wnp.pl/rss/serwis_rss_3.xml'), + (u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'), + (u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'), + (u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')] From 1fdac7d5a014ca2f73e7cf855126c65ed1d20a5d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Aug 2011 11:46:08 -0600 Subject: [PATCH 09/39] Cvece Zla by Darko Miletic. Fixes #830143 (New recipe for serbian blog Cvece Zla) --- recipes/cvecezla.recipe | 47 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 recipes/cvecezla.recipe diff --git a/recipes/cvecezla.recipe b/recipes/cvecezla.recipe new file mode 100644 index 0000000000..712c898a3e --- /dev/null +++ b/recipes/cvecezla.recipe @@ -0,0 +1,47 @@ + +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +cvecezla.wordpress.com +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CveceZla(BasicNewsRecipe): + title = 'Cvece zla i naopakog' + __author__ = 'Darko Miletic' + description = 'Haoticnost razmisljanja poradja haoticnost pisanja. Muzika, stripovi, igre, knjige, generalno glupiranje...' 
+ oldest_article = 7 + max_articles_per_feed = 100 + language = 'sr' + encoding = 'utf-8' + no_stylesheets = True + use_embedded_content = False + publication_type = 'blog' + extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{display: block } ' + + conversion_options = { + 'comment' : description + , 'tags' : 'igre, muzika, film, blog, Srbija' + , 'publisher': 'Mehmet Krljic' + , 'language' : language + } + + preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + + remove_tags_before = dict(attrs={'class':'navigation'}) + remove_tags_after = dict(attrs={'class':'commentlist'}) + remove_tags = [ + dict(attrs={'class':['postmetadata alt','sharedaddy sharedaddy-dark sd-like-enabled sd-sharing-enabled','reply','navigation']}) + ,dict(attrs={'id':'respond'}) + ] + + feeds = [(u'Clanci', u'http://cvecezla.wordpress.com/feed/')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + + From 564ffc7e947ed468d0bc1a30a66f97cb0ed51d20 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Aug 2011 15:47:51 -0600 Subject: [PATCH 10/39] ... --- src/calibre/gui2/__init__.py | 4 +++- src/calibre/gui2/preferences/look_feel.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index fc02ad7fae..94f392ae65 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -186,7 +186,9 @@ def _config(): # {{{ c.add_opt('enforce_cpu_limit', default=True, help=_('Limit max simultaneous jobs to number of CPUs')) c.add_opt('gui_layout', choices=['wide', 'narrow'], - help=_('The layout of the user interface'), default='wide') + help=_('The layout of the user interface.\nWide has the ' + 'book details panel on the right and narrow has ' + 'it at the bottom.'), default='wide') c.add_opt('show_avg_rating', default=True, help=_('Show the average rating per item indication in the tag browser')) c.add_opt('disable_animations', default=False, diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index b34c5e6042..c87cad7cad 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -10,12 +10,11 @@ from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog, from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList from calibre.gui2.preferences.look_feel_ui import Ui_Form -from calibre.gui2 import config, gprefs, qt_app +from calibre.gui2 import config, gprefs, qt_app, NONE from calibre.utils.localization import (available_translations, get_language, get_lang) from calibre.utils.config import prefs from calibre.utils.icu import sort_key -from calibre.gui2 import NONE from calibre.gui2.book_details import get_field_list from calibre.gui2.preferences.coloring import EditRules From 2337570c9f60220f33d1ebaf36af0bb1061bb929 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Aug 2011 19:48:25 -0600 Subject: [PATCH 11/39] Fix #829912 (Edit metadata dialog: Splitters' positions are not saved) --- src/calibre/gui2/__init__.py | 2 +- src/calibre/gui2/languages.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/__init__.py 
b/src/calibre/gui2/__init__.py index 94f392ae65..1967f734cc 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -186,7 +186,7 @@ def _config(): # {{{ c.add_opt('enforce_cpu_limit', default=True, help=_('Limit max simultaneous jobs to number of CPUs')) c.add_opt('gui_layout', choices=['wide', 'narrow'], - help=_('The layout of the user interface.\nWide has the ' + help=_('The layout of the user interface. Wide has the ' 'book details panel on the right and narrow has ' 'it at the bottom.'), default='wide') c.add_opt('show_avg_rating', default=True, diff --git a/src/calibre/gui2/languages.py b/src/calibre/gui2/languages.py index 1e192a0c94..3398081c5f 100644 --- a/src/calibre/gui2/languages.py +++ b/src/calibre/gui2/languages.py @@ -16,6 +16,8 @@ class LanguagesEdit(MultiCompleteComboBox): def __init__(self, parent=None): MultiCompleteComboBox.__init__(self, parent) + self.setSizeAdjustPolicy(self.AdjustToMinimumContentsLengthWithIcon) + self.setMinimumContentsLength(20) self._lang_map = lang_map() self.names_with_commas = [x for x in self._lang_map.itervalues() if ',' in x] self.comma_map = {k:k.replace(',', '|') for k in self.names_with_commas} From 2a80b4ac998ddb337d6182f66dc968e6780eaa41 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Aug 2011 21:12:50 -0600 Subject: [PATCH 12/39] Fix #830060 (Houston Chronicle news fetch fails) --- recipes/houston_chronicle.recipe | 67 ++++++++++---------------------- 1 file changed, 20 insertions(+), 47 deletions(-) diff --git a/recipes/houston_chronicle.recipe b/recipes/houston_chronicle.recipe index 3390228455..8d231dac16 100644 --- a/recipes/houston_chronicle.recipe +++ b/recipes/houston_chronicle.recipe @@ -1,8 +1,6 @@ #!/usr/bin/env python # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -import string, pprint - from calibre.web.feeds.news import BasicNewsRecipe class HoustonChronicle(BasicNewsRecipe): @@ -13,53 +11,28 @@ class HoustonChronicle(BasicNewsRecipe): language = 'en' timefmt = ' [%a, %d %b, %Y]' no_stylesheets = True + use_embedded_content = False + remove_attributes = ['style'] - keep_only_tags = [ - dict(id=['story-head', 'story']) - ] - - remove_tags = [ - dict(id=['share-module', 'resource-box', - 'resource-box-header']) - ] - - extra_css = ''' - h1{font-family :Arial,Helvetica,sans-serif; font-size:large;} - h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;} - h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;} - h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;} - p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;} - #story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;} - #story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;} - #story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;} - #story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;} - #story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;} - #Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;} - .p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;} - .p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;} - ''' - - - def parse_index(self): - categories = ['news', 'sports', 'business', 'entertainment', 'life', - 'travel'] - feeds = [] - for cat in categories: - articles = [] - soup = 
self.index_to_soup('http://www.chron.com/%s/'%cat) - for elem in soup.findAll(comptype='story', storyid=True): - a = elem.find('a', href=True) - if a is None: continue - url = a['href'] - if not url.startswith('http://'): - url = 'http://www.chron.com'+url - articles.append({'title':self.tag_to_string(a), 'url':url, - 'description':'', 'date':''}) - pprint.pprint(articles[-1]) - if articles: - feeds.append((string.capwords(cat), articles)) - return feeds + oldest_article = 2.0 + keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or + 'hst-articletext' in x or 'hst-galleryitem' in x)} + feeds = [ + ('News', "http://www.chron.com/rss/feed/News-270.php"), + ('Sports', + 'http://www.chron.com/sports/headlines/collectionRss/Sports-Headlines-Staff-Stories-10767.php'), + ('Neighborhood', + 'http://www.chron.com/rss/feed/Neighborhood-305.php'), + ('Business', 'http://www.chron.com/rss/feed/Business-287.php'), + ('Entertainment', + 'http://www.chron.com/rss/feed/Entertainment-293.php'), + ('Editorials', + 'http://www.chron.com/opinion/editorials/collectionRss/Opinion-Editorials-Headline-List-10567.php'), + ('Life', 'http://www.chron.com/rss/feed/Life-297.php'), + ('Science & Tech', + 'http://www.chron.com/rss/feed/AP-Technology-and-Science-266.php'), + ] From ab1ca47fa7478ed4e721841c91390ec5f78f871f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Aug 2011 21:47:04 -0600 Subject: [PATCH 13/39] ... --- src/calibre/gui2/actions/catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/actions/catalog.py b/src/calibre/gui2/actions/catalog.py index e57ce06742..d18eb990b3 100644 --- a/src/calibre/gui2/actions/catalog.py +++ b/src/calibre/gui2/actions/catalog.py @@ -17,7 +17,7 @@ from calibre.gui2.actions import InterfaceAction class GenerateCatalogAction(InterfaceAction): name = 'Generate Catalog' - action_spec = (_('Create a catalog of the books in your calibre library'), 'catalog.png', 'Catalog builder', None) + action_spec = (_('Create catalog'), 'catalog.png', 'Catalog builder', None) dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device']) def genesis(self): From af5cd6ba674b8dd125a5190017ea2cdb349b676e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Aug 2011 21:50:06 -0600 Subject: [PATCH 14/39] When downloading news set the language field correctly --- src/calibre/web/feeds/news.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 2017248cfc..1d513082f1 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -28,6 +28,7 @@ from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.date import now as nowf from calibre.utils.magick.draw import save_cover_data_to, add_borders_to_image +from calibre.utils.localization import canonicalize_lang class LoginFailed(ValueError): pass @@ -1117,6 +1118,9 @@ class BasicNewsRecipe(Recipe): mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() mi.timestamp = nowf() mi.comments = self.description + language = canonicalize_lang(self.language) + if language is not None: + mi.language = language if not isinstance(mi.comments, unicode): mi.comments = mi.comments.decode('utf-8', 'replace') mi.pubdate = nowf() From a4a6086d87d4f10ac58c3423de4341ad4d2a02fa Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 21 Aug 2011 00:07:56 -0600 Subject: [PATCH 15/39] 
... --- src/calibre/ebooks/metadata/sources/identify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 97fbae4727..4987b8cead 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -481,7 +481,7 @@ def identify(log, abort, # {{{ log('The identify phase took %.2f seconds'%(time.time() - start_time)) log('The longest time (%f) was taken by:'%longest, lp) log('Merging results from different sources and finding earliest', - 'publication dates') + 'publication dates from the xisbn service') start_time = time.time() results = merge_identify_results(results, log) From 1a5f6d741d53cb46aaf3352be3d215dfff46e71a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 21 Aug 2011 10:06:25 -0600 Subject: [PATCH 16/39] Fix #830078 (New Language-function translated search error) --- src/calibre/library/caches.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 5f9dca6d23..62cad827c4 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -15,7 +15,7 @@ from calibre.utils.config import tweaks, prefs from calibre.utils.date import parse_date, now, UNDEFINED_DATE from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.pyparsing import ParseException -from calibre.utils.localization import canonicalize_lang +from calibre.utils.localization import canonicalize_lang, lang_map from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata.opf2 import metadata_to_opf from calibre import prints @@ -728,7 +728,9 @@ class ResultCache(SearchQueryParser): # {{{ elif loc == db_col['languages']: q = canonicalize_lang(query) if q is None: - q = query + lm = lang_map() + rm = {v.lower():k for k,v in lm.iteritems()} + q = rm.get(query, query) else: q = query From 8812d9eadc756cd1226f8f22eab48cc93da46ad0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 21 Aug 2011 10:16:04 -0600 Subject: [PATCH 17/39] Fix #830081 (Edit Metadata - layout error past isbn) --- src/calibre/gui2/metadata/single.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index 7f2ea036d6..dc3983171b 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -723,7 +723,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{ tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1) tl.addWidget(self.manage_authors_button, 2, 0, 1, 1) - tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1) + tl.addWidget(self.paste_isbn_button, 12, 0, 1, 1) create_row(0, self.title, self.title_sort, button=self.deduce_title_sort_button, span=2, @@ -859,7 +859,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{ tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1) tl.addWidget(self.manage_authors_button, 2, 0, 2, 1) - tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1) + tl.addWidget(self.paste_isbn_button, 12, 0, 1, 1) create_row(0, self.title, self.title_sort, button=self.deduce_title_sort_button, span=2, From 0e4e082ed7efac305ba05894d3c7af8b1e8614c5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 21 Aug 2011 10:57:06 -0600 Subject: [PATCH 18/39] When adding books that have no language specified, do not automaticallly set the langauge to calibre's 
interface language. Fixes #830092 (Downloading and adding books causes default language to be assigned) --- src/calibre/ebooks/metadata/opf2.py | 5 +++-- src/calibre/ebooks/metadata/worker.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index c52d089c70..9b8ae12b10 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -1312,7 +1312,7 @@ class OPFCreator(Metadata): ncx_stream.flush() -def metadata_to_opf(mi, as_string=True): +def metadata_to_opf(mi, as_string=True, default_lang=None): from lxml import etree import textwrap from calibre.ebooks.oeb.base import OPF, DC @@ -1328,7 +1328,8 @@ def metadata_to_opf(mi, as_string=True): '[http://calibre-ebook.com]' if not mi.languages: - lang = get_lang().replace('_', '-').partition('-')[0] + lang = (get_lang().replace('_', '-').partition('-')[0] if default_lang + is None else default_lang) mi.languages = [lang] root = etree.fromstring(textwrap.dedent( diff --git a/src/calibre/ebooks/metadata/worker.py b/src/calibre/ebooks/metadata/worker.py index ca8707258b..cab582a264 100644 --- a/src/calibre/ebooks/metadata/worker.py +++ b/src/calibre/ebooks/metadata/worker.py @@ -33,7 +33,7 @@ def serialize_metadata_for(formats, tdir, id_): if not mi.application_id: mi.application_id = '__calibre_dummy__' with open(os.path.join(tdir, '%s.opf'%id_), 'wb') as f: - f.write(metadata_to_opf(mi)) + f.write(metadata_to_opf(mi, default_lang='und')) if cdata: with open(os.path.join(tdir, str(id_)), 'wb') as f: f.write(cdata) From b22f38d71b155942406d55601a1346e0c7742ca9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 21 Aug 2011 20:48:11 -0600 Subject: [PATCH 19/39] Fix #830800 (fix #760384 not applied to both WSJ recipes) --- recipes/wsj_free.recipe | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe index 331a393c03..42d791294a 100644 --- a/recipes/wsj_free.recipe +++ b/recipes/wsj_free.recipe @@ -53,6 +53,12 @@ class WallStreetJournal(BasicNewsRecipe): return soup + def abs_wsj_url(self, href): + if not href.startswith('http'): + href = 'http://online.wsj.com' + href + return href + + def wsj_get_index(self): return self.index_to_soup('http://online.wsj.com/itp') @@ -83,14 +89,14 @@ class WallStreetJournal(BasicNewsRecipe): pageone = a['href'].endswith('pageone') if pageone: title = 'Front Section' - url = 'http://online.wsj.com' + a['href'] + url = self.abs_wsj_url(a['href']) feeds = self.wsj_add_feed(feeds,title,url) title = 'What''s News' url = url.replace('pageone','whatsnews') feeds = self.wsj_add_feed(feeds,title,url) else: title = self.tag_to_string(a) - url = 'http://online.wsj.com' + a['href'] + url = self.abs_wsj_url(a['href']) feeds = self.wsj_add_feed(feeds,title,url) return feeds @@ -146,7 +152,7 @@ class WallStreetJournal(BasicNewsRecipe): title = self.tag_to_string(a).strip() + ' [%s]'%meta else: title = self.tag_to_string(a).strip() - url = 'http://online.wsj.com'+a['href'] + url = self.abs_wsj_url(a['href']) desc = '' for p in container.findAll('p'): desc = self.tag_to_string(p) From e48e7932fa01be41a041a1bc85019bc2fd979af3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Aug 2011 10:41:17 -0600 Subject: [PATCH 20/39] MOBI Output: Add support for the start attribute on
    tags --- src/calibre/ebooks/mobi/mobiml.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index eefa9d9e03..56a7a8b9ca 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -308,6 +308,11 @@ class MobiMLizer(object): istate = copy.copy(istates[-1]) istate.rendered = False istate.list_num = 0 + if tag == 'ol' and 'start' in elem.attrib: + try: + istate.list_num = int(elem.attrib['start'])-1 + except: + pass istates.append(istate) left = 0 display = style['display'] From 101817b1fc632723af00f47a48b74e36ac930783 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Aug 2011 14:46:39 -0600 Subject: [PATCH 21/39] Fix #831622 (Updated recipe for MSDN Magazine) --- recipes/msdnmag_en.recipe | 52 ++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/recipes/msdnmag_en.recipe b/recipes/msdnmag_en.recipe index 77b8da17a8..341ca027f6 100644 --- a/recipes/msdnmag_en.recipe +++ b/recipes/msdnmag_en.recipe @@ -6,11 +6,13 @@ __copyright__ = '2009, Darko Miletic ' msdn.microsoft.com/en-us/magazine ''' from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag class MSDNMagazine_en(BasicNewsRecipe): title = 'MSDN Magazine' __author__ = 'Darko Miletic' description = 'The Microsoft Journal for Developers' + masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png' publisher = 'Microsoft Press' category = 'news, IT, Microsoft, programming, windows' oldest_article = 31 @@ -19,25 +21,45 @@ class MSDNMagazine_en(BasicNewsRecipe): use_embedded_content = False encoding = 'utf-8' language = 'en' + + base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx' + rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1' - - feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')] - - keep_only_tags = [dict(name='div', attrs={'class':'navpage'})] + keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})] remove_tags = [ - dict(name=['object','link','base','table']) - ,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'}) + dict(name='div', attrs={'class':'DivRatingsOnly'}) + ,dict(name='div', attrs={'class':'ShareThisButton4'}) ] - remove_tags_after = dict(name='div', attrs={'class':'navpage'}) + + def find_articles(self): + idx_contents = self.browser.open(self.rss_url).read() + idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES) + + for article in idx.findAll('item'): + desc_html = self.tag_to_string(article.find('description')) + description = self.tag_to_string(BeautifulSoup(desc_html)) + + a = { + 'title': self.tag_to_string(article.find('title')), + 'url': self.tag_to_string(article.find('link')), + 'description': description, + 'date' : self.tag_to_string(article.find('pubdate')), + } + yield a - def preprocess_html(self, soup): - for item in soup.findAll('div',attrs={'class':['FeatureSmallHead','ColumnTypeSubTitle']}): - item.name="h2" - for item in soup.findAll('div',attrs={'class':['FeatureHeadline','ColumnTypeTitle']}): - item.name="h1" - for item in soup.findAll('div',attrs={'class':'ArticleTypeTitle'}): - item.name="h3" - return soup + def parse_index(self): + soup = self.index_to_soup(self.base_url) + + #find issue name, eg "August 2011" + issue_name = self.tag_to_string(soup.find('h1')) + + # find cover pic + img = 
soup.find('img',attrs ={'alt':issue_name}) + if img is not None: + self.cover_url = img['src'] + + return [(issue_name, list(self.find_articles()))] + From 161644a7527303aa6ac861c9c38b76f35ea68d54 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Aug 2011 14:47:58 -0600 Subject: [PATCH 22/39] Fix #831622 (Updated recipe for MSDN Magazine) --- recipes/svd_se.recipe | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recipes/svd_se.recipe b/recipes/svd_se.recipe index ef43caa7cd..7fa92c47f2 100644 --- a/recipes/svd_se.recipe +++ b/recipes/svd_se.recipe @@ -40,11 +40,11 @@ class SVD_se(BasicNewsRecipe): ,(u'Kultur' , u'http://www.svd.se/kulturnoje/nyheter/?service=rss') ] - keep_only_tags = [dict(name='div', attrs={'id':'articlecontent'})] - remove_tags_after = dict(name='div',attrs={'class':'articlebody normal'}) + keep_only_tags = [dict(name='div', attrs={'id':['article-content', 'articlecontent']})] + remove_tags_after = dict(name='div',attrs={'class':'articlebody'}) remove_tags = [ dict(name=['object','link','base']) - ,dict(name='div',attrs={'class':['articlead','factcolumn']}) + ,dict(name='div',attrs={'class':['articlead','factcolumn', 'article-ad']}) ,dict(name='ul', attrs={'class':'toolbar articletop clearfix'}) ,dict(name='p', attrs={'class':'more'}) ] From ac30f8edd472b755d18db484eaecba5bf7cafe48 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Aug 2011 18:18:46 -0600 Subject: [PATCH 23/39] Fix #831695 (Updated recipe for Financial times UK edition) --- recipes/financial_times_uk.recipe | 36 +++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/recipes/financial_times_uk.recipe b/recipes/financial_times_uk.recipe index f3ad824bc3..4c331f115f 100644 --- a/recipes/financial_times_uk.recipe +++ b/recipes/financial_times_uk.recipe @@ -5,6 +5,7 @@ www.ft.com/uk-edition ''' import datetime +from calibre.ptempfile import PersistentTemporaryFile from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -22,6 +23,8 @@ class FinancialTimes(BasicNewsRecipe): needs_subscription = True encoding = 'utf8' publication_type = 'newspaper' + articles_are_obfuscated = True + temp_files = [] masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg' LOGIN = 'https://registration.ft.com/registration/barrier/login' LOGIN2 = 'http://media.ft.com/h/subs3.html' @@ -47,7 +50,12 @@ class FinancialTimes(BasicNewsRecipe): br.submit() return br - keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})] + keep_only_tags = [ + dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']}) + ,dict(name='div', attrs={'class':'standfirst'}) + ,dict(name='div', attrs={'id' :'storyContent'}) + ,dict(name='div', attrs={'class':['ft-story-body','index-detail']}) + ] remove_tags = [ dict(name='div', attrs={'id':'floating-con'}) ,dict(name=['meta','iframe','base','object','embed','link']) @@ -69,18 +77,23 @@ class FinancialTimes(BasicNewsRecipe): def get_artlinks(self, elem): articles = [] + count = 0 for item in elem.findAll('a',href=True): + count = count + 1 + if self.test and count > 2: + return articles rawlink = item['href'] if rawlink.startswith('http://'): url = rawlink else: url = self.PREFIX + rawlink + urlverified = self.browser.open_novisit(url).geturl() # resolve redirect. 
title = self.tag_to_string(item) date = strftime(self.timefmt) articles.append({ 'title' :title ,'date' :date - ,'url' :url + ,'url' :urlverified ,'description':'' }) return articles @@ -97,7 +110,11 @@ class FinancialTimes(BasicNewsRecipe): st = wide.find('h4',attrs={'class':'section-no-arrow'}) if st: strest.insert(0,st) + count = 0 for item in strest: + count = count + 1 + if self.test and count > 2: + return feeds ftitle = self.tag_to_string(item) self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle)) feedarts = self.get_artlinks(item.parent.ul) @@ -136,4 +153,19 @@ class FinancialTimes(BasicNewsRecipe): if cdate.isoweekday() == 7: cdate -= datetime.timedelta(days=1) return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf') + + def get_obfuscated_article(self, url): + count = 0 + while (count < 10): + try: + response = self.browser.open(url) + html = response.read() + count = 10 + except: + print "Retrying download..." + count += 1 + self.temp_files.append(PersistentTemporaryFile('_fa.html')) + self.temp_files[-1].write(html) + self.temp_files[-1].close() + return self.temp_files[-1].name \ No newline at end of file From 2afef9211e75b0803edfaabc2730f819287a309b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 22 Aug 2011 18:33:41 -0600 Subject: [PATCH 24/39] Conversion pipeline: More robust handling of case insensitve tag and class css selectors --- src/calibre/ebooks/cssselect.py | 1007 ++++++++++++++++++++++++++++ src/calibre/ebooks/oeb/stylizer.py | 94 +-- 2 files changed, 1060 insertions(+), 41 deletions(-) create mode 100644 src/calibre/ebooks/cssselect.py diff --git a/src/calibre/ebooks/cssselect.py b/src/calibre/ebooks/cssselect.py new file mode 100644 index 0000000000..c4167a8e4d --- /dev/null +++ b/src/calibre/ebooks/cssselect.py @@ -0,0 +1,1007 @@ +"""CSS Selectors based on XPath. + +This module supports selecting XML/HTML tags based on CSS selectors. +See the `CSSSelector` class for details. +""" + +import re +from lxml import etree + +__all__ = ['SelectorSyntaxError', 'ExpressionError', + 'CSSSelector'] + +try: + _basestring = basestring +except NameError: + _basestring = str + +class SelectorSyntaxError(SyntaxError): + pass + +class ExpressionError(RuntimeError): + pass + +class CSSSelector(etree.XPath): + """A CSS selector. + + Usage:: + + >>> from lxml import etree, cssselect + >>> select = cssselect.CSSSelector("a tag > child") + + >>> root = etree.XML("TEXT") + >>> [ el.tag for el in select(root) ] + ['child'] + + To use CSS namespaces, you need to pass a prefix-to-namespace + mapping as ``namespaces`` keyword argument:: + + >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' + >>> select_ns = cssselect.CSSSelector('root > rdf|Description', + ... namespaces={'rdf': rdfns}) + + >>> rdf = etree.XML(( + ... '' + ... 'blah' + ... 
'') % rdfns) + >>> [(el.tag, el.text) for el in select_ns(rdf)] + [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')] + """ + def __init__(self, css, namespaces=None): + path = css_to_xpath_no_case(css) + etree.XPath.__init__(self, path, namespaces=namespaces) + self.css = css + + def __repr__(self): + return '<%s %s for %r>' % ( + self.__class__.__name__, + hex(abs(id(self)))[2:], + self.css) + +############################## +## Token objects: + +try: + _unicode = unicode + _unichr = unichr +except NameError: + # Python 3 + _unicode = str + _unichr = chr + +class _UniToken(_unicode): + def __new__(cls, contents, pos): + obj = _unicode.__new__(cls, contents) + obj.pos = pos + return obj + + def __repr__(self): + return '%s(%s, %r)' % ( + self.__class__.__name__, + _unicode.__repr__(self), + self.pos) + +class Symbol(_UniToken): + pass + +class String(_UniToken): + pass + +class Token(_UniToken): + pass + +############################################################ +## Parsing +############################################################ + +############################## +## Syntax objects: + +class Class(object): + """ + Represents selector.class_name + """ + + def __init__(self, selector, class_name): + self.selector = selector + # Kovid: Lowercased + self.class_name = class_name.lower() + + def __repr__(self): + return '%s[%r.%s]' % ( + self.__class__.__name__, + self.selector, + self.class_name) + + def xpath(self): + sel_xpath = self.selector.xpath() + # Kovid: Lowercased + sel_xpath.add_condition( + "contains(concat(' ', css:lower-case(normalize-space(@class)), ' '), %s)" % xpath_literal(' '+self.class_name+' ')) + return sel_xpath + +class Function(object): + """ + Represents selector:name(expr) + """ + + unsupported = [ + 'target', 'lang', 'enabled', 'disabled',] + + def __init__(self, selector, type, name, expr): + self.selector = selector + self.type = type + self.name = name + self.expr = expr + + def __repr__(self): + return '%s[%r%s%s(%r)]' % ( + self.__class__.__name__, + self.selector, + self.type, self.name, self.expr) + + def xpath(self): + sel_path = self.selector.xpath() + if self.name in self.unsupported: + raise ExpressionError( + "The pseudo-class %r is not supported" % self.name) + method = '_xpath_' + self.name.replace('-', '_') + if not hasattr(self, method): + raise ExpressionError( + "The pseudo-class %r is unknown" % self.name) + method = getattr(self, method) + return method(sel_path, self.expr) + + def _xpath_nth_child(self, xpath, expr, last=False, + add_name_test=True): + a, b = parse_series(expr) + if not a and not b and not last: + # a=0 means nothing is returned... 
+ xpath.add_condition('false() and position() = 0') + return xpath + if add_name_test: + xpath.add_name_test() + xpath.add_star_prefix() + if a == 0: + if last: + b = 'last() - %s' % b + xpath.add_condition('position() = %s' % b) + return xpath + if last: + # FIXME: I'm not sure if this is right + a = -a + b = -b + if b > 0: + b_neg = str(-b) + else: + b_neg = '+%s' % (-b) + if a != 1: + expr = ['(position() %s) mod %s = 0' % (b_neg, a)] + else: + expr = [] + if b >= 0: + expr.append('position() >= %s' % b) + elif b < 0 and last: + expr.append('position() < (last() %s)' % b) + expr = ' and '.join(expr) + if expr: + xpath.add_condition(expr) + return xpath + # FIXME: handle an+b, odd, even + # an+b means every-a, plus b, e.g., 2n+1 means odd + # 0n+b means b + # n+0 means a=1, i.e., all elements + # an means every a elements, i.e., 2n means even + # -n means -1n + # -1n+6 means elements 6 and previous + + def _xpath_nth_last_child(self, xpath, expr): + return self._xpath_nth_child(xpath, expr, last=True) + + def _xpath_nth_of_type(self, xpath, expr): + if xpath.element == '*': + raise NotImplementedError( + "*:nth-of-type() is not implemented") + return self._xpath_nth_child(xpath, expr, add_name_test=False) + + def _xpath_nth_last_of_type(self, xpath, expr): + return self._xpath_nth_child(xpath, expr, last=True, add_name_test=False) + + def _xpath_contains(self, xpath, expr): + # text content, minus tags, must contain expr + if isinstance(expr, Element): + expr = expr._format_element() + xpath.add_condition('contains(css:lower-case(string(.)), %s)' + % xpath_literal(expr.lower())) + # FIXME: Currently case insensitive matching doesn't seem to be happening + return xpath + + def _xpath_not(self, xpath, expr): + # everything for which not expr applies + expr = expr.xpath() + cond = expr.condition + # FIXME: should I do something about element_path? + xpath.add_condition('not(%s)' % cond) + return xpath + +def _make_lower_case(context, s): + return s.lower() + +ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/') +ns.prefix = 'css' +ns['lower-case'] = _make_lower_case + +class Pseudo(object): + """ + Represents selector:ident + """ + + unsupported = ['indeterminate', 'first-line', 'first-letter', + 'selection', 'before', 'after', 'link', 'visited', + 'active', 'focus', 'hover'] + + def __init__(self, element, type, ident): + self.element = element + assert type in (':', '::') + self.type = type + self.ident = ident + + def __repr__(self): + return '%s[%r%s%s]' % ( + self.__class__.__name__, + self.element, + self.type, self.ident) + + def xpath(self): + el_xpath = self.element.xpath() + if self.ident in self.unsupported: + raise ExpressionError( + "The pseudo-class %r is unsupported" % self.ident) + method = '_xpath_' + self.ident.replace('-', '_') + if not hasattr(self, method): + raise ExpressionError( + "The pseudo-class %r is unknown" % self.ident) + method = getattr(self, method) + el_xpath = method(el_xpath) + return el_xpath + + def _xpath_checked(self, xpath): + # FIXME: is this really all the elements? + xpath.add_condition("(@selected or @checked) and (name(.) = 'input' or name(.) 
= 'option')") + return xpath + + def _xpath_root(self, xpath): + # if this element is the root element + raise NotImplementedError + + def _xpath_first_child(self, xpath): + xpath.add_star_prefix() + xpath.add_name_test() + xpath.add_condition('position() = 1') + return xpath + + def _xpath_last_child(self, xpath): + xpath.add_star_prefix() + xpath.add_name_test() + xpath.add_condition('position() = last()') + return xpath + + def _xpath_first_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:first-of-type is not implemented") + xpath.add_star_prefix() + xpath.add_condition('position() = 1') + return xpath + + def _xpath_last_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:last-of-type is not implemented") + xpath.add_star_prefix() + xpath.add_condition('position() = last()') + return xpath + + def _xpath_only_child(self, xpath): + xpath.add_name_test() + xpath.add_star_prefix() + xpath.add_condition('last() = 1') + return xpath + + def _xpath_only_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:only-of-type is not implemented") + xpath.add_condition('last() = 1') + return xpath + + def _xpath_empty(self, xpath): + xpath.add_condition("not(*) and not(normalize-space())") + return xpath + +class Attrib(object): + """ + Represents selector[namespace|attrib operator value] + """ + + def __init__(self, selector, namespace, attrib, operator, value): + self.selector = selector + self.namespace = namespace + self.attrib = attrib + self.operator = operator + self.value = value + + def __repr__(self): + if self.operator == 'exists': + return '%s[%r[%s]]' % ( + self.__class__.__name__, + self.selector, + self._format_attrib()) + else: + return '%s[%r[%s %s %r]]' % ( + self.__class__.__name__, + self.selector, + self._format_attrib(), + self.operator, + self.value) + + def _format_attrib(self): + if self.namespace == '*': + return self.attrib + else: + return '%s|%s' % (self.namespace, self.attrib) + + def _xpath_attrib(self): + # FIXME: if attrib is *? + if self.namespace == '*': + return '@' + self.attrib + else: + return '@%s:%s' % (self.namespace, self.attrib) + + def xpath(self): + path = self.selector.xpath() + attrib = self._xpath_attrib() + value = self.value + if self.operator == 'exists': + assert not value + path.add_condition(attrib) + elif self.operator == '=': + path.add_condition('%s = %s' % (attrib, + xpath_literal(value))) + elif self.operator == '!=': + # FIXME: this seems like a weird hack... + if value: + path.add_condition('not(%s) or %s != %s' + % (attrib, attrib, xpath_literal(value))) + else: + path.add_condition('%s != %s' + % (attrib, xpath_literal(value))) + #path.add_condition('%s != %s' % (attrib, xpath_literal(value))) + elif self.operator == '~=': + path.add_condition("contains(concat(' ', normalize-space(%s), ' '), %s)" % (attrib, xpath_literal(' '+value+' '))) + elif self.operator == '|=': + # Weird, but true... + path.add_condition('%s = %s or starts-with(%s, %s)' % ( + attrib, xpath_literal(value), + attrib, xpath_literal(value + '-'))) + elif self.operator == '^=': + path.add_condition('starts-with(%s, %s)' % ( + attrib, xpath_literal(value))) + elif self.operator == '$=': + # Oddly there is a starts-with in XPath 1.0, but not ends-with + path.add_condition('substring(%s, string-length(%s)-%s) = %s' + % (attrib, attrib, len(value)-1, xpath_literal(value))) + elif self.operator == '*=': + # FIXME: case sensitive? 
+ path.add_condition('contains(%s, %s)' % ( + attrib, xpath_literal(value))) + else: + assert 0, ("Unknown operator: %r" % self.operator) + return path + +class Element(object): + """ + Represents namespace|element + """ + + def __init__(self, namespace, element): + self.namespace = namespace + self.element = element + + def __repr__(self): + return '%s[%s]' % ( + self.__class__.__name__, + self._format_element()) + + def _format_element(self): + if self.namespace == '*': + return self.element + else: + return '%s|%s' % (self.namespace, self.element) + + def xpath(self): + if self.namespace == '*': + el = self.element.lower() + else: + # Kovid: Lowercased + el = '%s:%s' % (self.namespace, self.element.lower()) + return XPathExpr(element=el) + +class Hash(object): + """ + Represents selector#id + """ + + def __init__(self, selector, id): + self.selector = selector + self.id = id + + def __repr__(self): + return '%s[%r#%s]' % ( + self.__class__.__name__, + self.selector, self.id) + + def xpath(self): + path = self.selector.xpath() + path.add_condition('@id = %s' % xpath_literal(self.id)) + return path + +class Or(object): + + def __init__(self, items): + self.items = items + def __repr__(self): + return '%s(%r)' % ( + self.__class__.__name__, + self.items) + + def xpath(self): + paths = [item.xpath() for item in self.items] + return XPathExprOr(paths) + +class CombinedSelector(object): + + _method_mapping = { + ' ': 'descendant', + '>': 'child', + '+': 'direct_adjacent', + '~': 'indirect_adjacent', + } + + def __init__(self, selector, combinator, subselector): + assert selector is not None + self.selector = selector + self.combinator = combinator + self.subselector = subselector + + def __repr__(self): + if self.combinator == ' ': + comb = '' + else: + comb = self.combinator + return '%s[%r %s %r]' % ( + self.__class__.__name__, + self.selector, + comb, + self.subselector) + + def xpath(self): + if self.combinator not in self._method_mapping: + raise ExpressionError( + "Unknown combinator: %r" % self.combinator) + method = '_xpath_' + self._method_mapping[self.combinator] + method = getattr(self, method) + path = self.selector.xpath() + return method(path, self.subselector) + + def _xpath_descendant(self, xpath, sub): + # when sub is a descendant in any way of xpath + xpath.join('/descendant::', sub.xpath()) + return xpath + + def _xpath_child(self, xpath, sub): + # when sub is an immediate child of xpath + xpath.join('/', sub.xpath()) + return xpath + + def _xpath_direct_adjacent(self, xpath, sub): + # when sub immediately follows xpath + xpath.join('/following-sibling::', sub.xpath()) + xpath.add_name_test() + xpath.add_condition('position() = 1') + return xpath + + def _xpath_indirect_adjacent(self, xpath, sub): + # when sub comes somewhere after xpath as a sibling + xpath.join('/following-sibling::', sub.xpath()) + return xpath + +############################## +## XPathExpr objects: + +_el_re = re.compile(r'^\w+\s*$', re.UNICODE) +_id_re = re.compile(r'^(\w*)#(\w+)\s*$', re.UNICODE) +_class_re = re.compile(r'^(\w*)\.(\w+)\s*$', re.UNICODE) + +def css_to_xpath_no_case(css_expr, prefix='descendant-or-self::'): + if isinstance(css_expr, _basestring): + match = _el_re.search(css_expr) + if match is not None: + # Kovid: Lowercased + return '%s%s' % (prefix, match.group(0).strip().lower()) + match = _id_re.search(css_expr) + if match is not None: + return "%s%s[@id = '%s']" % ( + prefix, match.group(1) or '*', match.group(2)) + match = _class_re.search(css_expr) + if match is not None: + # 
Kovid: lowercased + return "%s%s[contains(concat(' ', css:lower-case(normalize-space(@class)), ' '), ' %s ')]" % ( + prefix, match.group(1).lower() or '*', match.group(2).lower()) + css_expr = parse(css_expr) + expr = css_expr.xpath() + assert expr is not None, ( + "Got None for xpath expression from %s" % repr(css_expr)) + if prefix: + expr.add_prefix(prefix) + return _unicode(expr) + +class XPathExpr(object): + + def __init__(self, prefix=None, path=None, element='*', condition=None, + star_prefix=False): + self.prefix = prefix + self.path = path + self.element = element + self.condition = condition + self.star_prefix = star_prefix + + def __str__(self): + path = '' + if self.prefix is not None: + path += _unicode(self.prefix) + if self.path is not None: + path += _unicode(self.path) + path += _unicode(self.element) + if self.condition: + path += '[%s]' % self.condition + return path + + def __repr__(self): + return '%s[%s]' % ( + self.__class__.__name__, self) + + def add_condition(self, condition): + if self.condition: + self.condition = '%s and (%s)' % (self.condition, condition) + else: + self.condition = condition + + def add_path(self, part): + if self.path is None: + self.path = self.element + else: + self.path += self.element + self.element = part + + def add_prefix(self, prefix): + if self.prefix: + self.prefix = prefix + self.prefix + else: + self.prefix = prefix + + def add_name_test(self): + if self.element == '*': + # We weren't doing a test anyway + return + self.add_condition("name() = %s" % xpath_literal(self.element)) + self.element = '*' + + def add_star_prefix(self): + """ + Adds a /* prefix if there is no prefix. This is when you need + to keep context's constrained to a single parent. + """ + if self.path: + self.path += '*/' + else: + self.path = '*/' + self.star_prefix = True + + def join(self, combiner, other): + prefix = _unicode(self) + prefix += combiner + path = (other.prefix or '') + (other.path or '') + # We don't need a star prefix if we are joining to this other + # prefix; so we'll get rid of it + if other.star_prefix and path == '*/': + path = '' + self.prefix = prefix + self.path = path + self.element = other.element + self.condition = other.condition + +class XPathExprOr(XPathExpr): + """ + Represents |'d expressions. Note that unfortunately it isn't + the union, it's the sum, so duplicate elements will appear. + """ + + def __init__(self, items, prefix=None): + for item in items: + assert item is not None + self.items = items + self.prefix = prefix + + def __str__(self): + prefix = self.prefix or '' + return ' | '.join(["%s%s" % (prefix,i) for i in self.items]) + +split_at_single_quotes = re.compile("('+)").split + +def xpath_literal(s): + if isinstance(s, Element): + # This is probably a symbol that looks like an expression... 
+ s = s._format_element() + else: + s = _unicode(s) + if "'" not in s: + s = "'%s'" % s + elif '"' not in s: + s = '"%s"' % s + else: + s = "concat(%s)" % ','.join([ + (("'" in part) and '"%s"' or "'%s'") % part + for part in split_at_single_quotes(s) if part + ]) + return s + +############################## +## Parsing functions + +def parse(string): + stream = TokenStream(tokenize(string)) + stream.source = string + try: + return parse_selector_group(stream) + except SelectorSyntaxError: + import sys + e = sys.exc_info()[1] + message = "%s at %s -> %r" % ( + e, stream.used, stream.peek()) + e.msg = message + if sys.version_info < (2,6): + e.message = message + e.args = tuple([message]) + raise + +def parse_selector_group(stream): + result = [] + while 1: + result.append(parse_selector(stream)) + if stream.peek() == ',': + stream.next() + else: + break + if len(result) == 1: + return result[0] + else: + return Or(result) + +def parse_selector(stream): + result = parse_simple_selector(stream) + while 1: + peek = stream.peek() + if peek == ',' or peek is None: + return result + elif peek in ('+', '>', '~'): + # A combinator + combinator = stream.next() + else: + combinator = ' ' + consumed = len(stream.used) + next_selector = parse_simple_selector(stream) + if consumed == len(stream.used): + raise SelectorSyntaxError( + "Expected selector, got '%s'" % stream.peek()) + result = CombinedSelector(result, combinator, next_selector) + return result + +def parse_simple_selector(stream): + peek = stream.peek() + if peek != '*' and not isinstance(peek, Symbol): + element = namespace = '*' + else: + next = stream.next() + if next != '*' and not isinstance(next, Symbol): + raise SelectorSyntaxError( + "Expected symbol, got '%s'" % next) + if stream.peek() == '|': + namespace = next + stream.next() + element = stream.next() + if element != '*' and not isinstance(next, Symbol): + raise SelectorSyntaxError( + "Expected symbol, got '%s'" % next) + else: + namespace = '*' + element = next + result = Element(namespace, element) + has_hash = False + while 1: + peek = stream.peek() + if peek == '#': + if has_hash: + # You can't have two hashes + # (FIXME: is there some more general rule I'm missing?) + break + stream.next() + result = Hash(result, stream.next()) + has_hash = True + continue + elif peek == '.': + stream.next() + result = Class(result, stream.next()) + continue + elif peek == '[': + stream.next() + result = parse_attrib(result, stream) + next = stream.next() + if not next == ']': + raise SelectorSyntaxError( + "] expected, got '%s'" % next) + continue + elif peek == ':' or peek == '::': + type = stream.next() + ident = stream.next() + if not isinstance(ident, Symbol): + raise SelectorSyntaxError( + "Expected symbol, got '%s'" % ident) + if stream.peek() == '(': + stream.next() + peek = stream.peek() + if isinstance(peek, String): + selector = stream.next() + elif isinstance(peek, Symbol) and is_int(peek): + selector = int(stream.next()) + else: + # FIXME: parse_simple_selector, or selector, or...? 
+ selector = parse_simple_selector(stream) + next = stream.next() + if not next == ')': + raise SelectorSyntaxError( + "Expected ')', got '%s' and '%s'" + % (next, selector)) + result = Function(result, type, ident, selector) + else: + result = Pseudo(result, type, ident) + continue + else: + if peek == ' ': + stream.next() + break + # FIXME: not sure what "negation" is + return result + +def is_int(v): + try: + int(v) + except ValueError: + return False + else: + return True + +def parse_attrib(selector, stream): + attrib = stream.next() + if stream.peek() == '|': + namespace = attrib + stream.next() + attrib = stream.next() + else: + namespace = '*' + if stream.peek() == ']': + return Attrib(selector, namespace, attrib, 'exists', None) + op = stream.next() + if not op in ('^=', '$=', '*=', '=', '~=', '|=', '!='): + raise SelectorSyntaxError( + "Operator expected, got '%s'" % op) + value = stream.next() + if not isinstance(value, (Symbol, String)): + raise SelectorSyntaxError( + "Expected string or symbol, got '%s'" % value) + return Attrib(selector, namespace, attrib, op, value) + +def parse_series(s): + """ + Parses things like '1n+2', or 'an+b' generally, returning (a, b) + """ + if isinstance(s, Element): + s = s._format_element() + if not s or s == '*': + # Happens when there's nothing, which the CSS parser thinks of as * + return (0, 0) + if isinstance(s, int): + # Happens when you just get a number + return (0, s) + if s == 'odd': + return (2, 1) + elif s == 'even': + return (2, 0) + elif s == 'n': + return (1, 0) + if 'n' not in s: + # Just a b + return (0, int(s)) + a, b = s.split('n', 1) + if not a: + a = 1 + elif a == '-' or a == '+': + a = int(a+'1') + else: + a = int(a) + if not b: + b = 0 + elif b == '-' or b == '+': + b = int(b+'1') + else: + b = int(b) + return (a, b) + + +############################################################ +## Tokenizing +############################################################ + +_match_whitespace = re.compile(r'\s+', re.UNICODE).match + +_replace_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub + +_match_count_number = re.compile(r'[+-]?\d*n(?:[+-]\d+)?').match + +def tokenize(s): + pos = 0 + s = _replace_comments('', s) + while 1: + match = _match_whitespace(s, pos=pos) + if match: + preceding_whitespace_pos = pos + pos = match.end() + else: + preceding_whitespace_pos = 0 + if pos >= len(s): + return + match = _match_count_number(s, pos=pos) + if match and match.group() != 'n': + sym = s[pos:match.end()] + yield Symbol(sym, pos) + pos = match.end() + continue + c = s[pos] + c2 = s[pos:pos+2] + if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='): + yield Token(c2, pos) + pos += 2 + continue + if c in '>+~,.*=[]()|:#': + if c in '.#[' and preceding_whitespace_pos > 0: + yield Token(' ', preceding_whitespace_pos) + yield Token(c, pos) + pos += 1 + continue + if c == '"' or c == "'": + # Quoted string + old_pos = pos + sym, pos = tokenize_escaped_string(s, pos) + yield String(sym, old_pos) + continue + old_pos = pos + sym, pos = tokenize_symbol(s, pos) + yield Symbol(sym, old_pos) + continue + +split_at_string_escapes = re.compile(r'(\\(?:%s))' + % '|'.join(['[A-Fa-f0-9]{1,6}(?:\r\n|\s)?', + '[^A-Fa-f0-9]'])).split + +def unescape_string_literal(literal): + substrings = [] + for substring in split_at_string_escapes(literal): + if not substring: + continue + elif '\\' in substring: + if substring[0] == '\\' and len(substring) > 1: + substring = substring[1:] + if substring[0] in '0123456789ABCDEFabcdef': + # int() correctly ignores the 
potentially trailing whitespace + substring = _unichr(int(substring, 16)) + else: + raise SelectorSyntaxError( + "Invalid escape sequence %r in string %r" + % (substring.split('\\')[1], literal)) + substrings.append(substring) + return ''.join(substrings) + +def tokenize_escaped_string(s, pos): + quote = s[pos] + assert quote in ('"', "'") + pos = pos+1 + start = pos + while 1: + next = s.find(quote, pos) + if next == -1: + raise SelectorSyntaxError( + "Expected closing %s for string in: %r" + % (quote, s[start:])) + result = s[start:next] + if result.endswith('\\'): + # next quote character is escaped + pos = next+1 + continue + if '\\' in result: + result = unescape_string_literal(result) + return result, next+1 + +_illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE) + +def tokenize_symbol(s, pos): + start = pos + match = _illegal_symbol.search(s, pos=pos) + if not match: + # Goes to end of s + return s[start:], len(s) + if match.start() == pos: + assert 0, ( + "Unexpected symbol: %r at %s" % (s[pos], pos)) + if not match: + result = s[start:] + pos = len(s) + else: + result = s[start:match.start()] + pos = match.start() + try: + result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape') + except UnicodeDecodeError: + import sys + e = sys.exc_info()[1] + raise SelectorSyntaxError( + "Bad symbol %r: %s" % (result, e)) + return result, pos + +class TokenStream(object): + + def __init__(self, tokens, source=None): + self.used = [] + self.tokens = iter(tokens) + self.source = source + self.peeked = None + self._peeking = False + try: + self.next_token = self.tokens.next + except AttributeError: + # Python 3 + self.next_token = self.tokens.__next__ + + def next(self): + if self._peeking: + self._peeking = False + self.used.append(self.peeked) + return self.peeked + else: + try: + next = self.next_token() + self.used.append(next) + return next + except StopIteration: + return None + + def __iter__(self): + return iter(self.next, None) + + def peek(self): + if not self._peeking: + try: + self.peeked = self.next_token() + except StopIteration: + return None + self._peeking = True + return self.peeked diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index f6ff594701..88e074320d 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -27,6 +27,7 @@ from calibre import force_unicode from calibre.ebooks import unit_convert from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize +from calibre.ebooks.cssselect import css_to_xpath_no_case cssutils_log.setLevel(logging.WARN) @@ -98,32 +99,72 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large']) -class CSSSelector(etree.XPath): - MIN_SPACE_RE = re.compile(r' *([>~+]) *') +class CSSSelector(object): + LOCAL_NAME_RE = re.compile(r"(?' 
% ( self.__class__.__name__, hex(abs(id(self)))[2:], self.css) +_selector_cache = {} + +MIN_SPACE_RE = re.compile(r' *([>~+]) *') + +def get_css_selector(raw_selector): + css = MIN_SPACE_RE.sub(r'\1', raw_selector) + if isinstance(css, unicode): + # Workaround for bug in lxml on windows/OS X that causes a massive + # memory leak with non ASCII selectors + css = css.encode('ascii', 'ignore').decode('ascii') + ans = _selector_cache.get(css, None) + if ans is None: + ans = CSSSelector(css) + _selector_cache[css] = ans + return ans class Stylizer(object): STYLESHEETS = WeakKeyDictionary() @@ -223,41 +264,12 @@ class Stylizer(object): rules.sort() self.rules = rules self._styles = {} - class_sel_pat = re.compile(r'\.[a-z]+', re.IGNORECASE) - capital_sel_pat = re.compile(r'h|[A-Z]+') for _, _, cssdict, text, _ in rules: fl = ':first-letter' in text if fl: text = text.replace(':first-letter', '') - try: - selector = CSSSelector(text) - except (AssertionError, ExpressionError, etree.XPathSyntaxError, - NameError, # thrown on OS X instead of SelectorSyntaxError - SelectorSyntaxError): - continue - try: - matches = selector(tree) - except etree.XPathEvalError: - continue - - if not matches: - ntext = capital_sel_pat.sub(lambda m: m.group().lower(), text) - if ntext != text: - self.logger.warn('Transformed CSS selector', text, 'to', - ntext) - selector = CSSSelector(ntext) - matches = selector(tree) - - if not matches and class_sel_pat.match(text) and text.lower() != text: - found = False - ltext = text.lower() - for x in tree.xpath('//*[@class]'): - if ltext.endswith('.'+x.get('class').lower()): - matches.append(x) - found = True - if found: - self.logger.warn('Ignoring case mismatches for CSS selector: %s in %s' - %(text, item.href)) + selector = get_css_selector(text) + matches = selector(tree, self.logger) if fl: from lxml.builder import ElementMaker E = ElementMaker(namespace=XHTML_NS) From b3f5484cbe90f99c6f912cfaa9504e692f6ce89d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Aug 2011 00:29:02 -0600 Subject: [PATCH 25/39] Add a search for individual tweaks to Preferences->Tweaks --- src/calibre/gui2/__init__.py | 2 + src/calibre/gui2/preferences/tweaks.py | 109 +++++++++++++++++++++++-- src/calibre/gui2/preferences/tweaks.ui | 83 +++++++++++++++---- 3 files changed, 174 insertions(+), 20 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 1967f734cc..83ade61200 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -173,6 +173,8 @@ def _config(): # {{{ help='Search history for the plugin preferences') c.add_opt('shortcuts_search_history', default=[], help='Search history for the keyboard preferences') + c.add_opt('tweaks_search_history', default=[], + help='Search history for tweaks') c.add_opt('worker_limit', default=6, help=_( 'Maximum number of simultaneous conversion/news download jobs. 
' diff --git a/src/calibre/gui2/preferences/tweaks.py b/src/calibre/gui2/preferences/tweaks.py index a1756bf1ba..04c11ad40e 100644 --- a/src/calibre/gui2/preferences/tweaks.py +++ b/src/calibre/gui2/preferences/tweaks.py @@ -9,14 +9,19 @@ import textwrap from calibre.gui2.preferences import ConfigWidgetBase, test_widget, AbortCommit from calibre.gui2.preferences.tweaks_ui import Ui_Form -from calibre.gui2 import error_dialog, NONE +from calibre.gui2 import error_dialog, NONE, info_dialog from calibre.utils.config import read_raw_tweaks, write_tweaks from calibre.gui2.widgets import PythonHighlighter from calibre import isbytestring +from calibre.utils.icu import lower +from calibre.utils.search_query_parser import (ParseException, + SearchQueryParser) from PyQt4.Qt import (QAbstractListModel, Qt, QStyledItemDelegate, QStyle, QStyleOptionViewItem, QFont, QDialogButtonBox, QDialog, - QVBoxLayout, QPlainTextEdit, QLabel) + QVBoxLayout, QPlainTextEdit, QLabel, QModelIndex) + +ROOT = QModelIndex() class Delegate(QStyledItemDelegate): # {{{ def __init__(self, view): @@ -35,7 +40,7 @@ class Delegate(QStyledItemDelegate): # {{{ class Tweak(object): # {{{ def __init__(self, name, doc, var_names, defaults, custom): - translate = __builtins__['_'] + translate = _ self.name = translate(name) self.doc = translate(doc.strip()) self.var_names = var_names @@ -87,10 +92,11 @@ class Tweak(object): # {{{ # }}} -class Tweaks(QAbstractListModel): # {{{ +class Tweaks(QAbstractListModel, SearchQueryParser): # {{{ def __init__(self, parent=None): QAbstractListModel.__init__(self, parent) + SearchQueryParser.__init__(self, ['all']) raw_defaults, raw_custom = read_raw_tweaks() self.parse_tweaks(raw_defaults, raw_custom) @@ -223,6 +229,54 @@ class Tweaks(QAbstractListModel): # {{{ def set_plugin_tweaks(self, d): self.plugin_tweaks = d + def universal_set(self): + return set(xrange(self.rowCount())) + + def get_matches(self, location, query, candidates=None): + if candidates is None: + candidates = self.universal_set() + ans = set() + if not query: + return ans + query = lower(query) + for r in candidates: + dat = self.data(self.index(r), Qt.UserRole) + if query in lower(dat.name):# or query in lower(dat.doc): + ans.add(r) + return ans + + def find(self, query): + query = query.strip() + if not query: + return ROOT + matches = self.parse(query) + if not matches: + return ROOT + matches = list(sorted(matches)) + return self.index(matches[0]) + + def find_next(self, idx, query, backwards=False): + query = query.strip() + if not query: + return idx + matches = self.parse(query) + if not matches: + return idx + loc = idx.row() + if loc not in matches: + return self.find(query) + if len(matches) == 1: + return ROOT + matches = list(sorted(matches)) + i = matches.index(loc) + if backwards: + ans = i - 1 if i - 1 >= 0 else len(matches)-1 + else: + ans = i + 1 if i + 1 < len(matches) else 0 + + ans = matches[ans] + return self.index(ans) + # }}} class PluginTweaks(QDialog): # {{{ @@ -257,12 +311,18 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.delegate = Delegate(self.tweaks_view) self.tweaks_view.setItemDelegate(self.delegate) self.tweaks_view.currentChanged = self.current_changed + self.view = self.tweaks_view self.highlighter = PythonHighlighter(self.edit_tweak.document()) self.restore_default_button.clicked.connect(self.restore_to_default) self.apply_button.clicked.connect(self.apply_tweak) self.plugin_tweaks_button.clicked.connect(self.plugin_tweaks) self.splitter.setStretchFactor(0, 1) 
self.splitter.setStretchFactor(1, 100) + self.next_button.clicked.connect(self.find_next) + self.previous_button.clicked.connect(self.find_previous) + self.search.initialize('tweaks_search_history', help_text= + _('Search for tweak')) + self.search.search.connect(self.find) def plugin_tweaks(self): @@ -290,7 +350,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.changed_signal.emit() def initialize(self): - self.tweaks = Tweaks() + self.tweaks = self._model = Tweaks() self.tweaks_view.setModel(self.tweaks) def restore_to_default(self, *args): @@ -338,6 +398,45 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): ConfigWidgetBase.commit(self) return True + def find(self, query): + if not query: + return + try: + idx = self._model.find(query) + except ParseException: + self.search.search_done(False) + return + self.search.search_done(True) + if not idx.isValid(): + info_dialog(self, _('No matches'), + _('Could not find any shortcuts matching %s')%query, + show=True, show_copy_button=False) + return + self.highlight_index(idx) + + def highlight_index(self, idx): + if not idx.isValid(): return + self.view.scrollTo(idx) + self.view.selectionModel().select(idx, + self.view.selectionModel().ClearAndSelect) + self.view.setCurrentIndex(idx) + + def find_next(self, *args): + idx = self.view.currentIndex() + if not idx.isValid(): + idx = self._model.index(0) + idx = self._model.find_next(idx, + unicode(self.search.currentText())) + self.highlight_index(idx) + + def find_previous(self, *args): + idx = self.view.currentIndex() + if not idx.isValid(): + idx = self._model.index(0) + idx = self._model.find_next(idx, + unicode(self.search.currentText()), backwards=True) + self.highlight_index(idx) + if __name__ == '__main__': from PyQt4.Qt import QApplication diff --git a/src/calibre/gui2/preferences/tweaks.ui b/src/calibre/gui2/preferences/tweaks.ui index ab3f6b2bc3..19f6c836d5 100644 --- a/src/calibre/gui2/preferences/tweaks.ui +++ b/src/calibre/gui2/preferences/tweaks.ui @@ -6,7 +6,7 @@ 0 0 - 660 + 756 531 @@ -14,8 +14,24 @@ Form + + + + Values for the tweaks are shown below. Edit them to change the behavior of calibre. Your changes will only take effect <b>after a restart</b> of calibre. + + + true + + + + + + 0 + 10 + + Qt::Horizontal @@ -24,16 +40,6 @@ - - - - Values for the tweaks are shown below. Edit them to change the behavior of calibre. Your changes will only take effect <b>after a restart</b> of calibre. - - - true - - - @@ -72,8 +78,8 @@ - - + + Help @@ -92,7 +98,7 @@ - + Edit tweak @@ -128,12 +134,59 @@ + + + + + 10 + 0 + + + + QComboBox::AdjustToMinimumContentsLength + + + 10 + + + + + + + &Next + + + + :/images/arrow-down.png:/images/arrow-down.png + + + + + + + &Previous + + + + :/images/arrow-up.png:/images/arrow-up.png + + + - + + + SearchBox2 + QComboBox +
    calibre/gui2/search_box.h
    +
    +
+ + + From ec32d0f3f10724a55cdce3c952106414423d2a6d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Aug 2011 09:51:46 -0600 Subject: [PATCH 26/39] When adding a text indent to paragraphs as part of the remove spacing between paragraphs transformation, do not add an indent to paragraphs that are directly centered or right aligned. Fixes #830439 ([Enhancement]Indenting should ignore centered text) --- src/calibre/ebooks/oeb/transforms/flatcss.py | 3 ++- src/calibre/library/sqlite.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index d006d8dd2d..1493a647ae 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -320,7 +320,8 @@ class CSSFlattener(object): if self.context.insert_blank_line: cssdict['margin-top'] = cssdict['margin-bottom'] = \ '%fem'%self.context.insert_blank_line_size - if self.context.remove_paragraph_spacing: + if (self.context.remove_paragraph_spacing and + cssdict.get('text-align', None) not in ('center', 'right')): cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size if cssdict: diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index b5917f1a55..90d293ba64 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -290,7 +290,10 @@ class DatabaseException(Exception): def __init__(self, err, tb): tb = '\n\t'.join(('\tRemote'+tb).splitlines()) - msg = unicode(err) +'\n' + tb + try: + msg = unicode(err) +'\n' + tb + except: + msg = repr(err) + '\n' + tb Exception.__init__(self, msg) self.orig_err = err self.orig_tb = tb From 2c33b9b4097d866e05a317a7f3ada4987e9d011f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Aug 2011 10:49:32 -0600 Subject: [PATCH 27/39] Add an option to Preferences->Look and Feel->Cover Browser to show the cover browser full screen.
Fixes #829855 ([Enhancements] in cover browser) --- src/calibre/gui2/__init__.py | 1 + src/calibre/gui2/cover_flow.py | 37 ++++++++++++++++++++--- src/calibre/gui2/preferences/look_feel.py | 8 ++++- src/calibre/gui2/preferences/look_feel.ui | 22 +++++++++++++- 4 files changed, 61 insertions(+), 7 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 83ade61200..715696a89e 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -98,6 +98,7 @@ gprefs.defaults['book_display_fields'] = [ ] gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}' gprefs.defaults['preserve_date_on_ctl'] = True +gprefs.defaults['cb_fullscreen'] = False # }}} diff --git a/src/calibre/gui2/cover_flow.py b/src/calibre/gui2/cover_flow.py index ca108a592e..67a8f08bcd 100644 --- a/src/calibre/gui2/cover_flow.py +++ b/src/calibre/gui2/cover_flow.py @@ -9,8 +9,8 @@ Module to implement the Cover Flow feature import sys, os, time -from PyQt4.Qt import (QImage, QSizePolicy, QTimer, QDialog, Qt, QSize, - QStackedLayout, QLabel, QByteArray, pyqtSignal) +from PyQt4.Qt import (QImage, QSizePolicy, QTimer, QDialog, Qt, QSize, QAction, + QStackedLayout, QLabel, QByteArray, pyqtSignal, QKeySequence) from calibre import plugins from calibre.gui2 import config, available_height, available_width, gprefs @@ -150,12 +150,39 @@ class CBDialog(QDialog): if not self.restoreGeometry(geom): h, w = available_height()-60, int(available_width()/1.5) self.resize(w, h) + self.action_fs_toggle = a = QAction(self) + self.addAction(a) + a.setShortcuts([QKeySequence('F11', QKeySequence.PortableText), + QKeySequence('Ctrl+Shift+F', QKeySequence.PortableText)]) + a.triggered.connect(self.toggle_fullscreen) + self.action_esc_fs = a = QAction(self) + a.triggered.connect(self.show_normal) + self.addAction(a) + a.setShortcuts([QKeySequence('Esc', QKeySequence.PortableText)]) + + self.pre_fs_geom = None def closeEvent(self, *args): - geom = bytearray(self.saveGeometry()) - gprefs['cover_browser_dialog_geometry'] = geom + if not self.isFullScreen(): + geom = bytearray(self.saveGeometry()) + gprefs['cover_browser_dialog_geometry'] = geom self.closed.emit() + def show_normal(self): + self.showNormal() + if self.pre_fs_geom is not None: + self.restoreGeometry(self.pre_fs_geom) + self.pre_fs_geom = None + + def toggle_fullscreen(self, *args): + if self.isFullScreen(): + self.show_normal() + else: + self.pre_fs_geom = bytearray(self.saveGeometry()) + self.showFullScreen() + + + class CoverFlowMixin(object): def __init__(self): @@ -228,7 +255,7 @@ class CoverFlowMixin(object): d.addAction(self.cb_splitter.action_toggle) self.cover_flow.setVisible(True) self.cover_flow.setFocus(Qt.OtherFocusReason) - d.show() + d.showFullScreen() if gprefs['cb_fullscreen'] else d.show() self.cb_splitter.button.set_state_to_hide() d.closed.connect(self.cover_browser_closed) self.cb_dialog = d diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index c87cad7cad..c017fe69c2 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -6,7 +6,7 @@ __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog, - QAbstractListModel, Qt, QIcon) + QAbstractListModel, Qt, QIcon, QKeySequence) from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList from calibre.gui2.preferences.look_feel_ui 
import Ui_Form @@ -129,6 +129,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('disable_tray_notification', config) r('use_roman_numerals_for_series_number', config) r('separate_cover_flow', config, restart_required=True) + r('cb_fullscreen', gprefs) choices = [(_('Off'), 'off'), (_('Small'), 'small'), (_('Medium'), 'medium'), (_('Large'), 'large')] @@ -170,6 +171,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.tabWidget.addTab(self.edit_rules, QIcon(I('format-fill-color.png')), _('Column coloring')) self.tabWidget.setCurrentIndex(0) + keys = [QKeySequence('F11', QKeySequence.PortableText), QKeySequence( + 'Ctrl+Shift+F', QKeySequence.PortableText)] + keys = [unicode(x.toString(QKeySequence.NativeText)) for x in keys] + self.fs_help_msg.setText(unicode(self.fs_help_msg.text())%( + _(' or ').join(keys))) def initialize(self): ConfigWidgetBase.initialize(self) diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui index 07d533fdef..498013a68b 100644 --- a/src/calibre/gui2/preferences/look_feel.ui +++ b/src/calibre/gui2/preferences/look_feel.ui @@ -417,7 +417,7 @@ then the tags will be displayed each on their own line. - + Qt::Vertical @@ -430,6 +430,26 @@ then the tags will be displayed each on their own line. + + + + When showing cover browser in separate window, show it &fullscreen + + + + + + + margin-left: 1.5em + + + You can press the %s keys to toggle full screen mode. + + + true + + + From 967285b9f6dcc495b1f892587724732c51960323 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Aug 2011 11:33:33 -0600 Subject: [PATCH 28/39] When automatically computing author sort from author's name, if the name contains certain words like Inc., Company, Team, etc. use the author name as the sort string directly. The list of such words can be controlled via Preferences->Tweaks. Fixes #797895 (author name sort order copy keywords) --- resources/default_tweaks.py | 6 ++++++ src/calibre/ebooks/metadata/__init__.py | 7 +++++++ src/calibre/gui2/metadata/basic_widgets.py | 17 +++++++++++++++-- src/calibre/gui2/metadata/single.py | 7 ++++++- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index 12731a8c42..f11a0b7bc0 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -62,10 +62,16 @@ authors_completer_append_separator = False # The author name suffixes are words that are ignored when they occur at the # end of an author name. The case of the suffix is ignored and trailing # periods are automatically handled. +# The author name copy words are a set of words which if they occur in an +# author name cause the automatically generated author sort string to be +# identical to the author name. This means that the sort for a string like Acme +# Inc. will be Acme Inc.
instead of Inc., Acme author_sort_copy_method = 'comma' author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd', 'MD', 'M.D', 'I', 'II', 'III', 'IV', 'Junior', 'Senior') +author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council', + 'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team') #: Use author sort in Tag Browser # Set which author field to display in the tags pane (the list of authors, diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index 2c26d011b7..a9816db5ae 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -36,8 +36,15 @@ def author_to_author_sort(author, method=None): return author if method is None: method = tweaks['author_sort_copy_method'] + + ltoks = frozenset(x.lower() for x in tokens) + copy_words = frozenset(x.lower() for x in tweaks['author_name_copywords']) + if ltoks.intersection(copy_words): + method = u'copy' + if method == u'copy': return author + suffixes = set([x.lower() for x in tweaks['author_name_suffixes']]) suffixes |= set([x+u'.' for x in suffixes]) diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index 29f6fffa0b..3ec34938af 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -308,7 +308,7 @@ class AuthorSortEdit(EnLineEdit): LABEL = _('Author s&ort:') def __init__(self, parent, authors_edit, autogen_button, db, - copy_a_to_as_action, copy_as_to_a_action): + copy_a_to_as_action, copy_as_to_a_action, a_to_as, as_to_a): EnLineEdit.__init__(self, parent) self.authors_edit = authors_edit self.db = db @@ -333,6 +333,8 @@ class AuthorSortEdit(EnLineEdit): autogen_button.clicked.connect(self.auto_generate) copy_a_to_as_action.triggered.connect(self.auto_generate) copy_as_to_a_action.triggered.connect(self.copy_to_authors) + a_to_as.triggered.connect(self.author_to_sort) + as_to_a.triggered.connect(self.sort_to_author) self.update_state() @dynamic_property @@ -389,10 +391,21 @@ class AuthorSortEdit(EnLineEdit): def auto_generate(self, *args): au = unicode(self.authors_edit.text()) - au = re.sub(r'\s+et al\.$', '', au) + au = re.sub(r'\s+et al\.$', '', au).strip() authors = string_to_authors(au) self.current_val = self.db.author_sort_from_authors(authors) + def author_to_sort(self, *args): + au = unicode(self.authors_edit.text()) + au = re.sub(r'\s+et al\.$', '', au).strip() + if au: + self.current_val = au + + def sort_to_author(self, *args): + aus = self.current_val + if aus: + self.authors_edit.current_val = [aus] + def initialize(self, db, id_): self.current_val = db.author_sort(id_, index_is_id=True) diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index dc3983171b..a2666b0351 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -130,10 +130,15 @@ class MetadataSingleDialogBase(ResizableDialog): ac = m.addAction(QIcon(I('forward.png')), _('Set author sort from author')) ac2 = m.addAction(QIcon(I('back.png')), _('Set author from author sort')) ac3 = m.addAction(QIcon(I('user_profile.png')), _('Manage authors')) + ac4 = m.addAction(QIcon(I('next.png')), + _('Copy author to author sort')) + ac5 = m.addAction(QIcon(I('previous.png')), + _('Copy author sort to author')) + b.setMenu(m) self.authors = AuthorsEdit(self, ac3) self.author_sort = AuthorSortEdit(self, self.authors, b, self.db, ac, - ac2) + ac2, ac4, ac5) 
self.basic_metadata_widgets.extend([self.authors, self.author_sort]) self.swap_title_author_button = QToolButton(self) From 0901c5807c5f956e044639350f41cdbb9ebcf07f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Aug 2011 16:08:08 -0600 Subject: [PATCH 29/39] Various Turkish news sources by thomass --- recipes/bugun_gazetesi.recipe | 57 +++++++++++++++++++++++++++ recipes/yagmur_dergisi.recipe | 52 +++++++++++++++++++++++++ recipes/yeni_umit_dergisi.recipe | 52 +++++++++++++++++++++++++ recipes/yenisafak_gazetesi.recipe | 64 +++++++++++++++++++++++++++++++ 4 files changed, 225 insertions(+) create mode 100644 recipes/bugun_gazetesi.recipe create mode 100644 recipes/yagmur_dergisi.recipe create mode 100644 recipes/yeni_umit_dergisi.recipe create mode 100644 recipes/yenisafak_gazetesi.recipe diff --git a/recipes/bugun_gazetesi.recipe b/recipes/bugun_gazetesi.recipe new file mode 100644 index 0000000000..0a1d27f517 --- /dev/null +++ b/recipes/bugun_gazetesi.recipe @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class Bugun (BasicNewsRecipe): + + title = u'BUGÜN Gazetesi' + __author__ = u'thomass' + oldest_article = 2 + max_articles_per_feed =100 + #no_stylesheets = True + #delay = 1 + use_embedded_content = False + encoding = 'UTF-8' + publisher = 'thomass' + category = 'news, haberler,TR,gazete' + language = 'tr' + publication_type = 'newspaper ' + extra_css = ' div{font-size: small} h2{font-size: small;font-weight: bold} #ctl00_ortayer_haberBaslik{font-size:20px;font-weight: bold} '#h1{ font-size:10%;font-weight: bold} '#ctl00_ortayer_haberBaslik{ 'font-size:10%;font-weight: bold'} + #introduction{} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + conversion_options = { + 'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + cover_img_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png' + masthead_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png' + + keep_only_tags = [dict(name='h1', attrs={'class':[ 'haberBaslik']}),dict(name='h2', attrs={'class':[ 'haberOzet']}), dict(name='div', attrs={'class':['haberGriDivvvv']}), dict(name='div', attrs={'id':[ 'haberTextDiv']}), ] + + #keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ] + #remove_tags = [ dict(name='div', attrs={'id':['news-detail-news-text-font-size','news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})] + + + #remove_attributes = ['width','height'] + remove_empty_feeds= True + + feeds = [ + ( u'Son Dakika', u'http://www.bugun.com.tr/haberler.xml'), + ( u'Yazarlar', u'http://www.bugun.com.tr/rss/yazarlar.xml'), + ( u'Gündem', u'http://www.bugun.com.tr/rss/gundem.xml'), + ( u'Ekonomi', u'http://www.bugun.com.tr/rss/ekonomi.xml'), + ( u'Spor', u'http://www.bugun.com.tr/rss/spor.xml'), + ( u'Magazin', u'http://www.bugun.com.tr/rss/magazin.xml'), + ( u'Teknoloji', u'http://www.bugun.com.tr/rss/teknoloji.xml'), + ( u'Yaşam', 
u'http://www.bugun.com.tr/rss/yasam.xml'), + ( u'Medya', u'http://www.bugun.com.tr/rss/medya.xml'), + ( u'Dünya', u'http://www.bugun.com.tr/rss/dunya.xml'), + ( u'Politika', u'http://www.bugun.com.tr/rss/politika.xml'), + ( u'Sağlık', u'http://www.bugun.com.tr/rss/saglik.xml'), + ( u'Tarifler', u'http://www.bugun.com.tr/rss/yemek-tarifi.xml'), + + + + + ] diff --git a/recipes/yagmur_dergisi.recipe b/recipes/yagmur_dergisi.recipe new file mode 100644 index 0000000000..786a628a0c --- /dev/null +++ b/recipes/yagmur_dergisi.recipe @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class Yagmur(BasicNewsRecipe): + title = u'Yagmur Dergisi' + __author__ = u'thomass' + description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi' + oldest_article = 90 + max_articles_per_feed =100 + no_stylesheets = True + #delay = 1 + #use_embedded_content = False + + #publisher = ' ' + category = 'dergi, ilim, kültür, edebiyat,Türkçe' + language = 'tr' + publication_type = 'magazine' + encoding = 'ISO 8859-9' + publisher = 'thomass' + + + + #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + conversion_options = { + 'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})] + + #remove_attributes = ['aria-describedby'] + #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ] + cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif' + masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif' + #remove_tags_before = dict(id='content-right') + + + #remove_empty_feeds= True + #remove_attributes = ['width','height'] + + feeds = [ + ( u'Yagmur', u'http://open.dapper.net/services/yagmur'), + ] + + #def preprocess_html(self, soup): + # return self.adeify_images(soup) + def print_version(self, url): #there is a probem caused by table format + return url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=', 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=') + diff --git a/recipes/yeni_umit_dergisi.recipe b/recipes/yeni_umit_dergisi.recipe new file mode 100644 index 0000000000..24b95acae4 --- /dev/null +++ b/recipes/yeni_umit_dergisi.recipe @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class YeniUmit(BasicNewsRecipe): + title = u'Yeni Umit Dergisi' + __author__ = u'thomass' + description = 'Aylık Dini İlimler ve Kültür Dergisi' + oldest_article = 45 + max_articles_per_feed =100 + no_stylesheets = True + #delay = 1 + #use_embedded_content = False + + #publisher = ' ' + category = 'dergi, ilim, kültür, edebiyat,Türkçe' + language = 'tr' + publication_type = 'magazine' + encoding = 'ISO 8859-9' + publisher = 'thomass' + + + + #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + conversion_options = 
{ + 'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})] + + #remove_attributes = ['aria-describedby'] + #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ] + cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif' + masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif' + #remove_tags_before = dict(id='content-right') + + + #remove_empty_feeds= True + #remove_attributes = ['width','height'] + + feeds = [ + ( u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'), + ] + + #def preprocess_html(self, soup): + # return self.adeify_images(soup) + def print_version(self, url): #there is a probem caused by table format + return url.replace('http://www.yeniumit.com.tr/konular', 'http://www.yeniumit.com.tr/yazdir') + diff --git a/recipes/yenisafak_gazetesi.recipe b/recipes/yenisafak_gazetesi.recipe new file mode 100644 index 0000000000..afcec76508 --- /dev/null +++ b/recipes/yenisafak_gazetesi.recipe @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class Bugun (BasicNewsRecipe): + + title = u'Yenişafak Gazetesi' + __author__ = u'thomass' + oldest_article = 2 + max_articles_per_feed =100 + no_stylesheets = True + #delay = 1 + use_embedded_content = False + encoding = 'ISO 8859-9' #'UTF-8' + publisher = 'thomass' + category = 'news, haberler,TR,gazete' + language = 'tr' + publication_type = 'newspaper ' + #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + conversion_options = { + 'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + cover_img_url = 'http://yenisafak.com.tr/resim/logo.gif' + masthead_url = 'http://yenisafak.com.tr/resim/logo.gif' + + keep_only_tags = [dict(name='div', attrs={'id':[ 'ctghaberdetay2010']}) ] + extra_css = ' h1{font-size:20px;font-weight: bold}h2{font-size: small;font-weight: bold}div{font-size: small} '#h1{ font-size:10%;font-weight: bold} '#ctl00_ortayer_haberBaslik{ 'font-size:10%;font-weight: bold'} + + #keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ] + remove_tags = [ dict(name='div', attrs={'id':['yasaluyari2010','divhaberdetayilisik2010']}),dict(name='font', attrs={'class':['haberdetaytarih']})]#,'news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})] + + + #remove_attributes = ['width','height'] + remove_empty_feeds= True + + feeds = [ + ( u'SonDakika', u'http://yenisafak.com.tr/rss/?xml=anasayfa'), + ( u'Gündem', u'http://yenisafak.com.tr/rss/?xml=gundem'), + ( u'Politika', 
u'http://yenisafak.com.tr/rss/?xml=politika'), + ( u'Ekonomi', u'http://yenisafak.com.tr/rss/?xml=ekonomi'), + ( u'Dünya', u'http://yenisafak.com.tr/rss/?xml=dunya'), + ( u'Aktüel', u'http://yenisafak.com.tr/rss/?xml=aktuel'), + ( u'Eğitim', u'http://yenisafak.com.tr/rss/?xml=egitim'), + ( u'Spor', u'http://yenisafak.com.tr/rss/?xml=spor'), + ( u'Yazarlar', u'http://yenisafak.com.tr/rss/?xml=yazarlar'), + ( u'Televizyon', u'http://yenisafak.com.tr/rss/?xml=televizyon'), + ( u'Sağlık', u'http://yenisafak.com.tr/rss/?xml=saglik'), + ( u'Yurt Haberler', u'http://yenisafak.com.tr/rss/?xml=yurthaberler'), + ( u'Bilişim', u'http://yenisafak.com.tr/rss/?xml=bilisim'), + ( u'Diziler', u'http://yenisafak.com.tr/rss/?xml=diziler'), + ( u'Kültür-Sanat', u'http://yenisafak.com.tr/rss/?xml=kultursanat'), + ( u'Röportaj', u'http://yenisafak.com.tr/rss/?xml=roportaj'), + ( u'Sinema', u'http://yenisafak.com.tr/rss/?xml=sinema'), + ( u'Yorum', u'http://yenisafak.com.tr/rss/?xml=yorum'), + ( u' Yeni Şafak Pazar', u'http://yenisafak.com.tr/rss/?xml=pazar'), + ( u'Yeni Şafak Kitap', u'http://yenisafak.com.tr/rss/?xml=kitap'), + ( u'Yeni Şafak English', u'http://yenisafak.com.tr/rss/?xml=english'), + + + + ] From 36c4b70030e5a516b6daad02c521959a603bbce9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Aug 2011 23:09:23 -0600 Subject: [PATCH 30/39] ... --- recipes/politifact.recipe | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/recipes/politifact.recipe b/recipes/politifact.recipe index e3550ce7f1..6f5344ae4e 100644 --- a/recipes/politifact.recipe +++ b/recipes/politifact.recipe @@ -5,7 +5,6 @@ class PolitiFactCom(BasicNewsRecipe): __author__ = u'Michael Heinz' oldest_article = 21 max_articles_per_feed = 100 - recursion = 0 language = 'en' no_stylesheets = True @@ -27,4 +26,9 @@ class PolitiFactCom(BasicNewsRecipe): (u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/') ] - + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup From fdc97e33284d984d4780a65eca798a9b314ee2e0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 09:38:31 -0600 Subject: [PATCH 31/39] ... 
--- recipes/politifact.recipe | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/recipes/politifact.recipe b/recipes/politifact.recipe index 6f5344ae4e..a0f0d786dd 100644 --- a/recipes/politifact.recipe +++ b/recipes/politifact.recipe @@ -26,9 +26,4 @@ class PolitiFactCom(BasicNewsRecipe): (u'Statements', u'http://www.politifact.com/feeds/statements/truth-o-meter/') ] - def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup + From 6ee7c3661fe6b7e3f6847a506d5a77f14b49c15b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 10:32:05 -0600 Subject: [PATCH 32/39] Fix #832761 (translation input is case sensitive) --- src/calibre/gui2/languages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/languages.py b/src/calibre/gui2/languages.py index 3398081c5f..d7f34df1b4 100644 --- a/src/calibre/gui2/languages.py +++ b/src/calibre/gui2/languages.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' from calibre.gui2.complete import MultiCompleteComboBox from calibre.utils.localization import lang_map -from calibre.utils.icu import sort_key +from calibre.utils.icu import sort_key, lower class LanguagesEdit(MultiCompleteComboBox): @@ -22,7 +22,7 @@ class LanguagesEdit(MultiCompleteComboBox): self.names_with_commas = [x for x in self._lang_map.itervalues() if ',' in x] self.comma_map = {k:k.replace(',', '|') for k in self.names_with_commas} self.comma_rmap = {v:k for k, v in self.comma_map.iteritems()} - self._rmap = {v:k for k,v in self._lang_map.iteritems()} + self._rmap = {lower(v):k for k,v in self._lang_map.iteritems()} all_items = sorted(self._lang_map.itervalues(), key=sort_key) @@ -46,7 +46,7 @@ class LanguagesEdit(MultiCompleteComboBox): ans = [] for name in vals: if name: - code = self._rmap.get(name, None) + code = self._rmap.get(lower(name), None) if code is not None: ans.append(code) return ans @@ -66,7 +66,7 @@ class LanguagesEdit(MultiCompleteComboBox): bad = [] for name in vals: if name: - code = self._rmap.get(name, None) + code = self._rmap.get(lower(name), None) if code is None: bad.append(name) return bad From 6becd633baf30be65e58dd769efb165694598ab8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 11:02:40 -0600 Subject: [PATCH 33/39] Fairbanks Daily by Roger --- recipes/fairbanks_daily.recipe | 128 +++++++++++++++++++++++++++++++++ recipes/msdnmag_en.recipe | 16 ++--- 2 files changed, 136 insertions(+), 8 deletions(-) create mode 100644 recipes/fairbanks_daily.recipe diff --git a/recipes/fairbanks_daily.recipe b/recipes/fairbanks_daily.recipe new file mode 100644 index 0000000000..282925728e --- /dev/null +++ b/recipes/fairbanks_daily.recipe @@ -0,0 +1,128 @@ +#import re # Provides preprocess_regexps re.compile + +from calibre.web.feeds.news import BasicNewsRecipe + +class FairbanksDailyNewsminer(BasicNewsRecipe): + title = u'Fairbanks Daily News-miner' + __author__ = 'Roger' + oldest_article = 7 + max_articles_per_feed = 100 + + description = ''''The voice of interior Alaska since 1903''' + publisher = 'http://www.newsminer.com/' + category = 'news, Alaska, Fairbanks' + language = 'en' + #extra_css = ''' + # p{font-weight: normal;text-align: justify} + # ''' + + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + language = 'en' + encoding = 'utf8' + conversion_options = {'linearize_tables':True} + # TODO: I don't see any photos in my Mobi file with 
this masthead_url!
+    masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'
+
+
+    # In order to omit seeing the number of views, the number of posts and the pipe
+    # symbol for divider after the title and date of the article, a regex or
+    # manual processing is needed to get just the "story_item_date updated"
+    # (which contains the date). Everything else on this line is pretty much not needed.
+    #
+    # HTML line containing story_item_date:
+    #
+    # Aug 22, 2011 | 2370 views | 52 52 comments | 9 9 recommendations | email to a friend | print
    + + # The following was suggested, but it looks like I also need to define self & soup + # (as well as bring in extra soup depends?) + #date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'})) + + #preprocess_regexps = [(re.compile(r']*addthis_separator*>'), lambda match: '') ] + #preprocess_regexps = [(re.compile(r'span class="addthis_separator">|'), lambda match: '') ] + + #preprocess_regexps = [ + # (re.compile(r'.*?', re.IGNORECASE | re.DOTALL), lambda match : ''), + # ] + + #def get_browser(self): + #def preprocess_html(soup, first_fetch): + # date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'})) + # return + + + # Try to keep some tags - some might not be needed here + keep_only_tags = [ + #date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'})), + dict(name='div', attrs={'class':'hnews hentry item'}), + dict(name='div', attrs={'class':'story_item_headline entry-title'}), + #dict(name='span', attrs={'class':'story_item_date updated'}), + dict(name='div', attrs={'class':'full_story'}) + ] + #remove_tags = [ + # dict(name='div', attrs={'class':'story_tools'}), + # dict(name='p', attrs={'class':'ad_label'}), + # ] + + # Try to remove some bothersome tags + remove_tags = [ + #dict(name='img', attrs={'alt'}), + dict(name='img', attrs={'class':'dont_touch_me'}), + dict(name='span', attrs={'class':'number_recommendations'}), + #dict(name='div', attrs={'class':'signature_line'}), + dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}), + dict(name='div', attrs={'class':['addthis_toolbox','addthis_default_style']}), + dict(name='span', attrs={'class':'addthis_separator'}), + dict(name='div', attrs={'class':'related_content'}), + dict(name='div', attrs={'class':'comments_container'}), + #dict(name='div', attrs={'class':'signature_line'}), + dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'}), + dict(name='div', attrs={'id':'comments_container'}) + ] + + + # This one works but only gets title, date and clips article content! + #remove_tags_after = [ + # dict(name='span', attrs={'class':'story_item_date updated'}) + # ] + + #remove_tags_after = [ + # dict(name='div', attrs={'class':'advertisement'}), + # ] + + # Try clipping tags before and after to prevent pulling img views/posts numbers after date? + #remove_tags_before = [ + # dict(name='span', attrs={'class':'story_item_date updated'}) + # ] + + #extra_css # tweak the appearance # TODO: Change article titles to bold? + + + # Comment-out or uncomment any of the following RSS feeds according to your + # liking. + # + # TODO: Adding more then one RSS Feed, and newline will be omitted for + # entries within the Table of Contents or Index of Articles + # + # TODO: Some random bits of text is trailing the last page (or TOC on MOBI + # files), these are bits of public posts and comments and need to also be + # removed. 
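+    # A possible shape for the "manual processing" mentioned in the comments
+    # near the masthead_url setting above, left here as a commented-out sketch
+    # rather than as part of the recipe. It uses the (self, soup) signature
+    # that BasicNewsRecipe expects for preprocess_html, and assumes the date
+    # span and the views/comments counters from the example line share one
+    # parent element, which is collapsed down to just the date text.
+    #def preprocess_html(self, soup):
+    #    date_tag = soup.find('span', attrs={'class':'story_item_date updated'})
+    #    if date_tag is not None and date_tag.parent is not None:
+    #        date_text = self.tag_to_string(date_tag).strip()
+    #        date_tag.parent.replaceWith(date_text)
+    #    return soup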
+ # + feeds = [ + (u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'), + (u'Local News', u'http://newsminer.com/rss/rss_feeds/local_news?content_type=article&tags=local_news&page_name=rss_feeds&offset=0&instance=local_news'), + (u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'), + (u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'), + (u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'), + # (u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'), + (u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'), + # (u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'), + # (u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'), + (u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'), + # (u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'), + # (u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'), + # (u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'), + # (u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'), + (u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin') + ] + diff --git a/recipes/msdnmag_en.recipe b/recipes/msdnmag_en.recipe index 341ca027f6..cf9cfc4f6a 100644 --- a/recipes/msdnmag_en.recipe +++ b/recipes/msdnmag_en.recipe @@ -6,7 +6,7 @@ __copyright__ = '2009, Darko Miletic ' msdn.microsoft.com/en-us/magazine ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag +from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup class MSDNMagazine_en(BasicNewsRecipe): title = 'MSDN Magazine' @@ -21,7 +21,7 @@ class MSDNMagazine_en(BasicNewsRecipe): use_embedded_content = False encoding = 'utf-8' language = 'en' - + base_url = 
'http://msdn.microsoft.com/en-us/magazine/default.aspx' rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1' @@ -32,15 +32,15 @@ class MSDNMagazine_en(BasicNewsRecipe): dict(name='div', attrs={'class':'DivRatingsOnly'}) ,dict(name='div', attrs={'class':'ShareThisButton4'}) ] - + def find_articles(self): idx_contents = self.browser.open(self.rss_url).read() idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES) - + for article in idx.findAll('item'): desc_html = self.tag_to_string(article.find('description')) description = self.tag_to_string(BeautifulSoup(desc_html)) - + a = { 'title': self.tag_to_string(article.find('title')), 'url': self.tag_to_string(article.find('link')), @@ -52,14 +52,14 @@ class MSDNMagazine_en(BasicNewsRecipe): def parse_index(self): soup = self.index_to_soup(self.base_url) - + #find issue name, eg "August 2011" issue_name = self.tag_to_string(soup.find('h1')) - + # find cover pic img = soup.find('img',attrs ={'alt':issue_name}) if img is not None: self.cover_url = img['src'] return [(issue_name, list(self.find_articles()))] - + From fb08ceb1e55c0880219ac0bfc68b44d7fbf79d52 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 11:13:31 -0600 Subject: [PATCH 34/39] ... --- src/calibre/ebooks/oeb/stylizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 88e074320d..5e4f389262 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -118,7 +118,6 @@ class CSSSelector(object): css_to_xpath_no_case(css)) self.sel2 = etree.XPath(path, namespaces=namespaces) except: - raise self.sel2 = lambda x: [] self.sel2_use_logged = False self.css = css From 23f4463f275b9caa0c978615e7ad8b422c36ea64 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 11:57:09 -0600 Subject: [PATCH 35/39] ... --- recipes/bbc.recipe | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/recipes/bbc.recipe b/recipes/bbc.recipe index 9c8b92f25c..2bccbaf4ae 100644 --- a/recipes/bbc.recipe +++ b/recipes/bbc.recipe @@ -36,8 +36,9 @@ class BBC(BasicNewsRecipe): ] remove_tags = [ - dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', \ - 'story-feature wide ', 'story-feature narrow']}) + dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', + 'story-feature wide ', 'story-feature narrow']}), + dict(id=['hypertab', 'comment-form']), ] remove_attributes = ['width','height'] From 5119925922f3f0c0485d80fa957a7d83274d7394 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 14:45:21 -0600 Subject: [PATCH 36/39] Revert CSS pipeline changes, as python functions in lxml are broken, badly --- src/calibre/ebooks/cssselect.py | 1007 -------------------- src/calibre/ebooks/mobi/writer2/indexer.py | 3 + src/calibre/ebooks/oeb/stylizer.py | 93 +- 3 files changed, 44 insertions(+), 1059 deletions(-) delete mode 100644 src/calibre/ebooks/cssselect.py diff --git a/src/calibre/ebooks/cssselect.py b/src/calibre/ebooks/cssselect.py deleted file mode 100644 index c4167a8e4d..0000000000 --- a/src/calibre/ebooks/cssselect.py +++ /dev/null @@ -1,1007 +0,0 @@ -"""CSS Selectors based on XPath. - -This module supports selecting XML/HTML tags based on CSS selectors. -See the `CSSSelector` class for details. 
-""" - -import re -from lxml import etree - -__all__ = ['SelectorSyntaxError', 'ExpressionError', - 'CSSSelector'] - -try: - _basestring = basestring -except NameError: - _basestring = str - -class SelectorSyntaxError(SyntaxError): - pass - -class ExpressionError(RuntimeError): - pass - -class CSSSelector(etree.XPath): - """A CSS selector. - - Usage:: - - >>> from lxml import etree, cssselect - >>> select = cssselect.CSSSelector("a tag > child") - - >>> root = etree.XML("TEXT") - >>> [ el.tag for el in select(root) ] - ['child'] - - To use CSS namespaces, you need to pass a prefix-to-namespace - mapping as ``namespaces`` keyword argument:: - - >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' - >>> select_ns = cssselect.CSSSelector('root > rdf|Description', - ... namespaces={'rdf': rdfns}) - - >>> rdf = etree.XML(( - ... '' - ... 'blah' - ... '') % rdfns) - >>> [(el.tag, el.text) for el in select_ns(rdf)] - [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')] - """ - def __init__(self, css, namespaces=None): - path = css_to_xpath_no_case(css) - etree.XPath.__init__(self, path, namespaces=namespaces) - self.css = css - - def __repr__(self): - return '<%s %s for %r>' % ( - self.__class__.__name__, - hex(abs(id(self)))[2:], - self.css) - -############################## -## Token objects: - -try: - _unicode = unicode - _unichr = unichr -except NameError: - # Python 3 - _unicode = str - _unichr = chr - -class _UniToken(_unicode): - def __new__(cls, contents, pos): - obj = _unicode.__new__(cls, contents) - obj.pos = pos - return obj - - def __repr__(self): - return '%s(%s, %r)' % ( - self.__class__.__name__, - _unicode.__repr__(self), - self.pos) - -class Symbol(_UniToken): - pass - -class String(_UniToken): - pass - -class Token(_UniToken): - pass - -############################################################ -## Parsing -############################################################ - -############################## -## Syntax objects: - -class Class(object): - """ - Represents selector.class_name - """ - - def __init__(self, selector, class_name): - self.selector = selector - # Kovid: Lowercased - self.class_name = class_name.lower() - - def __repr__(self): - return '%s[%r.%s]' % ( - self.__class__.__name__, - self.selector, - self.class_name) - - def xpath(self): - sel_xpath = self.selector.xpath() - # Kovid: Lowercased - sel_xpath.add_condition( - "contains(concat(' ', css:lower-case(normalize-space(@class)), ' '), %s)" % xpath_literal(' '+self.class_name+' ')) - return sel_xpath - -class Function(object): - """ - Represents selector:name(expr) - """ - - unsupported = [ - 'target', 'lang', 'enabled', 'disabled',] - - def __init__(self, selector, type, name, expr): - self.selector = selector - self.type = type - self.name = name - self.expr = expr - - def __repr__(self): - return '%s[%r%s%s(%r)]' % ( - self.__class__.__name__, - self.selector, - self.type, self.name, self.expr) - - def xpath(self): - sel_path = self.selector.xpath() - if self.name in self.unsupported: - raise ExpressionError( - "The pseudo-class %r is not supported" % self.name) - method = '_xpath_' + self.name.replace('-', '_') - if not hasattr(self, method): - raise ExpressionError( - "The pseudo-class %r is unknown" % self.name) - method = getattr(self, method) - return method(sel_path, self.expr) - - def _xpath_nth_child(self, xpath, expr, last=False, - add_name_test=True): - a, b = parse_series(expr) - if not a and not b and not last: - # a=0 means nothing is returned... 
- xpath.add_condition('false() and position() = 0') - return xpath - if add_name_test: - xpath.add_name_test() - xpath.add_star_prefix() - if a == 0: - if last: - b = 'last() - %s' % b - xpath.add_condition('position() = %s' % b) - return xpath - if last: - # FIXME: I'm not sure if this is right - a = -a - b = -b - if b > 0: - b_neg = str(-b) - else: - b_neg = '+%s' % (-b) - if a != 1: - expr = ['(position() %s) mod %s = 0' % (b_neg, a)] - else: - expr = [] - if b >= 0: - expr.append('position() >= %s' % b) - elif b < 0 and last: - expr.append('position() < (last() %s)' % b) - expr = ' and '.join(expr) - if expr: - xpath.add_condition(expr) - return xpath - # FIXME: handle an+b, odd, even - # an+b means every-a, plus b, e.g., 2n+1 means odd - # 0n+b means b - # n+0 means a=1, i.e., all elements - # an means every a elements, i.e., 2n means even - # -n means -1n - # -1n+6 means elements 6 and previous - - def _xpath_nth_last_child(self, xpath, expr): - return self._xpath_nth_child(xpath, expr, last=True) - - def _xpath_nth_of_type(self, xpath, expr): - if xpath.element == '*': - raise NotImplementedError( - "*:nth-of-type() is not implemented") - return self._xpath_nth_child(xpath, expr, add_name_test=False) - - def _xpath_nth_last_of_type(self, xpath, expr): - return self._xpath_nth_child(xpath, expr, last=True, add_name_test=False) - - def _xpath_contains(self, xpath, expr): - # text content, minus tags, must contain expr - if isinstance(expr, Element): - expr = expr._format_element() - xpath.add_condition('contains(css:lower-case(string(.)), %s)' - % xpath_literal(expr.lower())) - # FIXME: Currently case insensitive matching doesn't seem to be happening - return xpath - - def _xpath_not(self, xpath, expr): - # everything for which not expr applies - expr = expr.xpath() - cond = expr.condition - # FIXME: should I do something about element_path? - xpath.add_condition('not(%s)' % cond) - return xpath - -def _make_lower_case(context, s): - return s.lower() - -ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/') -ns.prefix = 'css' -ns['lower-case'] = _make_lower_case - -class Pseudo(object): - """ - Represents selector:ident - """ - - unsupported = ['indeterminate', 'first-line', 'first-letter', - 'selection', 'before', 'after', 'link', 'visited', - 'active', 'focus', 'hover'] - - def __init__(self, element, type, ident): - self.element = element - assert type in (':', '::') - self.type = type - self.ident = ident - - def __repr__(self): - return '%s[%r%s%s]' % ( - self.__class__.__name__, - self.element, - self.type, self.ident) - - def xpath(self): - el_xpath = self.element.xpath() - if self.ident in self.unsupported: - raise ExpressionError( - "The pseudo-class %r is unsupported" % self.ident) - method = '_xpath_' + self.ident.replace('-', '_') - if not hasattr(self, method): - raise ExpressionError( - "The pseudo-class %r is unknown" % self.ident) - method = getattr(self, method) - el_xpath = method(el_xpath) - return el_xpath - - def _xpath_checked(self, xpath): - # FIXME: is this really all the elements? - xpath.add_condition("(@selected or @checked) and (name(.) = 'input' or name(.) 
= 'option')") - return xpath - - def _xpath_root(self, xpath): - # if this element is the root element - raise NotImplementedError - - def _xpath_first_child(self, xpath): - xpath.add_star_prefix() - xpath.add_name_test() - xpath.add_condition('position() = 1') - return xpath - - def _xpath_last_child(self, xpath): - xpath.add_star_prefix() - xpath.add_name_test() - xpath.add_condition('position() = last()') - return xpath - - def _xpath_first_of_type(self, xpath): - if xpath.element == '*': - raise NotImplementedError( - "*:first-of-type is not implemented") - xpath.add_star_prefix() - xpath.add_condition('position() = 1') - return xpath - - def _xpath_last_of_type(self, xpath): - if xpath.element == '*': - raise NotImplementedError( - "*:last-of-type is not implemented") - xpath.add_star_prefix() - xpath.add_condition('position() = last()') - return xpath - - def _xpath_only_child(self, xpath): - xpath.add_name_test() - xpath.add_star_prefix() - xpath.add_condition('last() = 1') - return xpath - - def _xpath_only_of_type(self, xpath): - if xpath.element == '*': - raise NotImplementedError( - "*:only-of-type is not implemented") - xpath.add_condition('last() = 1') - return xpath - - def _xpath_empty(self, xpath): - xpath.add_condition("not(*) and not(normalize-space())") - return xpath - -class Attrib(object): - """ - Represents selector[namespace|attrib operator value] - """ - - def __init__(self, selector, namespace, attrib, operator, value): - self.selector = selector - self.namespace = namespace - self.attrib = attrib - self.operator = operator - self.value = value - - def __repr__(self): - if self.operator == 'exists': - return '%s[%r[%s]]' % ( - self.__class__.__name__, - self.selector, - self._format_attrib()) - else: - return '%s[%r[%s %s %r]]' % ( - self.__class__.__name__, - self.selector, - self._format_attrib(), - self.operator, - self.value) - - def _format_attrib(self): - if self.namespace == '*': - return self.attrib - else: - return '%s|%s' % (self.namespace, self.attrib) - - def _xpath_attrib(self): - # FIXME: if attrib is *? - if self.namespace == '*': - return '@' + self.attrib - else: - return '@%s:%s' % (self.namespace, self.attrib) - - def xpath(self): - path = self.selector.xpath() - attrib = self._xpath_attrib() - value = self.value - if self.operator == 'exists': - assert not value - path.add_condition(attrib) - elif self.operator == '=': - path.add_condition('%s = %s' % (attrib, - xpath_literal(value))) - elif self.operator == '!=': - # FIXME: this seems like a weird hack... - if value: - path.add_condition('not(%s) or %s != %s' - % (attrib, attrib, xpath_literal(value))) - else: - path.add_condition('%s != %s' - % (attrib, xpath_literal(value))) - #path.add_condition('%s != %s' % (attrib, xpath_literal(value))) - elif self.operator == '~=': - path.add_condition("contains(concat(' ', normalize-space(%s), ' '), %s)" % (attrib, xpath_literal(' '+value+' '))) - elif self.operator == '|=': - # Weird, but true... - path.add_condition('%s = %s or starts-with(%s, %s)' % ( - attrib, xpath_literal(value), - attrib, xpath_literal(value + '-'))) - elif self.operator == '^=': - path.add_condition('starts-with(%s, %s)' % ( - attrib, xpath_literal(value))) - elif self.operator == '$=': - # Oddly there is a starts-with in XPath 1.0, but not ends-with - path.add_condition('substring(%s, string-length(%s)-%s) = %s' - % (attrib, attrib, len(value)-1, xpath_literal(value))) - elif self.operator == '*=': - # FIXME: case sensitive? 
- path.add_condition('contains(%s, %s)' % ( - attrib, xpath_literal(value))) - else: - assert 0, ("Unknown operator: %r" % self.operator) - return path - -class Element(object): - """ - Represents namespace|element - """ - - def __init__(self, namespace, element): - self.namespace = namespace - self.element = element - - def __repr__(self): - return '%s[%s]' % ( - self.__class__.__name__, - self._format_element()) - - def _format_element(self): - if self.namespace == '*': - return self.element - else: - return '%s|%s' % (self.namespace, self.element) - - def xpath(self): - if self.namespace == '*': - el = self.element.lower() - else: - # Kovid: Lowercased - el = '%s:%s' % (self.namespace, self.element.lower()) - return XPathExpr(element=el) - -class Hash(object): - """ - Represents selector#id - """ - - def __init__(self, selector, id): - self.selector = selector - self.id = id - - def __repr__(self): - return '%s[%r#%s]' % ( - self.__class__.__name__, - self.selector, self.id) - - def xpath(self): - path = self.selector.xpath() - path.add_condition('@id = %s' % xpath_literal(self.id)) - return path - -class Or(object): - - def __init__(self, items): - self.items = items - def __repr__(self): - return '%s(%r)' % ( - self.__class__.__name__, - self.items) - - def xpath(self): - paths = [item.xpath() for item in self.items] - return XPathExprOr(paths) - -class CombinedSelector(object): - - _method_mapping = { - ' ': 'descendant', - '>': 'child', - '+': 'direct_adjacent', - '~': 'indirect_adjacent', - } - - def __init__(self, selector, combinator, subselector): - assert selector is not None - self.selector = selector - self.combinator = combinator - self.subselector = subselector - - def __repr__(self): - if self.combinator == ' ': - comb = '' - else: - comb = self.combinator - return '%s[%r %s %r]' % ( - self.__class__.__name__, - self.selector, - comb, - self.subselector) - - def xpath(self): - if self.combinator not in self._method_mapping: - raise ExpressionError( - "Unknown combinator: %r" % self.combinator) - method = '_xpath_' + self._method_mapping[self.combinator] - method = getattr(self, method) - path = self.selector.xpath() - return method(path, self.subselector) - - def _xpath_descendant(self, xpath, sub): - # when sub is a descendant in any way of xpath - xpath.join('/descendant::', sub.xpath()) - return xpath - - def _xpath_child(self, xpath, sub): - # when sub is an immediate child of xpath - xpath.join('/', sub.xpath()) - return xpath - - def _xpath_direct_adjacent(self, xpath, sub): - # when sub immediately follows xpath - xpath.join('/following-sibling::', sub.xpath()) - xpath.add_name_test() - xpath.add_condition('position() = 1') - return xpath - - def _xpath_indirect_adjacent(self, xpath, sub): - # when sub comes somewhere after xpath as a sibling - xpath.join('/following-sibling::', sub.xpath()) - return xpath - -############################## -## XPathExpr objects: - -_el_re = re.compile(r'^\w+\s*$', re.UNICODE) -_id_re = re.compile(r'^(\w*)#(\w+)\s*$', re.UNICODE) -_class_re = re.compile(r'^(\w*)\.(\w+)\s*$', re.UNICODE) - -def css_to_xpath_no_case(css_expr, prefix='descendant-or-self::'): - if isinstance(css_expr, _basestring): - match = _el_re.search(css_expr) - if match is not None: - # Kovid: Lowercased - return '%s%s' % (prefix, match.group(0).strip().lower()) - match = _id_re.search(css_expr) - if match is not None: - return "%s%s[@id = '%s']" % ( - prefix, match.group(1) or '*', match.group(2)) - match = _class_re.search(css_expr) - if match is not None: - # 
Kovid: lowercased - return "%s%s[contains(concat(' ', css:lower-case(normalize-space(@class)), ' '), ' %s ')]" % ( - prefix, match.group(1).lower() or '*', match.group(2).lower()) - css_expr = parse(css_expr) - expr = css_expr.xpath() - assert expr is not None, ( - "Got None for xpath expression from %s" % repr(css_expr)) - if prefix: - expr.add_prefix(prefix) - return _unicode(expr) - -class XPathExpr(object): - - def __init__(self, prefix=None, path=None, element='*', condition=None, - star_prefix=False): - self.prefix = prefix - self.path = path - self.element = element - self.condition = condition - self.star_prefix = star_prefix - - def __str__(self): - path = '' - if self.prefix is not None: - path += _unicode(self.prefix) - if self.path is not None: - path += _unicode(self.path) - path += _unicode(self.element) - if self.condition: - path += '[%s]' % self.condition - return path - - def __repr__(self): - return '%s[%s]' % ( - self.__class__.__name__, self) - - def add_condition(self, condition): - if self.condition: - self.condition = '%s and (%s)' % (self.condition, condition) - else: - self.condition = condition - - def add_path(self, part): - if self.path is None: - self.path = self.element - else: - self.path += self.element - self.element = part - - def add_prefix(self, prefix): - if self.prefix: - self.prefix = prefix + self.prefix - else: - self.prefix = prefix - - def add_name_test(self): - if self.element == '*': - # We weren't doing a test anyway - return - self.add_condition("name() = %s" % xpath_literal(self.element)) - self.element = '*' - - def add_star_prefix(self): - """ - Adds a /* prefix if there is no prefix. This is when you need - to keep context's constrained to a single parent. - """ - if self.path: - self.path += '*/' - else: - self.path = '*/' - self.star_prefix = True - - def join(self, combiner, other): - prefix = _unicode(self) - prefix += combiner - path = (other.prefix or '') + (other.path or '') - # We don't need a star prefix if we are joining to this other - # prefix; so we'll get rid of it - if other.star_prefix and path == '*/': - path = '' - self.prefix = prefix - self.path = path - self.element = other.element - self.condition = other.condition - -class XPathExprOr(XPathExpr): - """ - Represents |'d expressions. Note that unfortunately it isn't - the union, it's the sum, so duplicate elements will appear. - """ - - def __init__(self, items, prefix=None): - for item in items: - assert item is not None - self.items = items - self.prefix = prefix - - def __str__(self): - prefix = self.prefix or '' - return ' | '.join(["%s%s" % (prefix,i) for i in self.items]) - -split_at_single_quotes = re.compile("('+)").split - -def xpath_literal(s): - if isinstance(s, Element): - # This is probably a symbol that looks like an expression... 
- s = s._format_element() - else: - s = _unicode(s) - if "'" not in s: - s = "'%s'" % s - elif '"' not in s: - s = '"%s"' % s - else: - s = "concat(%s)" % ','.join([ - (("'" in part) and '"%s"' or "'%s'") % part - for part in split_at_single_quotes(s) if part - ]) - return s - -############################## -## Parsing functions - -def parse(string): - stream = TokenStream(tokenize(string)) - stream.source = string - try: - return parse_selector_group(stream) - except SelectorSyntaxError: - import sys - e = sys.exc_info()[1] - message = "%s at %s -> %r" % ( - e, stream.used, stream.peek()) - e.msg = message - if sys.version_info < (2,6): - e.message = message - e.args = tuple([message]) - raise - -def parse_selector_group(stream): - result = [] - while 1: - result.append(parse_selector(stream)) - if stream.peek() == ',': - stream.next() - else: - break - if len(result) == 1: - return result[0] - else: - return Or(result) - -def parse_selector(stream): - result = parse_simple_selector(stream) - while 1: - peek = stream.peek() - if peek == ',' or peek is None: - return result - elif peek in ('+', '>', '~'): - # A combinator - combinator = stream.next() - else: - combinator = ' ' - consumed = len(stream.used) - next_selector = parse_simple_selector(stream) - if consumed == len(stream.used): - raise SelectorSyntaxError( - "Expected selector, got '%s'" % stream.peek()) - result = CombinedSelector(result, combinator, next_selector) - return result - -def parse_simple_selector(stream): - peek = stream.peek() - if peek != '*' and not isinstance(peek, Symbol): - element = namespace = '*' - else: - next = stream.next() - if next != '*' and not isinstance(next, Symbol): - raise SelectorSyntaxError( - "Expected symbol, got '%s'" % next) - if stream.peek() == '|': - namespace = next - stream.next() - element = stream.next() - if element != '*' and not isinstance(next, Symbol): - raise SelectorSyntaxError( - "Expected symbol, got '%s'" % next) - else: - namespace = '*' - element = next - result = Element(namespace, element) - has_hash = False - while 1: - peek = stream.peek() - if peek == '#': - if has_hash: - # You can't have two hashes - # (FIXME: is there some more general rule I'm missing?) - break - stream.next() - result = Hash(result, stream.next()) - has_hash = True - continue - elif peek == '.': - stream.next() - result = Class(result, stream.next()) - continue - elif peek == '[': - stream.next() - result = parse_attrib(result, stream) - next = stream.next() - if not next == ']': - raise SelectorSyntaxError( - "] expected, got '%s'" % next) - continue - elif peek == ':' or peek == '::': - type = stream.next() - ident = stream.next() - if not isinstance(ident, Symbol): - raise SelectorSyntaxError( - "Expected symbol, got '%s'" % ident) - if stream.peek() == '(': - stream.next() - peek = stream.peek() - if isinstance(peek, String): - selector = stream.next() - elif isinstance(peek, Symbol) and is_int(peek): - selector = int(stream.next()) - else: - # FIXME: parse_simple_selector, or selector, or...? 
- selector = parse_simple_selector(stream) - next = stream.next() - if not next == ')': - raise SelectorSyntaxError( - "Expected ')', got '%s' and '%s'" - % (next, selector)) - result = Function(result, type, ident, selector) - else: - result = Pseudo(result, type, ident) - continue - else: - if peek == ' ': - stream.next() - break - # FIXME: not sure what "negation" is - return result - -def is_int(v): - try: - int(v) - except ValueError: - return False - else: - return True - -def parse_attrib(selector, stream): - attrib = stream.next() - if stream.peek() == '|': - namespace = attrib - stream.next() - attrib = stream.next() - else: - namespace = '*' - if stream.peek() == ']': - return Attrib(selector, namespace, attrib, 'exists', None) - op = stream.next() - if not op in ('^=', '$=', '*=', '=', '~=', '|=', '!='): - raise SelectorSyntaxError( - "Operator expected, got '%s'" % op) - value = stream.next() - if not isinstance(value, (Symbol, String)): - raise SelectorSyntaxError( - "Expected string or symbol, got '%s'" % value) - return Attrib(selector, namespace, attrib, op, value) - -def parse_series(s): - """ - Parses things like '1n+2', or 'an+b' generally, returning (a, b) - """ - if isinstance(s, Element): - s = s._format_element() - if not s or s == '*': - # Happens when there's nothing, which the CSS parser thinks of as * - return (0, 0) - if isinstance(s, int): - # Happens when you just get a number - return (0, s) - if s == 'odd': - return (2, 1) - elif s == 'even': - return (2, 0) - elif s == 'n': - return (1, 0) - if 'n' not in s: - # Just a b - return (0, int(s)) - a, b = s.split('n', 1) - if not a: - a = 1 - elif a == '-' or a == '+': - a = int(a+'1') - else: - a = int(a) - if not b: - b = 0 - elif b == '-' or b == '+': - b = int(b+'1') - else: - b = int(b) - return (a, b) - - -############################################################ -## Tokenizing -############################################################ - -_match_whitespace = re.compile(r'\s+', re.UNICODE).match - -_replace_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub - -_match_count_number = re.compile(r'[+-]?\d*n(?:[+-]\d+)?').match - -def tokenize(s): - pos = 0 - s = _replace_comments('', s) - while 1: - match = _match_whitespace(s, pos=pos) - if match: - preceding_whitespace_pos = pos - pos = match.end() - else: - preceding_whitespace_pos = 0 - if pos >= len(s): - return - match = _match_count_number(s, pos=pos) - if match and match.group() != 'n': - sym = s[pos:match.end()] - yield Symbol(sym, pos) - pos = match.end() - continue - c = s[pos] - c2 = s[pos:pos+2] - if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='): - yield Token(c2, pos) - pos += 2 - continue - if c in '>+~,.*=[]()|:#': - if c in '.#[' and preceding_whitespace_pos > 0: - yield Token(' ', preceding_whitespace_pos) - yield Token(c, pos) - pos += 1 - continue - if c == '"' or c == "'": - # Quoted string - old_pos = pos - sym, pos = tokenize_escaped_string(s, pos) - yield String(sym, old_pos) - continue - old_pos = pos - sym, pos = tokenize_symbol(s, pos) - yield Symbol(sym, old_pos) - continue - -split_at_string_escapes = re.compile(r'(\\(?:%s))' - % '|'.join(['[A-Fa-f0-9]{1,6}(?:\r\n|\s)?', - '[^A-Fa-f0-9]'])).split - -def unescape_string_literal(literal): - substrings = [] - for substring in split_at_string_escapes(literal): - if not substring: - continue - elif '\\' in substring: - if substring[0] == '\\' and len(substring) > 1: - substring = substring[1:] - if substring[0] in '0123456789ABCDEFabcdef': - # int() correctly ignores the 
potentially trailing whitespace - substring = _unichr(int(substring, 16)) - else: - raise SelectorSyntaxError( - "Invalid escape sequence %r in string %r" - % (substring.split('\\')[1], literal)) - substrings.append(substring) - return ''.join(substrings) - -def tokenize_escaped_string(s, pos): - quote = s[pos] - assert quote in ('"', "'") - pos = pos+1 - start = pos - while 1: - next = s.find(quote, pos) - if next == -1: - raise SelectorSyntaxError( - "Expected closing %s for string in: %r" - % (quote, s[start:])) - result = s[start:next] - if result.endswith('\\'): - # next quote character is escaped - pos = next+1 - continue - if '\\' in result: - result = unescape_string_literal(result) - return result, next+1 - -_illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE) - -def tokenize_symbol(s, pos): - start = pos - match = _illegal_symbol.search(s, pos=pos) - if not match: - # Goes to end of s - return s[start:], len(s) - if match.start() == pos: - assert 0, ( - "Unexpected symbol: %r at %s" % (s[pos], pos)) - if not match: - result = s[start:] - pos = len(s) - else: - result = s[start:match.start()] - pos = match.start() - try: - result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape') - except UnicodeDecodeError: - import sys - e = sys.exc_info()[1] - raise SelectorSyntaxError( - "Bad symbol %r: %s" % (result, e)) - return result, pos - -class TokenStream(object): - - def __init__(self, tokens, source=None): - self.used = [] - self.tokens = iter(tokens) - self.source = source - self.peeked = None - self._peeking = False - try: - self.next_token = self.tokens.next - except AttributeError: - # Python 3 - self.next_token = self.tokens.__next__ - - def next(self): - if self._peeking: - self._peeking = False - self.used.append(self.peeked) - return self.peeked - else: - try: - next = self.next_token() - self.used.append(next) - return next - except StopIteration: - return None - - def __iter__(self): - return iter(self.next, None) - - def peek(self): - if not self._peeking: - try: - self.peeked = self.next_token() - except StopIteration: - return None - self._peeking = True - return self.peeked diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index ba2bd01c3c..8592392d93 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -504,6 +504,9 @@ class Indexer(object): # {{{ else: self.indices = self.create_book_index() + if not self.indices: + raise ValueError('No valid entries in TOC, cannot generate index') + self.records.append(self.create_index_record()) self.records.insert(0, self.create_header()) self.records.extend(self.cncx.records) diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 5e4f389262..f6ff594701 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -27,7 +27,6 @@ from calibre import force_unicode from calibre.ebooks import unit_convert from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize -from calibre.ebooks.cssselect import css_to_xpath_no_case cssutils_log.setLevel(logging.WARN) @@ -99,71 +98,32 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large']) -class CSSSelector(object): - +class CSSSelector(etree.XPath): + MIN_SPACE_RE = re.compile(r' *([>~+]) *') LOCAL_NAME_RE = re.compile(r"(?' 
% ( self.__class__.__name__, hex(abs(id(self)))[2:], self.css) -_selector_cache = {} - -MIN_SPACE_RE = re.compile(r' *([>~+]) *') - -def get_css_selector(raw_selector): - css = MIN_SPACE_RE.sub(r'\1', raw_selector) - if isinstance(css, unicode): - # Workaround for bug in lxml on windows/OS X that causes a massive - # memory leak with non ASCII selectors - css = css.encode('ascii', 'ignore').decode('ascii') - ans = _selector_cache.get(css, None) - if ans is None: - ans = CSSSelector(css) - _selector_cache[css] = ans - return ans class Stylizer(object): STYLESHEETS = WeakKeyDictionary() @@ -263,12 +223,41 @@ class Stylizer(object): rules.sort() self.rules = rules self._styles = {} + class_sel_pat = re.compile(r'\.[a-z]+', re.IGNORECASE) + capital_sel_pat = re.compile(r'h|[A-Z]+') for _, _, cssdict, text, _ in rules: fl = ':first-letter' in text if fl: text = text.replace(':first-letter', '') - selector = get_css_selector(text) - matches = selector(tree, self.logger) + try: + selector = CSSSelector(text) + except (AssertionError, ExpressionError, etree.XPathSyntaxError, + NameError, # thrown on OS X instead of SelectorSyntaxError + SelectorSyntaxError): + continue + try: + matches = selector(tree) + except etree.XPathEvalError: + continue + + if not matches: + ntext = capital_sel_pat.sub(lambda m: m.group().lower(), text) + if ntext != text: + self.logger.warn('Transformed CSS selector', text, 'to', + ntext) + selector = CSSSelector(ntext) + matches = selector(tree) + + if not matches and class_sel_pat.match(text) and text.lower() != text: + found = False + ltext = text.lower() + for x in tree.xpath('//*[@class]'): + if ltext.endswith('.'+x.get('class').lower()): + matches.append(x) + found = True + if found: + self.logger.warn('Ignoring case mismatches for CSS selector: %s in %s' + %(text, item.href)) if fl: from lxml.builder import ElementMaker E = ElementMaker(namespace=XHTML_NS) From 6a4bfa920c78019f0fd420e0252daf06423865da Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 15:26:36 -0600 Subject: [PATCH 37/39] Restore CSS pipeline changes without needing to use a python xpath func --- src/calibre/ebooks/cssselect.py | 1012 ++++++++++++++++++++++++++++ src/calibre/ebooks/oeb/stylizer.py | 93 +-- 2 files changed, 1064 insertions(+), 41 deletions(-) create mode 100644 src/calibre/ebooks/cssselect.py diff --git a/src/calibre/ebooks/cssselect.py b/src/calibre/ebooks/cssselect.py new file mode 100644 index 0000000000..1c2bfcc4fa --- /dev/null +++ b/src/calibre/ebooks/cssselect.py @@ -0,0 +1,1012 @@ +"""CSS Selectors based on XPath. + +This module supports selecting XML/HTML tags based on CSS selectors. +See the `CSSSelector` class for details. +""" + +import re +from lxml import etree + +__all__ = ['SelectorSyntaxError', 'ExpressionError', + 'CSSSelector'] + +try: + _basestring = basestring +except NameError: + _basestring = str + +class SelectorSyntaxError(SyntaxError): + pass + +class ExpressionError(RuntimeError): + pass + +class CSSSelector(etree.XPath): + """A CSS selector. + + Usage:: + + >>> from lxml import etree, cssselect + >>> select = cssselect.CSSSelector("a tag > child") + + >>> root = etree.XML("TEXT") + >>> [ el.tag for el in select(root) ] + ['child'] + + To use CSS namespaces, you need to pass a prefix-to-namespace + mapping as ``namespaces`` keyword argument:: + + >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' + >>> select_ns = cssselect.CSSSelector('root > rdf|Description', + ... namespaces={'rdf': rdfns}) + + >>> rdf = etree.XML(( + ... '' + ... 
'blah' + ... '') % rdfns) + >>> [(el.tag, el.text) for el in select_ns(rdf)] + [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')] + """ + def __init__(self, css, namespaces=None): + path = css_to_xpath_no_case(css) + etree.XPath.__init__(self, path, namespaces=namespaces) + self.css = css + + def __repr__(self): + return '<%s %s for %r>' % ( + self.__class__.__name__, + hex(abs(id(self)))[2:], + self.css) + +############################## +## Token objects: + +try: + _unicode = unicode + _unichr = unichr +except NameError: + # Python 3 + _unicode = str + _unichr = chr + +class _UniToken(_unicode): + def __new__(cls, contents, pos): + obj = _unicode.__new__(cls, contents) + obj.pos = pos + return obj + + def __repr__(self): + return '%s(%s, %r)' % ( + self.__class__.__name__, + _unicode.__repr__(self), + self.pos) + +class Symbol(_UniToken): + pass + +class String(_UniToken): + pass + +class Token(_UniToken): + pass + +############################################################ +## Parsing +############################################################ + +############################## +## Syntax objects: + +class Class(object): + """ + Represents selector.class_name + """ + + def __init__(self, selector, class_name): + self.selector = selector + # Kovid: Lowercased + self.class_name = class_name.lower() + + def __repr__(self): + return '%s[%r.%s]' % ( + self.__class__.__name__, + self.selector, + self.class_name) + + def xpath(self): + sel_xpath = self.selector.xpath() + # Kovid: Lowercased + sel_xpath.add_condition( + "contains(concat(' ', normalize-space(%s), ' '), %s)" % ( + lower_case('@class'), + xpath_literal(' '+self.class_name+' '))) + return sel_xpath + +class Function(object): + """ + Represents selector:name(expr) + """ + + unsupported = [ + 'target', 'lang', 'enabled', 'disabled',] + + def __init__(self, selector, type, name, expr): + self.selector = selector + self.type = type + self.name = name + self.expr = expr + + def __repr__(self): + return '%s[%r%s%s(%r)]' % ( + self.__class__.__name__, + self.selector, + self.type, self.name, self.expr) + + def xpath(self): + sel_path = self.selector.xpath() + if self.name in self.unsupported: + raise ExpressionError( + "The pseudo-class %r is not supported" % self.name) + method = '_xpath_' + self.name.replace('-', '_') + if not hasattr(self, method): + raise ExpressionError( + "The pseudo-class %r is unknown" % self.name) + method = getattr(self, method) + return method(sel_path, self.expr) + + def _xpath_nth_child(self, xpath, expr, last=False, + add_name_test=True): + a, b = parse_series(expr) + if not a and not b and not last: + # a=0 means nothing is returned... 
+ xpath.add_condition('false() and position() = 0') + return xpath + if add_name_test: + xpath.add_name_test() + xpath.add_star_prefix() + if a == 0: + if last: + b = 'last() - %s' % b + xpath.add_condition('position() = %s' % b) + return xpath + if last: + # FIXME: I'm not sure if this is right + a = -a + b = -b + if b > 0: + b_neg = str(-b) + else: + b_neg = '+%s' % (-b) + if a != 1: + expr = ['(position() %s) mod %s = 0' % (b_neg, a)] + else: + expr = [] + if b >= 0: + expr.append('position() >= %s' % b) + elif b < 0 and last: + expr.append('position() < (last() %s)' % b) + expr = ' and '.join(expr) + if expr: + xpath.add_condition(expr) + return xpath + # FIXME: handle an+b, odd, even + # an+b means every-a, plus b, e.g., 2n+1 means odd + # 0n+b means b + # n+0 means a=1, i.e., all elements + # an means every a elements, i.e., 2n means even + # -n means -1n + # -1n+6 means elements 6 and previous + + def _xpath_nth_last_child(self, xpath, expr): + return self._xpath_nth_child(xpath, expr, last=True) + + def _xpath_nth_of_type(self, xpath, expr): + if xpath.element == '*': + raise NotImplementedError( + "*:nth-of-type() is not implemented") + return self._xpath_nth_child(xpath, expr, add_name_test=False) + + def _xpath_nth_last_of_type(self, xpath, expr): + return self._xpath_nth_child(xpath, expr, last=True, add_name_test=False) + + def _xpath_contains(self, xpath, expr): + # text content, minus tags, must contain expr + if isinstance(expr, Element): + expr = expr._format_element() + # Kovid: Use ASCII lower case that works + xpath.add_condition('contains(%s), %s)' % ( + lower_case('string(.)'), + xpath_literal(expr.lower()))) + return xpath + + def _xpath_not(self, xpath, expr): + # everything for which not expr applies + expr = expr.xpath() + cond = expr.condition + # FIXME: should I do something about element_path? + xpath.add_condition('not(%s)' % cond) + return xpath + +# Kovid: Python functions dont work in lxml, so use translate() +# instead of the python lowercase function +def lower_case(arg): + 'An ASCII lowercase function' + return ("translate(%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', " + "'abcdefghijklmnopqrstuvwxyz')")%arg + +class Pseudo(object): + """ + Represents selector:ident + """ + + unsupported = ['indeterminate', 'first-line', 'first-letter', + 'selection', 'before', 'after', 'link', 'visited', + 'active', 'focus', 'hover'] + + def __init__(self, element, type, ident): + self.element = element + assert type in (':', '::') + self.type = type + self.ident = ident + + def __repr__(self): + return '%s[%r%s%s]' % ( + self.__class__.__name__, + self.element, + self.type, self.ident) + + def xpath(self): + el_xpath = self.element.xpath() + if self.ident in self.unsupported: + raise ExpressionError( + "The pseudo-class %r is unsupported" % self.ident) + method = '_xpath_' + self.ident.replace('-', '_') + if not hasattr(self, method): + raise ExpressionError( + "The pseudo-class %r is unknown" % self.ident) + method = getattr(self, method) + el_xpath = method(el_xpath) + return el_xpath + + def _xpath_checked(self, xpath): + # FIXME: is this really all the elements? + xpath.add_condition("(@selected or @checked) and (name(.) = 'input' or name(.) 
= 'option')") + return xpath + + def _xpath_root(self, xpath): + # if this element is the root element + raise NotImplementedError + + def _xpath_first_child(self, xpath): + xpath.add_star_prefix() + xpath.add_name_test() + xpath.add_condition('position() = 1') + return xpath + + def _xpath_last_child(self, xpath): + xpath.add_star_prefix() + xpath.add_name_test() + xpath.add_condition('position() = last()') + return xpath + + def _xpath_first_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:first-of-type is not implemented") + xpath.add_star_prefix() + xpath.add_condition('position() = 1') + return xpath + + def _xpath_last_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:last-of-type is not implemented") + xpath.add_star_prefix() + xpath.add_condition('position() = last()') + return xpath + + def _xpath_only_child(self, xpath): + xpath.add_name_test() + xpath.add_star_prefix() + xpath.add_condition('last() = 1') + return xpath + + def _xpath_only_of_type(self, xpath): + if xpath.element == '*': + raise NotImplementedError( + "*:only-of-type is not implemented") + xpath.add_condition('last() = 1') + return xpath + + def _xpath_empty(self, xpath): + xpath.add_condition("not(*) and not(normalize-space())") + return xpath + +class Attrib(object): + """ + Represents selector[namespace|attrib operator value] + """ + + def __init__(self, selector, namespace, attrib, operator, value): + self.selector = selector + self.namespace = namespace + self.attrib = attrib + self.operator = operator + self.value = value + + def __repr__(self): + if self.operator == 'exists': + return '%s[%r[%s]]' % ( + self.__class__.__name__, + self.selector, + self._format_attrib()) + else: + return '%s[%r[%s %s %r]]' % ( + self.__class__.__name__, + self.selector, + self._format_attrib(), + self.operator, + self.value) + + def _format_attrib(self): + if self.namespace == '*': + return self.attrib + else: + return '%s|%s' % (self.namespace, self.attrib) + + def _xpath_attrib(self): + # FIXME: if attrib is *? + if self.namespace == '*': + return '@' + self.attrib + else: + return '@%s:%s' % (self.namespace, self.attrib) + + def xpath(self): + path = self.selector.xpath() + attrib = self._xpath_attrib() + value = self.value + if self.operator == 'exists': + assert not value + path.add_condition(attrib) + elif self.operator == '=': + path.add_condition('%s = %s' % (attrib, + xpath_literal(value))) + elif self.operator == '!=': + # FIXME: this seems like a weird hack... + if value: + path.add_condition('not(%s) or %s != %s' + % (attrib, attrib, xpath_literal(value))) + else: + path.add_condition('%s != %s' + % (attrib, xpath_literal(value))) + #path.add_condition('%s != %s' % (attrib, xpath_literal(value))) + elif self.operator == '~=': + path.add_condition("contains(concat(' ', normalize-space(%s), ' '), %s)" % (attrib, xpath_literal(' '+value+' '))) + elif self.operator == '|=': + # Weird, but true... + path.add_condition('%s = %s or starts-with(%s, %s)' % ( + attrib, xpath_literal(value), + attrib, xpath_literal(value + '-'))) + elif self.operator == '^=': + path.add_condition('starts-with(%s, %s)' % ( + attrib, xpath_literal(value))) + elif self.operator == '$=': + # Oddly there is a starts-with in XPath 1.0, but not ends-with + path.add_condition('substring(%s, string-length(%s)-%s) = %s' + % (attrib, attrib, len(value)-1, xpath_literal(value))) + elif self.operator == '*=': + # FIXME: case sensitive? 
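+ # note: XPath contains() is case sensitive, so '*=' substring matches keep
+ # their original case; only element names, class names and :contains() text
+ # are lowercased elsewhere in this module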
+ path.add_condition('contains(%s, %s)' % ( + attrib, xpath_literal(value))) + else: + assert 0, ("Unknown operator: %r" % self.operator) + return path + +class Element(object): + """ + Represents namespace|element + """ + + def __init__(self, namespace, element): + self.namespace = namespace + self.element = element + + def __repr__(self): + return '%s[%s]' % ( + self.__class__.__name__, + self._format_element()) + + def _format_element(self): + if self.namespace == '*': + return self.element + else: + return '%s|%s' % (self.namespace, self.element) + + def xpath(self): + if self.namespace == '*': + el = self.element.lower() + else: + # Kovid: Lowercased + el = '%s:%s' % (self.namespace, self.element.lower()) + return XPathExpr(element=el) + +class Hash(object): + """ + Represents selector#id + """ + + def __init__(self, selector, id): + self.selector = selector + self.id = id + + def __repr__(self): + return '%s[%r#%s]' % ( + self.__class__.__name__, + self.selector, self.id) + + def xpath(self): + path = self.selector.xpath() + path.add_condition('@id = %s' % xpath_literal(self.id)) + return path + +class Or(object): + + def __init__(self, items): + self.items = items + def __repr__(self): + return '%s(%r)' % ( + self.__class__.__name__, + self.items) + + def xpath(self): + paths = [item.xpath() for item in self.items] + return XPathExprOr(paths) + +class CombinedSelector(object): + + _method_mapping = { + ' ': 'descendant', + '>': 'child', + '+': 'direct_adjacent', + '~': 'indirect_adjacent', + } + + def __init__(self, selector, combinator, subselector): + assert selector is not None + self.selector = selector + self.combinator = combinator + self.subselector = subselector + + def __repr__(self): + if self.combinator == ' ': + comb = '' + else: + comb = self.combinator + return '%s[%r %s %r]' % ( + self.__class__.__name__, + self.selector, + comb, + self.subselector) + + def xpath(self): + if self.combinator not in self._method_mapping: + raise ExpressionError( + "Unknown combinator: %r" % self.combinator) + method = '_xpath_' + self._method_mapping[self.combinator] + method = getattr(self, method) + path = self.selector.xpath() + return method(path, self.subselector) + + def _xpath_descendant(self, xpath, sub): + # when sub is a descendant in any way of xpath + xpath.join('/descendant::', sub.xpath()) + return xpath + + def _xpath_child(self, xpath, sub): + # when sub is an immediate child of xpath + xpath.join('/', sub.xpath()) + return xpath + + def _xpath_direct_adjacent(self, xpath, sub): + # when sub immediately follows xpath + xpath.join('/following-sibling::', sub.xpath()) + xpath.add_name_test() + xpath.add_condition('position() = 1') + return xpath + + def _xpath_indirect_adjacent(self, xpath, sub): + # when sub comes somewhere after xpath as a sibling + xpath.join('/following-sibling::', sub.xpath()) + return xpath + +############################## +## XPathExpr objects: + +_el_re = re.compile(r'^\w+\s*$', re.UNICODE) +_id_re = re.compile(r'^(\w*)#(\w+)\s*$', re.UNICODE) +_class_re = re.compile(r'^(\w*)\.(\w+)\s*$', re.UNICODE) + + +def css_to_xpath_no_case(css_expr, prefix='descendant-or-self::'): + if isinstance(css_expr, _basestring): + match = _el_re.search(css_expr) + if match is not None: + # Kovid: Lowercased + return '%s%s' % (prefix, match.group(0).strip().lower()) + match = _id_re.search(css_expr) + if match is not None: + return "%s%s[@id = '%s']" % ( + prefix, match.group(1) or '*', match.group(2)) + match = _class_re.search(css_expr) + if match is not None: + # 
Kovid: lowercased + return "%s%s[contains(concat(' ', normalize-space(%s), ' '), ' %s ')]" % ( + prefix, match.group(1).lower() or '*', + lower_case('@class'), match.group(2).lower()) + css_expr = parse(css_expr) + expr = css_expr.xpath() + assert expr is not None, ( + "Got None for xpath expression from %s" % repr(css_expr)) + if prefix: + expr.add_prefix(prefix) + return _unicode(expr) + +class XPathExpr(object): + + def __init__(self, prefix=None, path=None, element='*', condition=None, + star_prefix=False): + self.prefix = prefix + self.path = path + self.element = element + self.condition = condition + self.star_prefix = star_prefix + + def __str__(self): + path = '' + if self.prefix is not None: + path += _unicode(self.prefix) + if self.path is not None: + path += _unicode(self.path) + path += _unicode(self.element) + if self.condition: + path += '[%s]' % self.condition + return path + + def __repr__(self): + return '%s[%s]' % ( + self.__class__.__name__, self) + + def add_condition(self, condition): + if self.condition: + self.condition = '%s and (%s)' % (self.condition, condition) + else: + self.condition = condition + + def add_path(self, part): + if self.path is None: + self.path = self.element + else: + self.path += self.element + self.element = part + + def add_prefix(self, prefix): + if self.prefix: + self.prefix = prefix + self.prefix + else: + self.prefix = prefix + + def add_name_test(self): + if self.element == '*': + # We weren't doing a test anyway + return + self.add_condition("name() = %s" % xpath_literal(self.element)) + self.element = '*' + + def add_star_prefix(self): + """ + Adds a /* prefix if there is no prefix. This is when you need + to keep context's constrained to a single parent. + """ + if self.path: + self.path += '*/' + else: + self.path = '*/' + self.star_prefix = True + + def join(self, combiner, other): + prefix = _unicode(self) + prefix += combiner + path = (other.prefix or '') + (other.path or '') + # We don't need a star prefix if we are joining to this other + # prefix; so we'll get rid of it + if other.star_prefix and path == '*/': + path = '' + self.prefix = prefix + self.path = path + self.element = other.element + self.condition = other.condition + +class XPathExprOr(XPathExpr): + """ + Represents |'d expressions. Note that unfortunately it isn't + the union, it's the sum, so duplicate elements will appear. + """ + + def __init__(self, items, prefix=None): + for item in items: + assert item is not None + self.items = items + self.prefix = prefix + + def __str__(self): + prefix = self.prefix or '' + return ' | '.join(["%s%s" % (prefix,i) for i in self.items]) + +split_at_single_quotes = re.compile("('+)").split + +def xpath_literal(s): + if isinstance(s, Element): + # This is probably a symbol that looks like an expression... 
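+ # fall back to its printable form, which is then quoted like any other string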
+ s = s._format_element() + else: + s = _unicode(s) + if "'" not in s: + s = "'%s'" % s + elif '"' not in s: + s = '"%s"' % s + else: + s = "concat(%s)" % ','.join([ + (("'" in part) and '"%s"' or "'%s'") % part + for part in split_at_single_quotes(s) if part + ]) + return s + +############################## +## Parsing functions + +def parse(string): + stream = TokenStream(tokenize(string)) + stream.source = string + try: + return parse_selector_group(stream) + except SelectorSyntaxError: + import sys + e = sys.exc_info()[1] + message = "%s at %s -> %r" % ( + e, stream.used, stream.peek()) + e.msg = message + if sys.version_info < (2,6): + e.message = message + e.args = tuple([message]) + raise + +def parse_selector_group(stream): + result = [] + while 1: + result.append(parse_selector(stream)) + if stream.peek() == ',': + stream.next() + else: + break + if len(result) == 1: + return result[0] + else: + return Or(result) + +def parse_selector(stream): + result = parse_simple_selector(stream) + while 1: + peek = stream.peek() + if peek == ',' or peek is None: + return result + elif peek in ('+', '>', '~'): + # A combinator + combinator = stream.next() + else: + combinator = ' ' + consumed = len(stream.used) + next_selector = parse_simple_selector(stream) + if consumed == len(stream.used): + raise SelectorSyntaxError( + "Expected selector, got '%s'" % stream.peek()) + result = CombinedSelector(result, combinator, next_selector) + return result + +def parse_simple_selector(stream): + peek = stream.peek() + if peek != '*' and not isinstance(peek, Symbol): + element = namespace = '*' + else: + next = stream.next() + if next != '*' and not isinstance(next, Symbol): + raise SelectorSyntaxError( + "Expected symbol, got '%s'" % next) + if stream.peek() == '|': + namespace = next + stream.next() + element = stream.next() + if element != '*' and not isinstance(next, Symbol): + raise SelectorSyntaxError( + "Expected symbol, got '%s'" % next) + else: + namespace = '*' + element = next + result = Element(namespace, element) + has_hash = False + while 1: + peek = stream.peek() + if peek == '#': + if has_hash: + # You can't have two hashes + # (FIXME: is there some more general rule I'm missing?) + break + stream.next() + result = Hash(result, stream.next()) + has_hash = True + continue + elif peek == '.': + stream.next() + result = Class(result, stream.next()) + continue + elif peek == '[': + stream.next() + result = parse_attrib(result, stream) + next = stream.next() + if not next == ']': + raise SelectorSyntaxError( + "] expected, got '%s'" % next) + continue + elif peek == ':' or peek == '::': + type = stream.next() + ident = stream.next() + if not isinstance(ident, Symbol): + raise SelectorSyntaxError( + "Expected symbol, got '%s'" % ident) + if stream.peek() == '(': + stream.next() + peek = stream.peek() + if isinstance(peek, String): + selector = stream.next() + elif isinstance(peek, Symbol) and is_int(peek): + selector = int(stream.next()) + else: + # FIXME: parse_simple_selector, or selector, or...? 
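+ # e.g. ':not(div.foo)' -- the argument is itself parsed as a simple selector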
+ selector = parse_simple_selector(stream) + next = stream.next() + if not next == ')': + raise SelectorSyntaxError( + "Expected ')', got '%s' and '%s'" + % (next, selector)) + result = Function(result, type, ident, selector) + else: + result = Pseudo(result, type, ident) + continue + else: + if peek == ' ': + stream.next() + break + # FIXME: not sure what "negation" is + return result + +def is_int(v): + try: + int(v) + except ValueError: + return False + else: + return True + +def parse_attrib(selector, stream): + attrib = stream.next() + if stream.peek() == '|': + namespace = attrib + stream.next() + attrib = stream.next() + else: + namespace = '*' + if stream.peek() == ']': + return Attrib(selector, namespace, attrib, 'exists', None) + op = stream.next() + if not op in ('^=', '$=', '*=', '=', '~=', '|=', '!='): + raise SelectorSyntaxError( + "Operator expected, got '%s'" % op) + value = stream.next() + if not isinstance(value, (Symbol, String)): + raise SelectorSyntaxError( + "Expected string or symbol, got '%s'" % value) + return Attrib(selector, namespace, attrib, op, value) + +def parse_series(s): + """ + Parses things like '1n+2', or 'an+b' generally, returning (a, b) + """ + if isinstance(s, Element): + s = s._format_element() + if not s or s == '*': + # Happens when there's nothing, which the CSS parser thinks of as * + return (0, 0) + if isinstance(s, int): + # Happens when you just get a number + return (0, s) + if s == 'odd': + return (2, 1) + elif s == 'even': + return (2, 0) + elif s == 'n': + return (1, 0) + if 'n' not in s: + # Just a b + return (0, int(s)) + a, b = s.split('n', 1) + if not a: + a = 1 + elif a == '-' or a == '+': + a = int(a+'1') + else: + a = int(a) + if not b: + b = 0 + elif b == '-' or b == '+': + b = int(b+'1') + else: + b = int(b) + return (a, b) + + +############################################################ +## Tokenizing +############################################################ + +_match_whitespace = re.compile(r'\s+', re.UNICODE).match + +_replace_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub + +_match_count_number = re.compile(r'[+-]?\d*n(?:[+-]\d+)?').match + +def tokenize(s): + pos = 0 + s = _replace_comments('', s) + while 1: + match = _match_whitespace(s, pos=pos) + if match: + preceding_whitespace_pos = pos + pos = match.end() + else: + preceding_whitespace_pos = 0 + if pos >= len(s): + return + match = _match_count_number(s, pos=pos) + if match and match.group() != 'n': + sym = s[pos:match.end()] + yield Symbol(sym, pos) + pos = match.end() + continue + c = s[pos] + c2 = s[pos:pos+2] + if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='): + yield Token(c2, pos) + pos += 2 + continue + if c in '>+~,.*=[]()|:#': + if c in '.#[' and preceding_whitespace_pos > 0: + yield Token(' ', preceding_whitespace_pos) + yield Token(c, pos) + pos += 1 + continue + if c == '"' or c == "'": + # Quoted string + old_pos = pos + sym, pos = tokenize_escaped_string(s, pos) + yield String(sym, old_pos) + continue + old_pos = pos + sym, pos = tokenize_symbol(s, pos) + yield Symbol(sym, old_pos) + continue + +split_at_string_escapes = re.compile(r'(\\(?:%s))' + % '|'.join(['[A-Fa-f0-9]{1,6}(?:\r\n|\s)?', + '[^A-Fa-f0-9]'])).split + +def unescape_string_literal(literal): + substrings = [] + for substring in split_at_string_escapes(literal): + if not substring: + continue + elif '\\' in substring: + if substring[0] == '\\' and len(substring) > 1: + substring = substring[1:] + if substring[0] in '0123456789ABCDEFabcdef': + # int() correctly ignores the 
potentially trailing whitespace + substring = _unichr(int(substring, 16)) + else: + raise SelectorSyntaxError( + "Invalid escape sequence %r in string %r" + % (substring.split('\\')[1], literal)) + substrings.append(substring) + return ''.join(substrings) + +def tokenize_escaped_string(s, pos): + quote = s[pos] + assert quote in ('"', "'") + pos = pos+1 + start = pos + while 1: + next = s.find(quote, pos) + if next == -1: + raise SelectorSyntaxError( + "Expected closing %s for string in: %r" + % (quote, s[start:])) + result = s[start:next] + if result.endswith('\\'): + # next quote character is escaped + pos = next+1 + continue + if '\\' in result: + result = unescape_string_literal(result) + return result, next+1 + +_illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE) + +def tokenize_symbol(s, pos): + start = pos + match = _illegal_symbol.search(s, pos=pos) + if not match: + # Goes to end of s + return s[start:], len(s) + if match.start() == pos: + assert 0, ( + "Unexpected symbol: %r at %s" % (s[pos], pos)) + if not match: + result = s[start:] + pos = len(s) + else: + result = s[start:match.start()] + pos = match.start() + try: + result = result.encode('ASCII', 'backslashreplace').decode('unicode_escape') + except UnicodeDecodeError: + import sys + e = sys.exc_info()[1] + raise SelectorSyntaxError( + "Bad symbol %r: %s" % (result, e)) + return result, pos + +class TokenStream(object): + + def __init__(self, tokens, source=None): + self.used = [] + self.tokens = iter(tokens) + self.source = source + self.peeked = None + self._peeking = False + try: + self.next_token = self.tokens.next + except AttributeError: + # Python 3 + self.next_token = self.tokens.__next__ + + def next(self): + if self._peeking: + self._peeking = False + self.used.append(self.peeked) + return self.peeked + else: + try: + next = self.next_token() + self.used.append(next) + return next + except StopIteration: + return None + + def __iter__(self): + return iter(self.next, None) + + def peek(self): + if not self._peeking: + try: + self.peeked = self.next_token() + except StopIteration: + return None + self._peeking = True + return self.peeked diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index f6ff594701..5e4f389262 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -27,6 +27,7 @@ from calibre import force_unicode from calibre.ebooks import unit_convert from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize +from calibre.ebooks.cssselect import css_to_xpath_no_case cssutils_log.setLevel(logging.WARN) @@ -98,32 +99,71 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large']) -class CSSSelector(etree.XPath): - MIN_SPACE_RE = re.compile(r' *([>~+]) *') +class CSSSelector(object): + LOCAL_NAME_RE = re.compile(r"(?' 
% ( self.__class__.__name__, hex(abs(id(self)))[2:], self.css) +_selector_cache = {} + +MIN_SPACE_RE = re.compile(r' *([>~+]) *') + +def get_css_selector(raw_selector): + css = MIN_SPACE_RE.sub(r'\1', raw_selector) + if isinstance(css, unicode): + # Workaround for bug in lxml on windows/OS X that causes a massive + # memory leak with non ASCII selectors + css = css.encode('ascii', 'ignore').decode('ascii') + ans = _selector_cache.get(css, None) + if ans is None: + ans = CSSSelector(css) + _selector_cache[css] = ans + return ans class Stylizer(object): STYLESHEETS = WeakKeyDictionary() @@ -223,41 +263,12 @@ class Stylizer(object): rules.sort() self.rules = rules self._styles = {} - class_sel_pat = re.compile(r'\.[a-z]+', re.IGNORECASE) - capital_sel_pat = re.compile(r'h|[A-Z]+') for _, _, cssdict, text, _ in rules: fl = ':first-letter' in text if fl: text = text.replace(':first-letter', '') - try: - selector = CSSSelector(text) - except (AssertionError, ExpressionError, etree.XPathSyntaxError, - NameError, # thrown on OS X instead of SelectorSyntaxError - SelectorSyntaxError): - continue - try: - matches = selector(tree) - except etree.XPathEvalError: - continue - - if not matches: - ntext = capital_sel_pat.sub(lambda m: m.group().lower(), text) - if ntext != text: - self.logger.warn('Transformed CSS selector', text, 'to', - ntext) - selector = CSSSelector(ntext) - matches = selector(tree) - - if not matches and class_sel_pat.match(text) and text.lower() != text: - found = False - ltext = text.lower() - for x in tree.xpath('//*[@class]'): - if ltext.endswith('.'+x.get('class').lower()): - matches.append(x) - found = True - if found: - self.logger.warn('Ignoring case mismatches for CSS selector: %s in %s' - %(text, item.href)) + selector = get_css_selector(text) + matches = selector(tree, self.logger) if fl: from lxml.builder import ElementMaker E = ElementMaker(namespace=XHTML_NS) From b5bc1ef8f7c8ba876a4db93120b8d177c84ed3f8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 15:48:14 -0600 Subject: [PATCH 38/39] ... --- src/calibre/ebooks/mobi/writer2/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index ed0e43a303..7e748aac95 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -590,7 +590,7 @@ class MobiWriter(object): Write the PalmDB header ''' title = ascii_filename(unicode(self.oeb.metadata.title[0])).replace( - ' ', '_') + ' ', '_')[:32] title = title + (b'\0' * (32 - len(title))) now = int(time.time()) nrecords = len(self.records) From 8824104847b17328e67a6e369592ca29658131c2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Aug 2011 16:06:48 -0600 Subject: [PATCH 39/39] ... --- src/calibre/ebooks/mobi/writer2/serializer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py index 377b29655c..9bbaa436a7 100644 --- a/src/calibre/ebooks/mobi/writer2/serializer.py +++ b/src/calibre/ebooks/mobi/writer2/serializer.py @@ -116,6 +116,12 @@ class Serializer(object): buf.write(b'') self.end_offset = buf.tell() self.fixup_links() + if self.start_offset is None: + # If we don't set a start offset, the stupid Kindle will + # open the book at the location of the first IndexEntry, which + # could be anywhere. So ensure the book is always opened at the + # beginning, instead. 
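+ # i.e. fall back to the offset where the book text begins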
+ self.start_offset = self.body_start_offset return buf.getvalue() def serialize_head(self):