From 506cd50dd1774b3961150b3a97d132a7c1cd0c1e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Oct 2010 07:49:13 -0700
Subject: [PATCH 1/6] Fix #7228 (The Economic Times of India - News Headers are
 missing)

---
 resources/recipes/theeconomictimes_india.recipe | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/resources/recipes/theeconomictimes_india.recipe b/resources/recipes/theeconomictimes_india.recipe
index 59cd56b67e..92d2a64a70 100644
--- a/resources/recipes/theeconomictimes_india.recipe
+++ b/resources/recipes/theeconomictimes_india.recipe
@@ -21,8 +21,9 @@ class TheEconomicTimes(BasicNewsRecipe):
     language               = 'en_IN'
     publication_type       = 'newspaper'
     masthead_url           = 'http://economictimes.indiatimes.com/photo/2676871.cms'
-    extra_css              = """ body{font-family: Arial,Helvetica,sans-serif}
-                                .heading1{font-size: xx-large; font-weight: bold} """
+    extra_css              = """
+                                 body{font-family: Arial,Helvetica,sans-serif}
+                             """
 
     conversion_options = {
                           'comment'          : description
@@ -31,8 +32,9 @@ class TheEconomicTimes(BasicNewsRecipe):
                         , 'language'         : language
                         }
 
-    keep_only_tags = [dict(attrs={'class':['heading1','headingnext','Normal']})]
+    keep_only_tags = [dict(attrs={'class':'printdiv'})]
     remove_tags    = [dict(name=['object','link','embed','iframe','base','table','meta'])]
+    remove_attributes = ['name']
 
     feeds          = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')]
 

From 6dce871b053c803f4a0283ad1929fb8e375ce82c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Oct 2010 07:50:22 -0700
Subject: [PATCH 2/6] Fix #7187 (New Scientist recipe update)

---
 resources/recipes/new_scientist.recipe | 29 +++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/resources/recipes/new_scientist.recipe b/resources/recipes/new_scientist.recipe
index 2e864565ff..02bbbe4d42 100644
--- a/resources/recipes/new_scientist.recipe
+++ b/resources/recipes/new_scientist.recipe
@@ -8,11 +8,11 @@ import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class NewScientist(BasicNewsRecipe):
-    title                 = 'New Scientist - Online News'
+    title                 = 'New Scientist - Online News w. subscription'
     __author__            = 'Darko Miletic'
     description           = 'Science news and science articles from New Scientist.'
     language              = 'en'
-    publisher             = 'New Scientist'
+    publisher             = 'Reed Business Information Ltd.'
     category              = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
     oldest_article        = 7
     max_articles_per_feed = 100
@@ -21,7 +21,12 @@ class NewScientist(BasicNewsRecipe):
     cover_url             = 'http://www.newscientist.com/currentcover.jpg'
     masthead_url          = 'http://www.newscientist.com/img/misc/ns_logo.jpg'
     encoding              = 'utf-8'
-    extra_css             = ' body{font-family: Arial,sans-serif} img{margin-bottom: 0.8em} '
+    needs_subscription    = 'optional'
+    extra_css             = """
+                                 body{font-family: Arial,sans-serif}
+                                 img{margin-bottom: 0.8em}
+                                 .quotebx{font-size: x-large; font-weight: bold; margin-right: 2em; margin-left: 2em}
+                            """
 
     conversion_options = {
                           'comment'          : description
@@ -33,15 +38,27 @@ class NewScientist(BasicNewsRecipe):
 
     keep_only_tags = [dict(name='div', attrs={'id':['pgtop','maincol','blgmaincol','nsblgposts','hldgalcols']})]
 
+    def get_browser(self):  # returns a browser; submits the login form first when both credentials are set
+        br = BasicNewsRecipe.get_browser()  # NOTE(review): called on the class without self - confirm get_browser is a classmethod in this calibre version
+        br.open('http://www.newscientist.com/')  # the home page is opened before any login attempt
+        if self.username is not None and self.password is not None:  # login is skipped when either credential is missing
+            br.open('https://www.newscientist.com/user/login?redirectURL=')
+            br.select_form(nr=2)  # NOTE(review): presumably the login form is the form at index 2 on this page - confirm against the live markup
+            br['loginId' ] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
     remove_tags = [
                      dict(name='div'  , attrs={'class':['hldBd','adline','pnl','infotext' ]})
                     ,dict(name='div'  , attrs={'id'   :['compnl','artIssueInfo','artTools','comments','blgsocial','sharebtns']})
                     ,dict(name='p'    , attrs={'class':['marker','infotext'               ]})
                     ,dict(name='meta' , attrs={'name' :'description'                       })
-                    ,dict(name='a'    , attrs={'rel'  :'tag'                                })
+                    ,dict(name='a'    , attrs={'rel'  :'tag'                               })
+                    ,dict(name=['link','base','meta','iframe','object','embed'])
                   ]
     remove_tags_after = dict(attrs={'class':['nbpcopy','comments']})
-    remove_attributes = ['height','width']
+    remove_attributes = ['height','width','lang']
 
     feeds          = [
                         (u'Latest Headlines'        , u'http://feeds.newscientist.com/science-news'              )
@@ -62,6 +79,8 @@ class NewScientist(BasicNewsRecipe):
         return url + '?full=true&print=true'
 
     def preprocess_html(self, soup):
+        for item in soup.findAll(['quote','quotetext']):
+            item.name='p'
         for tg in soup.findAll('a'):
             if tg.string == 'Home':
                 tg.parent.extract()

From eafc6e72c11864f6b0d084938d940624cc704144 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Oct 2010 07:51:31 -0700
Subject: [PATCH 3/6] Fix #7180 (Fox news website changed)

---
 resources/recipes/foxnews.recipe | 39 +++++++++-----------------------
 1 file changed, 11 insertions(+), 28 deletions(-)

diff --git a/resources/recipes/foxnews.recipe b/resources/recipes/foxnews.recipe
index e7e76390b5..916bd28ad2 100644
--- a/resources/recipes/foxnews.recipe
+++ b/resources/recipes/foxnews.recipe
@@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 foxnews.com
 '''
 
-import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class FoxNews(BasicNewsRecipe):
@@ -21,11 +20,10 @@ class FoxNews(BasicNewsRecipe):
     language              = 'en'
     publication_type      = 'newsportal'
     remove_empty_feeds    = True
-    extra_css             = ' body{font-family: Arial,sans-serif } img{margin-bottom: 0.4em} .caption{font-size: x-small} '
-
-    preprocess_regexps = [
-       (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE),lambda match: '</title></head>')
-    ]
+    extra_css             = """
+                                body{font-family: Arial,sans-serif }
+                                .caption{font-size: x-small}
+                            """
 
     conversion_options = {
                           'comment'   : description
@@ -34,27 +32,15 @@ class FoxNews(BasicNewsRecipe):
                         , 'language'  : language
                         }
 
-    remove_attributes = ['xmlns']
-
-    keep_only_tags      = [
-                            dict(name='div', attrs={'id'   :['story','browse-story-content']})
-                           ,dict(name='div', attrs={'class':['posts articles','slideshow']})
-                           ,dict(name='h4' , attrs={'class':'storyDate'})
-                           ,dict(name='h1' , attrs={'xmlns:functx':'http://www.functx.com'})
-                           ,dict(name='div', attrs={'class':'authInfo'})
-                           ,dict(name='div', attrs={'id':'articleCont'})
-                          ]
+    remove_attributes = ['xmlns','lang']
 
     remove_tags = [
-                     dict(name='div', attrs={'class':['share-links','quigo quigo2','share-text','storyControls','socShare','btm-links']})
-                    ,dict(name='div', attrs={'id'   :['otherMedia','loomia_display','img-all-path','story-vcmId','story-url','pane-browse-story-comments','story_related']})
-                    ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2','tabs']})
-                    ,dict(name='a' , attrs={'class':'join-discussion'})
-                    ,dict(name='ul' , attrs={'class':['tools','tools alt','tools alt2']})
-                    ,dict(name='p' , attrs={'class':'see_fullarchive'})
-                    ,dict(name=['object','embed','link','script'])
+                     dict(name=['object','embed','link','script','iframe','meta','base'])
+                    ,dict(attrs={'class':['user-control','url-description','ad-context']})
                   ]
 
+    remove_tags_before=dict(name='h1')
+    remove_tags_after =dict(attrs={'class':'url-description'})
 
     feeds = [
               (u'Latest Headlines', u'http://feeds.foxnews.com/foxnews/latest'        )
@@ -67,8 +53,5 @@ class FoxNews(BasicNewsRecipe):
              ,(u'Entertainment'   , u'http://feeds.foxnews.com/foxnews/entertainment' )
             ]
 
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return self.adeify_images(soup)
-
+    def print_version(self, url):  # returns the given URL with the literal suffix 'print' appended
+        return url + 'print'  # no separator is inserted - assumes url already ends where the suffix belongs (TODO confirm)

From 0559826083bf6f653aba88e6bbbedb67d1288064 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Oct 2010 07:53:53 -0700
Subject: [PATCH 4/6] Fix #7147 (Auto merge books not respecting article sort
 tweak)

---
 src/calibre/library/database2.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index bbfef47977..b21299c335 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -748,10 +748,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         return False
 
     def find_identical_books(self, mi):
-        fuzzy_title_patterns = [(re.compile(pat), repl) for pat, repl in
+        fuzzy_title_patterns = [(re.compile(pat, re.IGNORECASE), repl) for pat, repl in
                 [
                     (r'[\[\](){}<>\'";,:#]', ''),
-                    (r'^(the|a|an) ', ''),
+                    (tweaks.get('title_sort_articles', r'^(a|the|an)\s+'), ''),
                     (r'[-._]', ' '),
                     (r'\s+', ' ')
                 ]

From 2a2f1fd988e6e35ef4c71d5b9c2343c1c0570ba7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Oct 2010 07:56:16 -0700
Subject: [PATCH 5/6] Fix #7252 (New recipe for The Economic Collapse blog)

---
 resources/images/news/theecocolapse.png | Bin 0 -> 1264 bytes
 resources/recipes/theecocolapse.recipe  |  46 ++++++++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 resources/images/news/theecocolapse.png
 create mode 100644 resources/recipes/theecocolapse.recipe

diff --git a/resources/images/news/theecocolapse.png b/resources/images/news/theecocolapse.png
new file mode 100644
index 0000000000000000000000000000000000000000..1c45ec14bf41c20c9a86311f4af9ccd47de799df
GIT binary patch
literal 1264
zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1|<EHm6d=LTavfC%YQK7jQD;BD8gCb5m^kR
zJ;2!QWVRhhu&lr_9Y}-qGsGNQdzgWNMbgv7F~sBe)TyVlg<~b!@6Ytno9y#i<?Pzd
zpE6UTJ_ZGwyK$G@>AWFkDz^H9*yf;%kG#rN>m48Y1xyIaN$&n8)EXe}nRBoyVD>IS
z-#0E7udq$NamUy=eV^+KH%`xcPal5RbAIN}x!)5mr1bXgt$JF-%Ohi%WF%|<?}=yW
zRE2BTggU>!zx&R;|JQGI^}YApDl0ZK{r-0G%fG*Q+fTX#rfrV9f6p&ajN4b<F2_K|
z`r8A?&3oEd^X|vR$Vete&h$}J?wBUVGg(E3o#B8153>S?jC#klu<6-#|8AM~r)OlO
zJ^Pf}w(|JVSGhY%vkMC=DoVC2f4={JRrP$kP%TcUwfAy44t(Ya)nYJ`>^q&purp@q
zr1Q)vo1Ye$ac|fp)UU5!z4!9v=4So4JG;M|&tIW_;cBYUT#JVvS6nT!bY&FbS`?wd
za3tyFm+kp7)4fEySBe~$<dN#Vn6dqJiOS0Bm$w9Y3UIL=xVP6Nck8Ac-8x^!>DhUC
z86ByfK?^TrWJnz^_m9wc^M;}3#{>@-rMq|S?mu{Ndiu<s%a*351`d}w7!^#+mIa-B
zswH~($>*a<cK_d>e9gttq|&K${No;tFf9fSPa!7;t<bOH4=maq{`j#2sPXE3dA{yN
zDvBapXV1p&fA@X&-QKj-d2H|Q$4Bj|HkWww=F01Sxzxz6NvBPv8giyR-yGQhbY+)o
zL(;_O-l`4n@0XW7`uO#0hRMVqzplQm`}^xy@6x3|e@4|r+A?-O{qytNx7OCAK#}q?
z89pB71BGpZt}QH#4GJknO$-^ip<3VHFlJ;cdo~};*%q-Ss{84yP=<Eqg&rzmpMRU1
z+t+6O;=ZzK-@eYy;^z@!ih(XGwr}5jIqBsK30d0$hvlzdBuD@aHE7tL|M=t7sR1XR
z?=DI$`}XE$n{mO%N1M}YYfaVygTrg-@yVyBdhLu+dGwv1pPeag{e62QA&wTOt9KU)
zoVTyj;dr<qZhe?^@4<xV*>1@O3^UH^>Z(n?8@HPOWB%ilC%MHXo?TE}{PDrTgbQhl
zGiUcEvN#H`Fgh%sJ^y+ZJKMHxlTN;U+sf)WNyYP##p<gG8Ts<^;j2Ztdf61_uRqs(
z_U!%rmU-b@7#STWs5qWTnRK$Iu6p+4Rm#&7%MPmhN3C7%zd2`Ng29s_ooGGfBcB<h
zdYv2t85pkK{`A~`=bV!aOM+gOXnp;Vcv$hzpUONvDcz~puGQ@8?d|0BEV2;jI?l>?
zyz*Y2y64V0Dw(sEg=(!8YfxEz-DtAr^vjtITerF-)dE4XvQ)22-o2Q(eZ|l3?six(
z&Ec}5jLe+5rB;U@&Xb${_R31*^s@U+SFg(3hlShM&6&5aBr|Q<>eYd9cLV>F|NS-9
zCok8wUamXv%5?p^cR7HbW&HDJ=hmqndnzyMysx+8IJeT=>Ei}qiJ@BJ8c~vxSdwa$
zT$Bo=7>o=I4RnFX$S}mf+{)C%%Fsd=$S^Rt;`i$jiiX_$l+3hB6b+VECWclfW)KYt
Sz52mG4Gf;HelF{r5}E+n8aB)T

literal 0
HcmV?d00001

diff --git a/resources/recipes/theecocolapse.recipe b/resources/recipes/theecocolapse.recipe
new file mode 100644
index 0000000000..6743ca68b5
--- /dev/null
+++ b/resources/recipes/theecocolapse.recipe
@@ -0,0 +1,46 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+theeconomiccollapseblog.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class TheEconomicCollapse(BasicNewsRecipe):  # recipe for theeconomiccollapseblog.com: class attributes plus a preprocess_html method
+    title                 = 'The Economic Collapse'
+    __author__            = 'Darko Miletic'
+    description           = 'Are You Prepared For The Coming Economic Collapse And The Next Great Depression?'
+    publisher             = 'The Economic Collapse'
+    category              = 'news, politics, USA, economy'
+    oldest_article        = 2  # NOTE(review): presumably an age limit in days - confirm against the BasicNewsRecipe docs
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    encoding              = 'utf8'
+    use_embedded_content  = False
+    language              = 'en'
+    remove_empty_feeds    = True
+    extra_css             = """
+                                body{font-family: Tahoma,Arial,sans-serif }
+                                img{margin-bottom: 0.4em}
+                            """
+
+    conversion_options = {  # values are the class attributes assigned above, so those must stay defined first
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags = [
+                     dict(attrs={'class':'sociable'})
+                    ,dict(name=['iframe','object','embed','meta','link','base'])
+                  ]
+    remove_attributes=['lang','onclick','width','height']
+    keep_only_tags=[dict(attrs={'class':['post-headline','post-bodycopy clearfix','']})]  # NOTE(review): the trailing '' class entry looks unusual - confirm it is intentional
+
+    feeds = [(u'Posts', u'http://theeconomiccollapseblog.com/feed')]  # a single (title, url) feed entry
+
+    def preprocess_html(self, soup):  # deletes every inline style attribute, then returns adeify_images(soup)
+        for item in soup.findAll(style=True):  # style=True selects elements that have a style attribute
+            del item['style']
+        return self.adeify_images(soup)
+

From 91f8c368c19e7d47019694457977911e4eba28e7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Oct 2010 07:59:42 -0700
Subject: [PATCH 6/6] Fix #7249 (updated "el_pais" recipe, corrects
 disappearing titles)

---
 resources/recipes/el_pais.recipe | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/resources/recipes/el_pais.recipe b/resources/recipes/el_pais.recipe
index 1e2164b2af..2e358060b8 100644
--- a/resources/recipes/el_pais.recipe
+++ b/resources/recipes/el_pais.recipe
@@ -2,7 +2,7 @@
 __license__   = 'GPL v3'
 __author__    = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-description   = 'Main daily newspaper from Spain - v1.03 (03, September 2010)'
+description   = 'Main daily newspaper from Spain - v1.04 (19, October 2010)'
 __docformat__ = 'restructuredtext en'
 
 '''
@@ -32,19 +32,16 @@ class ElPais(BasicNewsRecipe):
     remove_javascript = True
     no_stylesheets = True
 
-    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia','cabecera_noticia_reportaje','cabecera_noticia_opinion','contenido_noticia','caja_despiece','presentacion']})]
-	
-    extra_css      = '''
-                        p{style:normal size:12 serif}
+    keep_only_tags = [ dict(name='div', attrs={'class':['cabecera_noticia_reportaje estirar','cabecera_noticia_opinion estirar','cabecera_noticia estirar','contenido_noticia','caja_despiece']})]
 
-                    '''
+    extra_css             = ' p{text-align: justify; font-size: 100%} body{ text-align: left; font-family: serif; font-size: 100% } h1{ font-family: sans-serif; font-size:200%; font-weight: bolder; text-align: justify; } h2{ font-family: sans-serif; font-size:150%; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size:125%; font-weight: 500; text-align: justify } img{margin-bottom: 0.4em} '
 
     remove_tags    = [
                         dict(name='div', attrs={'class':['zona_superior','pie_enlaces_inferiores','contorno_f','ampliar']}),
-                        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
+                        dict(name='div', attrs={'class':['limpiar','mod_apoyo','borde_sup','votos estirar','info_complementa','info_relacionada','buscador_m','nav_ant_sig']}),
                         dict(name='div', attrs={'id':['suscribirse suscrito','google_noticia','utilidades','coment','foros_not','pie','lomas','calendar']}),
                         dict(name='p', attrs={'class':'nav_meses'}),
-                        dict(attrs={'class':['enlaces_m','miniaturas_m']})
+                        dict(attrs={'class':['enlaces_m','miniaturas_m','nav_miniaturas_m']})
                     ]
 
     feeds          = [