From b2857225db8c34e21dac25d825770742c83c6633 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Jan 2010 09:25:16 -0700
Subject: [PATCH 01/32] New recipe for The Kitsap Sun by Darko Miletic

---
 resources/images/news/kitsapun.png | Bin 0 -> 2356 bytes
 resources/recipes/kitsapun.recipe  |  44 +++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 resources/images/news/kitsapun.png
 create mode 100644 resources/recipes/kitsapun.recipe
diff --git a/resources/images/news/kitsapun.png b/resources/images/news/kitsapun.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b7b883d52e86f751173b3f135e782bf56bae334
GIT binary patch
literal 2356
zcmV-43Cs40P)<h;3K|Lk000e1NJLTq001BW001Be1^@s6b9#F800006VoOIv0RI60
z0RN!9r;`8x010qNS#tmY07w7;07w8v$!k6U000Sga6xAP001BW001BWhx(kI000PG
zNkl<ZSi^;vY0O?#5yyXX&bjwpy00vSmQqtgrDdxS0TC;eAg&RLCKA^KNKlC=afwDv
zj6@+Ai6n#&mv4j*h@oM#Qi_;Bx7gCKw6Qi_Y>VyN(zoyXKKGuP@x#5(?PA39<m9>c
z+;eB<%>4f|bLGiD-0fKvRmaS5ryC-MnYY(M2to{j7y~gt2muFjR<asyj+*10kbsDs
zbPp*u@23<%grr(iSDCst%#9#|h;aIFcg&rXB4rVg6p{XVcIsMDJz+3FjMf>TW&mgw
ziXsw1L6kreGb|f;VET-Cj2$0gd~~E!+Bars@(hNCr*rhsE|L^T6vV;IFjKUyN#$gQ
zC(mp>2FFJmS*aLg+Kh7m7(YIO<uNP@MVUx~;g*?o#$qn}&=+~Pk?RRwTKxd7W8env
z?KDg?7hSoES!bO8t}=GM@mJpZ`)W!8;)eY3(c68Zm#M0noRxDHEoaH{PmzkjQ(hcD
zKElSezhiv#AOiyh17#uwLCxt^nYvc$tPBiI<BFBHGc;+&Dev$7$40hqeV)4a4k-o(
z2MWqEl8Pu;67WP>Bo6G_%)#BS@sXQ<dWwLTSKkj_Q=~|WP7Fp9ZO2`S5n_z=CPsMY
z@LmRor*~f4wC?vDJ+zaujE&M5NQICBq%t{z5X7l#BbLJW#2AMT>|n;+MF1Qd*@xvZ
z1_w$+oFq<ze@E3CR|0jNnSIs;%$~OdbLZHRf3b1RLx@!j6^S5_l2arn1&F~&DL9e<
z7u+FdLonP=&?#4A1c4Cx)4>&U$f}4qb)A_q>pYfRaSQIw{(o*@$L2o~sYqqxwW1J;
z*r#!GfE2|6Be{{?P&Yy!SS@wDzh;Ucs5;plH85+G&pG!}-uwPr0NDNJi|pL`6e$Hl
z43vdXq=x&kJ%9*AhZF={AS5BEAT5O&fO|(r1cXMR*NV9}iAS9|=U&3%<*RUa-dg_{
zhxTlyOpzD^WfF?i8W01t$ib{hL~gkO5(ch-f-7!{_dQ?+mT64rGtbD`Saj(s<}O%{
zo3VYvV;tVQnSmk^0+a=<a%oR(eF?27Ck0asN8DTG23UVv_wVhxJ7{jQ-lEGs$D9Sr
z$@K(VU;GWd(fyP~LPRN3q)3orlhQ%JPspS_PIAk5lT#5h#9AZziKcM@j)F&WopB15
zt@tLBrq4sGakj2~kZSAzWsxXiqqHm_L??tcz0-i&tVz!zf(Iedon8n6NE42eS>Wyj
zK<f-7F1+?mhNqp)=#f3V`L{>0>L^87kRlX?kfKloh|+jYyeVdVe1SI+u$Bed-{cC`
z@Y>s77X(wvDQB?ws;@CHJd@F*dwBhY2MN@qB2WaUj8Ft420@VKVEua1iSmA_#ob8(
z?|6?XffE?$gAQPL>X|IL_8Y`vh_R!4d427#2~v?#Bm`%mpml#kF_G4MIW_JfBtq*|
z6(iMJHQ>vj?wBcPM%+8~lc%4<`#yXZu^0ki-?r66pP)z)+=w0;PKyu`+!b-~5Sxl>
z+V&Fxw~B22kQgC|6KEQZAP__d(vrTNu^b|nL*4!Si*Ld_QP;Vt@s_)3mb)@8rVXgu
z6T3CQ?pI*MCW5(OF1Q$mVeVMVnm$@Z1j`fM_2A@LELm|k*%G~8P1dF&R<%-RLsd~T
z+*;nZSZI0&>kKrr1Hsz`H^EIh&;<fPntANtcD6o$AC`OF+LW2|S+?>!7zMe`)Y+*j
zC1=GHOcis*%+PjP+f|u6>KXB{Be*urgysw?t%Wtx_H#`~B#a%|&6elxMe7rum^^bn
z%RYJ!>WN;jV!W4`sFbR1`U5jMPuOWwv!kk8f?7aLFcia3b<`bco6Zmd5(G74?8sg=
zKl5`ePjt_w&RW1FH+-M$1$C{R5w$uww+j0qQ0;JlJDUB~;9?y`oEmq<EkY1NY^qrZ
zO?65!GJa$)8&>}e(~0i+v^nQ-`6qseNr|epMl_hnxk=xB*Y~rbyKixHUBD~cYRCao
zBS?eNAb}7fAw)z1?hRH_3QQc`!}_Ow(qN+9Gi}a7F1Yem>Rh8{^eUrQE4{W=Q}a_W
z;O<YXxNY^mVCEeQT<8GPeFL<v20^7pLnah3HnN*np8PSEC%S#}FId6SYwkdECacn`
zYI2pyIip%()}G$-CNp~+AUFXxL#+l?u%_D~1VW6282gj~6})y_A;!S5!@GFxrQde@
zLX6D6@H&=$_)e-`O_iPFmC>t|sxnNQ7T(+*x$cgqw*jR6BQ-N70R&VdMvc3XBoJe4
z(`Q6g5ibb>jv{K5QP}t93*f@#AG@o=(!!-H5eaO5_5o6n7_R~YMQAd7()(lZhr87L
z%ImOkJM2AdjMp_;VXT+Pc2WpLh)ww-TW-8k6gW;X5ZSx^&un<+SKa;?0*jWdWcdx>
zBAa8L=~WdIb^UOzx$nlW|D`Jrsj6%CydBteFf%w9Da(?#-yUOSn%a5btc4eI_*l)5
z4Kh?l3ah~*B93LH2#H<W)*vD9fgA7cJhAAa>k$#wKmALjNcUUjzE$5{v;X9FKOv4;
z#!MJJ)}yM9bM}HoEI5DhDc=Pn#bs=LV=FVK3G=2W=1$9$1Bs&Th=&Si*Vd=O;i^yG
z)4lhei*MK@B9Cl(_MwMw{n2yIX+yqx%Qc=cGz;2OR(|{@KKuDEGdw)`l#OF!qpV%C
zn)NTQW9M62IBRyvnR82K4#PmK2r*(=(b_q8=}NBt^!K&^PXLb>MX`<lG2}}(U*iB1
zy_!pwU(L6FaPNO9KHZpqAKcGvU-=BPrc7e$FboaJ#PEO|m_0>yOdF`SxYw`FTKK_r
z*MH#``}jZOKU7_5ka2wYAOHXWC3HntbYx+4WjbSWWnpw>05UK!FfA}KEipJ$FgZFh
zGdeUgEigAaFffzMOmYAK03~!qSaf7zbY(hiZ)9m^c>ppnF)%GKF)cATR53C-G%z|a
aHZ3qWIxsLzr}_o}0000<MNUMnLSTaQOh5ks

literal 0
HcmV?d00001

diff --git a/resources/recipes/kitsapun.recipe b/resources/recipes/kitsapun.recipe
new file mode 100644
index 0000000000..e9a7c42f06
--- /dev/null
+++ b/resources/recipes/kitsapun.recipe
@@ -0,0 +1,44 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.kitsapsun.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Kitsapsun(BasicNewsRecipe):
+    title                 = 'Kitsap Sun'
+    __author__            = 'Darko Miletic'
+    description           = 'News from Kitsap County'
+    publisher             = 'Scripps Interactive Newspapers Group'
+    category              = 'news, Kitsap county, USA'    
+    language              = 'en'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+
+    conversion_options = {  
+                             'comments' : description
+                            ,'tags'     : category
+                            ,'language' : language
+                            ,'publisher': publisher
+                         }
+
+    
+    keep_only_tags = [dict(name='div', attrs={'id':['story_meta','story_content']})]
+
+    remove_tags = [dict(name=['object','link','embed','form','iframe'])]
+                  
+    feeds = [
+               (u'News'         , u'http://www.kitsapsun.com/rss/headlines/news/'         )
+              ,(u'Business'     , u'http://www.kitsapsun.com/rss/headlines/business/'     )
+              ,(u'Communities'  , u'http://www.kitsapsun.com/rss/headlines/communities/'  )
+              ,(u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/')
+              ,(u'Lifestyles'   , u'http://www.kitsapsun.com/rss/headlines/lifestyles/'   )
+            ]
+    
+    def print_version(self, url):
+        return url.rpartition('/')[0] + '/?print=1'

From 55fc72ac939221d52a9e544c75f1886f4924b432 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Jan 2010 09:33:46 -0700
Subject: [PATCH 02/32] Improved recipe for FTD

---
 resources/recipes/ftd.recipe      |  31 ++++--
 resources/recipes/ledevoir.recipe | 158 +++++++++++++++---------------
 2 files changed, 100 insertions(+), 89 deletions(-)

diff --git a/resources/recipes/ftd.recipe b/resources/recipes/ftd.recipe
index db53a3ed19..d18f9bdc56 100644
--- a/resources/recipes/ftd.recipe
+++ b/resources/recipes/ftd.recipe
@@ -9,16 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 
 class FTDe(BasicNewsRecipe):
-
+    
     title = 'FTD'
     description = 'Financial Times Deutschland'
     __author__ = 'Oliver Niesner'
     use_embedded_content   = False
     timefmt = ' [%d %b %Y]'
-    language = 'de'
+    language = _('German')
     max_articles_per_feed = 40
     no_stylesheets = True
-
+    
     remove_tags = [dict(id='navi_top'),
 		   dict(id='topbanner'),
 		   dict(id='seitenkopf'),
@@ -28,8 +28,13 @@ class FTDe(BasicNewsRecipe):
 		   dict(id='ADS_Top'),
 		   dict(id='spinner'),
 		   dict(id='ftd-contentad'),
+		   dict(id='ftd-promo'),
 		   dict(id='nava-50009007-1-0'),
 		   dict(id='navli-50009007-1-0'),
+		   dict(id='Box5000534-0-0-0'),
+		   dict(id='ExpV-1-0-0-1'),
+		   dict(id='ExpV-1-0-0-0'),
+		   dict(id='PollExpV-2-0-0-0'),
 		   dict(id='starRating'),
 		   dict(id='saveRating'),
 		   dict(id='yLayer'),
@@ -44,14 +49,19 @@ class FTDe(BasicNewsRecipe):
 		   dict(name='ul', attrs={'class':'nav'}),
 		   dict(name='p', attrs={'class':'articleOptionHead'}),
 		   dict(name='p', attrs={'class':'articleOptionFoot'}),
+		   dict(name='p', attrs={'class':'moreInfo'}),
 		   dict(name='div', attrs={'class':'chartBox'}),
 		   dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
 		   dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
-		   dict(name='div', attrs={'class':'box boxNavTabs '}),
+		   dict(name='div', attrs={'class':'box boxNavTabs'}),
+		   dict(name='div', attrs={'class':'boxMMRgtLow'}),
 		   dict(name='span', attrs={'class':'vote_455857'}),
 		   dict(name='div', attrs={'class':'relatedhalb'}),
 		   dict(name='div', attrs={'class':'box boxListScrollOutline'}),
+		   dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
+		   dict(name='div', attrs={'class':'box boxTeaser'}),
 		   dict(name='div', attrs={'class':'tagCloud'}),
+		   dict(name='div', attrs={'class':'pollView'}),
 		   dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
 		   dict(name='div', attrs={'class':'ftdHpNav'}),
 		   dict(name='div', attrs={'class':'ftdHead'}),
@@ -67,11 +77,12 @@ class FTDe(BasicNewsRecipe):
 		   dict(name='div', attrs={'class':'wertungoben'}),
 		   dict(name='div', attrs={'class':'artikelfuss'}),
 		   dict(name='a', attrs={'class':'rating'}),
+		   dict(name='a', attrs={'href':'#rt'}),
 		   dict(name='div', attrs={'class':'articleOptionFootFrame'}),
 		   dict(name='div', attrs={'class':'artikelsplitfaq'})]
-    remove_tags_after = [dict(name='a', attrs={'class':'more'})]
-
-    feeds =  [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
+    #remove_tags_after = [dict(name='a', attrs={'class':'more'})]
+    
+    feeds =  [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), 
 	       ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
 	       ('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
 	       ('Politik', 'http://www.ftd.de/rss2/politik'),
@@ -82,8 +93,8 @@ class FTDe(BasicNewsRecipe):
 	       ('Auto', 'http://www.ftd.de/rss2/auto'),
 	       ('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
 
-	     ]
-
+	     ] 
+    
 
     def print_version(self, url):
-        return url + '?mode=print'
+        return url.replace('.html', '.html?mode=print')
diff --git a/resources/recipes/ledevoir.recipe b/resources/recipes/ledevoir.recipe
index c9dbd8c5d7..97b33c43a7 100644
--- a/resources/recipes/ledevoir.recipe
+++ b/resources/recipes/ledevoir.recipe
@@ -1,79 +1,79 @@
-#!/usr/bin/env  python
-__license__   = 'GPL v3'
-__author__    = 'Lorenzo Vigentini'
-__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
-__version__     = 'v1.01'
-__date__        = '14, January 2010'
-__description__   = 'Canadian Paper '
-
-'''
-http://www.ledevoir.com/
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class ledevoir(BasicNewsRecipe):
-    author        = 'Lorenzo Vigentini'
-    description   = 'Canadian Paper'
-
-    cover_url      = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
-    title          = u'Le Devoir'
-    publisher      = 'leDevoir.com'
-    category       = 'News, finance, economy, politics'
-
-    language       = 'fr'
-    encoding       = 'utf-8'
-    timefmt        = '[%a, %d %b, %Y]'
-
-    max_articles_per_feed = 50
-    use_embedded_content  = False
-    recursion             = 10
-
-    remove_javascript     = True
-    no_stylesheets        = True
-
-    keep_only_tags  = [
-                        dict(name='div', attrs={'id':'article'}),
-                        dict(name='ul', attrs={'id':'ariane'})
-                    ]
-
-    remove_tags     = [
-                        dict(name='div', attrs={'id':'dialog'}),
-                        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
-                        dict(name='ul', attrs={'class':'mots_cles'}),
-                        dict(name='a', attrs={'class':'haut'}),
-                        dict(name='h5', attrs={'class':'interesse_actions'})
-                    ]
-
-    feeds          = [
-                       (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
-                       (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
-                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
-                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
-                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
-                       (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
-                       (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
-                       (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
-                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
-                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
-                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
-                       (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
-                     ]
-
-    extra_css = '''
-                h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
-                h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
-                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
-                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
-                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
-                .specs {line-height:1em;margin:1px 0;}
-                .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
-                .specs span.auteur a,
-                .specs span.auteur span {text-transform:uppercase;color:#787878;}
-                .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
-                ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
-                ul#ariane li {display:inline;}
-                ul#ariane a {color:#2E2E2E;text-decoration:underline;}
-                .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
-                .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
-                '''
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__author__    = 'Lorenzo Vigentini'
+__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
+__version__     = 'v1.01'
+__date__        = '14, January 2010'
+__description__   = 'Canadian Paper '
+
+'''
+http://www.ledevoir.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ledevoir(BasicNewsRecipe):
+    author        = 'Lorenzo Vigentini'
+    description   = 'Canadian Paper'
+
+    cover_url      = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
+    title          = u'Le Devoir'
+    publisher      = 'leDevoir.com'
+    category       = 'News, finance, economy, politics'
+
+    language       = 'fr'
+    encoding       = 'utf-8'
+    timefmt        = '[%a, %d %b, %Y]'
+
+    max_articles_per_feed = 50
+    use_embedded_content  = False
+    recursion             = 10
+
+    remove_javascript     = True
+    no_stylesheets        = True
+
+    keep_only_tags  = [
+                        dict(name='div', attrs={'id':'article'}),
+                        dict(name='ul', attrs={'id':'ariane'})
+                    ]
+
+    remove_tags     = [
+                        dict(name='div', attrs={'id':'dialog'}),
+                        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
+                        dict(name='ul', attrs={'class':'mots_cles'}),
+                        dict(name='a', attrs={'class':'haut'}),
+                        dict(name='h5', attrs={'class':'interesse_actions'})
+                    ]
+
+    feeds          = [
+                       (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
+                       (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
+                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
+                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
+                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
+                       (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
+                       (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
+                       (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
+                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
+                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
+                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
+                       (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
+                     ]
+
+    extra_css = '''
+                h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
+                h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
+                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+                .specs {line-height:1em;margin:1px 0;}
+                .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
+                .specs span.auteur a,
+                .specs span.auteur span {text-transform:uppercase;color:#787878;}
+                .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
+                ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
+                ul#ariane li {display:inline;}
+                ul#ariane a {color:#2E2E2E;text-decoration:underline;}
+                .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
+                .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
+                '''

From 7535f5862712a54e4dd3ae54132f2a3060229306 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Jan 2010 09:46:52 -0700
Subject: [PATCH 03/32] New recipe for The Yemen Times by kwetal

---
 resources/recipes/yementimes.recipe | 125 ++++++++++++++++++++++++++++
 src/calibre/utils/localization.py   |   1 +
 2 files changed, 126 insertions(+)
 create mode 100644 resources/recipes/yementimes.recipe

diff --git a/resources/recipes/yementimes.recipe b/resources/recipes/yementimes.recipe
new file mode 100644
index 0000000000..426c9a748c
--- /dev/null
+++ b/resources/recipes/yementimes.recipe
@@ -0,0 +1,125 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+
+class YemenTimesRecipe(BasicNewsRecipe):
+    __license__  = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'en_YE'
+    country = 'YE'
+    version = 1
+
+    title = u'Yemen Times'
+    publisher = u'yementimes.com'
+    category = u'News, Opinion, Yemen'
+    description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    encoding = 'utf-8'
+
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True
+
+    keep_only_tags = []
+    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
+                                                      'class': 'DMAIN2'}))
+    remove_attributes = ['style']
+
+    INDEX = 'http://www.yementimes.com/'
+    feeds = []
+    feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
+    feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
+    feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
+    feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
+    feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
+    feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
+    feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
+    feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
+    feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
+    feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
+    feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
+    feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
+    feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))
+
+    extra_css = '''
+                body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
+                div.yemen_byline {font-size: medium; font-weight: bold;}
+                div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
+                .yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
+                '''
+
+    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
+                          'publisher': publisher, 'linearize_tables': True}
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        br.set_handle_gzip(True)
+
+        return br
+
+    def parse_index(self):
+        answer = []
+        for feed_title, feed in self.feeds:
+            soup = self.index_to_soup(feed)
+
+            newsbox = soup.find('div', 'newsbox')
+            main = newsbox.findNextSibling('table')
+
+            articles = []
+            for li in main.findAll('li'):
+                title = self.tag_to_string(li.a)
+                url = self.INDEX + li.a['href']
+                articles.append({'title': title, 'date': None, 'url': url, 'description': '<br/>&nbsp;'})
+
+            answer.append((feed_title, articles))
+
+        return answer
+
+    def preprocess_html(self, soup):
+        '''Rebuild the page as a fresh soup: headline, image, date, byline, story.'''
+        freshSoup = self.getFreshSoup(soup)
+        headline = soup.find('div', attrs = {'id': 'DVMTIT'})
+        if headline:
+            div = headline.findNext('div', attrs = {'id': 'DVTOP'})
+            img = None
+            if div:
+                img = div.find('img')
+
+            headline.name = 'h1'
+            freshSoup.body.append(headline)
+            if img is not None:
+                freshSoup.body.append(img)
+
+        byline = soup.find('div', attrs = {'id': 'DVTIT'})
+        if byline:
+            # Bind date up front: a byline without a <span> must not leave it unset.
+            date = None
+            date_el = byline.find('span')
+            if date_el:
+                pub_date = self.tag_to_string(date_el)
+                date = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
+                date.append(pub_date)
+                date_el.extract()
+
+            raw = '<br/>'.join(['%s' % (part) for part in byline.findAll(text = True)])
+            author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')
+            if date is not None:
+                freshSoup.body.append(date)
+            freshSoup.body.append(author)
+
+        story = soup.find('div', attrs = {'id': 'DVDET'})
+        if story:
+            for table in story.findAll('table'):
+                if table.find('img'):
+                    table['class'] = 'yemen_caption'
+
+            freshSoup.body.append(story)
+        return freshSoup
+
+    def getFreshSoup(self, oldSoup):
+        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
+        if oldSoup.head.title:
+            freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
+        return freshSoup
diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py
index 1ade012b1f..90f86a8368 100644
--- a/src/calibre/utils/localization.py
+++ b/src/calibre/utils/localization.py
@@ -104,6 +104,7 @@ _extra_lang_codes = {
         'en_CY' : _('English (Cyprus)'),
         'en_PK' : _('English (Pakistan)'),
         'en_SG' : _('English (Singapore)'),
+        'en_YE' : _('English (Yemen)'),
         'de_AT' : _('German (AT)'),
         'nl'    : _('Dutch (NL)'),
         'nl_BE' : _('Dutch (BE)'),

From e71b23e5c37a9d90139416772455695859bb8404 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 18 Jan 2010 09:48:12 -0700
Subject: [PATCH 04/32] ebook-meta: Fix setting of series metadata

---
 src/calibre/ebooks/metadata/cli.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/calibre/ebooks/metadata/cli.py b/src/calibre/ebooks/metadata/cli.py
index e4ea1a3931..5de8b76c43 100644
--- a/src/calibre/ebooks/metadata/cli.py
+++ b/src/calibre/ebooks/metadata/cli.py
@@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
         mi.title_sort = title_sort(opts.title)
     if getattr(opts, 'tags', None) is not None:
         mi.tags = [t.strip() for t in opts.tags.split(',')]
+    if getattr(opts, 'series', None) is not None:
+        mi.series = opts.series.strip()
+    if getattr(opts, 'series_index', None) is not None:
+        mi.series_index = float(opts.series_index.strip())
 
     if getattr(opts, 'cover', None) is not None:
         ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()

From e8d1e03f737ccbb765276c608f474fe0670a9ea6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 08:31:41 -0700
Subject: [PATCH 05/32] Fix #4607 (Updated recipe for The American Spectator)

---
 resources/recipes/amspec.recipe | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/resources/recipes/amspec.recipe b/resources/recipes/amspec.recipe
index 62bec5ae18..e5a76a4f86 100644
--- a/resources/recipes/amspec.recipe
+++ b/resources/recipes/amspec.recipe
@@ -1,7 +1,5 @@
-#!/usr/bin/env  python
-
 __license__   = 'GPL v3'
-__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 spectator.org
 '''
@@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class TheAmericanSpectator(BasicNewsRecipe):
     title                 = 'The American Spectator'
     __author__            = 'Darko Miletic'
-    language = 'en'
-
     description           = 'News from USA'
+    category              = 'news, politics, USA, world'
+    publisher             = 'The American Spectator'
     oldest_article        = 7
     max_articles_per_feed = 100
     no_stylesheets        = True
     use_embedded_content  = False
+    language              = 'en'
     INDEX                 = 'http://spectator.org'
       
-    html2lrf_options = [
-                             '--comment'       , description
-                           , '--category'      , 'news, politics, USA'
-                           , '--publisher'     , title
-                         ]
+    conversion_options = {  
+                             'comments'        : description
+                            ,'tags'            : category
+                            ,'language'        : language
+                            ,'publisher'       : publisher
+                         }
 
     keep_only_tags   = [
                              dict(name='div', attrs={'class':'post inner'})
@@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
 
     remove_tags     = [
                              dict(name='object')
-                            ,dict(name='div', attrs={'class':'col3'         })
-                            ,dict(name='div', attrs={'class':'post-options' })
-                            ,dict(name='p'  , attrs={'class':'letter-editor'})
-                            ,dict(name='div', attrs={'class':'social'       })
+                            ,dict(name='div', attrs={'class':['col3','post-options','social']})
+                            ,dict(name='p'  , attrs={'class':['letter-editor','meta']})
                         ]
                          
-    feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
+    feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
 
     def get_cover_url(self):
         cover_url = None
@@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
           
     def print_version(self, url):
         return url + '/print'
+        
+    def get_article_url(self, article):
+        return article.get('guid', None)
+        

From 0e0863103a12bed55e75bc48ae30f51ea80bbc48 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 08:43:52 -0700
Subject: [PATCH 06/32] ...

---
 src/calibre/ebooks/pdf/reflow.py | 13 ++++++++++---
 src/calibre/gui2/ui.py           |  2 +-
 src/calibre/library/database2.py |  9 +++++++--
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 1b2149cf3a..f4bdb9c7ac 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -20,6 +20,10 @@ class Font(object):
 
 class Column(object):
 
+    # A column contains an element if the element bulges out to
+    # the left or the right by at most HFUZZ*col width.
+    HFUZZ = 0.2
+
     def __init__(self):
         self.left = self.right = self.top = self.bottom = 0
         self.width = self.height = 0
@@ -41,6 +45,10 @@ class Column(object):
         for x in self.elements:
             yield x
 
+    def contains(self, elem):
+        return elem.left > self.left - self.HFUZZ*self.width and \
+               elem.right < self.right + self.HFUZZ*self.width
+
 class Element(object):
 
     def __eq__(self, other):
@@ -238,11 +246,10 @@ class Page(object):
         return columns
 
     def find_elements_in_row_of(self, x):
-        interval = Interval(x.top - self.YFUZZ * self.average_text_height,
+        interval = Interval(x.top,
                 x.top + self.YFUZZ*(1+self.average_text_height))
         h_interval = Interval(x.left, x.right)
-        m = max(0, x.idx-15)
-        for y in self.elements[m:x.idx+15]:
+        for y in self.elements[x.idx:x.idx+15]:
             if y is not x:
                 y_interval = Interval(y.top, y.bottom)
                 x_interval = Interval(y.left, y.right)
diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py
index 6cbae7f7b0..98b416eaa3 100644
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@@ -1361,7 +1361,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
 
     def generate_catalog(self):
         rows = self.library_view.selectionModel().selectedRows()
-        if not rows:
+        if not rows or len(rows) < 3:
             rows = xrange(self.library_view.model().rowCount(QModelIndex()))
         ids = map(self.library_view.model().id, rows)
         dbspec = None
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 84638410c7..db75516292 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
         for i in iter(self):
             yield i[x]
 
-    def get_data_as_dict(self, prefix=None, authors_as_string=False):
+    def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
         '''
         Return all metadata stored in the database as a dict. Includes paths to
         the cover and each format.
 
         :param prefix: The prefix for all paths. By default, the prefix is the absolute path
         to the library folder.
+        :param ids: Set of ids to return the data for. If None return data for
+        all entries in database.
         '''
         if prefix is None:
             prefix = self.library_path
@@ -1649,12 +1651,15 @@ class LibraryDatabase2(LibraryDatabase):
             'isbn', 'uuid', 'pubdate'])
         data = []
         for record in self.data:
+            db_id = record[FIELD_MAP['id']]
+            if ids is not None and db_id not in ids:
+                continue
             if record is None: continue
             x = {}
             for field in FIELDS:
                 x[field] = record[FIELD_MAP[field]]
             data.append(x)
-            x['id'] = record[FIELD_MAP['id']]
+            x['id'] = db_id
             x['formats'] = []
             if not x['authors']:
                 x['authors'] = _('Unknown')

From c290fc198c013a90dffc4a643e4dedfe53192c16 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 08:51:11 -0700
Subject: [PATCH 07/32] ...

---
 src/calibre/library/database2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index db75516292..7b0f7a083e 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1651,10 +1651,10 @@ class LibraryDatabase2(LibraryDatabase):
             'isbn', 'uuid', 'pubdate'])
         data = []
         for record in self.data:
+            if record is None: continue
             db_id = record[FIELD_MAP['id']]
             if ids is not None and db_id not in ids:
                 continue
-            if record is None: continue
             x = {}
             for field in FIELDS:
                 x[field] = record[FIELD_MAP[field]]

From 4332e1a6412c9896b7e0130a823321013a8b8c59 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 09:31:20 -0700
Subject: [PATCH 08/32] Faster recipe for The BBC by Darko Miletic

---
 resources/recipes/bbc_fast.recipe | 60 +++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 resources/recipes/bbc_fast.recipe

diff --git a/resources/recipes/bbc_fast.recipe b/resources/recipes/bbc_fast.recipe
new file mode 100644
index 0000000000..12ae9ce1eb
--- /dev/null
+++ b/resources/recipes/bbc_fast.recipe
@@ -0,0 +1,60 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+news.bbc.co.uk
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class BBC(BasicNewsRecipe):
+    '''BBC News fetched through the newsvote.bbc.co.uk print pages.'''
+    title                  = 'BBC News (fast)'
+    __author__             = 'Darko Miletic'
+    description            = 'News from UK. A much faster version that does not download pictures'
+    oldest_article         = 2
+    max_articles_per_feed  = 100
+    no_stylesheets         = True
+    use_embedded_content   = False
+    encoding               = 'utf8'
+    publisher              = 'BBC'
+    category               = 'news, UK, world'
+    language               = 'en'
+    extra_css              = ' body{ font-family: sans-serif; } .headline{font-size: xx-large; font-weight: bold} .ibox{display: block; margin: 20px 50px; padding: 10px; border: 1px solid } '
+
+    conversion_options = {
+        'comments': description, 'tags': category,
+        'language': language, 'publisher': publisher,
+    }
+
+    remove_tags_before = dict(name='div', attrs={'class': 'headline'})
+    remove_tags_after  = dict(name='div', attrs={'class': 'footer'})
+    remove_tags = [
+        dict(name=['object', 'link', 'script', 'iframe']),
+        dict(name='div', attrs={'class': 'footer'}),
+    ]
+
+    feeds = [
+        ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
+        ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
+        ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
+        ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
+        ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
+        ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
+        ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
+        ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
+        ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
+        ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
+        ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
+        ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
+        ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
+    ]
+
+    def print_version(self, url):
+        '''Map an article URL to its print page by re-rooting everything after 'http://'.'''
+        remainder = url.partition('http://')[2]
+        return 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/' + remainder
+
+    def get_article_url(self, article):
+        '''Use the feed entry's guid as the article URL (None if it has none).'''
+        return article.get('guid')
+

From f226bdfe9daa3f64c9d0f4c750face8fa7ef9549 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 12:42:44 -0700
Subject: [PATCH 09/32] New recipe for The Reader's Digest by BrianG

---
 resources/recipes/readers_digest.recipe | 188 ++++++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 resources/recipes/readers_digest.recipe

diff --git a/resources/recipes/readers_digest.recipe b/resources/recipes/readers_digest.recipe
new file mode 100644
index 0000000000..3689ca4c53
--- /dev/null
+++ b/resources/recipes/readers_digest.recipe
@@ -0,0 +1,188 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+'''
+'''
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.web.feeds import Feed
+
+
+class ReadersDigest(BasicNewsRecipe):
+    '''
+    Reader's Digest (rd.com). Two sections are scraped from channel pages,
+    the others come from RSS feeds, and articles with "recipe" in the
+    title are regrouped into a separate 'Recipes' feed.
+    '''
+
+    title       = 'Readers Digest'
+    __author__  = 'BrianG'
+    language = 'en'
+    description = 'Readers Digest Feeds'
+    no_stylesheets        = True
+    use_embedded_content  = False
+    oldest_article = 60
+    max_articles_per_feed = 200
+    remove_javascript     = True
+
+    extra_css      = ''' h1 {font-family:georgia,serif;color:#000000;}
+                        .mainHd{font-family:georgia,serif;color:#000000;}
+                         h2 {font-family:Arial,Sans-serif;}
+                        .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
+                        .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
+                        .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
+                        .photoBkt{ font-size:x-small ;}
+                        .vertPhoto{font-size:x-small ;}
+                        .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
+                        .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
+                        .artTxt{font-family:georgia,serif;}
+                        .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
+                        .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
+                        a:link{color:#CC0000;}
+                        .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
+                        '''
+
+    remove_tags = [
+        dict(name='h4', attrs={'class':'close'}),
+        dict(name='div', attrs={'class':'fromLine'}),
+        dict(name='img', attrs={'class':'colorTag'}),
+        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
+        dict(name='div', attrs={'class':'horizontalAd'}),
+        dict(name='div', attrs={'id':'imageCounterLeft'}),
+        dict(name='div', attrs={'id':'commentsPrint'})
+        ]
+
+    feeds = [
+            ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
+            ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
+            ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
+            ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
+        ]
+
+    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'
+
+#-------------------------------------------------------------------------------------------------
+
+    def print_version(self, url):
+        '''
+        Return the URL of the print-friendly page for the article at `url`.
+
+        URLs containing neither '/article' nor '/post' are returned unchanged.
+        '''
+        if url.find('/article') > 0:
+            # Get the identity number of the current article and append it
+            # to the root print URL.
+            ident = url[url.find('/article')+8:url.find('.html?')-4]
+            url = 'http://www.rd.com/content/printContent.do?contentId=' + ident
+
+        elif url.find('/post') > 0:
+            # In this case, have to get the page itself to derive the Print page.
+            soup = self.index_to_soup(url)
+            newsoup = soup.find('ul',attrs={'class':'printBlock'})
+            url = 'http://www.rd.com' + newsoup('a')[0]['href']
+            # Drop everything from '&Keep' onwards. partition() leaves the URL
+            # intact when the marker is missing, whereas slicing up to find()'s
+            # -1 result would silently cut off the last character.
+            url = url.partition('&Keep')[0]
+
+        return url
+
+#-------------------------------------------------------------------------------------------------
+
+    def parse_index(self):
+        '''
+        Return the index as a list of (section title, article dicts) tuples.
+
+        Sections scraped from the channel pages are collected first; the RSS
+        feeds are then merged in, 'New in RD' and 'Blogs' going to the front.
+        '''
+        pages = [
+                ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
+                # useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
+                ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
+            ]
+
+        feeds = []
+        for section, url, divider, attrList in pages:
+            feeds.append((section, self.page_parse(url, divider, attrList)))
+
+        # parse_rss() returns Feed objects. Convert each feed/article combination
+        # into the (title, list of dicts) form that parse_index has to return.
+        for feed in self.parse_rss():
+            newArticles = [
+                {
+                    'title' : article.title,
+                    'url'   : article.url,
+                    'date'  : article.date,
+                    'description' : article.text_summary
+                }
+                for article in feed.articles
+            ]
+
+            # New and Blogs should be the first two feeds.
+            if feed.title == 'New in RD':
+                feeds.insert(0,(feed.title,newArticles))
+            elif feed.title == 'Blogs':
+                feeds.insert(1,(feed.title,newArticles))
+            else:
+                feeds.append((feed.title,newArticles))
+
+        return feeds
+
+#-------------------------------------------------------------------------------------------------
+
+    def page_parse(self, mainurl, divider, attrList):
+        '''
+        Scrape the channel page at `mainurl` into a list of article dicts.
+
+        Each element matching `attrList` yields one article (title from its
+        first <img> alt, URL from its first <a> href). `divider` is unused.
+        '''
+        mainsoup = self.index_to_soup(mainurl)
+        return [
+            {
+                'title' : item('img')[0]['alt'],
+                'url'   : 'http://www.rd.com'+item('a')[0]['href'],
+                'date'  : '',
+                'description' : ''
+            }
+            for item in mainsoup.findAll(attrs=attrList)
+        ]
+
+#-------------------------------------------------------------------------------------------------
+
+    def parse_rss (self):
+        '''
+        Parse the RSS feeds and return the resulting list of Feed objects.
+
+        Articles with "recipe" in their title (case-insensitive) are removed
+        from their original feed and, if any were found, regrouped into an
+        extra 'Recipes' feed appended to the list.
+        '''
+        # Do the "official" parse_feeds first
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        recipeArticles = []
+        for curfeed in feeds:
+            kept = []
+            for curarticle in curfeed.articles:
+                if 'RECIPE' in curarticle.title.upper():
+                    recipeArticles.append(curarticle)
+                else:
+                    kept.append(curarticle)
+            # Replace the contents in place: a single pass instead of one
+            # index() lookup per removed article.
+            curfeed.articles[:] = kept
+
+        # If there are any recipes found, create a new Feed object and append.
+        if recipeArticles:
+            pfeed = Feed()
+            pfeed.title = 'Recipes'
+            # NOTE: attribute name corrected from the misspelled 'descrition'.
+            pfeed.description = 'Recipe Feed (Virtual)'
+            pfeed.image_url  = None
+            pfeed.oldest_article = 30
+            pfeed.id_counter = len(recipeArticles)
+            pfeed.articles = recipeArticles[:]
+            feeds.append(pfeed)
+
+        return feeds
+

From b72a0652ff26ad7770825f940d588db696076f9f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 12:57:49 -0700
Subject: [PATCH 10/32] E-book viewer: Fit images to viewer window (can be
 turned off via Preferences)

---
 resources/viewer/images.js              | 22 ++++++++++++++++
 src/calibre/gui2/viewer/config.ui       | 13 +++++++---
 src/calibre/gui2/viewer/documentview.py | 34 ++++++++++++-------------
 3 files changed, 48 insertions(+), 21 deletions(-)
 create mode 100644 resources/viewer/images.js

diff --git a/resources/viewer/images.js b/resources/viewer/images.js
new file mode 100644
index 0000000000..cc6e6d47e5
--- /dev/null
+++ b/resources/viewer/images.js
@@ -0,0 +1,22 @@
+/*
+ * images management
+ * Copyright 2008 Kovid Goyal
+ * License: GNU GPL v3
+ */
+
+function scale_images() {
+    $("img:visible").each(function() {
+        var offset = $(this).offset();
+        $(this).css("max-width", (window.innerWidth-offset.left-5)+"px");
+        $(this).css("max-height", (window.innerHeight-5)+"px");
+    });
+}
+
+function setup_image_scaling_handlers() {
+   scale_images();
+   $(window).resize(function(){
+        scale_images();
+   });
+}
+
+
diff --git a/src/calibre/gui2/viewer/config.ui b/src/calibre/gui2/viewer/config.ui
index fe1dc85c93..d6e71c77d2 100644
--- a/src/calibre/gui2/viewer/config.ui
+++ b/src/calibre/gui2/viewer/config.ui
@@ -7,14 +7,14 @@
     <x>0</x>
     <y>0</y>
     <width>479</width>
-    <height>574</height>
+    <height>606</height>
    </rect>
   </property>
   <property name="windowTitle">
    <string>Configure Ebook viewer</string>
   </property>
   <property name="windowIcon">
-   <iconset resource="../../../../resources/images.qrc">
+   <iconset>
     <normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
   </property>
   <layout class="QGridLayout" name="gridLayout_4">
@@ -164,7 +164,7 @@
               </item>
              </widget>
             </item>
-            <item row="6" column="0" colspan="2">
+            <item row="7" column="0" colspan="2">
              <widget class="QCheckBox" name="opt_remember_window_size">
               <property name="text">
                <string>Remember last used &amp;window size</string>
@@ -218,6 +218,13 @@
               </property>
              </widget>
             </item>
+            <item row="6" column="0" colspan="2">
+             <widget class="QCheckBox" name="opt_fit_images">
+              <property name="text">
+               <string>&amp;Resize images larger than the viewer window (needs restart)</string>
+              </property>
+             </widget>
+            </item>
            </layout>
           </item>
           <item row="3" column="0">
diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index b35e28121a..790b1c4f2f 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -10,7 +10,7 @@ from base64 import b64encode
 from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
                      QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
                      QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
-                     QFont, QObject, QApplication, pyqtSignature, QAction
+                     QFont, pyqtSignature, QAction
 from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
 
 from calibre.utils.config import Config, StringConfig
@@ -21,7 +21,7 @@ from calibre.constants import iswindows
 from calibre import prints, guess_type
 from calibre.gui2.viewer.keys import SHORTCUTS
 
-bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = None
+bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = images =None
 
 def load_builtin_fonts():
     base = P('fonts/liberation/*.ttf')
@@ -42,6 +42,8 @@ def config(defaults=None):
               help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
     c.add_opt('max_view_width', default=6000,
             help=_('Maximum width of the viewer window, in pixels.'))
+    c.add_opt('fit_images', default=True,
+            help=_('Resize images larger than the viewer window to fit inside it'))
     c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
     c.add_opt('hyphenate_default_lang', default='en',
             help=_('Default language for hyphenation rules'))
@@ -59,20 +61,6 @@ def config(defaults=None):
 
     return c
 
-class PythonJS(QObject):
-
-    def __init__(self, callback):
-        QObject.__init__(self, QApplication.instance())
-        self.setObjectName("py_bridge")
-        self._callback = callback
-
-    @pyqtSignature("QString")
-    def callback(self, msg):
-        print "callback called"
-        self._callback(msg)
-
-
-
 class ConfigDialog(QDialog, Ui_Dialog):
 
     def __init__(self, shortcuts, parent=None):
@@ -110,6 +98,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
         self.shortcut_config = ShortcutConfig(shortcuts, parent=self)
         p = self.tabs.widget(1)
         p.layout().addWidget(self.shortcut_config)
+        self.opt_fit_images.setChecked(opts.fit_images)
 
 
     def accept(self, *args):
@@ -122,6 +111,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
         c.set('standard_font', {0:'serif', 1:'sans', 2:'mono'}[self.standard_font.currentIndex()])
         c.set('user_css', unicode(self.css.toPlainText()))
         c.set('remember_window_size', self.opt_remember_window_size.isChecked())
+        c.set('fit_images', self.opt_fit_images.isChecked())
         c.set('max_view_width', int(self.max_view_width.value()))
         c.set('hyphenate', self.hyphenate.isChecked())
         idx = self.hyphenate_default_lang.currentIndex()
@@ -157,7 +147,6 @@ class Document(QWebPage):
         self.setObjectName("py_bridge")
         self.debug_javascript = False
         self.current_language = None
-        #self.js_bridge = PythonJS(self.js_callback)
 
         self.setLinkDelegationPolicy(self.DelegateAllLinks)
         self.scroll_marks = []
@@ -197,9 +186,14 @@ class Document(QWebPage):
         opts = config().parse()
         self.hyphenate = opts.hyphenate
         self.hyphenate_default_lang = opts.hyphenate_default_lang
+        self.do_fit_images = opts.fit_images
+
+    def fit_images(self):
+        if self.do_fit_images:
+            self.javascript('setup_image_scaling_handlers()')
 
     def load_javascript_libraries(self):
-        global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator
+        global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator, images
         self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
         if jquery is None:
             jquery = P('content_server/jquery.js', data=True)
@@ -215,6 +209,9 @@ class Document(QWebPage):
         if referencing is None:
             referencing = P('viewer/referencing.js', data=True)
         self.javascript(referencing)
+        if images is None:
+            images = P('viewer/images.js', data=True)
+        self.javascript(images)
         if hyphenation is None:
             hyphenation = P('viewer/hyphenation.js', data=True)
         self.javascript(hyphenation)
@@ -541,6 +538,7 @@ class DocumentView(QWebView):
             return
         self.loading_url = None
         self.document.set_bottom_padding(0)
+        self.document.fit_images()
         self._size_hint = self.document.mainFrame().contentsSize()
         scrolled = False
         if self.to_bottom:

From e936eb0c84a19765796dfa070b11878be6aaa8f7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 14:02:16 -0700
Subject: [PATCH 11/32] MOBI Input: Rescale img width and height attributes
 that were specified in em units. Fixes #4608 (Built in viewre picture issue)

---
 resources/viewer/images.js         |  1 +
 src/calibre/ebooks/mobi/reader.py  | 10 ++++++++++
 src/calibre/ebooks/oeb/stylizer.py |  4 ++++
 3 files changed, 15 insertions(+)

diff --git a/resources/viewer/images.js b/resources/viewer/images.js
index cc6e6d47e5..ea68009254 100644
--- a/resources/viewer/images.js
+++ b/resources/viewer/images.js
@@ -7,6 +7,7 @@
 function scale_images() {
     $("img:visible").each(function() {
         var offset = $(this).offset();
+        //window.py_bridge.debug(window.getComputedStyle(this, '').getPropertyValue('max-width'));
         $(this).css("max-width", (window.innerWidth-offset.left-5)+"px");
         $(this).css("max-height", (window.innerHeight-5)+"px");
     });
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index b8557aea98..4f894ce088 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -563,6 +563,16 @@ class MobiReader(object):
                     recindex = attrib.pop(attr, None) or recindex
                 if recindex is not None:
                     attrib['src'] = 'images/%s.jpg' % recindex
+                for attr in ('width', 'height'):
+                    if attr in attrib:
+                        val = attrib[attr]
+                        if val.lower().endswith('em'):
+                            try:
+                                nval = float(val[:-2])
+                                nval *= 16 * (168.451/72) # Assume this was set using the Kindle profile
+                                attrib[attr] = "%dpx"%int(nval)
+                            except:
+                                del attrib[attr]
             elif tag.tag == 'pre':
                 if not tag.text:
                     tag.tag = 'div'
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index 9f50796615..d0e394b9e5 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -411,6 +411,7 @@ class Style(object):
         return result
 
     def _unit_convert(self, value, base=None, font=None):
+        ' Return value in pts'
         if isinstance(value, (int, long, float)):
             return value
         try:
@@ -447,6 +448,9 @@ class Style(object):
                 result = value * 0.40
         return result
 
+    def pt_to_px(self, value):
+        return (self._profile.dpi / 72.0) * value
+
     @property
     def fontSize(self):
         def normalize_fontsize(value, base):

From ab2c79226d1123e660cce6935d9c9a6a3273d3b7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jan 2010 15:46:16 -0700
Subject: [PATCH 12/32] New recipe for Algemeen Dagblad by kwetal

---
 resources/images/news/ad.png | Bin 0 -> 569 bytes
 resources/recipes/ad.recipe  |  86 +++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 resources/images/news/ad.png
 create mode 100644 resources/recipes/ad.recipe

diff --git a/resources/images/news/ad.png b/resources/images/news/ad.png
new file mode 100644
index 0000000000000000000000000000000000000000..8b017910dff028186af8079acec32baebde0fe90
GIT binary patch
literal 569
zcmV-90>=G`P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00001b5ch_0Itp)
z=>Px#24YJ`L;(K){{a7>y{D4^000SaNLh0L01FcU01FcV0GgZ_00007bV*G`2igG>
z6gLiREyA||00Fs4L_t(I%f*vFYZOrw#eWl*Ku9qxSiKc|D{Lb&u;3poBKZUcs|2eB
z8~;FLRRp{J01M9Am{R-%yHmzur;t>Uu!CA8NTzV6SwRIQ5S3%`W_M?aT?!9ec=x_{
z?>qP1^E^r76j<PtsLXj$4Dma#27+a{WMMm=?dK_61wLEn8;e(Nq34|glR;j?vdG%F
z?+RQj4txZ5V`oJHG{In~ked3`r>5aR|HS@CTB%A~RV@G{9KovyYqKfam>&=w%;<L?
zK^(btBY?Xd7?v3HsVk%=wYyStL!rU3m|&&qY_I41@knalatx-GO-6F(F+91B&FiEb
z!$C6Z7vOc#|JeYHEynmFCN+oHyiPEVVE30xHW|EpGe5U0>D*L2Ng8+;rm;z(GFH{c
z-)Qmu!yf*#9^Br9-w8bJP$7y*ImWUGa3UFdh@ULM%@+RsRk*R~;%`%@3$?qm2OI56
zlD=sqwYv(bDKxl;E#uQbT5n40O;t!UxCU$X0R#;Id;(}K!Sfv+w$H=m2FVcwKNEPL
z!dA!Key>nsl0#rl>O4)r2BpB41ik`iQut$`%Dj~S%WwP(-S7}zQQ>S$00000NkvXX
Hu0mjf<wx)t

literal 0
HcmV?d00001

diff --git a/resources/recipes/ad.recipe b/resources/recipes/ad.recipe
new file mode 100644
index 0000000000..bc3fe40dad
--- /dev/null
+++ b/resources/recipes/ad.recipe
@@ -0,0 +1,86 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ADRecipe(BasicNewsRecipe):
+    '''Algemeen Dagblad (ad.nl): Dutch news and sports, read via the print pages.'''
+    __license__  = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'nl'
+    country = 'NL'
+    version = 1
+
+    title = u'AD'
+    publisher = u'de Persgroep Publishing Nederland NV'
+    category = u'News, Sports, the Netherlands'
+    description = u'News and Sports from the Netherlands'
+
+    oldest_article = 1.2
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    remove_empty_feeds = True
+    no_stylesheets = True
+    remove_javascript = True
+
+    keep_only_tags = [dict(name = 'div', attrs = {'id': 'art_box2'}),
+                      dict(name = 'p', attrs = {'class': 'gen_footnote3'})]
+
+    remove_tags = [dict(name = 'div', attrs = {'class': 'gen_clear'}),
+                   dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')})]
+
+    remove_attributes = ['style']
+
+    # feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
+    feeds = []
+    feeds.append((u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
+    feeds.append((u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
+    feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'))
+    feeds.append((u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
+    feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'))
+    feeds.append((u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
+    feeds.append((u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
+    feeds.append((u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
+    feeds.append((u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
+    feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'))
+    feeds.append((u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
+    feeds.append((u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
+    feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'))
+    feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'))
+    feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'))
+    feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'))
+    feeds.append((u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
+    feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'))
+    feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml'))
+    feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'))
+    feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'))
+    feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'))
+    feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'))
+    feeds.append((u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
+    feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'))
+
+    extra_css = '''
+                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
+                div.captionEmbeddedMasterObject {font-size: x-small; font-style: italic; color: #696969;}
+                .gen_footnote3 {font-size: small; color: #666666; margin-top: 0.6em;}
+                '''
+
+    # 'language' follows the recipe language ('nl'); it was hard-coded to 'en'.
+    conversion_options = {'comments': description, 'tags': category, 'language': language,
+                          'publisher': publisher}
+
+    def print_version(self, url):
+        '''Rearrange the '/'-separated parts of an article URL into its print URL.
+
+        Raises IndexError when the URL splits into fewer than 14 parts.'''
+        parts = url.split('/')
+        return 'http://' + parts[2] + '/' + parts[3] + '/' + parts[4] + '/' + parts[5] + '/' \
+                + parts[10] + '/' + parts[7] + '/print/' + parts[8] + '/' + parts[9] + '/' + parts[13]
+
+    def preprocess_html(self, soup):
+        '''Drop each <br> whose previous and next sibling tags are both <br>; return soup.'''
+        for br in soup.findAll('br'):
+            prev = br.findPreviousSibling(True)
+            if hasattr(prev, 'name') and prev.name == 'br':
+                following = br.findNextSibling(True)
+                if hasattr(following, 'name') and following.name == 'br':
+                    br.extract()
+        return soup

From aa589bea380325092256e469078c759eeb01049a Mon Sep 17 00:00:00 2001
From: Starson17 <starson17@gmail.com>
Date: Wed, 20 Jan 2010 10:01:29 -0500
Subject: [PATCH 13/32] Swap author option splits name at comma if comma found,
 otherwise splits at first space

---
 src/calibre/ebooks/metadata/meta.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/meta.py b/src/calibre/ebooks/metadata/meta.py
index 479b513ea5..1c22481263 100644
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@@ -134,7 +134,10 @@ def metadata_from_filename(name, pat=None):
             mi.authors = aus
             if prefs['swap_author_names'] and mi.authors:
                 def swap(a):
-                    parts = a.split()
+                    if ',' in a:
+                        parts = a.split(',', 1)
+                    else:
+                        parts = a.split(' ', 1)
                     if len(parts) > 1:
                         t = parts[-1]
                         parts = parts[:-1]

From d5b5af5b3fed17af056ed58e51c04c1d93266869 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 09:30:43 -0700
Subject: [PATCH 14/32] New recipe for Pajamas Media by Krittika Goyal

---
 resources/recipes/pajama.recipe | 48 +++++++++++++++++++++++++++++++++
 src/calibre/utils/config.py     |  1 +
 2 files changed, 49 insertions(+)
 create mode 100644 resources/recipes/pajama.recipe

diff --git a/resources/recipes/pajama.recipe b/resources/recipes/pajama.recipe
new file mode 100644
index 0000000000..8c5ba74317
--- /dev/null
+++ b/resources/recipes/pajama.recipe
@@ -0,0 +1,48 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class PajamasMedia(BasicNewsRecipe):
+    # News recipe for pajamasmedia.com, built from its single FeedBurner feed.
+    title          = u'Pajamas Media'
+    description = u'Provides exclusive news and opinion for forty countries.'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    recursions = 1
+    # Only URLs ending in '/2/' match (presumably page two of a post -- confirm).
+    match_regexps = [r'http://pajamasmedia.com/blog/.*/2/$']
+
+    # NOTE(review): other recipes use 'no_stylesheets'; confirm 'remove_stylesheets' is honored.
+    remove_stylesheets = True
+    remove_tags_after  = dict(name='div', attrs={'class':'paged-nav'})
+    remove_tags = [
+        dict(name='iframe'),
+        dict(name='div', attrs={'class':['pages']}),
+    ]
+
+    feeds = [
+        ('pajamas Media', 'http://feeds.feedburner.com/PajamasMedia'),
+    ]
+
+    def preprocess_html(self, soup):
+        # Reduce the page to its 'innerpage-content' div inside a minimal document.
+        story = soup.find(name='div', attrs={'id':'innerpage-content'})
+        if story is None:
+            # No article container on this page: hand it back untouched instead of
+            # failing on body.insert(0, None) below.
+            return soup
+
+        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
+        body = soup.find(name='body')
+        body.insert(0, story)
+        return soup
+
+    def postprocess_html(self, soup, first):
+        # When 'first' is false, strip the 'innerpage-header' and 'author' elements if present.
+        if not first:
+            h = soup.find(attrs={'class':'innerpage-header'})
+            if h: h.extract()
+            auth = soup.find(attrs={'class':'author'})
+            if auth: auth.extract()
+        return soup
diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py
index a0e5632cb7..22e31c3005 100644
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@@ -524,6 +524,7 @@ class DynamicConfig(dict):
                     pass
                 except:
                     import traceback
+                    print 'Failed to unpickle stored object:'
                     traceback.print_exc()
                     d = {}
         self.clear()

From 0b5541edc2e4c666d7cbbfdd0d197cbaeb69b87f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 09:32:51 -0700
Subject: [PATCH 15/32] Updated recipe for Physics Today

---
 resources/recipes/physics_today.recipe | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/resources/recipes/physics_today.recipe b/resources/recipes/physics_today.recipe
index 9b236ff23c..d1ce17cf32 100644
--- a/resources/recipes/physics_today.recipe
+++ b/resources/recipes/physics_today.recipe
@@ -8,8 +8,7 @@ class Physicstoday(BasicNewsRecipe):
     description           = u'Physics Today magazine'
     publisher             = 'American Institute of Physics'
     category              = 'Physics'
-    language = 'en'
-
+    language              = 'en'
     cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
     oldest_article = 30
     max_articles_per_feed = 100
@@ -30,11 +29,11 @@ class Physicstoday(BasicNewsRecipe):
     def get_browser(self):
         br = BasicNewsRecipe.get_browser()
         if self.username is not None and self.password is not None:
-            br.open('http://www.physicstoday.org/pt/sso_login.jsp')
-            br.select_form(name='login')
+            br.open('http://ptonline.aip.org/journals/doc/PHTOAD-home/pt_login.jsp?fl=f')
+            br.select_form(name='login_form')
             br['username'] = self.username
             br['password'] = self.password
             br.submit()
         return br
 
-    feeds          = [(u'All', u'http://www.physicstoday.org/feed.xml')]
\ No newline at end of file
+    feeds          = [(u'All', u'http://www.physicstoday.org/feed.xml')]

From 096735a456c79cefd6ebe0a5c9df1b142757845c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 14:58:07 -0700
Subject: [PATCH 16/32] Improved free WSJ recipe

---
 resources/recipes/wsj_free.recipe | 170 ++++++++++++++++++------------
 1 file changed, 103 insertions(+), 67 deletions(-)

diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe
index b05da400ae..495a7c343b 100644
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@@ -3,47 +3,122 @@
 __license__   = 'GPL v3'
 
 '''
-online.wsj.com.com
+online.wsj.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag, NavigableString
+from datetime import timedelta, datetime, date
 
 class WSJ(BasicNewsRecipe):
     # formatting adapted from original recipe by Kovid Goyal and Sujata Raman
     title          = u'Wall Street Journal (free)'
     __author__     = 'Nick Redding'
     language = 'en'
-    description = ('All the free content from the Wall Street Journal (business'
-            ', financial and political news)')
+    description = ('All the free content from the Wall Street Journal (business, financial and political news)')
+
     no_stylesheets = True
     timefmt = ' [%b %d]'
-    extra_css   = '''h1{font-size:large; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;}
-                    h2{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
-                    .subhead{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
-                    .insettipUnit {font-family:Arial,Sans-serif;font-size:xx-small;}
-                    .targetCaption{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
-                    .article{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
-                    .tagline { ont-size:xx-small;}
-                    .dateStamp {font-family:Arial,Helvetica,sans-serif;}
-                    h3{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
-                    .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small; list-style-type: none;}
+
+    # customization notes: delete sections you are not interested in
+    # set omit_paid_content to False if you want the paid content article snippets
+    # set oldest_article to the maximum number of days back from today to include articles
+    sectionlist = [  # [index page path, section title] pairs, parsed in this order
+                        ['/home-page','Front Page'],
+                        ['/public/page/news-opinion-commentary.html','Commentary'],
+                        ['/public/page/news-global-world.html','World News'],
+                        ['/public/page/news-world-business.html','US News'],
+                        ['/public/page/news-business-us.html','Business'],
+                        ['/public/page/news-financial-markets-stock.html','Markets'],
+                        ['/public/page/news-tech-technology.html','Technology'],
+                        ['/public/page/news-personal-finance.html','Personal Finance'],
+                        ['/public/page/news-lifestyle-arts-entertainment.html','Life & Style'],
+                        ['/public/page/news-real-estate-homes.html','Real Estate'],
+                        ['/public/page/news-career-jobs.html','Careers'],
+                        ['/public/page/news-small-business-marketing.html','Small Business']
+                    ]
+    oldest_article = 2
+    omit_paid_content = True
+
+    extra_css   = '''h1{font-size:large; font-family:Times,serif;}
+                    h2{font-family:Times,serif; font-size:small; font-style:italic;}
+                    .subhead{font-family:Times,serif; font-size:small; font-style:italic;}
+                    .insettipUnit {font-family:Times,serif;font-size:xx-small;}
+                    .targetCaption{font-size:x-small; font-family:Times,serif; font-style:italic; margin-top: 0.25em;}
+                    .article{font-family:Times,serif; font-size:x-small;}
+                    .tagline { font-size:xx-small;}
+                    .dateStamp {font-family:Times,serif;}
+                    h3{font-family:Times,serif; font-size:xx-small;}
+                    .byline {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
                     .metadataType-articleCredits {list-style-type: none;}
-                    h6{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic;}
+                    h6{font-family:Times,serif; font-size:small; font-style:italic;}
                     .paperLocation{font-size:xx-small;}'''
 
-    remove_tags_before = dict(name='h1')
-    remove_tags =   [   dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
-                                 "articleTabs_tab_interactive","articleTabs_tab_video",
-                                 "articleTabs_tab_map","articleTabs_tab_slideshow"]),
-			{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
-			'insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', 'tooltip',
-			'adSummary', 'nav-inline','insetFullBracket']},
-                        dict(rel='shortcut icon'),
+
+    remove_tags_before = dict({'class':re.compile('^articleHeadlineBox')})
+    remove_tags =   [   dict({'id':re.compile('^articleTabs_tab_')}),
+                        #dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
+                        #         "articleTabs_tab_interactive","articleTabs_tab_video",
+                        #         "articleTabs_tab_map","articleTabs_tab_slideshow"]),
+			{'class':  ['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
+                                    'insettip','insetClose','more_in', "insetContent",
+                        #            'articleTools_bottom','articleTools_bottom mjArticleTools',
+                                    'aTools', 'tooltip',
+                                    'adSummary', 'nav-inline','insetFullBracket']},
+                        dict({'class':re.compile('^articleTools_bottom')}),
+                        dict(rel='shortcut icon')
                     ]
     remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
 
+    def get_browser(self):
+        # Hand back whatever BasicNewsRecipe.get_browser() returns, unchanged.
+        return BasicNewsRecipe.get_browser()
 
     def preprocess_html(self,soup):
+        # check if article is too old
+        datetag = soup.find('li',attrs={'class' : re.compile("^dateStamp")})
+        if datetag:
+            dateline_string = self.tag_to_string(datetag,False)
+            date_items = dateline_string.split(',')
+            datestring = date_items[0]+date_items[1]
+            article_date = datetime.strptime(datestring.title(),"%B %d %Y")
+            earliest_date = date.today() - timedelta(days=self.oldest_article)
+            if article_date.date() < earliest_date:
+                self.log("Skipping article dated %s" % datestring)
+                return None
+            datetag.parent.extract()
+
+            # place dateline in article heading
+
+            bylinetag = soup.find('h3','byline')
+            if bylinetag:
+                h3bylinetag = bylinetag
+            else:
+                bylinetag = soup.find('li','byline')
+                if bylinetag:
+                    h3bylinetag = bylinetag.h3
+                    if not h3bylinetag:
+                        h3bylinetag = bylinetag
+                    bylinetag = bylinetag.parent
+            if bylinetag:
+                if h3bylinetag.a:
+                    bylinetext = 'By '+self.tag_to_string(h3bylinetag.a,False)
+                else:
+                    bylinetext = self.tag_to_string(h3bylinetag,False)
+                h3byline = Tag(soup,'h3',[('class','byline')])
+                if bylinetext.isspace() or (bylinetext == ''):
+                    h3byline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
+                else:
+                    h3byline.insert(0,NavigableString(bylinetext+u'\u2014'+date_items[0]+','+date_items[1]))
+                bylinetag.replaceWith(h3byline)
+            else:
+                headlinetag = soup.find('div',attrs={'class' : re.compile("^articleHeadlineBox")})
+                if headlinetag:
+                    dateline = Tag(soup,'h3', [('class','byline')])
+                    dateline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
+                    headlinetag.insert(len(headlinetag),dateline)
+        else: # if no date tag, don't process this page--it's not a news item
+            return None
         # This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
         ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
         if ultag:
@@ -58,7 +133,7 @@ class WSJ(BasicNewsRecipe):
         key = None
         ans = []
 
-        def parse_index_page(page_name,page_title,omit_paid_content):
+        def parse_index_page(page_name,page_title):
 
             def article_title(tag):
                 atag = tag.find('h2') # title is usually in an h2 tag
@@ -119,7 +194,6 @@ class WSJ(BasicNewsRecipe):
             soup = self.index_to_soup(pageurl)
             # Find each instance of div with class including "headlineSummary"
             for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
-
                 # divtag contains all article data as ul's and li's
                 # first, check if there is an h3 tag which provides a section name
                 stag = divtag.find('h3')
@@ -162,7 +236,7 @@ class WSJ(BasicNewsRecipe):
                         # now skip paid subscriber articles if desired
                         subscriber_tag = litag.find(text="Subscriber Content")
                         if subscriber_tag:
-                                if omit_paid_content:
+                                if self.omit_paid_content:
                                     continue
                                 # delete the tip div so it doesn't get in the way
                                 tiptag = litag.find("div", { "class" : "tipTargetBox" })
@@ -185,7 +259,7 @@ class WSJ(BasicNewsRecipe):
                             continue
                         if url.startswith("/article"):
                             url = mainurl+url
-                        if not url.startswith("http"):
+                        if not url.startswith("http://online.wsj.com"):
                             continue
                         if not url.endswith(".html"):
                             continue
@@ -214,48 +288,10 @@ class WSJ(BasicNewsRecipe):
                             articles[page_title] = []
                         articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
 
-        # customization notes: delete sections you are not interested in
-        # set omit_paid_content to False if you want the paid content article previews
-        sectionlist = ['Front Page','Commentary','World News','US News','Business','Markets',
-                       'Technology','Personal Finance','Life & Style','Real Estate','Careers','Small Business']
-        omit_paid_content = True
 
-        if 'Front Page' in sectionlist:
-            parse_index_page('/home-page','Front Page',omit_paid_content)
-            ans.append('Front Page')
-        if 'Commentary' in sectionlist:
-            parse_index_page('/public/page/news-opinion-commentary.html','Commentary',omit_paid_content)
-            ans.append('Commentary')
-        if 'World News' in sectionlist:
-            parse_index_page('/public/page/news-global-world.html','World News',omit_paid_content)
-            ans.append('World News')
-        if 'US News' in sectionlist:
-            parse_index_page('/public/page/news-world-business.html','US News',omit_paid_content)
-            ans.append('US News')
-        if 'Business' in sectionlist:
-            parse_index_page('/public/page/news-business-us.html','Business',omit_paid_content)
-            ans.append('Business')
-        if 'Markets' in sectionlist:
-            parse_index_page('/public/page/news-financial-markets-stock.html','Markets',omit_paid_content)
-            ans.append('Markets')
-        if 'Technology' in sectionlist:
-            parse_index_page('/public/page/news-tech-technology.html','Technology',omit_paid_content)
-            ans.append('Technology')
-        if 'Personal Finance' in sectionlist:
-            parse_index_page('/public/page/news-personal-finance.html','Personal Finance',omit_paid_content)
-            ans.append('Personal Finance')
-        if 'Life & Style' in sectionlist:
-            parse_index_page('/public/page/news-lifestyle-arts-entertainment.html','Life & Style',omit_paid_content)
-            ans.append('Life & Style')
-        if 'Real Estate' in sectionlist:
-            parse_index_page('/public/page/news-real-estate-homes.html','Real Estate',omit_paid_content)
-            ans.append('Real Estate')
-        if 'Careers' in sectionlist:
-            parse_index_page('/public/page/news-career-jobs.html','Careers',omit_paid_content)
-            ans.append('Careers')
-        if 'Small Business' in sectionlist:
-            parse_index_page('/public/page/news-small-business-marketing.html','Small Business',omit_paid_content)
-            ans.append('Small Business')
+        for page_name,page_title in self.sectionlist:
+            parse_index_page(page_name,page_title)
+            ans.append(page_title)
 
         ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
         return ans

From 88b6d0035ee6398dc941553bc2ead23db1c1ed3b Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 18:43:55 -0700
Subject: [PATCH 17/32] ...

---
 src/calibre/gui2/dialogs/metadata_single.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index a9130b2ea2..89b7c92125 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -532,7 +532,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
             if self.cover_fetcher.exception is not None:
                 err = self.cover_fetcher.exception
                 error_dialog(self, _('Cannot fetch cover'),
-                    _('<b>Could not fetch cover.</b><br/>')+repr(err)).exec_()
+                    _('<b>Could not fetch cover.</b><br/>')+unicode(err)).exec_()
                 return
 
             pix = QPixmap()

From df019215ca394a89aba0922c909ffd036d50aeb2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 18:47:43 -0700
Subject: [PATCH 18/32] Fix #4618 (New recipe for digitaljournal.com)

---
 resources/images/news/digitaljournal.png | Bin 0 -> 253 bytes
 resources/recipes/digitaljournal.recipe  |  52 +++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 resources/images/news/digitaljournal.png
 create mode 100644 resources/recipes/digitaljournal.recipe

diff --git a/resources/images/news/digitaljournal.png b/resources/images/news/digitaljournal.png
new file mode 100644
index 0000000000000000000000000000000000000000..ea4637b8ad5c1d1a0639614d6f4bf0aabb8fb7c2
GIT binary patch
literal 253
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*VR95qiD#}JFt$q5St3>I<E5ls2f@85i~I>gpz
z%C3Yf4S~#VybFXF82qj<%V=$SodDFKTH+c}l9E`GYL#4+3Zxi}3=9o)4GeXS3_=Vo
ztW1ooOn_W-D+7b0&fKd|H00)|WTsUjYcR01GB&pY=?7^jE{%2rYGCkm^>bP0l+XkK
D#WO;p

literal 0
HcmV?d00001

diff --git a/resources/recipes/digitaljournal.recipe b/resources/recipes/digitaljournal.recipe
new file mode 100644
index 0000000000..c49caf9580
--- /dev/null
+++ b/resources/recipes/digitaljournal.recipe
@@ -0,0 +1,52 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+digitaljournal.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DigitalJournal(BasicNewsRecipe):
+    # Recipe for digitaljournal.com: one RSS feed per department, URLs rewritten to /print/.
+    title = 'Digital Journal'
+    __author__ = 'Darko Miletic'
+    description = 'A Global Citizen Journalism News Network'
+    category = 'news, politics, USA, world'
+    publisher = 'Digital Journal'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf8'
+    language = 'en'
+
+    conversion_options = {
+        'comments': description,
+        'tags': category,
+        'language': language,
+        'publisher': publisher,
+    }
+
+    keep_only_tags = [{'name': 'div', 'attrs': {'class': ['article', 'body']}}]
+    remove_tags = [{'name': ['object', 'table']}]
+
+    feeds = [
+        (u'Latest News', u'http://digitaljournal.com/rss/?feed=latest_news'),
+        (u'Business', u'http://digitaljournal.com/rss/?feed=top_news&depname=Business'),
+        (u'Entertainment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Entertainment'),
+        (u'Environment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Environment'),
+        (u'Food', u'http://digitaljournal.com/rss/?feed=top_news&depname=Food'),
+        (u'Health', u'http://digitaljournal.com/rss/?feed=top_news&depname=Health'),
+        (u'Internet', u'http://digitaljournal.com/rss/?feed=top_news&depname=Internet'),
+        (u'Politics', u'http://digitaljournal.com/rss/?feed=top_news&depname=Politics'),
+        (u'Religion', u'http://digitaljournal.com/rss/?feed=top_news&depname=Religion'),
+        (u'Science', u'http://digitaljournal.com/rss/?feed=top_news&depname=Science'),
+        (u'Sports', u'http://digitaljournal.com/rss/?feed=top_news&depname=Sports'),
+        (u'Technology', u'http://digitaljournal.com/rss/?feed=top_news&depname=Technology'),
+        (u'World', u'http://digitaljournal.com/rss/?feed=top_news&depname=World'),
+        (u'Arts', u'http://digitaljournal.com/rss/?feed=top_news&depname=Arts'),
+    ]
+
+    def print_version(self, url):
+        # Point the article URL at its /print/ counterpart on the same host.
+        return url.replace('digitaljournal.com/', 'digitaljournal.com/print/')

From 5132aba5f0434b231cbfe3d5d02acf64d1433f6c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 21:01:43 -0700
Subject: [PATCH 19/32] New recipes for various CanWest Canadian news sources
 by Nick Redding

---
 resources/recipes/calgary_herald.recipe       | 121 +++++++++++++++
 resources/recipes/edmonton_journal.recipe     | 126 ++++++++++++++++
 resources/recipes/montreal_gazette.recipe     |  96 ++++++++++++
 resources/recipes/ottawa_citizen.recipe       | 101 +++++++++++++
 resources/recipes/regina_leader_post.recipe   | 116 ++++++++++++++
 .../recipes/saskatoon_star_phoenix.recipe     | 111 ++++++++++++++
 resources/recipes/vancouver_provice.recipe    | 136 +++++++++++++++++
 resources/recipes/vancouver_sun.recipe        | 131 ++++++++++++++++
 resources/recipes/vic_times.recipe            | 141 ++++++++++++++++++
 resources/recipes/windows_star.recipe         | 106 +++++++++++++
 10 files changed, 1185 insertions(+)
 create mode 100644 resources/recipes/calgary_herald.recipe
 create mode 100644 resources/recipes/edmonton_journal.recipe
 create mode 100644 resources/recipes/montreal_gazette.recipe
 create mode 100644 resources/recipes/ottawa_citizen.recipe
 create mode 100644 resources/recipes/regina_leader_post.recipe
 create mode 100644 resources/recipes/saskatoon_star_phoenix.recipe
 create mode 100644 resources/recipes/vancouver_provice.recipe
 create mode 100644 resources/recipes/vancouver_sun.recipe
 create mode 100644 resources/recipes/vic_times.recipe
 create mode 100644 resources/recipes/windows_star.recipe

diff --git a/resources/recipes/calgary_herald.recipe b/resources/recipes/calgary_herald.recipe
new file mode 100644
index 0000000000..884a951d96
--- /dev/null
+++ b/resources/recipes/calgary_herald.recipe
@@ -0,0 +1,121 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+    # Scrapes one paper's /news/todays-paper/index.html page; the paper is chosen by the settings below.
+    # active settings: Calgary Herald (comment these three lines out before enabling another paper)
+    title = u'Calgary Herald'
+    url_prefix = 'http://www.calgaryherald.com'
+    description = u'News from Calgary, AB'
+
+    # un-comment the following three lines for the Regina Leader-Post
+    #title = u'Regina Leader-Post'
+    #url_prefix = 'http://www.leaderpost.com'
+    #description = u'News from Regina, SK'
+
+    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    #title = u'Saskatoon Star-Phoenix'
+    #url_prefix = 'http://www.thestarphoenix.com'
+    #description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        # delete empty id attributes--they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+    # Build the section/article index as a list of (section name, [article dict, ...]) tuples.
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'  # section used until the first section-title div is seen
+        ans = ['News']
+
+        # Find each div with class "section_title02" or "featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                # NOTE(review): divtag['class'] is used as a plain string below -- confirm for multi-class divs
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                # assumes href is relative to the todays-paper directory -- TODO confirm
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+        # keep sections in the order they were appended, dropping any that collected no articles
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/edmonton_journal.recipe b/resources/recipes/edmonton_journal.recipe
new file mode 100644
index 0000000000..ac28b18f71
--- /dev/null
+++ b/resources/recipes/edmonton_journal.recipe
@@ -0,0 +1,126 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # active settings: the Edmonton Journal (comment these three lines out before enabling another paper below)
+    title = u'Edmonton Journal'
+    url_prefix = 'http://www.edmontonjournal.com'
+    description = u'News from Edmonton, AB'
+
+    # un-comment the following three lines for the Calgary Herald
+    #title = u'Calgary Herald'
+    #url_prefix = 'http://www.calgaryherald.com'
+    #description = u'News from Calgary, AB'
+
+    # un-comment the following three lines for the Regina Leader-Post
+    #title = u'Regina Leader-Post'
+    #url_prefix = 'http://www.leaderpost.com'
+    #description = u'News from Regina, SK'
+
+    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    #title = u'Saskatoon Star-Phoenix'
+    #url_prefix = 'http://www.thestarphoenix.com'
+    #description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        # delete empty id attributes -- they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/montreal_gazette.recipe b/resources/recipes/montreal_gazette.recipe
new file mode 100644
index 0000000000..3061cc37e4
--- /dev/null
+++ b/resources/recipes/montreal_gazette.recipe
@@ -0,0 +1,96 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # active settings: the Montreal Gazette
+    title = u'Montreal Gazette'
+    url_prefix = 'http://www.montrealgazette.com'
+    description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        # delete empty id attributes -- they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/ottawa_citizen.recipe b/resources/recipes/ottawa_citizen.recipe
new file mode 100644
index 0000000000..5465212d4c
--- /dev/null
+++ b/resources/recipes/ottawa_citizen.recipe
@@ -0,0 +1,101 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # active settings: the Ottawa Citizen (comment these three lines out before enabling another paper below)
+    title = u'Ottawa Citizen'
+    url_prefix = 'http://www.ottawacitizen.com'
+    description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        # delete empty id attributes -- they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/regina_leader_post.recipe b/resources/recipes/regina_leader_post.recipe
new file mode 100644
index 0000000000..9efec51848
--- /dev/null
+++ b/resources/recipes/regina_leader_post.recipe
@@ -0,0 +1,116 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # active settings: the Regina Leader-Post (comment these three lines out before enabling another paper below)
+    title = u'Regina Leader-Post'
+    url_prefix = 'http://www.leaderpost.com'
+    description = u'News from Regina, SK'
+
+    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    #title = u'Saskatoon Star-Phoenix'
+    #url_prefix = 'http://www.thestarphoenix.com'
+    #description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        # delete empty id attributes -- they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/saskatoon_star_phoenix.recipe b/resources/recipes/saskatoon_star_phoenix.recipe
new file mode 100644
index 0000000000..25330478d4
--- /dev/null
+++ b/resources/recipes/saskatoon_star_phoenix.recipe
@@ -0,0 +1,111 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # active settings: the Saskatoon Star-Phoenix (comment these three lines out before enabling another paper below)
+    title = u'Saskatoon Star-Phoenix'
+    url_prefix = 'http://www.thestarphoenix.com'
+    description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        # delete empty id attributes -- they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/vancouver_provice.recipe b/resources/recipes/vancouver_provice.recipe
new file mode 100644
index 0000000000..9375670c59
--- /dev/null
+++ b/resources/recipes/vancouver_provice.recipe
@@ -0,0 +1,136 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # active settings: the Vancouver Province (comment these three lines out before enabling another paper below)
+    title = u'Vancouver Province'
+    url_prefix = 'http://www.theprovince.com'
+    description = u'News from Vancouver, BC'
+
+    # un-comment the following three lines for the Vancouver Sun
+    #title = u'Vancouver Sun'
+    #url_prefix = 'http://www.vancouversun.com'
+    #description = u'News from Vancouver, BC'
+
+    # un-comment the following three lines for the Edmonton Journal
+    #title = u'Edmonton Journal'
+    #url_prefix = 'http://www.edmontonjournal.com'
+    #description = u'News from Edmonton, AB'
+
+    # un-comment the following three lines for the Calgary Herald
+    #title = u'Calgary Herald'
+    #url_prefix = 'http://www.calgaryherald.com'
+    #description = u'News from Calgary, AB'
+
+    # un-comment the following three lines for the Regina Leader-Post
+    #title = u'Regina Leader-Post'
+    #url_prefix = 'http://www.leaderpost.com'
+    #description = u'News from Regina, SK'
+
+    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    #title = u'Saskatoon Star-Phoenix'
+    #url_prefix = 'http://www.thestarphoenix.com'
+    #description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        # delete empty id attributes -- they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/vancouver_sun.recipe b/resources/recipes/vancouver_sun.recipe
new file mode 100644
index 0000000000..8f12869bf9
--- /dev/null
+++ b/resources/recipes/vancouver_sun.recipe
@@ -0,0 +1,131 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # un-comment the following three lines for the Vancouver Sun
+    title = u'Vancouver Sun'
+    url_prefix = 'http://www.vancouversun.com'
+    description = u'News from Vancouver, BC'
+
+    # un-comment the following three lines for the Edmonton Journal
+    #title = u'Edmonton Journal'
+    #url_prefix = 'http://www.edmontonjournal.com'
+    #description = u'News from Edmonton, AB'
+
+    # un-comment the following three lines for the Calgary Herald
+    #title = u'Calgary Herald'
+    #url_prefix = 'http://www.calgaryherald.com'
+    #description = u'News from Calgary, AB'
+
+    # un-comment the following three lines for the Regina Leader-Post
+    #title = u'Regina Leader-Post'
+    #url_prefix = 'http://www.leaderpost.com'
+    #description = u'News from Regina, SK'
+
+    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    #title = u'Saskatoon Star-Phoenix'
+    #url_prefix = 'http://www.thestarphoenix.com'
+    #description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        #delete empty id attributes--they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/vic_times.recipe b/resources/recipes/vic_times.recipe
new file mode 100644
index 0000000000..2dc8e96003
--- /dev/null
+++ b/resources/recipes/vic_times.recipe
@@ -0,0 +1,141 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # un-comment the following three lines for the Victoria Times Colonist
+    title = u'Victoria Times Colonist'
+    url_prefix = 'http://www.timescolonist.com'
+    description = u'News from Victoria, BC'
+
+    # un-comment the following three lines for the Vancouver Province
+    #title = u'Vancouver Province'
+    #url_prefix = 'http://www.theprovince.com'
+    #description = u'News from Vancouver, BC'
+
+    # un-comment the following three lines for the Vancouver Sun
+    #title = u'Vancouver Sun'
+    #url_prefix = 'http://www.vancouversun.com'
+    #description = u'News from Vancouver, BC'
+
+    # un-comment the following three lines for the Edmonton Journal
+    #title = u'Edmonton Journal'
+    #url_prefix = 'http://www.edmontonjournal.com'
+    #description = u'News from Edmonton, AB'
+
+    # un-comment the following three lines for the Calgary Herald
+    #title = u'Calgary Herald'
+    #url_prefix = 'http://www.calgaryherald.com'
+    #description = u'News from Calgary, AB'
+
+    # un-comment the following three lines for the Regina Leader-Post
+    #title = u'Regina Leader-Post'
+    #url_prefix = 'http://www.leaderpost.com'
+    #description = u'News from Regina, SK'
+
+    # un-comment the following three lines for the Saskatoon Star-Phoenix
+    #title = u'Saskatoon Star-Phoenix'
+    #url_prefix = 'http://www.thestarphoenix.com'
+    #description = u'News from Saskatoon, SK'
+
+    # un-comment the following three lines for the Windsor Star
+    #title = u'Windsor Star'
+    #url_prefix = 'http://www.windsorstar.com'
+    #description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        #delete empty id attributes--they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans
diff --git a/resources/recipes/windows_star.recipe b/resources/recipes/windows_star.recipe
new file mode 100644
index 0000000000..4d34261bb7
--- /dev/null
+++ b/resources/recipes/windows_star.recipe
@@ -0,0 +1,106 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+
+'''
+www.canada.com
+'''
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+
+class CanWestPaper(BasicNewsRecipe):
+
+    # un-comment the following three lines for the Windsor Star
+    title = u'Windsor Star'
+    url_prefix = 'http://www.windsorstar.com'
+    description = u'News from Windsor, ON'
+
+    # un-comment the following three lines for the Ottawa Citizen
+    #title = u'Ottawa Citizen'
+    #url_prefix = 'http://www.ottawacitizen.com'
+    #description = u'News from Ottawa, ON'
+
+    # un-comment the following three lines for the Montreal Gazette
+    #title = u'Montreal Gazette'
+    #url_prefix = 'http://www.montrealgazette.com'
+    #description = u'News from Montreal, QC'
+
+
+    language = 'en_CA'
+    __author__ = 'Nick Redding'
+    no_stylesheets = True
+    timefmt = ' [%b %d]'
+    extra_css = '''
+                .timestamp {  font-size:xx-small; display: block; }
+                #storyheader { font-size: medium; }
+                #storyheader h1 { font-size: x-large; }
+                #storyheader h2 { font-size: large;  font-style: italic; }
+                .byline { font-size:xx-small; }
+                #photocaption { font-size: small; font-style: italic }
+                #photocredit { font-size: xx-small; }'''
+    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
+    remove_tags = [{'class':'comments'},
+                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
+                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
+                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
+                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
+                   dict(name='div', attrs={'class':'rule_grey_solid'}),
+                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
+
+    def preprocess_html(self,soup):
+        #delete empty id attributes--they screw up the TOC for unknown reasons
+        divtags = soup.findAll('div',attrs={'id':''})
+        if divtags:
+            for div in divtags:
+                del(div['id'])
+        return soup
+
+
+    def parse_index(self):
+        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
+
+        articles = {}
+        key = 'News'
+        ans = ['News']
+
+        # Find each div with class="section_title02" or class="featurecontent"
+        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
+                #self.log(" div class = %s" % divtag['class'])
+                if divtag['class'].startswith('section_title'):
+                    # div contains section title
+                    if not divtag.h3:
+                        continue
+                    key = self.tag_to_string(divtag.h3,False)
+                    ans.append(key)
+                    self.log("Section name %s" % key)
+                    continue
+                # div contains article data
+                h1tag = divtag.find('h1')
+                if not h1tag:
+                    continue
+                atag = h1tag.find('a',href=True)
+                if not atag:
+                    continue
+                url = self.url_prefix+'/news/todays-paper/'+atag['href']
+                #self.log("Section %s" % key)
+                #self.log("url %s" % url)
+                title = self.tag_to_string(atag,False)
+                #self.log("title %s" % title)
+                pubdate = ''
+                description = ''
+                ptag = divtag.find('p');
+                if ptag:
+                    description = self.tag_to_string(ptag,False)
+                    #self.log("description %s" % description)
+                author = ''
+                autag = divtag.find('h4')
+                if autag:
+                    author = self.tag_to_string(autag,False)
+                    #self.log("author %s" % author)
+                if not articles.has_key(key):
+                    articles[key] = []
+                articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
+
+        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+        return ans

From 46fa724b0aec92c2538207b6f9cfe9b6e5b833cd Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 21:05:26 -0700
Subject: [PATCH 20/32] Fix #4623 (WSJ News Download Fails with Included
 Recipe)

---
 resources/recipes/wsj.recipe | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe
index 70c05b1ded..3b6f56e3a3 100644
--- a/resources/recipes/wsj.recipe
+++ b/resources/recipes/wsj.recipe
@@ -91,6 +91,8 @@ class WallStreetJournal(BasicNewsRecipe):
                     url = url.partition('#')[0]
                     desc = ''
                     d = x.findNextSibling(True)
+                    if d is None:
+                        continue
                     if d.get('class', None) == 'arialResize':
                         desc = self.tag_to_string(d)
                         desc = desc.partition(u'\u2022')[0]

From 419b7e42b371c0d30fba326e6e9e29d44db0c7c1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 21:15:12 -0700
Subject: [PATCH 21/32] ...

---
 resources/recipes/wsj.recipe | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe
index 3b6f56e3a3..da65471e60 100644
--- a/resources/recipes/wsj.recipe
+++ b/resources/recipes/wsj.recipe
@@ -91,9 +91,7 @@ class WallStreetJournal(BasicNewsRecipe):
                     url = url.partition('#')[0]
                     desc = ''
                     d = x.findNextSibling(True)
-                    if d is None:
-                        continue
-                    if d.get('class', None) == 'arialResize':
+                    if d is not None and d.get('class', None) == 'arialResize':
                         desc = self.tag_to_string(d)
                         desc = desc.partition(u'\u2022')[0]
                     self.log('\t\tFound article:', title)

From ae2b434b35cafd9a42a2d10d3093ba1435e2ff2e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 21:54:34 -0700
Subject: [PATCH 22/32] ...

---
 resources/recipes/wsj.recipe | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe
index da65471e60..3ced77023d 100644
--- a/resources/recipes/wsj.recipe
+++ b/resources/recipes/wsj.recipe
@@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
 
 
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
 
 # http://online.wsj.com/page/us_in_todays_paper.html
 
@@ -67,6 +68,13 @@ class WallStreetJournal(BasicNewsRecipe):
         def parse_index(self):
             soup = self.wsj_get_index()
 
+            year = strftime('%Y')
+            for x in soup.findAll('td', attrs={'class':'b14'}):
+                txt = self.tag_to_string(x).strip()
+                if year in txt:
+                    self.timefmt = ' [%s]'%txt
+                    break
+
             left_column = soup.find(
                     text=lambda t: 'begin ITP Left Column' in str(t))
 

From 1e12cfeb744de198c51d0dc5d4f72a8b669b3b30 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2010 23:16:30 -0700
Subject: [PATCH 23/32] Fix #4606 (Viewer crash on MOBI files created from PDB)

---
 resources/recipes/globe_and_mail.recipe | 2 +-
 src/calibre/gui2/viewer/documentview.py | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/globe_and_mail.recipe b/resources/recipes/globe_and_mail.recipe
index 71d6b2b304..0ef8bd9dd8 100644
--- a/resources/recipes/globe_and_mail.recipe
+++ b/resources/recipes/globe_and_mail.recipe
@@ -32,7 +32,7 @@ class GlobeAndMail(BasicNewsRecipe):
 		'gallery-controls', 'video', 'galleryLoading','deck','header',
         'toolsBottom'] },
 		{'class':['credit','inline-img-caption','tab-pointer'] },
-		dict(name='div', attrs={'id':'lead-photo'}),
+		dict(name='div', attrs={'id':['lead-photo', 'most-popular-story']}),
 		dict(name='div', attrs={'class':'right'}),
 		dict(name='div', attrs={'id':'footer'}),
 		dict(name='div', attrs={'id':'beta-msg'}),
diff --git a/src/calibre/gui2/viewer/documentview.py b/src/calibre/gui2/viewer/documentview.py
index 790b1c4f2f..aedd709bb8 100644
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@@ -350,7 +350,13 @@ class Document(QWebPage):
         return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
 
     def set_bottom_padding(self, amount):
-        self.javascript('$("body").css("padding-bottom", "%dpx")' % amount)
+        padding = '%dpx'%amount
+        try:
+            old_padding = unicode(self.javascript('$("body").css("padding-bottom")').toString())
+        except:
+            old_padding = ''
+        if old_padding != padding:
+            self.javascript('$("body").css("padding-bottom", "%s")' % padding)
 
 
 class EntityDeclarationProcessor(object):

From 7d2a8dd624506a63ef080fd8746c5fe84a56919f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 09:29:20 -0700
Subject: [PATCH 24/32] ...

---
 resources/recipes/ftd.recipe | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/resources/recipes/ftd.recipe b/resources/recipes/ftd.recipe
index d18f9bdc56..67eb4d08b1 100644
--- a/resources/recipes/ftd.recipe
+++ b/resources/recipes/ftd.recipe
@@ -23,6 +23,7 @@ class FTDe(BasicNewsRecipe):
 		   dict(id='topbanner'),
 		   dict(id='seitenkopf'),
 		   dict(id='BoxA-0-0-0'),
+		   #dict(id='BoxA-2-0-0'),
 		   dict(id='footer'),
 		   dict(id='rating_open'),
 		   dict(id='ADS_Top'),
@@ -59,6 +60,7 @@ class FTDe(BasicNewsRecipe):
 		   dict(name='div', attrs={'class':'relatedhalb'}),
 		   dict(name='div', attrs={'class':'box boxListScrollOutline'}),
 		   dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
+		   dict(name='div', attrs={'class':'box boxTeaser boxPhotoshow boxImgWide'}),
 		   dict(name='div', attrs={'class':'box boxTeaser'}),
 		   dict(name='div', attrs={'class':'tagCloud'}),
 		   dict(name='div', attrs={'class':'pollView'}),

From d61af79c8c0af73e1e8999d9e5e7cfa262d6d299 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 10:08:24 -0700
Subject: [PATCH 25/32] ...

---
 resources/images/news/ledevoir.png | Bin 0 -> 531 bytes
 src/calibre/ebooks/pdf/reflow.py   |  12 ++++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 resources/images/news/ledevoir.png

diff --git a/resources/images/news/ledevoir.png b/resources/images/news/ledevoir.png
new file mode 100644
index 0000000000000000000000000000000000000000..eabcf9700489650323185e7f22af2083bb97bfc4
GIT binary patch
literal 531
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87#M9lT^vI!P9L3Y=<Sp!(k8Dz{dSsDo>Sk`
zMbUh_ZcO2v^|$qp`#0uQis_!m@3^>|yRH;q4v#*v?!#W*GROCOTOM3!G@f(#^WOWF
zmiNUYb@m-$O!Q`-d)(~U8xyu>#@=JbpMTErddhaabf?}_i#|7JRvpirbhXKeHgcEq
zwrfs5z2tIc-s;e{!-{obK>-PqrAs}R1ZjTyDYNyK?CGaQ8VNlI=Whu7@G3^{e1^%Q
zn>lPZE^zMo@XAH0F*UNpYCWS_3HNO=m4`BxH@2NEu{wJGwpvN=eleAvmY(A-A3oRb
zW6p27nsu~lui8u-UO$EV);BgL@EEn-e9q%3!ZrQJ&%HV&yJOFtJGWCy?EB^ir7DIq
zeO`R8y7K<DMW1le!RtWw5&uUqDVw)M>F>C&$KUUNB+2lQUiY58^?%;y|6!C!)4XTA
zd0ja$P*qD@BT7;dOH!?pi&B9UgOP!ufv$m}u90Dgp@Efw84&3jm|Gbb<TotLL(!0%
jpOTqYiK4;C%E;2n#0a8c`QsEDpaup{S3j3^P6<r_pp4Qb

literal 0
HcmV?d00001

diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index f4bdb9c7ac..721df28537 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -140,6 +140,18 @@ class Interval(object):
     def __hash__(self):
         return hash('(%f,%f)'%self.left, self.right)
 
+class Region(object):
+
+    def __init__(self):
+        self.columns = []
+        self.top = self.bottom = self.left = self.right = self.width = self.height = 0
+
+    def add_columns(self, columns):
+        if not self.columns:
+            for x in sorted(columns, cmp=lambda x,y: cmp(x.left, y.left)):
+                self.columns.append(x)
+        else:
+           pass
 
 class Page(object):
 

From 85b02601563d25d00805b7c2e64e10f37ed17c77 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 10:45:29 -0700
Subject: [PATCH 26/32] Fix #4629 (Nook can't open files with "#" in name)

---
 src/calibre/devices/nook/driver.py  | 3 ++-
 src/calibre/devices/usbms/device.py | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/calibre/devices/nook/driver.py b/src/calibre/devices/nook/driver.py
index c74a964648..16bf9479d8 100644
--- a/src/calibre/devices/nook/driver.py
+++ b/src/calibre/devices/nook/driver.py
@@ -86,4 +86,5 @@ class NOOK(USBMS):
 
         return drives
 
-
+    def sanitize_path_components(self, components):
+        return [x.replace('#', '_') for x in components]
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index ab91de2abf..6ddfc81cf3 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -782,6 +782,13 @@ class Device(DeviceConfig, DevicePlugin):
         '''
         return default
 
+    def sanitize_path_components(self, components):
+        '''
+        Perform any device specific sanitization on the path components
+        for files to be uploaded to the device
+        '''
+        return components
+
     def create_upload_path(self, path, mdata, fname):
         path = os.path.abspath(path)
         extra_components = []
@@ -834,6 +841,7 @@ class Device(DeviceConfig, DevicePlugin):
 
         extra_components = list(map(remove_trailing_periods, extra_components))
         components = shorten_components_to(250 - len(path), extra_components)
+        components = self.sanitize_path_components(components)
         filepath = os.path.join(path, *components)
         filedir = os.path.dirname(filepath)
 

From 69c10e202c6b2a4db0d9148dd0ac30cb1cce4d6a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 10:54:25 -0700
Subject: [PATCH 27/32] New recipe for The Columbia Journalism Review by
 XanthanGum

---
 resources/recipes/cjr.recipe | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 resources/recipes/cjr.recipe

diff --git a/resources/recipes/cjr.recipe b/resources/recipes/cjr.recipe
new file mode 100644
index 0000000000..d581184c4e
--- /dev/null
+++ b/resources/recipes/cjr.recipe
@@ -0,0 +1,15 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CJR(BasicNewsRecipe):
+    title              = u'Columbia Journalism Review'
+    __author__         = u'Xanthan Gum'
+    description        = 'News about journalism.'
+    language = 'en'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    feeds = [(u'News Stories', u'http://www.cjr.org/index.xml')]
+
+    def print_version(self, url):
+        return url + '?page=all&print=true'

From 3e69d4c2aa3cf34d61e72b48b0e11dbc6edf83f3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 11:28:45 -0700
Subject: [PATCH 28/32] News downloads: When getting an article URL from a RSS
 feed, look first for an original article link. This speeds up the download of
 news services that use a syndication service like feedburner or pheedo to
 publish their RSS feeds.

---
 src/calibre/web/feeds/news.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index dfcadf03ed..60b5ad0174 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -357,9 +357,17 @@ class BasicNewsRecipe(Recipe):
         Override in a subclass to customize extraction of the :term:`URL` that points
         to the content for each article. Return the
         article URL. It is called with `article`, an object representing a parsed article
-        from a feed. See `feedsparser <http://www.feedparser.org/docs/>`_.
-        By default it returns `article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
+        from a feed. See `feedparser <http://www.feedparser.org/docs/>`_.
+        By default it looks for the original link (for feeds syndicated via a
+        service like feedburner or pheedo) and if found,
+        returns that or else returns
+        `article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
         '''
+        for key in article.keys():
+            if key.endswith('_origlink'):
+                url = article[key]
+                if url and url.startswith('http://'):
+                    return url
         return article.get('link',  None)
 
     def preprocess_html(self, soup):

From b3282b3ac569d9dac1f80bff98a641fe0ed18839 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 13:11:37 -0700
Subject: [PATCH 29/32] ...

---
 resources/recipes/wsj_free.recipe | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe
index 495a7c343b..b190f43849 100644
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@@ -8,7 +8,7 @@ online.wsj.com
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Tag, NavigableString
-from datetime import timedelta, datetime, date
+from datetime import timedelta, date
 
 class WSJ(BasicNewsRecipe):
     # formatting adapted from original recipe by Kovid Goyal and Sujata Raman
@@ -74,16 +74,33 @@ class WSJ(BasicNewsRecipe):
         br = BasicNewsRecipe.get_browser()
         return br
 
+
     def preprocess_html(self,soup):
+
+        def decode_us_date(datestr):
+            udate = datestr.strip().lower().split()
+            m = ['january','february','march','april','may','june','july','august','september','october','november','december'].index(udate[0])+1
+            d = int(udate[1])
+            y = int(udate[2])
+            return date(y,m,d)
+
+        # check if article is paid content
+        if self.omit_paid_content:
+            divtags = soup.findAll('div','tooltip')
+            if divtags:
+                for divtag in divtags:
+                    if divtag.find(text="Subscriber Content"):
+                        return None
+
         # check if article is too old
         datetag = soup.find('li',attrs={'class' : re.compile("^dateStamp")})
         if datetag:
             dateline_string = self.tag_to_string(datetag,False)
             date_items = dateline_string.split(',')
             datestring = date_items[0]+date_items[1]
-            article_date = datetime.strptime(datestring.title(),"%B %d %Y")
+            article_date = decode_us_date(datestring)
             earliest_date = date.today() - timedelta(days=self.oldest_article)
-            if article_date.date() < earliest_date:
+            if article_date < earliest_date:
                 self.log("Skipping article dated %s" % datestring)
                 return None
             datetag.parent.extract()

From e2580655d1acf538a2b746d7d9ada517a6734c1a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 14:00:51 -0700
Subject: [PATCH 30/32] Development environment: First look for resources in
 the location pointed to by CALIBRE_DEVELOP_FROM. If not found, use the normal
 resource location

---
 src/calibre/gui2/tag_view.py   |  2 +-
 src/calibre/utils/resources.py | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index 0f02f2a591..8ad0dff4d2 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -215,7 +215,7 @@ class TagsModel(QAbstractItemModel):
             return QModelIndex()
 
         child_item = index.internalPointer()
-        parent_item = child_item.parent
+        parent_item = getattr(child_item, 'parent', None)
 
         if parent_item is self.root_item or parent_item is None:
             return QModelIndex()
diff --git a/src/calibre/utils/resources.py b/src/calibre/utils/resources.py
index adfbebd9f0..a69db34f2e 100644
--- a/src/calibre/utils/resources.py
+++ b/src/calibre/utils/resources.py
@@ -9,9 +9,22 @@ __docformat__ = 'restructuredtext en'
 
 import __builtin__, sys, os
 
+_dev_path = os.environ.get('CALIBRE_DEVELOP_FROM', None)
+if _dev_path is not None:
+    _dev_path = os.path.join(os.path.abspath(os.path.dirname(_dev_path)), 'resources')
+    if not os.path.exists(_dev_path):
+        _dev_path = None
+
 def get_path(path, data=False):
+    global _dev_path
     path = path.replace(os.sep, '/')
-    path = os.path.join(sys.resources_location, *path.split('/'))
+    base = None
+    if _dev_path is not None:
+        if os.path.exists(os.path.join(_dev_path, *path.split('/'))):
+            base = _dev_path
+    if base is None:
+        base = sys.resources_location
+    path = os.path.join(base, *path.split('/'))
     if data:
         return open(path, 'rb').read()
     return path

From 3c084bb83e6c67f06992052c0493a45262781724 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 14:09:56 -0700
Subject: [PATCH 31/32] Fix #4428 (Became a new user today. Calibre does not
 "see" my Blackberry 8330)

---
 src/calibre/devices/blackberry/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py
index 1d96d4118f..ec8a7e8f49 100644
--- a/src/calibre/devices/blackberry/driver.py
+++ b/src/calibre/devices/blackberry/driver.py
@@ -18,7 +18,7 @@ class BLACKBERRY(USBMS):
 
     VENDOR_ID   = [0x0fca]
     PRODUCT_ID  = [0x8004, 0x0004]
-    BCD         = [0x0200, 0x0107]
+    BCD         = [0x0200, 0x0107, 0x0201]
 
     VENDOR_NAME = 'RIM'
     WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'

From 24a6d43b9115a4f6d4bb451c31158b0bf4618186 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 21 Jan 2010 14:50:39 -0700
Subject: [PATCH 32/32] Improve handling of justification. Now calibre will
 explicitly change the justification of all left-aligned paragraphs to
 justified or vice versa depending on the justification setting. This should
 make it possible to robustly convert all content to either justified or not.
 calibre will not touch centered or right-aligned content.

---
 src/calibre/ebooks/fb2/fb2ml.py               |  5 +-
 src/calibre/ebooks/lit/output.py              |  2 +-
 src/calibre/ebooks/lit/writer.py              | 13 ++-
 src/calibre/ebooks/mobi/mobiml.py             |  3 +-
 src/calibre/ebooks/oeb/factory.py             | 99 -------------------
 src/calibre/ebooks/oeb/stylizer.py            | 19 +++-
 src/calibre/ebooks/oeb/transforms/flatcss.py  |  2 +-
 .../ebooks/oeb/transforms/manglecase.py       | 15 +--
 .../ebooks/oeb/transforms/rasterize.py        |  3 +-
 src/calibre/ebooks/pml/pmlml.py               |  3 +-
 src/calibre/ebooks/rb/rbml.py                 |  5 +-
 src/calibre/ebooks/rtf/rtfml.py               |  5 +-
 src/calibre/ebooks/txt/txtml.py               |  2 +-
 13 files changed, 49 insertions(+), 127 deletions(-)
 delete mode 100644 src/calibre/ebooks/oeb/factory.py

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 42feeb2330..c8428cf136 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -132,7 +132,8 @@ class FB2MLizer(object):
             href = self.oeb_book.guide['titlepage'].href
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
+                        self.opts, self.opts.output_profile)
                 output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
         return output
 
@@ -152,7 +153,7 @@ class FB2MLizer(object):
         text = []
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to FictionBook2 XML' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             text.append(self.add_page_anchor(item))
             text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
         return ''.join(text)
diff --git a/src/calibre/ebooks/lit/output.py b/src/calibre/ebooks/lit/output.py
index 2a08ff51a8..423fb9ce7c 100644
--- a/src/calibre/ebooks/lit/output.py
+++ b/src/calibre/ebooks/lit/output.py
@@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin):
         mangler(oeb, opts)
         rasterizer = SVGRasterizer()
         rasterizer(oeb, opts)
-        lit = LitWriter()
+        lit = LitWriter(self.opts)
         lit(oeb, output_path)
 
 
diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py
index 6dd5068032..cf9ea6aa77 100644
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@@ -134,7 +134,7 @@ def warn(x):
 class ReBinary(object):
     NSRMAP = {'': None, XML_NS: 'xml'}
 
-    def __init__(self, root, item, oeb, map=HTML_MAP):
+    def __init__(self, root, item, oeb, opts, map=HTML_MAP):
         self.item = item
         self.logger = oeb.logger
         self.manifest = oeb.manifest
@@ -143,7 +143,7 @@ class ReBinary(object):
         self.anchors = []
         self.page_breaks = []
         self.is_html  = is_html = map is HTML_MAP
-        self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
+        self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
         self.tree_to_binary(root)
         self.content = self.buf.getvalue()
         self.ahc = self.build_ahc() if is_html else None
@@ -295,9 +295,8 @@ def preserve(function):
     return wrapper
 
 class LitWriter(object):
-    def __init__(self):
-        # Wow, no options
-        pass
+    def __init__(self, opts):
+        self.opts = opts
 
     def _litize_oeb(self):
         oeb = self._oeb
@@ -469,7 +468,7 @@ class LitWriter(object):
             secnum = 0
             if isinstance(data, etree._Element):
                 self._add_folder(name)
-                rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
+                rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP)
                 self._add_file(name + '/ahc', rebin.ahc, 0)
                 self._add_file(name + '/aht', rebin.aht, 0)
                 item.page_breaks = rebin.page_breaks
@@ -562,7 +561,7 @@ class LitWriter(object):
         meta.attrib['ms--minimum_level'] = '0'
         meta.attrib['ms--attr5'] = '1'
         meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
-        rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
+        rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
         meta = rebin.content
         self._meta = meta
         self._add_file('/meta', meta)
diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index aa69ba446b..f958b63a12 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -92,6 +92,7 @@ class MobiMLizer(object):
     def __call__(self, oeb, context):
         oeb.logger.info('Converting XHTML to Mobipocket markup...')
         self.oeb = oeb
+        self.opts = context
         self.profile = profile = context.dest
         self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
         self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
@@ -114,7 +115,7 @@ class MobiMLizer(object):
     def mobimlize_spine(self):
         'Iterate over the spine and convert it to MOBIML'
         for item in self.oeb.spine:
-            stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
             body = item.data.find(XHTML('body'))
             nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
             nbody = etree.SubElement(nroot, XHTML('body'))
diff --git a/src/calibre/ebooks/oeb/factory.py b/src/calibre/ebooks/oeb/factory.py
deleted file mode 100644
index 8add71d20d..0000000000
--- a/src/calibre/ebooks/oeb/factory.py
+++ /dev/null
@@ -1,99 +0,0 @@
-'''
-Registry associating file extensions with Reader classes.
-'''
-from __future__ import with_statement
-
-__license__   = 'GPL v3'
-__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
-
-import sys, os, logging
-from itertools import chain
-import calibre
-from calibre.ebooks.oeb.base import OEBError
-from calibre.ebooks.oeb.reader import OEBReader
-from calibre.ebooks.oeb.writer import OEBWriter
-from calibre.ebooks.lit.reader import LitReader
-from calibre.ebooks.lit.writer import LitWriter
-from calibre.ebooks.mobi.reader import MobiReader
-from calibre.ebooks.mobi.writer import MobiWriter
-from calibre.ebooks.oeb.base import OEBBook
-from calibre.ebooks.oeb.profile import Context
-from calibre.utils.config import Config
-
-__all__ = ['get_reader']
-
-REGISTRY = {
-    '.opf': (OEBReader, None),
-    '.lit': (LitReader, LitWriter),
-    '.mobi': (MobiReader, MobiWriter),
-    }
-
-def ReaderFactory(path):
-    if os.path.isdir(path):
-        return OEBReader
-    ext = os.path.splitext(path)[1].lower()
-    Reader = REGISTRY.get(ext, (None, None))[0]
-    if Reader is None:
-        raise OEBError('Unknown e-book file extension %r' % ext)
-    return Reader
-
-def WriterFactory(path):
-    if os.path.isdir(path):
-        return OEBWriter
-    ext = os.path.splitext(path)[1].lower()
-    if not os.path.exists(path) and not ext:
-        return OEBWriter
-    Writer = REGISTRY.get(ext, (None, None))[1]
-    if Writer is None:
-        raise OEBError('Unknown e-book file extension %r' % ext)
-    return Writer
-
-
-def option_parser(Reader, Writer):
-    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
-    Reader.config(cfg)
-    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
-        Transform.config(cfg)
-    Writer.config(cfg)
-    parser = cfg.option_parser()
-    parser.add_option('--encoding', default=None,
-        help=_('Character encoding for input. Default is to auto detect.'))
-    parser.add_option('-o', '--output', default=None, 
-        help=_('Output file. Default is derived from input filename.'))
-    parser.add_option('-p', '--pretty-print', action='store_true',
-        default=False, help=_('Produce more human-readable XML output.'))
-    parser.add_option('-v', '--verbose', default=0, action='count',
-        help=_('Useful for debugging.'))
-    return parser
-
-def main(argv=sys.argv):
-    if len(argv) < 3:
-        print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]")
-        return 1
-    inpath, outpath = argv[1], argv[2]
-    Reader = ReaderFactory(inpath)
-    Writer = WriterFactory(outpath)
-    parser = option_parser(Reader, Writer)
-    opts, args = parser.parse_args(argv[3:])
-    if len(args) != 0:
-        parser.print_help()
-        return 1
-    logger = logging.getLogger('ebook-convert')
-    calibre.setup_cli_handlers(logger, logging.DEBUG)
-    encoding = opts.encoding
-    pretty_print = opts.pretty_print
-    oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
-    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
-    reader = Reader.generate(opts)
-    writer = Writer.generate(opts)
-    transforms = []
-    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
-        transforms.append(Transform.generate(opts))
-    reader(oeb, inpath)
-    for transform in transforms:
-        transform(oeb, context)
-    writer(oeb, outpath)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index d0e394b9e5..26fb4ca980 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -110,9 +110,9 @@ class CSSSelector(etree.XPath):
 class Stylizer(object):
     STYLESHEETS = WeakKeyDictionary()
 
-    def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
+    def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
             extra_css='', user_css=''):
-        self.oeb = oeb
+        self.oeb, self.opts = oeb, opts
         self.profile = profile
         self.logger = oeb.logger
         item = oeb.manifest.hrefs[path]
@@ -249,6 +249,8 @@ class Stylizer(object):
                 style.update(self._normalize_font(prop.cssValue))
             elif name == 'list-style':
                 style.update(self._normalize_list_style(prop.cssValue))
+            elif name == 'text-align':
+                style.update(self._normalize_text_align(prop.cssValue))
             else:
                 style[name] = prop.value
         if 'font-size' in style:
@@ -306,6 +308,19 @@ class Stylizer(object):
 
         return style
 
+    def _normalize_text_align(self, cssvalue):
+        style = {}
+        text = cssvalue.cssText
+        if text == 'inherit':
+            style['text-align'] = 'inherit'
+        else:
+            if text in ('left', 'justify'):
+                val = 'left' if self.opts.dont_justify else 'justify'
+                style['text-align'] = val
+            else:
+                style['text-align'] = text
+        return style
+
     def _normalize_font(self, cssvalue):
         composition = ('font-style', 'font-variant', 'font-weight',
                        'font-size', 'line-height', 'font-family')
diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py
index 61226ca4f4..1eb6afc1b5 100644
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -141,7 +141,7 @@ class CSSFlattener(object):
             bs.append('text-align: '+ \
                     ('left' if self.context.dont_justify else 'justify'))
             body.set('style', '; '.join(bs))
-            stylizer = Stylizer(html, item.href, self.oeb, profile,
+            stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
                     user_css=self.context.extra_css,
                     extra_css=css)
             self.stylizers[item] = stylizer
diff --git a/src/calibre/ebooks/oeb/transforms/manglecase.py b/src/calibre/ebooks/oeb/transforms/manglecase.py
index 4b852db6c4..04bf63ac1d 100644
--- a/src/calibre/ebooks/oeb/transforms/manglecase.py
+++ b/src/calibre/ebooks/oeb/transforms/manglecase.py
@@ -29,13 +29,14 @@ class CaseMangler(object):
     @classmethod
     def generate(cls, opts):
         return cls()
-    
+
     def __call__(self, oeb, context):
         oeb.logger.info('Applying case-transforming CSS...')
         self.oeb = oeb
+        self.opts = context
         self.profile = context.source
         self.mangle_spine()
-    
+
     def mangle_spine(self):
         id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
         self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
@@ -44,9 +45,9 @@ class CaseMangler(object):
             relhref = item.relhref(href)
             etree.SubElement(html.find(XHTML('head')), XHTML('link'),
                              rel='stylesheet', href=relhref, type=CSS_MIME)
-            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
+            stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
             self.mangle_elem(html.find(XHTML('body')), stylizer)
-    
+
     def text_transform(self, transform, text):
         if transform == 'capitalize':
             return text.title()
@@ -55,7 +56,7 @@ class CaseMangler(object):
         elif transform == 'lowercase':
             return text.lower()
         return text
-    
+
     def split_text(self, text):
         results = ['']
         isupper = text[0].isupper()
@@ -66,7 +67,7 @@ class CaseMangler(object):
                 isupper = not isupper
                 results.append(char)
         return results
-    
+
     def smallcaps_elem(self, elem, attr):
         texts = self.split_text(getattr(elem, attr))
         setattr(elem, attr, None)
@@ -90,7 +91,7 @@ class CaseMangler(object):
                     last.tail = tail
                     child.tail = None
                 last = child
-    
+
     def mangle_elem(self, elem, stylizer):
         if not isinstance(elem.tag, basestring) or \
            namespace(elem.tag) != XHTML_NS:
diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py
index 30357b10d2..ac28e51b15 100644
--- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py
@@ -44,6 +44,7 @@ class SVGRasterizer(object):
     def __call__(self, oeb, context):
         oeb.logger.info('Rasterizing SVG images...')
         self.oeb = oeb
+        self.opts = context
         self.profile = context.dest
         self.images = {}
         self.dataize_manifest()
@@ -102,7 +103,7 @@ class SVGRasterizer(object):
     def rasterize_spine(self):
         for item in self.oeb.spine:
             html = item.data
-            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
+            stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
             self.rasterize_item(item, stylizer)
 
     def rasterize_item(self, item, stylizer):
diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py
index 4f3d5f23df..e3609fcddb 100644
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@@ -113,7 +113,8 @@ class PMLMLizer(object):
             href = self.oeb_book.guide['titlepage'].href
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
+                        self.opts, self.opts.output_profile)
                 output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
         return output
 
diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py
index 5574aa94b6..50153d7d4d 100644
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@@ -90,7 +90,8 @@ class RBMLizer(object):
             href = self.oeb_book.guide['titlepage'].href
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
+                        self.opts, self.opts.output_profile)
                 output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
         return output
 
@@ -111,7 +112,7 @@ class RBMLizer(object):
         output = [u'']
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to RocketBook HTML...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             output.append(self.add_page_anchor(item))
             output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
         return ''.join(output)
diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index 6aa48ad61b..1217482823 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -111,12 +111,13 @@ class RTFMLizer(object):
             href = self.oeb_book.guide['titlepage'].href
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
-                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+                stylizer = Stylizer(item.data, item.href, self.oeb_book,
+                        self.opts, self.opts.output_profile)
                 output += self.dump_text(item.data.find(XHTML('body')), stylizer)
                 output += '{\\page } '
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to RTF markup...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             output += self.dump_text(item.data.find(XHTML('body')), stylizer)
         output += self.footer()
         output = self.insert_images(output)
diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py
index 7642e051fe..bb730c0720 100644
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -54,7 +54,7 @@ class TXTMLizer(object):
         output.append(self.get_toc())
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to TXT...' % item.href)
-            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
             output += self.dump_text(etree.fromstring(content), stylizer)