From c2e3683843d28014b8cc0a64ceff691806b7dd0c Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 2 Feb 2010 17:52:18 -0500
Subject: [PATCH 1/5] Fix bug #4777: Typo in regex for eReader PDB chapter
 index generation.

---
 src/calibre/ebooks/pdb/ereader/writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py
index a6ee16db15..c13353745e 100644
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -42,7 +42,7 @@ class Writer(FormatWriter):
         pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
 
         text, text_sizes = self._text(pml)
-        chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4)="(?P<text>.+?)"', pml)
+        chapter_index = self._index_item(r'(?s)\\C(?P<val>[0-4])="(?P<text>.+?)"', pml)
         chapter_index += self.index_item(r'(?s)\\X(?P<val>[0-4])(?P<text>.+?)\\X[0-4]', pml)
         chapter_index += self.index_item(r'(?s)\\x(?P<text>.+?)\\x', pml)
         link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)

From 9ea276be209aee48f0927191d5bedf5378eb70af Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 10:22:00 -0700
Subject: [PATCH 2/5] Fix #4779 (Wall Street Journal (Free Content))

---
 resources/recipes/the_gazette.recipe | 22 -----------------
 resources/recipes/wsj_free.recipe    |  2 +-
 src/calibre/ebooks/pdf/reflow.py     | 35 ++++++++++++++++++++++------
 3 files changed, 29 insertions(+), 30 deletions(-)
 delete mode 100644 resources/recipes/the_gazette.recipe

diff --git a/resources/recipes/the_gazette.recipe b/resources/recipes/the_gazette.recipe
deleted file mode 100644
index 19afff986e..0000000000
--- a/resources/recipes/the_gazette.recipe
+++ /dev/null
@@ -1,22 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class The_Gazette(BasicNewsRecipe):
-
-    cover_url      = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
-    title          = u'The Gazette'
-    __author__     = 'Jerry Clapperton'
-    description    = 'Montreal news in English'
-    language = 'en_CA'
-
-    oldest_article = 7
-    max_articles_per_feed = 20
-    use_embedded_content  = False
-    remove_javascript = True
-    no_stylesheets = True
-    encoding = 'utf-8'
-
-    keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
-
-    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
-
-    feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]
diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe
index b190f43849..e29bfe3dde 100644
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
                 # first, check if there is an h3 tag which provides a section name
                 stag = divtag.find('h3')
                 if stag:
-                    if stag.parent['class'] == 'dynamic':
+                    if stag.parent.get('class', '') == 'dynamic':
                         # a carousel of articles is too complex to extract a section name
                         # for each article, so we'll just call the section "Carousel"
                         section_name = 'Carousel'
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 9f98147032..552af1590f 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -262,7 +262,6 @@ class Region(object):
             max_lines = max(max_lines, len(c))
         return max_lines
 
-
     @property
     def is_small(self):
         return self.line_count < 3
@@ -438,9 +437,8 @@ class Page(object):
         # absorb into a neighboring region (prefer the one with number of cols
         # closer to the avg number of cols in the set, if equal use larger
         # region)
-        # merge contiguous regions that can contain each other
-        '''absorbed = set([])
         found = True
+        absorbed = set([])
         while found:
             found = False
             for i, region in enumerate(self.regions):
@@ -452,10 +450,33 @@ class Page(object):
                             regions.append(self.regions[j])
                         else:
                             break
-                    prev = None if i == 0 else i-1
-                    next = j if self.regions[j] not in regions else None
-        '''
-        pass
+                    prev_region = None if i == 0 else i-1
+                    next_region = j if self.regions[j] not in regions else None
+                    if prev_region is None and next_region is not None:
+                        absorb_into = next_region
+                    elif next_region is None and prev_region is not None:
+                        absorb_into = prev_region
+                    elif prev_region is None and next_region is None:
+                        if len(regions) > 1:
+                            absorb_into = regions[0]
+                            regions = regions[1:]
+                        else:
+                            absorb_into = None
+                    else:
+                        absorb_into = prev_region
+                        if next_region.line_count >= prev_region.line_count:
+                            avg_column_count = sum([len(r.columns) for r in
+                                regions])/float(len(regions))
+                            if next_region.line_count > prev_region.line_count \
+                               or abs(avg_column_count - len(prev_region.columns)) \
+                               > abs(avg_column_count - len(next_region.columns)):
+                                   absorb_into = next_region
+                    if absorb_into is not None:
+                        absorb_into.absorb_region(regions)
+                        absorbed.update(regions)
+                    i = j
+        for region in absorbed:
+            self.regions.remove(region)
 
 
 

From 4ecab6bc9ee483ddeddb77b8681635b5ab9918e6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 10:54:23 -0700
Subject: [PATCH 3/5] New recipes for Gizmodo, News Straits Times, Read It
 Later, TidBits by Darko Miletic

---
 resources/images/news/gizmodo.png          | Bin 0 -> 640 bytes
 resources/images/news/newsstraitstimes.png | Bin 0 -> 816 bytes
 resources/images/news/readitlater.png      | Bin 0 -> 810 bytes
 resources/images/news/tidbits.png          | Bin 0 -> 783 bytes
 resources/recipes/gizmodo.recipe           |  40 +++++++++++++
 resources/recipes/newsstraitstimes.recipe  |  35 +++++++++++
 resources/recipes/readitlater.recipe       |  64 +++++++++++++++++++++
 resources/recipes/tidbits.recipe           |  53 +++++++++++++++++
 8 files changed, 192 insertions(+)
 create mode 100644 resources/images/news/gizmodo.png
 create mode 100644 resources/images/news/newsstraitstimes.png
 create mode 100644 resources/images/news/readitlater.png
 create mode 100644 resources/images/news/tidbits.png
 create mode 100644 resources/recipes/gizmodo.recipe
 create mode 100644 resources/recipes/newsstraitstimes.recipe
 create mode 100644 resources/recipes/readitlater.recipe
 create mode 100644 resources/recipes/tidbits.recipe

diff --git a/resources/images/news/gizmodo.png b/resources/images/news/gizmodo.png
new file mode 100644
index 0000000000000000000000000000000000000000..8f2e6f002b7719ac70fb67d31b6f5b6785d2c140
GIT binary patch
literal 640
zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI
zAngIhZYQ(tK!Rljj_E*J0gT&!&6&%<z_`-W#W5t}@Y|^i`=kv;T+fHGfdDfT4-*>?
z2M}o7@qdy2X8D_R5z#R2RZUt7s}^Y~Bsd&sU<4{-Wb?n(G_z;R?cC6<##}SKbt1p*
zK7Re$=ex_--=EK4dRTXhp6#mubDNqS`#BFhKCXVxH&fw&DtF|Q?b%mEj=h{@c_sbU
zj1!iNryP=Ue#z~%zar7f%0XJ#f48z``j#MxwI`HJ6fd=OT$GC6k;$-$BmUdmo%%Aj
zLpzPzbY>>sZ3xmo<IcWfw!@y3H5c#XYt7mEWR9WN=GdwE?<7SPj<7W7%eZB`v<L{7
zN(Zdke93F0-d_`jOZ&gAp4oBX*p;LAPfML$^`-ga3WjSLA_75D&!%-VrSlrb-kAG}
z@!hKW_srLwym(#zE@Dt<IkL}wtLfd<`g7g}RmMKE|0H)TJ22iNZ0Yj|^Edsbxg8eS
zCn7VFU6$;gck)+5$A`Mug<GGM<YhQ7;oZpZ6{yE$cC}~;%fAJ_FIWC)njtiOpRiSS
z!LdJ^%4!<^W%Y#32={%i>-$UcZ*N+IWCGg^MliL*^1$8a`S$b5mw)y>v;FF%`8m3a
z93>`lTL5EEwZt`|BqgyV)hf9t6-Y4{85kPq8W`ys7>5`dSeckv8CdEXm|Gbb+`K)J
p14Tn_eoAIqC9(zs3oBDYD-#QdhP<b@B7hngJYD@<);T3K0RV`N`r!Zo

literal 0
HcmV?d00001

diff --git a/resources/images/news/newsstraitstimes.png b/resources/images/news/newsstraitstimes.png
new file mode 100644
index 0000000000000000000000000000000000000000..075e2cc001ee23118026a36f5bf88ad1bbba17c0
GIT binary patch
literal 816
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>hGT^vI!P9L3o)?1`d;<)|(n~`^Keq0(`
z>V8*1$-^-GpgGqej{dFH7e4WGEKzzD(C#Pe@>0&FV~Hb&tA6_;g&WS-GR=2)EKyB$
znw2+8@6oQ!n~gueHgNlH^JUKb=cm8F6%6t^Y3FwH{6~An8H>N)*jd4vDasfyanhf+
zyE$w-E;NhW-{YK{uWV2uS6<tE=!x>2ot0A#w+A>W7MZsT*vT(i!}Io7DgOZ(>Ep3c
zPH!z6cTYRG=Rd=rx;585*F4wPw7uSb!DLCM5|h-A+8x=F8+81eG_DKo^85Ki<JMBg
z0+~5S^MCZqJFi@Sc;5e(J9ie9ERYWrbz*6r;_tV><+UwCW|YTmHqC_*E=f&q7i`<U
zFm`uWfJo8ywvBOnfU0)vH#>P>(lsxCG1CfP_X2J01Ml5K!WSR23Q)Q#B4>BZufD;7
z;q9SRFScKdC;EGn`+Nf?S@cc_^7<Fz$kE@*!gA)^xu&-Q*}1G&zngO%|M;zD)$s?v
zKN-CY>6$lhazt3;%$bt2X3wtt_C#~;-Pj+^Qr+?UF5S4&ktm@u+0)9(Ix{1~LBuDx
zkEj2_)w`}%Q`Pp*ejJjo|1v}>G*nZ>C#-(WrSfg_fA$wYIcu{qqUYuJ%RIe2i&Kp3
z4SAX?lp3c@3a~gcvqI+Ot|vv-BD&qbGNu4UwPVHX^aJE=Ph6ZPHetaPm#m1$$Y*b_
zh?H&gP)VvV%9Xxdwjo#gdMV?I6)TRcFr9GeXrYYA{6{lqoK3qKaeu+Kbz+JvyVgy<
zd+XNcFr}6U4jM<^<?$UaJCS14ayao&XlS+OpY!>Dcqi;*ubp=O`b1!gQ!R0gC`m~y
zNwrEYN(E93Mh1okx&}tN2F4+V2396!R;Gr!2If`<28*s-<VMkuo1c=IR*9^^z|zXl
a)XD^?8KhyIVShGI1B0ilpUXO@geCwXt6C=j

literal 0
HcmV?d00001

diff --git a/resources/images/news/readitlater.png b/resources/images/news/readitlater.png
new file mode 100644
index 0000000000000000000000000000000000000000..439a690cd8155909158acba926ee6d85a12e5cc3
GIT binary patch
literal 810
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?^@RT^vI!PUlYc^$&IwX`4Uu-lcEfPCGr_
z?YflPl#RtiZIXm?=RY3te=PqSw72KQ?lv%Nn<_1AYN${r(k#d!BA6Vc9k9<U^X1;{
zdyVxxxTo$rsj}Ik@>%V9OXW$Ob!!_F3|gj4+SMZPM}KYU<8@OWZHPH!5ous3_T=GT
z?-^+v0SzgdpC=zb|7lJB^$Si5w&pRJG6Z?$@wvW`t837&<F;V%X-oE>5i#e<&)v>b
zE^#OZc$wa*T6FP*GQ<4y$Kx)hsqA!IbKXHJd%<)=B~GVnQlIU=X-?~ZoVm78=C{Oy
z@4q!8g(k}eoh&=kC)X0~ux%#GU&cjSP3?3nuH4ODTbMK3?RtxVg=xjto2i<%&6~G#
zA7T&dD|n=I-A%vX>7GV`MTN7JSeO`>Ou5l_K)-9rFW*HU<65+oOGNVxJkLa`%w3rj
z-Y;`wTB*??K?jeMRR<TGpRRoIeyhGfLl$HHj`oJ*sh|C)U7eG~prRYH>cIV8Tdm)>
z16D6uZ5G0BV9n-vWOjCMSVKibq2A3_$+s=DSl2P#b*oFAr7yXC<+m-&YbE`sO6;_A
z(-3kDKgIEwCwhPQjGtT16ivI<@T;xVExU(T{lyDe2aDevYXe&%|NfdcU0yXT)I2v?
zSm8nFot-g>FOFF}yQz}FIFWJFOq0vS(K%v$@>!f7+oFB$$f;hNt7L5I`SC(?s^pUN
zv`XXr2fKcWELhnY&c3(8Rxdx^Cbi7&_}<-mj*%&^sypX4o5d`Zvj2ERWJ=JUdCfj$
zjd#|y&kdUK_LKQ1*MsZ#h+hwVA-s2$^FPK@T3QA+^$|_Lw5D3(8c~vxSdwa$T$Bo=
z7>o=I4Rj5RbPbF{3=OPI%&m;fbq&m|3=CA5+r&^b<mRVjrd6V7FtRc*wlcARXxJmJ
RSPs;{;OXk;vd$@?2>||EMpFO)

literal 0
HcmV?d00001

diff --git a/resources/images/news/tidbits.png b/resources/images/news/tidbits.png
new file mode 100644
index 0000000000000000000000000000000000000000..e64d71ec6887f57c6f66464bb90208ffe33978f9
GIT binary patch
literal 783
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?><PT^vI!PA{G8n=KqD(l)<3UCnrAzsXup
zudPN`w|THSx@4_XSkYn4dek>OZ&!)pF42;O_6J(C3ljy)|1v9;Bzn6ZbT}F{HNh$Q
zc+J$bSt)AQU2|V;y0LjrL7d+C-!coQ1j!^kah%5ZWo^`v=F_KnWG-qxab{@qpA}M9
zw&U-W^pB;ztvef5MrUr?cl6sbC5<gJDjyjeyimAqcV*_DnU8)YPd-!Vx?jHV-#az+
zbq<NSbCWMKHT>(8dK~VocSdbuT6_9tZ5GFdGcvvf6^D)(@GO~9vb(En_t_UIljh7%
z5x*F=*;D4YMW6Gv?LYS2Sr(zQ=(*b@i<>-!GRYFWi!=^x%XRaAS`a(^&uuM%1+S&1
zr9P~bTNcT@Aa4EQ>{)G(Ei{%21bulR(bo8SSHv~G>G$ueWOpkbTyDgzUtjpE&4r^;
zuf^edDZ>%_lBH8#?CMi1@4BU<&Gl1lWk~Cr87gnL7sQI!?l0w?|JJ}P)xc!RlMB{I
zSO3<{iGF*Z+4u4Xn|>d)!(X-vGT5DK|NXyl0@LYDK)1LkF*?W`|DXQo(jPX4vSr;j
z_O|Y9kFWf^$VH}Ha=(c2=Zrbm+Kzv&UM=#;_flQY?fit9Vn?Uu?va_ZJNPE|k3AwE
z9(#9wEt0$(IA`aJ$z^Pjwed?sw1U=~Z_wdBzRggk)mUVbhsrmmsI_8Ov!=Q&W^~Y+
zx@i6Se!u0;OM?`7oR>^Fv43mUthU#?su{gB7ilcI{q~#91ry$-B84*F?=iLr*!urt
zXm`DxsP)vV9GIX~OI#yLQW8s2t&)pUffR$0fuVt}fsw9(afqRTm5Hg9k)f`Exs`!|
tvs$tsiiX_$l+3hBWDN!uRz}8Fra=854SVte<$)R)JYD@<);T3K0RR{HNE-kE

literal 0
HcmV?d00001

diff --git a/resources/recipes/gizmodo.recipe b/resources/recipes/gizmodo.recipe
new file mode 100644
index 0000000000..6f6e6ae0cf
--- /dev/null
+++ b/resources/recipes/gizmodo.recipe
@@ -0,0 +1,40 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+gizmodo.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gizmodo(BasicNewsRecipe):
+    """Recipe for gizmodo.com, configured by class attributes plus preprocess_html."""
+
+    # Descriptive metadata; several of these values are reused in conversion_options.
+    title = 'Gizmodo'
+    __author__ = 'Darko Miletic'
+    description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
+    publisher = 'gizmodo.com'
+    category = 'news, IT, Internet, gadgets'
+    language = 'en'
+    masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    use_embedded_content = True
+    no_stylesheets = True
+    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]
+
+    # The same div.feedflare spec is given to both remove_tags and remove_tags_after.
+    remove_attributes = ['width', 'height']
+    remove_tags = [dict(name='div', attrs={'class': 'feedflare'})]
+    remove_tags_after = dict(name='div', attrs={'class': 'feedflare'})
+    extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
+    conversion_options = {'comment': description, 'tags': category,
+                          'publisher': publisher, 'language': language}
+
+    def preprocess_html(self, soup):
+        """Return the result of passing soup through self.adeify_images."""
+        return self.adeify_images(soup)
+
diff --git a/resources/recipes/newsstraitstimes.recipe b/resources/recipes/newsstraitstimes.recipe
new file mode 100644
index 0000000000..ebbaca1a0e
--- /dev/null
+++ b/resources/recipes/newsstraitstimes.recipe
@@ -0,0 +1,35 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.nst.com.my
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Newstraitstimes(BasicNewsRecipe):
+    """Recipe for www.nst.com.my (New Straits Times), configured via class attributes."""
+
+    title                 = 'New Straits Times from Malaysia'
+    __author__            = 'Darko Miletic'
+    description           = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
+    publisher             = 'nst.com.my'
+    category              = 'news, politics, Malaysia'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'en'
+    masthead_url          = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'
+
+    conversion_options = {'comment': description, 'tags': category,
+                          'publisher': publisher, 'language': language}
+
+    remove_tags       = [dict(name=['link','table'])]
+    # keep_only_tags is written as a list of tag specs, the same shape as
+    # remove_tags above, rather than as a bare dict.
+    keep_only_tags    = [dict(name='div',attrs={'id':'haidah'})]
+
+    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
+
diff --git a/resources/recipes/readitlater.recipe b/resources/recipes/readitlater.recipe
new file mode 100644
index 0000000000..4bd8fc2bd6
--- /dev/null
+++ b/resources/recipes/readitlater.recipe
@@ -0,0 +1,64 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+readitlaterlist.com
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Readitlater(BasicNewsRecipe):
+    """Recipe that logs in to readitlaterlist.com and scrapes the unread-articles page."""
+
+    title                 = 'Read It Later'
+    __author__            = 'Darko Miletic'
+    description           = '''Personalized news feeds. Go to readitlaterlist.com to
+                               setup up your news. Fill in your account
+                               username, and optionally you can add password.'''
+    publisher             = 'readitlater.com'
+    category              = 'news, custom'
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    needs_subscription    = True
+    INDEX                 = u'http://readitlaterlist.com'
+    LOGIN                 = INDEX + u'/l'
+
+    feeds = [(u'Unread articles' , INDEX + u'/unread')]
+
+    def get_browser(self):
+        """Return a browser; if a username is set, submit the first form on the LOGIN page."""
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None:
+            br.open(self.LOGIN)
+            br.select_form(nr=0)
+            br['feed_id'] = self.username
+            if self.password is not None:
+                br['password'] = self.password
+            br.submit()
+        return br
+
+    def parse_index(self):
+        """Return a list of (feed title, articles) tuples; each article is a dict."""
+        totalfeeds = []
+        for feedtitle, feedurl in self.get_feeds():
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            ritem = soup.find('ul',attrs={'id':'list'})
+            # find() yields None when the page has no <ul id="list">; treat that
+            # as an empty feed instead of failing with AttributeError on None.
+            items = ritem.findAll('li') if ritem is not None else []
+            for item in items:
+                atag = item.find('a',attrs={'class':'text'})
+                if atag and atag.has_key('href'):
+                    articles.append({
+                        'title'      : self.tag_to_string(item.div),
+                        'date'       : strftime(self.timefmt),
+                        'url'        : self.INDEX + atag['href'],
+                        'description': '',
+                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+
diff --git a/resources/recipes/tidbits.recipe b/resources/recipes/tidbits.recipe
new file mode 100644
index 0000000000..702c65e9e4
--- /dev/null
+++ b/resources/recipes/tidbits.recipe
@@ -0,0 +1,53 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+db.tidbits.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TidBITS(BasicNewsRecipe):
+    """Recipe for db.tidbits.com, configured entirely through class attributes."""
+
+    # Descriptive metadata; several of these values are reused in conversion_options.
+    title = 'TidBITS: Mac News for the Rest of Us'
+    __author__ = 'Darko Miletic'
+    description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
+    publisher = 'TidBITS Publishing Inc.'
+    category = 'news, Apple, Macintosh, IT, Internet'
+    language = 'en'
+    masthead_url = 'http://db.tidbits.com/images/tblogo9.gif'
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    encoding = 'utf-8'
+    use_embedded_content = True
+    remove_empty_feeds = True
+    no_stylesheets = True
+
+    remove_attributes = ['width', 'height']
+    remove_tags = [dict(name='small')]
+    remove_tags_after = dict(name='small')
+    extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} '
+    conversion_options = {'comment': description, 'tags': category,
+                          'publisher': publisher, 'language': language}
+
+    # One (section title, RSS URL) pair per feed, all under db.tidbits.com/feeds/.
+    feeds = [
+        (u'Business Apps', u'http://db.tidbits.com/feeds/business.rss'),
+        (u'Entertainment', u'http://db.tidbits.com/feeds/entertainment.rss'),
+        (u'External Links', u'http://db.tidbits.com/feeds/links.rss'),
+        (u'Home Mac', u'http://db.tidbits.com/feeds/home.rss'),
+        (u'Inside TidBITS', u'http://db.tidbits.com/feeds/inside.rss'),
+        (u'iPod & iPhone', u'http://db.tidbits.com/feeds/ipod-iphone.rss'),
+        (u'Just for Fun', u'http://db.tidbits.com/feeds/fun.rss'),
+        (u'Macs & Mac OS X', u'http://db.tidbits.com/feeds/macs.rss'),
+        (u'Media Creation', u'http://db.tidbits.com/feeds/creative.rss'),
+        (u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'),
+        (u'Opinion & Editorial', u'http://db.tidbits.com/feeds/opinion.rss'),
+        (u'Support & Problem Solving', u'http://db.tidbits.com/feeds/support.rss'),
+        (u'Safe Computing', u'http://db.tidbits.com/feeds/security.rss'),
+        (u'Tech News', u'http://db.tidbits.com/feeds/tech.rss'),
+        (u'Software Watchlist', u'http://db.tidbits.com/feeds/watchlist.rss'),
+    ]

From 3d1ef6e56499bd24cbca3fb263dfa9580d5b3f9a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 16:14:25 -0700
Subject: [PATCH 4/5] Fix #4786 (Updated recipe for Pagina 12)

---
 resources/recipes/pagina12.recipe | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/resources/recipes/pagina12.recipe b/resources/recipes/pagina12.recipe
index 2fb433dc82..c9801cb359 100644
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@@ -5,9 +5,10 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 pagina12.com.ar
 '''
 
-import time
+import re, time
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
 
 class Pagina12(BasicNewsRecipe):
     title                 = 'Pagina - 12'
@@ -22,7 +23,8 @@ class Pagina12(BasicNewsRecipe):
     use_embedded_content  = False
     language              = 'es'
     remove_empty_feeds    = True
-    extra_css             = ' body{font-family: sans-serif} '
+    masthead_url          = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '
 
     conversion_options = {
                           'comment'   : description
@@ -32,7 +34,7 @@ class Pagina12(BasicNewsRecipe):
                         }
 
     remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
-
+    
 
     feeds = [
               (u'Edicion impresa', u'http://www.pagina12.com.ar/diario/rss/principal.xml'   )
@@ -52,7 +54,11 @@ class Pagina12(BasicNewsRecipe):
         return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
 
     def get_cover_url(self):
-        imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
-        weekday = time.localtime().tm_wday
-        return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
-
+        rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
+        rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
+        soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
+        for image in soup.findAll('img',alt=True):
+           if image['alt'].startswith('Tapa de la fecha'):
+              return image['src']
+        return None
+        
\ No newline at end of file

From 008fab308d68003341cdd3152b50629115afc15e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 17:03:04 -0700
Subject: [PATCH 5/5] Updated recipe for The New Republic

---
 resources/recipes/the_new_republic.recipe | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/the_new_republic.recipe b/resources/recipes/the_new_republic.recipe
index 482dba1af0..59ccef3607 100644
--- a/resources/recipes/the_new_republic.recipe
+++ b/resources/recipes/the_new_republic.recipe
@@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
 
     oldest_article = 7
     max_articles_per_feed = 100
+    no_stylesheets = True
 
     remove_tags = [
             dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
@@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
         ('Economy', 'http://www.tnr.com/rss/articles/Economy'),
         ('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
         ('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
-        ('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
+        ('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
         ('World', 'http://www.tnr.com/rss/articles/World'),
         ('Film', 'http://www.tnr.com/rss/articles/Film'),
         ('Books', 'http://www.tnr.com/rss/articles/books'),
+        ('The Book', 'http://www.tnr.com/rss/book'),
+        ('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
         ('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
         ('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
         ('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
-        ('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
         ('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
         ('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
         ('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
@@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
 
     def print_version(self, url):
         return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')
+