From 9ea276be209aee48f0927191d5bedf5378eb70af Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 10:22:00 -0700
Subject: [PATCH 01/20] Fix #4779 (Wall Street Journal (Free Content))

---
 resources/recipes/the_gazette.recipe | 22 -----------------
 resources/recipes/wsj_free.recipe    |  2 +-
 src/calibre/ebooks/pdf/reflow.py     | 35 ++++++++++++++++++++++------
 3 files changed, 29 insertions(+), 30 deletions(-)
 delete mode 100644 resources/recipes/the_gazette.recipe

diff --git a/resources/recipes/the_gazette.recipe b/resources/recipes/the_gazette.recipe
deleted file mode 100644
index 19afff986e..0000000000
--- a/resources/recipes/the_gazette.recipe
+++ /dev/null
@@ -1,22 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class The_Gazette(BasicNewsRecipe):
-
-    cover_url      = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
-    title          = u'The Gazette'
-    __author__     = 'Jerry Clapperton'
-    description    = 'Montreal news in English'
-    language = 'en_CA'
-
-    oldest_article = 7
-    max_articles_per_feed = 20
-    use_embedded_content  = False
-    remove_javascript = True
-    no_stylesheets = True
-    encoding = 'utf-8'
-
-    keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
-
-    extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
-
-    feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]
diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe
index b190f43849..e29bfe3dde 100644
--- a/resources/recipes/wsj_free.recipe
+++ b/resources/recipes/wsj_free.recipe
@@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
                 # first, check if there is an h3 tag which provides a section name
                 stag = divtag.find('h3')
                 if stag:
-                    if stag.parent['class'] == 'dynamic':
+                    if stag.parent.get('class', '') == 'dynamic':
                         # a carousel of articles is too complex to extract a section name
                         # for each article, so we'll just call the section "Carousel"
                         section_name = 'Carousel'
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 9f98147032..552af1590f 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -262,7 +262,6 @@ class Region(object):
             max_lines = max(max_lines, len(c))
         return max_lines
 
-
     @property
     def is_small(self):
         return self.line_count < 3
@@ -438,9 +437,8 @@ class Page(object):
         # absorb into a neighboring region (prefer the one with number of cols
         # closer to the avg number of cols in the set, if equal use larger
         # region)
-        # merge contiguous regions that can contain each other
-        '''absorbed = set([])
         found = True
+        absorbed = set([])
         while found:
             found = False
             for i, region in enumerate(self.regions):
@@ -452,10 +450,33 @@ class Page(object):
                             regions.append(self.regions[j])
                         else:
                             break
-                    prev = None if i == 0 else i-1
-                    next = j if self.regions[j] not in regions else None
-        '''
-        pass
+                    prev_region = None if i == 0 else self.regions[i-1]  # region objects, not indices
+                    next_region = self.regions[j] if self.regions[j] not in regions else None
+                    if prev_region is None and next_region is not None:
+                        absorb_into = next_region
+                    elif next_region is None and prev_region is not None:
+                        absorb_into = prev_region
+                    elif prev_region is None and next_region is None:
+                        if len(regions) > 1:
+                            absorb_into = regions[0]
+                            regions = regions[1:]
+                        else:
+                            absorb_into = None
+                    else:
+                        absorb_into = prev_region
+                        if next_region.line_count >= prev_region.line_count:
+                            avg_column_count = sum([len(r.columns) for r in
+                                regions])/float(len(regions))
+                            if next_region.line_count > prev_region.line_count \
+                               or abs(avg_column_count - len(prev_region.columns)) \
+                               > abs(avg_column_count - len(next_region.columns)):
+                                   absorb_into = next_region
+                    if absorb_into is not None:
+                        absorb_into.absorb_region(regions)
+                        absorbed.update(regions)
+                    i = j
+        for region in absorbed:
+            self.regions.remove(region)
 
 
 

From 4ecab6bc9ee483ddeddb77b8681635b5ab9918e6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 10:54:23 -0700
Subject: [PATCH 02/20] New recipes for Gizmodo, News Straits Times, Read It
 Later, TidBits by Darko Miletic

---
 resources/images/news/gizmodo.png          | Bin 0 -> 640 bytes
 resources/images/news/newsstraitstimes.png | Bin 0 -> 816 bytes
 resources/images/news/readitlater.png      | Bin 0 -> 810 bytes
 resources/images/news/tidbits.png          | Bin 0 -> 783 bytes
 resources/recipes/gizmodo.recipe           |  40 +++++++++++++
 resources/recipes/newsstraitstimes.recipe  |  35 +++++++++++
 resources/recipes/readitlater.recipe       |  64 +++++++++++++++++++++
 resources/recipes/tidbits.recipe           |  53 +++++++++++++++++
 8 files changed, 192 insertions(+)
 create mode 100644 resources/images/news/gizmodo.png
 create mode 100644 resources/images/news/newsstraitstimes.png
 create mode 100644 resources/images/news/readitlater.png
 create mode 100644 resources/images/news/tidbits.png
 create mode 100644 resources/recipes/gizmodo.recipe
 create mode 100644 resources/recipes/newsstraitstimes.recipe
 create mode 100644 resources/recipes/readitlater.recipe
 create mode 100644 resources/recipes/tidbits.recipe

diff --git a/resources/images/news/gizmodo.png b/resources/images/news/gizmodo.png
new file mode 100644
index 0000000000000000000000000000000000000000..8f2e6f002b7719ac70fb67d31b6f5b6785d2c140
GIT binary patch
literal 640
zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI
zAngIhZYQ(tK!Rljj_E*J0gT&!&6&%<z_`-W#W5t}@Y|^i`=kv;T+fHGfdDfT4-*>?
z2M}o7@qdy2X8D_R5z#R2RZUt7s}^Y~Bsd&sU<4{-Wb?n(G_z;R?cC6<##}SKbt1p*
zK7Re$=ex_--=EK4dRTXhp6#mubDNqS`#BFhKCXVxH&fw&DtF|Q?b%mEj=h{@c_sbU
zj1!iNryP=Ue#z~%zar7f%0XJ#f48z``j#MxwI`HJ6fd=OT$GC6k;$-$BmUdmo%%Aj
zLpzPzbY>>sZ3xmo<IcWfw!@y3H5c#XYt7mEWR9WN=GdwE?<7SPj<7W7%eZB`v<L{7
zN(Zdke93F0-d_`jOZ&gAp4oBX*p;LAPfML$^`-ga3WjSLA_75D&!%-VrSlrb-kAG}
z@!hKW_srLwym(#zE@Dt<IkL}wtLfd<`g7g}RmMKE|0H)TJ22iNZ0Yj|^Edsbxg8eS
zCn7VFU6$;gck)+5$A`Mug<GGM<YhQ7;oZpZ6{yE$cC}~;%fAJ_FIWC)njtiOpRiSS
z!LdJ^%4!<^W%Y#32={%i>-$UcZ*N+IWCGg^MliL*^1$8a`S$b5mw)y>v;FF%`8m3a
z93>`lTL5EEwZt`|BqgyV)hf9t6-Y4{85kPq8W`ys7>5`dSeckv8CdEXm|Gbb+`K)J
p14Tn_eoAIqC9(zs3oBDYD-#QdhP<b@B7hngJYD@<);T3K0RV`N`r!Zo

literal 0
HcmV?d00001

diff --git a/resources/images/news/newsstraitstimes.png b/resources/images/news/newsstraitstimes.png
new file mode 100644
index 0000000000000000000000000000000000000000..075e2cc001ee23118026a36f5bf88ad1bbba17c0
GIT binary patch
literal 816
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>hGT^vI!P9L3o)?1`d;<)|(n~`^Keq0(`
z>V8*1$-^-GpgGqej{dFH7e4WGEKzzD(C#Pe@>0&FV~Hb&tA6_;g&WS-GR=2)EKyB$
znw2+8@6oQ!n~gueHgNlH^JUKb=cm8F6%6t^Y3FwH{6~An8H>N)*jd4vDasfyanhf+
zyE$w-E;NhW-{YK{uWV2uS6<tE=!x>2ot0A#w+A>W7MZsT*vT(i!}Io7DgOZ(>Ep3c
zPH!z6cTYRG=Rd=rx;585*F4wPw7uSb!DLCM5|h-A+8x=F8+81eG_DKo^85Ki<JMBg
z0+~5S^MCZqJFi@Sc;5e(J9ie9ERYWrbz*6r;_tV><+UwCW|YTmHqC_*E=f&q7i`<U
zFm`uWfJo8ywvBOnfU0)vH#>P>(lsxCG1CfP_X2J01Ml5K!WSR23Q)Q#B4>BZufD;7
z;q9SRFScKdC;EGn`+Nf?S@cc_^7<Fz$kE@*!gA)^xu&-Q*}1G&zngO%|M;zD)$s?v
zKN-CY>6$lhazt3;%$bt2X3wtt_C#~;-Pj+^Qr+?UF5S4&ktm@u+0)9(Ix{1~LBuDx
zkEj2_)w`}%Q`Pp*ejJjo|1v}>G*nZ>C#-(WrSfg_fA$wYIcu{qqUYuJ%RIe2i&Kp3
z4SAX?lp3c@3a~gcvqI+Ot|vv-BD&qbGNu4UwPVHX^aJE=Ph6ZPHetaPm#m1$$Y*b_
zh?H&gP)VvV%9Xxdwjo#gdMV?I6)TRcFr9GeXrYYA{6{lqoK3qKaeu+Kbz+JvyVgy<
zd+XNcFr}6U4jM<^<?$UaJCS14ayao&XlS+OpY!>Dcqi;*ubp=O`b1!gQ!R0gC`m~y
zNwrEYN(E93Mh1okx&}tN2F4+V2396!R;Gr!2If`<28*s-<VMkuo1c=IR*9^^z|zXl
a)XD^?8KhyIVShGI1B0ilpUXO@geCwXt6C=j

literal 0
HcmV?d00001

diff --git a/resources/images/news/readitlater.png b/resources/images/news/readitlater.png
new file mode 100644
index 0000000000000000000000000000000000000000..439a690cd8155909158acba926ee6d85a12e5cc3
GIT binary patch
literal 810
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?^@RT^vI!PUlYc^$&IwX`4Uu-lcEfPCGr_
z?YflPl#RtiZIXm?=RY3te=PqSw72KQ?lv%Nn<_1AYN${r(k#d!BA6Vc9k9<U^X1;{
zdyVxxxTo$rsj}Ik@>%V9OXW$Ob!!_F3|gj4+SMZPM}KYU<8@OWZHPH!5ous3_T=GT
z?-^+v0SzgdpC=zb|7lJB^$Si5w&pRJG6Z?$@wvW`t837&<F;V%X-oE>5i#e<&)v>b
zE^#OZc$wa*T6FP*GQ<4y$Kx)hsqA!IbKXHJd%<)=B~GVnQlIU=X-?~ZoVm78=C{Oy
z@4q!8g(k}eoh&=kC)X0~ux%#GU&cjSP3?3nuH4ODTbMK3?RtxVg=xjto2i<%&6~G#
zA7T&dD|n=I-A%vX>7GV`MTN7JSeO`>Ou5l_K)-9rFW*HU<65+oOGNVxJkLa`%w3rj
z-Y;`wTB*??K?jeMRR<TGpRRoIeyhGfLl$HHj`oJ*sh|C)U7eG~prRYH>cIV8Tdm)>
z16D6uZ5G0BV9n-vWOjCMSVKibq2A3_$+s=DSl2P#b*oFAr7yXC<+m-&YbE`sO6;_A
z(-3kDKgIEwCwhPQjGtT16ivI<@T;xVExU(T{lyDe2aDevYXe&%|NfdcU0yXT)I2v?
zSm8nFot-g>FOFF}yQz}FIFWJFOq0vS(K%v$@>!f7+oFB$$f;hNt7L5I`SC(?s^pUN
zv`XXr2fKcWELhnY&c3(8Rxdx^Cbi7&_}<-mj*%&^sypX4o5d`Zvj2ERWJ=JUdCfj$
zjd#|y&kdUK_LKQ1*MsZ#h+hwVA-s2$^FPK@T3QA+^$|_Lw5D3(8c~vxSdwa$T$Bo=
z7>o=I4Rj5RbPbF{3=OPI%&m;fbq&m|3=CA5+r&^b<mRVjrd6V7FtRc*wlcARXxJmJ
RSPs;{;OXk;vd$@?2>||EMpFO)

literal 0
HcmV?d00001

diff --git a/resources/images/news/tidbits.png b/resources/images/news/tidbits.png
new file mode 100644
index 0000000000000000000000000000000000000000..e64d71ec6887f57c6f66464bb90208ffe33978f9
GIT binary patch
literal 783
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?><PT^vI!PA{G8n=KqD(l)<3UCnrAzsXup
zudPN`w|THSx@4_XSkYn4dek>OZ&!)pF42;O_6J(C3ljy)|1v9;Bzn6ZbT}F{HNh$Q
zc+J$bSt)AQU2|V;y0LjrL7d+C-!coQ1j!^kah%5ZWo^`v=F_KnWG-qxab{@qpA}M9
zw&U-W^pB;ztvef5MrUr?cl6sbC5<gJDjyjeyimAqcV*_DnU8)YPd-!Vx?jHV-#az+
zbq<NSbCWMKHT>(8dK~VocSdbuT6_9tZ5GFdGcvvf6^D)(@GO~9vb(En_t_UIljh7%
z5x*F=*;D4YMW6Gv?LYS2Sr(zQ=(*b@i<>-!GRYFWi!=^x%XRaAS`a(^&uuM%1+S&1
zr9P~bTNcT@Aa4EQ>{)G(Ei{%21bulR(bo8SSHv~G>G$ueWOpkbTyDgzUtjpE&4r^;
zuf^edDZ>%_lBH8#?CMi1@4BU<&Gl1lWk~Cr87gnL7sQI!?l0w?|JJ}P)xc!RlMB{I
zSO3<{iGF*Z+4u4Xn|>d)!(X-vGT5DK|NXyl0@LYDK)1LkF*?W`|DXQo(jPX4vSr;j
z_O|Y9kFWf^$VH}Ha=(c2=Zrbm+Kzv&UM=#;_flQY?fit9Vn?Uu?va_ZJNPE|k3AwE
z9(#9wEt0$(IA`aJ$z^Pjwed?sw1U=~Z_wdBzRggk)mUVbhsrmmsI_8Ov!=Q&W^~Y+
zx@i6Se!u0;OM?`7oR>^Fv43mUthU#?su{gB7ilcI{q~#91ry$-B84*F?=iLr*!urt
zXm`DxsP)vV9GIX~OI#yLQW8s2t&)pUffR$0fuVt}fsw9(afqRTm5Hg9k)f`Exs`!|
tvs$tsiiX_$l+3hBWDN!uRz}8Fra=854SVte<$)R)JYD@<);T3K0RR{HNE-kE

literal 0
HcmV?d00001

diff --git a/resources/recipes/gizmodo.recipe b/resources/recipes/gizmodo.recipe
new file mode 100644
index 0000000000..6f6e6ae0cf
--- /dev/null
+++ b/resources/recipes/gizmodo.recipe
@@ -0,0 +1,40 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+gizmodo.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Gizmodo(BasicNewsRecipe):
+    title                 = 'Gizmodo'
+    __author__            = 'Darko Miletic'
+    description           = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
+    publisher             = 'gizmodo.com'
+    category              = 'news, IT, Internet, gadgets'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = True
+    language              = 'en'
+    masthead_url          = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png'
+    extra_css             = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_attributes = ['width','height']
+    remove_tags       = [dict(name='div',attrs={'class':'feedflare'})]
+    remove_tags_after = dict(name='div',attrs={'class':'feedflare'})
+
+    feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
+
diff --git a/resources/recipes/newsstraitstimes.recipe b/resources/recipes/newsstraitstimes.recipe
new file mode 100644
index 0000000000..ebbaca1a0e
--- /dev/null
+++ b/resources/recipes/newsstraitstimes.recipe
@@ -0,0 +1,35 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.nst.com.my
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Newstraitstimes(BasicNewsRecipe):
+    title                 = 'New Straits Times from Malaysia'
+    __author__            = 'Darko Miletic'
+    description           = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
+    publisher             = 'nst.com.my'
+    category              = 'news, politics, Malaysia'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'en'
+    masthead_url          = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags       = [dict(name=['link','table'])]
+    keep_only_tags = [dict(name='div',attrs={'id':'haidah'})]
+
+    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
+
diff --git a/resources/recipes/readitlater.recipe b/resources/recipes/readitlater.recipe
new file mode 100644
index 0000000000..4bd8fc2bd6
--- /dev/null
+++ b/resources/recipes/readitlater.recipe
@@ -0,0 +1,64 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+readitlaterlist.com
+'''
+
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Readitlater(BasicNewsRecipe):
+    title                 = 'Read It Later'
+    __author__            = 'Darko Miletic'
+    description           = '''Personalized news feeds. Go to readitlaterlist.com to
+                               setup up your news. Fill in your account
+                               username, and optionally you can add password.'''
+    publisher             = 'readitlater.com'
+    category              = 'news, custom'
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    needs_subscription    = True
+    INDEX                 = u'http://readitlaterlist.com'
+    LOGIN                 = INDEX + u'/l'
+
+
+    feeds = [(u'Unread articles' , INDEX + u'/unread')]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None:
+            br.open(self.LOGIN)
+            br.select_form(nr=0)
+            br['feed_id'] = self.username
+            if self.password is not None:
+               br['password'] = self.password
+            br.submit()
+        return br
+
+    def parse_index(self):
+        totalfeeds = []
+        lfeeds = self.get_feeds()
+        for feedobj in lfeeds:
+            feedtitle, feedurl = feedobj
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            ritem = soup.find('ul',attrs={'id':'list'})
+            for item in ritem.findAll('li'):
+                description = ''
+                atag = item.find('a',attrs={'class':'text'})
+                if atag and atag.has_key('href'):
+                    url         = self.INDEX + atag['href']
+                    title       = self.tag_to_string(item.div)
+                    date        = strftime(self.timefmt)
+                    articles.append({
+                                      'title'      :title
+                                     ,'date'       :date
+                                     ,'url'        :url
+                                     ,'description':description
+                                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+
diff --git a/resources/recipes/tidbits.recipe b/resources/recipes/tidbits.recipe
new file mode 100644
index 0000000000..702c65e9e4
--- /dev/null
+++ b/resources/recipes/tidbits.recipe
@@ -0,0 +1,53 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+db.tidbits.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TidBITS(BasicNewsRecipe):
+    title                 = 'TidBITS: Mac News for the Rest of Us'
+    __author__            = 'Darko Miletic'
+    description           = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds'
+    publisher             = 'TidBITS Publishing Inc.'
+    category              = 'news, Apple, Macintosh, IT, Internet'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = True
+    language              = 'en'
+    remove_empty_feeds    = True
+    masthead_url          = 'http://db.tidbits.com/images/tblogo9.gif'
+    extra_css             = ' body{font-family: Georgia,"Times New Roman",Times,serif} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_attributes = ['width','height']
+    remove_tags       = [dict(name='small')]
+    remove_tags_after = dict(name='small')
+
+    feeds = [
+               (u'Business Apps'              , u'http://db.tidbits.com/feeds/business.rss'     )
+              ,(u'Entertainment'              , u'http://db.tidbits.com/feeds/entertainment.rss')
+              ,(u'External Links'             , u'http://db.tidbits.com/feeds/links.rss'        )
+              ,(u'Home Mac'                   , u'http://db.tidbits.com/feeds/home.rss'         )
+              ,(u'Inside TidBITS'             , u'http://db.tidbits.com/feeds/inside.rss'       )
+              ,(u'iPod & iPhone'              , u'http://db.tidbits.com/feeds/ipod-iphone.rss'  )
+              ,(u'Just for Fun'               , u'http://db.tidbits.com/feeds/fun.rss'          )
+              ,(u'Macs & Mac OS X'            , u'http://db.tidbits.com/feeds/macs.rss'         )
+              ,(u'Media Creation'             , u'http://db.tidbits.com/feeds/creative.rss'     )
+              ,(u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss'          )
+              ,(u'Opinion & Editorial'        , u'http://db.tidbits.com/feeds/opinion.rss'      )
+              ,(u'Support & Problem Solving'  , u'http://db.tidbits.com/feeds/support.rss'      )
+              ,(u'Safe Computing'             , u'http://db.tidbits.com/feeds/security.rss'     )
+              ,(u'Tech News'                  , u'http://db.tidbits.com/feeds/tech.rss'         )
+              ,(u'Software Watchlist'         , u'http://db.tidbits.com/feeds/watchlist.rss'    )
+            ]

From cea60d5fd892f3fa1a03b768e9c865a9fa9bbbd0 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Wed, 3 Feb 2010 14:07:01 -0700
Subject: [PATCH 03/20] Added series to descriptions/titles

---
 resources/catalog/stylesheet.css |  10 +-
 src/calibre/library/catalog.py   | 190 +++++++++++++++++++++++++++----
 2 files changed, 177 insertions(+), 23 deletions(-)

diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css
index b5770599e6..80f4e50cc3 100644
--- a/resources/catalog/stylesheet.css
+++ b/resources/catalog/stylesheet.css
@@ -17,6 +17,14 @@ p.author {
 	font-size:large;
   	}
 
+p.series {
+	margin-top:0em;
+	margin-bottom:0em;
+	text-align: left;
+	text-indent: 1em;
+	font-size:small;
+	}
+
 p.tags {
 	margin-top:0em;
 	margin-bottom:0em;
@@ -27,7 +35,7 @@ p.tags {
 
 p.description {
 	text-align:left;
-	font-style:italic;
+	font-style:normal;
 	margin-top: 0em;
 	}
 
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 78155326dc..5110a2eee1 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -927,8 +927,16 @@ class EPUB_MOBI(CatalogPlugin):
             for record in data:
                 this_title = {}
 
-                title = this_title['title'] = self.convertHTMLEntities(record['title'])
-                this_title['title_sort'] = self.generateSortTitle(title)
+                this_title['title'] = self.convertHTMLEntities(record['title'])
+                if record['series']:
+                    this_title['series'] = record['series']
+                    this_title['series_index'] = record['series_index']
+                    this_title['title'] = self.generateSeriesTitle(this_title)
+                else:
+                    this_title['series'] = None
+                    this_title['series_index'] = 0.0
+
+                this_title['title_sort'] = self.generateSortTitle(this_title['title'])
                 if 'authors' in record and len(record['authors']):
                     this_title['author'] = " &amp; ".join(record['authors'])
                 else:
@@ -984,12 +992,61 @@ class EPUB_MOBI(CatalogPlugin):
 
         def fetchBooksByAuthor(self):
             # Generate a list of titles sorted by author from the database
+            def author_compare(x,y):
+                '''
+                cmp-style comparator for list.sort(): returns a negative number
+                if x sorts before y, 0 if they tie and a positive number if x
+                sorts after y.
+
+                Books are ordered by author_sort, then by series, then by
+                series_index inside a series, or by title when neither book
+                belongs to a series.
+
+                x, y: book dicts with author_sort, series, series_index, title
+                '''
+                #print "x['author_sort']: %s y['author_sort']: %s" % (x['author_sort'],y['author_sort'])
+                result = cmp(x['author_sort'], y['author_sort'])
+                if result:
+                    return result
+
+                # Authors equal
+                # Books w/o series go first: their series is None, which
+                # Python 2 orders before any string
+                result = cmp(x['series'], y['series'])
+                if result:
+                    return result
+
+                if not x['series']:
+                    # Neither book has a series (the series compared equal
+                    # above), so order by title. Identical titles tie with 0
+                    # instead of -1, which keeps the comparator consistent
+                    # when its arguments are swapped.
+                    return cmp(x['title'], y['title'])
+
+                # Both books are in the same series: order by position,
+                # comparing the indices as floats
+                return cmp(float(x['series_index']),
+                           float(y['series_index']))
 
             self.updateProgressFullStep("Sorting database")
 
-            # Sort titles case-insensitive
+            '''
+            # Sort titles case-insensitive, by author
             self.booksByAuthor = sorted(self.booksByTitle,
                                  key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper()))
+            '''
+
+            self.booksByAuthor = list(self.booksByTitle)
+            self.booksByAuthor.sort(author_compare)
+
+            if False and self.verbose:
+                self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor))
+                self.opts.log.info(" %-40s %-20s %s" % ('title', 'series', 'series_index'))
+                for title in self.booksByAuthor:
+                    self.opts.log.info((u" %-40s %-20s %s" % \
+                                        (title['title'][0:40],
+                                         title['series'][0:20] if title['series'] else '',
+                                         title['series_index'])).encode('utf-8'))
 
             # Build the unique_authors set from existing data
             authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor]
@@ -1063,7 +1120,15 @@ class EPUB_MOBI(CatalogPlugin):
                 # Insert the book title
                 #<p class="title"><a name="<database_id>"></a><em>Book Title</em></p>
                 emTag = Tag(soup, "em")
-                emTag.insert(0, NavigableString(escape(title['title'])))
+                if title['series']:
+                    # Insert br after the "<series> <index>:" prefix; split only
+                    # once so a ': ' inside the book's own title is kept
+                    title_tokens = title['title'].split(': ', 1)
+                    emTag.insert(0, NavigableString(escape(title_tokens[0] + ':')))
+                    emTag.insert(1, Tag(soup,'br'))
+                    emTag.insert(2, NavigableString(escape(title_tokens[1])))
+                else:
+                    emTag.insert(0, NavigableString(escape(title['title'])))
                 titleTag = body.find(attrs={'class':'title'})
                 titleTag.insert(0,emTag)
 
@@ -1085,6 +1150,27 @@ class EPUB_MOBI(CatalogPlugin):
                     tagsTag.insert(0,emTag)
 
                 '''
+                '''
+                # Insert Series info or remove.
+                seriesTag = body.find(attrs={'class':'series'})
+                if title['series']:
+                    # Insert a spacer to match the author indent
+                    stc = 0
+                    fontTag = Tag(soup,"font")
+                    fontTag['style'] = 'color:white;font-size:large'
+                    if self.opts.fmt == 'epub':
+                        fontTag['style'] += ';opacity: 0.0'
+                    fontTag.insert(0, NavigableString("by "))
+                    seriesTag.insert(stc, fontTag)
+                    stc += 1
+                    if float(title['series_index']) - int(title['series_index']):
+                        series_str = 'Series: %s [%4.2f]' % (title['series'], title['series_index'])
+                    else:
+                        series_str = '%s [%d]' % (title['series'], title['series_index'])
+                    seriesTag.insert(stc,NavigableString(series_str))
+                else:
+                    seriesTag.extract()
+                '''
                 # Insert linked genres
                 if 'tags' in title:
                     tagsTag = body.find(attrs={'class':'tags'})
@@ -1367,6 +1453,7 @@ class EPUB_MOBI(CatalogPlugin):
 
                 aTag = Tag(soup, "a")
                 aTag['href'] = "book_%d.html" % (int(float(book['id'])))
+                # Use series, series index if avail else just title
                 aTag.insert(0,escape(book['title']))
                 pBookTag.insert(ptc, aTag)
                 ptc += 1
@@ -1786,7 +1873,9 @@ class EPUB_MOBI(CatalogPlugin):
                 mtc += 1
 
             # HTML files - add books to manifest and spine
-            for book in self.booksByTitle:
+            sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \
+                                                      else self.booksByTitle
+            for book in sort_descriptions_by:
                 # manifest
                 itemTag = Tag(soup, "item")
                 itemTag['href'] = "content/book_%d.html" % int(book['id'])
@@ -1912,7 +2001,9 @@ class EPUB_MOBI(CatalogPlugin):
             nptc += 1
 
             # Loop over the titles
-            for book in self.booksByTitle:
+            sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \
+                                                      else self.booksByTitle
+            for book in sort_descriptions_by:
                 navPointVolumeTag = Tag(ncx_soup, 'navPoint')
                 navPointVolumeTag['class'] = "article"
                 navPointVolumeTag['id'] = "book%dID" % int(book['id'])
@@ -2553,6 +2644,7 @@ class EPUB_MOBI(CatalogPlugin):
             <p class="title"></p>
             {0}
             <p class="author"></p>
+            <!--p class="series"></p-->
             <p class="tags">&nbsp;</p>
             <table width="100%" border="0">
               <tr>
@@ -2678,6 +2770,17 @@ class EPUB_MOBI(CatalogPlugin):
             draw.text((left, top), text, fill=(0,0,0), font=font)
             img.save(open(out_path, 'wb'), 'GIF')
 
+        def generateSeriesTitle(self, title):
+            # Return "<series> <index>: <title>" for a book dict; a fractional
+            # series_index is shown with two decimals, a whole one as an integer
+            index = title['series_index']
+            has_fraction = float(index) - int(index)
+            if has_fraction:
+                template = '%s %4.2f: %s'
+            else:
+                template = '%s %d: %s'
+            return template % (title['series'], index, title['title'])
+
         def generateShortDescription(self, description):
             # Truncate the description to description_clip, on word boundaries if necessary
             if not description:
@@ -2777,24 +2880,65 @@ class EPUB_MOBI(CatalogPlugin):
 
         def markdownComments(self, comments):
             ''' Convert random comment text to normalized, xml-legal block of <p>s'''
-            # reformat illegal xml
-            desc = prepare_string_for_xml(comments)
 
-            # normalize <br/> tags
-            desc = re.sub(r'&lt;br[/]{0,1}&gt;', '<br/>', desc)
+            comments = comments.replace('\r', '')
+            if re.search('\n\n', comments):
+                soup = BeautifulSoup()
+                split_ps = comments.split('\n\n')
+                tsc = 0
+                for p in split_ps:
+                    pTag = Tag(soup,'p')
+                    pTag.insert(0,p)
+                    soup.insert(tsc,pTag)
+                    tsc += 1
+            else:
+                soup = BeautifulSoup(comments)
 
-            # tokenize double line breaks
-            desc = comments.replace('\r', '')
-            tokens = comments.split('\n\n')
+            result = BeautifulSoup()
+            rtc = 0
+            open_pTag = False
 
-            soup = BeautifulSoup()
-            ptc = 0
-            for token in tokens:
-                pTag = Tag(soup, 'p')
-                pTag.insert(0,token)
-                soup.insert(ptc, pTag)
-                ptc += 1
-            return soup.renderContents(encoding=None)
+            all_tokens = list(soup.contents)
+            for token in all_tokens:
+                if type(token) is NavigableString:
+                    if not open_pTag:
+                        pTag = Tag(result,'p')
+                        open_pTag = True
+                        ptc = 0
+                    pTag.insert(ptc,prepare_string_for_xml(token))
+                    ptc += 1
+
+                elif token.name in ['br','b','i']:
+                    if not open_pTag:
+                        pTag = Tag(result,'p')
+                        open_pTag = True
+                        ptc = 0
+                    pTag.insert(ptc, token)
+                    ptc += 1
+
+                else:
+                    if open_pTag:
+                        result.insert(rtc, pTag)
+                        rtc += 1
+                        open_pTag = False
+                        ptc = 0
+                    # Clean up NavigableStrings for xml
+                    sub_tokens = list(token.contents)
+                    sub_soup = BeautifulSoup()
+                    for sub_token in sub_tokens:
+                        if type(sub_token) is NavigableString:
+                            sub_token.replaceWith(prepare_string_for_xml(sub_token))
+                    result.insert(rtc, token)
+                    rtc += 1
+
+            if open_pTag:
+                result.insert(rtc, pTag)
+
+            paras = result.findAll('p')
+            for p in paras:
+                p['class'] = 'description'
+
+            return result.renderContents(encoding=None)
 
         def processSpecialTags(self, tags, this_title, opts):
             tag_list = []
@@ -2847,6 +2991,8 @@ class EPUB_MOBI(CatalogPlugin):
         opts.basename = "Catalog"
         opts.plugin_path = self.plugin_path
         opts.cli_environment = not hasattr(opts,'sync')
+        # GwR *** hardwired for the moment
+        opts.sort_descriptions_by_author = True
 
         if opts.verbose:
             opts_dict = vars(opts)
@@ -2863,7 +3009,7 @@ class EPUB_MOBI(CatalogPlugin):
             for key in keys:
                 if key in ['catalog_title','exclude_genre','exclude_tags','generate_titles',
                            'generate_recently_added','note_tag','numbers_as_text','read_tag',
-                           'search_text','sort_by','sync']:
+                           'search_text','sort_by','sort_descriptions_by_author','sync']:
                     log("  %s: %s" % (key, opts_dict[key]))
 
         # Launch the Catalog builder

From f283dc892e8308d807c7440903dff8a46ceec8cd Mon Sep 17 00:00:00 2001
From: jason <jason@upstairs>
Date: Wed, 3 Feb 2010 21:32:28 +0000
Subject: [PATCH 04/20] update series info from metadata

---
 src/calibre/gui2/dialogs/metadata_single.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index d066a27c53..78f30ecb21 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -594,10 +594,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                             self.rating.setValue(int(book.rating))
                         if book.tags:
                             self.tags.setText(', '.join(book.tags))
-                        print 'setting series'
-                        print book.series 
                         if book.series is not None:
-                            if self.series is not None:
+                            if self.series.text() is None or self.series.text() == '':
                                self.series.setText(book.series)
                                if book.series_index is not None:
                                   self.series_index.setValue(book.series_index)          

From 3d1ef6e56499bd24cbca3fb263dfa9580d5b3f9a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 16:14:25 -0700
Subject: [PATCH 05/20] Fix #4786 (Updated recipe for Pagina 12)

---
 resources/recipes/pagina12.recipe | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/resources/recipes/pagina12.recipe b/resources/recipes/pagina12.recipe
index 2fb433dc82..c9801cb359 100644
--- a/resources/recipes/pagina12.recipe
+++ b/resources/recipes/pagina12.recipe
@@ -5,9 +5,10 @@ __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 pagina12.com.ar
 '''
 
-import time
+import re, time
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
 
 class Pagina12(BasicNewsRecipe):
     title                 = 'Pagina - 12'
@@ -22,7 +23,8 @@ class Pagina12(BasicNewsRecipe):
     use_embedded_content  = False
     language              = 'es'
     remove_empty_feeds    = True
-    extra_css             = ' body{font-family: sans-serif} '
+    masthead_url          = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
+    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} '
 
     conversion_options = {
                           'comment'   : description
@@ -32,7 +34,7 @@ class Pagina12(BasicNewsRecipe):
                         }
 
     remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
-
+    
 
     feeds = [
               (u'Edicion impresa', u'http://www.pagina12.com.ar/diario/rss/principal.xml'   )
@@ -52,7 +54,11 @@ class Pagina12(BasicNewsRecipe):
         return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
 
     def get_cover_url(self):
-        imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg']
-        weekday = time.localtime().tm_wday
-        return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday]
-
+        rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
+        rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
+        soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
+        for image in soup.findAll('img',alt=True):
+           if image['alt'].startswith('Tapa de la fecha'):
+              return image['src']
+        return None
+        
\ No newline at end of file

From 008fab308d68003341cdd3152b50629115afc15e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Feb 2010 17:03:04 -0700
Subject: [PATCH 06/20] Updated recipe for The New Republic

---
 resources/recipes/the_new_republic.recipe | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/the_new_republic.recipe b/resources/recipes/the_new_republic.recipe
index 482dba1af0..59ccef3607 100644
--- a/resources/recipes/the_new_republic.recipe
+++ b/resources/recipes/the_new_republic.recipe
@@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe):
 
     oldest_article = 7
     max_articles_per_feed = 100
+    no_stylesheets = True
 
     remove_tags = [
             dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}),
@@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe):
         ('Economy', 'http://www.tnr.com/rss/articles/Economy'),
         ('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'),
         ('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'),
-        ('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'),
+        ('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'),
         ('World', 'http://www.tnr.com/rss/articles/World'),
         ('Film', 'http://www.tnr.com/rss/articles/Film'),
         ('Books', 'http://www.tnr.com/rss/articles/books'),
+        ('The Book', 'http://www.tnr.com/rss/book'),
+        ('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'),
         ('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'),
         ('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'),
         ('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'),
-        ('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'),
         ('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'),
         ('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'),
         ('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'),
@@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe):
 
     def print_version(self, url):
         return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/')
+

From e7c07ee25effd4a904646e427b2fb33d0b3dbb21 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Wed, 3 Feb 2010 17:07:49 -0700
Subject: [PATCH 07/20] GwR changes for series sorting

---
 src/calibre/gui2/catalog/catalog_epub_mobi.ui | 2 +-
 src/calibre/library/catalog.py                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.ui b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
index 91fcbdc364..dab8c972c7 100644
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
@@ -80,7 +80,7 @@
     <widget class="QLabel" name="label_6">
      <property name="text">
       <string>Regex tips:
-- The default regex - \[[\w]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
+- The default regex - \[[\w ]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
 - A regex pattern of a single dot excludes all genre tags, generating no Genre Section</string>
      </property>
      <property name="wordWrap">
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 5110a2eee1..51f1ff1104 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1009,7 +1009,7 @@ class EPUB_MOBI(CatalogPlugin):
                     elif x['series'] < y['series']:
                         return -1
                     elif not x['series'] and not y['series']:
-                        if x['title'] > y['title']:
+                        if self.generateSortTitle(x['title']) > self.generateSortTitle(y['title']):
                             return 1
                         else:
                             return -1

From a0ea14b5e9bf23d538339f0ca8c0eeb8b4bb1ab3 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 4 Feb 2010 08:44:29 -0700
Subject: [PATCH 08/20] New recipe for Digital Spy UK by Darko Miletic

---
 resources/images/news/digitalspy_uk.png     | Bin 0 -> 1290 bytes
 resources/recipes/digitalspy_uk.recipe      |  43 ++++++++++++++++++++
 src/calibre/gui2/dialogs/metadata_single.py |   2 +-
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 resources/images/news/digitalspy_uk.png
 create mode 100644 resources/recipes/digitalspy_uk.recipe

diff --git a/resources/images/news/digitalspy_uk.png b/resources/images/news/digitalspy_uk.png
new file mode 100644
index 0000000000000000000000000000000000000000..28c865713d5214505a620d111291e051462cef11
GIT binary patch
literal 1290
zcmeAS@N?(olHy`uVBq!ia0vp^4j|0I1|(Ny7TyC=Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgfc0~o*h{3C&ZfyLO<#WAGf*4x>(Iky}n+U|!;J;g9(t%A~3
z?v*pUVmeEjjs~%@MTza&5~<|s6}2+*QkPcn(vC9EB`c<Cr7jHKv*gqPflk(C4-R@Q
zyO3rWBq$ktyxDrI^Xra;nmMw~vX9CgAI#tV-tvB}efl!Fk5BD)9NhJ^jQ!3u{sV9M
zcIe7C+?FkfHkUi_w$OCSGl>r~*=O{*E?6Puva<K?tA)3}^|L$7O8)TE`QxShnl{$i
zJKDdVtqo`FN#znL;`godSDe}5XV*|*kWi&_)^IZafzzKDcid&%{*<|3H*?<WW6T>w
zI9i(-HyB^!zAwvncj2eUZ_Zq;t<Asv_pR2dwo`t>$JLjgW9DFIu9)TBc-~H;`a;su
zw<gCQocydG5OUU%p*4m}UwqRMhqRDK#yVcw#aD9Mn;AbuUMs3g<XSMnd*PR=yf<yk
z2WmbV#_6W$#*1$<3}6;NXmWf<n$7&gT*>@!X2w0mCo32<*51g!J<-)*W`~ko_l_ix
z<XZEtRS%=T3M^8Imu2uRyY_P4!fP1|HMd>_GA`RH&-?$=dE%>mCude4dAvv^d3zO0
z;3VDk->xvE?fiX&Tk8<xwr#KbO!62U3QB>}FMA(U%x*Jn`10er)Y^%X_CCHVZ72PX
zVhXsKxhv_9MyjUxhRvs{Vy~-7N^LXqWns7^yo^2Ful;HAZSe(x%YS!Q>;Zaag16(8
z6q)&RoGky>zWnj=q~^KvrryC<gBh~EiiLif`b;7z=41Y2SGTqQWVgC?SLpluGTN@+
zzv|FIh95lLpQ7Zd<HZlA-+0Z)@IySq@@4L$jU1=4D=$Ae2TX<um7yC%uTM^ycCT8f
zVNq5@m2yJ%Zab6n8@LwMTU7`(tm`Uh_|vUl@w<+H_f=>2Wyk((zUh76_9^R`knAmb
zYoC3LI3lpg|F}}e@pnz<HykO5^Z));{dE+>i|Y$SV*dP(yqWpvsgqc1%&9NGBKvl(
zkgclXx-k)G&;2{<*SM>)+lAR3!e6uBtxDuPpmFN)bH32BH>ZRc>`q5n{&ia7`f_=5
z-n#IG0(TQ68744I`1xqf)8z_h4P6-gepSYX#qBj@Q0Vor5j(iN-}Tk1!db}yai@iQ
z%Y___1YKW#y>r2~_g=-J2W+k@eM=Nix`+USK|jb}XiegRveoX)3q_0gb!{^j1~gP1
z`Ra2~#9{5lrbT@EHZi4(L)Jh2?ySB3srxSRoAYz;EwaoMiMns_D{Hs3@|IU6=Xox$
zGZ$E>zP)~}tTtoep-jzHZQI{}=egi}_ra%C20|R74;e%sCagKuJ*nrp_teI3GoCSZ
ztxBv6b;wNZiwIme73BRJ>V11IJ6;6F<-XAVj`uu17yqPt)L*o``MHW`?ZrhZ&Qor!
zxEdYuyK=qQrWyKn+fvuW3A`<-U#NL9L~k0`0hR2e$%^s~{0G<{Fn?hD!BE3s#~^<|
z;4kashm)EZvkG&8<%nvDYeY#(Vo9o1a#1RfVlXl=G|)9L(lsy%F|e>QF|slSa?Py_
t3_f1kse_^+H$NpatrA&-p@Efwp_PFJM8iVfO?^NO44$rjF6*2UngF(GL9ze<

literal 0
HcmV?d00001

diff --git a/resources/recipes/digitalspy_uk.recipe b/resources/recipes/digitalspy_uk.recipe
new file mode 100644
index 0000000000..ac54c3790d
--- /dev/null
+++ b/resources/recipes/digitalspy_uk.recipe
@@ -0,0 +1,43 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+www.digitalspy.co.uk
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class DigitalSpyUK(BasicNewsRecipe):
+    title                 = 'Digital Spy - UK Edition'
+    __author__            = 'Darko Miletic'
+    description           = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.'
+    publisher             = 'Digital Spy Limited.'
+    category              = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'cp1252'
+    use_embedded_content  = False
+    language              = 'en_GB'
+    remove_empty_feeds    = True
+    extra_css             = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_tags       = [dict(name=['link'])]
+    remove_attributes = ['height','width']
+    keep_only_tags    = [dict(name='div',attrs={'id':'article'})]
+
+    feeds = [
+              (u'News'          , u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml'          )
+             ,(u'Big Brother'   , u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml'   )
+             ,(u'Entertainment' , u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml')
+             ,(u'General'       , u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml'      )
+             ,(u'Media'         , u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml'        )
+            ]
+
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 78f30ecb21..8fab6a922a 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -598,7 +598,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
                             if self.series.text() is None or self.series.text() == '':
                                self.series.setText(book.series)
                                if book.series_index is not None:
-                                  self.series_index.setValue(book.series_index)          
+                                  self.series_index.setValue(book.series_index)
         else:
             error_dialog(self, _('Cannot fetch metadata'),
                          _('You must specify at least one of ISBN, Title, '

From 96ac81c3421586dac287a1683d621c458a5d3acb Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 4 Feb 2010 09:10:01 -0700
Subject: [PATCH 09/20] RTF Input: Don't eat up the space after \u escaped
 characters

---
 src/calibre/ebooks/rtf2xml/tokenize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py
index 45887f33e7..ad12daa211 100755
--- a/src/calibre/ebooks/rtf2xml/tokenize.py
+++ b/src/calibre/ebooks/rtf2xml/tokenize.py
@@ -72,7 +72,7 @@ class Tokenize:
         return line
     def __compile_expressions(self):
         self.__ms_hex_exp = re.compile(r"\\\'(..)")
-        self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}")
+        self.__utf_exp = re.compile(r"\\u(-?\d{3,6})")
         self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)")
         self.__par_exp = re.compile(r'\\$')
         self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)")

From b8b3efc5b8ffaa7bb9ff33530d9ba3b6b0a8d282 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 4 Feb 2010 09:49:41 -0700
Subject: [PATCH 10/20] ...

---
 src/calibre/manual/faq.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 9bdd9aaa6b..a3c5bd32c4 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -62,7 +62,7 @@ How do I convert my file containing non-English characters, or smart quotes?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 There are two aspects to this problem: 
   1. Knowing the encoding of the source file: |app| tries to guess what character encoding your source files use, but often, this is impossible, so you need to tell it what encoding to use. This can be done in the GUI via the :guilabel:`Input character encoding` field in the :guilabel:`Look & Feel` section. The command-line tools all have an :option:`--input-encoding` option.
-  2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first.
+  2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first. Note that when converting HTML files, leave the input encoding setting mentioned above blank. This is because the HTML2ZIP plugin automatically converts the HTML files to a standard encoding (utf-8). 
   3. Embedding fonts: If you are generating an LRF file to read on your SONY Reader, you are limited by the fact that the Reader only supports a few non-English characters in the fonts it comes pre-loaded with. You can work around this problem by embedding a unicode-aware font that supports the character set your file uses into the LRF file. You should embed atleast a serif and a sans-serif font. Be aware that embedding fonts significantly slows down page-turn speed on the reader. 
 
 

From 526e7198d7cd3e6b619776fe2e96ae42b63b6b7c Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Fri, 5 Feb 2010 07:42:54 -0700
Subject: [PATCH 11/20] GwR revisions, tweaks

---
 src/calibre/gui2/device.py     |  2 +-
 src/calibre/library/catalog.py | 73 ++++++++++++++++++++++++----------
 2 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 5a977b37a6..679e86ab48 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -149,7 +149,7 @@ class DeviceManager(Thread):
                     possibly_connected_devices.append((device, detected_device))
             if possibly_connected_devices:
                 if not self.do_connect(possibly_connected_devices):
-                    print 'Connect to device failed, retying in 5 seconds...'
+                    print 'Connect to device failed, retrying in 5 seconds...'
                     time.sleep(5)
                     if not self.do_connect(possibly_connected_devices):
                         print 'Device connect failed again, giving up'
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 726541bd4a..c0bdd19c82 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1119,12 +1119,14 @@ class EPUB_MOBI(CatalogPlugin):
                 #<p class="title"><a name="<database_id>"></a><em>Book Title</em></p>
                 emTag = Tag(soup, "em")
                 if title['series']:
-                    # Insert br at colon
+                    # title<br />series series_index
                     brTag = Tag(soup,'br')
                     title_tokens = title['title'].split(': ')
-                    emTag.insert(0, title_tokens[0] + ':')
+                    emTag.insert(0, NavigableString(title_tokens[1]))
                     emTag.insert(1, brTag)
-                    emTag.insert(2, title_tokens[1])
+                    smallTag = Tag(soup,'small')
+                    smallTag.insert(0,NavigableString(title_tokens[0]))
+                    emTag.insert(2, smallTag)
                 else:
                     emTag.insert(0, NavigableString(escape(title['title'])))
                 titleTag = body.find(attrs={'class':'title'})
@@ -1202,7 +1204,12 @@ class EPUB_MOBI(CatalogPlugin):
                 else:
                     imgTag['src']  = "../images/thumbnail_default.jpg"
                 imgTag['alt'] = "cover"
-                imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT)
+
+                # Tweak image size if we're building for Sony, not sure why this is needed
+                if self.opts.fmt == 'epub' and self.opts.output_profile.startswith("sony"):
+                    imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH * 2, self.THUMB_HEIGHT * 2)
+                else:
+                    imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT)
                 thumbnailTag = body.find(attrs={'class':'thumbnail'})
                 thumbnailTag.insert(0,imgTag)
 
@@ -1697,7 +1704,9 @@ class EPUB_MOBI(CatalogPlugin):
 
                 for genre in genre_list:
                     for key in genre:
-                        self.opts.log.info("      %s: %d titles" % (key, len(genre[key])))
+                        self.opts.log.info("      %s: %d %s" % (self.getFriendlyGenreTag(key),
+                                           len(genre[key]),
+                                           'titles' if len(genre[key]) > 1 else 'title'))
 
             # Write the results
             # genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...]
@@ -2042,7 +2051,11 @@ class EPUB_MOBI(CatalogPlugin):
                 self.playOrder += 1
                 navLabelTag = Tag(ncx_soup, "navLabel")
                 textTag = Tag(ncx_soup, "text")
-                textTag.insert(0, NavigableString(self.formatNCXText(book['title'])))
+                if book['series']:
+                    tokens = book['title'].split(': ')
+                    textTag.insert(0, NavigableString(self.formatNCXText('%s (%s)' % (tokens[1], tokens[0]))))
+                else:
+                    textTag.insert(0, NavigableString(self.formatNCXText(book['title'])))
                 navLabelTag.insert(0,textTag)
                 navPointVolumeTag.insert(0,navLabelTag)
 
@@ -2548,15 +2561,25 @@ class EPUB_MOBI(CatalogPlugin):
                         else:
                             yield tag
 
-                self.opts.log.info(u'     %d available genre tags in database (exclude_genre: %s):' % \
+                self.opts.log.info(u'     %d genre tags in database (excluding genres matching %s):' % \
                                      (len(genre_tags_dict), self.opts.exclude_genre))
 
                 # Display friendly/normalized genres
                 # friendly => normalized
-                sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
-
-                for tag in next_tag(sorted_tags):
-                    self.opts.log(u'      %s' % tag)
+                if False:
+                    sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
+                    for tag in next_tag(sorted_tags):
+                        self.opts.log(u'      %s' % tag)
+                else:
+                    sorted_tags = ['%s' % (key) for key in sorted(genre_tags_dict.keys())]
+                    out_str = ''
+                    line_break = 70
+                    for tag in next_tag(sorted_tags):
+                        out_str += tag
+                        if len(out_str) >= line_break:
+                            self.opts.log.info('      %s' % out_str)
+                            out_str = ''
+                    self.opts.log.info('      %s' % out_str)
 
             return genre_tags_dict
 
@@ -2596,13 +2619,8 @@ class EPUB_MOBI(CatalogPlugin):
             body.insert(btc,aTag)
             btc += 1
 
-            # Find the first instance of friendly_tag matching genre
-            for friendly_tag in self.genre_tags_dict:
-                if self.genre_tags_dict[friendly_tag] == genre:
-                    break
-
             titleTag = body.find(attrs={'class':'title'})
-            titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(friendly_tag)))
+            titleTag.insert(0,NavigableString('<b><i>%s</i></b>' % escape(self.getFriendlyGenreTag(genre))))
 
             # Insert the books by author list
             divTag = body.find(attrs={'class':'authors'})
@@ -2927,6 +2945,12 @@ class EPUB_MOBI(CatalogPlugin):
             else:
                 return char
 
+        def getFriendlyGenreTag(self, genre):
+            # Find the first instance of friendly_tag matching genre
+            for friendly_tag in self.genre_tags_dict:
+                if self.genre_tags_dict[friendly_tag] == genre:
+                    return friendly_tag
+
         def markdownComments(self, comments):
             '''
             Convert random comment text to normalized, xml-legal block of <p>s
@@ -3076,7 +3100,7 @@ class EPUB_MOBI(CatalogPlugin):
         opts.basename = "Catalog"
         opts.plugin_path = self.plugin_path
         opts.cli_environment = not hasattr(opts,'sync')
-        # GwR *** hardwired for the moment
+        # GwR *** hardwired to sort by author, could be an option if passed in opts
         opts.sort_descriptions_by_author = True
 
         if opts.verbose:
@@ -3087,6 +3111,15 @@ class EPUB_MOBI(CatalogPlugin):
             if opts_dict['ids']:
                 log(" Book count: %d" % len(opts_dict['ids']))
 
+            sections_list = ['Descriptions','Authors']
+            if opts.generate_titles:
+                sections_list.append('Titles')
+            if opts.generate_recently_added:
+                sections_list.append('Recently Added')
+            if not opts.exclude_genre.strip() == '.':
+                sections_list.append('Genres')
+            log(u"Creating Sections for %s" % ', '.join(sections_list))
+
             # If exclude_genre is blank, assume user wants all genre tags included
             if opts.exclude_genre.strip() == '':
                 opts.exclude_genre = '\[^.\]'
@@ -3098,8 +3131,8 @@ class EPUB_MOBI(CatalogPlugin):
             log(" opts:")
 
             for key in keys:
-                if key in ['catalog_title','exclude_genre','exclude_tags','generate_titles',
-                           'generate_recently_added','note_tag','numbers_as_text','read_tag',
+                if key in ['catalog_title','exclude_genre','exclude_tags',
+                           'note_tag','numbers_as_text','read_tag',
                            'search_text','sort_by','sort_descriptions_by_author','sync']:
                     log("  %s: %s" % (key, opts_dict[key]))
 

From 643f60f6e9635f9b8fe25de36d397852f87bc000 Mon Sep 17 00:00:00 2001
From: GRiker <griker@hotmail.com>
Date: Fri, 5 Feb 2010 09:03:54 -0700
Subject: [PATCH 12/20] Added read checkmark to Description page, fixed note
 prefix len

---
 src/calibre/library/catalog.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index c0bdd19c82..8ef9e41dc0 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1138,7 +1138,12 @@ class EPUB_MOBI(CatalogPlugin):
                 aTag['href'] = "%s.html#%s" % ("ByAlphaAuthor", self.generateAuthorAnchor(title['author']))
                 #aTag.insert(0, escape(title['author']))
                 aTag.insert(0, title['author'])
-                authorTag.insert(0, NavigableString("by "))
+
+                # Insert READ_SYMBOL
+                if title['read']:
+                    authorTag.insert(0, NavigableString(self.READ_SYMBOL + "by "))
+                else:
+                    authorTag.insert(0, NavigableString(self.NOT_READ_SYMBOL + "by "))
                 authorTag.insert(1, aTag)
 
                 '''
@@ -3054,7 +3059,7 @@ class EPUB_MOBI(CatalogPlugin):
             for tag in tags:
                 tag = self.convertHTMLEntities(tag)
                 if tag.startswith(opts.note_tag):
-                    this_title['notes'] = tag[1:]
+                    this_title['notes'] = tag[len(self.opts.note_tag):]
                 elif tag == opts.read_tag:
                     this_title['read'] = True
                 elif re.search(opts.exclude_genre, tag):

From 4018a811cb35397c0a157cb7036e32359979a0f8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 5 Feb 2010 21:17:45 -0700
Subject: [PATCH 13/20] El Comercio by Darko Miletic

---
 resources/images/news/elcomercio.png | Bin 0 -> 764 bytes
 resources/recipes/elcomercio.recipe  |  38 +++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 resources/images/news/elcomercio.png
 create mode 100644 resources/recipes/elcomercio.recipe

diff --git a/resources/images/news/elcomercio.png b/resources/images/news/elcomercio.png
new file mode 100644
index 0000000000000000000000000000000000000000..df484860dde90a1a0e4be214f11df2c8920fc23c
GIT binary patch
literal 764
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?@N&T^vI!PA{Ew-#a)_;<){(dyD7#o}cyn
z2XiuW{e_Sj9NS`DOM(RgCF8mr#qYgY^!nW&?mZp9m|q=|mMl|rG->i$xp~*j<};4=
z+ZJk@pH;qHm@A$y?%wYaG2#B~>H6nC*Zlaj$A9vapdzkHhHqJ=V#chKZYo_|aBePl
z?UwCY3@V#lr|r)RTy=#n>+f-fvkaF%YwM(aa%r|&Su^wHPf-TB{r`Pz5}5?vU5PmJ
z<bTfukLOQ*+?llHTm4Dr-t)RA#b3|<^l9<)PX}L{{xq*N(~)kdP%7ow^X+ib;Z2>b
zU+Uj4SnDdlD`)D~Y_)O1xx-I|mwBl^H|4Q$j+OGaGua`_d1>`Z8NIE0LlakcA3ifR
z_D9^>bLY?cIz4BXJ#dK0H^@uUf5wtesfXrDF#sL(_4js<wC*FqYC!F|x=sGkV&8wM
z1Wy*`OGrD$D8n~%r_tV7UOG#5XPW8EPT6)MZtX#r8xQ7OWc;{;dB$8@ra#{m-@b`b
zxxa65(AL0~2e+1X+f;vTt6Kc<*YcCoHJk$SO+Hl4O_{pt?2UpMX~uiKmS!23ezr1S
z7kAKS{iDyT?#!Lz5Xh2NCECEZ{}@n-*VVL?1@jx?A6Kt@6O(jc{X&k1^YXg|7`Cij
zCB60b1;ZXc29y7h+n4XWec|TKttV=Kw#_{E;Pa{+`5$j%Pxsb5;&WqTXZR7J0Tesp
zro4{->o)=E-hh?2Sfb7^;NrJ!>(jDf`BT?$`TwE(N}fL}e>{4B6qtHcOI#yLQW8s2
zt&)pUffR$0fuVt}fsw9(X^4@bm7$rHv8Aqoxs`#zmDKgZC>nC}Q!>*kku?~aTbY<x
Z83FZ!G=%D0nGMvy;OXk;vd$@?2>=rAP2>Oo

literal 0
HcmV?d00001

diff --git a/resources/recipes/elcomercio.recipe b/resources/recipes/elcomercio.recipe
new file mode 100644
index 0000000000..37733bda8b
--- /dev/null
+++ b/resources/recipes/elcomercio.recipe
@@ -0,0 +1,38 @@
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+elcomercio.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ElComercio(BasicNewsRecipe):
+    title                 = 'El Comercio '
+    __author__            = 'Darko Miletic'
+    description           = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural."
+    publisher             = 'GRUPO EL COMERCIO C.A.'
+    category              = 'news, Ecuador, politics'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    encoding              = 'utf-8'
+    use_embedded_content  = True
+    language              = 'es'
+    masthead_url          = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif'
+    extra_css             = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} '
+
+    conversion_options = {
+                          'comment'   : description
+                        , 'tags'      : category
+                        , 'publisher' : publisher
+                        , 'language'  : language
+                        }
+
+    remove_attributes = ['width','height']
+
+    feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)
+

From bf8324b6227c5339fd44290b3f84d81c524cacae Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 5 Feb 2010 21:29:18 -0700
Subject: [PATCH 14/20] Searching on the device: Ignore unicode errors

---
 src/calibre/gui2/library.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py
index fd4f8999b4..bf45584df8 100644
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@@ -903,9 +903,14 @@ class OnDeviceSearch(SearchQueryParser):
             locations[i] = q[v]
         for i, r in enumerate(self.model.db):
             for loc in locations:
-                if query in loc(r):
-                    matches.add(i)
-                    break
+                try:
+                    if query in loc(r):
+                        matches.add(i)
+                        break
+                except ValueError: # Unicode errors
+                    import traceback
+                    traceback.print_exc()
+                    pass
         return matches
 
 

From 97ba8f07a44f6ee35ac4dc909745ec113d4cc795 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Feb 2010 12:18:28 -0700
Subject: [PATCH 15/20] News download: Automatically remove <base> tags in the
 postprocess phase as they cause links in generated EPUB files to not work

---
 src/calibre/gui2/library.py   | 1 -
 src/calibre/web/feeds/news.py | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py
index bf45584df8..9b8210c75e 100644
--- a/src/calibre/gui2/library.py
+++ b/src/calibre/gui2/library.py
@@ -910,7 +910,6 @@ class OnDeviceSearch(SearchQueryParser):
                 except ValueError: # Unicode errors
                     import traceback
                     traceback.print_exc()
-                    pass
         return matches
 
 
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index de8eaf6ac5..540f7cd93a 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -615,10 +615,12 @@ class BasicNewsRecipe(Recipe):
                 del o['onload']
 
         for script in list(soup.findAll('noscript')):
-                script.extract()
+            script.extract()
         for attr in self.remove_attributes:
             for x in soup.findAll(attrs={attr:True}):
                 del x[attr]
+        for base in list(soup.findAll('base')):
+            base.extract()
         return self.postprocess_html(soup, first_fetch)
 
 

From 4e3a316c70b263a811ba8e2ff96db7adf0017e74 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Feb 2010 13:02:08 -0700
Subject: [PATCH 16/20] Fix #4816 (build+install issues)

---
 resources/recipes/metro_montreal.recipe | 4 ++--
 setup/resources.py                      | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/resources/recipes/metro_montreal.recipe b/resources/recipes/metro_montreal.recipe
index 8272c760cc..b7f60349df 100644
--- a/resources/recipes/metro_montreal.recipe
+++ b/resources/recipes/metro_montreal.recipe
@@ -16,7 +16,7 @@ class Metro_Montreal(BasicNewsRecipe):
     extra_css             = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
 
     remove_tags = [dict(attrs={'id':'buttons'})]
-    
+
     feeds = [
              (u"L'info", u'http://journalmetro.com/linfo/rss'),
              (u'Monde', u'http://journalmetro.com/monde/rss'),
@@ -26,4 +26,4 @@ class Metro_Montreal(BasicNewsRecipe):
             ]
 
     def print_version(self, url):
-          return url.replace('article', 'ArticlePrint') + '?language=fr'
\ No newline at end of file
+          return url.replace('article', 'ArticlePrint') + '?language=fr'
diff --git a/setup/resources.py b/setup/resources.py
index d40d31bbf5..977d753828 100644
--- a/setup/resources.py
+++ b/setup/resources.py
@@ -48,7 +48,9 @@ class Resources(Command):
         dest = self.j(self.RESOURCES, 'builtin_recipes.xml')
         if self.newer(dest, files):
             self.info('\tCreating builtin_recipes.xml')
-            open(dest, 'wb').write(serialize_builtin_recipes())
+            xml = serialize_builtin_recipes()
+            with open(dest, 'wb') as f:
+                f.write(xml)
 
         dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle')
         files = []

From 4115fd1168adfa1d2caeffee3b9cdf752a6d35b8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Feb 2010 13:05:41 -0700
Subject: [PATCH 17/20] Fix #4814 (Modified PeopleUsMashup)

---
 resources/recipes/people_us_mashup.recipe | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/resources/recipes/people_us_mashup.recipe b/resources/recipes/people_us_mashup.recipe
index 38d750cd4c..ed43e24e56 100644
--- a/resources/recipes/people_us_mashup.recipe
+++ b/resources/recipes/people_us_mashup.recipe
@@ -31,7 +31,7 @@ class PeopleMag(BasicNewsRecipe):
 
 
     keep_only_tags = [
-              dict(name='div', attrs={'class': 'panel_news_article_main'}),
+              dict(name='div', attrs={'class': 'panel_news_article_main'}), 	
 	        dict(name='div', attrs={'class':'article_content'}),
               dict(name='div', attrs={'class': 'headline'}),
               dict(name='div', attrs={'class': 'post'}),
@@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe):
          dict(name='div', attrs={'class':'sharelinkcont'}),
          dict(name='div', attrs={'class':'categories'}),
          dict(name='ul', attrs={'class':'categories'}),
+         dict(name='div', attrs={'class':'related_content'}),
          dict(name='div', attrs={'id':'promo'}),
          dict(name='div', attrs={'class':'linksWrapper'}),
          dict(name='p', attrs={'class':'tag tvnews'}),

From fe3152e8c34e585c5b3bc5687dfd77adcf8d7319 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Feb 2010 13:07:14 -0700
Subject: [PATCH 18/20] Fix #4815 (additional REMOVE_TAGS for Harvard Business
 Review)

---
 resources/recipes/hbr.recipe | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/resources/recipes/hbr.recipe b/resources/recipes/hbr.recipe
index b84062af8c..3d1e8ccfac 100644
--- a/resources/recipes/hbr.recipe
+++ b/resources/recipes/hbr.recipe
@@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe):
     remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
         'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
         'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
-        'mailingListTout', 'partnerCenter', 'pageFooter']),
+        'mailingListTout', 'partnerCenter', 'pageFooter',
+        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
         dict(name='iframe')]
     extra_css = '''
                 a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }

From 411b796ba1c9bdaf9e5adae1183bac5fb34ecd07 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Feb 2010 21:14:37 -0700
Subject: [PATCH 19/20] Fix #4816 (build+install issues)

---
 resources/recipes/metro_montreal.recipe | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/recipes/metro_montreal.recipe b/resources/recipes/metro_montreal.recipe
index b7f60349df..c2054bdeec 100644
--- a/resources/recipes/metro_montreal.recipe
+++ b/resources/recipes/metro_montreal.recipe
@@ -4,7 +4,7 @@ class Metro_Montreal(BasicNewsRecipe):
 
     title          = u'M\xe9tro Montr\xe9al'
     __author__     = 'Jerry Clapperton'
-    description    = 'Le quotidien le plus branch� sur le monde'
+    description    = 'Le quotidien le plus branch\xe9 sur le monde'
     language       = 'fr'
 
     oldest_article        = 7

From 3ae86efb6fd01140063db1feb7c0d7fcd5df2531 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Feb 2010 21:20:11 -0700
Subject: [PATCH 20/20] Zive.sk and iliterature.cz by Abelturd

---
 resources/recipes/ZIVE.sk.recipe        | 45 +++++++++++++++++++++++
 resources/recipes/iliteratura_cz.recipe | 47 +++++++++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 resources/recipes/ZIVE.sk.recipe
 create mode 100644 resources/recipes/iliteratura_cz.recipe

diff --git a/resources/recipes/ZIVE.sk.recipe b/resources/recipes/ZIVE.sk.recipe
new file mode 100644
index 0000000000..e5bfd56cef
--- /dev/null
+++ b/resources/recipes/ZIVE.sk.recipe
@@ -0,0 +1,45 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+
+class ZiveRecipe(BasicNewsRecipe):
+    __license__  = 'GPL v3'
+    __author__ = 'Abelturd'
+    language = 'sk'
+    version = 1
+
+    title = u'ZIVE.sk'
+    publisher = u''
+    category = u'News, Newspaper'
+    description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. '
+    encoding = 'UTF-8'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    no_stylesheets = True
+    remove_javascript = True
+    cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif'
+
+    feeds = []
+    feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx'))
+
+    preprocess_regexps = [
+        (re.compile(r'<p><p><strong>Pokra.*ie</strong></p>', re.DOTALL|re.IGNORECASE),
+        lambda match: ''),
+
+     ]
+
+
+    remove_tags = []
+
+    keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),]
+    extra_css = '''
+                h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
+                h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
+                '''
+
+
diff --git a/resources/recipes/iliteratura_cz.recipe b/resources/recipes/iliteratura_cz.recipe
new file mode 100644
index 0000000000..7d603f0cec
--- /dev/null
+++ b/resources/recipes/iliteratura_cz.recipe
@@ -0,0 +1,47 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class SmeRecipe(BasicNewsRecipe):
+    __license__  = 'GPL v3'
+    __author__ = 'Abelturd'
+    language = 'cz'
+    version = 1
+
+    title = u'iLiteratura.cz'
+    publisher = u''
+    category = u'News, Newspaper'
+    description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA'
+    cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = False
+    remove_empty_feeds = True
+
+    no_stylesheets = True
+    remove_javascript = True
+
+
+    feeds = []
+    feeds.append((u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp'))
+
+
+    keep_only_tags = []
+
+    remove_tags = [dict(name='table'),dict(name='h3')]
+
+
+    preprocess_regexps = [
+        (re.compile(r'<h3>Souvisej.*</body>', re.DOTALL|re.IGNORECASE),
+        lambda match: ''),
+     ]
+
+    def print_version(self, url):
+         m = re.search('(?<=ID=)[0-9]*', url)
+
+         return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + str(m.group(0)) + '&c=tisk'
+
+    extra_css = '''
+                  h1 {font-size:140%;font-family:georgia,serif; font-weight:bold}
+                  h3 {font-size:115%;font-family:georgia,serif; font-weight:bold}
+                '''