From 9ea276be209aee48f0927191d5bedf5378eb70af Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Feb 2010 10:22:00 -0700 Subject: [PATCH 01/20] Fix #4779 (Wall Street Journal (Free Content)) --- resources/recipes/the_gazette.recipe | 22 ----------------- resources/recipes/wsj_free.recipe | 2 +- src/calibre/ebooks/pdf/reflow.py | 35 ++++++++++++++++++++++------ 3 files changed, 29 insertions(+), 30 deletions(-) delete mode 100644 resources/recipes/the_gazette.recipe diff --git a/resources/recipes/the_gazette.recipe b/resources/recipes/the_gazette.recipe deleted file mode 100644 index 19afff986e..0000000000 --- a/resources/recipes/the_gazette.recipe +++ /dev/null @@ -1,22 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe - -class The_Gazette(BasicNewsRecipe): - - cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg' - title = u'The Gazette' - __author__ = 'Jerry Clapperton' - description = 'Montreal news in English' - language = 'en_CA' - - oldest_article = 7 - max_articles_per_feed = 20 - use_embedded_content = False - remove_javascript = True - no_stylesheets = True - encoding = 'utf-8' - - keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})] - - extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}' - - feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')] diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe index b190f43849..e29bfe3dde 100644 --- a/resources/recipes/wsj_free.recipe +++ b/resources/recipes/wsj_free.recipe @@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe): # first, check if there is an h3 tag which provides a section name stag = divtag.find('h3') if stag: - if stag.parent['class'] == 'dynamic': + if stag.parent.get('class', '') == 'dynamic': # a carousel of articles is too complex to extract a section name # for each article, so we'll just call the section "Carousel" section_name = 'Carousel' diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 9f98147032..552af1590f 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -262,7 +262,6 @@ class Region(object): max_lines = max(max_lines, len(c)) return max_lines - @property def is_small(self): return self.line_count < 3 @@ -438,9 +437,8 @@ class Page(object): # absorb into a neighboring region (prefer the one with number of cols # closer to the avg number of cols in the set, if equal use larger # region) - # merge contiguous regions that can contain each other - '''absorbed = set([]) found = True + absorbed = set([]) while found: found = False for i, region in enumerate(self.regions): @@ -452,10 +450,33 @@ class Page(object): regions.append(self.regions[j]) else: break - prev = None if i == 0 else i-1 - next = j if self.regions[j] not in regions else None - ''' - pass + prev_region = None if i == 0 else i-1 + next_region = j if self.regions[j] not in regions else None + if prev_region is None and next_region is not None: + absorb_into = next_region + elif next_region is None and prev_region is not None: + absorb_into = prev_region + elif prev_region is None and next_region is None: + if len(regions) > 1: + absorb_into = regions[0] + regions = regions[1:] + else: + absorb_into = None + else: + absorb_into = prev_region + if next_region.line_count >= prev_region.line_count: + avg_column_count = sum([len(r.columns) for r in + regions])/float(len(regions)) + if next_region.line_count > prev_region.line_count \ + or abs(avg_column_count - len(prev_region.columns)) \ + > abs(avg_column_count - len(next_region.columns)): + absorb_into = next_region + if absorb_into is not None: + absorb_into.absorb_region(regions) + absorbed.update(regions) + i = j + for region in absorbed: + self.regions.remove(region) From 4ecab6bc9ee483ddeddb77b8681635b5ab9918e6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Feb 2010 10:54:23 -0700 Subject: [PATCH 02/20] New recipes for Gizmodo, News Straits Times, Read It Later, TidBits by Darko Miletic --- resources/images/news/gizmodo.png | Bin 0 -> 640 bytes resources/images/news/newsstraitstimes.png | Bin 0 -> 816 bytes resources/images/news/readitlater.png | Bin 0 -> 810 bytes resources/images/news/tidbits.png | Bin 0 -> 783 bytes resources/recipes/gizmodo.recipe | 40 +++++++++++++ resources/recipes/newsstraitstimes.recipe | 35 +++++++++++ resources/recipes/readitlater.recipe | 64 +++++++++++++++++++++ resources/recipes/tidbits.recipe | 53 +++++++++++++++++ 8 files changed, 192 insertions(+) create mode 100644 resources/images/news/gizmodo.png create mode 100644 resources/images/news/newsstraitstimes.png create mode 100644 resources/images/news/readitlater.png create mode 100644 resources/images/news/tidbits.png create mode 100644 resources/recipes/gizmodo.recipe create mode 100644 resources/recipes/newsstraitstimes.recipe create mode 100644 resources/recipes/readitlater.recipe create mode 100644 resources/recipes/tidbits.recipe diff --git a/resources/images/news/gizmodo.png b/resources/images/news/gizmodo.png new file mode 100644 index 0000000000000000000000000000000000000000..8f2e6f002b7719ac70fb67d31b6f5b6785d2c140 GIT binary patch literal 640 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI zAngIhZYQ(tK!Rljj_E*J0gT&!&6&%? z2M}o7@qdy2X8D_R5z#R2RZUt7s}^Y~Bsd&sU<4{-Wb?n(G_z;R?cC6<##}SKbt1p* zK7Re$=ex_--=EK4dRTXhp6#mubDNqS`#BFhKCXVxH&fw&DtF|Q?b%mEj=h{@c_sbU zj1!iNryP=Ue#z~%zar7f%0XJ#f48z``j#MxwI`HJ6fd=OT$GC6k;$-$BmUdmo%%Aj zLpzPzbY>>sZ3xmo-$UcZ*N+IWCGg^MliL*^1$8a`S$b5mw)y>v;FF%`8m3a z93>`lTL5EEwZt`|BqgyV)hf9t6-Y4{85kPq8W`ys7>5`dSeckv8CdEXm|Gbb+`K)J p14Tn_eoAIqC9(zs3oBDYD-#QdhPhGT^vI!P9L3o)?1`d;<)|(n~`^Keq0(` z>V8*1$-^-GpgGqej{dFH7e4WGEKzzD(C#Pe@>0&FV~Hb&tA6_;g&WS-GR=2)EKyB$ znw2+8@6oQ!n~gueHgNlH^JUKb=cm8F6%6t^Y3FwH{6~An8H>N)*jd4vDasfyanhf+ zyE$w-E;NhW-{YK{uWV2uS62ot0A#w+A>W7MZsT*vT(i!}Io7DgOZ(>Ep3c zPH!z6cTYRG=Rd=rx;585*F4wPw7uSb!DLCM5|h-A+8x=F8+81eG_DKo^85Ki<+UwCW|YTmHqC_*E=f&q7i`P>(lsxCG1CfP_X2J01Ml5K!WSR23Q)Q#B4>BZufD;7 z;q9SRFScKdC;EGn`+Nf?S@cc_^76$lhazt3;%$bt2X3wtt_C#~;-Pj+^Qr+?UF5S4&ktm@u+0)9(Ix{1~LBuDx zkEj2_)w`}%Q`Pp*ejJjo|1v}>G*nZ>C#-(WrSfg_fA$wYIcu{qqUYuJ%RIe2i&Kp3 z4SAX?lp3c@3a~gcvqI+Ot|vv-BD&qbGNu4UwPVHX^aJE=Ph6ZPHetaPm#m1$$Y*b_ zh?H&gP)VvV%9Xxdwjo#gdMV?I6)TRcFr9GeXrYYA{6{lqoK3qKaeu+Kbz+JvyVgy< zd+XNcFr}6U4jM<^Dcqi;*ubp=O`b1!gQ!R0gC`m~y zNwrEYN(E93Mh1okx&}tN2F4+V2396!R;Gr!2If`<28*s-%V9OXW$Ob!!_F3|gj4+SMZPM}KYU<8@OWZHPH!5ous3_T=GT z?-^+v0SzgdpC=zb|7lJB^$Si5w&pRJG6Z?$@wvW`t837&5i#e<&)v>b zE^#OZc$wa*T6FP*GQ<4y$Kx)hsqA!IbKXHJd%<)=B~GVnQlIU=X-?~ZoVm78=C{Oy z@4q!8g(k}eoh&=kC)X0~ux%#GU&cjSP3?3nuH4ODTbMK3?RtxVg=xjto2i<%&6~G# zA7T&dD|n=I-A%vX>7GV`MTN7JSeO`>Ou5l_K)-9rFW*HU<65+oOGNVxJkLa`%w3rj z-Y;`wTB*??K?jeMRRcIV8Tdm)> z16D6uZ5G0BV9n-vWOjCMSVKibq2A3_$+s=DSl2P#b*oFAr7yXC<+m-&YbE`sO6;_A z(-3kDKgIEwCwhPQjGtT16ivI<@T;xVExU(T{lyDe2aDevYXe&%|NfdcU0yXT)I2v? zSm8nFot-g>FOFF}yQz}FIFWJFOq0vS(K%v$@>!f7+oFB$$f;hNt7L5I`SC(?s^pUN zv`XXr2fKcWELhnY&c3(8Rxdx^Cbi7&_}<-mj*%&^sypX4o5d`Zvj2ERWJ=JUdCfj$ zjd#|y&kdUK_LKQ1*MsZ#h+hwVA-s2$^FPK@T3QA+^$|_Lw5D3(8c~vxSdwa$T$Bo= z7>o=I4Rj5RbPbF{3=OPI%&m;fbq&m|3=CA5+r&^b||EMpFO) literal 0 HcmV?d00001 diff --git a/resources/images/news/tidbits.png b/resources/images/news/tidbits.png new file mode 100644 index 0000000000000000000000000000000000000000..e64d71ec6887f57c6f66464bb90208ffe33978f9 GIT binary patch literal 783 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>OZ&!)pF42;O_6J(C3ljy)|1v9;Bzn6ZbT}F{HNh$Q zc+J$bSt)AQU2|V;y0LjrL7d+C-!coQ1j!^kah%5ZWo^`v=F_KnWG-qxab{@qpA}M9 zw&U-W^pB;ztvef5MrUr?cl6sbC5(8dK~VocSdbuT6_9tZ5GFdGcvvf6^D)(@GO~9vb(En_t_UIljh7% z5x*F=*;D4YMW6Gv?LYS2Sr(zQ=(*b@i<>-!GRYFWi!=^x%XRaAS`a(^&uuM%1+S&1 zr9P~bTNcT@Aa4EQ>{)G(Ei{%21bulR(bo8SSHv~G>G$ueWOpkbTyDgzUtjpE&4r^; zuf^edDZ>%_lBH8#?CMi1@4BU<&Gl1lWk~Cr87gnL7sQI!?l0w?|JJ}P)xc!RlMB{I zSO3<{iGF*Z+4u4Xn|>d)!(X-vGT5DK|NXyl0@LYDK)1LkF*?W`|DXQo(jPX4vSr;j z_O|Y9kFWf^$VH}Ha=(c2=Zrbm+Kzv&UM=#;_flQY?fit9Vn?Uu?va_ZJNPE|k3AwE z9(#9wEt0$(IA`aJ$z^Pjwed?sw1U=~Z_wdBzRggk)mUVbhsrmmsI_8Ov!=Q&W^~Y+ zx@i6Se!u0;OM?`7oR>^Fv43mUthU#?su{gB7ilcI{q~#91ry$-B84*F?=iLr*!urt zXm`DxsP)vV9GIX~OI#yLQW8s2t&)pUffR$0fuVt}fsw9(afqRTm5Hg9k)f`Exs`!| tvs$tsiiX_$l+3hBWDN!uRz}8Fra=854SVte<$)R)JYD@<);T3K0RR{HNE-kE literal 0 HcmV?d00001 diff --git a/resources/recipes/gizmodo.recipe b/resources/recipes/gizmodo.recipe new file mode 100644 index 0000000000..6f6e6ae0cf --- /dev/null +++ b/resources/recipes/gizmodo.recipe @@ -0,0 +1,40 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +gizmodo.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Gizmodo(BasicNewsRecipe): + title = 'Gizmodo' + __author__ = 'Darko Miletic' + description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural." + publisher = 'gizmodo.com' + category = 'news, IT, Internet, gadgets' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = True + language = 'en' + masthead_url = 'http://cache.gawkerassets.com/assets/gizmodo.com/img/logo.png' + extra_css = ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif} img{margin-bottom: 1em} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_attributes = ['width','height'] + remove_tags = [dict(name='div',attrs={'class':'feedflare'})] + remove_tags_after = dict(name='div',attrs={'class':'feedflare'}) + + feeds = [(u'Articles', u'http://feeds.gawker.com/gizmodo/full')] + + def preprocess_html(self, soup): + return self.adeify_images(soup) + diff --git a/resources/recipes/newsstraitstimes.recipe b/resources/recipes/newsstraitstimes.recipe new file mode 100644 index 0000000000..ebbaca1a0e --- /dev/null +++ b/resources/recipes/newsstraitstimes.recipe @@ -0,0 +1,35 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +www.nst.com.my +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Newstraitstimes(BasicNewsRecipe): + title = 'New Straits Times from Malaysia' + __author__ = 'Darko Miletic' + description = 'Learning Curve, Sunday People, New Straits Times from Malaysia' + publisher = 'nst.com.my' + category = 'news, politics, Malaysia' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'cp1252' + use_embedded_content = False + language = 'en' + masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [dict(name=['link','table'])] + keep_only_tags = dict(name='div',attrs={'id':'haidah'}) + + feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')] + diff --git a/resources/recipes/readitlater.recipe b/resources/recipes/readitlater.recipe new file mode 100644 index 0000000000..4bd8fc2bd6 --- /dev/null +++ b/resources/recipes/readitlater.recipe @@ -0,0 +1,64 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +readitlaterlist.com +''' + +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class Readitlater(BasicNewsRecipe): + title = 'Read It Later' + __author__ = 'Darko Miletic' + description = '''Personalized news feeds. Go to readitlaterlist.com to + setup up your news. Fill in your account + username, and optionally you can add password.''' + publisher = 'readitlater.com' + category = 'news, custom' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + needs_subscription = True + INDEX = u'http://readitlaterlist.com' + LOGIN = INDEX + u'/l' + + + feeds = [(u'Unread articles' , INDEX + u'/unread')] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None: + br.open(self.LOGIN) + br.select_form(nr=0) + br['feed_id'] = self.username + if self.password is not None: + br['password'] = self.password + br.submit() + return br + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + ritem = soup.find('ul',attrs={'id':'list'}) + for item in ritem.findAll('li'): + description = '' + atag = item.find('a',attrs={'class':'text'}) + if atag and atag.has_key('href'): + url = self.INDEX + atag['href'] + title = self.tag_to_string(item.div) + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds + diff --git a/resources/recipes/tidbits.recipe b/resources/recipes/tidbits.recipe new file mode 100644 index 0000000000..702c65e9e4 --- /dev/null +++ b/resources/recipes/tidbits.recipe @@ -0,0 +1,53 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +db.tidbits.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class TidBITS(BasicNewsRecipe): + title = 'TidBITS: Mac News for the Rest of Us' + __author__ = 'Darko Miletic' + description = 'Insightful news, reviews, and analysis of the Macintosh and Internet worlds' + publisher = 'TidBITS Publishing Inc.' + category = 'news, Apple, Macintosh, IT, Internet' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = True + language = 'en' + remove_empty_feeds = True + masthead_url = 'http://db.tidbits.com/images/tblogo9.gif' + extra_css = ' body{font-family: Georgia,"Times New Roman",Times,serif} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_attributes = ['width','height'] + remove_tags = [dict(name='small')] + remove_tags_after = dict(name='small') + + feeds = [ + (u'Business Apps' , u'http://db.tidbits.com/feeds/business.rss' ) + ,(u'Entertainment' , u'http://db.tidbits.com/feeds/entertainment.rss') + ,(u'External Links' , u'http://db.tidbits.com/feeds/links.rss' ) + ,(u'Home Mac' , u'http://db.tidbits.com/feeds/home.rss' ) + ,(u'Inside TidBITS' , u'http://db.tidbits.com/feeds/inside.rss' ) + ,(u'iPod & iPhone' , u'http://db.tidbits.com/feeds/ipod-iphone.rss' ) + ,(u'Just for Fun' , u'http://db.tidbits.com/feeds/fun.rss' ) + ,(u'Macs & Mac OS X' , u'http://db.tidbits.com/feeds/macs.rss' ) + ,(u'Media Creation' , u'http://db.tidbits.com/feeds/creative.rss' ) + ,(u'Networking & Communications', u'http://db.tidbits.com/feeds/net.rss' ) + ,(u'Opinion & Editorial' , u'http://db.tidbits.com/feeds/opinion.rss' ) + ,(u'Support & Problem Solving' , u'http://db.tidbits.com/feeds/support.rss' ) + ,(u'Safe Computing' , u'http://db.tidbits.com/feeds/security.rss' ) + ,(u'Tech News' , u'http://db.tidbits.com/feeds/tech.rss' ) + ,(u'Software Watchlist' , u'http://db.tidbits.com/feeds/watchlist.rss' ) + ] From cea60d5fd892f3fa1a03b768e9c865a9fa9bbbd0 Mon Sep 17 00:00:00 2001 From: GRiker Date: Wed, 3 Feb 2010 14:07:01 -0700 Subject: [PATCH 03/20] Added series to descriptions/titles --- resources/catalog/stylesheet.css | 10 +- src/calibre/library/catalog.py | 190 +++++++++++++++++++++++++++---- 2 files changed, 177 insertions(+), 23 deletions(-) diff --git a/resources/catalog/stylesheet.css b/resources/catalog/stylesheet.css index b5770599e6..80f4e50cc3 100644 --- a/resources/catalog/stylesheet.css +++ b/resources/catalog/stylesheet.css @@ -17,6 +17,14 @@ p.author { font-size:large; } +p.series { + margin-top:0em; + margin-bottom:0em; + text-align: left; + text-indent: 1em; + font-size:small; + } + p.tags { margin-top:0em; margin-bottom:0em; @@ -27,7 +35,7 @@ p.tags { p.description { text-align:left; - font-style:italic; + font-style:normal; margin-top: 0em; } diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 78155326dc..5110a2eee1 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -927,8 +927,16 @@ class EPUB_MOBI(CatalogPlugin): for record in data: this_title = {} - title = this_title['title'] = self.convertHTMLEntities(record['title']) - this_title['title_sort'] = self.generateSortTitle(title) + this_title['title'] = self.convertHTMLEntities(record['title']) + if record['series']: + this_title['series'] = record['series'] + this_title['series_index'] = record['series_index'] + this_title['title'] = self.generateSeriesTitle(this_title) + else: + this_title['series'] = None + this_title['series_index'] = 0.0 + + this_title['title_sort'] = self.generateSortTitle(this_title['title']) if 'authors' in record and len(record['authors']): this_title['author'] = " & ".join(record['authors']) else: @@ -984,12 +992,61 @@ class EPUB_MOBI(CatalogPlugin): def fetchBooksByAuthor(self): # Generate a list of titles sorted by author from the database + def author_compare(x,y): + # Return -1 if xy + #print "x['author_sort']: %s y['author_sort']: %s" % (x['author_sort'],y['author_sort']) + if x['author_sort'] > y['author_sort']: + return 1 + elif x['author_sort'] < y['author_sort']: + return -1 + else: + # Authors equal + # Books w/o series go first + if x['series'] > y['series']: + return 1 + elif x['series'] < y['series']: + return -1 + elif not x['series'] and not y['series']: + if x['title'] > y['title']: + return 1 + else: + return -1 + else: + # Both books have series + if x['series'] == y['series']: + if float(x['series_index']) > float(y['series_index']): + return 1 + elif float(x['series_index']) < float(y['series_index']): + return -1 + else: + return 0 + else: + if x['series'] > y['series']: + return 1 + else: + return -1 self.updateProgressFullStep("Sorting database") - # Sort titles case-insensitive + ''' + # Sort titles case-insensitive, by author self.booksByAuthor = sorted(self.booksByTitle, key=lambda x:(x['author_sort'].upper(), x['author_sort'].upper())) + ''' + + self.booksByAuthor = list(self.booksByTitle) + self.booksByAuthor.sort(author_compare) + + if False and self.verbose: + self.opts.log.info("fetchBooksByAuthor(): %d books" % len(self.booksByAuthor)) + self.opts.log.info(" %-40s %-20s %s" % ('title', 'series', 'series_index')) + for title in self.booksByAuthor: + self.opts.log.info((u" %-40s %-20s %s" % \ + (title['title'][0:40], + title['series'][0:20] if title['series'] else '', + title['series_index'])).encode('utf-8')) # Build the unique_authors set from existing data authors = [(record['author'], record['author_sort']) for record in self.booksByAuthor] @@ -1063,7 +1120,15 @@ class EPUB_MOBI(CatalogPlugin): # Insert the book title #

Book Title

emTag = Tag(soup, "em") - emTag.insert(0, NavigableString(escape(title['title']))) + if title['series']: + # Insert br at colon + brTag = Tag(soup,'br') + title_tokens = title['title'].split(': ') + emTag.insert(0, title_tokens[0] + ':') + emTag.insert(1, brTag) + emTag.insert(2, title_tokens[1]) + else: + emTag.insert(0, NavigableString(escape(title['title']))) titleTag = body.find(attrs={'class':'title'}) titleTag.insert(0,emTag) @@ -1085,6 +1150,27 @@ class EPUB_MOBI(CatalogPlugin): tagsTag.insert(0,emTag) ''' + ''' + # Insert Series info or remove. + seriesTag = body.find(attrs={'class':'series'}) + if title['series']: + # Insert a spacer to match the author indent + stc = 0 + fontTag = Tag(soup,"font") + fontTag['style'] = 'color:white;font-size:large' + if self.opts.fmt == 'epub': + fontTag['style'] += ';opacity: 0.0' + fontTag.insert(0, NavigableString("by ")) + seriesTag.insert(stc, fontTag) + stc += 1 + if float(title['series_index']) - int(title['series_index']): + series_str = 'Series: %s [%4.2f]' % (title['series'], title['series_index']) + else: + series_str = '%s [%d]' % (title['series'], title['series_index']) + seriesTag.insert(stc,NavigableString(series_str)) + else: + seriesTag.extract() + ''' # Insert linked genres if 'tags' in title: tagsTag = body.find(attrs={'class':'tags'}) @@ -1367,6 +1453,7 @@ class EPUB_MOBI(CatalogPlugin): aTag = Tag(soup, "a") aTag['href'] = "book_%d.html" % (int(float(book['id']))) + # Use series, series index if avail else just title aTag.insert(0,escape(book['title'])) pBookTag.insert(ptc, aTag) ptc += 1 @@ -1786,7 +1873,9 @@ class EPUB_MOBI(CatalogPlugin): mtc += 1 # HTML files - add books to manifest and spine - for book in self.booksByTitle: + sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \ + else self.booksByTitle + for book in sort_descriptions_by: # manifest itemTag = Tag(soup, "item") itemTag['href'] = "content/book_%d.html" % int(book['id']) @@ -1912,7 +2001,9 @@ class EPUB_MOBI(CatalogPlugin): nptc += 1 # Loop over the titles - for book in self.booksByTitle: + sort_descriptions_by = self.booksByAuthor if self.opts.sort_descriptions_by_author \ + else self.booksByTitle + for book in sort_descriptions_by: navPointVolumeTag = Tag(ncx_soup, 'navPoint') navPointVolumeTag['class'] = "article" navPointVolumeTag['id'] = "book%dID" % int(book['id']) @@ -2553,6 +2644,7 @@ class EPUB_MOBI(CatalogPlugin):

{0}

+

 

@@ -2678,6 +2770,17 @@ class EPUB_MOBI(CatalogPlugin): draw.text((left, top), text, fill=(0,0,0), font=font) img.save(open(out_path, 'wb'), 'GIF') + def generateSeriesTitle(self, title): + if float(title['series_index']) - int(title['series_index']): + series_title = '%s %4.2f: %s' % (title['series'], + title['series_index'], + title['title']) + else: + series_title = '%s %d: %s' % (title['series'], + title['series_index'], + title['title']) + return series_title + def generateShortDescription(self, description): # Truncate the description to description_clip, on word boundaries if necessary if not description: @@ -2777,24 +2880,65 @@ class EPUB_MOBI(CatalogPlugin): def markdownComments(self, comments): ''' Convert random comment text to normalized, xml-legal block of

s''' - # reformat illegal xml - desc = prepare_string_for_xml(comments) - # normalize
tags - desc = re.sub(r'<br[/]{0,1}>', '
', desc) + comments = comments.replace('\r', '') + if re.search('\n\n', comments): + soup = BeautifulSoup() + split_ps = comments.split('\n\n') + tsc = 0 + for p in split_ps: + pTag = Tag(soup,'p') + pTag.insert(0,p) + soup.insert(tsc,pTag) + tsc += 1 + else: + soup = BeautifulSoup(comments) - # tokenize double line breaks - desc = comments.replace('\r', '') - tokens = comments.split('\n\n') + result = BeautifulSoup() + rtc = 0 + open_pTag = False - soup = BeautifulSoup() - ptc = 0 - for token in tokens: - pTag = Tag(soup, 'p') - pTag.insert(0,token) - soup.insert(ptc, pTag) - ptc += 1 - return soup.renderContents(encoding=None) + all_tokens = list(soup.contents) + for token in all_tokens: + if type(token) is NavigableString: + if not open_pTag: + pTag = Tag(result,'p') + open_pTag = True + ptc = 0 + pTag.insert(ptc,prepare_string_for_xml(token)) + ptc += 1 + + elif token.name in ['br','b','i']: + if not open_pTag: + pTag = Tag(result,'p') + open_pTag = True + ptc = 0 + pTag.insert(ptc, token) + ptc += 1 + + else: + if open_pTag: + result.insert(rtc, pTag) + rtc += 1 + open_pTag = False + ptc = 0 + # Clean up NavigableStrings for xml + sub_tokens = list(token.contents) + sub_soup = BeautifulSoup() + for sub_token in sub_tokens: + if type(sub_token) is NavigableString: + sub_token.replaceWith(prepare_string_for_xml(sub_token)) + result.insert(rtc, token) + rtc += 1 + + if open_pTag: + result.insert(rtc, pTag) + + paras = result.findAll('p') + for p in paras: + p['class'] = 'description' + + return result.renderContents(encoding=None) def processSpecialTags(self, tags, this_title, opts): tag_list = [] @@ -2847,6 +2991,8 @@ class EPUB_MOBI(CatalogPlugin): opts.basename = "Catalog" opts.plugin_path = self.plugin_path opts.cli_environment = not hasattr(opts,'sync') + # GwR *** hardwired for the moment + opts.sort_descriptions_by_author = True if opts.verbose: opts_dict = vars(opts) @@ -2863,7 +3009,7 @@ class EPUB_MOBI(CatalogPlugin): for key in keys: if key in ['catalog_title','exclude_genre','exclude_tags','generate_titles', 'generate_recently_added','note_tag','numbers_as_text','read_tag', - 'search_text','sort_by','sync']: + 'search_text','sort_by','sort_descriptions_by_author','sync']: log(" %s: %s" % (key, opts_dict[key])) # Launch the Catalog builder From f283dc892e8308d807c7440903dff8a46ceec8cd Mon Sep 17 00:00:00 2001 From: jason Date: Wed, 3 Feb 2010 21:32:28 +0000 Subject: [PATCH 04/20] update series info from metadata --- src/calibre/gui2/dialogs/metadata_single.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index d066a27c53..78f30ecb21 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -594,10 +594,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.rating.setValue(int(book.rating)) if book.tags: self.tags.setText(', '.join(book.tags)) - print 'setting series' - print book.series if book.series is not None: - if self.series is not None: + if self.series.text() is None or self.series.text() == '': self.series.setText(book.series) if book.series_index is not None: self.series_index.setValue(book.series_index) From 3d1ef6e56499bd24cbca3fb263dfa9580d5b3f9a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Feb 2010 16:14:25 -0700 Subject: [PATCH 05/20] Fix #4786 (Updated recipe for Pagina 12) --- resources/recipes/pagina12.recipe | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/resources/recipes/pagina12.recipe b/resources/recipes/pagina12.recipe index 2fb433dc82..c9801cb359 100644 --- a/resources/recipes/pagina12.recipe +++ b/resources/recipes/pagina12.recipe @@ -5,9 +5,10 @@ __copyright__ = '2008-2010, Darko Miletic ' pagina12.com.ar ''' -import time +import re, time from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag class Pagina12(BasicNewsRecipe): title = 'Pagina - 12' @@ -22,7 +23,8 @@ class Pagina12(BasicNewsRecipe): use_embedded_content = False language = 'es' remove_empty_feeds = True - extra_css = ' body{font-family: sans-serif} ' + masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif' + extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h2{color: #028CCD} img{margin-bottom: 0.4em} .epigrafe{font-size: x-small; background-color: #EBEAE5; color: #565144 } .intro{font-size: 1.1em} ' conversion_options = { 'comment' : description @@ -32,7 +34,7 @@ class Pagina12(BasicNewsRecipe): } remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})] - + feeds = [ (u'Edicion impresa', u'http://www.pagina12.com.ar/diario/rss/principal.xml' ) @@ -52,7 +54,11 @@ class Pagina12(BasicNewsRecipe): return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/') def get_cover_url(self): - imgnames = ['tapan.jpg','tapagn.jpg','tapan_gr.jpg','tapagn.jpg','tapagn.jpg','tapan.jpg','tapagn.jpg'] - weekday = time.localtime().tm_wday - return strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/') + imgnames[weekday] - + rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True) + rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc) + soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None) + for image in soup.findAll('img',alt=True): + if image['alt'].startswith('Tapa de la fecha'): + return image['src'] + return None + \ No newline at end of file From 008fab308d68003341cdd3152b50629115afc15e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 3 Feb 2010 17:03:04 -0700 Subject: [PATCH 06/20] Updated recipe for The New Republic --- resources/recipes/the_new_republic.recipe | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/resources/recipes/the_new_republic.recipe b/resources/recipes/the_new_republic.recipe index 482dba1af0..59ccef3607 100644 --- a/resources/recipes/the_new_republic.recipe +++ b/resources/recipes/the_new_republic.recipe @@ -9,6 +9,7 @@ class The_New_Republic(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 + no_stylesheets = True remove_tags = [ dict(name='div', attrs={'class':['print-logo', 'print-site_name', 'img-left', 'print-source_url']}), @@ -21,14 +22,15 @@ class The_New_Republic(BasicNewsRecipe): ('Economy', 'http://www.tnr.com/rss/articles/Economy'), ('Environment and Energy', 'http://www.tnr.com/rss/articles/Environment-%2526-Energy'), ('Health Care', 'http://www.tnr.com/rss/articles/Health-Care'), - ('Urban Policy', 'http://www.tnr.com/rss/articles/Urban-Policy'), + ('Metro Policy', 'http://www.tnr.com/rss/articles/Metro-Policy'), ('World', 'http://www.tnr.com/rss/articles/World'), ('Film', 'http://www.tnr.com/rss/articles/Film'), ('Books', 'http://www.tnr.com/rss/articles/books'), + ('The Book', 'http://www.tnr.com/rss/book'), + ('Jonathan Chait', 'http://www.tnr.com/rss/blogs/Jonathan-Chait'), ('The Plank', 'http://www.tnr.com/rss/blogs/The-Plank'), ('The Treatment', 'http://www.tnr.com/rss/blogs/The-Treatment'), ('The Spine', 'http://www.tnr.com/rss/blogs/The-Spine'), - ('The Stash', 'http://www.tnr.com/rss/blogs/The-Stash'), ('The Vine', 'http://www.tnr.com/rss/blogs/The-Vine'), ('The Avenue', 'http://www.tnr.com/rss/blogs/The-Avenue'), ('William Galston', 'http://www.tnr.com/rss/blogs/William-Galston'), @@ -40,3 +42,4 @@ class The_New_Republic(BasicNewsRecipe): def print_version(self, url): return url.replace('http://www.tnr.com/', 'http://www.tnr.com/print/') + From e7c07ee25effd4a904646e427b2fb33d0b3dbb21 Mon Sep 17 00:00:00 2001 From: GRiker Date: Wed, 3 Feb 2010 17:07:49 -0700 Subject: [PATCH 07/20] GwR changes for series sorting --- src/calibre/gui2/catalog/catalog_epub_mobi.ui | 2 +- src/calibre/library/catalog.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.ui b/src/calibre/gui2/catalog/catalog_epub_mobi.ui index 91fcbdc364..dab8c972c7 100644 --- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui +++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui @@ -80,7 +80,7 @@ Regex tips: -- The default regex - \[[\w]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie] +- The default regex - \[[\w ]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie] - A regex pattern of a single dot excludes all genre tags, generating no Genre Section diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 5110a2eee1..51f1ff1104 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1009,7 +1009,7 @@ class EPUB_MOBI(CatalogPlugin): elif x['series'] < y['series']: return -1 elif not x['series'] and not y['series']: - if x['title'] > y['title']: + if self.generateSortTitle(x['title']) > self.generateSortTitle(y['title']): return 1 else: return -1 From a0ea14b5e9bf23d538339f0ca8c0eeb8b4bb1ab3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Feb 2010 08:44:29 -0700 Subject: [PATCH 08/20] New recipe for Digital Spy UK by Darko Miletic --- resources/images/news/digitalspy_uk.png | Bin 0 -> 1290 bytes resources/recipes/digitalspy_uk.recipe | 43 ++++++++++++++++++++ src/calibre/gui2/dialogs/metadata_single.py | 2 +- 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 resources/images/news/digitalspy_uk.png create mode 100644 resources/recipes/digitalspy_uk.recipe diff --git a/resources/images/news/digitalspy_uk.png b/resources/images/news/digitalspy_uk.png new file mode 100644 index 0000000000000000000000000000000000000000..28c865713d5214505a620d111291e051462cef11 GIT binary patch literal 1290 zcmeAS@N?(olHy`uVBq!ia0vp^4j|0I1|(Ny7TyC=Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgfc0~o*h{3C&ZfyLO<#WAGf*4x>(Iky}n+U|!;J;g9(t%A~3 z?v*pUVmeEjjs~%@MTza&5~<|s6}2+*QkPcn(vC9EB`cr~*=O{*E?6Puvaw zI9i(-HyB^!zAwvncj2eUZ_Zq;t$JLjgW9DFIu9)TBc-~H;`a;su zw@!X2w0mCo32<*51g!J<-)*W`~ko_l_ix z_GA`RH&-?$=dE%>mCude4dAvv^d3zO0 z;3VDk->xvE?fiX&Tk8}FMA(U%x*Jn`10er)Y^%X_CCHVZ72PX zVhXsKxhv_9MyjUxhRvs{Vy~-7N^LXqWns7^yo^2Ful;HAZSe(x%YS!Q>;Zaag16(8 z6q)&RoGky>zWnj=q~^KvrryC2Wyk((zUh76_9^R`knAmb zYoC3LI3lpg|F}}e@pnzi|Y$SV*dP(yqWpvsgqc1%&9NGBKvl( zkgclXx-k)G&;2{<*SM>)+lAR3!e6uBtxDuPpmFN)bH32BH>ZRc>`q5n{&ia7`f_=5 z-n#IG0(TQ68744I`1xqf)8z_h4P6-gepSYX#qBj@Q0Vor5j(iN-}Tk1!db}yai@iQ z%Y___1YKW#y>r2~_g=-J2W+k@eM=Nix`+USK|jb}XiegRveoX)3q_0gb!{^j1~gP1 z`Ra2~#9{5lrbT@EHZi4(L)Jh2?ySB3srxSRoAYz;EwaoMiMns_D{Hs3@|IU6=Xox$ zGZ$E>zP)~}tTtoep-jzHZQI{}=egi}_ra%C20|R74;e%sCagKuJ*nrp_teI3GoCSZ ztxBv6b;wNZiwIme73BRJ>V11IJ6;6F<-XAVj`uu17yqPt)L*o``MHW`?ZrhZ&Qor! zxEdYuyK=qQrWyKn+fvuW3A`<-U#NL9L~k0`0hR2e$%^s~{0G<{Fn?hD!BE3s#~^<| z;4kashm)EZvkG&8<%nvDYeY#(Vo9o1a#1RfVlXl=G|)9L(lsy%F|e>QF|slSa?Py_ t3_f1kse_^+H$NpatrA&-p@Efwp_PFJM8iVfO?^NO44$rjF6*2UngF(GL9ze< literal 0 HcmV?d00001 diff --git a/resources/recipes/digitalspy_uk.recipe b/resources/recipes/digitalspy_uk.recipe new file mode 100644 index 0000000000..ac54c3790d --- /dev/null +++ b/resources/recipes/digitalspy_uk.recipe @@ -0,0 +1,43 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +www.digitalspy.co.uk +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class DigitalSpyUK(BasicNewsRecipe): + title = 'Digital Spy - UK Edition' + __author__ = 'Darko Miletic' + description = 'Entertainment news about the biggest TV shows, films and celebrities, updated around the clock.' + publisher = 'Digital Spy Limited.' + category = 'news, showbiz, big brother, x factor, torchwood, doctor who, tv, media, sky, freeview, cable' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'cp1252' + use_embedded_content = False + language = 'en_GB' + remove_empty_feeds = True + extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .info{font-size: small} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [dict(name=['link'])] + remove_attributes = ['height','width'] + keep_only_tags = [dict(name='div',attrs={'id':'article'})] + + feeds = [ + (u'News' , u'http://www.digitalspy.co.uk/rss/zones/gb/all.xml' ) + ,(u'Big Brother' , u'http://www.digitalspy.co.uk/rss/zones/gb/bigbrother.xml' ) + ,(u'Entertainment' , u'http://www.digitalspy.co.uk/rss/zones/gb/entertainment.xml') + ,(u'General' , u'http://www.digitalspy.co.uk/rss/zones/gb/general.xml' ) + ,(u'Media' , u'http://www.digitalspy.co.uk/rss/zones/gb/media.xml' ) + ] + diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 78f30ecb21..8fab6a922a 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -598,7 +598,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): if self.series.text() is None or self.series.text() == '': self.series.setText(book.series) if book.series_index is not None: - self.series_index.setValue(book.series_index) + self.series_index.setValue(book.series_index) else: error_dialog(self, _('Cannot fetch metadata'), _('You must specify at least one of ISBN, Title, ' From 96ac81c3421586dac287a1683d621c458a5d3acb Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Feb 2010 09:10:01 -0700 Subject: [PATCH 09/20] RTF Input: Don't eat up the space after \u escaped characters --- src/calibre/ebooks/rtf2xml/tokenize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/rtf2xml/tokenize.py b/src/calibre/ebooks/rtf2xml/tokenize.py index 45887f33e7..ad12daa211 100755 --- a/src/calibre/ebooks/rtf2xml/tokenize.py +++ b/src/calibre/ebooks/rtf2xml/tokenize.py @@ -72,7 +72,7 @@ class Tokenize: return line def __compile_expressions(self): self.__ms_hex_exp = re.compile(r"\\\'(..)") - self.__utf_exp = re.compile(r"\\u(-?\d{3,6}) {0,1}") + self.__utf_exp = re.compile(r"\\u(-?\d{3,6})") self.__splitexp = re.compile(r"(\\[\\{}]|{|}|\\[^\s\\{}&]+(?:\s)?)") self.__par_exp = re.compile(r'\\$') self.__mixed_exp = re.compile(r"(\\[a-zA-Z]+\d+)(\D+)") From b8b3efc5b8ffaa7bb9ff33530d9ba3b6b0a8d282 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Feb 2010 09:49:41 -0700 Subject: [PATCH 10/20] ... --- src/calibre/manual/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 9bdd9aaa6b..a3c5bd32c4 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -62,7 +62,7 @@ How do I convert my file containing non-English characters, or smart quotes? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ There are two aspects to this problem: 1. Knowing the encoding of the source file: |app| tries to guess what character encoding your source files use, but often, this is impossible, so you need to tell it what encoding to use. This can be done in the GUI via the :guilabel:`Input character encoding` field in the :guilabel:`Look & Feel` section. The command-line tools all have an :option:`--input-encoding` option. - 2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first. + 2. When adding HTML files to |app|, you may need to tell |app| what encoding the files are in. To do this go to Preferences->Plugins->File Type plugins and customize the HTML2Zip plugin, telling it what encoding your HTML files are in. Now when you add HTML files to |app| they will be correctly processed. HTML files from different sources often have different encodings, so you may have to change this setting repeatedly. A common encoding for many files from the web is ``cp1252`` and I would suggest you try that first. Note that when converting HTML files, leave the input encoding setting mentioned above blank. This is because the HTML2ZIP plugin automatically converts the HTML files to a standard encoding (utf-8). 3. Embedding fonts: If you are generating an LRF file to read on your SONY Reader, you are limited by the fact that the Reader only supports a few non-English characters in the fonts it comes pre-loaded with. You can work around this problem by embedding a unicode-aware font that supports the character set your file uses into the LRF file. You should embed atleast a serif and a sans-serif font. Be aware that embedding fonts significantly slows down page-turn speed on the reader. From 526e7198d7cd3e6b619776fe2e96ae42b63b6b7c Mon Sep 17 00:00:00 2001 From: GRiker Date: Fri, 5 Feb 2010 07:42:54 -0700 Subject: [PATCH 11/20] GwR revisions, tweaks --- src/calibre/gui2/device.py | 2 +- src/calibre/library/catalog.py | 73 ++++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 5a977b37a6..679e86ab48 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -149,7 +149,7 @@ class DeviceManager(Thread): possibly_connected_devices.append((device, detected_device)) if possibly_connected_devices: if not self.do_connect(possibly_connected_devices): - print 'Connect to device failed, retying in 5 seconds...' + print 'Connect to device failed, retrying in 5 seconds...' time.sleep(5) if not self.do_connect(possibly_connected_devices): print 'Device connect failed again, giving up' diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 726541bd4a..c0bdd19c82 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1119,12 +1119,14 @@ class EPUB_MOBI(CatalogPlugin): #

Book Title

emTag = Tag(soup, "em") if title['series']: - # Insert br at colon + # title
series series_index brTag = Tag(soup,'br') title_tokens = title['title'].split(': ') - emTag.insert(0, title_tokens[0] + ':') + emTag.insert(0, NavigableString(title_tokens[1])) emTag.insert(1, brTag) - emTag.insert(2, title_tokens[1]) + smallTag = Tag(soup,'small') + smallTag.insert(0,NavigableString(title_tokens[0])) + emTag.insert(2, smallTag) else: emTag.insert(0, NavigableString(escape(title['title']))) titleTag = body.find(attrs={'class':'title'}) @@ -1202,7 +1204,12 @@ class EPUB_MOBI(CatalogPlugin): else: imgTag['src'] = "../images/thumbnail_default.jpg" imgTag['alt'] = "cover" - imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT) + + # Tweak image size if we're building for Sony, not sure why this is needed + if self.opts.fmt == 'epub' and self.opts.output_profile.startswith("sony"): + imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH * 2, self.THUMB_HEIGHT * 2) + else: + imgTag['style'] = 'width: %dpx; height:%dpx;' % (self.THUMB_WIDTH, self.THUMB_HEIGHT) thumbnailTag = body.find(attrs={'class':'thumbnail'}) thumbnailTag.insert(0,imgTag) @@ -1697,7 +1704,9 @@ class EPUB_MOBI(CatalogPlugin): for genre in genre_list: for key in genre: - self.opts.log.info(" %s: %d titles" % (key, len(genre[key]))) + self.opts.log.info(" %s: %d %s" % (self.getFriendlyGenreTag(key), + len(genre[key]), + 'titles' if len(genre[key]) > 1 else 'title')) # Write the results # genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...] @@ -2042,7 +2051,11 @@ class EPUB_MOBI(CatalogPlugin): self.playOrder += 1 navLabelTag = Tag(ncx_soup, "navLabel") textTag = Tag(ncx_soup, "text") - textTag.insert(0, NavigableString(self.formatNCXText(book['title']))) + if book['series']: + tokens = book['title'].split(': ') + textTag.insert(0, NavigableString(self.formatNCXText('%s (%s)' % (tokens[1], tokens[0])))) + else: + textTag.insert(0, NavigableString(self.formatNCXText(book['title']))) navLabelTag.insert(0,textTag) navPointVolumeTag.insert(0,navLabelTag) @@ -2548,15 +2561,25 @@ class EPUB_MOBI(CatalogPlugin): else: yield tag - self.opts.log.info(u' %d available genre tags in database (exclude_genre: %s):' % \ + self.opts.log.info(u' %d genre tags in database (excluding genres matching %s):' % \ (len(genre_tags_dict), self.opts.exclude_genre)) # Display friendly/normalized genres # friendly => normalized - sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())] - - for tag in next_tag(sorted_tags): - self.opts.log(u' %s' % tag) + if False: + sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())] + for tag in next_tag(sorted_tags): + self.opts.log(u' %s' % tag) + else: + sorted_tags = ['%s' % (key) for key in sorted(genre_tags_dict.keys())] + out_str = '' + line_break = 70 + for tag in next_tag(sorted_tags): + out_str += tag + if len(out_str) >= line_break: + self.opts.log.info(' %s' % out_str) + out_str = '' + self.opts.log.info(' %s' % out_str) return genre_tags_dict @@ -2596,13 +2619,8 @@ class EPUB_MOBI(CatalogPlugin): body.insert(btc,aTag) btc += 1 - # Find the first instance of friendly_tag matching genre - for friendly_tag in self.genre_tags_dict: - if self.genre_tags_dict[friendly_tag] == genre: - break - titleTag = body.find(attrs={'class':'title'}) - titleTag.insert(0,NavigableString('%s' % escape(friendly_tag))) + titleTag.insert(0,NavigableString('%s' % escape(self.getFriendlyGenreTag(genre)))) # Insert the books by author list divTag = body.find(attrs={'class':'authors'}) @@ -2927,6 +2945,12 @@ class EPUB_MOBI(CatalogPlugin): else: return char + def getFriendlyGenreTag(self, genre): + # Find the first instance of friendly_tag matching genre + for friendly_tag in self.genre_tags_dict: + if self.genre_tags_dict[friendly_tag] == genre: + return friendly_tag + def markdownComments(self, comments): ''' Convert random comment text to normalized, xml-legal block of

s @@ -3076,7 +3100,7 @@ class EPUB_MOBI(CatalogPlugin): opts.basename = "Catalog" opts.plugin_path = self.plugin_path opts.cli_environment = not hasattr(opts,'sync') - # GwR *** hardwired for the moment + # GwR *** hardwired to sort by author, could be an option if passed in opts opts.sort_descriptions_by_author = True if opts.verbose: @@ -3087,6 +3111,15 @@ class EPUB_MOBI(CatalogPlugin): if opts_dict['ids']: log(" Book count: %d" % len(opts_dict['ids'])) + sections_list = ['Descriptions','Authors'] + if opts.generate_titles: + sections_list.append('Titles') + if opts.generate_recently_added: + sections_list.append('Recently Added') + if not opts.exclude_genre.strip() == '.': + sections_list.append('Genres') + log(u"Creating Sections for %s" % ', '.join(sections_list)) + # If exclude_genre is blank, assume user wants all genre tags included if opts.exclude_genre.strip() == '': opts.exclude_genre = '\[^.\]' @@ -3098,8 +3131,8 @@ class EPUB_MOBI(CatalogPlugin): log(" opts:") for key in keys: - if key in ['catalog_title','exclude_genre','exclude_tags','generate_titles', - 'generate_recently_added','note_tag','numbers_as_text','read_tag', + if key in ['catalog_title','exclude_genre','exclude_tags', + 'note_tag','numbers_as_text','read_tag', 'search_text','sort_by','sort_descriptions_by_author','sync']: log(" %s: %s" % (key, opts_dict[key])) From 643f60f6e9635f9b8fe25de36d397852f87bc000 Mon Sep 17 00:00:00 2001 From: GRiker Date: Fri, 5 Feb 2010 09:03:54 -0700 Subject: [PATCH 12/20] Added read checkmark to Description page, fixed note prefix len --- src/calibre/library/catalog.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index c0bdd19c82..8ef9e41dc0 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1138,7 +1138,12 @@ class EPUB_MOBI(CatalogPlugin): aTag['href'] = "%s.html#%s" % ("ByAlphaAuthor", self.generateAuthorAnchor(title['author'])) #aTag.insert(0, escape(title['author'])) aTag.insert(0, title['author']) - authorTag.insert(0, NavigableString("by ")) + + # Insert READ_SYMBOL + if title['read']: + authorTag.insert(0, NavigableString(self.READ_SYMBOL + "by ")) + else: + authorTag.insert(0, NavigableString(self.NOT_READ_SYMBOL + "by ")) authorTag.insert(1, aTag) ''' @@ -3054,7 +3059,7 @@ class EPUB_MOBI(CatalogPlugin): for tag in tags: tag = self.convertHTMLEntities(tag) if tag.startswith(opts.note_tag): - this_title['notes'] = tag[1:] + this_title['notes'] = tag[len(self.opts.note_tag):] elif tag == opts.read_tag: this_title['read'] = True elif re.search(opts.exclude_genre, tag): From 4018a811cb35397c0a157cb7036e32359979a0f8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 5 Feb 2010 21:17:45 -0700 Subject: [PATCH 13/20] El Comerico by Darko Miletic --- resources/images/news/elcomercio.png | Bin 0 -> 764 bytes resources/recipes/elcomercio.recipe | 38 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 resources/images/news/elcomercio.png create mode 100644 resources/recipes/elcomercio.recipe diff --git a/resources/images/news/elcomercio.png b/resources/images/news/elcomercio.png new file mode 100644 index 0000000000000000000000000000000000000000..df484860dde90a1a0e4be214f11df2c8920fc23c GIT binary patch literal 764 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?@N&T^vI!PA{Ew-#a)_;<){(dyD7#o}cyn z2XiuW{e_Sj9NS`DOM(RgCF8mr#qYgY^!nW&?mZp9m|q=|mMl|rG->i$xp~*j<};4= z+ZJk@pH;qHm@A$y?%wYaG2#B~>H6nC*Zlaj$A9vapdzkHhHqJ=V#chKZYo_|aBePl z?UwCY3@V#lr|r)RTy=#n>+f-fvkaF%YwM(aa%r|&Su^wHPf-TB{r`Pz5}5?vU5PmJ zb zU+Uj4SnDdlD`)D~Y_)O1xx-I|mwBl^H|4Q$j+OGaGua`_d1>`Z8NIE0LlakcA3ifR z_D9^>bLY?cIz4BXJ#dK0H^@uUf5wtesfXrDF#sL(_4jso)=E-hh?2Sfb7^;NrJ!>(jDf`BT?$`TwE(N}fL}e>{4B6qtHcOI#yLQW8s2 zt&)pUffR$0fuVt}fsw9(X^4@bm7$rHv8Aqoxs`#zmDKgZC>nC}Q!>*kku?~aTbY=rAP2>Oo literal 0 HcmV?d00001 diff --git a/resources/recipes/elcomercio.recipe b/resources/recipes/elcomercio.recipe new file mode 100644 index 0000000000..37733bda8b --- /dev/null +++ b/resources/recipes/elcomercio.recipe @@ -0,0 +1,38 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +elcomercio.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ElComercio(BasicNewsRecipe): + title = 'El Comercio ' + __author__ = 'Darko Miletic' + description = "Gizmodo, the gadget guide. So much in love with shiny new toys, it's unnatural." + publisher = 'GRUPO EL COMERCIO C.A.' + category = 'news, Ecuador, politics' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'utf-8' + use_embedded_content = True + language = 'es' + masthead_url = 'http://ww1.elcomercio.com/nv_images/headers/EC/logo_new_08.gif' + extra_css = ' body{font-family: Arial,Verdana,sans-serif} img{margin-bottom: 1em} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_attributes = ['width','height'] + + feeds = [(u'Articles', u'http://ww1.elcomercio.com/rss/titulares1.xml')] + + def preprocess_html(self, soup): + return self.adeify_images(soup) + From bf8324b6227c5339fd44290b3f84d81c524cacae Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 5 Feb 2010 21:29:18 -0700 Subject: [PATCH 14/20] Searching on the device: Ignore unicode errors --- src/calibre/gui2/library.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py index fd4f8999b4..bf45584df8 100644 --- a/src/calibre/gui2/library.py +++ b/src/calibre/gui2/library.py @@ -903,9 +903,14 @@ class OnDeviceSearch(SearchQueryParser): locations[i] = q[v] for i, r in enumerate(self.model.db): for loc in locations: - if query in loc(r): - matches.add(i) - break + try: + if query in loc(r): + matches.add(i) + break + except ValueError: # Unicode errors + import traceback + traceback.print_exc() + pass return matches From 97ba8f07a44f6ee35ac4dc909745ec113d4cc795 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Feb 2010 12:18:28 -0700 Subject: [PATCH 15/20] News download: Automatically remove tags in the postptocess phase as they cause links in generated EPUB files to not work --- src/calibre/gui2/library.py | 1 - src/calibre/web/feeds/news.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/library.py b/src/calibre/gui2/library.py index bf45584df8..9b8210c75e 100644 --- a/src/calibre/gui2/library.py +++ b/src/calibre/gui2/library.py @@ -910,7 +910,6 @@ class OnDeviceSearch(SearchQueryParser): except ValueError: # Unicode errors import traceback traceback.print_exc() - pass return matches diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index de8eaf6ac5..540f7cd93a 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -615,10 +615,12 @@ class BasicNewsRecipe(Recipe): del o['onload'] for script in list(soup.findAll('noscript')): - script.extract() + script.extract() for attr in self.remove_attributes: for x in soup.findAll(attrs={attr:True}): del x[attr] + for base in list(soup.findAll('base')): + base.extract() return self.postprocess_html(soup, first_fetch) From 4e3a316c70b263a811ba8e2ff96db7adf0017e74 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Feb 2010 13:02:08 -0700 Subject: [PATCH 16/20] Fix #4816 (build+install issues) --- resources/recipes/metro_montreal.recipe | 4 ++-- setup/resources.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/resources/recipes/metro_montreal.recipe b/resources/recipes/metro_montreal.recipe index 8272c760cc..b7f60349df 100644 --- a/resources/recipes/metro_montreal.recipe +++ b/resources/recipes/metro_montreal.recipe @@ -16,7 +16,7 @@ class Metro_Montreal(BasicNewsRecipe): extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}' remove_tags = [dict(attrs={'id':'buttons'})] - + feeds = [ (u"L'info", u'http://journalmetro.com/linfo/rss'), (u'Monde', u'http://journalmetro.com/monde/rss'), @@ -26,4 +26,4 @@ class Metro_Montreal(BasicNewsRecipe): ] def print_version(self, url): - return url.replace('article', 'ArticlePrint') + '?language=fr' \ No newline at end of file + return url.replace('article', 'ArticlePrint') + '?language=fr' diff --git a/setup/resources.py b/setup/resources.py index d40d31bbf5..977d753828 100644 --- a/setup/resources.py +++ b/setup/resources.py @@ -48,7 +48,9 @@ class Resources(Command): dest = self.j(self.RESOURCES, 'builtin_recipes.xml') if self.newer(dest, files): self.info('\tCreating builtin_recipes.xml') - open(dest, 'wb').write(serialize_builtin_recipes()) + xml = serialize_builtin_recipes() + with open(dest, 'wb') as f: + f.write(xml) dest = self.j(self.RESOURCES, 'ebook-convert-complete.pickle') files = [] From 4115fd1168adfa1d2caeffee3b9cdf752a6d35b8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Feb 2010 13:05:41 -0700 Subject: [PATCH 17/20] Fix #4814 (Modified PeopleUsMashup) --- resources/recipes/people_us_mashup.recipe | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/recipes/people_us_mashup.recipe b/resources/recipes/people_us_mashup.recipe index 38d750cd4c..ed43e24e56 100644 --- a/resources/recipes/people_us_mashup.recipe +++ b/resources/recipes/people_us_mashup.recipe @@ -31,7 +31,7 @@ class PeopleMag(BasicNewsRecipe): keep_only_tags = [ - dict(name='div', attrs={'class': 'panel_news_article_main'}), + dict(name='div', attrs={'class': 'panel_news_article_main'}), dict(name='div', attrs={'class':'article_content'}), dict(name='div', attrs={'class': 'headline'}), dict(name='div', attrs={'class': 'post'}), @@ -51,6 +51,7 @@ class PeopleMag(BasicNewsRecipe): dict(name='div', attrs={'class':'sharelinkcont'}), dict(name='div', attrs={'class':'categories'}), dict(name='ul', attrs={'class':'categories'}), + dict(name='div', attrs={'class':'related_content'}), dict(name='div', attrs={'id':'promo'}), dict(name='div', attrs={'class':'linksWrapper'}), dict(name='p', attrs={'class':'tag tvnews'}), From fe3152e8c34e585c5b3bc5687dfd77adcf8d7319 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Feb 2010 13:07:14 -0700 Subject: [PATCH 18/20] Fix #4815 (additional REMOVE_TAGS for Harvard Business Review) --- resources/recipes/hbr.recipe | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/recipes/hbr.recipe b/resources/recipes/hbr.recipe index b84062af8c..3d1e8ccfac 100644 --- a/resources/recipes/hbr.recipe +++ b/resources/recipes/hbr.recipe @@ -18,7 +18,8 @@ class HBR(BasicNewsRecipe): remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline', 'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn', 'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR', - 'mailingListTout', 'partnerCenter', 'pageFooter']), + 'mailingListTout', 'partnerCenter', 'pageFooter', + 'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']), dict(name='iframe')] extra_css = ''' a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; } From 411b796ba1c9bdaf9e5adae1183bac5fb34ecd07 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Feb 2010 21:14:37 -0700 Subject: [PATCH 19/20] Fix #4816 (build+install issues) --- resources/recipes/metro_montreal.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/recipes/metro_montreal.recipe b/resources/recipes/metro_montreal.recipe index b7f60349df..c2054bdeec 100644 --- a/resources/recipes/metro_montreal.recipe +++ b/resources/recipes/metro_montreal.recipe @@ -4,7 +4,7 @@ class Metro_Montreal(BasicNewsRecipe): title = u'M\xe9tro Montr\xe9al' __author__ = 'Jerry Clapperton' - description = 'Le quotidien le plus branché sur le monde' + description = 'Le quotidien le plus branch\xe9 sur le monde' language = 'fr' oldest_article = 7 From 3ae86efb6fd01140063db1feb7c0d7fcd5df2531 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Feb 2010 21:20:11 -0700 Subject: [PATCH 20/20] Zive.sk and iliterature.cz by Abelturd --- resources/recipes/ZIVE.sk.recipe | 45 +++++++++++++++++++++++ resources/recipes/iliteratura_cz.recipe | 47 +++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 resources/recipes/ZIVE.sk.recipe create mode 100644 resources/recipes/iliteratura_cz.recipe diff --git a/resources/recipes/ZIVE.sk.recipe b/resources/recipes/ZIVE.sk.recipe new file mode 100644 index 0000000000..e5bfd56cef --- /dev/null +++ b/resources/recipes/ZIVE.sk.recipe @@ -0,0 +1,45 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import re + + + +class ZiveRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'Abelturd' + language = 'sk' + version = 1 + + title = u'ZIVE.sk' + publisher = u'' + category = u'News, Newspaper' + description = u'Naj\u010d\xedtanej\u0161\xed denn\xedk opo\u010d\xedta\u010doch, IT a internete. ' + encoding = 'UTF-8' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + remove_empty_feeds = True + + no_stylesheets = True + remove_javascript = True + cover_url = 'http://www.zive.sk/Client.Images/Logos/logo-zive-sk.gif' + + feeds = [] + feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.zive.sk/rss/sc-47/default.aspx')) + + preprocess_regexps = [ + (re.compile(r'

Pokra.*ie

', re.DOTALL|re.IGNORECASE), + lambda match: ''), + + ] + + + remove_tags = [] + + keep_only_tags = [dict(name='h1'), dict(name='span', attrs={'class':'arlist-data-info-author'}), dict(name='div', attrs={'class':'bbtext font-resizer-area'}),] + extra_css = ''' + h1 {font-size:140%;font-family:georgia,serif; font-weight:bold} + h3 {font-size:115%;font-family:georgia,serif; font-weight:bold} + ''' + + diff --git a/resources/recipes/iliteratura_cz.recipe b/resources/recipes/iliteratura_cz.recipe new file mode 100644 index 0000000000..7d603f0cec --- /dev/null +++ b/resources/recipes/iliteratura_cz.recipe @@ -0,0 +1,47 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class SmeRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'Abelturd' + language = 'cz' + version = 1 + + title = u'iLiteratura.cz' + publisher = u'' + category = u'News, Newspaper' + description = u'O LITERATU\u0158E V CEL\xc9M SV\u011aT\u011a A DOMA' + cover_url = 'http://www.iliteratura.cz/1_vzhled/1/iliteratura.gif' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + remove_empty_feeds = True + + no_stylesheets = True + remove_javascript = True + + + feeds = [] + feeds.append((u'\u010cl\xe1nky', u'http://www.iliteratura.cz/rss.asp')) + + + keep_only_tags = [] + + remove_tags = [dict(name='table'),dict(name='h3')] + + + preprocess_regexps = [ + (re.compile(r'

Souvisej.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + ] + + def print_version(self, url): + m = re.search('(?<=ID=)[0-9]*', url) + + return u'http://www.iliteratura.cz/clanek.asp?polozkaID=' + str(m.group(0)) + '&c=tisk' + + extra_css = ''' + h1 {font-size:140%;font-family:georgia,serif; font-weight:bold} + h3 {font-size:115%;font-family:georgia,serif; font-weight:bold} + '''