# calibre news recipe for conowego.pl (recipes/conowego_pl.recipe).
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class CoNowegoPl(BasicNewsRecipe):
    """News recipe that downloads conowego.pl articles listed in ``feeds``.

    Besides the declarative settings below, the recipe post-processes every
    article page: it inserts extra markup around images and pulls the text of
    the article's follow-up pages into the first page.
    """

    title = u'conowego.pl'
    __author__ = 'fenuks'
    description = (
        u'Nowy wortal technologiczny oraz gazeta internetowa. '
        u'Testy najnowszych produktów, fachowe porady i recenzje. '
        u'U nas znajdziesz wszystko o elektronice użytkowej !'
    )
    cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png'
    category = 'IT, news'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
    # Keep only the article container, then drop its footer, rating and date.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'news_list single_view'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['ni_bottom', 'ni_rank', 'ni_date']}),
    ]
    feeds = [
        (u'Aktualno\u015bci', u'http://www.conowego.pl/rss/aktualnosci-5/?type=100'),
        (u'Gaming', u'http://www.conowego.pl/rss/gaming-6/?type=100'),
        (u'Porady', u'http://www.conowego.pl/rss/porady-3/?type=100'),
        (u'Testy', u'http://www.conowego.pl/rss/testy-2/?type=100'),
    ]

    def preprocess_html(self, soup):
        """Insert markup around every image, then merge follow-up pages.

        :param soup: parsed article page; it is modified in place.
        :return: the same ``soup`` object.
        """
        for img in soup.findAll('img'):
            # NOTE(review): the markup literal was lost when this source was
            # extracted (only a line break remained between the quotes);
            # '<br />' is presumed -- confirm against the upstream recipe.
            img.parent.insert(0, BeautifulSoup('<br />'))
            img.insert(len(img), BeautifulSoup('<br />'))
        self.append_page(soup, soup.body)
        return soup

    def append_page(self, soup, appendtag):
        """Append the content of the article's other pages to ``appendtag``.

        The pager is the ``div`` with class ``pages`` inside ``appendtag``.
        Every link in it except the last one is fetched and the element with
        class ``ni_content`` from the fetched page is appended to
        ``appendtag``. Afterwards the pagination elements are removed.

        :param soup: the whole article page (unused; kept for the interface).
        :param appendtag: element that receives the extra pages, may be None.
        """
        if appendtag is None:
            return
        pager = appendtag.find('div', attrs={'class': 'pages'})
        if pager is None:
            return

        # The last pager link is skipped, as the recipe has always done
        # (presumably it duplicates one of the numbered links -- confirm).
        for link in pager.findAll('a')[:-1]:
            href = link.get('href')
            if not href:
                # An anchor without a target cannot be followed.
                continue
            page = self.index_to_soup('http://www.conowego.pl/' + href)
            pagetext = page.find(attrs={'class': 'ni_content'})
            if pagetext is None:
                # Fetched page has no article body; inserting None would fail.
                continue
            appendtag.insert(len(appendtag.contents), pagetext)

        for leftover in appendtag.findAll(attrs={'class': ['pages', 'paginationWrap']}):
            leftover.extract()
# calibre news recipe for Linux Journal (recipes/linux_journal.recipe).
from calibre.web.feeds.news import BasicNewsRecipe


class LinuxJournal(BasicNewsRecipe):
    """News recipe that downloads Linux Journal articles listed in ``feeds``.

    Multi-page articles are merged into a single page by ``append_page``,
    which is run on every article from ``preprocess_html``.
    """

    title = u'Linux Journal'
    __author__ = 'fenuks'
    description = u'The monthly magazine of the Linux community, promoting the use of Linux worldwide.'
    cover_url = 'http://www.linuxjournal.com/files/linuxjournal.com/ufiles/logo-lj.jpg'
    category = 'IT, Linux'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    keep_only_tags = [dict(id='content-inner')]
    remove_tags_after = dict(attrs={'class': 'user-signature clear-block'})
    remove_tags = [
        dict(attrs={'class': ['user-signature clear-block', 'breadcrumb', 'terms terms-inline']}),
    ]
    feeds = [
        (u'Front Page', u'http://feeds.feedburner.com/linuxjournalcom'),
        (u'News', u'http://feeds.feedburner.com/LinuxJournal-BreakingNews'),
        (u'Blogs', u'http://www.linuxjournal.com/blog/feed'),
        (u'Audio/Video', u'http://www.linuxjournal.com/taxonomy/term/28/0/feed'),
        (u'Community', u'http://www.linuxjournal.com/taxonomy/term/18/0/feed'),
        (u'Education', u'http://www.linuxjournal.com/taxonomy/term/25/0/feed'),
        (u'Embedded', u'http://www.linuxjournal.com/taxonomy/term/27/0/feed'),
        (u'Hardware', u'http://www.linuxjournal.com/taxonomy/term/23/0/feed'),
        (u'HOWTOs', u'http://www.linuxjournal.com/taxonomy/term/19/0/feed'),
        (u'International', u'http://www.linuxjournal.com/taxonomy/term/30/0/feed'),
        (u'Security', u'http://www.linuxjournal.com/taxonomy/term/31/0/feed'),
        (u'Software', u'http://www.linuxjournal.com/taxonomy/term/17/0/feed'),
        (u'Sysadmin', u'http://www.linuxjournal.com/taxonomy/term/21/0/feed'),
        (u'Webmaster', u'http://www.linuxjournal.com/taxonomy/term/24/0/feed'),
    ]

    @staticmethod
    def _remove_links_div(appendtag):
        """Detach the first ``div`` with class ``links`` from ``appendtag``, if any."""
        links = appendtag.find('div', attrs={'class': 'links'})
        if links is not None:
            links.extract()

    def append_page(self, soup, appendtag):
        """Append the content of the article's following pages to ``appendtag``.

        Starting from the ``li`` with class ``pager-next`` found in
        ``appendtag``, each linked page is fetched and its
        ``.node-inner .content`` element is appended to ``appendtag``.

        :param soup: the whole article page (unused; kept for the interface).
        :param appendtag: element that receives the extra pages, may be None.
        """
        if appendtag is None:
            return

        visited = set()
        pager = appendtag.find('li', attrs={'class': 'pager-next'})
        while pager is not None:
            anchor = pager.a
            href = anchor.get('href') if anchor is not None else None
            # Stop on a pager without a usable link, and never fetch the same
            # page twice -- that would loop forever.
            if not href or href in visited:
                break
            visited.add(href)

            self._remove_links_div(appendtag)
            page = self.index_to_soup('http://www.linuxjournal.com' + href)

            # The link to the page after this one lives in the page that was
            # just fetched; re-reading it from ``appendtag`` (as the recipe
            # used to) can only yield the link that was already followed.
            pager = page.find('li', attrs={'class': 'pager-next'})

            node = page.find(attrs={'class': 'node-inner'})
            pagetext = node.find(attrs={'class': 'content'}) if node is not None else None
            if pagetext is None:
                # Nothing to append; treat an unexpected layout as the end.
                break
            appendtag.insert(len(appendtag.contents), pagetext)

        self._remove_links_div(appendtag)

    def preprocess_html(self, soup):
        """Merge the article's following pages into ``soup``.

        :param soup: parsed article page; it is modified in place.
        :return: the same ``soup`` object.
        """
        self.append_page(soup, soup.body)
        return soup