Merge from trunk

2025-07-09 03:04:10 -04:00 · 2012-02-22 08:11:07 +01:00 · 2012-02-22 08:11:07 +01:00 · 86a0bae6cb
commit 86a0bae6cb
parent e1d6c16f6a 959afbd350
62 changed files with 891 additions and 221 deletions
--- a/recipes/archeowiesci.recipe
+++ b/recipes/archeowiesci.recipe
@ -7,6 +7,7 @@ class Archeowiesci(BasicNewsRecipe):
    language       = 'pl'
    cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
    oldest_article = 7
+    needs_subscription='optional'
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_tags=[dict(name='span', attrs={'class':['post-ratings', 'post-ratings-loading']})]
@ -16,6 +17,16 @@ class Archeowiesci(BasicNewsRecipe):
      feeds = BasicNewsRecipe.parse_feeds(self)
      for feed in feeds:
        for article in feed.articles[:]:
-          if 'subskrypcja' in article.title:
+          if self.username is None and 'subskrypcja' in article.title:
            feed.articles.remove(article)
      return feeds
+
+    def get_browser(self):
+        '''
+        Return the browser used for downloads, logged in to archeowiesci.pl
+        when both a username and a password have been supplied.
+        '''
+        # get_browser is an instance method: calling it through the class
+        # without passing self raises a TypeError.
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            br.open('http://archeowiesci.pl/wp-login.php')
+            br.select_form(name='loginform')
+            br['log']   = self.username
+            br['pwd'] = self.password
+            br.submit()
+        return br
--- a/recipes/astronomia_pl.recipe
+++ b/recipes/astronomia_pl.recipe
@ -1,15 +1,18 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
 class Astronomia_pl(BasicNewsRecipe):
    title          = u'Astronomia.pl'
    __author__        = 'fenuks'
    description   = 'Astronomia - polish astronomy site'
+    masthead_url      = 'http://www.astronomia.pl/grafika/logo.gif'
    cover_url      = 'http://www.astronomia.pl/grafika/logo.gif'
    category       = 'astronomy, science'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
-    #no_stylesheets=True
+    extra_css='#h2 {font-size: 18px;}'
+    no_stylesheets=True
+    preprocess_regexps = [(re.compile(ur'<b>Przeczytaj także:.*?</BODY>', re.DOTALL), lambda match: '</BODY>') ]
    remove_tags_before=dict(name='div', attrs={'id':'a1'})
    keep_only_tags=[dict(name='div', attrs={'id':['a1', 'h2']})]
    feeds          = [(u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/')]
--- a/recipes/benchmark_pl.recipe
+++ b/recipes/benchmark_pl.recipe
@ -4,16 +4,17 @@ class Benchmark_pl(BasicNewsRecipe):
    title          = u'Benchmark.pl'
    __author__        = 'fenuks'
    description   = u'benchmark.pl -IT site'
+    masthead_url = 'http://www.benchmark.pl/i/logo-footer.png'
    cover_url      = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
    category       = 'IT'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets=True
-    preprocess_regexps = [(re.compile(ur'\bWięcej o .*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')]
+    preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
    keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
    remove_tags_after=dict(name='div', attrs={'class':'body'})
-    remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']})]
+    remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
    INDEX= 'http://www.benchmark.pl'
    feeds          = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'), 
                          (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
--- a/recipes/biolog_pl.recipe
+++ b/recipes/biolog_pl.recipe
@ -10,10 +10,11 @@ class Biolog_pl(BasicNewsRecipe):
    description   = u'Przyrodnicze aktualności ze świata nauki (codziennie aktualizowane), kurs biologii, testy i sprawdziany, forum dyskusyjne.'
    category       = 'biology'
    language       = 'pl'
+    masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png'
    cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
    no_stylesheets = True
    #keeps_only_tags=[dict(id='main')]
    remove_tags_before=dict(id='main')
    remove_tags_after=dict(name='a', attrs={'name':'komentarze'})
-    remove_tags=[dict(name='img', attrs={'alt':'Komentarze'})]
+    remove_tags=[dict(name='img', attrs={'alt':'Komentarze'}), dict(name='span', attrs={'class':'menu_odsylacze'})]
    feeds          = [(u'Wszystkie', u'http://www.biolog.pl/backend.php'), (u'Medycyna', u'http://www.biolog.pl/medycyna-rss.php'), (u'Ekologia', u'http://www.biolog.pl/rss-ekologia.php'), (u'Genetyka i biotechnologia', u'http://www.biolog.pl/rss-biotechnologia.php'), (u'Botanika', u'http://www.biolog.pl/rss-botanika.php'), (u'Le\u015bnictwo', u'http://www.biolog.pl/rss-lesnictwo.php'), (u'Zoologia', u'http://www.biolog.pl/rss-zoologia.php')]
--- a/recipes/cd_action.recipe
+++ b/recipes/cd_action.recipe
@ -1,16 +1,20 @@
 from calibre.web.feeds.news import BasicNewsRecipe

-
 class CD_Action(BasicNewsRecipe):
    title          = u'CD-Action'
    __author__        = 'fenuks'
-    description   = 'cdaction.pl - polish magazine about games site'
+    description   = 'cdaction.pl - polish games magazine site'
    category       = 'games'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
-    cover_url =u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'
    keep_only_tags= dict(id='news_content')
    remove_tags_after= dict(name='div', attrs={'class':'tresc'})
    feeds          = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]
+
+
+    def get_cover_url(self):
+        '''
+        Build cover_url from the first link inside the element with id
+        'wspolnik' on the cdaction.pl magazine page.
+
+        Returns the resulting absolute URL, or the existing cover_url value
+        (None if there is none) when the expected markup is not found.
+        '''
+        soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
+        box = soup.find(id='wspolnik')
+        # Walk the expected <div><a href=...> chain defensively so that a
+        # change in the page layout does not raise AttributeError/TypeError.
+        link = None
+        if box is not None and box.div is not None:
+            link = box.div.a
+        if link is not None and link.get('href'):
+            self.cover_url = 'http://www.cdaction.pl' + link['href']
+        return getattr(self, 'cover_url', None)
--- a/recipes/cgm_pl.recipe
+++ b/recipes/cgm_pl.recipe
@ -5,6 +5,7 @@ class CGM(BasicNewsRecipe):
    oldest_article = 7
    __author__        = 'fenuks'
    description   = u'Codzienna Gazeta Muzyczna'
+    masthead_url='http://www.cgm.pl/img/header/logo.gif'
    cover_url      = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
    category       = 'music'
    language       = 'pl'
@ -23,21 +24,19 @@ class CGM(BasicNewsRecipe):


    def preprocess_html(self, soup):
+        gallery=soup.find('div', attrs={'class':'galleryFlash'})
+        if gallery:
+            img=gallery.div
+            gallery.img.extract()
+            if img:
+                img=img['style']
+                img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
+                gallery.contents[1].name='img'
+                gallery.contents[1]['src']=img
        for item in soup.findAll(style=True):
            del item['style']
        ad=soup.findAll('a')
        for r in ad:
-            if 'http://www.hustla.pl' in r['href'] or 'http://www.ebilet.pl' in r['href']:                
+            if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:                
                 r.extract()
-        gallery=soup.find('div', attrs={'class':'galleryFlash'})
-        if gallery:
-            img=gallery.find('embed')
-            if img:
-                img=img['src'][35:]
-                img='http://www.cgm.pl/_vault/_gallery/_photo/'+img
-                param=gallery.findAll(name='param')
-                for i in param:
-                    i.extract()
-                gallery.contents[1].name='img'
-                gallery.contents[1]['src']=img
        return soup
--- a/recipes/chr_mon.recipe
+++ b/recipes/chr_mon.recipe
@ -33,6 +33,32 @@ class ChristianScienceMonitor(BasicNewsRecipe):

    remove_javascript     = True
    no_stylesheets = True
+    requires_version = (0, 8, 39)
+
+    def preprocess_raw_html(self, raw, url):
+        '''
+        Re-parse raw with html5lib (lxml tree builder), strip script, style,
+        noscript, meta, link and object elements plus all comments, and
+        return the serialized document as unicode starting at '<html'.
+
+        url is accepted but not used here. Any exception is printed with
+        its traceback and then re-raised.
+        '''
+        try:
+            from html5lib import parse
+            root = parse(raw, namespaceHTMLElements=False,
+                    treebuilder='lxml').getroot()
+            from lxml import etree
+            for tag in root.xpath(
+                    '//script|//style|//noscript|//meta|//link|//object'):
+                tag.getparent().remove(tag)
+            # Materialize the iterator first: the tree is modified while
+            # the comments are being removed.
+            for elem in list(root.iterdescendants(tag=etree.Comment)):
+                elem.getparent().remove(elem)
+            ans = etree.tostring(root, encoding=unicode)
+            # Drop everything that precedes the last '<html' occurrence.
+            ans = re.sub('.*<html', '<html', ans, flags=re.DOTALL)
+            return ans
+        except:
+            import traceback
+            traceback.print_exc()
+            raise
+
+    def index_to_soup(self, url):
+        # Fetch the page in raw form through the base class, decode it as
+        # UTF-8, clean it with preprocess_raw_html and only then let the
+        # base class build the soup from the cleaned markup.
+        # NOTE(review): this override accepts only url - confirm that no
+        # caller passes the extra arguments the base method may accept.
+        raw = BasicNewsRecipe.index_to_soup(self, url,
+                raw=True).decode('utf-8')
+        raw = self.preprocess_raw_html(raw, url)
+        return BasicNewsRecipe.index_to_soup(self, raw)

    def append_page(self, soup, appendtag, position):
        nav = soup.find('div',attrs={'class':'navigation'})
@ -78,14 +104,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
            print_soup = soup
        return print_soup

-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
-        [
-            (r'<!--.*?-->', lambda match : ''),
-        (r'<body.*?<div id="story"', lambda match : '<body><div id="story"'),
-        (r'<div class="pubdate">.*?</div>', lambda m: ''),
-        (r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
-              lambda match : '</body>'),
-        ]]
    extra_css      = '''
                        h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
                        .sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
--- a/recipes/ciekawostki_historyczne.recipe
+++ b/recipes/ciekawostki_historyczne.recipe
@ -0,0 +1,48 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class Ciekawostki_Historyczne(BasicNewsRecipe):
+    '''
+    Recipe for ciekawostkihistoryczne.pl; multi-page articles are stitched
+    together by append_page, which preprocess_html calls for every article.
+    '''
+    title          = u'Ciekawostki Historyczne'
+    oldest_article = 7
+    __author__        = 'fenuks'
+    description   = u'Serwis popularnonaukowy - odkrycia, kontrowersje, historia, ciekawostki, badania, ciekawostki z przeszłości.'
+    category       = 'history'
+    language       = 'pl'
+    masthead_url= 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
+    cover_url='http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
+    max_articles_per_feed = 100
+    # Cut the "this article has several pages" block and the "see also" list.
+    preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
+    no_stylesheets=True
+    remove_empty_feeds=True
+    keep_only_tags=[dict(name='div', attrs={'class':'post'})]
+    remove_tags=[dict(id='singlepostinfo')]
+    feeds          = [(u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'), (u'\u015aredniowiecze', u'http://ciekawostkihistoryczne.pl/tag/sredniowiecze/feed/'), (u'Nowo\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/nowozytnosc/feed/'), (u'XIX wiek', u'http://ciekawostkihistoryczne.pl/tag/xix-wiek/feed/'), (u'1914-1939', u'http://ciekawostkihistoryczne.pl/tag/1914-1939/feed/'), (u'1939-1945', u'http://ciekawostkihistoryczne.pl/tag/1939-1945/feed/'), (u'Powojnie (od 1945)', u'http://ciekawostkihistoryczne.pl/tag/powojnie/feed/'), (u'Recenzje', u'http://ciekawostkihistoryczne.pl/category/recenzje/feed/')]
+
+    def append_page(self, soup, appendtag):
+        '''
+        Append the follow-up pages of a multi-page article to appendtag.
+
+        The page links are taken from the <p> that directly follows the
+        first <h7> in soup. Nothing is done when there is no <h7>, when
+        the <h7> contains a <br>, or when its next sibling is not a <p>.
+        '''
+        tag=soup.find(name='h7')
+        if not tag or tag.br:
+            return
+        pager = tag.nextSibling
+        # The sibling may be missing or a text node; neither has a usable
+        # .name, so read it defensively instead of raising AttributeError.
+        if getattr(pager, 'name', None) != 'p':
+            return
+        nexturl = pager.findAll('a')
+        if nexturl:
+            # Drop the pager from the article once; its links stay readable.
+            pager.extract()
+        for nextpage in nexturl:
+            soup2 = self.index_to_soup(nextpage['href'])
+            pagetext = soup2.find(name='div', attrs={'class':'post'})
+            if pagetext is None:
+                # Follow-up page without the expected container: skip it.
+                continue
+            for r in pagetext.findAll('div', attrs={'id':'singlepostinfo'}):
+                r.extract()
+            for r in pagetext.findAll('div', attrs={'class':'wp-caption alignright'}):
+                r.extract()
+            for r in pagetext.findAll('h1'):
+                r.extract()
+            # Remove the node that follows the first <h6> and the first
+            # <h7>, when those markers are present on the follow-up page.
+            for marker_name in ('h6', 'h7'):
+                marker = pagetext.find(marker_name)
+                if marker is not None and marker.nextSibling is not None:
+                    marker.nextSibling.extract()
+            pos = len(appendtag.contents)
+            appendtag.insert(pos, pagetext)
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into soup.body and return soup.'''
+        self.append_page(soup, soup.body)
+        return soup
+   
+        
--- a/recipes/computerworld_pl.recipe
+++ b/recipes/computerworld_pl.recipe
@ -7,10 +7,11 @@ class Computerworld_pl(BasicNewsRecipe):
    description   = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
    category       = 'IT'
    language       = 'pl'
+    masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100
-    keep_only_tags=[dict(name='div', attrs={'id':'s'})]
+    keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})]
    remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
    remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
    feeds          = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@ -7,6 +7,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    __licence__ ='GPL v3'
    category       = 'IT'
    language       = 'pl'
+    masthead_url='http://static.dpcdn.pl/css/Black/Images/header_logo_napis_fullVersion.png'
    cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
    description = u'Aktualności i blogi z dobreprogramy.pl'
    encoding = 'utf-8'
@ -16,7 +17,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    oldest_article = 8
    max_articles_per_feed = 100
    preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
-    remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
-    keep_only_tags = [dict(name='div', attrs={'class':['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
+    keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
+    remove_tags = [dict(name='div', attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags  font-heading-master']})]
+    #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
    feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
                 ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
--- a/recipes/dziennik_pl.recipe
+++ b/recipes/dziennik_pl.recipe
@ -8,15 +8,17 @@ class Dziennik_pl(BasicNewsRecipe):
    description   = u'Wiadomości z kraju i ze świata. Wiadomości gospodarcze. Znajdziesz u nas informacje, wydarzenia, komentarze, opinie.'
    category       = 'newspaper'
    language       = 'pl'
-    cover_url='http://6.s.dziennik.pl/images/og_dziennik.jpg'
+    masthead_url= 'http://5.s.dziennik.pl/images/logos.png'
+    cover_url= 'http://5.s.dziennik.pl/images/logos.png'
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_javascript=True
    remove_empty_feeds=True
-    preprocess_regexps     = [(re.compile("Komentarze:"), lambda m: '')]
+    extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
+    preprocess_regexps     = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
    keep_only_tags=[dict(id='article')]
-    remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget']}), dict(name='a', attrs={'class':'komentarz'})]
+    remove_tags=[dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']})]
    feeds          = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
 		(u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
 		(u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
@ -30,6 +32,12 @@ class Dziennik_pl(BasicNewsRecipe):
 		(u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
 		(u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]

+    def skip_ad_pages(self, soup):
+        '''
+        When soup contains a link titled 'CZYTAJ DALEJ', fetch the link
+        target in raw form and return it; otherwise return None.
+        '''
+        link = soup.find(name='a', attrs={'title':'CZYTAJ DALEJ'})
+        if not link:
+            return None
+        return self.index_to_soup(link['href'], raw=True)
+
    def append_page(self, soup, appendtag):
        tag=soup.find('a', attrs={'class':'page_next'})
        if tag:
@ -56,3 +64,4 @@ class Dziennik_pl(BasicNewsRecipe):
    def preprocess_html(self, soup):
         self.append_page(soup, soup.body)
         return soup
+
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@ -10,7 +10,8 @@ class Filmweb_pl(BasicNewsRecipe):
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets= True
-    extra_css      = '.hdrBig {font-size:22px;}'
+    remove_empty_feeds=True
+    extra_css      = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
    remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
    keep_only_tags= [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]
    feeds          = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
--- a/recipes/gameplay_pl.recipe
+++ b/recipes/gameplay_pl.recipe
@ -0,0 +1,21 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class Gameplay_pl(BasicNewsRecipe):
+    '''Recipe for the gameplay.pl news feed.'''
+    title          = u'Gameplay.pl'
+    oldest_article = 7
+    __author__        = 'fenuks'
+    description   = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
+    category       = 'games, movies, books, music'
+    language       = 'pl'
+    masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
+    cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
+    max_articles_per_feed = 100
+    no_stylesheets= True
+    keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
+    remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
+    feeds          = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
+
+    def image_url_processor(self, baseurl, url):
+        '''
+        Return url unchanged when it contains 'http'; otherwise drop its
+        first two characters and prefix the rest with 'http://gameplay.pl'.
+
+        baseurl is accepted but not used.
+        '''
+        # Indentation uses spaces only; the original mixed tabs and spaces
+        # on the final return line.
+        if 'http' not in url:
+            return 'http://gameplay.pl'+ url[2:]
+        return url
--- a/recipes/gazeta_wyborcza.recipe
+++ b/recipes/gazeta_wyborcza.recipe
@ -4,10 +4,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Gazeta_Wyborcza(BasicNewsRecipe):
    title          = u'Gazeta Wyborcza'
    __author__        = 'fenuks'
-    cover_url      = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    language       = 'pl'
    description ='news from gazeta.pl'
    category='newspaper'
+    publication_type = 'newspaper'
+    masthead_url='http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
    INDEX='http://wyborcza.pl'
    remove_empty_feeds= True
    oldest_article = 3
@ -81,3 +82,10 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
            return url
        else:
             return url.replace('http://wyborcza.biz/biznes/1', 'http://wyborcza.biz/biznes/2029020')
+
+    def get_cover_url(self):
+        '''
+        Resolve the cover image in two steps: follow the link found in the
+        fourth child of the element with id 'GWmini2', then use the first
+        <img> of the linked page.
+
+        Returns the resulting absolute URL, or the existing cover_url value
+        (None if there is none) when the expected markup is not found.
+        '''
+        soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
+        cover = soup.find(id='GWmini2')
+        link = None
+        if cover is not None and len(cover.contents) > 3:
+            # contents[3] may be a text node, which has no .a attribute.
+            link = getattr(cover.contents[3], 'a', None)
+        if link is not None and link.get('href'):
+            soup = self.index_to_soup('http://wyborcza.pl/' + link['href'])
+            img = soup.img
+            if img is not None and img.get('src'):
+                self.cover_url = 'http://wyborcza.pl' + img['src']
+        return getattr(self, 'cover_url', None)
--- a/recipes/gry_online_pl.recipe
+++ b/recipes/gry_online_pl.recipe
@ -8,29 +8,31 @@ class Gry_online_pl(BasicNewsRecipe):
    language       = 'pl'
    oldest_article = 13
    INDEX= 'http://www.gry-online.pl/'
-    cover_url='http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
+    masthead_url='http://www.gry-online.pl/im/gry-online-logo.png'
+    cover_url='http://www.gry-online.pl/im/gry-online-logo.png'
    max_articles_per_feed = 100
    no_stylesheets= True
-    extra_css              = 'p.wn1{font-size:22px;}'
-    remove_tags_after= [dict(name='div', attrs={'class':['tresc-newsa']})]
-    keep_only_tags = [dict(name='div', attrs={'class':['txthead']}), dict(name='p', attrs={'class':['wtx1', 'wn1', 'wob']}), dict(name='a', attrs={'class':['num_str_nex']})]
-    #remove_tags= [dict(name='div', attrs={'class':['news_plat']})]
+    keep_only_tags=[dict(name='div', attrs={'class':'gc660'})]
+    remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
    feeds          = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]


    def append_page(self, soup, appendtag):
-        nexturl = soup.find('a', attrs={'class':'num_str_nex'})
-        if appendtag.find('a', attrs={'class':'num_str_nex'}) is not None:
-            appendtag.find('a', attrs={'class':'num_str_nex'}).replaceWith('\n')
-        if nexturl is not None:
-            if 'strona' in nexturl.div.string:
-                nexturl= self.INDEX + nexturl['href']
-                soup2 = self.index_to_soup(nexturl)
-                pagetext = soup2.findAll(name='p', attrs={'class':['wtx1', 'wn1', 'wob']})
-                for tag in pagetext:
-                    pos = len(appendtag.contents)
-                    appendtag.insert(pos, tag)
-                self.append_page(soup2, appendtag)
+        tag = appendtag.find('div', attrs={'class':'n5p'})
+        if tag:
+            nexturls=tag.findAll('a')
+            for nexturl in nexturls[1:]:
+                try:
+                    soup2 = self.index_to_soup('http://www.gry-online.pl/S020.asp'+ nexturl['href'])
+                except:
+                    soup2 = self.index_to_soup('http://www.gry-online.pl/S022.asp'+ nexturl['href'])
+                pagetext = soup2.find(attrs={'class':'gc660'})
+                for r in pagetext.findAll(name='header'):
+                    r.extract()
+                pos = len(appendtag.contents)
+                appendtag.insert(pos, pagetext)
+            for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button']}):
+                r.extract()


    def preprocess_html(self, soup):
--- a/recipes/icons/ciekawostki_historyczne.png
+++ b/recipes/icons/ciekawostki_historyczne.png
--- a/recipes/icons/gameplay_pl.png
+++ b/recipes/icons/gameplay_pl.png
--- a/recipes/icons/in4_pl.png
+++ b/recipes/icons/in4_pl.png
--- a/recipes/icons/informacje_usa.png
+++ b/recipes/icons/informacje_usa.png
--- a/recipes/icons/kresy_pl.png
+++ b/recipes/icons/kresy_pl.png
--- a/recipes/icons/oclab_pl.png
+++ b/recipes/icons/oclab_pl.png
--- a/recipes/icons/overclock_pl.png
+++ b/recipes/icons/overclock_pl.png
--- a/recipes/icons/palmtop_pl.png
+++ b/recipes/icons/palmtop_pl.png
--- a/recipes/icons/pc_arena.png
+++ b/recipes/icons/pc_arena.png
--- a/recipes/icons/pc_centre_pl.png
+++ b/recipes/icons/pc_centre_pl.png
--- a/recipes/icons/pc_foster.png
+++ b/recipes/icons/pc_foster.png
--- a/recipes/icons/polska_times.png
+++ b/recipes/icons/polska_times.png
--- a/recipes/icons/pure_pc.png
+++ b/recipes/icons/pure_pc.png
--- a/recipes/icons/tanuki.png
+++ b/recipes/icons/tanuki.png
--- a/recipes/icons/tvn24.png
+++ b/recipes/icons/tvn24.png
--- a/recipes/icons/webhosting_pl.png
+++ b/recipes/icons/webhosting_pl.png
--- a/recipes/in4_pl.recipe
+++ b/recipes/in4_pl.recipe
@ -0,0 +1,44 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class in4(BasicNewsRecipe):
+    '''
+    Recipe for in4.pl; multi-page articles are stitched together by
+    append_page, which preprocess_html calls for every article.
+    '''
+    title          = u'IN4.pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Serwis Informacyjny - Aktualnosci, recenzje'
+    category       = 'IT'
+    language       = 'pl'
+    #cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
+    no_stylesheets = True
+    remove_empty_feeds = True
+    preprocess_regexps = [(re.compile(ur'<a title="translate into.*?</a>', re.DOTALL), lambda match: '') ]
+    keep_only_tags=[dict(name='div', attrs={'class':'left_alone'})]
+    remove_tags_after=dict(name='img', attrs={'title':'komentarze'})
+    remove_tags=[dict(name='img', attrs={'title':'komentarze'})]
+    feeds          = [(u'Wiadomo\u015bci', u'http://www.in4.pl/rss.php'), (u'Recenzje', u'http://www.in4.pl/rss_recenzje.php'), (u'Mini recenzje', u'http://www.in4.pl/rss_mini.php')]
+
+    def append_page(self, soup, appendtag):
+        '''
+        Follow the "następna str" (next page) links starting from soup and
+        append the element with id 'news' of every further page to
+        appendtag.
+        '''
+        a=soup.findAll('a')
+        nexturl=None
+        for i in a:
+            # Unicode literal, matching the test in the loop below: a byte
+            # string with non-ASCII characters cannot be reliably compared
+            # with the unicode link text under Python 2.
+            if i.string and u'następna str' in i.string:
+                nexturl='http://www.in4.pl/' + i['href']
+                i.extract()
+        while nexturl:
+            soup2 = self.index_to_soup(nexturl)
+            pagetext = soup2.find(id='news')
+            if pagetext is not None:
+                # Only insert when the follow-up page has the container.
+                pos = len(appendtag.contents)
+                appendtag.insert(pos, pagetext)
+            nexturl=None
+            tag=soup2.findAll('a')
+            for z in tag:
+                if z.string and u'następna str' in z.string:
+                    nexturl='http://www.in4.pl/' + z['href']
+                    break
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into soup.body and return soup.'''
+        self.append_page(soup, soup.body)
+        return soup
+   
+        
--- a/recipes/informacje_usa.recipe
+++ b/recipes/informacje_usa.recipe
@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class Informacje_USA(BasicNewsRecipe):
+    '''Recipe for the informacjeusa.com feed (language set to 'pl').'''
+    title          = u'Informacje USA'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'portal wiadomości amerykańskich'
+    category       = 'news'
+    language       = 'pl'
+    masthead_url= 'http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
+    cover_url='http://www.informacjeusa.com/wp-content/add_images/top_logo_5_2010.jpg'
+    no_stylesheets = True
+    # Each pattern is replaced with the empty string: paragraphs starting
+    # with "Zobacz:", "Zobacz także:" links and "Zobacz też:" paragraphs.
+    preprocess_regexps = [(re.compile(ur'<p>Zobacz:.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><a href=".*?Zobacz także:.*?</a></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><p>Zobacz też:.*?</a></p>', re.DOTALL), lambda match: '')]
+    keep_only_tags=[dict(name='div', attrs={'class':'box box-single'})]
+    remove_tags_after= dict(attrs={'class':'tags'})
+    remove_tags= [dict(attrs={'class':['postmetadata', 'tags', 'banner']}), dict(name='a', attrs={'title':['Drukuj', u'Wyślij']})]
+    feeds          = [(u'Informacje', u'http://www.informacjeusa.com/feed/')]
--- a/recipes/kresy_pl.recipe
+++ b/recipes/kresy_pl.recipe
@ -0,0 +1,14 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class Kresy(BasicNewsRecipe):
+    '''Recipe for the kresy.pl RSS feed (language set to 'pl').'''
+    title          = u'Kresy'
+    __author__        = 'fenuks'
+    description   = u'portal społeczności kresowej'
+    language       = 'pl'
+    masthead_url= 'http://www.kresy.pl/public/img/logo.png'
+    cover_url= 'http://www.kresy.pl/public/img/logo.png'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    # Keep only the element with id 'artykul', minus the listed classes.
+    keep_only_tags= [dict(id='artykul')]
+    remove_tags= [dict(attrs={'class':['twitter-share-button', 'likefbborder', 'tagi']})]
+    feeds          = [(u'Wszystkie', u'http://www.kresy.pl/rss')]
--- a/recipes/la_pausa_caffe.recipe
+++ b/recipes/la_pausa_caffe.recipe
@ -0,0 +1,17 @@
+__version__     = 'v1.0'
+__date__        = '13, February 2011'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1329125921(BasicNewsRecipe):
+    '''Recipe for the "La pausa caffè" feed (language set to 'it').'''
+    title          = u'La pausa caff\xe8'
+    __author__      = 'faber1971'
+    description    = 'An Italian satirical blog'
+    language = 'it'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    # No explicit tag filters are defined; auto_cleanup is enabled instead.
+    auto_cleanup = True
+    no_stylesheets = True
+    feeds          = [(u'La pausa caff\xe8', u'http://feeds.feedburner.com/LapausaCaffe')]
+
--- a/recipes/marketing_magazine.recipe
+++ b/recipes/marketing_magazine.recipe
@ -1,4 +1,5 @@
 __license__   = 'GPL v3'
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class AdvancedUserRecipe1327062445(BasicNewsRecipe):
@ -7,10 +8,13 @@ class AdvancedUserRecipe1327062445(BasicNewsRecipe):
    max_articles_per_feed = 100
    auto_cleanup = True
    remove_javascript = True
+    no_stylesheets = True
+    remove_tags = [
+                     dict(name='ul', attrs={'id':'ads0'})
+                  ]
    masthead_url            = 'http://www.simrendeogun.com/wp-content/uploads/2011/06/New-Marketing-Magazine-Logo.jpg'
-    feeds          = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
    __author__    = 'faber1971'
-    description   = 'Collection of Italian marketing websites - v1.00 (28, January 2012)'
+    description   = 'Collection of Italian marketing websites - v1.03 (20, February 2012)'
    language = 'it'

-
+    feeds          = [(u'My Marketing', u'http://feed43.com/0537744466058428.xml'), (u'My Marketing_', u'http://feed43.com/8126723074604845.xml'), (u'Venturini', u'http://robertoventurini.blogspot.com/feeds/posts/default?alt=rss'), (u'Ninja Marketing', u'http://feeds.feedburner.com/NinjaMarketing'), (u'Comunitàzione', u'http://www.comunitazione.it/feed/novita.asp'), (u'Brandforum news', u'http://www.brandforum.it/rss/news'), (u'Brandforum papers', u'http://www.brandforum.it/rss/papers'), (u'MarketingArena', u'http://feeds.feedburner.com/marketingarena'), (u'minimarketing', u'http://feeds.feedburner.com/minimarketingit'), (u'Disambiguando', u'http://giovannacosenza.wordpress.com/feed/')]
--- a/recipes/mediapart.recipe
+++ b/recipes/mediapart.recipe
@ -1,16 +1,17 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, Louis Gesbert <meta at antislash dot info>'
+__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, 2011, Louis Gesbert <meta at antislash dot info>'
 '''
 Mediapart
 '''

-from calibre.ebooks.BeautifulSoup import Tag
+import re
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.news import BasicNewsRecipe

 class Mediapart(BasicNewsRecipe):
    title          = 'Mediapart'
-    __author__ = 'Mathieu Godlewski'
-    description = 'Global news in french from online newspapers'
+    __author__ = 'Mathieu Godlewski, Louis Gesbert'
+    description = 'Global news in french from news site Mediapart'
    oldest_article = 7
    language = 'fr'
    needs_subscription = True
@ -18,52 +19,30 @@ class Mediapart(BasicNewsRecipe):
    max_articles_per_feed = 50
    no_stylesheets = True

-    cover_url = 'http://www.mediapart.fr/sites/all/themes/mediapart/mediapart/images/annonce.jpg'
+    cover_url = 'http://static.mediapart.fr/files/pave_mediapart.jpg'

    feeds =  [
        ('Les articles', 'http://www.mediapart.fr/articles/feed'),
    ]

-# -- print-version has poor quality on this website, better do the conversion ourselves
-#
-#     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
-#         [
-#             (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
-#             (r'<span class=\'auteur_staff\'>[^>]+<a title=\'[^\']*\'[^>]*>([^<]*)</a>[^<]*</span>',
-#              lambda match : '<i>'+match.group(1)+'</i>'),
-#             (r'\'', lambda match: '&rsquo;'),
-#         ]
-#      ]
-#
-#     remove_tags    = [ dict(name='div', attrs={'class':'print-source_url'}),
-#                        dict(name='div', attrs={'class':'print-links'}),
-#                        dict(name='img', attrs={'src':'entete_article.png'}),
-#                        dict(name='br') ]
-#
-#     def print_version(self, url):
-#         raw = self.browser.open(url).read()
-#         soup = BeautifulSoup(raw.decode('utf8', 'replace'))
-#         div = soup.find('div', {'id':re.compile('node-\d+')})
-#         if div is None:
-#             return None
-#         article_id = string.replace(div['id'], 'node-', '')
-#         if article_id is None:
-#             return None
-#         return 'http://www.mediapart.fr/print/'+article_id
+# -- print-version

-# -- Non-print version [dict(name='div', attrs={'class':'advert'})]
-
-    keep_only_tags = [
-        dict(name='h1', attrs={'class':'title'}),
-        dict(name='div', attrs={'class':'page_papier_detail'}),
+    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
+        [
+            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
+            (r'\'', lambda match: '&rsquo;')
        ]
+    ]

-    def preprocess_html(self,soup):
-        for title in soup.findAll('div', {'class':'titre'}):
-            tag = Tag(soup, 'h3')
-            title.replaceWith(tag)
-            tag.insert(0,title)
-        return soup
+    remove_tags    = [ dict(name='div', attrs={'class':'print-source_url'}) ]
+
+    def print_version(self, url):
+        '''Return the href of the page's "Imprimer" link, or None if absent.
+
+        The page at url is fetched with self.browser and decoded as UTF-8
+        (undecodable bytes are replaced) before being parsed.
+        '''
+        page = self.browser.open(url).read().decode('utf8', 'replace')
+        print_link = BeautifulSoup(page).find('a', {'title':'Imprimer'})
+        if print_link is not None:
+            return print_link['href']
+        return None

 # -- Handle login

@ -76,4 +55,3 @@ class Mediapart(BasicNewsRecipe):
            br['pass'] = self.password
            br.submit()
        return br
-
--- a/recipes/naczytniki.recipe
+++ b/recipes/naczytniki.recipe
@ -1,8 +1,9 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
 class naczytniki(BasicNewsRecipe):
    title          = u'naczytniki.pl'
    __author__        = 'fenuks'
+    masthead_url= 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
    cover_url      = 'http://naczytniki.pl/wp-content/uploads/2010/08/logo_nc28.png'
    language       = 'pl'
    description ='everything about e-readers'
@ -10,6 +11,7 @@ class naczytniki(BasicNewsRecipe):
    no_stylesheets=True
    oldest_article = 7
    max_articles_per_feed = 100
+    preprocess_regexps = [(re.compile(ur'<p><br><b>Zobacz także:</b></p>.*?</body>', re.DOTALL), lambda match: '</body>') ]
    remove_tags_after= dict(name='div', attrs={'class':'sociable'})
    keep_only_tags=[dict(name='div', attrs={'class':'post'})]
    remove_tags=[dict(name='span', attrs={'class':'comments'}), dict(name='div', attrs={'class':'sociable'})]
--- a/recipes/nowa_fantastyka.recipe
+++ b/recipes/nowa_fantastyka.recipe
@ -1,21 +1,33 @@
 # -*- coding: utf-8 -*-
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
 class Nowa_Fantastyka(BasicNewsRecipe):
    title          = u'Nowa Fantastyka'
    oldest_article = 7
    __author__        = 'fenuks'
+    __modified_by__   = 'zaslav'
    language       = 'pl'
    encoding='latin2'
    description ='site for fantasy readers'
    category='fantasy'
+    masthead_url='http://farm5.static.flickr.com/4133/4956658792_7ba7fbf562.jpg'
+    #extra_css='.tytul {font-size: 20px;}' #not working
    max_articles_per_feed = 100
    INDEX='http://www.fantastyka.pl/'
    no_stylesheets=True
    needs_subscription = 'optional'
-    remove_tags_before=dict(attrs={'class':'belka1-tlo-md'})
+    remove_tags_before=dict(attrs={'class':'naglowek2'})
    #remove_tags_after=dict(name='span', attrs={'class':'naglowek-oceny'})
-    remove_tags_after=dict(name='td', attrs={'class':'belka1-bot'})
-    remove_tags=[dict(attrs={'class':'avatar2'}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'})]
+    remove_tags_after=dict(name='form', attrs={'name':'form1'})
+    remove_tags=[dict(attrs={'class':['avatar2', 'belka-margin', 'naglowek2']}), dict(name='span', attrs={'class':'alert-oceny'}), dict(name='img', attrs={'src':['obrazki/sledz1.png', 'obrazki/print.gif', 'obrazki/mlnf.gif']}), dict(name='b', text='Dodaj komentarz'),dict(name='a', attrs={'href':'http://www.fantastyka.pl/10,1727.html'}), dict(name='form')]
+    preprocess_regexps = [
+    (re.compile(r'\<table .*?\>'), lambda match: ''),
+    (re.compile(r'\<td.*?\>'), lambda match: ''),
+    (re.compile(r'\<center\>'), lambda match: '')]
+
+
+

    def find_articles(self, url):
        articles = []
@ -41,10 +53,10 @@ class Nowa_Fantastyka(BasicNewsRecipe):

         return feeds

+
    def get_cover_url(self):
-        soup = self.index_to_soup('http://www.fantastyka.pl/1.html')
-        cover=soup.find(name='img', attrs={'class':'okladka'})
-        self.cover_url=self.INDEX+ cover['src']
+        soup = self.index_to_soup('http://www.e-kiosk.pl/nowa_fantastyka')
+        self.cover_url='http://www.e-kiosk.pl' + soup.find(name='a', attrs={'class':'img'})['href']
        return getattr(self, 'cover_url', self.cover_url)

    def get_browser(self):
@ -56,3 +68,18 @@ class Nowa_Fantastyka(BasicNewsRecipe):
            br['pass'] = self.password
            br.submit()
        return br
+
+    def preprocess_html(self, soup):
+        '''Strip presentational attributes and flatten table rows.
+
+        Deletes the style, font and align attributes from every tag that
+        carries them, renames each <tr> to <div>, and sets an inline style
+        on the first element with class "tytul" (if one exists).
+        Returns the modified soup.
+        '''
+        for attr in ('style', 'font', 'align'):
+            for item in soup.findAll(**{attr: True}):
+                del item[attr]
+        for item in soup.findAll(name='tr'):
+            item.name='div'
+        title=soup.find(attrs={'class':'tytul'})
+        if title:
+            # Set after the style stripping above so it is not removed again.
+            title['style']='font-size: 20px; font-weight: bold;'
+        # The former self.log.warn(soup) debugging call was dropped: it wrote
+        # the whole document to the log at warning level for every article.
+        return soup
--- a/recipes/oclab_pl.recipe
+++ b/recipes/oclab_pl.recipe
@ -0,0 +1,31 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class OCLab(BasicNewsRecipe):
+    '''Recipe for OCLab.pl; follow-up pages of a post are merged into it.'''
+    title          = u'OCLab.pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Portal OCLab.pl jest miejscem przyjaznym pasjonatom sprzętu komputerowego, w szczególności overclockerom, które będzie służyć im za aktualną bazę wiedzy o podkręcaniu komputera, źródło aktualnych informacji z rynku oraz opinii na temat sprzętu komputerowego.'
+    category       = 'IT'
+    language       = 'pl'
+    cover_url= 'http://www.idealforum.ru/attachment.php?attachmentid=7963&d=1316008118'
+    no_stylesheets = True
+    keep_only_tags=[dict(id='main')]
+    remove_tags_after= dict(attrs={'class':'single-postmetadata'})
+    remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar']})]
+    feeds          = [(u'Wpisy', u'http://oclab.pl/feed/')]
+
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a multi-page post to appendtag.
+
+        Page URLs come from the <option> values inside the element with
+        class "contentjumpddl"; the first and last options are skipped.
+        Does nothing when that element is missing.
+        '''
+        tag=soup.find(attrs={'class':'contentjumpddl'})
+        if not tag:
+            return
+        for nextpage in tag.findAll('option')[1:-1]:
+            soup2 = self.index_to_soup(nextpage['value'])
+            pagetext = soup2.find(attrs={'class':'single-entry'})
+            if pagetext is None:
+                # The fetched page has no entry body; nothing to append.
+                continue
+            appendtag.insert(len(appendtag.contents), pagetext)
+        for r in appendtag.findAll(attrs={'class':'post-nav-bottom-list'}):
+            r.extract()
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body and return the soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/overclock_pl.recipe
+++ b/recipes/overclock_pl.recipe
@ -0,0 +1,37 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+class Overclock_pl(BasicNewsRecipe):
+    '''Recipe for Overclock.pl; paginated articles are merged into one.'''
+    title          = u'Overclock.pl'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Vortal poświęcony tematyce hardware, kładący największy nacisk na podkręcanie / overclocking (włącznie z extreme) i chłodzenie / cooling (air cooling, water cooling, freon cooling, dry ice, liquid nitrogen).'
+    category       = 'IT'
+    language       = 'pl'
+    masthead_url='http://www.overclock.pl/gfx/logo_m.png'
+    cover_url='http://www.overclock.pl/gfx/logo_m.png'
+    no_stylesheets = True
+    remove_empty_feeds = True
+    preprocess_regexps = [(re.compile(ur'<b>Komentarze do aktualności:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'<h3>Nawigacja</h3>', re.DOTALL), lambda match: '') ]
+    keep_only_tags=[dict(name='div', attrs={'class':'news'}), dict(id='articleContent')]
+    remove_tags=[dict(name='span', attrs={'class':'info'}), dict(attrs={'class':'shareit'})]
+    feeds          = [(u'Aktualno\u015bci', u'http://www.overclock.pl/rss.news.xml'), (u'Testy i recenzje', u'http://www.overclock.pl/rss.articles.xml')]
+
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a paginated article to appendtag.
+
+        Page URLs come from the <option> values inside the element with
+        id "navigation", starting at the third option. That element is
+        removed from the document. Does nothing when it is missing.
+        '''
+        tag=soup.find(id='navigation')
+        if not tag:
+            return
+        nexturl=tag.findAll('option')
+        tag.extract()
+        for nextpage in nexturl[2:]:
+            soup2 = self.index_to_soup(nextpage['value'])
+            pagetext = soup2.find(id='content')
+            if pagetext is None:
+                # The fetched page has no content element; nothing to append.
+                continue
+            appendtag.insert(len(appendtag.contents), pagetext)
+        # Drop the container of the element whose alt text is "Pierwsza".
+        rem=appendtag.find(attrs={'alt':'Pierwsza'})
+        if rem:
+            rem.parent.extract()
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body and return the soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/palmtop_pl.recipe
+++ b/recipes/palmtop_pl.recipe
@ -0,0 +1,14 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class palmtop_pl(BasicNewsRecipe):
+    '''Recipe for Palmtop.pl with a single feed, "Newsy".'''
+    # Identification
+    __author__ = 'fenuks'
+    title = u'Palmtop.pl'
+    description = 'wortal technologii mobilnych'
+    category = 'mobile'
+    language = 'pl'
+
+    # The same logo image is used for both the cover and the masthead.
+    cover_url = masthead_url = 'http://cdn.g-point.biz/wp-content/themes/palmtop-new/images/header_palmtop_logo.png'
+
+    # Fetch limits and clean-up
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    feeds = [
+        (u'Newsy', u'http://palmtop.pl/feed/atom/'),
+    ]
--- a/recipes/pc_arena.recipe
+++ b/recipes/pc_arena.recipe
@ -0,0 +1,31 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class PC_Arena(BasicNewsRecipe):
+    '''Recipe for PCArena; paginated articles are merged into one.'''
+    title          = u'PCArena'
+    # NOTE(review): 18300 days is roughly 50 years, which presumably turns
+    # the article age cut-off into a no-op - confirm this is deliberate.
+    oldest_article = 18300
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
+    category       = 'IT'
+    language       = 'pl'
+    masthead_url='http://pcarena.pl/public/design/frontend/images/logo.gif'
+    cover_url= 'http://pcarena.pl/public/design/frontend/images/logo.gif'
+    no_stylesheets = True
+    keep_only_tags=[dict(attrs={'class':['artHeader', 'art']})]
+    remove_tags=[dict(attrs={'class':'pages'})]
+    feeds          = [(u'Newsy', u'http://pcarena.pl/misc/rss/news'), (u'Artyku\u0142y', u'http://pcarena.pl/misc/rss/articles')]
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a paginated article to appendtag.
+
+        Page links are the <a> tags inside the div with class "pagNum",
+        starting at the second link; their href values are appended to
+        http://pcarena.pl. The div is removed from the document.
+        Does nothing when it is missing.
+        '''
+        tag=soup.find(name='div', attrs={'class':'pagNum'})
+        if not tag:
+            return
+        nexturl=tag.findAll('a')
+        tag.extract()
+        for nextpage in nexturl[1:]:
+            soup2 = self.index_to_soup('http://pcarena.pl' + nextpage['href'])
+            pagetext = soup2.find(attrs={'class':'artBody'})
+            if pagetext is None:
+                # The fetched page has no article body; nothing to append.
+                continue
+            appendtag.insert(len(appendtag.contents), pagetext)
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body and return the soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/pc_centre_pl.recipe
+++ b/recipes/pc_centre_pl.recipe
@ -0,0 +1,41 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class PC_Centre(BasicNewsRecipe):
+    '''Recipe for PC Centre; paginated articles are merged into one.'''
+    title          = u'PC Centre'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Portal komputerowy, a w nim: testy sprzętu komputerowego, recenzje gier i oprogramowania. a także opisy produktów związanych z komputerami.'
+    category       = 'IT'
+    language       = 'pl'
+    masthead_url= 'http://pccentre.pl/views/images/logo.gif'
+    cover_url= 'http://pccentre.pl/views/images/logo.gif'
+    no_stylesheets = True
+    keep_only_tags= [dict(id='content')]
+    remove_tags=[dict(attrs={'class':['ikony r', 'list_of_content', 'dot accordion']}), dict(id='comments')]
+    feeds          = [(u'Publikacje', u'http://pccentre.pl/backend.php?mode=a'), (u'Aktualno\u015bci', u'http://pccentre.pl/backend.php'), (u'Sprz\u0119t komputerowy', u'http://pccentre.pl/backend.php?mode=n&section=2'), (u'Oprogramowanie', u'http://pccentre.pl/backend.php?mode=n&section=3'), (u'Gry komputerowe i konsole', u'http://pccentre.pl/backend.php?mode=n&section=4'), (u'Internet', u'http://pccentre.pl/backend.php?mode=n&section=7'), (u'Bezpiecze\u0144stwo', u'http://pccentre.pl/backend.php?mode=n&section=5'), (u'Multimedia', u'http://pccentre.pl/backend.php?mode=n&section=6'), (u'Biznes', u'http://pccentre.pl/backend.php?mode=n&section=9')]
+
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a paginated article to appendtag.
+
+        Page links are the <a> tags inside the div with class "pages",
+        excluding the last one; their href values are appended to
+        http://pccentre.pl. The div is removed from the document.
+        Each fetched page is stripped of headers, navigation, comments and
+        <h1> tags before being appended.
+        '''
+        tag=soup.find(name='div', attrs={'class':'pages'})
+        if not tag:
+            return
+        nexturl=tag.findAll('a')
+        tag.extract()
+        for nextpage in nexturl[:-1]:
+            soup2 = self.index_to_soup('http://pccentre.pl' + nextpage['href'])
+            pagetext = soup2.find(id='content')
+            if pagetext is None:
+                # The fetched page has no content element; nothing to clean
+                # up or append.
+                continue
+            for r in pagetext.findAll(attrs={'class':['subtitle', 'content_info', 'list_of_content', 'pages', 'social2', 'pcc_acc', 'pcc_acc_na']}):
+                r.extract()
+            for r in pagetext.findAll(id='comments'):
+                r.extract()
+            for r in pagetext.findAll('h1'):
+                r.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body and return the soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/pc_foster.recipe
+++ b/recipes/pc_foster.recipe
@ -0,0 +1,35 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class PC_Foster(BasicNewsRecipe):
+    '''Recipe for PC Foster; paginated articles are merged into one.'''
+    title          = u'PC Foster'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.'
+    category       = 'IT'
+    language       = 'pl'
+    masthead_url='http://pcfoster.pl/public/images/logo.png'
+    cover_url= 'http://pcfoster.pl/public/images/logo.png'
+    no_stylesheets= True
+    remove_empty_feeds= True
+    keep_only_tags= [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})]
+    remove_tags=[dict(name='p', attrs={'class':'right'})]
+    feeds          = [(u'G\u0142\xf3wny', u'http://pcfoster.pl/public/rss/main.xml')]
+
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a paginated article to appendtag.
+
+        The next page is located through the element whose alt text is
+        "Następna strona"; the href of its parent is appended to
+        http://pcfoster.pl. Pages are followed until no such element is
+        found. Does nothing when appendtag has no such element.
+        '''
+        nextlink = appendtag.find(attrs={'alt':u'Następna strona'})
+        if not nextlink:
+            return
+        pager = appendtag.find(attrs={'class':'pager more_top'})
+        if pager is not None:
+            pager.extract()
+        while nextlink:
+            soup2 = self.index_to_soup('http://pcfoster.pl' + nextlink.parent['href'])
+            nextlink = soup2.find(attrs={'alt':u'Następna strona'})
+            pagetext = soup2.find(attrs={'class':'content'})
+            if pagetext is None:
+                # The fetched page has no content element; nothing to append.
+                continue
+            appendtag.insert(len(appendtag.contents), pagetext)
+        for r in appendtag.findAll(attrs={'class':'review_content double'}):
+            r.extract()
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body and return the soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/polska_times.recipe
+++ b/recipes/polska_times.recipe
@ -0,0 +1,81 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class Polska_times(BasicNewsRecipe):
+    '''Recipe for Polska Times; merges paginated articles and galleries.'''
+    title          = u'Polska Times'
+    __author__        = 'fenuks'
+    description   = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'
+    category       = 'newspaper'
+    language       = 'pl'
+    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds= True
+    no_stylesheets = True
+    preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
+    keep_only_tags= [dict(id=['tytul-artykulu', 'kontent'])]
+    remove_tags_after= dict(id='material-tagi')
+    remove_tags=[dict(attrs={'id':'reklama_srodtekst_0'}), dict(attrs={'id':'material-tagi'}), dict(name='div', attrs={'class':'zakladki'}), dict(attrs={'title':u'CZYTAJ TAKŻE'}), dict(attrs={'id':'podobne'}), dict(name='a', attrs={'href':'http://www.dzienniklodzki.pl/newsletter'})]
+    feeds          = [(u'Fakty', u'http://polskatimes.feedsportal.com/c/32980/f/533648/index.rss'), (u'Opinie', u'http://www.polskatimes.pl/rss/opinie.xml'), (u'Sport', u'http://polskatimes.feedsportal.com/c/32980/f/533649/index.rss'), (u'Pieni\u0105dze', u'http://polskatimes.feedsportal.com/c/32980/f/533657/index.rss'), (u'Twoje finanse', u'http://www.polskatimes.pl/rss/twojefinanse.xml'), (u'Kultura', u'http://polskatimes.feedsportal.com/c/32980/f/533650/index.rss'), (u'Dodatki', u'http://www.polskatimes.pl/rss/dodatki.xml')]
+
+    def skip_ad_pages(self, soup):
+        '''Return the raw page behind an interstitial "Advertisement" page.
+
+        When the page title matches, the href of the first <a> is fetched
+        and returned raw; otherwise the method falls through and returns
+        None.
+        '''
+        # NOTE(review): 'in' is applied to the title tag itself, not to its
+        # text - confirm this matches the ad pages as intended.
+        if soup.title is not None and 'Advertisement' in soup.title:
+            nexturl=soup.find('a')['href']
+            return self.index_to_soup(nexturl, raw=True)
+
+    def append_page(self, soup, appendtag):
+        '''Append the following pages of a paginated article to appendtag.
+
+        Follows the element with id "nastepna_strona" page by page, cleans
+        the id "tresc" element of each page and appends it. Pagination
+        blocks (class "stronicowanie") are removed at the end.
+        '''
+        nexturl=soup.find(id='nastepna_strona')
+        while nexturl:
+            soup2= self.index_to_soup(nexturl['href'])
+            nexturl=soup2.find(id='nastepna_strona')
+            pagetext = soup2.find(id='tresc')
+            if pagetext is None:
+                # The fetched page has no text element; nothing to clean up
+                # or append.
+                continue
+            # Apply the attribute filters of remove_tags to the fetched page.
+            for dictionary in self.remove_tags:
+                for delete in pagetext.findAll(attrs=dictionary['attrs']):
+                    delete.extract()
+            # Drop the "read also" teasers that appear as bold text.
+            for b in pagetext.findAll(name='b'):
+                if b.string:
+                    if u'CZYTAJ TEŻ' in b.string or u'Czytaj także' in b.string or u'Czytaj też' in b.string or u'Zobacz także' in b.string:
+                        b.extract()
+            # Drop <center> blocks that wrap an <h4> containing a link.
+            for center in pagetext.findAll(name='center'):
+                if center.h4 and center.h4.a:
+                    center.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
+            paginator.extract()
+
+    def image_article(self, soup, appendtag):
+        '''Append the following pages of an image gallery to appendtag.
+
+        Follows the <a class="nastepna"> link page by page and stops as
+        soon as a link repeats, so a gallery that wraps around ends the
+        loop. Gallery navigation and pagination blocks are removed at the
+        end.
+        '''
+        nexturl=soup.find('a', attrs={'class':'nastepna'})
+        urls=[]
+        while nexturl:
+            if nexturl in urls:
+                break
+            urls.append(nexturl)
+            soup2= self.index_to_soup('http://www.polskatimes.pl/artykul/' + nexturl['href'])
+            nexturl=soup2.find('a', attrs={'class':'nastepna'})
+            if nexturl in urls:
+                break
+            pagetext = soup2.find(id='galeria-material')
+            if pagetext is None:
+                # The fetched page has no gallery element; nothing to append.
+                continue
+            appendtag.insert(len(appendtag.contents), '<br />')
+            appendtag.insert(len(appendtag.contents), pagetext)
+        for rem in appendtag.findAll(attrs={'class':['galeriaNawigator', 'miniaturyPojemnik']}):
+            rem.extract()
+        for paginator in appendtag.findAll(attrs={'class':'stronicowanie'}):
+            paginator.extract()
+
+    def preprocess_html(self, soup):
+        '''Merge gallery or article follow-up pages and return the soup.
+
+        A gallery link (<a class="nastepna">) takes precedence over an
+        article next-page link (id "nastepna_strona").
+        '''
+        if soup.find('a', attrs={'class':'nastepna'}):
+            self.image_article(soup, soup.body)
+        elif soup.find(id='nastepna_strona'):
+            self.append_page(soup, soup.body)
+        return soup
+
+
+    def get_cover_url(self):
+        '''Set and return cover_url from the image in the id "pojemnik" element.'''
+        soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
+        self.cover_url=soup.find(id='pojemnik').img['src']
+        return getattr(self, 'cover_url', self.cover_url)
--- a/recipes/pure_pc.recipe
+++ b/recipes/pure_pc.recipe
@ -0,0 +1,33 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class PurePC(BasicNewsRecipe):
+    '''Recipe for PurePC; paginated articles are merged into one.'''
+    title          = u'PurePC'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Artykuły, aktualności, sprzęt, forum, chłodzenie, modding, urządzenia mobilne - wszystko w jednym miejscu.'
+    category       = 'IT'
+    language       = 'pl'
+    masthead_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
+    cover_url= 'http://www.purepc.pl/themes/new/images/purepc.jpg'
+    no_stylesheets = True
+    keep_only_tags= [dict(id='content')]
+    remove_tags_after= dict(attrs={'class':'fivestar-widget'})
+    remove_tags= [dict(id='navigator'), dict(attrs={'class':['box-tools', 'fivestar-widget', 'PageMenuList']})]
+    feeds          = [(u'Wiadomo\u015bci', u'http://www.purepc.pl/node/feed')]
+
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a paginated article to appendtag.
+
+        Follows the link inside the element with class "pager-next" page
+        by page (its href is appended to http://www.purepc.pl) and appends
+        each page's class "article" element. Page menus, pagers and rating
+        widgets are removed at the end. Does nothing when appendtag has no
+        "pager-next" element.
+        '''
+        nexturl= appendtag.find(attrs={'class':'pager-next'})
+        if not nexturl:
+            return
+        while nexturl:
+            if nexturl.a is None:
+                # A pager element without a link cannot be followed.
+                break
+            soup2 = self.index_to_soup('http://www.purepc.pl'+ nexturl.a['href'])
+            nexturl=soup2.find(attrs={'class':'pager-next'})
+            pagetext = soup2.find(attrs={'class':'article'})
+            if pagetext is None:
+                # The fetched page has no article element; nothing to append.
+                continue
+            appendtag.insert(len(appendtag.contents), pagetext)
+        for r in appendtag.findAll(attrs={'class':['PageMenuList', 'pager', 'fivestar-widget']}):
+            r.extract()
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body and return the soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/tablety_pl.recipe
+++ b/recipes/tablety_pl.recipe
@ -1,14 +1,16 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-
+import re
 class Tablety_pl(BasicNewsRecipe):
    title          = u'Tablety.pl'
    __author__        = 'fenuks'
    description   = u'tablety.pl - latest tablet news'
+    masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
    cover_url      = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
    category       = 'IT'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
+    preprocess_regexps = [(re.compile(ur'<p><strong>Przeczytaj także.*?</a></strong></p>', re.DOTALL), lambda match: ''), (re.compile(ur'<p><strong>Przeczytaj koniecznie.*?</a></strong></p>', re.DOTALL), lambda match: '')]
    remove_tags_before=dict(name="h1", attrs={'class':'entry-title'})
    remove_tags_after=dict(name="div", attrs={'class':'snap_nopreview sharing robots-nocontent'})
    remove_tags=[dict(name='div', attrs={'class':'snap_nopreview sharing robots-nocontent'})]
--- a/recipes/tanuki.recipe
+++ b/recipes/tanuki.recipe
@ -0,0 +1,37 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class tanuki(BasicNewsRecipe):
+    '''Recipe for Tanuki; multi-page texts are merged into one article.'''
+    title          = u'Tanuki'
+    oldest_article = 7
+    __author__        = 'fenuks'
+    category       = 'anime, manga'
+    language       = 'pl'
+    max_articles_per_feed = 100
+    encoding='utf-8'
+    extra_css= 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
+    preprocess_regexps = [(re.compile(ur'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(ur'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>', re.DOTALL), lambda match: '')]
+    remove_empty_feeds= True
+    no_stylesheets = True
+    keep_only_tags=[dict(attrs={'class':['animename', 'storyname', 'nextarrow','sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={'summary':'Technikalia'}), dict(attrs={'class':['chaptername','copycat']}), dict(id='rightcolumn'), dict(attrs={'class':['headn_tt', 'subtable']})]
+    remove_tags=[dict(name='div', attrs={'class':'screen'}), dict(id='randomtoplist'), dict(attrs={'class':'note'})]
+    feeds          = [(u'Anime', u'http://anime.tanuki.pl/rss_anime.xml'), (u'Manga', u'http://manga.tanuki.pl/rss_manga.xml'), (u'Tomiki', u'http://manga.tanuki.pl/rss_mangabooks.xml'), (u'Artyku\u0142y', u'http://czytelnia.tanuki.pl/rss_czytelnia_artykuly.xml'), (u'Opowiadania', u'http://czytelnia.tanuki.pl/rss_czytelnia.xml')]
+
+
+    def append_page(self, soup, appendtag):
+        '''Append the remaining pages of a multi-page text to appendtag.
+
+        Follows the element with class "nextarrow" page by page (its href
+        is appended to http://czytelnia.tanuki.pl). From each page the
+        first "chaptername"/"copycat" element and then the first "copycat"
+        element are appended. All "nextarrow" elements are removed at the
+        end. Does nothing when appendtag has no "nextarrow" element.
+        '''
+        nexturl= appendtag.find(attrs={'class':'nextarrow'})
+        if not nexturl:
+            return
+        while nexturl:
+            soup2 = self.index_to_soup('http://czytelnia.tanuki.pl'+ nexturl['href'])
+            nexturl=soup2.find(attrs={'class':'nextarrow'})
+            pagetext = soup2.find(attrs={'class':['chaptername', 'copycat']})
+            if pagetext is not None:
+                appendtag.insert(len(appendtag.contents), pagetext)
+            # Looked up after the first insert, as in the original order.
+            pagetext = soup2.find(attrs={'class':'copycat'})
+            if pagetext is not None:
+                appendtag.insert(len(appendtag.contents), pagetext)
+        for r in appendtag.findAll(attrs={'class':'nextarrow'}):
+            r.extract()
+
+    def preprocess_html(self, soup):
+        '''Merge follow-up pages into the body and return the soup.'''
+        self.append_page(soup, soup.body)
+        return soup
--- a/recipes/the_sun.recipe
+++ b/recipes/the_sun.recipe
@ -1,49 +1,57 @@
 import re
-from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag
+from calibre.web.feeds.recipes import BasicNewsRecipe

-class AdvancedUserRecipe1268409464(BasicNewsRecipe):
-    title = u'The Sun'
-    __author__ = 'Chaz Ralph'
-    description = 'News from The Sun'
+class AdvancedUserRecipe1325006965(BasicNewsRecipe):
+
+    title          = u'The Sun UK'
+    cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
+
+    description = 'A Recipe for The Sun tabloid UK - uses feed43'
+    __author__ = 'Dave Asbury'
+    # last updated 20/2/12
+    language = 'en_GB'
    oldest_article = 1
-    max_articles_per_feed = 100
-    language = 'en'
+    max_articles_per_feed = 15
+    remove_empty_feeds = True
    no_stylesheets = True
-    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
-    encoding= 'iso-8859-1'
-    remove_javascript = True
+
+    masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
+    encoding = 'cp1251'
+
+    encoding = 'cp1252'
+    remove_empty_feeds = True
+    remove_javascript     = True
+    no_stylesheets = True
+
+    extra_css  = '''
+    body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
+                     '''
+
+    preprocess_regexps = [
+        (re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]

    keep_only_tags = [
-            dict(id='column-print')
+                               dict(name='h1'),dict(name='h2',attrs={'class' : 'medium centered'}),
+               dict(name='div',attrs={'class' : 'text-center'}),
+               dict(name='div',attrs={'id' : 'bodyText'})
+               # dict(name='p')
+               ]
+
+    remove_tags=[
+           #dict(name='head'),
+           dict(attrs={'class' : ['mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
+                           dict(name='div',attrs={'class' : 'cf'}),
+           dict(attrs={'title' : 'download flash'}),
+                           dict(attrs={'style' : 'padding: 5px'})
+
+           ]
+
+    feeds          = [
+        (u'News','http://feed43.com/2517447382644748.xml'),
+        (u'Sport', u'http://feed43.com/4283846255668687.xml'),
+        (u'Bizarre', u'http://feed43.com/0233840304242011.xml'),
+        (u'Film',u'http://feed43.com/1307545221226200.xml'),
+        (u'Music',u'http://feed43.com/1701513435064132.xml'),
+        (u'Sun Woman',u'http://feed43.com/0022626854226453.xml'),
    ]

-    remove_tags = [
-        dict(name='div', attrs={'class':[
-            'clear text-center small padding-left-right-5 text-999 padding-top-5 padding-bottom-10 grey-solid-line',
-            'clear width-625 bg-fff padding-top-10'
-            ]}),
-        dict(name='video'),
-    ]
-
-    def preprocess_html(self, soup):
-        h1 = soup.find('h1')
-        if h1 is not None:
-            text = self.tag_to_string(h1)
-            nh = Tag(soup, 'h1')
-            nh.insert(0, text)
-            h1.replaceWith(nh)
-
-        return soup
-
-
-    feeds = [(u'News', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article312900.ece')
-,(u'Sport', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247732.ece')
-,(u'Football', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247739.ece')
-,(u'Gizmo', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247829.ece')
-,(u'Bizarre', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247767.ece')]
-
-    def print_version(self, url):
-        return re.sub(r'\?OTC-RSS&ATTR=[-a-zA-Z]+', '?print=yes', url)
-
-
--- a/recipes/tvn24.recipe
+++ b/recipes/tvn24.recipe
@ -0,0 +1,24 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class tvn24(BasicNewsRecipe):
+    '''
+    Recipe for TVN24 (tvn24.pl), a Polish-language news site, built from the
+    RSS feeds listed in ``feeds``.
+    '''
+    title          = u'TVN24'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__        = 'fenuks'
+    description   = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
+    category       = 'news'
+    language       = 'pl'
+    # The same logo image is used for both the masthead and the cover.
+    masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
+    extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
+    remove_empty_feeds = True
+    remove_javascript = True
+    no_stylesheets = True
+    keep_only_tags = [
+        dict(id='tvn24_wiadomosci_detal'),
+        dict(name='h1', attrs={'class':'standardHeader1'}),
+        dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7',
+                             'contentFromCMS']}),
+    ]
+    remove_tags_after = dict(name='div', attrs={'class':'socialBoxesBottom'})
+    remove_tags = [
+        dict(attrs={'class':[
+            'tagi_detal', 'socialBoxesBottom', 'twitterBox', 'commentsInfo',
+            'textSize', 'obj_ukrytydruk obj_ramka1_r',
+            'related newsNews align-right', 'box', 'newsUserList',
+            'watchMaterial text']}),
+    ]
+    # Every feed URL must be absolute; the 'Polska' entry previously lacked
+    # the 'http://' scheme that all the other entries carry.
+    feeds = [
+        (u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
+        (u'Polska', u'http://www.tvn24.pl/polska.xml'),
+        (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'),
+        (u'Sport', u'http://www.tvn24.pl/sport.xml'),
+        (u'Biznes', u'http://www.tvn24.pl/biznes.xml'),
+        (u'Meteo', u'http://www.tvn24.pl/meteo.xml'),
+        (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'),
+        (u'Kultura', u'http://www.tvn24.pl/kultura.xml'),
+    ]
+
+    def preprocess_html(self, soup):
+        '''
+        Remove the inline ``style`` attribute from every element of soup
+        that has one and return the same soup object.
+        '''
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
--- a/recipes/ubuntu_pl.recipe
+++ b/recipes/ubuntu_pl.recipe
@ -4,10 +4,12 @@ class Ubuntu_pl(BasicNewsRecipe):
    title          = u'UBUNTU.pl'
    __author__        = 'fenuks'
    description   = 'UBUNTU.pl - polish ubuntu community site'
+    masthead_url= 'http://ubuntu.pl/img/logo.jpg'
    cover_url      = 'http://ubuntu.pl/img/logo.jpg'
    category       = 'linux, IT'
    language       = 'pl'
    no_stylesheets = True
+    remove_empty_feeds = True
    oldest_article = 8
    max_articles_per_feed = 100
    extra_css      = '#main {text-align:left;}'
--- a/recipes/webhosting_pl.recipe
+++ b/recipes/webhosting_pl.recipe
@ -0,0 +1,39 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class webhosting_pl(BasicNewsRecipe):
+    '''
+    Recipe for Webhosting.pl, built from the RSS feeds listed in ``feeds``.
+    '''
+    title = u'Webhosting.pl'
+    __author__ = 'fenuks'
+    description = (
+        'Webhosting.pl to pierwszy na polskim rynku serwis poruszający w '
+        'szerokim aspekcie tematy związane z hostingiem, globalną Siecią i '
+        'usługami internetowymi. Głównym celem przedsięwzięcia jest '
+        'dostarczanie przydatnej i bogatej merytorycznie wiedzy osobom, '
+        'które chcą tworzyć i efektywnie wykorzystywać współczesny Internet.'
+    )
+    category = 'web'
+    language = 'pl'
+    # Cover and masthead share the site logo.
+    cover_url = 'http://webhosting.pl/images/logo.png'
+    masthead_url = 'http://webhosting.pl/images/logo.png'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_empty_feeds = True
+    # Tag filters below are currently disabled:
+    #keep_only_tags= [dict(name='div', attrs={'class':'content_article'}), dict(attrs={'class':'paging'})]
+    #remove_tags=[dict(attrs={'class':['tags', 'wykop', 'facebook_button_count', 'article_bottom']})]
+    feeds = [
+        (u'Newsy', u'http://webhosting.pl/feed/rss/an'),
+        (u'Artyku\u0142y', u'http://webhosting.pl/feed/rss/aa'),
+        (u'Software', u'http://webhosting.pl/feed/rss/n/12'),
+        (u'Internet', u'http://webhosting.pl/feed/rss/n/9'),
+        (u'Biznes', u'http://webhosting.pl/feed/rss/n/13'),
+        (u'Bezpiecze\u0144stwo', u'http://webhosting.pl/feed/rss/n/10'),
+        (u'Blogi', u'http://webhosting.pl/feed/rss/ab'),
+        (u'Programowanie', u'http://webhosting.pl/feed/rss/n/8'),
+        (u'Kursy', u'http://webhosting.pl/feed/rss/n/11'),
+        (u'Tips&Tricks', u'http://webhosting.pl/feed/rss/n/15'),
+        (u'Imprezy', u'http://webhosting.pl/feed/rss/n/22'),
+        (u'Wywiady', u'http://webhosting.pl/feed/rss/n/24'),
+        (u'Porady', u'http://webhosting.pl/feed/rss/n/3027'),
+        (u'Znalezione w sieci', u'http://webhosting.pl/feed/rss/n/6804'),
+        (u'Dev area', u'http://webhosting.pl/feed/rss/n/24504'),
+        (u"Webmaster's blog", u'http://webhosting.pl/feed/rss/n/29195'),
+        (u'Domeny', u'http://webhosting.pl/feed/rss/n/11513'),
+        (u'Praktyka', u'http://webhosting.pl/feed/rss/n/2'),
+        (u'Serwery', u'http://webhosting.pl/feed/rss/n/11514'),
+        (u'Inne', u'http://webhosting.pl/feed/rss/n/24811'),
+        (u'Marketing', u'http://webhosting.pl/feed/rss/n/11535'),
+    ]
+
+    def print_version(self, url):
+        '''
+        Return url with '/print' appended after each occurrence of
+        'webhosting.pl'.
+        '''
+        site = 'webhosting.pl'
+        printable = url.replace(site, 'webhosting.pl/print')
+        return printable
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -190,3 +190,16 @@ def get_windows_username():
            return buf.value

    return get_unicode_windows_env_var(u'USERNAME')
+
+def get_windows_temp_path():
+    '''
+    Return the temporary directory reported by the Win32 GetTempPathW call
+    as a unicode string with one trailing backslash removed.
+
+    :return: The path, or None if the call fails or yields an empty path
+    '''
+    import ctypes
+    get_temp_path = ctypes.windll.kernel32.GetTempPathW
+    # Called with a zero-length buffer, the function reports the buffer size
+    # it needs; a return value of 0 means the call failed.
+    n = get_temp_path(0, None)
+    if n == 0:
+        return None
+    buf = ctypes.create_unicode_buffer(u'\0'*n)
+    written = get_temp_path(n, buf)
+    if written == 0 or written > n:
+        # The second call failed, or the path no longer fits in the buffer
+        # sized by the first call; buf holds nothing usable.
+        return None
+    ans = buf.value
+    # endswith() is safe on an empty string, unlike indexing with ans[-1]
+    if ans.endswith(u'\\'):
+        ans = ans[:-1]
+    return ans if ans else None
+
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -192,9 +192,13 @@ class InputFormatPlugin(Plugin):

    def __call__(self, stream, options, file_ext, log,
                 accelerators, output_dir):
-        log('InputFormatPlugin: %s running'%self.name)
-        if hasattr(stream, 'name'):
-            log('on', stream.name)
+        try:
+            log('InputFormatPlugin: %s running'%self.name)
+            if hasattr(stream, 'name'):
+                log('on', stream.name)
+        except:
+            # In case stdout is broken
+            pass

        with CurrentDir(output_dir):
            for x in os.listdir('.'):
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -184,14 +184,14 @@ class ANDROID(USBMS):
            'ALPANDIGITAL', 'ANDROID_MID', 'VTAB1008', 'EMX51_BBG_ANDROI',
            'UMS', '.K080', 'P990', 'LTE', 'MB853', 'GT-S5660_CARD', 'A107',
            'GT-I9003_CARD', 'XT912', 'FILE-CD_GADGET', 'RK29_SDK', 'MB855',
-            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER']
+            'XT910', 'BOOK_A10', 'USB_2.0_DRIVER', 'I9100T']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
            '__UMS_COMPOSITE', 'SGH-I997_CARD', 'MB870', 'ALPANDIGITAL',
            'ANDROID_MID', 'P990_SD_CARD', '.K080', 'LTE_CARD', 'MB853',
            'A1-07___C0541A4F', 'XT912', 'MB855', 'XT910', 'BOOK_A10_CARD',
-            'USB_2.0_DRIVER']
+            'USB_2.0_DRIVER', 'I9100T']

    OSX_MAIN_MEM = 'Android Device Main Memory'

--- a/src/calibre/ebooks/metadata/odt.py
+++ b/src/calibre/ebooks/metadata/odt.py
@ -153,7 +153,9 @@ def get_metadata(stream):
    mi = MetaInformation(None, [])
    if data.has_key('title'):
        mi.title = data['title']
-    if data.has_key('creator'):
+    if data.get('initial-creator', '').strip():
+        mi.authors = string_to_authors(data['initial-creator'])
+    elif data.has_key('creator'):
        mi.authors = string_to_authors(data['creator'])
    if data.has_key('description'):
        mi.comments = data['description']
--- a/src/calibre/gui2/store/stores/oreilly_plugin.py
+++ b/src/calibre/gui2/store/stores/oreilly_plugin.py
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import re
 import urllib
 from contextlib import closing

--- a/src/calibre/gui2/tag_browser/model.py
+++ b/src/calibre/gui2/tag_browser/model.py
@ -10,8 +10,6 @@ __docformat__ = 'restructuredtext en'

 import traceback, cPickle, copy
 from itertools import repeat
-from collections import defaultdict
-from functools import partial

 from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
        QMimeData, QModelIndex, pyqtSignal, QObject)
--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@ -16,7 +16,7 @@ from PyQt4.Qt import (QWizard, QWizardPage, QPixmap, Qt, QAbstractListModel,
 from calibre import __appname__, patheq
 from calibre.library.database2 import LibraryDatabase2
 from calibre.library.move import MoveLibrary
-from calibre.constants import filesystem_encoding, iswindows
+from calibre.constants import filesystem_encoding, iswindows, plugins
 from calibre.gui2.wizard.send_email import smtp_prefs
 from calibre.gui2.wizard.device_ui import Ui_WizardPage as DeviceUI
 from calibre.gui2.wizard.library_ui import Ui_WizardPage as LibraryUI
@ -30,6 +30,9 @@ from calibre.gui2 import NONE, choose_dir, error_dialog
 from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.customize.ui import device_plugins

+# winutil is only bound when running on Windows, so any code that uses it
+# must itself be guarded by an iswindows check.
+if iswindows:
+    winutil = plugins['winutil'][0]
+
 # Devices {{{

 class Device(object):
@ -302,13 +305,13 @@ class HanlinV5(HanlinV3):
 class BeBook(HanlinV3):
+    # Device entry for the BeBook; it sets only name, manufacturer and id,
+    # everything else is inherited from HanlinV3.

    name = 'BeBook'
-    manufacturer = 'Endless Ideas'
+    manufacturer = 'BeBook'
    id = 'bebook'

 class BeBookMini(HanlinV5):
+    # Device entry for the BeBook Mini; it sets only name, manufacturer and
+    # id, everything else is inherited from HanlinV5.

    name = 'BeBook Mini'
-    manufacturer = 'Endless Ideas'
+    manufacturer = 'BeBook'
    id = 'bebook_mini'

 class EZReader(HanlinV3):
@ -420,9 +423,9 @@ class KindlePage(QWizardPage, KindleUI):
    def commit(self):
        x = unicode(self.to_address.text()).strip()
        parts = x.split('@')
-        if len(parts) < 2 or not parts[0]: return

-        if self.send_email_widget.set_email_settings(True):
+        if (self.send_email_widget.set_email_settings(True) and len(parts) >= 2
+                and parts[0]):
            conf = smtp_prefs()
            accounts = conf.parse().accounts
            if not accounts: accounts = {}
@ -751,19 +754,20 @@ class LibraryPage(QWizardPage, LibraryUI):
        self.default_library_name = None
        if not lp:
            fname = _('Calibre Library')
-            if isinstance(fname, unicode):
-                try:
-                    fname = fname.encode(filesystem_encoding)
-                except:
-                    fname = 'Calibre Library'
-            lp = os.path.expanduser('~'+os.sep+fname)
+            base = os.path.expanduser(u'~')
+            if iswindows:
+                x = winutil.special_folder_path(winutil.CSIDL_PERSONAL)
+                if x and os.access(x, os.W_OK):
+                    base = x
+
+            lp = os.path.join(base, fname)
            self.default_library_name = lp
            if not os.path.exists(lp):
                try:
                    os.makedirs(lp)
                except:
                    traceback.print_exc()
-                    lp = os.path.expanduser('~')
+                    lp = os.path.expanduser(u'~')
        self.location.setText(lp)

    def isComplete(self):
@ -779,12 +783,10 @@ class LibraryPage(QWizardPage, LibraryUI):
        oldloc = prefs['library_path']
        newloc = unicode(self.location.text())
        try:
-            newloce = newloc.encode(filesystem_encoding)
-            if self.default_library_name is not None and \
-                os.path.exists(self.default_library_name) and \
-                not os.listdir(self.default_library_name) and \
-                newloce != self.default_library_name:
-                    os.rmdir(self.default_library_name)
+            dln = self.default_library_name
+            if (dln and os.path.exists(dln) and not os.listdir(dln) and newloc
+                    != dln):
+                os.rmdir(dln)
        except:
            pass
        if not os.path.exists(newloc):
--- a/src/calibre/ptempfile.py
+++ b/src/calibre/ptempfile.py
@ -7,7 +7,8 @@ being closed.
 """
 import tempfile, os, atexit, binascii, cPickle

-from calibre.constants import __version__, __appname__
+from calibre.constants import (__version__, __appname__,
+        get_unicode_windows_env_var, iswindows, get_windows_temp_path)

 def cleanup(path):
    try:
@ -47,7 +48,18 @@ def base_dir():
            _base_dir = td
        else:
            base = os.environ.get('CALIBRE_TEMP_DIR', None)
+            if base is not None and iswindows:
+                base = get_unicode_windows_env_var('CALIBRE_TEMP_DIR')
            prefix = app_prefix(u'tmp_')
+            if base is None and iswindows:
+                # On windows always use a unicode temp path, as for some
+                # localized (east asian) windows builds, there's no reliable
+                # way to escalate to unicode only when needed. See
+                # https://bugs.launchpad.net/bugs/937389 Hopefully, by now, the
+                # rest of calibre can deal with unicode temp paths. We'll leave
+                # temp paths as bytestring on Unix, as the temp dir on unix is
+                # very rarely non ascii anyway.
+                base = get_windows_temp_path()
            try:
                # First try an ascii path as that is what was done historically
                # and we dont want to break working code
@ -66,7 +78,9 @@ def base_dir():
 def _make_file(suffix, prefix, base):
    try:
        fd, name = tempfile.mkstemp(suffix, prefix, dir=base)
-    except UnicodeDecodeError:
+    except (UnicodeDecodeError, OSError):
+        # On some windows systems, we get an OSError because base is not
+        # unicode and windows cannot find the path pointed to by base
        global _base_dir
        from calibre.constants import filesystem_encoding
        base_dir()
@ -79,7 +93,9 @@ def _make_file(suffix, prefix, base):
 def _make_dir(suffix, prefix, base):
    try:
        tdir = tempfile.mkdtemp(suffix, prefix, base)
-    except ValueError:
+    except (ValueError, OSError):
+        # On some windows systems, we get an OSError because base is not
+        # unicode and windows cannot find the path pointed to by base
        global _base_dir
        from calibre.constants import filesystem_encoding
        base_dir()
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -4,20 +4,14 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Builtin recipes.
 '''
-import re, imp, inspect, time, os
-from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, \
-    AutomaticNewsRecipe, CalibrePeriodical
+import re, time, io
+from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe,
+    AutomaticNewsRecipe, CalibrePeriodical)
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from calibre.ptempfile import PersistentTemporaryDirectory
-from calibre import __appname__, english_sort
 from calibre.utils.config import JSONConfig

-BeautifulSoup, time, english_sort
-
 basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
        CalibrePeriodical)
-_tdir = None
-_crep = 0

 custom_recipes = JSONConfig('custom_recipes/index.json')

@ -28,39 +22,33 @@ def custom_recipe_filename(id_, title):

 def compile_recipe(src):
    '''
-    Compile the code in src and return the first object that is a recipe or profile.
-    @param src: Python source code
-    @type src: string
-    @return: Recipe class or None, if no such class was found in C{src}
+    Execute the recipe source code in src and return a recipe class defined
+    by it, if any.
+
+    :param src: Python source code as bytestring or unicode object. A
+        bytestring is decoded with the encoding named by a ``coding``
+        declaration found in its first 200 bytes, falling back to UTF-8.
+
+    :return: A class from the executed namespace that subclasses
+        BasicNewsRecipe and is not one of basic_recipes, or None if no such
+        class was found in src
    '''
-    global _tdir, _crep
-    if _tdir is None or not os.path.exists(_tdir):
-        _tdir = PersistentTemporaryDirectory('_recipes')
-    temp = os.path.join(_tdir, 'recipe%d.py'%_crep)
-    _crep += 1
    if not isinstance(src, unicode):
        match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
        enc = match.group(1) if match else 'utf-8'
        src = src.decode(enc)
-    src = re.sub(r'from __future__.*', '', src)
-    f = open(temp, 'wb')
-    src = 'from %s.web.feeds.news import BasicNewsRecipe, AutomaticNewsRecipe\n'%__appname__ + src
-    src = '# coding: utf-8\n' + src
-    src = 'from __future__ import with_statement\n' + src
+    # Python complains if there is a coding declaration in a unicode string,
+    # so every comment line carrying one is reduced to a bare '#'
+    src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
+    # Translate newlines to \n
+    src = io.StringIO(src, newline=None).getvalue()

-    src = src.replace('from libprs500', 'from calibre').encode('utf-8')
-    f.write(src)
-    f.close()
-    module = imp.find_module(os.path.splitext(os.path.basename(temp))[0],
-        [os.path.dirname(temp)])
-    module = imp.load_module(os.path.splitext(os.path.basename(temp))[0], *module)
-    classes = inspect.getmembers(module,
-            lambda x : inspect.isclass(x) and \
-                issubclass(x, (BasicNewsRecipe,)) and \
-                x not in basic_recipes)
-    if not classes:
-        return None
+    # Names that the recipe source can use without importing them itself
+    namespace = {
+            'BasicNewsRecipe':BasicNewsRecipe,
+            'AutomaticNewsRecipe':AutomaticNewsRecipe,
+            'time':time, 're':re,
+            'BeautifulSoup':BeautifulSoup
+        }
+    # NOTE: src is run as ordinary Python code, so it must come from a
+    # trusted source
+    exec src in namespace

-    return classes[0][1]
+    # The namespace is a plain dict: if src defines more than one recipe
+    # class, the one returned depends on dict iteration order
+    for x in namespace.itervalues():
+        if (isinstance(x, type) and issubclass(x, BasicNewsRecipe) and x not
+                in basic_recipes):
+            return x

+    return None