Merge branch 'master' of https://github.com/t3d/calibre

2025-11-26 16:25:02 -05:00 · 2016-10-16 15:43:49 +05:30 · 2016-10-16 15:43:49 +05:30 · c080a08b16
commit c080a08b16
parent 253b83a89c 34c0a2f8dc
4 changed files with 20 additions and 46 deletions
--- a/recipes/archeowiesci.recipe
+++ b/recipes/archeowiesci.recipe
@@ -1,35 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class Archeowiesci(BasicNewsRecipe):
-    title = u'Archeowieści'
-    __author__ = 'fenuks'
-    category = 'archeology'
-    language = 'pl'
-    description = u'Z pasją o przeszłości'
-    cover_url = 'http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
-    oldest_article = 7
-    needs_subscription = 'optional'
-    max_articles_per_feed = 100
-    auto_cleanup = True
-    remove_tags = [
-        dict(name='span', attrs={'class': ['post-ratings', 'post-ratings-loading']})]
-    feeds = [(u'Archeowieści', u'http://archeowiesci.pl/feed/')]
-
-    def parse_feeds(self):
-        feeds = BasicNewsRecipe.parse_feeds(self)
-        for feed in feeds:
-            for article in feed.articles[:]:
-                if self.username is None and 'subskrypcja' in article.title:
-                    feed.articles.remove(article)
-        return feeds
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            br.open('http://archeowiesci.pl/wp-login.php')
-            br.select_form(name='loginform')
-            br['log'] = self.username
-            br['pwd'] = self.password
-            br.submit()
-        return br
--- a/recipes/ciekawostki_historyczne.recipe
+++ b/recipes/ciekawostki_historyczne.recipe
@@ -20,8 +20,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
    remove_empty_feeds = True
    keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
    recursions = 5
-    remove_tags = [dict(id='singlepostinfo'), dict(
-        attrs={'class': ['books short floatRight', 'unprintable', 'booksTable', 'bawmrp']})]
+    remove_tags = [dict(id=['catapult-cookie-bar','header','footer','rightcolumn','singlepostinfo']), dict(
+        attrs={'class': ['ubm_banner','ciekawostki-slider-popular','books short floatRight', 'unprintable', 'booksTable', 'bawmrp']})]

    feeds = [
    (u'Staro\u017cytno\u015b\u0107', u'http://ciekawostkihistoryczne.pl/tag/starozytnosc/feed/'),
--- a/recipes/gosc_niedzielny.recipe
+++ b/recipes/gosc_niedzielny.recipe
@@ -11,14 +11,15 @@ from lxml import html


 class GN(BasicNewsRecipe):
-
    __author__ = 'Piotr Kontek, Tomasz Długosz'
    title = u'Gość Niedzielny'
+    publisher = 'Wydawnictwo Kurii Metropolitalnej w Katowicach'
    description = 'Ogólnopolski tygodnik katolicki - fragmenty artykułów z aktualnego numeru'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
+    masthead_url = 'http://m.gosc.pl/static/themes/czerwony_gosc-mobile/logo.png'

    def find_last_issue(self):
        raw = self.index_to_soup(
@@ -30,15 +31,21 @@ class GN(BasicNewsRecipe):
        return page[0]

    def parse_index(self):
-        soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue())
+        self.last_issue = self.find_last_issue()
+        soup = self.index_to_soup('http://gosc.pl' + self.last_issue)
+        self.cover_url = 'http://www.gosc.pl' + \
+            soup.find('div', attrs={'class': 'fl-w100 release-wp'}
+                      ).findAll('a')[-4].contents[0]['src']
        feeds = []
-        # wstepniak
-        a = soup.find('div', attrs={'class': 'release-wp-b'}).find('a')
+        # editorial:
+        a = soup.find('div', attrs={'class': 'release-wp-b'})
+        art = a.find('a')
        articles = [
-            {'title': self.tag_to_string(a),
-             'url': 'http://www.gosc.pl' + a['href']
+            {'title': self.tag_to_string(art),
+             'url': 'http://www.gosc.pl' + art['href'],
+             'description': self.tag_to_string(a.find('p', attrs={'class': 'b lead'}))
             }]
-        feeds.append((u'Wstępniak', articles))
+        feeds.append((u'Na dobry początek', articles))
        # kategorie
        for addr in soup.findAll('a', attrs={'href': re.compile('kategoria')}):
            if addr.string != u'wszystkie artyku\u0142y z tej kategorii \xbb':
@@ -51,11 +58,13 @@ class GN(BasicNewsRecipe):
        return feeds

    def find_articles(self, main_block):
-        for a in main_block.findAll('div', attrs={'class': ['prev_doc_n1 prev_doc_img21']}):
+        for a in main_block.findAll('div', attrs={'class': ['attachmentContent']}):
            art = a.find('a')
            yield {
                'title': self.tag_to_string(art),
-                'url': 'http://www.gosc.pl' + art['href']
+                'url': 'http://www.gosc.pl' + art['href'],
+                'date': self.tag_to_string(a.find('b', attrs={'class': 'time'})).replace('DODANE', ' '),
+                'description': self.tag_to_string(a.find('div', attrs={'class': 'txt'}))
            }

    def append_page(self, soup, appendtag):
--- a/recipes/icons/archeowiesci.png
+++ b/recipes/icons/archeowiesci.png