calibre/recipes/globes_co_il.recipe
Kovid Goyal ba59ac679d
Fix incorrect soup usage in various recipes
Also make SoupStrainer available in calibre.ebooks.BeautifulSoup
2019-03-25 10:17:27 +05:30

50 lines
2.6 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
description = 'This is Globes.co.il.'
cover_url = 'http://www.the7eye.org.il/SiteCollectionImages/BAKTANA/arye_avnery_010709_377.jpg'
title = u'Globes'
language = 'he'
__author__ = 'marbs'
extra_css = 'img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }' # noqa
simultaneous_downloads = 5
remove_javascript = True
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 100
remove_attributes = ['width', 'style']
feeds = [(u'שוק ההון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'),
(u'נדל"ן', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'),
(u'וול סטריט ושווקי העולם',
u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'),
(u'ניתוח טכני', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1294'),
(u'היי טק', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'),
(u'נתח שוק וצרכנות',
u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'),
(u'דין וחשבון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'),
(u'רכב', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220'),
(u'דעות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'),
(u'קניון המניות - טור שבועי',
u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3175'),
(u'סביבה', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3221')]
def print_version(self, url):
split1 = url.split("=")
print_url = 'http://www.globes.co.il/serve/globes/printwindow.asp?did=' + \
split1[1]
return print_url
def preprocess_html(self, soup):
soup.find('tr', attrs={'bgcolor': 'black'}
).findPrevious('tr').extract()
soup.find('tr', attrs={'bgcolor': 'black'}).extract()
return soup
def fixChars(self, string):
# Replace lsquo (\x91)
fixed = re.sub("■", "■", string)
return fixed