remove dead recipes

These recipes are based on RSS feeds that no longer work.
unkn0w7n 2024-12-07 22:53:21 +05:30
parent 65b549d81c
commit 3943d95dcd
94 changed files with 0 additions and 2663 deletions
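
A quick way to reproduce the "no longer work" check is to probe each feed URL before deleting its recipe. The sketch below is illustrative only — the probe_feed helper is hypothetical and not part of this commit or of calibre: it fetches a feed URL and reports whether the response is still parseable RSS/Atom with at least one entry.

#!/usr/bin/env python
# Hypothetical dead-feed probe, not part of this commit: fetch an RSS/Atom
# URL and report whether it still serves a parseable feed with entries.
import urllib.error
import urllib.request
import xml.etree.ElementTree as ET


def probe_feed(url, timeout=20):
    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'feed-check'})
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
    except (urllib.error.URLError, OSError) as err:
        return False, 'fetch failed: {}'.format(err)
    try:
        root = ET.fromstring(raw)
    except ET.ParseError as err:
        return False, 'not XML: {}'.format(err)
    # RSS nests entries at channel/item; Atom uses namespaced entry elements
    items = root.findall('.//item') + root.findall('.//{http://www.w3.org/2005/Atom}entry')
    if not items:
        return False, 'parsed, but no entries'
    return True, '{} entries'.format(len(items))


if __name__ == '__main__':
    # one of the feeds removed below
    ok, detail = probe_feed('http://feeds2.feedburner.com/virtualshackles')
    print(('alive' if ok else 'dead') + ': ' + detail)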

View File

@@ -1,246 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function

__license__ = 'GPL v3'
__copyright__ = '2018, Dale Furrow dkfurrow@gmail.com'
'''
chron.com
'''
import re
import sys
import time
import traceback
from collections import OrderedDict
from datetime import datetime

from calibre.ebooks.BeautifulSoup import NavigableString
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.date import dt_factory, local_tz, utcfromtimestamp
from calibre.web.feeds.recipes import BasicNewsRecipe

# matches dates written out in full, e.g. 'March 5, 2018'
regex_date_only = re.compile(
    r'(?:January|February|March|April|May|June|July|August|September|'
    r'October|November|December)\s[0-9]{1,2},\s20[01][0-9]')
regex_time_only = re.compile(r'[0-9]{1,2}:[0-9]{1,2} \w{2}')
sentence_regex = re.compile(r'(\S.+?[.!?])(?=\s+|$)')
blog_regex = re.compile(r'post-\d+')

pages = OrderedDict([('news', ('/news/houston-texas/', ['business', 'sports'])),
                     ('business', ('/business/', ['sports'])),
                     ('sports', ('/sports/', ['business']))])

base_url = "http://www.chron.com"

# sports has 'core-package sports' class
xpath_general = """//div[contains(@class, 'centerpiece-tabs') or
    contains(@class, 'wrapper') or
    contains(@class, 'contentGroups') or
    contains(@class, 'headline-list') or
    contains(@class, 'core-package sports') or
    contains(@class, 'news')]
    //a[contains(@class, 'hdn-analytics')]"""

excluded_titles = ["Winning numbers", "TV-radio listings"]


def validate_link(page, link, title):
    other_category = page[1][1]
    if not title or len(title.strip()) < 5:
        print("{0} rejected, title too short".format(link))
        return None
    parts = link.split('/')
    if len(parts) > 3 and parts[3] in other_category:
        print("{0} rejected, covered in other section".format(link))
        return None
    for excluded_title in excluded_titles:
        if title.find(excluded_title) != -1:
            print("{0} rejected, excluded title".format(link))
            return None
    return link, title


def get_article_parsed(index_to_soup, this_url):
    return index_to_soup(this_url, as_tree=True)


def sort_subject(element_list):
    # priority of subjects
    subjects = ['news', 'neighborhood', 'entertainment']
    subjects.reverse()
    subject_dict = OrderedDict(zip(subjects, range(len(subjects))))
    rank_dict = OrderedDict([(rank, []) for rank in range(len(subjects) + 1)])
    for element in element_list:
        try:
            subj = element[0].split('/')[3]
        except Exception:
            subj = 'unknown'
        if subject_dict.get(subj) is not None:
            rank_dict[subject_dict[subj] + 1].append(element)
        else:
            rank_dict[0].append(element)
    # now return in reverse order, sorted
    combined_list = []
    for rank in range(len(subjects), -1, -1):
        article_list = rank_dict[rank]
        article_list.sort()
        combined_list.extend(article_list)
    return combined_list


def get_links_from_section_page(index_to_soup, page):
    page_doc = get_article_parsed(index_to_soup, base_url + page[1][0])
    els = page_doc.xpath(xpath_general)
    element_list = []
    for el in els:
        link = el.get('href').split('?')[0]
        title = el.text
        if title is None or len(title.strip()) < 5:
            link_id = link.split('/')[-1][:-3].split('-')[:-1]
            title = ' '.join(link_id)
        if link[:4] != 'http':
            link = base_url + link
        validated_link = validate_link(page=page, link=link, title=title)
        if validated_link is not None:
            element_list.append(validated_link)
    sorted_element_list = sort_subject(element_list)
    return [page[0], sorted_element_list]


def get_all_links_from_sections(index_to_soup):
    all_sections = []
    article_set = set()
    final_dict = OrderedDict()
    for item in pages.items():
        print("getting links from {0}".format(item[0]))
        all_sections.append(get_links_from_section_page(index_to_soup, item))
    for section in all_sections:
        section_id = section[0]
        article_list = section[1]
        final_dict[section_id] = []
        for article in article_list:
            if article[0] not in article_set:
                article_set.add(article[0])
                final_dict[section_id].append(article)
    return final_dict


# noinspection PyAbstractClass
class HoustonChronicle(BasicNewsRecipe):
    title = u'The Houston Chronicle'
    description = 'News from Houston, Texas'
    __author__ = 'Dale Furrow'
    language = 'en'
    no_stylesheets = True
    remove_attributes = ['style', 'xmlns']
    remove_empty_feeds = True
    timefmt = '[%a, %d %b %Y]'
    timestampfmt = '%Y%m%d%H%M%S'
    # ignore_duplicate_articles = {'url'}  # defaults to None
    extra_css = '.article_date {display: none}'
    category = 'news, USA'
    masthead_url = 'http://www.chron.com/apple-touch-icon-76x76.png'
    keep_only_tags = [dict(name='div', attrs={'class': ['article-content', 'article-wrap']})]
    remove_tags = [dict(name='div', attrs={'social-title': True}),
                   dict(name='div', attrs={'class':
                        ['control-panel', 'gallery-overlay-inner',
                         'most-popular', 'asset-media mos-playlist',
                         'asset_media asset-media']}),
                   dict(name='li', attrs={'class': ['hst-resgalleryitem taboola-frame hidden',
                                                    'hst-resgalleryitem hidden']}),
                   dict(name='ul', attrs={'class': 'clearfix'})]
    # max_articles_per_feed = 5  # for use in testing

    def get_article_description_from_doc(self, soup):
        description_chars_break = 140
        description_max_chars = 300
        try:
            els = soup.findAll('p')
            if len(els) > 0:
                out_text = ""
                this_ellipsis = ""
                for el in els:
                    if el is not None:
                        result = []
                        for descendant in el.contents:
                            if isinstance(descendant, NavigableString):
                                result.append(type(u'')(descendant).strip())
                        all_text = u' '.join(result)
                        if len(all_text) > 1:
                            sentences = re.findall(sentence_regex, all_text)
                            if sentences is not None and len(sentences) > 0:
                                for sentence in sentences:
                                    if len(out_text) < description_chars_break:
                                        out_text += sentence + " "
                                    else:
                                        if len(out_text) > description_max_chars:
                                            this_ellipsis = "..."
                                        return out_text[:description_max_chars] + this_ellipsis
                return out_text
            else:
                return "No Article description returned"
        except Exception as ex:
            self.log('Error on Article Description')
            traceback.print_exc(file=sys.stdout)
            print(str(ex))
            return ""

    @staticmethod
    def get_published_time_from_doc(page_doc):
        def get_regular_timestamp(date_string):
            try:
                out_date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ")
                return out_date
            except ValueError:
                return None

        el = page_doc.findAll(
            lambda this_tag: this_tag.name == "time" and ('itemprop', 'datePublished') in this_tag.attrs)
        if len(el) == 1:
            return get_regular_timestamp(el[0].get('datetime'))
        else:
            return None

    def populate_article_metadata(self, article, soup, first):
        """
        Called when each HTML page belonging to article is downloaded.
        Intended to be used to get article metadata like author/summary/etc.
        from the parsed HTML (soup).
        :param article: A object of class :class:`calibre.web.feeds.Article`.
        If you change the summary, remember to also change the text_summary
        :param soup: Parsed HTML belonging to this article
        :param first: True iff the parsed HTML is the first page of the article.
        """
        summary = self.get_article_description_from_doc(soup)
        article_date = self.get_published_time_from_doc(soup)
        if article_date is not None:
            article_timestamp = float((article_date - utcfromtimestamp(0)).total_seconds())
            article.date = article_timestamp
            article.utctime = dt_factory(article_date.timetuple(), assume_utc=True, as_utc=True)
            article.localtime = article.utctime.astimezone(local_tz)
        summary_date = article.localtime.strftime("%Y-%m-%d %H:%M") if article_date is not None else "No Date"
        article.summary = "{0}: {1}".format(summary_date, summary)
        article.text_summary = clean_ascii_chars(article.summary)

    def parse_index(self):
        self.timefmt = ' [%a, %d %b, %Y]'
        self.log('starting parse_index: ', time.strftime(self.timestampfmt))
        feeds = []
        sections = get_all_links_from_sections(self.index_to_soup)
        for section_id, article_list in sections.items():
            self.log("Getting {0} section, {1:d} articles".format(section_id, len(article_list)))
            articles = []
            for article_info in article_list:
                self.log("Adding {0} to feed".format(article_info[0]))
                articles.append({'title': article_info[1], 'url': article_info[0],
                                 'description': '', 'date': ""})
            self.log("Appending {0:d} articles for {1}".format(len(articles), section_id))
            feeds.append((section_id, articles))
        self.log('finished parse_index: ', time.strftime(self.timestampfmt))
        return feeds

    def preprocess_html(self, soup):
        return soup

41 binary files (recipe icons, between 78 B and 3.5 KiB each) were also deleted; the diff viewer shows them only as "Binary file not shown."
View File

@@ -1,135 +0,0 @@
import re
from collections import OrderedDict
from urllib.parse import urlparse

from calibre.web.feeds.news import BasicNewsRecipe

_issue_url = ""

COMMA_SEP_RE = re.compile(r"\s*,\s*")
SPACE_SEP_RE = re.compile(r"\s+")
NON_NUMERIC_RE = re.compile(r"[^\d]+")


class Poetry(BasicNewsRecipe):
    title = "Poetry Magazine"
    __author__ = "ping"
    description = (
        "Founded in Chicago by Harriet Monroe in 1912, Poetry is the oldest monthly "
        "devoted to verse in the English-speaking world. https://www.poetryfoundation.org/poetrymagazine"
    )
    publication_type = "magazine"
    language = "en"
    encoding = "utf-8"
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = False
    ignore_duplicate_articles = {"url"}
    compress_news_images = False
    remove_attributes = ["style", "font"]

    keep_only_tags = [dict(name="article")]
    remove_tags = [
        dict(name="button"),
        dict(
            attrs={
                "class": [
                    "c-socialBlocks",
                    "c-index",
                    "o-stereo",
                    "u-hideAboveSmall",
                    "c-slideTrigger",
                    "js-slideshow",
                ]
            }
        ),
    ]

    extra_css = """
    h1 { font-size: 1.8rem; margin-bottom: 0.5rem; }
    .o-titleBar-summary { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
    div.o-titleBar-meta, div.c-feature-sub { font-weight: bold; color: #444; margin-bottom: 1.5rem; }
    div.pcms_media img, div.o-mediaEnclosure img { max-width: 100%; height: auto; }
    div.o-mediaEnclosure .o-mediaEnclosure-metadata { font-size: 0.8rem; margin-top: 0.2rem; }
    div.c-feature-bd { margin-bottom: 2rem; }
    div.c-auxContent { color: #222; font-size: 0.85rem; margin-top: 2rem; }
    """

    def extract_from_img_srcset(self, srcset: str, max_width=0):
        sources = [s.strip() for s in COMMA_SEP_RE.split(srcset) if s.strip()]
        if len(sources) == 1:
            # just a regular img url probably
            return sources[0]
        parsed_sources = []
        for src in sources:
            src_n_width = [s.strip() for s in SPACE_SEP_RE.split(src) if s.strip()]
            if len(src_n_width) != 2:
                raise ValueError(f"Not a valid srcset: {srcset}")
            parsed_sources.append(
                (
                    src_n_width[0].strip(),
                    int(NON_NUMERIC_RE.sub("", src_n_width[1].strip())),
                )
            )
        parsed_sources = list(set(parsed_sources))
        parsed_sources = sorted(parsed_sources, key=lambda x: x[1], reverse=True)
        if not max_width:
            return parsed_sources[0][0]
        for img, width in parsed_sources:
            if width <= max_width:
                return img
        return parsed_sources[-1][0]

    def preprocess_html(self, soup):
        for img in soup.select("div.o-mediaEnclosure img"):
            if not img.get("srcset"):
                continue
            img["src"] = self.extract_from_img_srcset(img["srcset"], max_width=1000)
        return soup

    def parse_index(self):
        if _issue_url:
            soup = self.index_to_soup(_issue_url)
        else:
            soup = self.index_to_soup("https://www.poetryfoundation.org/poetrymagazine")
            current_issue = soup.select("div.c-cover-media a")
            if not current_issue:
                self.abort_recipe_processing("Unable to find latest issue")
            current_issue = current_issue[0]
            soup = self.index_to_soup(current_issue["href"])

        issue_edition = self.tag_to_string(soup.find("h1"))
        self.timefmt = f" [{issue_edition}]"
        cover_image = soup.select("div.c-issueBillboard-cover-media img")[0]
        parsed_cover_url = urlparse(
            cover_image["srcset"].split(",")[-1].strip().split(" ")[0]
        )
        self.cover_url = f"{parsed_cover_url.scheme}://{parsed_cover_url.netloc}{parsed_cover_url.path}"

        sectioned_feeds = OrderedDict()
        tabs = soup.find_all("div", attrs={"class": "c-tier_tabbed"})
        for tab in tabs:
            tab_title = tab.find("div", attrs={"class": "c-tier-tab"})
            tab_content = tab.find("div", attrs={"class": "c-tier-content"})
            if not (tab_title and tab_content):
                continue
            tab_title = self.tag_to_string(tab_title)
            sectioned_feeds[tab_title] = []
            for li in tab_content.select("ul.o-blocks > li"):
                author = self.tag_to_string(
                    li.find("span", attrs={"class": "c-txt_attribution"})
                )
                for link in li.find_all("a", attrs={"class": "c-txt_abstract"}):
                    self.log("Found article:", self.tag_to_string(link))
                    sectioned_feeds[tab_title].append(
                        {
                            "title": self.tag_to_string(link),
                            "url": link["href"],
                            "author": author,
                            "description": author,
                        }
                    )
        return sectioned_feeds.items()

View File

@@ -1,52 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
ultimahora.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class UltimaHora_py(BasicNewsRecipe):
    title = 'Ultima Hora'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Paraguay y el resto del mundo'
    publisher = 'EDITORIAL EL PAIS S.A.'
    category = 'news, politics, Paraguay'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'es_PY'
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://www.ultimahora.com/imgs/uh-com.gif'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .sub_titulo_mediano,.TituloNota{font-family: Georgia,"Times New Roman",Times,serif} .sub_titulo_mediano{font-weight: bold} '  # noqa

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    remove_tags = [
        dict(name=['form', 'iframe', 'embed', 'object', 'link', 'base', 'table'])]
    keep_only_tags = [
        dict(attrs={'id': ['nota_titulo', 'nota_copete', 'texto']})]

    feeds = [
        (u'Arte y Espectaculos', u'http://www.ultimahora.com/adjuntos/rss/UHEspectaculos.xml'),
        (u'Ciudad del Este', u'http://www.ultimahora.com/adjuntos/rss/UHCDE.xml'),
        (u'Deportes', u'http://www.ultimahora.com/adjuntos/rss/UHDeportes.xml'),
        (u'Ultimo momento', u'http://www.ultimahora.com/adjuntos/rss/UltimoMomento.xml'),
        (u'Nacionales', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-nacionales.xml'),
        (u'Politica', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-politica.xml'),
        (u'Sucesos', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-sucesos.xml'),
        (u'Economia', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-economia.xml'),
        (u'Ciencia y Tecnologia', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-ciencia.xml')
    ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

View File

@@ -1,51 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
unica.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Unica(BasicNewsRecipe):
    title = u'Unica'
    __author__ = u'Silviu Cotoar\u0103'
    description = 'Asa cum esti tu'
    publisher = 'Unica'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Femei'
    encoding = 'utf-8'
    cover_url = 'http://www.unica.ro/fileadmin/images/logo.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id': 'sticky'}), dict(
            name='p', attrs={'class': 'bodytext'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['top-links']}), dict(name='div', attrs={'id': ['autor_name']}), dict(name='div', attrs={
            'class': ['box-r']}), dict(name='div', attrs={'class': ['category']}), dict(name='div', attrs={'class': ['data']})
    ]

    remove_tags_after = [
        dict(name='ul', attrs={'class': 'pager'})
    ]

    feeds = [
        (u'Feeds', u'http://www.unica.ro/rss.html')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

View File

@@ -1,86 +0,0 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe


class UnitedDaily(BasicNewsRecipe):
    title = u'聯合新聞網'
    oldest_article = 1
    max_articles_per_feed = 100

    feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
             (u'政治', u'http://udn.com/udnrss/politics.xml'),
             (u'社會', u'http://udn.com/udnrss/social.xml'),
             (u'生活', u'http://udn.com/udnrss/life.xml'),
             (u'綜合', u'http://udn.com/udnrss/education.xml'),
             (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
             (u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
             (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
             (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
             (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
             (u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
             (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
             (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
             (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
             (u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
             (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
             (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
             (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
             (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
             (u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
             (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
             (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
             (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
             (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
             (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
             (u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
             (u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
             (u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
             (u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
             (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
             (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
             (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
             (u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
             (u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
             (u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
             (u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
             (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
             (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
             (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
             (u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
             (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
             (u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
             (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
             (u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
             (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
             (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
             (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
             (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
             (u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
             (u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
             ]

    extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}'''  # noqa

    __author__ = 'Eddie Lau'
    __version__ = '1.2'
    language = 'zh_TW'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}
    masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
    auto_cleanup = True
    # keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
    #                   dict(name='div', attrs={'id':['story_title']}),
    #                   dict(name='td', attrs={'class':['story_author']}),
    #                   dict(name='div', attrs={'id':['story_author']}),
    #                   dict(name='td', attrs={'class':['story']}),
    #                   dict(name='div', attrs={'id':['story']}),
    #                   ]

View File

@@ -1,22 +0,0 @@
# -*- coding: utf-8 -*-
# https://github.com/iemejia/calibrecolombia
'''
http://www.unperiodico.unal.edu.co/
'''

from calibre.web.feeds.news import BasicNewsRecipe


class UNPeriodico(BasicNewsRecipe):
    title = u'UN Periodico'
    language = 'es_CO'
    __author__ = 'Ismael Mejia <iemejia@gmail.com>'
    cover_url = 'http://www.unperiodico.unal.edu.co/fileadmin/templates/periodico/img/logoperiodico.png'
    description = 'UN Periodico'
    oldest_article = 30
    max_articles_per_feed = 100
    publication_type = 'newspaper'
    feeds = [
        (u'UNPeriodico', u'http://www.unperiodico.unal.edu.co/rss/type/rss2/')
    ]

View File

@@ -1,83 +0,0 @@
#!/usr/bin/env python

__author__ = 'Darko Spasovski'
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
'''
utrinski.com.mk
'''
import datetime
import re

from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe


class UtrinskiVesnik(BasicNewsRecipe):

    INDEX = 'http://www.utrinski.com.mk/'
    title = 'Utrinski Vesnik'
    description = 'Daily Macedonian newspaper'
    masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
    language = 'mk'
    remove_javascript = True
    publication_type = 'newspaper'
    category = 'news, Macedonia'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [
            # Remove anything before the start of the article.
            (r'<body.*?Article start-->', lambda match: '<body>'),
            # Remove anything after the end of the article.
            (r'<!--Article end.*?</body>', lambda match: '</body>'),
        ]
    ]
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif}
        .WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
    """

    conversion_options = {
        'comment': description,
        'tags': category,
        'language': language,
        'linearize_tables': True
    }

    def parse_index(self):
        feeds = []
        # open main page
        soup = self.index_to_soup(self.INDEX)
        # find all anchors with class attribute equal to
        # 'WB_UTRINSKIVESNIK_MainMenu'
        for section in soup.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_MainMenu'}):
            sectionTitle = section.contents[0].string
            sectionUrl = self.INDEX + section['href'].strip()
            # open the anchor link
            raw = browser().open_novisit(sectionUrl).read()
            sectionSoup = BeautifulSoup(raw)
            # find all anchors with class attribute equal to
            # 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'
            sectionArticles = sectionSoup.findAll(
                'a', attrs={'class': 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'})
            articles = []
            for sectionArticle in sectionArticles:
                # article title = anchor's contents, article url = anchor's
                # href
                articleTitle = sectionArticle.contents[0].string.strip()
                articleUrl = self.INDEX + sectionArticle['href'].strip()
                articleDate = datetime.datetime.today().strftime('%d.%m.%Y')
                articles.append(
                    {'title': articleTitle, 'url': articleUrl, 'description': '', 'date': articleDate})
            if articles:
                feeds.append((sectionTitle, articles))
        return feeds

    def get_cover_url(self):
        datum = datetime.datetime.today().strftime('%d_%m_%Y')
        return 'http://www.utrinski.com.mk/WBStorage/Files/' + datum + '.jpg'

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe

'''
Valby Bladet
'''


class ValbyBladet_dk(BasicNewsRecipe):
    __author__ = 'CoderAllan.github.com'
    title = 'Valby Bladet'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Valby Bladet', 'http://minby.dk/valby-bladet/feed/'),
        ('Kommentarer til Valby Bladet', 'http://minby.dk/valby-bladet/comments/feed/'),
    ]

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe

'''
Vanløse Bladet
'''


class VanloeseBladet_dk(BasicNewsRecipe):
    __author__ = 'CoderAllan.github.com'
    title = 'Vanløse Bladet'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Vanløse Bladet', 'http://minby.dk/vanloese-bladet/feed/'),
        ('Kommentarer til Vanløse Bladet', 'http://minby.dk/vanloese-bladet/comments/feed/'),
    ]

View File

@@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe

'''
Lokalavisen Varde
'''


class VardeLokalavisen_dk(BasicNewsRecipe):
    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Varde'
    description = 'Lokale og regionale nyheder, sport, kultur fra Varde og omegn på varde.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Lokalavisen Varde', 'http://varde.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Varde', 'http://varde.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Varde', 'http://varde.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Varde', 'http://varde.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Varde', 'http://varde.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Varde', 'http://varde.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@@ -1,83 +0,0 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''

from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe


def new_tag(soup, name, attrs=()):
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)


class Veintitres(BasicNewsRecipe):
    title = 'Veintitres'
    __author__ = 'Darko Miletic'
    description = 'Revista Argentino dedicada a politica'
    publisher = 'Veintitres'
    category = 'news, politics, Argentina'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'es_AR'
    lang = 'es-AR'
    direction = 'ltr'
    INDEX = 'http://www.elargentino.com/medios/120/veintitres.html'
    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + \
        category + \
        '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})]

    remove_tags = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=120&Content-Type=text/xml&ChannelDesc=Veintitres')]

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        soup.html['lang'] = self.lang
        soup.html['dir'] = self.direction
        mlang = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Language"), ("content", self.lang)])
        mcharset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        soup.head.insert(0, mlang)
        soup.head.insert(1, mcharset)
        return soup

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('div', attrs={'class': 'colder'})
        if cover_item:
            clean_url = self.image_url_processor(
                None, cover_item.div.img['src'])
            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
        return cover_url

    def image_url_processor(self, baseurl, url):
        base, sep, rest = url.rpartition('?Id=')
        img, sep2, rrest = rest.partition('&')
        return base + sep + img

View File

@@ -1,32 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe

'''
Lokalavisen Vejle
'''


class VejleLokalavisen_dk(BasicNewsRecipe):
    __author__ = 'CoderAllan.github.com'
    title = 'Lokalavisen Vejle'
    description = 'Lokale og regionale nyheder, sport, kultur fra Vejle og omegn på vejle.lokalavisen.dk'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Seneste nyt fra Lokalavisen Vejle', 'http://vejle.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Vejle', 'http://vejle.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Vejle', 'http://vejle.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Vejle', 'http://vejle.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Vejle', 'http://vejle.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Vejle', 'http://vejle.lokalavisen.dk/section/senestelaeserbreverss'),
    ]

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# https://manual.calibre-ebook.com/news_recipe.html
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe

'''
Vesterbro Bladet
'''


class VesterbroBladet_dk(BasicNewsRecipe):
    __author__ = 'CoderAllan.github.com'
    title = 'Vesterbro Bladet'
    category = 'newspaper, news, localnews, sport, culture, Denmark'
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True
    language = 'da'

    feeds = [
        ('Vesterbro Bladet', 'http://minby.dk/vesterbro-bladet/feed/'),
        ('Kommentarer til Vesterbro Bladet', 'http://minby.dk/vesterbro-bladet/comments/feed/'),
    ]

View File

@@ -1,18 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class VFR(BasicNewsRecipe):
    title = u'VFR Magazine'
    language = 'it'
    __author__ = 'Krittika Goyal'
    oldest_article = 31  # days
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('VFR Magazine',
         'http://feeds.feedburner.com/vfrmagazine'),
    ]

View File

@@ -1,59 +0,0 @@
##
# Title:        Vice News recipe for calibre
# Author:       Adrian Tennessee
# Contact:      adrian.tennessee at domainthatnobodytakes.com
##
# License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
# Copyright:    Copyright 2014 Adrian Tennessee
##
# Written:      2014-09-13
# Last Edited:  2014-09-13
##

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class VICENews(BasicNewsRecipe):
    __author__ = 'Adrian Tennessee (adrian.tennessee at domainthatnobodytakes.com)'
    __license__ = 'GPLv3'
    __copyright__ = '2014, Adrian Tennessee <adrian.tennessee at domainthatnobodytakes.com>'

    title = u'VICE News'
    language = 'en'
    description = u'VICE News web site ebook'
    publisher = 'VICE Media'
    category = 'news, world'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/d/dc/Vice_News_logo.jpg'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # article-title modifies h1-tag of article title
    extra_css = '.article-title { font-size:125%; font-weight:bold }'

    keep_only_tags = [
        classes('article__header__title contributors article__header__datebar__date--original short-form__body__article-body')
    ]

    remove_tags = [
        classes('lazy-vice-ad abc__article_embed article__tagged user-newsletter-signup article__embed-component'),
    ]

    def preprocess_html(self, soup):
        for img in soup.findAll(**classes('responsive-image__img')):
            for source in img.findPreviousSiblings('source'):
                img['src'] = source['srcset'].split('?')[0]
                source.extract()
        return soup

    feeds = [(u'VICE News', u'https://news.vice.com/rss')]

View File

@@ -1,42 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re

from calibre.web.feeds.news import BasicNewsRecipe


class ViceESRecipe(BasicNewsRecipe):
    title = u'Vice Magazine España'
    __author__ = 'atordo'
    description = u'La página web oficial de la revista Vice España'
    category = u'noticias, fotografía, blogs, moda, arte, cine, música, literatura, tecnología'
    cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif'

    oldest_article = 14
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    language = 'es'
    use_embedded_content = False
    remove_javascript = True
    publication_type = 'magazine'
    recursions = 10
    match_regexps = [r'/read/.*\?Contentpage=[2-9]$']

    keep_only_tags = [
        dict(attrs={'class': ['article_title', 'article_content', 'next']})
    ]

    remove_tags = [
        dict(attrs={'class': ['social_buttons', 'search', 'tweet',
                              'like', 'inline_socials', 'stumblebadge', 'plusone']})
    ]

    extra_css = '''
        .author{font-size:small}
        img{margin-bottom: 0.4em; display:block; margin-left:auto; margin-right: auto}
    '''

    preprocess_regexps = [
        (re.compile(r'<img src="http://.*\.scorecardresearch\.com/'), lambda m: '')
    ]

    feeds = [('Vice', 'http://www.vice.com/es/rss')]

View File

@@ -1,42 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import re

from calibre.web.feeds.news import BasicNewsRecipe


class ViceDERecipe(BasicNewsRecipe):
    title = u'Vice Magazin Deutschland'
    __author__ = 'atordo;alex'
    description = u'Die offizielle Website des Vice Magazins Deutschland'
    category = u'Nachrichten, Fotografie, Blogs, Mode, Kunst, Film, Musik, Literatur, Technik'
    cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif'

    oldest_article = 14
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    language = 'de'
    use_embedded_content = False
    remove_javascript = True
    publication_type = 'magazine'
    recursions = 10
    match_regexps = [r'/read/.*\?Contentpage=[2-9]$']

    keep_only_tags = [
        dict(attrs={'class': ['article_title', 'article_content', 'next']})
    ]

    remove_tags = [
        dict(attrs={'class': ['social_buttons', 'search', 'tweet',
                              'like', 'inline_socials', 'stumblebadge', 'plusone']})
    ]

    extra_css = '''
        .author{font-size:small}
        img{margin-bottom: 0.4em; display:block; margin-left:auto; margin-right: auto}
    '''

    preprocess_regexps = [
        (re.compile(r'<img src="http://.*\.scorecardresearch\.com/'), lambda m: '')
    ]

    feeds = [('Vice', 'http://www.vice.com/de/rss')]

View File

@@ -1,48 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
'''
vijesti.me
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class Vijesti(BasicNewsRecipe):
    title = 'Vijesti'
    __author__ = 'Darko Miletic'
    description = 'News from Montenegro'
    publisher = 'Daily Press Vijesti'
    category = 'news, politics, Montenegro'
    oldest_article = 2
    max_articles_per_feed = 150
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'sr'
    publication_type = 'newspaper'
    extra_css = """
        @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
        @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
        body{font-family: Georgia,"Times New Roman",Times,serif1,serif}
        .articledescription,.article,.chapter{font-family: sans1, sans-serif}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [
        dict(name='div', attrs={'id': ['article_intro_text', 'article_text']})]
    remove_tags = [dict(name=['object', 'link', 'embed', 'form'])]

    feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss/')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

View File

@@ -1,31 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.virtualshackles.com
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


class Virtualshackles(BasicNewsRecipe):
    title = 'Virtual Shackles'
    __author__ = 'Darko Miletic'
    description = "The adventures of Orion and Jack, making games they'd never play for people they don't like."
    category = 'virtual shackles, virtualshackles, games, webcomic, comic, video game, orion, jack'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    encoding = 'cp1252'
    publisher = 'Virtual Shackles'
    language = 'en'
    publication_type = 'comic'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    feeds = [(u'Virtual Shackles', u'http://feeds2.feedburner.com/virtualshackles')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
viva.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Viva(BasicNewsRecipe):
    title = u'Viva'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Vedete si evenimente'
    publisher = u'Viva'
    oldest_article = 25
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare,Reviste,Femei'
    encoding = 'utf-8'
    cover_url = 'http://www.viva.ro/images/default/viva.gif'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'class': 'articol'}), dict(name='div', attrs={
            'class': 'gallery clearfix'}), dict(name='div', attrs={'align': 'justify'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['breadcrumbs']}), dict(name='div', attrs={'class': ['links clearfix']}), dict(name='a', attrs={'id': ['img_arrow_right']}), dict(name='img', attrs={'id': ['zoom']}), dict(name='div', attrs={'class': ['foto_counter']}), dict(name='div', attrs={'class': ['gal_select clearfix']})  # noqa
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': ['links clearfix']})
    ]

    feeds = [
        (u'Vedete', u'http://feeds.feedburner.com/viva-Vedete'),
        (u'Evenimente', u'http://feeds.feedburner.com/viva-Evenimente'),
        (u'Frumusete', u'http://feeds.feedburner.com/viva-Beauty-Fashion'),
        (u'Noutati', u'http://feeds.feedburner.com/viva-Noutati')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

View File

@@ -1,40 +0,0 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Huan Komrade T <huantnh at gmail.com>'
'''
vnexpress.net
'''

from calibre.web.feeds.news import BasicNewsRecipe


class BBCVietnamese(BasicNewsRecipe):
    title = u'VnExpress'
    __author__ = 'Huan Komrade T'
    description = 'Vietnam news and current affairs from the Food Production Technology Corporation'
    no_stylesheets = True
    language = 'vi'
    encoding = 'utf-8'
    recursions = 0
    remove_tags = [dict(name='div', attrs={'class': 'footer'})]
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    feeds = [
        ('Index', 'http://vnexpress.net/rss/gl/trang-chu.rss'),
        ('Vietnam', 'http://vnexpress.net/rss/gl/xa-hoi.rss'),
        ('World News', 'http://vnexpress.net/rss/gl/the-gioi.rss'),
        ('Business', 'http://vnexpress.net/rss/gl/kinh-doanh.rss'),
        ('Culture', 'http://vnexpress.net/rss/gl/van-hoa.rss'),
        ('Sports', 'http://vnexpress.net/rss/gl/the-thao.rss'),
        ('Lifestyle', 'http://vnexpress.net/rss/gl/doi-song.rss'),
        ('From The Readers',
         'http://vnexpress.net/rss/gl/ban-doc-viet.rss'),
        ('From The Readers - Sharing',
         'http://vnexpress.net/rss/gl/ban-doc-viet-tam-su.rss'),
    ]

    def print_version(self, url):
        return url + '?q=1'

View File

@@ -1,26 +0,0 @@
#!/usr/bin/env python

from calibre.web.feeds.news import BasicNewsRecipe


class VoetbalBE(BasicNewsRecipe):
    title = u'Voetbal België'
    __author__ = u'erkfuizfeuadjfjzefzfuzeff'
    description = u'Voetbalnieuws uit België'
    oldest_article = 7
    language = 'nl_BE'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='title'), dict(name='h1'),
        dict(name='a', attrs={'class': 'fancy'}),
        dict(name='img', attrs={'class': 'border kalooga_10605'}),
        dict(name='div', attrs={'class': 'text'})
    ]

    remove_tags = []

    feeds = [(u'Voetbalnieuws', u'http://www.voetbalbelgie.be/nl/rss')]

View File

@@ -1,25 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class HindustanTimes(BasicNewsRecipe):
    title = u'Voice of America'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 15  # days
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('All Zones',
         'http://learningenglish.voanews.com/rss/?count=20'),
        ('World',
         'http://learningenglish.voanews.com/rss/?count=20&zoneid=957'),
        ('USA',
         'http://learningenglish.voanews.com/rss/?count=20&zoneid=958'),
        ('Health',
         'http://learningenglish.voanews.com/rss/?count=20&zoneid=955'),
    ]

View File

@@ -1,86 +0,0 @@
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe


def new_tag(soup, name, attrs=()):
    impl = getattr(soup, 'new_tag', None)
    if impl is not None:
        return impl(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)


class VrijNederlandRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    locale = 'nl'
    version = 1

    title = u'Vrij Nederland'
    publisher = u'Weekbladpers Tijdschriften'
    category = u'News, Opinion'
    description = u'Weekly opinion magazine from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    conversion_options = {'publisher': publisher,
                          'tags': category, 'comments': description}

    feeds = []
    feeds.append((u'Politiek', u'http://www.vn.nl/politiek.rss'))
    feeds.append((u'Buitenland', u'http://www.vn.nl/buitenland.rss'))
    feeds.append((u'Economie', u'http://www.vn.nl/economie.rss'))
    feeds.append((u'Justitie', u'http://www.vn.nl/justitie.rss'))
    feeds.append((u'Samenleving', u'http://www.vn.nl/samenleving.rss'))
    feeds.append((u'Crime', u'http://www.vn.nl/crime.rss'))
    feeds.append((u'Media', u'http://www.vn.nl/media.rss'))
    feeds.append((u'De Republiek der Letteren',
                  u'http://www.vn.nl/republiek.rss'))
    feeds.append((u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss'))
    feeds.append((u'Ko Colijn', u'http://www.vn.nl/colijn.rss'))
    feeds.append((u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss'))
    feeds.append((u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss'))
    feeds.append((u'Stephan Sanders', u'http://www.vn.nl/sanders.rss'))
    feeds.append((u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss'))
    feeds.append((u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss'))
    feeds.append((u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss'))

    keep_only_tags = [
        dict(name='div', attrs={'class': 'cl-column column-one'})]

    remove_tags = []
    remove_tags.append(
        dict(name='div', attrs={'class': 'wpg-element guest-book-overview'}))
    remove_tags.append(
        dict(name='div', attrs={'class': 'wpg-element forum-message-form'}))
    remove_tags.append(dict(name='div', attrs={'class': 'mediaterms'}))
    remove_tags.append(dict(name='div', attrs={'class': 'label-term'}))
    remove_tags.append(dict(name='div', attrs={
        'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'}))
    remove_tags.append(dict(name='object'))
    remove_tags.append(dict(name='link'))
    remove_tags.append(dict(name='meta'))

    def preprocess_html(self, soup):
        # Just clean up the result a little
        meta = soup.find('div', attrs={'class': 'meta'})
        if meta:
            link = meta.find('span', attrs={'class': 'link'})
            if link:
                link.extract()
            for seperator in meta.findAll('span', attrs={'class': 'seperator'}):
                seperator.extract()
        # Their header is full of 'if IE6/7/8' tags. Just get rid of it
        # altogether
        theirHead = soup.head
        theirHead.extract()
        myHead = new_tag(soup, 'head')
        soup.insert(0, myHead)
        return soup

View File

@@ -1,26 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1278773519(BasicNewsRecipe):
    title = u'Waco Tribune Herald'
    __author__ = 'rty'
    publisher = 'A Robinson Media Company'
    description = 'Waco, Texas, Newspaper'
    category = 'News, Texas, Waco'
    oldest_article = 7
    max_articles_per_feed = 100

    feeds = [
        (u'News', u'http://www.wacotrib.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=news/ap_nation,news/ap_nation/*&f=rss'),
        (u'Sports', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=sports*&f=rss'),
        (u'AccessWaco', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=entertainment/accesswaco*&f=rss'),
        (u'Opinions', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=opinion*&f=rss')
    ]

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}
    auto_cleanup = True

View File

@@ -1,50 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
wall-street.ro
'''

from calibre.web.feeds.news import BasicNewsRecipe


class WallStreetRo(BasicNewsRecipe):
    title = u'Wall Street'
    __author__ = u'Silviu Cotoar\u0103'
    description = ''
    publisher = 'Wall Street'
    oldest_article = 5
    language = 'ro'
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    category = 'Ziare'
    encoding = 'utf-8'
    cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class': 'article_header'}), dict(
            name='div', attrs={'class': 'article_text'})
    ]

    remove_tags = [
        dict(name='p', attrs={'class': ['page_breadcrumbs']}), dict(name='div', attrs={'id': ['article_user_toolbox']}), dict(
            name='p', attrs={'class': ['comments_count_container']}), dict(name='div', attrs={'class': ['article_left_column']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': 'clearfloat'})
    ]

    feeds = [
        (u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml')
    ]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

View File

@@ -1,149 +0,0 @@
from datetime import date, timedelta

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe


class WaPoCartoonsRecipe(BasicNewsRecipe):
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 2

    title = u'Washington Post Cartoons'
    publisher = u'Washington Post'
    category = u'News, Cartoons'
    description = u'Cartoons from the Washington Post'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    feeds = []
    feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'))
    feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/'))
    feeds.append(
        (u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'))
    feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/'))
    feeds.append(
        (u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'))
    feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/'))
    feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'))
    feeds.append(
        (u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'))
    feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'))

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
        h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
        #name {margin-bottom: 0.2em}
        #copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
    '''

    def parse_index(self):
        index = []
        oldestDate = date.today() - timedelta(days=self.oldest_article)
        oldest = oldestDate.strftime('%Y%m%d')
        for feed in self.feeds:
            cartoons = []
            soup = self.index_to_soup(feed[1])
            cartoon = {'title': 'Current', 'date': None,
                       'url': feed[1], 'description': ''}
            cartoons.append(cartoon)
            select = soup.find('select', attrs={'name': ['url', 'dest']})
            if select:
                cartoonCandidates = []
                if select['name'] == 'url':
                    cartoonCandidates = self.cartoonCandidatesWaPo(
                        select, oldest)
                else:
                    cartoonCandidates = self.cartoonCandidatesCreatorsCom(
                        select, oldest)
                for cartoon in cartoonCandidates:
                    cartoons.append(cartoon)
            index.append([feed[0], cartoons])
        return index

    def preprocess_html(self, soup):
        freshSoup = self.getFreshSoup(soup)
        div = soup.find('div', attrs={'id': 'name'})
        if div:
            freshSoup.body.append(div)
            comic = soup.find('div', attrs={'id': 'comic_full'})
            img = comic.find('img')
            if '&' in img['src']:
                img['src'], sep, bad = img['src'].rpartition('&')
            freshSoup.body.append(comic)
            freshSoup.body.append(soup.find('div', attrs={'id': 'copyright'}))
        else:
            span = soup.find('span', attrs={'class': 'title'})
            if span:
                del span['class']
                span['id'] = 'name'
                span.name = 'div'
                freshSoup.body.append(span)
            img = soup.find('img', attrs={'class': 'pic_big'})
            if img:
                td = img.parent
                td['style'] = ''
                del td['style']
                td.name = 'div'
                td['id'] = 'comic_full'
                freshSoup.body.append(td)
            td = soup.find('td', attrs={'class': 'copy'})
            if td:
                for a in td.findAll('a'):
                    a.extract()
                del td['class']
                td['id'] = 'copyright'
                td.name = 'div'
                freshSoup.body.append(td)
        return freshSoup

    def getFreshSoup(self, oldSoup):
        freshSoup = BeautifulSoup(
            '<html><head><title></title></head><body></body></html>')
        if oldSoup.head.title:
            freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
        return freshSoup

    def cartoonCandidatesWaPo(self, select, oldest):
        opts = select.findAll('option')
        for i in range(1, len(opts)):
            url = opts[i]['value'].rstrip('/')
            dateparts = url.split('/')[-3:]
            datenum = str(dateparts[0]) + str(dateparts[1]) + str(dateparts[2])
            if datenum >= oldest:
                yield {'title': self.tag_to_string(opts[i]), 'date': None, 'url': url, 'description': ''}
            else:
                return

    def cartoonCandidatesCreatorsCom(self, select, oldest):
        monthNames = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05',
                      'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
                      'November': '11', 'December': '12'}
        opts = select.findAll('option', selected=False)
        for i in range(1, len(opts)):
            dateString = self.tag_to_string(opts[i])
            rest, sep, year = dateString.rpartition(', ')
            parts = rest.split(' ')
            day = parts[2].rjust(2, '0')
            month = monthNames[parts[1]]
            datenum = str(year) + month + str(day)
            if datenum >= oldest:
                yield {'title': dateString, 'date': None, 'url': opts[i]['value'], 'description': ''}
            else:
                return

View File

@ -1,102 +0,0 @@
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class WatchingAmericaRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'en'
version = 1
title = u'Watching America'
publisher = u'watchingamerica.com'
category = u'News'
description = u'Global opinion about the United States'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
remove_attributes = ['style']
extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
.main_content em {font-size: x-small; font-style: italic; color: #696969;}
.main_content span strong {font-size: x-large; font-weight: bold;}
.insideitro {font-size: xx-small; font-style: italic; color: #666666;}
span {padding: 0em; margin 0em;}
'''
INDEX = u'http://watchingamerica.com/News/'
def parse_index(self):
answer = []
soup = self.index_to_soup(self.INDEX)
articles = []
feature = soup.find('div', attrs={'id': 'headzone'})
if feature:
link = feature.find('a', attrs={'class': 'feature'})
url = link.get('href', None)
title = self.tag_to_string(link)
description = self.tag_to_string(
feature.find('h1', attrs={'class': 'pull'}))
article = {'title': title, 'date': u'',
'url': url, 'description': description}
articles.append(article)
answer.append(('Feature', articles))
feed_titles = ['Translations from the West',
'Translations from the East']
for i in range(1, 3):
articles = []
div = soup.find('div', attrs={'class': 'newscol' + str(i)})
if div:
for link in div.findAll('a', attrs={'class': 'headline'}):
url = link.get('href', None)
title = self.tag_to_string(link)
description = None
h3 = link.findNextSibling('h3')
if h3:
description = self.tag_to_string(h3)
article = {'title': title, 'date': u'',
'url': url, 'description': description}
articles.append(article)
answer.append((feed_titles[i - 1], articles))
return answer
def preprocess_html(self, soup):
freshSoup = self.get_fresh_soup(soup)
article = soup.find('p', attrs={'class': 'MsoNormal'}).parent
if article:
article.name = 'div'
del article['width']
article['class'] = 'main_content'
org = article.find('a', attrs={'href': '?SHOW_ORIGINAL_TEXT'})
if org:
org.parent.extract()
intro = article.find('span', attrs={'class': 'insideitro'})
if intro:
for el in intro.findAll(['strong', 'em', 'br']):
if el.name == 'br':
el.extract()
else:
el.name = 'div'
freshSoup.body.append(article)
return freshSoup
def get_fresh_soup(self, oldSoup):
freshSoup = BeautifulSoup(
'<html><head><title></title></head><body></body></html>')
if oldSoup.head.title:
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
return freshSoup
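
get_fresh_soup supports a transplant pattern: parse the page, lift the article node out, and append it to a skeleton document so everything else is dropped. A hedged standalone sketch (markup invented):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    old = BeautifulSoup('<html><head><title>t</title></head><body>'
                        '<div><p class="MsoNormal">Article body</p></div></body></html>')
    fresh = BeautifulSoup('<html><head><title></title></head><body></body></html>')
    para = old.find('p', attrs={'class': 'MsoNormal'})
    if para:
        fresh.body.append(para.parent)  # the div now lives in the fresh document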

View File

@ -1,28 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class XkcdCom(BasicNewsRecipe):
cover_url = 'http://what-if.xkcd.com/imgs/whatif-logo.png'
masthead_url = 'http://what-if.xkcd.com/imgs/whatif-logo.png'
__author__ = 'kisnik'
title = 'What If...'
description = 'The "What If" feed from xkcd'
language = 'en'
keep_only_tags = [dict(name='article')]
use_embedded_content = False
oldest_article = 60
# add image and text
# add a horizontal line after the question
preprocess_regexps = [
(re.compile(r'(<img.*title=")([^"]+)(".*>)'),
lambda m: '<div>%s%s<p id="photo_text">(%s)</p></div>' % (m.group(1), m.group(3), m.group(2))),
(re.compile(r'(<p.*id="attribute">[^>]+</p>)'),
lambda n: '%s<hr>' % (n.group(1))),
]
extra_css = "#photo_text{font-size:small;}"
feeds = [(u'What If...', u'http://what-if.xkcd.com/feed.atom')]
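
preprocess_regexps runs over the raw HTML before parsing; the first rule above copies an image's title attribute into a visible caption. A quick standalone check of that substitution (the sample tag is made up):

    import re

    pat = re.compile(r'(<img.*title=")([^"]+)(".*>)')
    sample = '<img src="a.png" title="Hover text goes here">'
    print(pat.sub(lambda m: '<div>%s%s<p id="photo_text">(%s)</p></div>'
                  % (m.group(1), m.group(3), m.group(2)), sample))
    # <div><img src="a.png" title=""><p id="photo_text">(Hover text goes here)</p></div>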

View File

@ -1,30 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1294938721(BasicNewsRecipe):
title = u'Wichita Eagle'
language = 'en'
__author__ = 'Jason Cameron'
description = 'Daily news from the Wichita Eagle'
oldest_article = 1
max_articles_per_feed = 30
keep_only_tags = [dict(name='div', attrs={'id': 'wide'})]
feeds = [
(u'Local News',
u'http://www.kansas.com/news/local/index.rss'),
(u'National News',
u'http://www.kansas.com/news/nation-world/index.rss'),
(u'Sports',
u'http://www.kansas.com/sports/index.rss'),
(u'Opinion',
u'http://www.kansas.com/opinion/index.rss'),
(u'Life',
u'http://www.kansas.com/living/index.rss'),
(u'Entertainment',
u'http://www.kansas.com/entertainment/index.rss')
]
    def print_version(self, url):
        # splice '/v-print' in after the section segment of the story URL
        urlparts = url.split('/')
        newadd = urlparts[5] + '/v-print'
        return newadd.join(url.split(urlparts[5]))
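
Traced on a hypothetical URL of the shape the recipe expects (note that splitting on the segment assumes it appears only once in the URL):

    url = 'http://www.kansas.com/news/local/story/12345.html'  # hypothetical story URL
    urlparts = url.split('/')          # ['http:', '', 'www.kansas.com', 'news', 'local', 'story', ...]
    newadd = urlparts[5] + '/v-print'  # 'story/v-print'
    print(newadd.join(url.split(urlparts[5])))
    # http://www.kansas.com/news/local/story/v-print/12345.html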

View File

@ -1,71 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
en.wikinews.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class WikiNews(BasicNewsRecipe):
title = 'Wikinews'
__author__ = 'Darko Miletic'
description = 'News from Wikinews'
category = 'news, world'
oldest_article = 7
max_articles_per_feed = 100
publisher = 'Wiki'
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
remove_javascript = True
language = 'en'
html2lrf_options = [
'--comment', description, '--category', category, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + \
'"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [
dict(name='h1', attrs={'id': 'firstHeading'}), dict(
name='div', attrs={'id': 'bodyContent'})
]
remove_tags = [
dict(name='link'), dict(name='div', attrs={'id': ['printfooter', 'catlinks', 'footer']}), dict(
name='div', attrs={'class': ['thumb left', 'thumb right']})
]
remove_tags_after = dict(name='h2')
feeds = [(u'News', u'http://feeds.feedburner.com/WikinewsLatestNews')]
    def get_article_url(self, article):
        artl = article.get('link', None)
        if not artl:
            return None
        rest, sep, article_id = artl.rpartition('/')
        return 'http://en.wikinews.org/wiki/' + article_id
def print_version(self, url):
rest, sep, article_id = url.rpartition('/')
return 'http://en.wikinews.org/w/index.php?title=' + article_id + '&printable=yes'
def get_cover_url(self):
return 'http://upload.wikimedia.org/wikipedia/commons/b/bd/Wikinews-logo-en.png'
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="en"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0, mtag)
btag = soup.find('div', attrs={'id': 'bodyContent'})
for item in btag.findAll('div'):
item.extract()
for item in btag.findAll('h2'):
item.extract()
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(font=True):
del item['font']
return soup
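
The two URL hooks work as a pipeline: get_article_url normalizes the feed entry to a canonical wiki URL, and print_version maps that to the printable rendering. Traced on a hypothetical entry (only the last path segment matters):

    link = 'http://feeds.feedburner.com/WikinewsLatestNews/Some_article_title'  # hypothetical
    rest, sep, article_id = link.rpartition('/')
    article_url = 'http://en.wikinews.org/wiki/' + article_id
    print_url = 'http://en.wikinews.org/w/index.php?title=' + article_id + '&printable=yes'
    print(article_url)  # http://en.wikinews.org/wiki/Some_article_title
    print(print_url)    # http://en.wikinews.org/w/index.php?title=Some_article_title&printable=yes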

View File

@ -1,36 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1277647803(BasicNewsRecipe):
title = u'Winnipeg Sun'
__author__ = 'rty'
__version__ = '1.0'
oldest_article = 2
publisher = 'www.winnipegsun.com'
description = 'Winnipeg Newspaper'
category = 'News, Winnipeg, Canada'
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'UTF-8'
remove_javascript = True
use_embedded_content = False
language = 'en_CA'
feeds = [
(u'News', u'http://www.winnipegsun.com/news/rss.xml'),
(u'Columnists', u'http://www.winnipegsun.com/columnists/rss.xml'),
(u'Editorial', u'http://www.winnipegsun.com/comment/editorial/rss.xml'),
(u'Entertainments', u'http://www.winnipegsun.com/entertainment/rss.xml'),
(u'Life', u'http://www.winnipegsun.com/life/rss.xml'),
(u'Money', u'http://www.winnipegsun.com/money/rss.xml')
]
keep_only_tags = [
dict(name='div', attrs={'id': 'article'}),
]
remove_tags = [
dict(name='div', attrs={'class': ['leftBox', 'bottomBox clear']}),
dict(name='ul', attrs={'class': 'tabs dl contentSwap'}),
dict(name='div', attrs={'id': 'commentsBottom'}),
]
remove_tags_after = [
dict(name='div', attrs={'class': 'bottomBox clear'})
]

View File

@ -1,31 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Winsupersite(BasicNewsRecipe):
title = u'Supersite for Windows'
description = u'Paul Thurrott SuperSite for Windows'
publisher = 'Paul Thurrott'
__author__ = 'Hypernova'
language = 'en'
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
conversion_options = {'linearize_tables': True}
remove_tags_before = dict(name='h1')
preprocess_regexps = [
(re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL | re.IGNORECASE),
lambda match: '</body>'),
]
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open('http://www.winsupersite.com')
return br
feeds = [(u'Supersite for Windows',
u'http://www.winsupersite.com/supersite.xml')]

View File

@ -1,60 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class WiredITA(BasicNewsRecipe):
title = u'Wired.it'
oldest_article = 1
max_articles_per_feed = 50
no_stylesheets = True
use_embedded_content = False
auto_cleanup = False
encoding = 'utf8'
masthead_url = 'http://www.wired.com/images/home/wired_logo.gif'
description = 'Wired - Make in Italy. Inventa, sbaglia, innova'
publisher = 'http://www.wired.it/'
language = 'it'
__author__ = 'isspro'
publication_type = 'magazine'
conversion_options = {'title': title,
'comments': description,
'language': language,
'publisher': publisher,
'authors': title,
'smarten_punctuation': True
}
keep_only_tags = [
dict(name='div', attrs={'id': 'main-article'})
]
remove_tags = [
dict(name='img', attrs={'class': 'avatar img-circle'}),
dict(name='div', attrs={'class': 'topics'}),
dict(name='div', attrs={'class': 'social-share hidden-lg'}),
dict(name='span', attrs={'class': 'label'})
]
extra_css = '''
h1 {font-size:x-large;}
p.lead {font-size:medium;}
.who {line-height: 0pt; margin: 0pt;}
'''
preprocess_regexps = [
(re.compile(r'Pubblicato'), lambda match: '')
]
feeds = [(u'Attualit\xe0', u'http://www.wired.it/attualita/feed/'),
(u'Internet', 'http://www.wired.it/internet/feed/'),
(u'Gadget', 'http://www.wired.it/gadget/feed/'),
(u'Mobile', 'http://www.wired.it/mobile/feed/'),
(u'Scienza', 'http://www.wired.it/scienza/feed/'),
(u'Economia', 'http://www.wired.it/economia/feed/'),
(u'LifeStyle', 'http://www.wired.it/lifestyle/feed/'),
(u'Play', 'http://www.wired.it/play/feed/'),
]

View File

@ -1,51 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'teepel <teepel44@gmail.com>'
'''
wolnemedia.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class wolne_media(BasicNewsRecipe):
title = u'Wolne Media'
__author__ = 'teepel <teepel44@gmail.com>'
language = 'pl'
description = 'Wiadomości z wolnemedia.net'
INDEX = 'http://wolnemedia.net'
oldest_article = 1
max_articles_per_feed = 100
remove_empty_feeds = True
simultaneous_downloads = 5
remove_javascript = True
no_stylesheets = True
auto_cleanup = True
ignore_duplicate_articles = {'url'}
remove_tags = [dict(name='p', attrs={'class': 'tags'})]
feeds = [
(u'Wiadomości z kraju', u'http://wolnemedia.net/category/wiadomosci-z-kraju/feed/'),
(u'Wiadomości ze świata', u'http://wolnemedia.net/category/wiadomosci-ze-swiata/feed/'),
(u'Edukacja', u'http://wolnemedia.net/category/edukacja/feed/'),
(u'Ekologia', u'http://wolnemedia.net/category/ekologia/feed/'),
(u'Gospodarka', u'http://wolnemedia.net/category/gospodarka/feed/'),
(u'Historia', u'http://wolnemedia.net/category/historia/feed/'),
(u'Kultura', u'http://wolnemedia.net/category/kultura/feed/'),
(u'Kulturoznawstwo', u'http://wolnemedia.net/category/kulturoznawstwo/feed/'),
(u'Media', u'http://wolnemedia.net/category/media/feed/'),
(u'Nauka', u'http://wolnemedia.net/category/nauka/feed/'),
(u'Opowiadania', u'http://wolnemedia.net/category/opowiadania/feed/'),
(u'Paranauka i ezoteryka', u'http://wolnemedia.net/category/ezoteryka/feed/'),
(u'Polityka', u'http://wolnemedia.net/category/polityka/feed/'),
(u'Prawo', u'http://wolnemedia.net/category/prawo/feed/'),
(u'Publicystyka', u'http://wolnemedia.net/category/publicystyka/feed/'),
(u'Reportaż', u'http://wolnemedia.net/category/reportaz/feed/'),
(u'Seks', u'http://wolnemedia.net/category/seks/feed/'),
(u'Społeczeństwo', u'http://wolnemedia.net/category/spoleczenstwo/feed/'),
(u'Świat komputerów', u'http://wolnemedia.net/category/swiat-komputerow/feed/'),
(u'Wierzenia', u'http://wolnemedia.net/category/wierzenia/feed/'),
(u'Zdrowie', u'http://wolnemedia.net/category/zdrowie/feed/')]

View File

@ -1,26 +0,0 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class WorkersWorld(BasicNewsRecipe):
title = u'Workers World'
description = u'Socialist news and analysis'
__author__ = u'urslnx'
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
oldest_article = 7
max_articles_per_feed = 100
encoding = 'utf8'
publisher = 'workers.org'
category = 'news, politics, USA, world'
language = 'en'
publication_type = 'newsportal'
extra_css = ' body{ font-family: Verdana,Arial,Helvetica,sans-serif; } h1{ font-size: x-large; text-align: left; margin-top:0.5em; margin-bottom:0.25em; } h2{ font-size: large; } p{ text-align: left; } .published{ font-size: small; } .byline{ font-size: small; } .copyright{ font-size: small; } ' # noqa
remove_tags_before = dict(name='div', attrs={'id': 'evernote'})
remove_tags_after = dict(name='div', attrs={'id': 'footer'})
masthead_url = 'http://www.workers.org/graphics/wwlogo300.gif'
cover_url = 'http://www.workers.org/pdf/current.jpg'
feeds = [(u'Headlines', u'http://www.workers.org/rss/nonstandard_rss.xml'),
]

View File

@ -1,38 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class WorksInProgress(BasicNewsRecipe):
title = 'Works in progress'
description = 'Works in Progress is an online magazine dedicated to sharing new and underrated ideas to improve the world, and features original writing from some of the most interesting thinkers in the world' # noqa
cover_url = "https://www.worksinprogress.co/wp-content/uploads/2020/03/logo-1.svg"
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
publication_type = 'magazine'
language = 'en'
index = "https://www.worksinprogress.co/"
__author__ = "barakplasma"
def parse_index(self):
soup = self.index_to_soup(self.index)
feeds = []
for section in soup.find_all('div', 'issue-loop'):
section_title = section['data-section-id']
section_items = []
for article in section.find_all('div', 'issue-intro'):
title = article.find('h2', 'issue-title').text
url = article.find_all('a')[1]['href']
author = article.find('a', 'author').text
section_items.append({
"title": title,
"url": url,
"author": author
})
feeds.append((section_title, section_items))
return feeds
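
For reference, parse_index must return a list of (section title, article list) pairs, each article being a dict with at least 'title' and 'url'; the 'author' key is passed through as in this recipe. The shape produced above, sketched with dummy values:

    # Dummy values only; real sections and URLs come from the scraped index.
    feeds = [
        ('issue-one', [
            {'title': 'Sample essay',
             'url': 'https://www.worksinprogress.co/essay/sample',
             'author': 'A. Writer'},
        ]),
    ]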

View File

@ -1,18 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class WoW(BasicNewsRecipe):
title = u'WoW Insider'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 # days
max_articles_per_feed = 25
use_embedded_content = False
no_stylesheets = True
auto_cleanup = True
feeds = [
('WoW',
'http://wow.joystiq.com/rss.xml')
]

View File

@ -1,61 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL 3'
__copyright__ = 'zotzo'
__docformat__ = 'restructuredtext en'
'''
http://wvhooligan.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class wvHooligan(BasicNewsRecipe):
authors = u'Drew Epperley'
__author__ = 'rylsfan'
language = 'en'
version = 2
title = u'WV Hooligan'
publisher = u'Drew Epperley'
publication_type = 'blog'
category = u'Soccer'
description = u'A look at Major League Soccer (MLS) through the eyes of a MLS writer and fan.'
cover_url = 'http://wvhooligan.com/wp-content/themes/urbanelements/images/logo3.png'
oldest_article = 15
max_articles_per_feed = 150
use_embedded_content = True
no_stylesheets = True
remove_javascript = True
encoding = 'utf8'
conversion_options = {
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
}
remove_tags = [
{'class': 'feedflare'},
{'class': 'tweetmeme_button'},
]
def preprocess_html(self, soup):
return self.adeify_images(soup)
feeds = [
(u'Stories', u'http://feeds2.feedburner.com/wvhooligan'),
(u'MLS', u'http://wvhooligan.com/category/mls/feed/'),
(u'MLS Power Rankings',
u'http://wvhooligan.com/category/power-rankings/feed/'),
(u'MLS Expansion',
u'http://wvhooligan.com/category/mls/expansion-talk/feed/'),
(u'US National Team',
u'http://wvhooligan.com/category/us-national-team/feed/'),
(u'College', u'http://wvhooligan.com/category/college-soccer/feed/'),
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -1,34 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1347997197(BasicNewsRecipe):
title = u'XpatLoop.com'
__author__ = 'laca'
oldest_article = 7
language = 'en_HU'
auto_cleanup = True
masthead_url = 'http://www.xpatloop.com/images/cms/xs_logo.gif'
use_embedded_content = False
author = 'laca'
simultaneous_downloads = 1
max_articles_per_feed = 50
no_stylesheets = True
feeds = [(u'Current Affairs', u'http://www.xpatloop.com/current_affairs.rss'),
(u'Community & Culture', u'http://www.xpatloop.com/community.rss'),
(u'Business & Finance', u'http://www.xpatloop.com/business.rss'),
(u'Entertainment', u'http://www.xpatloop.com/entertainment.rss'),
(u'Dining Guide', u'http://www.xpatloop.com/dining_guide.rss'),
(u'Getting Around', u'http://www.xpatloop.com/getting_around.rss'),
(u'Movies', u'http://www.xpatloop.com/movies.rss'),
(u'Shopping', u'http://www.xpatloop.com/shopping_channel.rss'),
(u'Travel', u'http://www.xpatloop.com/travel.rss'),
(u'Sport and Fitness', u'http://www.xpatloop.com/sport_and_fitness.rss'),
(u'Health and Wellness', u'http://www.xpatloop.com/health_and_wellness.rss'),
(u'Infotech & Telco', u'http://www.xpatloop.com/infotech_telco.rss'),
(u'Real Estate', u'http://www.xpatloop.com/property_real_estate.rss'),
(u'Specials', u'http://www.xpatloop.com/specials.rss'),
(u'Video Channel', u'http://www.xpatloop.com/video.rss'),
(u'Events', u'http://www.xpatloop.com/events.rss')]

View File

@ -1,32 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Yagmur(BasicNewsRecipe):
title = u'Yagmur Dergisi'
__author__ = u'thomass'
description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
oldest_article = 90
max_articles_per_feed = 100
no_stylesheets = True
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
language = 'tr'
publication_type = 'magazine'
encoding = 'ISO 8859-9'
publisher = 'thomass'
conversion_options = {
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
}
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
feeds = [
(u'Yagmur', u'http://open.dapper.net/services/yagmur'),
]
def print_version(self, url): # there is a problem caused by table format
return url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=', 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=')
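
As the comment notes, the article view is table-based, so the recipe fetches the site's print formatter instead; both pages share a konu_id. On a hypothetical id:

    url = 'http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=42'  # hypothetical id
    print(url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=',
                      'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id='))
    # http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=42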

View File

@ -1,22 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class YakimaHeraldRepublicRecipe(BasicNewsRecipe):
title = u'Yakima Herald-Republic'
description = 'The Yakima Herald-Republic.'
language = 'en'
__author__ = 'Laura Gjovaag'
oldest_article = 1.5
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
keep_only_tags = [
dict(name='div', attrs={'id': ['searchleft', 'headline_credit']}),
dict(name='div', attrs={'class': ['photo', 'cauthor', 'photocredit']}),
dict(name='div', attrs={'id': ['content_body', 'footerleft']})
]
extra_css = '.cauthor {font: 60% monospace;} .photocredit {font: 60% monospace;}'
feeds = [
(u'Yakima Herald Online', u'http://feeds.feedburner.com/yhronlinenews'),
]

View File

@ -1,22 +0,0 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1350731826(BasicNewsRecipe):
title = u'Yazihane'
oldest_article = 30
max_articles_per_feed = 100
__author__ = 'A Erdogan'
description = 'Sports Blog'
publisher = 'yazihaneden.com'
category = 'sports, basketball, nba, cycling, euroleague'
no_stylesheets = True
use_embedded_content = False
masthead_url = 'http://www.yazihaneden.com/wp-content/uploads/Untitled-1.png'
language = 'tr'
    keep_only_tags = [
        # WordPress post wrapper, e.g. id="post-1234"
        dict(name='div', attrs={'id': re.compile(r'(^| )post-\d+')})]
remove_tags_after = dict(name='div', attrs={'class': 'post-footer clear'})
feeds = [(u'Yazihane', u'http://www.yazihaneden.com/feed/')]
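
A quick check of the corrected id pattern above, assuming WordPress-style container ids like post-1234:

    import re

    pat = re.compile(r'(^| )post-\d+')
    assert pat.search('post-1234')       # id is exactly the post marker
    assert pat.search('hentry post-99')  # marker among other tokens
    assert not pat.search('compost-1')   # no match mid-word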

View File

@ -1,39 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class YemenTimesRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'en_YE'
country = 'YE'
version = 1
title = u'Yemen Times'
publisher = u'yementimes.com'
category = u'News, Opinion, Yemen'
description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'
oldest_article = 10
max_articles_per_feed = 100
use_embedded_content = False
encoding = 'utf-8'
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
auto_cleanup = True
feeds = [
('News',
'http://www.yementimes.com/?tpl=1341'),
]
extra_css = '''
body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
div.yemen_byline {font-size: medium; font-weight: bold;}
div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
.yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
'''
conversion_options = {'comments': description, 'tags': category, 'language': 'en',
'publisher': publisher, 'linearize_tables': True}

View File

@ -1,31 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class YeniUmit(BasicNewsRecipe):
title = u'Yeni Umit Dergisi'
__author__ = u'thomass'
description = 'Aylık Dini İlimler ve Kültür Dergisi'
oldest_article = 45
max_articles_per_feed = 100
no_stylesheets = True
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
language = 'tr'
publication_type = 'magazine'
encoding = 'ISO 8859-9'
publisher = 'thomass'
conversion_options = {
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
}
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
feeds = [
(u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
]
def print_version(self, url): # there is a problem caused by table format
return url.replace('http://www.yeniumit.com.tr/konular', 'http://www.yeniumit.com.tr/yazdir')

View File

@ -1,59 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
'''
www.yomiuri.co.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class YOLNews(BasicNewsRecipe):
title = u'Yomiuri Online (Latest)'
__author__ = 'Hiroshi Miura'
oldest_article = 1
max_articles_per_feed = 50
description = 'Japanese traditional newspaper Yomiuri Online News'
publisher = 'Yomiuri Online News'
category = 'news, japan'
language = 'ja'
encoding = 'UTF-8'
index = 'http://www.yomiuri.co.jp/latestnews/'
remove_javascript = True
masthead_title = u'YOMIURI ONLINE'
keep_only_tags = [{'class': "article text-resizeable"}]
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a, curarticle in enumerate(curfeed.articles):
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList) > 0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index + 1] = []
return feeds
def parse_index(self):
feeds = []
newsarticles = []
soup = self.index_to_soup(self.index)
listlatest = soup.find(
'ul', attrs={'class': 'list-common list-common-latest'})
if listlatest:
for itt in listlatest.findAll('li'):
itema = itt.find('a', href=True)
if itema:
                    item_headline = itema.find(
                        'span', attrs={'class': 'headline'})
                    item_date = item_headline.find(
                        'span', attrs={'class': 'update'})
                    newsarticles.append({
                        'title': item_headline.contents[0],
                        # convert the Tag to plain text for the article date
                        'date': self.tag_to_string(item_date) if item_date else '',
                        'url': itema['href'],
                        'description': ''
                    })
feeds.append(('latest', newsarticles))
return feeds
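
parse_feeds above runs after the normal RSS parse and strips every entry routed through the rssad.jp ad redirector. The same idea in a hedged standalone form, against calibre's Feed objects (attribute names as used in the recipe; treat this as illustrative):

    import re

    def strip_ad_articles(feeds):
        # Drop every article whose URL mentions the ad redirector.
        for feed in feeds:
            for article in [a for a in feed.articles if re.search(r'rssad\.jp', a.url)]:
                feed.articles.remove(article)
        return feeds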

View File

@ -1,70 +0,0 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Zaman (BasicNewsRecipe):
title = u'ZAMAN Gazetesi'
description = u"Zaman Gazetesi'nin internet sitesinden günlük haberler"
__author__ = u'thomass'
oldest_article = 2
max_articles_per_feed = 50
no_stylesheets = True
use_embedded_content = False
encoding = 'utf-8'
publisher = 'Feza Gazetecilik'
category = 'news, haberler,TR,gazete'
language = 'tr'
publication_type = 'newspaper'
extra_css = 'h1{text-transform: capitalize; font-weight: bold; font-size: 22px;color:#0000FF} p{text-align:justify} '
conversion_options = {
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
}
cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'
ignore_duplicate_articles = {'title', 'url'}
auto_cleanup = False
remove_empty_feeds = True
    # keep_only_tags left disabled; candidate selectors kept for reference:
    # dict(name='div', attrs={'id': ['contentposition19']})
    # dict(name='div', attrs={'id': ['news-detail-content']})
    # dict(name='td', attrs={'class': ['columnist-detail', 'columnist_head']})
    remove_tags = [dict(name='img', attrs={'src': ['http://cmsmedya.zaman.com.tr/images/logo/logo.bmp']}),
                   dict(name='hr', attrs={'class': ['interactive-hr']})]
feeds = [
(u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
(u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
(u'Politika', u'http://www.zaman.com.tr/politika.rss'),
(u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
(u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
(u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
(u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
(u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
(u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
(u'Dizi Yazı', u'http://www.zaman.com.tr/dizi.rss'),
(u'Bilişim', u'http://www.zaman.com.tr/bilisim.rss'),
(u'Otomotiv', u'http://www.zaman.com.tr/otomobil.rss'),
(u'Spor', u'http://www.zaman.com.tr/spor.rss'),
(u'Kürsü', u'http://www.zaman.com.tr/kursu.rss'),
(u'Eğitim', u'http://www.zaman.com.tr/egitim.rss'),
(u'Kültür Sanat', u'http://www.zaman.com.tr/kultursanat.rss'),
(u'Televizyon', u'http://www.zaman.com.tr/televizyon.rss'),
(u'Aile', u'http://www.zaman.com.tr/aile.rss'),
(u'Cuma Eki', u'http://www.zaman.com.tr/cuma.rss'),
(u'Cumaertesi Eki', u'http://www.zaman.com.tr/cumaertesi.rss'),
(u'Pazar Eki', u'http://www.zaman.com.tr/pazar.rss'),
(u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
(u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
]
def print_version(self, url):
return url.replace('http://www.zaman.com.tr/newsDetail_getNewsById.action?newsId=', 'http://www.zaman.com.tr/newsDetail_openPrintPage.action?newsId=')

View File

@ -1,21 +0,0 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from calibre.web.feeds.news import BasicNewsRecipe
class ZTS(BasicNewsRecipe):
title = u'Zaufana Trzecia Strona'
__author__ = 'fenuks'
description = u'Niezależne źródło wiadomości o świecie bezpieczeństwa IT'
category = 'IT, security'
language = 'pl'
cover_url = 'http://www.zaufanatrzeciastrona.pl/wp-content/uploads/2012/08/z3s_h100.png'
extra_css = '.thumbnail {float: left; margin-right:5px;}'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_empty_feeds = True
use_embedded_content = False
keep_only_tags = [dict(name='div', attrs={'class': 'post postcontent'})]
remove_tags = [dict(name='div', attrs={'class': 'dolna-ramka'})]
feeds = [(u'Strona g\u0142\xf3wna', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaGlowna'),
(u'Drobiazgi', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaDrobiazgi')]

View File

@ -1,57 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
class Zaxid(BasicNewsRecipe):
title = 'Zaxid.net'
__author__ = 'rpalyvoda (with fixes by bugmen00t)'
oldest_article = 14
max_articles_per_feed = 100
language = 'uk'
cover_url = 'https://zaxid.net/images/logo.png'
masthead_url = 'https://zaxid.net/images/logo.png'
auto_cleanup = True
feeds = [
('\u0417\u0430\u0433\u0430\u043B\u044C\u043D\u0438\u0439 RSS', 'http://zaxid.net/rss/all.xml'),
('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://zaxid.net/rss/1.xml'),
('\u0421\u0442\u0430\u0442\u0442\u0456', 'https://zaxid.net/rss/2.xml'),
('\u0411\u043B\u043E\u0433\u0438', 'https://zaxid.net/rss/3.xml'),
('\u0421\u0443\u0441\u043F\u0456\u043B\u044C\u0441\u0442\u0432\u043E', 'https://zaxid.net/rss/4.xml'),
('\u0415\u043A\u043E\u043D\u043E\u043C\u0456\u043A\u0430', 'https://zaxid.net/rss/5.xml'),
('\u041A\u0443\u043B\u044C\u0442\u0443\u0440\u0430', 'https://zaxid.net/rss/6.xml'),
('\u0421\u043F\u043E\u0440\u0442', 'https://zaxid.net/rss/7.xml'),
('\u0421\u0432\u0456\u0442', 'https://zaxid.net/rss/8.xml'),
('IQ', 'https://zaxid.net/rss/9.xml'),
('\u0410\u043D\u043E\u043D\u0441\u0438', 'https://zaxid.net/rss/13.xml'),
('ZAXID.NET TV', 'https://zaxid.net/rss/zaxid_tv.xml'),
('\u041D\u043E\u0432\u0438\u043D\u0438 \u041B\u044C\u0432\u043E\u0432\u0430', 'https://zaxid.net/rss/lviv_news.xml'),
('\u0424\u043E\u0442\u043E', 'https://zaxid.net/rss/19.xml'),
('\u0414\u0456\u043C', 'https://zaxid.net/rss/26.xml'),
('\u0417\u0434\u043E\u0440\u043E\u0432\u0027\u044F', 'https://zaxid.net/rss/28.xml'),
('\u0410\u0432\u0442\u043E', 'https://zaxid.net/rss/29.xml'),
('\u041B\u044C\u0432\u0456\u0432', 'https://zaxid.net/rss/16.xml'),
('\u041F\u0440\u0438\u043A\u0430\u0440\u043F\u0430\u0442\u0442\u044F', 'https://zaxid.net/rss/59.xml'),
('\u0422\u0435\u0440\u043D\u043E\u043F\u0456\u043B\u044C', 'https://zaxid.net/rss/60.xml'),
('\u0412\u043E\u043B\u0438\u043D\u044C', 'https://zaxid.net/rss/61.xml'),
('\u0417\u0430\u043A\u0430\u0440\u043F\u0430\u0442\u0442\u044F', 'https://zaxid.net/rss/62.xml'),
('\u0427\u0435\u0440\u043D\u0456\u0432\u0446\u0456', 'https://zaxid.net/rss/63.xml'),
('\u0420\u0456\u0432\u043D\u0435', 'https://zaxid.net/rss/65.xml'),
('\u0425\u043C\u0435\u043B\u044C\u043D\u0438\u0446\u044C\u043A\u0438\u0439', 'https://zaxid.net/rss/68.xml'),
('\u041D\u0435\u0440\u0443\u0445\u043E\u043C\u0456\u0441\u0442\u044C', 'https://zaxid.net/rss/37.xml'),
('\u0427\u0435\u0440\u0432\u043E\u043D\u043E\u0433\u0440\u0430\u0434', 'https://zaxid.net/rss/40.xml'),
('\u0421\u0442\u0440\u0438\u0439', 'https://zaxid.net/rss/41.xml'),
('\u0414\u0440\u043E\u0433\u043E\u0431\u0438\u0447', 'https://zaxid.net/rss/42.xml'),
('\u0412\u0456\u043D\u043D\u0438\u0446\u044F', 'https://zaxid.net/rss/44.xml'),
('\u041F\u043E\u0434\u043E\u0440\u043E\u0436\u0456', 'https://zaxid.net/rss/46.xml'),
('\u0414\u0456\u0442\u0438', 'https://zaxid.net/rss/53.xml'),
('Громадський транспорт', 'https://zaxid.net/rss/54.xml'),
('\u0420\u0435\u0446\u0435\u043F\u0442\u0438', 'https://zaxid.net/rss/55.xml'),
('\u0421\u043F\u043E\u0440\u0442', 'https://zaxid.net/rss/56.xml'),
('\u041A\u0443\u0440\u0439\u043E\u0437\u0438', 'https://zaxid.net/rss/57.xml'),
('\u0414\u043E\u0437\u0432\u0456\u043B\u043B\u044F', 'https://zaxid.net/rss/58.xml')
]

View File

@ -1,40 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch zdnet.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class cdnet(BasicNewsRecipe):
    title = 'ZDNet'
    description = 'ZDNet security'
__author__ = 'Oliver Niesner, Krittika Goyal'
language = 'en'
use_embedded_content = False
timefmt = ' [%d %b %Y]'
max_articles_per_feed = 40
no_stylesheets = True
encoding = 'latin1'
auto_cleanup = True
    # remove_tags = [dict(id='eyebrows'),
    #                dict(id='header'),
    #                dict(id='search'),
    #                dict(id='nav'),
    #                dict(id='blog-author-info'),
    #                dict(id='post-tags'),
    #                dict(id='bio-naraine'),
    #                dict(id='bio-kennedy'),
    #                dict(id='author-short-disclosure-kennedy')]
feeds = [('zdnet', 'http://feeds.feedburner.com/zdnet/security')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
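
Deleting inline style attributes, as this preprocess_html does, is the stock way to let extra_css or the reader's defaults take over. Standalone illustration (markup invented):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<div style="color:red"><p style="font-size:9px">x</p></div>')
    for item in soup.findAll(style=True):
        del item['style']
    print(soup)  # the style attributes are gone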

View File

@ -1,20 +0,0 @@
__version__ = 'v1.0'
__date__ = '7, April 2012'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1333705905(BasicNewsRecipe):
title = u'Zerocalcare'
__author__ = 'faber1971'
description = 'Free Italian Comics'
oldest_article = 7
language = 'it'
max_articles_per_feed = 100
auto_cleanup = False
keep_only_tags = [
dict(name='div', attrs={'class': 'main entry-content group'})
]
masthead_url = 'http://zerocalcare.it/wp-content/uploads/2011/11/zerocalcare-banner.jpg'
feeds = [(u'Zerocalcare', u'http://feeds.feedburner.com/Zerocalcareit')]

View File

@ -1,36 +0,0 @@
#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe
class Zitabe(BasicNewsRecipe):
title = u'Zita.be'
__author__ = u'erkfuizfeuadjfjzefzfuzeff'
description = u'Lifestyle News from Belgium in Dutch'
oldest_article = 7
language = 'nl_BE'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
keep_only_tags = [
dict(name='title'), dict(name='article', attrs={'class': 'article-main'})
]
remove_tags = [
dict(name='div', attrs={'class': 'box-50 box-omega sidebar-webtips'}),
dict(name='blockquote', attrs={'class': 'instagram-media'}),
]
feeds = [(u'Auto', u'http://www.zita.be/xml/auto.xml'),
(u'Business', u'http://www.zita.be/xml/business.xml'),
(u'Entertainment', u'http://www.zita.be/xml/entertainment.xml'),
(u'Fun', u'http://www.zita.be/xml/fun.xml'),
(u'Lifestyle', u'http://www.zita.be/xml/lifestyle.xml'),
(u'Nieuws', u'http://www.zita.be/xml/nieuws.xml'),
(u'Nieuws binneland', u'http://www.zita.be/xml/nieuws-binnenland.xml'),
(u'Bizar', u'http://www.zita.be/xml/nieuws-bizar.xml'),
(u'Nieuws Buitenland', u'http://www.zita.be/xml/nieuws-buitenland.xml'),
(u'Nieuws Economie', u'http://www.zita.be/xml/nieuws-economie.xml'),
(u'Sport', u'http://www.zita.be/xml/nieuws-sport.xml')]

View File

@ -1,12 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1320264153(BasicNewsRecipe):
title = u'zougla'
__author__ = 'Stelios'
language = 'el'
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
feeds = [(u'zougla', u'http://www.zougla.gr/ArticleRss.xml')]