remove dead recipes
These recipes are based on RSS feeds that no longer work.
@ -1,246 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2018, Dale Furrow dkfurrow@gmail.com'
|
|
||||||
'''
|
|
||||||
chron.com
|
|
||||||
'''
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import traceback
|
|
||||||
from collections import OrderedDict
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import NavigableString
|
|
||||||
from calibre.utils.cleantext import clean_ascii_chars
|
|
||||||
from calibre.utils.date import dt_factory, local_tz, utcfromtimestamp
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
regex_date_only = re.compile(r"""(?:January|February|March|April|
|
|
||||||
{8}May|June|July|August|September|October|November|
|
|
||||||
{8}December)\s[0-9]{1,2},\s20[01][0-9]""")
|
|
||||||
regex_time_only = re.compile(r"""[0-9]{1,2}:[0-9]{1,2} \w{2}""")
|
|
||||||
sentence_regex = re.compile(r"(\S.+?[.!?])(?=\s+|$)")
|
|
||||||
blog_regex = re.compile(r'post-\d+')
|
|
||||||
|
|
||||||
pages = OrderedDict([('news', ('/news/houston-texas/', ['business', 'sports'])),
|
|
||||||
('business', ('/business/', ['sports'])),
|
|
||||||
('sports', ('/sports/', ['business']))])
|
|
||||||
|
|
||||||
base_url = "http://www.chron.com"
|
|
||||||
|
|
||||||
# sports has 'core-package sports' class
|
|
||||||
xpath_general = """//div[contains(@class, 'centerpiece-tabs') or
|
|
||||||
contains(@class, 'wrapper') or
|
|
||||||
contains(@class, 'contentGroups') or
|
|
||||||
contains(@class, 'headline-list') or
|
|
||||||
contains(@class, 'core-package sports') or
|
|
||||||
contains(@class, 'news')]
|
|
||||||
//a[contains(@class, 'hdn-analytics')]"""
|
|
||||||
|
|
||||||
excluded_titles = ["Winning numbers", "TV-radio listings"]
|
|
||||||
|
|
||||||
|
|
||||||
def validate_link(page, link, title):
|
|
||||||
other_category = page[1][1]
|
|
||||||
if not title or len(title.strip()) < 5:
|
|
||||||
print("{0} rejected, title too short".format(link))
|
|
||||||
return None
|
|
||||||
parts = link.split('/')
|
|
||||||
if len(parts) > 3 and parts[3] in other_category:
|
|
||||||
print("{0} rejected, covered in other section".format(link))
|
|
||||||
return None
|
|
||||||
for excluded_title in excluded_titles:
|
|
||||||
if title.find(excluded_title) != -1:
|
|
||||||
print("{0} rejected, excluded title".format(link))
|
|
||||||
return None
|
|
||||||
return link, title
|
|
||||||
|
|
||||||
|
|
||||||
def get_article_parsed(index_to_soup, this_url):
|
|
||||||
return index_to_soup(this_url, as_tree=True)
|
|
||||||
|
|
||||||
|
|
||||||
def sort_subject(element_list):
|
|
||||||
# priority of subjects
|
|
||||||
subjects = ['news', 'neighborhood', 'entertainment']
|
|
||||||
subjects.reverse()
|
|
||||||
subject_dict = OrderedDict(zip(subjects, range(len(subjects))))
|
|
||||||
rank_dict = OrderedDict([(rank, []) for rank in range(len(subjects) + 1)])
|
|
||||||
for element in element_list:
|
|
||||||
try:
|
|
||||||
subj = element[0].split('/')[3]
|
|
||||||
except Exception:
|
|
||||||
subj = 'unknown'
|
|
||||||
if subject_dict.get(subj) is not None:
|
|
||||||
rank_dict[subject_dict[subj] + 1].append(element)
|
|
||||||
else:
|
|
||||||
rank_dict[0].append(element)
|
|
||||||
# now return in reverse order, sorted
|
|
||||||
combined_list = []
|
|
||||||
for rank in range(len(subjects), -1, -1):
|
|
||||||
article_list = rank_dict[rank]
|
|
||||||
article_list.sort()
|
|
||||||
combined_list.extend(article_list)
|
|
||||||
return combined_list
|
|
||||||
|
|
||||||
|
|
||||||
def get_links_from_section_page(index_to_soup, page):
|
|
||||||
page_doc = get_article_parsed(index_to_soup, base_url + page[1][0])
|
|
||||||
els = page_doc.xpath(xpath_general)
|
|
||||||
element_list = []
|
|
||||||
for el in els:
|
|
||||||
link = el.get('href').split('?')[0]
|
|
||||||
title = el.text
|
|
||||||
if title is None or len(title.strip()) < 5:
|
|
||||||
link_id = link.split('/')[-1][:-3].split('-')[:-1]
|
|
||||||
title = ' '.join(link_id)
|
|
||||||
if link[:4] != 'http':
|
|
||||||
link = base_url + link
|
|
||||||
validated_link = validate_link(page=page, link=link, title=title)
|
|
||||||
if validated_link is not None:
|
|
||||||
element_list.append(validated_link)
|
|
||||||
sorted_element_list = sort_subject(element_list)
|
|
||||||
return [page[0], sorted_element_list]
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_links_from_sections(index_to_soup):
|
|
||||||
all_sections = []
|
|
||||||
article_set = set()
|
|
||||||
final_dict = OrderedDict()
|
|
||||||
for item in pages.items():
|
|
||||||
print("getting links from {0}".format(item[0]))
|
|
||||||
all_sections.append(get_links_from_section_page(index_to_soup, item))
|
|
||||||
for section in all_sections:
|
|
||||||
section_id = section[0]
|
|
||||||
article_list = section[1]
|
|
||||||
final_dict[section_id] = []
|
|
||||||
for article in article_list:
|
|
||||||
if article[0] not in article_set:
|
|
||||||
article_set.add(article[0])
|
|
||||||
final_dict[section_id].append(article)
|
|
||||||
return final_dict
|
|
||||||
|
|
||||||
|
|
||||||
# noinspection PyAbstractClass
|
|
||||||
class HoustonChronicle(BasicNewsRecipe):
|
|
||||||
title = u'The Houston Chronicle'
|
|
||||||
description = 'News from Houston, Texas'
|
|
||||||
__author__ = 'Dale Furrow'
|
|
||||||
language = 'en'
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_attributes = ['style', 'xmlns']
|
|
||||||
remove_empty_feeds = True
|
|
||||||
timefmt = '[%a, %d %b %Y]'
|
|
||||||
timestampfmt = '%Y%m%d%H%M%S'
|
|
||||||
# ignore_duplicate_articles = {'url'} # defaults to None
|
|
||||||
extra_css = '.article_date {display: none}'
|
|
||||||
category = 'news, USA'
|
|
||||||
masthead_url = 'http://www.chron.com/apple-touch-icon-76x76.png'
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class': ['article-content', 'article-wrap']})]
|
|
||||||
remove_tags = [dict(name='div', attrs={'social-title': True}),
|
|
||||||
dict(name='div', attrs={'class':
|
|
||||||
['control-panel', 'gallery-overlay-inner',
|
|
||||||
'most-popular', 'asset-media mos-playlist',
|
|
||||||
'asset_media asset-media']}),
|
|
||||||
dict(name='li', attrs={'class': ['hst-resgalleryitem taboola-frame hidden',
|
|
||||||
'hst-resgalleryitem hidden']}),
|
|
||||||
dict(name='ul', attrs={'class': 'clearfix'})]
|
|
||||||
|
|
||||||
# max_articles_per_feed = 5 # for use in testing
|
|
||||||
|
|
||||||
def get_article_description_from_doc(self, soup):
|
|
||||||
description_chars_break = 140
|
|
||||||
description_max_chars = 300
|
|
||||||
try:
|
|
||||||
els = soup.findAll('p')
|
|
||||||
if len(els) > 0:
|
|
||||||
out_text = ""
|
|
||||||
this_ellipsis = ""
|
|
||||||
for el in els:
|
|
||||||
if el is not None:
|
|
||||||
result = []
|
|
||||||
for descendant in el.contents:
|
|
||||||
if isinstance(descendant, NavigableString):
|
|
||||||
result.append(type(u'')(descendant).strip())
|
|
||||||
all_text = u' '.join(result)
|
|
||||||
if len(all_text) > 1:
|
|
||||||
sentences = re.findall(sentence_regex, all_text)
|
|
||||||
if sentences is not None and len(sentences) > 0:
|
|
||||||
for sentence in sentences:
|
|
||||||
if len(out_text) < description_chars_break:
|
|
||||||
out_text += sentence + " "
|
|
||||||
else:
|
|
||||||
if len(out_text) > description_max_chars:
|
|
||||||
this_ellipsis = "..."
|
|
||||||
return out_text[:description_max_chars] + this_ellipsis
|
|
||||||
return out_text
|
|
||||||
else:
|
|
||||||
return "No Article description returned"
|
|
||||||
except Exception as ex:
|
|
||||||
self.log('Error on Article Description')
|
|
||||||
traceback.print_exc(file=sys.stdout)
|
|
||||||
print(str(ex))
|
|
||||||
return ""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_published_time_from_doc(page_doc):
|
|
||||||
|
|
||||||
def get_regular_timestamp(date_string):
|
|
||||||
try:
|
|
||||||
out_date = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%SZ")
|
|
||||||
return out_date
|
|
||||||
except ValueError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
el = page_doc.findAll(
|
|
||||||
lambda this_tag: this_tag.name == "time" and ('itemprop', 'datePublished') in this_tag.attrs)
|
|
||||||
if len(el) == 1:
|
|
||||||
return get_regular_timestamp(el[0].get('datetime'))
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
|
||||||
"""
|
|
||||||
Called when each HTML page belonging to article is downloaded.
|
|
||||||
Intended to be used to get article metadata like author/summary/etc.
|
|
||||||
from the parsed HTML (soup).
|
|
||||||
|
|
||||||
:param article: A object of class :class:`calibre.web.feeds.Article`.
|
|
||||||
If you change the summary, remember to also change the text_summary
|
|
||||||
:param soup: Parsed HTML belonging to this article
|
|
||||||
:param first: True iff the parsed HTML is the first page of the article.
|
|
||||||
"""
|
|
||||||
summary = self.get_article_description_from_doc(soup)
|
|
||||||
article_date = self.get_published_time_from_doc(soup)
|
|
||||||
if article_date is not None:
|
|
||||||
article_timestamp = float((article_date - utcfromtimestamp(0)).total_seconds())
|
|
||||||
article.date = article_timestamp
|
|
||||||
article.utctime = dt_factory(article_date.timetuple(), assume_utc=True, as_utc=True)
|
|
||||||
article.localtime = article.utctime.astimezone(local_tz)
|
|
||||||
summary_date = article.localtime.strftime("%Y-%m-%d %H:%M") if article_date is not None else "No Date"
|
|
||||||
article.summary = "{0}: {1}".format(summary_date, summary)
|
|
||||||
article.text_summary = clean_ascii_chars(article.summary)
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
self.timefmt = ' [%a, %d %b, %Y]'
|
|
||||||
self.log('starting parse_index: ', time.strftime(self.timestampfmt))
|
|
||||||
feeds = []
|
|
||||||
sections = get_all_links_from_sections(self.index_to_soup)
|
|
||||||
for section_id, article_list in sections.items():
|
|
||||||
self.log("Getting {0} section, {1:d} articles".format(section_id, len(article_list)))
|
|
||||||
articles = []
|
|
||||||
for article_info in article_list:
|
|
||||||
self.log("Adding {0} to feed".format(article_info[0]))
|
|
||||||
articles.append({'title': article_info[1], 'url': article_info[0],
|
|
||||||
'description': '', 'date': ""})
|
|
||||||
self.log("Appending {0:d} articles for {1}".format(len(articles), section_id))
|
|
||||||
feeds.append((section_id, articles))
|
|
||||||
self.log('finished parse_index: ', time.strftime(self.timestampfmt))
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return soup
|
|
Before Width: | Height: | Size: 170 B |
Before Width: | Height: | Size: 341 B |
Before Width: | Height: | Size: 504 B |
Before Width: | Height: | Size: 289 B |
Before Width: | Height: | Size: 184 B |
Before Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 262 B |
Before Width: | Height: | Size: 262 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 98 B |
Before Width: | Height: | Size: 801 B |
Before Width: | Height: | Size: 262 B |
Before Width: | Height: | Size: 504 B |
Before Width: | Height: | Size: 647 B |
Before Width: | Height: | Size: 266 B |
Before Width: | Height: | Size: 266 B |
Before Width: | Height: | Size: 267 B |
Before Width: | Height: | Size: 1.0 KiB |
Before Width: | Height: | Size: 667 B |
Before Width: | Height: | Size: 911 B |
Before Width: | Height: | Size: 1.9 KiB |
Before Width: | Height: | Size: 78 B |
Before Width: | Height: | Size: 3.5 KiB |
Before Width: | Height: | Size: 680 B |
Before Width: | Height: | Size: 147 B |
Before Width: | Height: | Size: 810 B |
Before Width: | Height: | Size: 446 B |
Before Width: | Height: | Size: 314 B |
Before Width: | Height: | Size: 867 B |
Before Width: | Height: | Size: 718 B |
Before Width: | Height: | Size: 2.4 KiB |
Before Width: | Height: | Size: 1.2 KiB |
Before Width: | Height: | Size: 652 B |
Before Width: | Height: | Size: 236 B |
Before Width: | Height: | Size: 433 B |
Before Width: | Height: | Size: 224 B |
Before Width: | Height: | Size: 111 B |
Before Width: | Height: | Size: 239 B |
Before Width: | Height: | Size: 846 B |
Before Width: | Height: | Size: 994 B |
Before Width: | Height: | Size: 749 B |
@ -1,135 +0,0 @@
|
|||||||
import re
|
|
||||||
from collections import OrderedDict
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
_issue_url = ""
|
|
||||||
|
|
||||||
COMMA_SEP_RE = re.compile(r"\s*,\s*")
|
|
||||||
SPACE_SEP_RE = re.compile(r"\s+")
|
|
||||||
NON_NUMERIC_RE = re.compile(r"[^\d]+")
|
|
||||||
|
|
||||||
|
|
||||||
class Poetry(BasicNewsRecipe):
|
|
||||||
title = "Poetry Magazine"
|
|
||||||
__author__ = "ping"
|
|
||||||
description = (
|
|
||||||
"Founded in Chicago by Harriet Monroe in 1912, Poetry is the oldest monthly "
|
|
||||||
"devoted to verse in the English-speaking world. https://www.poetryfoundation.org/poetrymagazine"
|
|
||||||
)
|
|
||||||
publication_type = "magazine"
|
|
||||||
language = "en"
|
|
||||||
encoding = "utf-8"
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
auto_cleanup = False
|
|
||||||
ignore_duplicate_articles = {"url"}
|
|
||||||
compress_news_images = False
|
|
||||||
|
|
||||||
remove_attributes = ["style", "font"]
|
|
||||||
keep_only_tags = [dict(name="article")]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name="button"),
|
|
||||||
dict(
|
|
||||||
attrs={
|
|
||||||
"class": [
|
|
||||||
"c-socialBlocks",
|
|
||||||
"c-index",
|
|
||||||
"o-stereo",
|
|
||||||
"u-hideAboveSmall",
|
|
||||||
"c-slideTrigger",
|
|
||||||
"js-slideshow",
|
|
||||||
]
|
|
||||||
}
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = """
|
|
||||||
h1 { font-size: 1.8rem; margin-bottom: 0.5rem; }
|
|
||||||
.o-titleBar-summary { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
|
|
||||||
div.o-titleBar-meta, div.c-feature-sub { font-weight: bold; color: #444; margin-bottom: 1.5rem; }
|
|
||||||
div.pcms_media img, div.o-mediaEnclosure img { max-width: 100%; height: auto; }
|
|
||||||
div.o-mediaEnclosure .o-mediaEnclosure-metadata { font-size: 0.8rem; margin-top: 0.2rem; }
|
|
||||||
div.c-feature-bd { margin-bottom: 2rem; }
|
|
||||||
div.c-auxContent { color: #222; font-size: 0.85rem; margin-top: 2rem; }
|
|
||||||
"""
|
|
||||||
|
|
||||||
def extract_from_img_srcset(self, srcset: str, max_width=0):
|
|
||||||
sources = [s.strip() for s in COMMA_SEP_RE.split(srcset) if s.strip()]
|
|
||||||
if len(sources) == 1:
|
|
||||||
# just a regular img url probably
|
|
||||||
return sources[0]
|
|
||||||
parsed_sources = []
|
|
||||||
for src in sources:
|
|
||||||
src_n_width = [s.strip() for s in SPACE_SEP_RE.split(src) if s.strip()]
|
|
||||||
if len(src_n_width) != 2:
|
|
||||||
raise ValueError(f"Not a valid srcset: {srcset}")
|
|
||||||
parsed_sources.append(
|
|
||||||
(
|
|
||||||
src_n_width[0].strip(),
|
|
||||||
int(NON_NUMERIC_RE.sub("", src_n_width[1].strip())),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
parsed_sources = list(set(parsed_sources))
|
|
||||||
parsed_sources = sorted(parsed_sources, key=lambda x: x[1], reverse=True)
|
|
||||||
if not max_width:
|
|
||||||
return parsed_sources[0][0]
|
|
||||||
for img, width in parsed_sources:
|
|
||||||
if width <= max_width:
|
|
||||||
return img
|
|
||||||
return parsed_sources[-1][0]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for img in soup.select("div.o-mediaEnclosure img"):
|
|
||||||
if not img.get("srcset"):
|
|
||||||
continue
|
|
||||||
img["src"] = self.extract_from_img_srcset(img["srcset"], max_width=1000)
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
if _issue_url:
|
|
||||||
soup = self.index_to_soup(_issue_url)
|
|
||||||
else:
|
|
||||||
soup = self.index_to_soup("https://www.poetryfoundation.org/poetrymagazine")
|
|
||||||
current_issue = soup.select("div.c-cover-media a")
|
|
||||||
if not current_issue:
|
|
||||||
self.abort_recipe_processing("Unable to find latest issue")
|
|
||||||
current_issue = current_issue[0]
|
|
||||||
soup = self.index_to_soup(current_issue["href"])
|
|
||||||
|
|
||||||
issue_edition = self.tag_to_string(soup.find("h1"))
|
|
||||||
self.timefmt = f" [{issue_edition}]"
|
|
||||||
cover_image = soup.select("div.c-issueBillboard-cover-media img")[0]
|
|
||||||
parsed_cover_url = urlparse(
|
|
||||||
cover_image["srcset"].split(",")[-1].strip().split(" ")[0]
|
|
||||||
)
|
|
||||||
self.cover_url = f"{parsed_cover_url.scheme}://{parsed_cover_url.netloc}{parsed_cover_url.path}"
|
|
||||||
|
|
||||||
sectioned_feeds = OrderedDict()
|
|
||||||
|
|
||||||
tabs = soup.find_all("div", attrs={"class": "c-tier_tabbed"})
|
|
||||||
for tab in tabs:
|
|
||||||
tab_title = tab.find("div", attrs={"class": "c-tier-tab"})
|
|
||||||
tab_content = tab.find("div", attrs={"class": "c-tier-content"})
|
|
||||||
if not (tab_title and tab_content):
|
|
||||||
continue
|
|
||||||
tab_title = self.tag_to_string(tab_title)
|
|
||||||
sectioned_feeds[tab_title] = []
|
|
||||||
for li in tab_content.select("ul.o-blocks > li"):
|
|
||||||
author = self.tag_to_string(
|
|
||||||
li.find("span", attrs={"class": "c-txt_attribution"})
|
|
||||||
)
|
|
||||||
for link in li.find_all("a", attrs={"class": "c-txt_abstract"}):
|
|
||||||
self.log("Found article:", self.tag_to_string(link))
|
|
||||||
sectioned_feeds[tab_title].append(
|
|
||||||
{
|
|
||||||
"title": self.tag_to_string(link),
|
|
||||||
"url": link["href"],
|
|
||||||
"author": author,
|
|
||||||
"description": author,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return sectioned_feeds.items()
|
|
@ -1,52 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
ultimahora.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class UltimaHora_py(BasicNewsRecipe):
|
|
||||||
title = 'Ultima Hora'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'Noticias de Paraguay y el resto del mundo'
|
|
||||||
publisher = 'EDITORIAL EL PAIS S.A.'
|
|
||||||
category = 'news, politics, Paraguay'
|
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 200
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'cp1252'
|
|
||||||
use_embedded_content = False
|
|
||||||
language = 'es_PY'
|
|
||||||
remove_empty_feeds = True
|
|
||||||
publication_type = 'newspaper'
|
|
||||||
masthead_url = 'http://www.ultimahora.com/imgs/uh-com.gif'
|
|
||||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .sub_titulo_mediano,.TituloNota{font-family: Georgia,"Times New Roman",Times,serif} .sub_titulo_mediano{font-weight: bold} ' # noqa
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name=['form', 'iframe', 'embed', 'object', 'link', 'base', 'table'])]
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(attrs={'id': ['nota_titulo', 'nota_copete', 'texto']})]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
|
|
||||||
(u'Arte y Espectaculos', u'http://www.ultimahora.com/adjuntos/rss/UHEspectaculos.xml'),
|
|
||||||
(u'Ciudad del Este', u'http://www.ultimahora.com/adjuntos/rss/UHCDE.xml'),
|
|
||||||
(u'Deportes', u'http://www.ultimahora.com/adjuntos/rss/UHDeportes.xml'),
|
|
||||||
(u'Ultimo momento', u'http://www.ultimahora.com/adjuntos/rss/UltimoMomento.xml'),
|
|
||||||
(u'Nacionales', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-nacionales.xml'),
|
|
||||||
(u'Politica', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-politica.xml'),
|
|
||||||
(u'Sucesos', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-sucesos.xml'),
|
|
||||||
(u'Economia', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-economia.xml'),
|
|
||||||
(u'Ciencia y Tecnologia', u'http://www.ultimahora.com/adjuntos/rss/uh-rss-ciencia.xml')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
return soup
|
|
@ -1,51 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
unica.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Unica(BasicNewsRecipe):
|
|
||||||
title = u'Unica'
|
|
||||||
__author__ = u'Silviu Cotoar\u0103'
|
|
||||||
description = 'Asa cum esti tu'
|
|
||||||
publisher = 'Unica'
|
|
||||||
oldest_article = 5
|
|
||||||
language = 'ro'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
category = 'Ziare,Reviste,Femei'
|
|
||||||
encoding = 'utf-8'
|
|
||||||
cover_url = 'http://www.unica.ro/fileadmin/images/logo.gif'
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': 'sticky'}), dict(
|
|
||||||
name='p', attrs={'class': 'bodytext'})
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['top-links']}), dict(name='div', attrs={'id': ['autor_name']}), dict(name='div', attrs={
|
|
||||||
'class': ['box-r']}), dict(name='div', attrs={'class': ['category']}), dict(name='div', attrs={'class': ['data']})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = [
|
|
||||||
dict(name='ul', attrs={'class': 'pager'})
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Feeds', u'http://www.unica.ro/rss.html')
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return self.adeify_images(soup)
|
|
@ -1,86 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class UnitedDaily(BasicNewsRecipe):
|
|
||||||
title = u'聯合新聞網'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
|
|
||||||
feeds = [(u'焦點', u'http://udn.com/udnrss/focus.xml'),
|
|
||||||
(u'政治', u'http://udn.com/udnrss/politics.xml'),
|
|
||||||
(u'社會', u'http://udn.com/udnrss/social.xml'),
|
|
||||||
(u'生活', u'http://udn.com/udnrss/life.xml'),
|
|
||||||
(u'綜合', u'http://udn.com/udnrss/education.xml'),
|
|
||||||
(u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
|
|
||||||
(u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'),
|
|
||||||
(u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
|
|
||||||
(u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
|
|
||||||
(u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
|
|
||||||
(u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
|
|
||||||
(u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
|
|
||||||
(u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
|
|
||||||
(u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
|
|
||||||
(u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'),
|
|
||||||
(u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
|
|
||||||
(u'國際焦點', u'http://udn.com/udnrss/international.xml'),
|
|
||||||
(u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
|
|
||||||
(u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
|
|
||||||
(u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'),
|
|
||||||
(u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
|
|
||||||
(u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
|
|
||||||
(u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
|
|
||||||
(u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
|
|
||||||
(u'房市情報', u'http://udn.com/udnrss/houses.xml'),
|
|
||||||
(u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'),
|
|
||||||
(u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'),
|
|
||||||
(u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'),
|
|
||||||
(u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'),
|
|
||||||
(u'棒球', u'http://udn.com/udnrss/baseball.xml'),
|
|
||||||
(u'籃球', u'http://udn.com/udnrss/basketball.xml'),
|
|
||||||
(u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
|
|
||||||
(u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
|
|
||||||
(u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
|
|
||||||
(u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
|
|
||||||
(u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
|
|
||||||
(u'電影世界', u'http://udn.com/udnrss/movie.xml'),
|
|
||||||
(u'流行音樂', u'http://udn.com/udnrss/music.xml'),
|
|
||||||
(u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
|
|
||||||
(u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'),
|
|
||||||
(u'食樂指南', u'http://udn.com/udnrss/food.xml'),
|
|
||||||
(u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'),
|
|
||||||
(u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
|
|
||||||
(u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'),
|
|
||||||
(u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
|
|
||||||
(u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
|
|
||||||
(u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
|
|
||||||
(u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
|
|
||||||
(u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'),
|
|
||||||
(u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'),
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}''' # noqa
|
|
||||||
|
|
||||||
__author__ = 'Eddie Lau'
|
|
||||||
__version__ = '1.2'
|
|
||||||
language = 'zh_TW'
|
|
||||||
publisher = 'United Daily News Group'
|
|
||||||
description = 'United Daily (Taiwan)'
|
|
||||||
category = 'News, Chinese, Taiwan'
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
conversion_options = {'linearize_tables': True}
|
|
||||||
masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
|
||||||
cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'
|
|
||||||
auto_cleanup = True
|
|
||||||
# keep_only_tags = [dict(name='td', attrs={'class':['story_title']}),
|
|
||||||
# dict(name='div', attrs={'id':['story_title']}),
|
|
||||||
# dict(name='td', attrs={'class':['story_author']}),
|
|
||||||
# dict(name='div', attrs={'id':['story_author']}),
|
|
||||||
# dict(name='td', attrs={'class':['story']}),
|
|
||||||
# dict(name='div', attrs={'id':['story']}),
|
|
||||||
# ]
|
|
@ -1,22 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# https://github.com/iemejia/calibrecolombia
|
|
||||||
|
|
||||||
'''
|
|
||||||
http://www.unperiodico.unal.edu.co/
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class UNPeriodico(BasicNewsRecipe):
|
|
||||||
title = u'UN Periodico'
|
|
||||||
language = 'es_CO'
|
|
||||||
__author__ = 'Ismael Mejia <iemejia@gmail.com>'
|
|
||||||
cover_url = 'http://www.unperiodico.unal.edu.co/fileadmin/templates/periodico/img/logoperiodico.png'
|
|
||||||
description = 'UN Periodico'
|
|
||||||
oldest_article = 30
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
publication_type = 'newspaper'
|
|
||||||
feeds = [
|
|
||||||
(u'UNPeriodico', u'http://www.unperiodico.unal.edu.co/rss/type/rss2/')
|
|
||||||
]
|
|
@ -1,83 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__author__ = 'Darko Spasovski'
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>'
|
|
||||||
'''
|
|
||||||
utrinski.com.mk
|
|
||||||
'''
|
|
||||||
|
|
||||||
import datetime
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre import browser
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class UtrinskiVesnik(BasicNewsRecipe):
|
|
||||||
|
|
||||||
INDEX = 'http://www.utrinski.com.mk/'
|
|
||||||
title = 'Utrinski Vesnik'
|
|
||||||
description = 'Daily Macedonian newspaper'
|
|
||||||
masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg'
|
|
||||||
language = 'mk'
|
|
||||||
remove_javascript = True
|
|
||||||
publication_type = 'newspaper'
|
|
||||||
category = 'news, Macedonia'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
|
||||||
[
|
|
||||||
# Remove anything before the start of the article.
|
|
||||||
(r'<body.*?Article start-->', lambda match: '<body>'),
|
|
||||||
|
|
||||||
# Remove anything after the end of the article.
|
|
||||||
(r'<!--Article end.*?</body>', lambda match: '</body>'),
|
|
||||||
]
|
|
||||||
]
|
|
||||||
extra_css = """
|
|
||||||
body{font-family: Arial,Helvetica,sans-serif}
|
|
||||||
.WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none}
|
|
||||||
"""
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment': description,
|
|
||||||
'tags': category,
|
|
||||||
'language': language,
|
|
||||||
'linearize_tables': True
|
|
||||||
}
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
feeds = []
|
|
||||||
# open main page
|
|
||||||
soup = self.index_to_soup(self.INDEX)
|
|
||||||
# find all anchors with class attribute equal to
|
|
||||||
# 'WB_UTRINSKIVESNIK_MainMenu'
|
|
||||||
for section in soup.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_MainMenu'}):
|
|
||||||
sectionTitle = section.contents[0].string
|
|
||||||
sectionUrl = self.INDEX + section['href'].strip()
|
|
||||||
# open the anchor link
|
|
||||||
raw = browser().open_novisit(sectionUrl).read()
|
|
||||||
sectionSoup = BeautifulSoup(raw)
|
|
||||||
# find all anchors with class attribute equal to
|
|
||||||
# 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'
|
|
||||||
sectionArticles = sectionSoup.findAll(
|
|
||||||
'a', attrs={'class': 'WB_UTRINSKIVESNIK_ONLINEArticleTitle'})
|
|
||||||
articles = []
|
|
||||||
for sectionArticle in sectionArticles:
|
|
||||||
# article title = anchor's contents, article url = anchor's
|
|
||||||
# href
|
|
||||||
articleTitle = sectionArticle.contents[0].string.strip()
|
|
||||||
articleUrl = self.INDEX + sectionArticle['href'].strip()
|
|
||||||
articleDate = datetime.datetime.today().strftime('%d.%m.%Y')
|
|
||||||
articles.append(
|
|
||||||
{'title': articleTitle, 'url': articleUrl, 'description': '', 'date': articleDate})
|
|
||||||
if articles:
|
|
||||||
feeds.append((sectionTitle, articles))
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
datum = datetime.datetime.today().strftime('%d_%m_%Y')
|
|
||||||
return 'http://www.utrinski.com.mk/WBStorage/Files/' + datum + '.jpg'
|
|
@ -1,28 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Valby Bladet
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class ValbyBladet_dk(BasicNewsRecipe):
    """Danish local newspaper Valby Bladet (articles and reader comments)."""

    title = 'Valby Bladet'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'

    # At most one week of history, capped per feed; let calibre clean markup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Valby Bladet',
         'http://minby.dk/valby-bladet/feed/'),
        ('Kommentarer til Valby Bladet',
         'http://minby.dk/valby-bladet/comments/feed/'),
    ]
|
|
||||||
|
|
@ -1,28 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Vanløse Bladet
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class VanloeseBladet_dk(BasicNewsRecipe):
    """Danish local newspaper Vanløse Bladet (articles and reader comments)."""

    title = 'Vanløse Bladet'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'

    # At most one week of history, capped per feed; let calibre clean markup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Vanløse Bladet',
         'http://minby.dk/vanloese-bladet/feed/'),
        ('Kommentarer til Vanløse Bladet',
         'http://minby.dk/vanloese-bladet/comments/feed/'),
    ]
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Lokalavisen Varde
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class VardeLokalavisen_dk(BasicNewsRecipe):
    """Danish local newspaper Lokalavisen Varde (varde.lokalavisen.dk)."""

    title = 'Lokalavisen Varde'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Varde og omegn på varde.lokalavisen.dk'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'

    # At most one week of history, capped per feed; let calibre clean markup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Lokalavisen Varde',
         'http://varde.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Varde',
         'http://varde.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Varde',
         'http://varde.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Varde',
         'http://varde.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Varde',
         'http://varde.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Varde',
         'http://varde.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
|
||||||
|
|
@ -1,83 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
elargentino.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
    """Create a new tag on *soup*, bridging BeautifulSoup API versions.

    Prefers the BS4-style ``soup.new_tag`` factory; falls back to direct
    ``Tag`` construction for the legacy API.
    """
    factory = getattr(soup, 'new_tag', None)
    if factory is None:
        return Tag(soup, name, attrs=attrs or None)
    return factory(name, attrs=dict(attrs))
|
|
||||||
|
|
||||||
|
|
||||||
class Veintitres(BasicNewsRecipe):
    """Argentine political magazine Veintitres, fetched via elargentino.com."""

    title = 'Veintitres'
    __author__ = 'Darko Miletic'
    description = 'Revista Argentino dedicada a politica'
    publisher = 'Veintitres'
    category = 'news, politics, Argentina'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'es_AR'

    lang = 'es-AR'
    direction = 'ltr'
    INDEX = 'http://www.elargentino.com/medios/120/veintitres.html'
    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher + '"\ncomments="' + description +
        '"\ntags="' + category +
        '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
    )

    keep_only_tags = [dict(name='div', attrs={'class': 'ContainerPop'})]

    remove_tags = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=120&Content-Type=text/xml&ChannelDesc=Veintitres')]

    def print_version(self, url):
        # The print view is addressed purely by the numeric article id that
        # follows '/nota-' in the article URL.
        tail = url.partition('/nota-')[2]
        article_id = tail.partition('-')[0]
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        # Drop inline styling and declare language/charset explicitly.
        for styled in soup.findAll(style=True):
            del styled['style']
        soup.html['lang'] = self.lang
        soup.html['dir'] = self.direction
        meta_lang = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Language"), ("content", self.lang)])
        meta_charset = new_tag(soup, 'meta', [
            ("http-equiv", "Content-Type"), ("content", "text/html; charset=utf-8")])
        soup.head.insert(0, meta_lang)
        soup.head.insert(1, meta_charset)
        return soup

    def get_cover_url(self):
        """Locate the magazine cover inside the 'colder' box on the index page."""
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        box = soup.find('div', attrs={'class': 'colder'})
        if box:
            clean_url = self.image_url_processor(None, box.div.img['src'])
            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
        return cover_url

    def image_url_processor(self, baseurl, url):
        # Keep only the image id, dropping any extra query parameters.
        base, sep, rest = url.rpartition('?Id=')
        img_id = rest.partition('&')[0]
        return base + sep + img_id
|
|
@ -1,32 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Lokalavisen Vejle
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class VejleLokalavisen_dk(BasicNewsRecipe):
    """Danish local newspaper Lokalavisen Vejle (vejle.lokalavisen.dk)."""

    title = 'Lokalavisen Vejle'
    __author__ = 'CoderAllan.github.com'
    description = 'Lokale og regionale nyheder, sport, kultur fra Vejle og omegn på vejle.lokalavisen.dk'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'

    # At most one week of history, capped per feed; let calibre clean markup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Seneste nyt fra Lokalavisen Vejle',
         'http://vejle.lokalavisen.dk/section/senestenytrss'),
        ('Seneste lokale nyheder fra Lokalavisen Vejle',
         'http://vejle.lokalavisen.dk/section/senestelokalenyhederrss'),
        ('Seneste sport fra Lokalavisen Vejle',
         'http://vejle.lokalavisen.dk/section/senestesportrss'),
        ('Seneste 112 nyheder fra Lokalavisen Vejle',
         'http://vejle.lokalavisen.dk/section/seneste112rss'),
        ('Seneste kultur nyheder fra Lokalavisen Vejle',
         'http://vejle.lokalavisen.dk/section/senestekulturrss'),
        ('Seneste læserbreve fra Lokalavisen Vejle',
         'http://vejle.lokalavisen.dk/section/senestelaeserbreverss'),
    ]
|
|
||||||
|
|
@ -1,28 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
# https://manual.calibre-ebook.com/news_recipe.html
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
'''
|
|
||||||
Vesterbro Bladet
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class VesterbroBladet_dk(BasicNewsRecipe):
    """Danish local newspaper Vesterbro Bladet (articles and reader comments)."""

    title = 'Vesterbro Bladet'
    __author__ = 'CoderAllan.github.com'
    language = 'da'
    category = 'newspaper, news, localnews, sport, culture, Denmark'

    # At most one week of history, capped per feed; let calibre clean markup.
    oldest_article = 7
    max_articles_per_feed = 50
    auto_cleanup = True

    feeds = [
        ('Vesterbro Bladet',
         'http://minby.dk/vesterbro-bladet/feed/'),
        ('Kommentarer til Vesterbro Bladet',
         'http://minby.dk/vesterbro-bladet/comments/feed/'),
    ]
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class VFR(BasicNewsRecipe):
    """VFR Magazine (Italian), fetched from its feedburner RSS feed."""

    title = u'VFR Magazine'
    __author__ = 'Krittika Goyal'
    language = 'it'

    oldest_article = 31  # days
    max_articles_per_feed = 25

    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('VFR Magazine', 'http://feeds.feedburner.com/vfrmagazine'),
    ]
|
|
@ -1,59 +0,0 @@
|
|||||||
##
|
|
||||||
# Title: Vice News recipe for calibre
|
|
||||||
# Author: Adrian Tennessee
|
|
||||||
# Contact: adrian.tennessee at domainthatnobodytakes.com
|
|
||||||
##
|
|
||||||
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
|
||||||
# Copyright: Copyright 2014 Adrian Tennessee
|
|
||||||
##
|
|
||||||
# Written: 2014-09-13
|
|
||||||
# Last Edited: 2014-09-13
|
|
||||||
##
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
    """Build a BeautifulSoup ``attrs`` matcher for the given CSS classes.

    *classes* is a whitespace-separated list of class names; the returned
    dict matches any tag whose ``class`` attribute shares at least one of
    those names.
    """
    # str.split() with no argument tolerates runs of whitespace; the original
    # split(' ') produced empty tokens on double spaces.
    q = frozenset(classes.split())
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
|
||||||
|
|
||||||
|
|
||||||
class VICENews(BasicNewsRecipe):
    """VICE News (news.vice.com) via its RSS feed."""

    __author__ = 'Adrian Tennessee (adrian.tennessee at domainthatnobodytakes.com)'
    __license__ = 'GPLv3'
    __copyright__ = '2014, Adrian Tennessee <adrian.tennessee at domainthatnobodytakes.com)'

    title = u'VICE News'
    language = 'en'
    description = u'VICE News web site ebook'
    publisher = 'VICE Media'
    category = 'news, world'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/d/dc/Vice_News_logo.jpg'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # article-title modifies h1-tag of article title
    extra_css = '.article-title { font-size:125%; font-weight:bold }'

    keep_only_tags = [
        classes('article__header__title contributors article__header__datebar__date--original short-form__body__article-body')
    ]

    remove_tags = [
        classes('lazy-vice-ad abc__article_embed article__tagged user-newsletter-signup article__embed-component'),
    ]

    def preprocess_html(self, soup):
        # Copy each <source> srcset into the lazy-loaded <img> that follows it
        # so the downloader sees a plain src attribute, then drop the <source>.
        for img in soup.findAll(**classes('responsive-image__img')):
            for source in img.findPreviousSiblings('source'):
                img['src'] = source['srcset'].split('?')[0]
                source.extract()
        return soup

    feeds = [(u'VICE News', u'https://news.vice.com/rss')]
|
|
@ -1,42 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ViceESRecipe(BasicNewsRecipe):
    """Spanish edition of Vice magazine (vice.com/es)."""

    title = u'Vice Magazine España'
    __author__ = 'atordo'
    description = u'La página web oficial de la revista Vice España'
    category = u'noticias, fotografía, blogs, moda, arte, cine, música, literatura, tecnología'
    cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif'
    language = 'es'
    publication_type = 'magazine'

    oldest_article = 14
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Articles are paginated; follow 'next' links up to ten pages deep.
    recursions = 10
    match_regexps = [r'/read/.*\?Contentpage=[2-9]$']

    keep_only_tags = [
        dict(attrs={'class': ['article_title', 'article_content', 'next']})
    ]
    remove_tags = [
        dict(attrs={'class': ['social_buttons', 'search', 'tweet',
                              'like', 'inline_socials', 'stumblebadge', 'plusone']})
    ]

    extra_css = '''
        .author{font-size:small}
        img{margin-bottom: 0.4em; display:block; margin-left:auto; margin-right: auto}
    '''

    # Strip the scorecardresearch tracking pixel before conversion.
    preprocess_regexps = [
        (re.compile(r'<img src="http://.*\.scorecardresearch\.com/'), lambda m: '')
    ]

    feeds = [('Vice', 'http://www.vice.com/es/rss')]
|
|
@ -1,42 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ViceDERecipe(BasicNewsRecipe):
    """German edition of Vice magazine (vice.com/de)."""

    title = u'Vice Magazin Deutschland'
    __author__ = 'atordo;alex'
    description = u'Die offizielle Website des Vice Magazins Deutschland'
    category = u'Nachrichten, Fotografie, Blogs, Mode, Kunst, Film, Musik, Literatur, Technik'
    cover_url = 'http://www.seeklogo.com/images/V/Vice-logo-668578AC94-seeklogo.com.gif'
    language = 'de'
    publication_type = 'magazine'

    oldest_article = 14
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # Articles are paginated; follow 'next' links up to ten pages deep.
    recursions = 10
    match_regexps = [r'/read/.*\?Contentpage=[2-9]$']

    keep_only_tags = [
        dict(attrs={'class': ['article_title', 'article_content', 'next']})
    ]
    remove_tags = [
        dict(attrs={'class': ['social_buttons', 'search', 'tweet',
                              'like', 'inline_socials', 'stumblebadge', 'plusone']})
    ]

    extra_css = '''
        .author{font-size:small}
        img{margin-bottom: 0.4em; display:block; margin-left:auto; margin-right: auto}
    '''

    # Strip the scorecardresearch tracking pixel before conversion.
    preprocess_regexps = [
        (re.compile(r'<img src="http://.*\.scorecardresearch\.com/'), lambda m: '')
    ]

    feeds = [('Vice', 'http://www.vice.com/de/rss')]
|
|
@ -1,48 +0,0 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
vijesti.me
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Vijesti(BasicNewsRecipe):
    """Montenegrin daily newspaper Vijesti (vijesti.me)."""

    title = 'Vijesti'
    __author__ = 'Darko Miletic'
    description = 'News from Montenegro'
    publisher = 'Daily Press Vijesti'
    category = 'news, politics, Montenegro'
    language = 'sr'
    publication_type = 'newspaper'

    oldest_article = 2
    max_articles_per_feed = 150
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False

    extra_css = """
    @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
    @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
    body{font-family: Georgia,"Times New Roman",Times,serif1,serif}
    .articledescription,.article,.chapter{font-family: sans1, sans-serif}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    # Replaces U+0110 (Đ) with U+00D0 (Ð); NOTE(review): presumably a glyph
    # workaround for reader fonts — confirm before removing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [
        dict(name='div', attrs={'id': ['article_intro_text', 'article_text']})]

    remove_tags = [dict(name=['object', 'link', 'embed', 'form'])]

    feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss/')]

    def preprocess_html(self, soup):
        # Make embedded images e-reader friendly.
        return self.adeify_images(soup)
|
|
@ -1,31 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
www.virtualshackles.com
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Virtualshackles(BasicNewsRecipe):
    """Webcomic Virtual Shackles, delivered via its feedburner feed."""

    title = 'Virtual Shackles'
    __author__ = 'Darko Miletic'
    description = "The adventures of Orion and Jack, making games they'd never play for people they don't like."
    category = 'virtual shackles, virtualshackles, games, webcomic, comic, video game, orion, jack'
    publisher = 'Virtual Shackles'
    language = 'en'
    publication_type = 'comic'

    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    encoding = 'cp1252'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    feeds = [(u'Virtual Shackles', u'http://feeds2.feedburner.com/virtualshackles')]

    def preprocess_html(self, soup):
        # Make embedded images e-reader friendly.
        return self.adeify_images(soup)
|
|
@ -1,68 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
viva.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Viva(BasicNewsRecipe):
    """Romanian celebrity/events magazine Viva (viva.ro)."""

    title = u'Viva'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'Vedete si evenimente'
    publisher = u'Viva'
    category = 'Ziare,Reviste,Femei'
    language = 'ro'
    cover_url = 'http://www.viva.ro/images/default/viva.gif'

    oldest_article = 25
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
        .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'class': 'articol'}),
        dict(name='div', attrs={'class': 'gallery clearfix'}),
        dict(name='div', attrs={'align': 'justify'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['breadcrumbs']}),
        dict(name='div', attrs={'class': ['links clearfix']}),
        dict(name='a', attrs={'id': ['img_arrow_right']}),
        dict(name='img', attrs={'id': ['zoom']}),
        dict(name='div', attrs={'class': ['foto_counter']}),
        dict(name='div', attrs={'class': ['gal_select clearfix']}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': ['links clearfix']})
    ]

    feeds = [
        (u'Vedete', u'http://feeds.feedburner.com/viva-Vedete'),
        (u'Evenimente', u'http://feeds.feedburner.com/viva-Evenimente'),
        (u'Frumusete', u'http://feeds.feedburner.com/viva-Beauty-Fashion'),
        (u'Noutati', u'http://feeds.feedburner.com/viva-Noutati'),
    ]

    def preprocess_html(self, soup):
        # Make embedded images e-reader friendly.
        return self.adeify_images(soup)
|
|
@ -1,40 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Huan Komrade T <huantnh at gmail.com>'
|
|
||||||
'''
|
|
||||||
vnexpress.net
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class BBCVietnamese(BasicNewsRecipe):
    """Vietnamese news from vnexpress.net.

    NOTE(review): the class name looks copy-pasted from a BBC recipe; it is
    kept unchanged because it is the recipe's public identifier.
    """

    title = u'VnExpress'
    __author__ = 'Huan Komrade T'
    description = 'Vietnam news and current affairs from the Food Production Technology Corporation'
    language = 'vi'
    no_stylesheets = True

    encoding = 'utf-8'
    recursions = 0

    remove_tags = [dict(name='div', attrs={'class': 'footer'})]
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    feeds = [
        ('Index', 'http://vnexpress.net/rss/gl/trang-chu.rss'),
        ('Vietnam', 'http://vnexpress.net/rss/gl/xa-hoi.rss'),
        ('World News', 'http://vnexpress.net/rss/gl/the-gioi.rss'),
        ('Business', 'http://vnexpress.net/rss/gl/kinh-doanh.rss'),
        ('Culture', 'http://vnexpress.net/rss/gl/van-hoa.rss'),
        ('Sports', 'http://vnexpress.net/rss/gl/the-thao.rss'),
        ('Lifestyle', 'http://vnexpress.net/rss/gl/doi-song.rss'),
        ('From The Readers',
         'http://vnexpress.net/rss/gl/ban-doc-viet.rss'),
        ('From The Readers - Sharing',
         'http://vnexpress.net/rss/gl/ban-doc-viet-tam-su.rss'),
    ]

    def print_version(self, url):
        # The '?q=1' query selects the printable single-page view.
        return url + '?q=1'
|
|
@ -1,26 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class VoetbalBE(BasicNewsRecipe):
    """Belgian football news from voetbalbelgie.be."""

    title = u'Voetbal België'
    __author__ = u'erkfuizfeuadjfjzefzfuzeff'
    description = u'Voetbalnieuws uit België'
    language = 'nl_BE'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False

    keep_only_tags = [
        dict(name='title'),
        dict(name='h1'),
        dict(name='a', attrs={'class': 'fancy'}),
        dict(name='img', attrs={'class': 'border kalooga_10605'}),
        dict(name='div', attrs={'class': 'text'}),
    ]

    remove_tags = []

    feeds = [(u'Voetbalnieuws', u'http://www.voetbalbelgie.be/nl/rss')]
|
|
@ -1,25 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class HindustanTimes(BasicNewsRecipe):
    """Voice of America Learning English feeds.

    NOTE(review): the class name looks copy-pasted from another recipe; it is
    kept unchanged because it is the recipe's public identifier.
    """

    title = u'Voice of America'
    __author__ = 'Krittika Goyal'
    language = 'en'

    oldest_article = 15  # days
    max_articles_per_feed = 25

    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('All Zones',
         'http://learningenglish.voanews.com/rss/?count=20'),
        ('World',
         'http://learningenglish.voanews.com/rss/?count=20&zoneid=957'),
        ('USA',
         'http://learningenglish.voanews.com/rss/?count=20&zoneid=958'),
        ('Health',
         'http://learningenglish.voanews.com/rss/?count=20&zoneid=955'),
    ]
|
|
@ -1,86 +0,0 @@
|
|||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
def new_tag(soup, name, attrs=()):
    """Version-agnostic tag factory: uses BS4's ``soup.new_tag`` when present,
    otherwise builds a legacy ``Tag`` directly."""
    make = getattr(soup, 'new_tag', None)
    if make is not None:
        return make(name, attrs=dict(attrs))
    return Tag(soup, name, attrs=attrs or None)
|
|
||||||
|
|
||||||
|
|
||||||
class VrijNederlandRecipe(BasicNewsRecipe):
    """Dutch weekly opinion magazine Vrij Nederland (vn.nl)."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    locale = 'nl'
    version = 1

    title = u'Vrij Nederland'
    publisher = u'Weekbladpers Tijdschriften'
    category = u'News, Opinion'
    description = u'Weekly opinion magazine from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True

    conversion_options = {'publisher': publisher,
                          'tags': category, 'comments': description}

    # Section feeds followed by individual columnist feeds.
    feeds = [
        (u'Politiek', u'http://www.vn.nl/politiek.rss'),
        (u'Buitenland', u'http://www.vn.nl/buitenland.rss'),
        (u'Economie', u'http://www.vn.nl/economie.rss'),
        (u'Justitie', u'http://www.vn.nl/justitie.rss'),
        (u'Samenleving', u'http://www.vn.nl/samenleving.rss'),
        (u'Crime', u'http://www.vn.nl/crime.rss'),
        (u'Media', u'http://www.vn.nl/media.rss'),
        (u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss'),
        (u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss'),
        (u'Ko Colijn', u'http://www.vn.nl/colijn.rss'),
        (u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss'),
        (u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss'),
        (u'Stephan Sanders', u'http://www.vn.nl/sanders.rss'),
        (u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss'),
        (u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss'),
        (u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'cl-column column-one'})]

    remove_tags = [
        dict(name='div', attrs={'class': 'wpg-element guest-book-overview'}),
        dict(name='div', attrs={'class': 'wpg-element forum-message-form'}),
        dict(name='div', attrs={'class': 'mediaterms'}),
        dict(name='div', attrs={'class': 'label-term'}),
        dict(name='div', attrs={
            'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'}),
        dict(name='object'),
        dict(name='link'),
        dict(name='meta'),
    ]

    def preprocess_html(self, soup):
        # Tidy the article metadata line: drop the permalink and separators.
        meta = soup.find('div', attrs={'class': 'meta'})
        if meta:
            permalink = meta.find('span', attrs={'class': 'link'})
            if permalink:
                permalink.extract()
            for sep in meta.findAll('span', attrs={'class': 'seperator'}):
                sep.extract()

        # Their head is full of 'if IE6/7/8' tags; swap it for a fresh,
        # empty one altogether.
        soup.head.extract()
        soup.insert(0, new_tag(soup, 'head'))

        return soup
|
|
@ -1,26 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1278773519(BasicNewsRecipe):
    """Waco Tribune Herald (wacotrib.com)."""

    title = u'Waco Tribune Herald'
    __author__ = 'rty'
    publisher = 'A Robinson Media Company'
    description = 'Waco, Texas, Newspaper'
    category = 'News, Texas, Waco'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 100

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}
    auto_cleanup = True

    # Feeds are the site's search endpoint with category filters baked in.
    feeds = [
        (u'News', u'http://www.wacotrib.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=news/ap_nation,news/ap_nation/*&f=rss'),
        (u'Sports', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=sports*&f=rss'),
        (u'AccessWaco', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=entertainment/accesswaco*&f=rss'),
        (u'Opinions', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=opinion*&f=rss'),
    ]
|
|
@ -1,50 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = u'2011, Silviu Cotoar\u0103'
|
|
||||||
'''
|
|
||||||
wall-street.ro
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WallStreetRo(BasicNewsRecipe):
    """Romanian business news site wall-street.ro."""

    title = u'Wall Street'
    __author__ = u'Silviu Cotoar\u0103'
    description = ''
    publisher = 'Wall Street'
    category = 'Ziare'
    language = 'ro'
    cover_url = 'http://img.wall-street.ro/images/WS_new_logo.jpg'

    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'class': 'article_header'}),
        dict(name='div', attrs={'class': 'article_text'}),
    ]

    remove_tags = [
        dict(name='p', attrs={'class': ['page_breadcrumbs']}),
        dict(name='div', attrs={'id': ['article_user_toolbox']}),
        dict(name='p', attrs={'class': ['comments_count_container']}),
        dict(name='div', attrs={'class': ['article_left_column']}),
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class': 'clearfloat'})
    ]

    feeds = [
        (u'Feeds', u'http://img.wall-street.ro/rssfeeds/wall-street.xml')
    ]

    def preprocess_html(self, soup):
        # Make embedded images e-reader friendly.
        return self.adeify_images(soup)
|
|
@ -1,149 +0,0 @@
|
|||||||
from datetime import date, timedelta
|
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WaPoCartoonsRecipe(BasicNewsRecipe):
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'kwetal'
|
|
||||||
language = 'en'
|
|
||||||
version = 2
|
|
||||||
|
|
||||||
title = u'Washington Post Cartoons'
|
|
||||||
publisher = u'Washington Post'
|
|
||||||
category = u'News, Cartoons'
|
|
||||||
description = u'Cartoons from the Washington Post'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
use_embedded_content = False
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
feeds = []
|
|
||||||
feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/'))
|
|
||||||
feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/'))
|
|
||||||
feeds.append(
|
|
||||||
(u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb'))
|
|
||||||
feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/'))
|
|
||||||
feeds.append(
|
|
||||||
(u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk'))
|
|
||||||
feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/'))
|
|
||||||
feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/'))
|
|
||||||
feeds.append(
|
|
||||||
(u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/'))
|
|
||||||
feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/'))
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
|
|
||||||
h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;}
|
|
||||||
#name {margin-bottom: 0.2em}
|
|
||||||
#copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
index = []
|
|
||||||
oldestDate = date.today() - timedelta(days=self.oldest_article)
|
|
||||||
oldest = oldestDate.strftime('%Y%m%d')
|
|
||||||
for feed in self.feeds:
|
|
||||||
cartoons = []
|
|
||||||
soup = self.index_to_soup(feed[1])
|
|
||||||
|
|
||||||
cartoon = {'title': 'Current', 'date': None,
|
|
||||||
'url': feed[1], 'description': ''}
|
|
||||||
cartoons.append(cartoon)
|
|
||||||
|
|
||||||
select = soup.find('select', attrs={'name': ['url', 'dest']})
|
|
||||||
if select:
|
|
||||||
cartoonCandidates = []
|
|
||||||
if select['name'] == 'url':
|
|
||||||
cartoonCandidates = self.cartoonCandidatesWaPo(
|
|
||||||
select, oldest)
|
|
||||||
else:
|
|
||||||
cartoonCandidates = self.cartoonCandidatesCreatorsCom(
|
|
||||||
select, oldest)
|
|
||||||
|
|
||||||
for cartoon in cartoonCandidates:
|
|
||||||
cartoons.append(cartoon)
|
|
||||||
|
|
||||||
index.append([feed[0], cartoons])
|
|
||||||
|
|
||||||
return index
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
freshSoup = self.getFreshSoup(soup)
|
|
||||||
|
|
||||||
div = soup.find('div', attrs={'id': 'name'})
|
|
||||||
if div:
|
|
||||||
freshSoup.body.append(div)
|
|
||||||
comic = soup.find('div', attrs={'id': 'comic_full'})
|
|
||||||
|
|
||||||
img = comic.find('img')
|
|
||||||
if '&' in img['src']:
|
|
||||||
img['src'], sep, bad = img['src'].rpartition('&')
|
|
||||||
|
|
||||||
freshSoup.body.append(comic)
|
|
||||||
freshSoup.body.append(soup.find('div', attrs={'id': 'copyright'}))
|
|
||||||
else:
|
|
||||||
span = soup.find('span', attrs={'class': 'title'})
|
|
||||||
if span:
|
|
||||||
del span['class']
|
|
||||||
span['id'] = 'name'
|
|
||||||
span.name = 'div'
|
|
||||||
freshSoup.body.append(span)
|
|
||||||
|
|
||||||
img = soup.find('img', attrs={'class': 'pic_big'})
|
|
||||||
if img:
|
|
||||||
td = img.parent
|
|
||||||
td['style'] = ''
|
|
||||||
del td['style']
|
|
||||||
td.name = 'div'
|
|
||||||
td['id'] = 'comic_full'
|
|
||||||
freshSoup.body.append(td)
|
|
||||||
|
|
||||||
td = soup.find('td', attrs={'class': 'copy'})
|
|
||||||
if td:
|
|
||||||
for a in td.find('a'):
|
|
||||||
a.extract()
|
|
||||||
del td['class']
|
|
||||||
td['id'] = 'copyright'
|
|
||||||
td.name = 'div'
|
|
||||||
freshSoup.body.append(td)
|
|
||||||
|
|
||||||
return freshSoup
|
|
||||||
|
|
||||||
def getFreshSoup(self, oldSoup):
|
|
||||||
freshSoup = BeautifulSoup(
|
|
||||||
'<html><head><title></title></head><body></body></html>')
|
|
||||||
if oldSoup.head.title:
|
|
||||||
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
|
|
||||||
return freshSoup
|
|
||||||
|
|
||||||
def cartoonCandidatesWaPo(self, select, oldest):
|
|
||||||
opts = select.findAll('option')
|
|
||||||
for i in range(1, len(opts)):
|
|
||||||
url = opts[i]['value'].rstrip('/')
|
|
||||||
dateparts = url.split('/')[-3:]
|
|
||||||
datenum = str(dateparts[0]) + str(dateparts[1]) + str(dateparts[2])
|
|
||||||
if datenum >= oldest:
|
|
||||||
yield {'title': self.tag_to_string(opts[i]), 'date': None, 'url': url, 'description': ''}
|
|
||||||
else:
|
|
||||||
return
|
|
||||||
|
|
||||||
def cartoonCandidatesCreatorsCom(self, select, oldest):
|
|
||||||
monthNames = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05',
|
|
||||||
'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
|
|
||||||
'November': '11', 'December': '12'}
|
|
||||||
|
|
||||||
opts = select.findAll('option', selected=False)
|
|
||||||
for i in range(1, len(opts)):
|
|
||||||
dateString = self.tag_to_string(opts[i])
|
|
||||||
rest, sep, year = dateString.rpartition(', ')
|
|
||||||
parts = rest.split(' ')
|
|
||||||
day = parts[2].rjust(2, '0')
|
|
||||||
month = monthNames[parts[1]]
|
|
||||||
datenum = str(year) + month + str(day)
|
|
||||||
if datenum >= oldest:
|
|
||||||
yield {'title': dateString, 'date': None, 'url': opts[i]['value'], 'description': ''}
|
|
||||||
else:
|
|
||||||
return
|
|
@ -1,102 +0,0 @@
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WatchingAmericaRecipe(BasicNewsRecipe):
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'kwetal'
|
|
||||||
language = 'en'
|
|
||||||
version = 1
|
|
||||||
|
|
||||||
title = u'Watching America'
|
|
||||||
publisher = u'watchingamerica.com'
|
|
||||||
category = u'News'
|
|
||||||
description = u'Global opinion about the United States'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
remove_attributes = ['style']
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
|
|
||||||
.main_content em {font-size: x-small; font-style: italic; color: #696969;}
|
|
||||||
.main_content span strong {font-size: x-large; font-weight: bold;}
|
|
||||||
.insideitro {font-size: xx-small; font-style: italic; color: #666666;}
|
|
||||||
span {padding: 0em; margin 0em;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
INDEX = u'http://watchingamerica.com/News/'
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
answer = []
|
|
||||||
|
|
||||||
soup = self.index_to_soup(self.INDEX)
|
|
||||||
|
|
||||||
articles = []
|
|
||||||
feature = soup.find('div', attrs={'id': 'headzone'})
|
|
||||||
if feature:
|
|
||||||
link = feature.find('a', attrs={'class': 'feature'})
|
|
||||||
url = link.get('href', None)
|
|
||||||
title = self.tag_to_string(link)
|
|
||||||
description = self.tag_to_string(
|
|
||||||
feature.find('h1', attrs={'class': 'pull'}))
|
|
||||||
article = {'title': title, 'date': u'',
|
|
||||||
'url': url, 'description': description}
|
|
||||||
articles.append(article)
|
|
||||||
answer.append(('Feature', articles))
|
|
||||||
|
|
||||||
feed_titles = ['Translations from the West',
|
|
||||||
'Translations from the East']
|
|
||||||
for i in range(1, 3):
|
|
||||||
articles = []
|
|
||||||
div = soup.find('div', attrs={'class': 'newscol' + str(i)})
|
|
||||||
if div:
|
|
||||||
for link in div.findAll('a', attrs={'class': 'headline'}):
|
|
||||||
url = link.get('href', None)
|
|
||||||
title = self.tag_to_string(link)
|
|
||||||
|
|
||||||
description = None
|
|
||||||
h3 = link.findNextSibling('h3')
|
|
||||||
if h3:
|
|
||||||
description = self.tag_to_string(h3)
|
|
||||||
|
|
||||||
article = {'title': title, 'date': u'',
|
|
||||||
'url': url, 'description': description}
|
|
||||||
articles.append(article)
|
|
||||||
answer.append((feed_titles[i - 1], articles))
|
|
||||||
|
|
||||||
return answer
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
freshSoup = self.get_fresh_soup(soup)
|
|
||||||
article = soup.find('p', attrs={'class': 'MsoNormal'}).parent
|
|
||||||
if article:
|
|
||||||
article.name = 'div'
|
|
||||||
del article['width']
|
|
||||||
article['class'] = 'main_content'
|
|
||||||
org = article.find('a', attrs={'href': '?SHOW_ORIGINAL_TEXT'})
|
|
||||||
if org:
|
|
||||||
org.parent.extract()
|
|
||||||
|
|
||||||
intro = article.find('span', attrs={'class': 'insideitro'})
|
|
||||||
if intro:
|
|
||||||
for el in intro.findAll(['strong', 'em', 'br']):
|
|
||||||
if el.name == 'br':
|
|
||||||
el.extract()
|
|
||||||
else:
|
|
||||||
el.name = 'div'
|
|
||||||
|
|
||||||
freshSoup.body.append(article)
|
|
||||||
|
|
||||||
return freshSoup
|
|
||||||
|
|
||||||
def get_fresh_soup(self, oldSoup):
|
|
||||||
freshSoup = BeautifulSoup(
|
|
||||||
'<html><head><title></title></head><body></body></html>')
|
|
||||||
if oldSoup.head.title:
|
|
||||||
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
|
|
||||||
return freshSoup
|
|
@ -1,28 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class XkcdCom(BasicNewsRecipe):
|
|
||||||
cover_url = 'http://what-if.xkcd.com/imgs/whatif-logo.png'
|
|
||||||
masthead_url = 'http://what-if.xkcd.com/imgs/whatif-logo.png'
|
|
||||||
__author__ = 'kisnik'
|
|
||||||
title = 'What If...'
|
|
||||||
description = 'The "What If" feed from xkcd'
|
|
||||||
language = 'en'
|
|
||||||
keep_only_tags = [dict(name='article')]
|
|
||||||
|
|
||||||
use_embedded_content = False
|
|
||||||
oldest_article = 60
|
|
||||||
# add image and text
|
|
||||||
# add an horizontal line after the question
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r'(<img.*title=")([^"]+)(".*>)'),
|
|
||||||
lambda m: '<div>%s%s<p id="photo_text">(%s)</p></div>' % (m.group(1), m.group(3), m.group(2))),
|
|
||||||
(re.compile(r'(<p.*id="attribute">[^>]+</p>)'),
|
|
||||||
lambda n: '%s<hr>' % (n.group(1))),
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = "#photo_text{font-size:small;}"
|
|
||||||
|
|
||||||
feeds = [(u'What If...', u'http://what-if.xkcd.com/feed.atom')]
|
|
@ -1,30 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1294938721(BasicNewsRecipe):
|
|
||||||
title = u'Wichita Eagle'
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'Jason Cameron'
|
|
||||||
description = 'Daily news from the Wichita Eagle'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 30
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id': 'wide'})]
|
|
||||||
feeds = [
|
|
||||||
(u'Local News',
|
|
||||||
u'http://www.kansas.com/news/local/index.rss'),
|
|
||||||
(u'National News',
|
|
||||||
u'http://www.kansas.com/news/nation-world/index.rss'),
|
|
||||||
(u'Sports',
|
|
||||||
u'http://www.kansas.com/sports/index.rss'),
|
|
||||||
(u'Opinion',
|
|
||||||
u'http://www.kansas.com/opinion/index.rss'),
|
|
||||||
(u'Life',
|
|
||||||
u'http://www.kansas.com/living/index.rss'),
|
|
||||||
(u'Entertainment',
|
|
||||||
u'http://www.kansas.com/entertainment/index.rss')
|
|
||||||
]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
urlparts = url.split('/')
|
|
||||||
newadd = urlparts[5] + '/v-print'
|
|
||||||
return url.replace(url, newadd.join(url.split(urlparts[5])))
|
|
@ -1,71 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|
||||||
'''
|
|
||||||
en.wikinews.org
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WikiNews(BasicNewsRecipe):
|
|
||||||
title = 'Wikinews'
|
|
||||||
__author__ = 'Darko Miletic'
|
|
||||||
description = 'News from wikipedia'
|
|
||||||
category = 'news, world'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
publisher = 'Wiki'
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
encoding = 'utf-8'
|
|
||||||
remove_javascript = True
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
html2lrf_options = [
|
|
||||||
'--comment', description, '--category', category, '--publisher', publisher
|
|
||||||
]
|
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + \
|
|
||||||
'"\ncomments="' + description + '"\ntags="' + category + '"'
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='h1', attrs={'id': 'firstHeading'}), dict(
|
|
||||||
name='div', attrs={'id': 'bodyContent'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='link'), dict(name='div', attrs={'id': ['printfooter', 'catlinks', 'footer']}), dict(
|
|
||||||
name='div', attrs={'class': ['thumb left', 'thumb right']})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags_after = dict(name='h2')
|
|
||||||
|
|
||||||
feeds = [(u'News', u'http://feeds.feedburner.com/WikinewsLatestNews')]
|
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
artl = article.get('link', None)
|
|
||||||
rest, sep, article_id = artl.rpartition('/')
|
|
||||||
return 'http://en.wikinews.org/wiki/' + article_id
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
rest, sep, article_id = url.rpartition('/')
|
|
||||||
return 'http://en.wikinews.org/w/index.php?title=' + article_id + '&printable=yes'
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
|
||||||
return 'http://upload.wikimedia.org/wikipedia/commons/b/bd/Wikinews-logo-en.png'
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
mtag = '<meta http-equiv="Content-Language" content="en"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
|
||||||
soup.head.insert(0, mtag)
|
|
||||||
btag = soup.find('div', attrs={'id': 'bodyContent'})
|
|
||||||
for item in btag.findAll('div'):
|
|
||||||
item.extract()
|
|
||||||
for item in btag.findAll('h2'):
|
|
||||||
item.extract()
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
for item in soup.findAll(font=True):
|
|
||||||
del item['font']
|
|
||||||
return soup
|
|
@ -1,36 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1277647803(BasicNewsRecipe):
|
|
||||||
title = u'Winnipeg Sun'
|
|
||||||
__author__ = 'rty'
|
|
||||||
__version__ = '1.0'
|
|
||||||
oldest_article = 2
|
|
||||||
publisher = 'www.winnipegsun.com'
|
|
||||||
description = 'Winnipeg Newspaper'
|
|
||||||
category = 'News, Winnipeg, Canada'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'UTF-8'
|
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
|
||||||
language = 'en_CA'
|
|
||||||
feeds = [
|
|
||||||
(u'News', u'http://www.winnipegsun.com/news/rss.xml'),
|
|
||||||
(u'Columnists', u'http://www.winnipegsun.com/columnists/rss.xml'),
|
|
||||||
(u'Editorial', u'http://www.winnipegsun.com/comment/editorial/rss.xml'),
|
|
||||||
(u'Entertainments', u'http://www.winnipegsun.com/entertainment/rss.xml'),
|
|
||||||
(u'Life', u'http://www.winnipegsun.com/life/rss.xml'),
|
|
||||||
(u'Money', u'http://www.winnipegsun.com/money/rss.xml')
|
|
||||||
]
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': 'article'}),
|
|
||||||
]
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': ['leftBox', 'bottomBox clear']}),
|
|
||||||
dict(name='ul', attrs={'class': 'tabs dl contentSwap'}),
|
|
||||||
dict(name='div', attrs={'id': 'commentsBottom'}),
|
|
||||||
]
|
|
||||||
remove_tags_after = [
|
|
||||||
dict(name='div', attrs={'class': 'bottomBox clear'})
|
|
||||||
]
|
|
@ -1,31 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Winsupersite(BasicNewsRecipe):
|
|
||||||
title = u'Supersite for Windows'
|
|
||||||
description = u'Paul Thurrott SuperSite for Windows'
|
|
||||||
publisher = 'Paul Thurrott'
|
|
||||||
__author__ = 'Hypernova'
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
oldest_article = 30
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_javascript = True
|
|
||||||
conversion_options = {'linearize_tables': True}
|
|
||||||
remove_tags_before = dict(name='h1')
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL | re.IGNORECASE),
|
|
||||||
lambda match: '</body>'),
|
|
||||||
]
|
|
||||||
|
|
||||||
def get_browser(self):
|
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
|
||||||
br.open('http://www.winsupersite.com')
|
|
||||||
return br
|
|
||||||
|
|
||||||
feeds = [(u'Supersite for Windows',
|
|
||||||
u'http://www.winsupersite.com/supersite.xml')]
|
|
@ -1,60 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WiredITA(BasicNewsRecipe):
|
|
||||||
title = u'Wired.it'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 50
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
auto_cleanup = False
|
|
||||||
encoding = 'utf8'
|
|
||||||
masthead_url = 'http://www.wired.com/images/home/wired_logo.gif'
|
|
||||||
description = 'Wired - Make in Italy. Inventa, sbaglia, innova'
|
|
||||||
publisher = 'http://www.wired.it/'
|
|
||||||
language = 'it'
|
|
||||||
__author__ = 'isspro'
|
|
||||||
publication_type = 'magazine'
|
|
||||||
|
|
||||||
conversion_options = {'title': title,
|
|
||||||
'comments': description,
|
|
||||||
'language': language,
|
|
||||||
'publisher': publisher,
|
|
||||||
'authors': title,
|
|
||||||
'smarten_punctuation': True
|
|
||||||
}
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': 'main-article'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='img', attrs={'class': 'avatar img-circle'}),
|
|
||||||
dict(name='div', attrs={'class': 'topics'}),
|
|
||||||
dict(name='div', attrs={'class': 'social-share hidden-lg'}),
|
|
||||||
dict(name='span', attrs={'class': 'label'})
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h1 {font-size:x-large;}
|
|
||||||
p.lead {font-size:medium;}
|
|
||||||
.who {line-height: 0pt; margin: 0pt;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
preprocess_regexps = [
|
|
||||||
(re.compile(r'Pubblicato'), lambda match: '')
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Attualit\xe0', u'http://www.wired.it/attualita/feed/'),
|
|
||||||
(u'Internet', 'http://www.wired.it/internet/feed/'),
|
|
||||||
(u'Gadget', 'http://www.wired.it/gadget/feed/'),
|
|
||||||
(u'Mobile', 'http://www.wired.it/mobile/feed/'),
|
|
||||||
(u'Scienza', 'http://www.wired.it/scienza/feed/'),
|
|
||||||
(u'Economia', 'http://www.wired.it/economia/feed/'),
|
|
||||||
(u'LifeStyle', 'http://www.wired.it/lifestyle/feed/'),
|
|
||||||
(u'Play', 'http://www.wired.it/play/feed/'),
|
|
||||||
|
|
||||||
]
|
|
@ -1,51 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'teepel <teepel44@gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
wolnemedia.net
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class wolne_media(BasicNewsRecipe):
|
|
||||||
title = u'Wolne Media'
|
|
||||||
__author__ = 'teepel <teepel44@gmail.com>'
|
|
||||||
language = 'pl'
|
|
||||||
description = 'Wiadomości z wolnemedia.net'
|
|
||||||
INDEX = 'http://wolnemedia.net'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
remove_empty_feeds = True
|
|
||||||
simultaneous_downloads = 5
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
auto_cleanup = True
|
|
||||||
ignore_duplicate_articles = {'url'}
|
|
||||||
|
|
||||||
remove_tags = [dict(name='p', attrs={'class': 'tags'})]
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Wiadomości z kraju', u'http://wolnemedia.net/category/wiadomosci-z-kraju/feed/'),
|
|
||||||
(u'Wiadomości ze świata', u'http://wolnemedia.net/category/wiadomosci-ze-swiata/feed/'),
|
|
||||||
(u'Edukacja', u'http://wolnemedia.net/category/edukacja/feed/'),
|
|
||||||
(u'Ekologia', u'http://wolnemedia.net/category/ekologia/feed/'),
|
|
||||||
(u'Gospodarka', u'http://wolnemedia.net/category/gospodarka/feed/'),
|
|
||||||
(u'Historia', u'http://wolnemedia.net/category/historia/feed/'),
|
|
||||||
(u'Kultura', u'http://wolnemedia.net/category/kultura/feed/'),
|
|
||||||
(u'Kulturoznawstwo', u'http://wolnemedia.net/category/kulturoznawstwo/feed/'),
|
|
||||||
(u'Media', u'http://wolnemedia.net/category/media/feed/'),
|
|
||||||
(u'Nauka', u'http://wolnemedia.net/category/nauka/feed/'),
|
|
||||||
(u'Opowiadania', u'http://wolnemedia.net/category/opowiadania/feed/'),
|
|
||||||
(u'Paranauka i ezoteryka', u'http://wolnemedia.net/category/ezoteryka/feed/'),
|
|
||||||
(u'Polityka', u'http://wolnemedia.net/category/polityka/feed/'),
|
|
||||||
(u'Prawo', u'http://wolnemedia.net/category/prawo/feed/'),
|
|
||||||
(u'Publicystyka', u'http://wolnemedia.net/category/publicystyka/feed/'),
|
|
||||||
(u'Reportaż', u'http://wolnemedia.net/category/reportaz/feed/'),
|
|
||||||
(u'Seks', u'http://wolnemedia.net/category/seks/feed/'),
|
|
||||||
(u'Społeczeństwo', u'http://wolnemedia.net/category/spoleczenstwo/feed/'),
|
|
||||||
(u'Świat komputerów', u'http://wolnemedia.net/category/swiat-komputerow/feed/'),
|
|
||||||
(u'Wierzenia', u'http://wolnemedia.net/category/wierzenia/feed/'),
|
|
||||||
(u'Zdrowie', u'http://wolnemedia.net/category/zdrowie/feed/')]
|
|
@ -1,26 +0,0 @@
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WorkersWorld(BasicNewsRecipe):
|
|
||||||
|
|
||||||
title = u'Workers World'
|
|
||||||
description = u'Socialist news and analysis'
|
|
||||||
__author__ = u'urslnx'
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_javascript = True
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
encoding = 'utf8'
|
|
||||||
publisher = 'workers.org'
|
|
||||||
category = 'news, politics, USA, world'
|
|
||||||
language = 'en'
|
|
||||||
publication_type = 'newsportal'
|
|
||||||
extra_css = ' body{ font-family: Verdana,Arial,Helvetica,sans-serif; } h1{ font-size: x-large; text-align: left; margin-top:0.5em; margin-bottom:0.25em; } h2{ font-size: large; } p{ text-align: left; } .published{ font-size: small; } .byline{ font-size: small; } .copyright{ font-size: small; } ' # noqa
|
|
||||||
remove_tags_before = dict(name='div', attrs={'id': 'evernote'})
|
|
||||||
remove_tags_after = dict(name='div', attrs={'id': 'footer'})
|
|
||||||
|
|
||||||
masthead_url = 'http://www.workers.org/graphics/wwlogo300.gif'
|
|
||||||
cover_url = 'http://www.workers.org/pdf/current.jpg'
|
|
||||||
feeds = [(u'Headlines', u'http://www.workers.org/rss/nonstandard_rss.xml'),
|
|
||||||
]
|
|
@ -1,38 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WorksInProgress(BasicNewsRecipe):
|
|
||||||
title = 'Works in progress'
|
|
||||||
description = 'Works in Progress is an online magazine dedicated to sharing new and underrated ideas to improve the world, and features original writing from some of the most interesting thinkers in the world' # noqa
|
|
||||||
cover_url = "https://www.worksinprogress.co/wp-content/uploads/2020/03/logo-1.svg"
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
auto_cleanup = True
|
|
||||||
publication_type = 'magazine'
|
|
||||||
language = 'en'
|
|
||||||
index = "https://www.worksinprogress.co/"
|
|
||||||
__author__ = "barakplasma"
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
soup = self.index_to_soup(self.index)
|
|
||||||
feeds = []
|
|
||||||
|
|
||||||
for section in soup.find_all('div', 'issue-loop'):
|
|
||||||
section_title = section['data-section-id']
|
|
||||||
section_items = []
|
|
||||||
|
|
||||||
for article in section.find_all('div', 'issue-intro'):
|
|
||||||
title = article.find('h2', 'issue-title').text
|
|
||||||
url = article.find_all('a')[1]['href']
|
|
||||||
author = article.find('a', 'author').text
|
|
||||||
section_items.append({
|
|
||||||
"title": title,
|
|
||||||
"url": url,
|
|
||||||
"author": author
|
|
||||||
})
|
|
||||||
|
|
||||||
feeds.append((section_title, section_items))
|
|
||||||
|
|
||||||
return feeds
|
|
@ -1,18 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class WoW(BasicNewsRecipe):
|
|
||||||
title = u'WoW Insider'
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'Krittika Goyal'
|
|
||||||
oldest_article = 1 # days
|
|
||||||
max_articles_per_feed = 25
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('WoW',
|
|
||||||
'http://wow.joystiq.com/rss.xml')
|
|
||||||
]
|
|
@ -1,61 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL 3'
|
|
||||||
__copyright__ = 'zotzo'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
'''
|
|
||||||
http://wvhooligan.com/
|
|
||||||
'''
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class wvHooligan(BasicNewsRecipe):
|
|
||||||
authors = u'Drew Epperley'
|
|
||||||
__author__ = 'rylsfan'
|
|
||||||
language = 'en'
|
|
||||||
version = 2
|
|
||||||
|
|
||||||
title = u'WV Hooligan'
|
|
||||||
publisher = u'Drew Epperley'
|
|
||||||
publication_type = 'Blog'
|
|
||||||
category = u'Soccer'
|
|
||||||
description = u'A look at Major League Soccer (MLS) through the eyes of a MLS writer and fan.'
|
|
||||||
|
|
||||||
cover_url = 'http://wvhooligan.com/wp-content/themes/urbanelements/images/logo3.png'
|
|
||||||
|
|
||||||
oldest_article = 15
|
|
||||||
max_articles_per_feed = 150
|
|
||||||
use_embedded_content = True
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
encoding = 'utf8'
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'comment': description, 'tags': category, 'publisher': publisher, 'language': language
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
{'class': 'feedflare'},
|
|
||||||
{'class': 'tweetmeme_button'},
|
|
||||||
]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return self.adeify_images(soup)
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Stories', u'http://feeds2.feedburner.com/wvhooligan'),
|
|
||||||
(u'MLS', u'http://wvhooligan.com/category/mls/feed/'),
|
|
||||||
(u'MLS Power Rankings',
|
|
||||||
u'http://wvhooligan.com/category/power-rankings/feed/'),
|
|
||||||
(u'MLS Expansion',
|
|
||||||
u'http://wvhooligan.com/category/mls/expansion-talk/feed/'),
|
|
||||||
(u'US National Team',
|
|
||||||
u'http://wvhooligan.com/category/us-national-team/feed/'),
|
|
||||||
(u'College', u'http://wvhooligan.com/category/college-soccer/feed/'),
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
|
|
||||||
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
|
|
||||||
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
|
|
||||||
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
|
|
||||||
'''
|
|
@ -1,34 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1347997197(BasicNewsRecipe):
|
|
||||||
title = u'XpatLoop.com'
|
|
||||||
__author__ = 'laca'
|
|
||||||
oldest_article = 7
|
|
||||||
language = 'en_HU'
|
|
||||||
auto_cleanup = True
|
|
||||||
masthead_url = 'http://www.xpatloop.com/images/cms/xs_logo.gif'
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
author = 'laca'
|
|
||||||
simultaneous_downloads = 1
|
|
||||||
max_articles_per_feed = 50
|
|
||||||
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
feeds = [(u'Current Affairs', u'http://www.xpatloop.com/current_affairs.rss'),
|
|
||||||
(u'Community & Culture', u'http://www.xpatloop.com/community.rss'),
|
|
||||||
(u'Business & Finance', u'http://www.xpatloop.com/business.rss'),
|
|
||||||
(u'Entertainment', u'http://www.xpatloop.com/entertainment.rss'),
|
|
||||||
(u'Dining Guide', u'http://www.xpatloop.com/dining_guide.rss'),
|
|
||||||
(u'Getting Around', u'http://www.xpatloop.com/getting_around.rss'),
|
|
||||||
(u'Movies', u'http://www.xpatloop.com/movies.rss'),
|
|
||||||
(u'Shopping', u'http://www.xpatloop.com/shopping_channel.rss'),
|
|
||||||
(u'Travel', u'http://www.xpatloop.com/travel.rss'),
|
|
||||||
(u'Sport and Fitness', u'http://www.xpatloop.com/sport_and_fitness.rss'),
|
|
||||||
(u'Health and Wellness', u'http://www.xpatloop.com/health_and_wellness.rss'),
|
|
||||||
(u'Infotech & Telco', u'http://www.xpatloop.com/infotech_telco.rss'),
|
|
||||||
(u'Real Estate', u'http://www.xpatloop.com/property_real_estate.rss'),
|
|
||||||
(u'Specials', u'http://www.xpatloop.com/specials.rss'),
|
|
||||||
(u'Video Channel', u'http://www.xpatloop.com/video.rss'),
|
|
||||||
(u'Events', u'http://www.xpatloop.com/events.rss')]
|
|
@ -1,32 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Yagmur(BasicNewsRecipe):
|
|
||||||
title = u'Yagmur Dergisi'
|
|
||||||
__author__ = u'thomass'
|
|
||||||
description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
|
|
||||||
oldest_article = 90
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
|
|
||||||
language = 'tr'
|
|
||||||
publication_type = 'magazine'
|
|
||||||
encoding = 'ISO 8859-9'
|
|
||||||
publisher = 'thomass'
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
|
||||||
}
|
|
||||||
|
|
||||||
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
|
|
||||||
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Yagmur', u'http://open.dapper.net/services/yagmur'),
|
|
||||||
]
|
|
||||||
|
|
||||||
def print_version(self, url): # there is a problem caused by table format
|
|
||||||
return url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=', 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=')
|
|
@ -1,22 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class YakimaHeraldRepublicRecipe(BasicNewsRecipe):
|
|
||||||
title = u'Yakima Herald-Republic'
|
|
||||||
description = 'The Yakima Herald-Republic.'
|
|
||||||
language = 'en'
|
|
||||||
__author__ = 'Laura Gjovaag'
|
|
||||||
oldest_article = 1.5
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': ['searchleft', 'headline_credit']}),
|
|
||||||
dict(name='div', attrs={'class': ['photo', 'cauthor', 'photocredit']}),
|
|
||||||
dict(name='div', attrs={'id': ['content_body', 'footerleft']})
|
|
||||||
]
|
|
||||||
extra_css = '.cauthor {font: monospace 60%;} .photocredit {font: monospace 60%}'
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Yakima Herald Online', u'http://feeds.feedburner.com/yhronlinenews'),
|
|
||||||
]
|
|
@ -1,22 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1350731826(BasicNewsRecipe):
|
|
||||||
title = u'Yazihane'
|
|
||||||
oldest_article = 30
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
__author__ = 'A Erdogan'
|
|
||||||
description = 'Sports Blog'
|
|
||||||
publisher = 'yazihaneden.com'
|
|
||||||
category = 'sports, basketball, nba, cycling, euroleague'
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
masthead_url = 'http://www.yazihaneden.com/wp-content/uploads/Untitled-1.png'
|
|
||||||
language = 'tr'
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'id': re.compile('(^|| )post-($|| )', re.DOTALL)})]
|
|
||||||
remove_tags_after = dict(name='div', attrs={'class': 'post-footer clear'})
|
|
||||||
feeds = [(u'Yazihane', u'http://www.yazihaneden.com/feed/')]
|
|
@ -1,39 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class YemenTimesRecipe(BasicNewsRecipe):
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__author__ = 'kwetal'
|
|
||||||
language = 'en_YE'
|
|
||||||
country = 'YE'
|
|
||||||
version = 1
|
|
||||||
|
|
||||||
title = u'Yemen Times'
|
|
||||||
publisher = u'yementimes.com'
|
|
||||||
category = u'News, Opinion, Yemen'
|
|
||||||
description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'
|
|
||||||
|
|
||||||
oldest_article = 10
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
use_embedded_content = False
|
|
||||||
encoding = 'utf-8'
|
|
||||||
|
|
||||||
remove_empty_feeds = True
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_javascript = True
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('News',
|
|
||||||
'http://www.yementimes.com/?tpl=1341'),
|
|
||||||
]
|
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
|
|
||||||
div.yemen_byline {font-size: medium; font-weight: bold;}
|
|
||||||
div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
|
|
||||||
.yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
conversion_options = {'comments': description, 'tags': category, 'language': 'en',
|
|
||||||
'publisher': publisher, 'linearize_tables': True}
|
|
@ -1,31 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class YeniUmit(BasicNewsRecipe):
|
|
||||||
title = u'Yeni Umit Dergisi'
|
|
||||||
__author__ = u'thomass'
|
|
||||||
description = 'Aylık Dini İlimler ve Kültür Dergisi'
|
|
||||||
oldest_article = 45
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
category = 'dergi, ilim, kültür, edebiyat,Türkçe'
|
|
||||||
language = 'tr'
|
|
||||||
publication_type = 'magazine'
|
|
||||||
encoding = 'ISO 8859-9'
|
|
||||||
publisher = 'thomass'
|
|
||||||
|
|
||||||
conversion_options = {
|
|
||||||
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
|
||||||
}
|
|
||||||
|
|
||||||
cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
|
|
||||||
masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
|
|
||||||
]
|
|
||||||
|
|
||||||
def print_version(self, url): # there is a problem caused by table format
|
|
||||||
return url.replace('http://www.yeniumit.com.tr/konular', 'http://www.yeniumit.com.tr/yazdir')
|
|
@ -1,59 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
|
||||||
'''
|
|
||||||
www.yomiuri.co.jp
|
|
||||||
'''
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class YOLNews(BasicNewsRecipe):
|
|
||||||
title = u'Yomiuri Online (Latest)'
|
|
||||||
__author__ = 'Hiroshi Miura'
|
|
||||||
oldest_article = 1
|
|
||||||
max_articles_per_feed = 50
|
|
||||||
description = 'Japanese traditional newspaper Yomiuri Online News'
|
|
||||||
publisher = 'Yomiuri Online News'
|
|
||||||
category = 'news, japan'
|
|
||||||
language = 'ja'
|
|
||||||
encoding = 'UTF-8'
|
|
||||||
index = 'http://www.yomiuri.co.jp/latestnews/'
|
|
||||||
remove_javascript = True
|
|
||||||
masthead_title = u'YOMIURI ONLINE'
|
|
||||||
|
|
||||||
keep_only_tags = [{'class': "article text-resizeable"}]
|
|
||||||
|
|
||||||
def parse_feeds(self):
|
|
||||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
|
||||||
for curfeed in feeds:
|
|
||||||
delList = []
|
|
||||||
for a, curarticle in enumerate(curfeed.articles):
|
|
||||||
if re.search(r'rssad.jp', curarticle.url):
|
|
||||||
delList.append(curarticle)
|
|
||||||
if len(delList) > 0:
|
|
||||||
for d in delList:
|
|
||||||
index = curfeed.articles.index(d)
|
|
||||||
curfeed.articles[index:index + 1] = []
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
feeds = []
|
|
||||||
newsarticles = []
|
|
||||||
soup = self.index_to_soup(self.index)
|
|
||||||
listlatest = soup.find(
|
|
||||||
'ul', attrs={'class': 'list-common list-common-latest'})
|
|
||||||
if listlatest:
|
|
||||||
for itt in listlatest.findAll('li'):
|
|
||||||
itema = itt.find('a', href=True)
|
|
||||||
if itema:
|
|
||||||
item_headline = itema.find(
|
|
||||||
'span', attrs={'class': 'headline'})
|
|
||||||
item_date = item_headline.find(
|
|
||||||
'span', attrs={'class': 'update'})
|
|
||||||
newsarticles.append({
|
|
||||||
'title': item_headline.contents[0], 'date': item_date, 'url': itema['href'], 'description': ''
|
|
||||||
})
|
|
||||||
feeds.append(('latest', newsarticles))
|
|
||||||
return feeds
|
|
@ -1,70 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Zaman (BasicNewsRecipe):
|
|
||||||
|
|
||||||
title = u'ZAMAN Gazetesi'
|
|
||||||
description = ' Zaman Gazetesi''nin internet sitesinden günlük haberler'
|
|
||||||
__author__ = u'thomass'
|
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 50
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
encoding = 'utf-8'
|
|
||||||
publisher = 'Feza Gazetecilik'
|
|
||||||
category = 'news, haberler,TR,gazete'
|
|
||||||
language = 'tr'
|
|
||||||
publication_type = 'newspaper '
|
|
||||||
extra_css = 'h1{text-transform: capitalize; font-weight: bold; font-size: 22px;color:#0000FF} p{text-align:justify} '
|
|
||||||
conversion_options = {
|
|
||||||
'tags': category, 'language': language, 'publisher': publisher, 'linearize_tables': True
|
|
||||||
}
|
|
||||||
cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
|
|
||||||
masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'
|
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
|
||||||
auto_cleanup = False
|
|
||||||
remove_empty_feeds = True
|
|
||||||
|
|
||||||
# keep_only_tags = [dict(name='div', attrs={'id':[
|
|
||||||
# 'contentposition19']})]#,dict(name='div', attrs={'id':[
|
|
||||||
# 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div',
|
|
||||||
# attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[
|
|
||||||
# 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div',
|
|
||||||
# attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[
|
|
||||||
# 'news-detail-content']}), dict(name='td',
|
|
||||||
# attrs={'class':['columnist-detail','columnist_head']}), ]
|
|
||||||
remove_tags = [dict(name='img', attrs={'src': ['http://cmsmedya.zaman.com.tr/images/logo/logo.bmp']}), dict(name='hr', attrs={'class': ['interactive-hr']})]
|
|
||||||
|
|
||||||
remove_empty_feeds = True
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
(u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
|
|
||||||
(u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
|
|
||||||
(u'Politika', u'http://www.zaman.com.tr/politika.rss'),
|
|
||||||
(u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
|
|
||||||
(u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
|
|
||||||
(u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
|
|
||||||
(u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
|
|
||||||
(u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
|
|
||||||
(u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
|
|
||||||
(u'Dizi Yazı', u'http://www.zaman.com.tr/dizi.rss'),
|
|
||||||
(u'Bilişim', u'http://www.zaman.com.tr/bilisim.rss'),
|
|
||||||
(u'Otomotiv', u'http://www.zaman.com.tr/otomobil.rss'),
|
|
||||||
(u'Spor', u'http://www.zaman.com.tr/spor.rss'),
|
|
||||||
(u'Kürsü', u'http://www.zaman.com.tr/kursu.rss'),
|
|
||||||
(u'Eğitim', u'http://www.zaman.com.tr/egitim.rss'),
|
|
||||||
(u'Kültür Sanat', u'http://www.zaman.com.tr/kultursanat.rss'),
|
|
||||||
(u'Televizyon', u'http://www.zaman.com.tr/televizyon.rss'),
|
|
||||||
(u'Aile', u'http://www.zaman.com.tr/aile.rss'),
|
|
||||||
(u'Cuma Eki', u'http://www.zaman.com.tr/cuma.rss'),
|
|
||||||
(u'Cumaertesi Eki', u'http://www.zaman.com.tr/cumaertesi.rss'),
|
|
||||||
(u'Pazar Eki', u'http://www.zaman.com.tr/pazar.rss'),
|
|
||||||
(u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
|
|
||||||
(u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
def print_version(self, url):
|
|
||||||
return url.replace('http://www.zaman.com.tr/newsDetail_getNewsById.action?newsId=', 'http://www.zaman.com.tr/newsDetail_openPrintPage.action?newsId=')
|
|
@ -1,21 +0,0 @@
|
|||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class ZTS(BasicNewsRecipe):
|
|
||||||
title = u'Zaufana Trzecia Strona'
|
|
||||||
__author__ = 'fenuks'
|
|
||||||
description = u'Niezależne źródło wiadomości o świecie bezpieczeństwa IT'
|
|
||||||
category = 'IT, security'
|
|
||||||
language = 'pl'
|
|
||||||
cover_url = 'http://www.zaufanatrzeciastrona.pl/wp-content/uploads/2012/08/z3s_h100.png'
|
|
||||||
extra_css = '.thumbnail {float: left; margin-right:5px;}'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
remove_empty_feeds = True
|
|
||||||
use_embedded_content = False
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class': 'post postcontent'})]
|
|
||||||
remove_tags = [dict(name='div', attrs={'class': 'dolna-ramka'})]
|
|
||||||
feeds = [(u'Strona g\u0142\xf3wna', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaGlowna'),
|
|
||||||
(u'Drobiazgi', u'http://feeds.feedburner.com/ZaufanaTrzeciaStronaDrobiazgi')]
|
|
@ -1,57 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# vim:fileencoding=utf-8
|
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Zaxid(BasicNewsRecipe):
|
|
||||||
title = 'Zaxid.net'
|
|
||||||
__author__ = 'rpalyvoda (with fixes by bugmen00t)'
|
|
||||||
oldest_article = 14
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
language = 'uk'
|
|
||||||
cover_url = 'https://zaxid.net/images/logo.png'
|
|
||||||
masthead_url = 'https://zaxid.net/images/logo.png'
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
feeds = [
|
|
||||||
('\u0417\u0430\u0433\u0430\u043B\u044C\u043D\u0438\u0439 RSS', 'http://zaxid.net/rss/all.xml'),
|
|
||||||
('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://zaxid.net/rss/1.xml'),
|
|
||||||
('\u0421\u0442\u0430\u0442\u0442\u0456', 'https://zaxid.net/rss/2.xml'),
|
|
||||||
('\u0411\u043B\u043E\u0433\u0438', 'https://zaxid.net/rss/3.xml'),
|
|
||||||
('\u0421\u0443\u0441\u043F\u0456\u043B\u044C\u0441\u0442\u0432\u043E', 'https://zaxid.net/rss/4.xml'),
|
|
||||||
('\u0415\u043A\u043E\u043D\u043E\u043C\u0456\u043A\u0430', 'https://zaxid.net/rss/5.xml'),
|
|
||||||
('\u041A\u0443\u043B\u044C\u0442\u0443\u0440\u0430', 'https://zaxid.net/rss/6.xml'),
|
|
||||||
('\u0421\u043F\u043E\u0440\u0442', 'https://zaxid.net/rss/7.xml'),
|
|
||||||
('\u0421\u0432\u0456\u0442', 'https://zaxid.net/rss/8.xml'),
|
|
||||||
('IQ', 'https://zaxid.net/rss/9.xml'),
|
|
||||||
('\u0410\u043D\u043E\u043D\u0441\u0438', 'https://zaxid.net/rss/13.xml'),
|
|
||||||
('ZAXID.NET TV', 'https://zaxid.net/rss/zaxid_tv.xml'),
|
|
||||||
('\u041D\u043E\u0432\u0438\u043D\u0438 \u041B\u044C\u0432\u043E\u0432\u0430', 'https://zaxid.net/rss/lviv_news.xml'),
|
|
||||||
('\u0424\u043E\u0442\u043E', 'https://zaxid.net/rss/19.xml'),
|
|
||||||
('\u0414\u0456\u043C', 'https://zaxid.net/rss/26.xml'),
|
|
||||||
('\u0417\u0434\u043E\u0440\u043E\u0432\u0027\u044F', 'https://zaxid.net/rss/28.xml'),
|
|
||||||
('\u0410\u0432\u0442\u043E', 'https://zaxid.net/rss/29.xml'),
|
|
||||||
('\u041B\u044C\u0432\u0456\u0432', 'https://zaxid.net/rss/16.xml'),
|
|
||||||
('\u041F\u0440\u0438\u043A\u0430\u0440\u043F\u0430\u0442\u0442\u044F', 'https://zaxid.net/rss/59.xml'),
|
|
||||||
('\u0422\u0435\u0440\u043D\u043E\u043F\u0456\u043B\u044C', 'https://zaxid.net/rss/60.xml'),
|
|
||||||
('\u0412\u043E\u043B\u0438\u043D\u044C', 'https://zaxid.net/rss/61.xml'),
|
|
||||||
('\u0417\u0430\u043A\u0430\u0440\u043F\u0430\u0442\u0442\u044F', 'https://zaxid.net/rss/62.xml'),
|
|
||||||
('\u0427\u0435\u0440\u043D\u0456\u0432\u0446\u0456', 'https://zaxid.net/rss/63.xml'),
|
|
||||||
('\u0420\u0456\u0432\u043D\u0435', 'https://zaxid.net/rss/65.xml'),
|
|
||||||
('\u0425\u043C\u0435\u043B\u044C\u043D\u0438\u0446\u044C\u043A\u0438\u0439', 'https://zaxid.net/rss/68.xml'),
|
|
||||||
('\u041D\u0435\u0440\u0443\u0445\u043E\u043C\u0456\u0441\u0442\u044C', 'https://zaxid.net/rss/37.xml'),
|
|
||||||
('\u0427\u0435\u0440\u0432\u043E\u043D\u043E\u0433\u0440\u0430\u0434', 'https://zaxid.net/rss/40.xml'),
|
|
||||||
('\u0421\u0442\u0440\u0438\u0439', 'https://zaxid.net/rss/41.xml'),
|
|
||||||
('\u0414\u0440\u043E\u0433\u043E\u0431\u0438\u0447', 'https://zaxid.net/rss/42.xml'),
|
|
||||||
('\u0412\u0456\u043D\u043D\u0438\u0446\u044F', 'https://zaxid.net/rss/44.xml'),
|
|
||||||
('\u041F\u043E\u0434\u043E\u0440\u043E\u0436\u0456', 'https://zaxid.net/rss/46.xml'),
|
|
||||||
('\u0414\u0456\u0442\u0438', 'https://zaxid.net/rss/53.xml'),
|
|
||||||
('Громадський транспорт', 'https://zaxid.net/rss/54.xml'),
|
|
||||||
('\u0420\u0435\u0446\u0435\u043F\u0442\u0438', 'https://zaxid.net/rss/55.xml'),
|
|
||||||
('\u0421\u043F\u043E\u0440\u0442', 'https://zaxid.net/rss/56.xml'),
|
|
||||||
('\u041A\u0443\u0440\u0439\u043E\u0437\u0438', 'https://zaxid.net/rss/57.xml'),
|
|
||||||
('\u0414\u043E\u0437\u0432\u0456\u043B\u043B\u044F', 'https://zaxid.net/rss/58.xml')
|
|
||||||
]
|
|
@ -1,40 +0,0 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Fetch zdnet.
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class cdnet(BasicNewsRecipe):
|
|
||||||
|
|
||||||
title = 'zdnet'
|
|
||||||
description = 'zdnet security'
|
|
||||||
__author__ = 'Oliver Niesner, Krittika Goyal'
|
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
use_embedded_content = False
|
|
||||||
timefmt = ' [%d %b %Y]'
|
|
||||||
max_articles_per_feed = 40
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'latin1'
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
# remove_tags = [dict(id='eyebrows'),
|
|
||||||
# dict(id='header'),
|
|
||||||
# dict(id='search'),
|
|
||||||
# dict(id='nav'),
|
|
||||||
# dict(id='blog-author-info'),
|
|
||||||
# dict(id='post-tags'),
|
|
||||||
# dict(id='bio-naraine'),
|
|
||||||
# dict(id='bio-kennedy'),
|
|
||||||
# dict(id='author-short-disclosure-kennedy'),
|
|
||||||
# dict(id=''),
|
|
||||||
feeds = [('zdnet', 'http://feeds.feedburner.com/zdnet/security')]
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
for item in soup.findAll(style=True):
|
|
||||||
del item['style']
|
|
||||||
return soup
|
|
@ -1,20 +0,0 @@
|
|||||||
__version__ = 'v1.0'
|
|
||||||
__date__ = '7, April 2012'
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1333705905(BasicNewsRecipe):
|
|
||||||
title = u'Zerocalcare'
|
|
||||||
__author__ = 'faber1971'
|
|
||||||
description = 'Free Italian Comics'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
language = 'it'
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
auto_cleanup = False
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='div', attrs={'class': 'main entry-content group'})
|
|
||||||
]
|
|
||||||
masthead_url = 'http://zerocalcare.it/wp-content/uploads/2011/11/zerocalcare-banner.jpg'
|
|
||||||
feeds = [(u'Zerocalcare', u'http://feeds.feedburner.com/Zerocalcareit')]
|
|
@ -1,36 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class Zitabe(BasicNewsRecipe):
|
|
||||||
title = u'Zita.be'
|
|
||||||
__author__ = u'erkfuizfeuadjfjzefzfuzeff'
|
|
||||||
description = u'Lifestyle News from Belgium in Dutch'
|
|
||||||
oldest_article = 7
|
|
||||||
language = 'nl_BE'
|
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
use_embedded_content = False
|
|
||||||
|
|
||||||
keep_only_tags = [
|
|
||||||
dict(name='title'), dict(name='article', attrs={'class': 'article-main'})
|
|
||||||
]
|
|
||||||
|
|
||||||
remove_tags = [
|
|
||||||
dict(name='div', attrs={'class': 'box-50 box-omega sidebar-webtips'}),
|
|
||||||
dict(name='blockquote', attrs={'class': 'instagram-media'}),
|
|
||||||
]
|
|
||||||
|
|
||||||
feeds = [(u'Auto', u'http://www.zita.be/xml/auto.xml'),
|
|
||||||
(u'Business', u'http://www.zita.be/xml/business.xml'),
|
|
||||||
(u'Entertainment', u'http://www.zita.be/xml/entertainment.xml'),
|
|
||||||
(u'Fun', u'http://www.zita.be/xml/fun.xml'),
|
|
||||||
(u'Lifestyle', u'http://www.zita.be/xml/lifestyle.xml'),
|
|
||||||
(u'Nieuws', u'http://www.zita.be/xml/nieuws.xml'),
|
|
||||||
(u'Nieuws binneland', u'http://www.zita.be/xml/nieuws-binnenland.xml'),
|
|
||||||
(u'Bizar', u'http://www.zita.be/xml/nieuws-bizar.xml'),
|
|
||||||
(u'Nieuws Buitenland', u'http://www.zita.be/xml/nieuws-buitenland.xml'),
|
|
||||||
(u'Nieuws Economie', u'http://www.zita.be/xml/nieuws-economie.xml'),
|
|
||||||
(u'Sport', u'http://www.zita.be/xml/nieuws-sport.xml')]
|
|
@ -1,12 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
|
|
||||||
class AdvancedUserRecipe1320264153(BasicNewsRecipe):
|
|
||||||
title = u'zougla'
|
|
||||||
__author__ = 'Stelios'
|
|
||||||
language = 'el'
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
auto_cleanup = True
|
|
||||||
|
|
||||||
feeds = [(u'zougla', u'http://www.zougla.gr/ArticleRss.xml')]
|
|