diff --git a/Changelog.yaml b/Changelog.yaml index f2920c00f1..174e7fc823 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -19,6 +19,139 @@ # new recipes: # - title: +- version: 0.8.12 + date: 2011-07-29 + + new features: + - title: "Content server: Return the correct last modified date when serving ebook files. Also allow getting of book metadata as /get/opf/" + + - title: "Driver for the COBY MP977" + + - title: "Get Books: Remove epub bud store. Add Ozon.ru and e-knigni.net stores. Fix broken Amazon UK and DE stores." + tickets: [816091] + + - title: "Add a new tweak to Preferences->Tweaks that allows auto generation of series numbers when importing books with a series name, but no number" + tickets: [815573] + + bug fixes: + - title: "Fix a regression in 0.8.11 that broke calibre on Linux systems that use a file system encoding that cannot support Cyrillic characters" + tickets: [815224] + + - title: "Fix long titles not wrapping in cover browser" + tickets: [816595] + + - title: "When adding books, handle the case of files without read permission more gracefully." + tickets: [814771] + + - title: "When changing metadata in EPUB files do not use the opf: namespace prefix on newly created elements. Apparently, FBReaderJ doesn't understand XML namespaces." + tickets: [814722] + + - title: "Prevent metadata download from returning published dates earlier than 101 A.D." + + - title: "Fix a bug where dates before 101 A.D. in the database could cause errors" + tickets: [814964] + + - title: "Fix an error in the book details panel if the user sets the default author link to blank" + + improved recipes: + - The Economist + - Instapaper + - Corren + + new recipes: + - title: Counterpunch + author: O. 
Emmerson + + - title: National Geographic (PL) + author: Marcin Urban + + - title: Caros Amigos + author: Pablo Aldama + + - title: Aksiyon Dergisi + author: thomass + + - title: Dnevnik (MK) and +Info + author: Darko Spasovski + + - title: Dagens Industri + author: Jonas Svensson + + +- version: 0.8.11 + date: 2011-07-22 + + new features: + - title: "When doing a conversion from some format to the same format, save the original file" + description: "When calibre does a conversion from the same format to the same format, for + example, from EPUB to EPUB, the original file is saved as original_epub, so that in case the + conversion is poor, you can change the settings and run it again. The original is automatically used + every time you run a conversion with that format as input. If you want to disable this, + there is a tweak that prevents calibre from saving the originals in Preferences->Tweaks. You can + easily replace the converted version with the original in the Edit metadata dialog by right + clicking on the list of formats in the top right corner." + type: major + + - title: "Conversion pipeline: Add an option to control the height of the blank lines inserted by calibre" + + - title: "Drivers for bq DaVinci, Samsung Galaxy ACE GT-S5830 and Medion e-reader" + + - title: "Get Books: Add stores Chitanka and Bookoteka. Remove epubbuy.de at store's request" + + - title: "Content server: Add a link at the bottom of the mobile interface to switch to the full interface." + tickets: [812525] + + - title: "Update the kindle icon shown when a Kindle is connected to use a picture of the Kindle 3" + tickets: [810852] + + - title: "MOBI Output: When converting epub documents that have a start element in their guide, use it to mark the starting position at which the MOBI file will be opened." 
+ tickets: [804755] + + - title: "News download: Add a default Accept header to all requests" + + bug fixes: + - title: "Fix regression that broke loading translations from .po files in the working directory" + + - title: "Fix conversion dialog not allowing series numbers larger than 9999" + tickets: [813281] + + - title: "Conversion pipeline: When adding/removing entries to the manifest, ignore unparseable URLs instead of erroring out on them" + + - title: "SD Card in Azbooka not being detected" + tickets: [812750] + + - title: "Conversion pipeline: Strip out large blocks of contiguous space (more than 10000 contiguous blanks) as these slow down the conversion process and are almost always indicative of an error in the input document." + + - title: "ebook-convert: Abort if a keyboard interrupt is raised during parsing" + + - title: "Regex builder: Show a nicer error message when the user has the file open in another program on Windows." + tickets: [811641] + + - title: "When converting in the GUI, set all identifiers present in the book's metadata in the output file, if the output format supports them." 
+ + improved recipes: + - NBOnline + - JBPress + - Instapaper + - Die Zeit + - Wired (UK) + + new recipes: + - title: Utrinski Vesnik + author: Darko Spasovski + + - title: IDG.se + author: zapt0 + + - title: Los Andes + author: Darko Miletic + + - title: De Luns a Venres + author: Susana Sotelo Docío + + - title: "Nikkei News subscription version" + author: Ado Nishimura + - version: 0.8.10 date: 2011-07-15 @@ -669,7 +802,7 @@ - version: 0.8.0 - date: 2010-05-06 + date: 2011-05-06 new features: - title: "Go to http://calibre-ebook.com/new-in/eight to see what's new in 0.8.0" diff --git a/recipes/aksiyon_derigisi.recipe b/recipes/aksiyon_derigisi.recipe new file mode 100644 index 0000000000..f18ebd84d3 --- /dev/null +++ b/recipes/aksiyon_derigisi.recipe @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class Aksiyon (BasicNewsRecipe): + + title = u'Aksiyon Dergisi' + __author__ = u'thomass' + description = 'Haftalık haber dergisi ' + oldest_article =13 + max_articles_per_feed =100 + no_stylesheets = True + #delay = 1 + #use_embedded_content = False + encoding = 'utf-8' + publisher = 'Aksiyon' + category = 'news, haberler,TR,gazete' + language = 'tr' + publication_type = 'magazine' + #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + #keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})] + remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ] + + cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg' + masthead_url = 
'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg' + remove_empty_feeds= True + remove_attributes = ['width','height'] + + feeds = [ + ( u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'), + ( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'), + ( u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'), + ( u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'), + ( u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'), + ( u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'), + ( u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'), + ( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'), + ( u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'), + ( u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'), + ( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'), + ( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'), + ( u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'), + ( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'), + ( u'3. 
BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'), + ( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'), + ( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'), + + + ] + + def print_version(self, url): + return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?') + diff --git a/recipes/caros_amigos.recipe b/recipes/caros_amigos.recipe new file mode 100644 index 0000000000..48edceacba --- /dev/null +++ b/recipes/caros_amigos.recipe @@ -0,0 +1,17 @@ +__copyright__ = '2011, Pablo Aldama ' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1311839910(BasicNewsRecipe): + title = u'Caros Amigos' + oldest_article = 20 + max_articles_per_feed = 100 + language = 'pt_BR' + __author__ = 'Pablo Aldama' + + feeds = [(u'Caros Amigos', u'http://carosamigos.terra.com.br/index/index.php?format=feed&type=rss')] + keep_only_tags = [dict(name='div', attrs={'class':['blog']}) + ,dict(name='div', attrs={'class':['blogcontent']}) + ] + remove_tags = [dict(name='div', attrs={'class':'addtoany'})] + diff --git a/recipes/corren2.recipe b/recipes/corren2.recipe index 494be88f10..f53da20fd1 100644 --- a/recipes/corren2.recipe +++ b/recipes/corren2.recipe @@ -1,39 +1,34 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPLv3' + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1255797795(BasicNewsRecipe): - title = u'Corren' - language = 'sv' - __author__ = 'Jonas Svensson' - simultaneous_downloads = 1 - no_stylesheets = True - oldest_article = 7 +class AdvancedUserRecipe1311446032(BasicNewsRecipe): + title = 'Corren' + __author__ = 'Jonas Svensson' + description = 'News from Sweden' + publisher = 'Corren' + category = 'news, politics, Sweden' + oldest_article = 2 + delay = 1 max_articles_per_feed = 100 - remove_attributes = ['onload'] - timefmt = '' + no_stylesheets = True + 
use_embedded_content = False + encoding = 'iso-8859-1' + language = 'sv' - feeds = [ - (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'), - (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'), - (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'), - (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'), - (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'), - (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'), - (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'), - (u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'), - (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'), - (u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/') - ] - - def print_version(self, url): - url = url.replace("ekonomi/artikel.aspx", "Print.aspx") - url = url.replace("bostad/artikel.aspx", "Print.aspx") - url = url.replace("kultur/artikel.aspx", "Print.aspx") - url = url.replace("motor/artikel.aspx", "Print.aspx") - url = url.replace("mat-dryck/artikel.aspx", "Print.aspx") - url = url.replace("sport/artikel.aspx", "Print.aspx") - url = url.replace("asikter/artikel.aspx", "Print.aspx") - url = url.replace("mat-dryck/artikel.aspx", "Print.aspx") - url = url.replace("ostergotland/mjolby/artikel.aspx", "Print.aspx") - url = url.replace("ostergotland/motala/artikel.aspx", "Print.aspx") - return url.replace("nyheter/artikel.aspx", "Print.aspx") + feeds = [ + (u'Toppnyheter', 
u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/') + ,(u'Ekonomi', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/') + ,(u'Link\xf6ping', u'http://www.corren.se/inc/RssHandler.ashx?id=4122234') + ,(u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223,4122224,4122226,4122227,4122228,4122229,4122230') + ] + keep_only_tags = [dict(name='div', attrs={'id':'article'}),dict(name='div', attrs={'class':'body'})] + remove_tags = [ + dict(name='ul',attrs={'class':'functions'}) + ,dict(name='a',attrs={'href':'javascript*'}) + ,dict(name='div',attrs={'class':'box'}) + ,dict(name='div',attrs={'class':'functionsbottom'}) + ] diff --git a/recipes/counterpunch.recipe b/recipes/counterpunch.recipe new file mode 100644 index 0000000000..5fefc86cb4 --- /dev/null +++ b/recipes/counterpunch.recipe @@ -0,0 +1,40 @@ +import re +from lxml.html import parse +from calibre.web.feeds.news import BasicNewsRecipe + +class Counterpunch(BasicNewsRecipe): + ''' + Parses counterpunch.com for articles + ''' + title = 'Counterpunch' + description = 'Daily political opinion from www.Counterpunch.com' + language = 'en' + __author__ = 'O. 
Emmerson' + keep_only_tags = [dict(name='td', attrs={'width': '522'})] + max_articles_per_feed = 10 + + def parse_index(self): + feeds = [] + title, url = 'Counterpunch', 'http://www.counterpunch.com' + articles = self.parse_page(url) + if articles: + feeds.append((title, articles)) + return feeds + + def parse_page(self, url): + parsed_page = parse(url).getroot() + articles = [] + unwanted_text = re.compile('Website\ of\ the|I\ urge\ you|Subscribe\ now|DONATE|\@asis\.com|donation\ button|click\ over\ to\ our') + parsed_articles = [a for a in parsed_page.cssselect("html>body>table tr>td>p[class='style2']") if not unwanted_text.search(a.text_content())] + for art in parsed_articles: + try: + author = art.text + title = art.cssselect("a")[0].text + ' by {0}'.format(author) + art_url = 'http://www.counterpunch.com/' + art.cssselect("a")[0].attrib['href'] + articles.append({'title': title, 'url': art_url}) + except Exception as e: + e + #print('Handler Error: ', e, 'title :', a.text_content()) + pass + return articles + diff --git a/recipes/dagens_industri.recipe b/recipes/dagens_industri.recipe new file mode 100644 index 0000000000..c9b60c72b1 --- /dev/null +++ b/recipes/dagens_industri.recipe @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPLv3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1311450855(BasicNewsRecipe): + title = u'Dagens Industri' + __author__ = 'Jonas Svensson' + description = 'Economy news from Sweden' + publisher = 'DI' + category = 'news, politics, Sweden' + oldest_article = 2 + delay = 1 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = 'sv' + + feeds = [(u'DI', u'http://di.se/rss')] + + keep_only_tags = [dict(name='h1', attrs={'id':'ctl00_ExtraWideContentRegion_WideContentRegion_MainRegion_MainContentRegion_MainBodyRegion_headlineNormal'}),dict(name='div', attrs={'id':'articleBody'})] + + remove_tags = [ + 
dict(name='div',attrs={'class':'article-actions clear'}) + ,dict(name='div',attrs={'class':'article-action-popup'}) + ,dict(name='div',attrs={'class':'header'}) + ,dict(name='div',attrs={'class':'content clear'}) + ,dict(name='div',attrs={'id':'articleAdvertisementDiv'}) + ,dict(name='ul',attrs={'class':'action-list'}) + ] diff --git a/recipes/dnevnik_mk.recipe b/recipes/dnevnik_mk.recipe new file mode 100644 index 0000000000..ce8656339f --- /dev/null +++ b/recipes/dnevnik_mk.recipe @@ -0,0 +1,98 @@ +#!/usr/bin/env python + +__author__ = 'Darko Spasovski' +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Spasovski ' +''' +dnevnik.com.mk +''' + +import re +import datetime +from calibre.web.feeds.news import BasicNewsRecipe +from calibre import browser +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +class Dnevnik(BasicNewsRecipe): + + INDEX = 'http://www.dnevnik.com.mk' + __author__ = 'Darko Spasovski' + title = 'Dnevnik - mk' + description = 'Daily Macedonian newspaper' + masthead_url = 'http://www.dnevnik.com.mk/images/re-logo.gif' + language = 'mk' + publication_type = 'newspaper' + category = 'news, Macedonia' + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + + preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + ## Remove anything before the start of the article. + (r'', lambda match: ''), + + ## Remove anything after the end of the article. 
+ (r'', re.DOTALL), lambda m: '')] + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + + remove_tags = [ + dict(name='div', attrs={'class':'add_inf'}), + dict(name='div', attrs={'class':'add_f'}), + ] + + remove_attributes = ['width','height'] + + feeds = [ + ('National Geographic PL', 'http://www.national-geographic.pl/rss/'), + ] + + def print_version(self, url): + return url.replace('artykuly0Cpokaz', 'drukuj-artykul') + diff --git a/recipes/plus_info.recipe b/recipes/plus_info.recipe new file mode 100644 index 0000000000..e95a3e7359 --- /dev/null +++ b/recipes/plus_info.recipe @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +__author__ = 'Darko Spasovski' +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Spasovski ' + +''' +www.plusinfo.mk +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class PlusInfo(BasicNewsRecipe): + + INDEX = 'www.plusinfo.mk' + title = u'+info' + __author__ = 'Darko Spasovski' + description = 'Macedonian news portal' + publication_type = 'newsportal' + category = 'news, Macedonia' + language = 'mk' + masthead_url = 'http://www.plusinfo.mk/style/images/logo.jpg' + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + oldest_article = 1 + max_articles_per_feed = 100 + + keep_only_tags = [dict(name='div', attrs={'class': 'vest'})] + remove_tags = [dict(name='div', attrs={'class':['komentari_holder', 'objava']})] + + feeds = [(u'Македонија', u'http://www.plusinfo.mk/rss/makedonija'), + (u'Бизнис', u'http://www.plusinfo.mk/rss/biznis'), + (u'Скопје', u'http://www.plusinfo.mk/rss/skopje'), + (u'Култура', u'http://www.plusinfo.mk/rss/kultura'), + (u'Свет', u'http://www.plusinfo.mk/rss/svet'), + (u'Сцена', u'http://www.plusinfo.mk/rss/scena'), + (u'Здравје', u'http://www.plusinfo.mk/rss/zdravje'), + (u'Магазин', u'http://www.plusinfo.mk/rss/magazin'), + (u'Спорт', 
u'http://www.plusinfo.mk/rss/sport')] + + # uncomment the following block if you want the print version (note: it lacks photos) +# def print_version(self,url): +# segments = url.split('/') +# printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5:]) +# return printURL diff --git a/recipes/portafolio.recipe b/recipes/portafolio.recipe new file mode 100644 index 0000000000..1b442f68f7 --- /dev/null +++ b/recipes/portafolio.recipe @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- + +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1311799898(BasicNewsRecipe): + title = u'Periódico Portafolio Colombia' + language = 'es_CO' + __author__ = 'BIGO-CAVA' + cover_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png' + remove_tags_before = dict(id='contenidoArt') + remove_tags_after = [dict(name='div', attrs={'class':'articulo-mas'})] + keep_only_tags = [dict(name='div', id='contenidoArt')] + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + remove_empty_feeds = True + masthead_url = 'http://www.portafolio.co/sites/portafolio.co/themes/portafolio_2011/logo.png' + publication_type = 'newspaper' + + extra_css = """ + p{text-align: justify; font-size: 100%} + body{ text-align: left; font-size:100% } + h1{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } + h3{font-family: sans-serif; font-size:100%; font-style: italic; text-align: justify; } + """ + + + feeds = [(u'Negocios', u'http://www.portafolio.co/negocios/feed'), + (u'Economia', u'http://www.portafolio.co/economia/feed'), + (u'Internacional', u'http://www.portafolio.co/internacional/feed'), + (u'Indicadores', u'http://www.portafolio.co/indicadores/feed'), + (u'Opinion', u'http://www.portafolio.co/opinion/feed'), + (u'Finanzas Personales', u'http://www.portafolio.co/finanzas-personales/feed'), + (u'Herramientas', 
u'http://www.portafolio.co/herramientas/feed')] diff --git a/recipes/united_daily.recipe b/recipes/united_daily.recipe index 1013b3d2b6..25493a43ac 100644 --- a/recipes/united_daily.recipe +++ b/recipes/united_daily.recipe @@ -64,7 +64,7 @@ class UnitedDaily(BasicNewsRecipe): __author__ = 'Eddie Lau' __version__ = '1.1' - language = 'zh-TW' + language = 'zh_TW' publisher = 'United Daily News Group' description = 'United Daily (Taiwan)' category = 'News, Chinese, Taiwan' diff --git a/recipes/utrinski.recipe b/recipes/utrinski.recipe new file mode 100644 index 0000000000..5256695079 --- /dev/null +++ b/recipes/utrinski.recipe @@ -0,0 +1,71 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Spasovski ' +''' +utrinski.com.mk +''' + +import re +import datetime +from calibre.web.feeds.news import BasicNewsRecipe + +class UtrinskiVesnik(BasicNewsRecipe): + + __author__ = 'Darko Spasovski' + INDEX = 'http://www.utrinski.com.mk/' + title = 'Utrinski Vesnik' + description = 'Daily Macedonian newspaper' + masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg' + language = 'mk' + remove_javascript = True + publication_type = 'newspaper' + category = 'news, Macedonia' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in + [ + ## Remove anything before the start of the article. + (r'', lambda match: ''), + + ## Remove anything after the end of the article. + (r'