From 6ce808c4994ce3e981a3d3023b7c8f626b03c320 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 5 Nov 2018 12:43:20 +0530 Subject: [PATCH] Allow saving index html easily --- src/calibre/web/feeds/news.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index d131524114..3277d957e0 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -657,7 +657,7 @@ class BasicNewsRecipe(Recipe): return frozenset() return frozenset([(parts.netloc, (parts.path or '').rstrip('/'))]) - def index_to_soup(self, url_or_raw, raw=False, as_tree=False): + def index_to_soup(self, url_or_raw, raw=False, as_tree=False, save_raw=None): ''' Convenience method that takes an URL to the index page and returns a `BeautifulSoup `_ @@ -692,6 +692,9 @@ class BasicNewsRecipe(Recipe): else: _raw = xml_to_unicode(_raw, strip_encoding_pats=True, resolve_entities=True)[0] _raw = clean_xml_chars(_raw) + if save_raw: + with lopen(save_raw, 'wb') as f: + f.write(_raw.encode('utf-8')) if as_tree: from html5_parser import parse return parse(_raw)