News download: Allow passing username/password in feed URLs. Fixes #1900874 [[Enhancement] Username password protected support for RSS Custom news source](https://bugs.launchpad.net/calibre/+bug/1900874)

This commit is contained in:
Kovid Goyal 2020-10-26 11:33:00 +05:30
parent 3c4dce3b95
commit 02df6c77fa
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -7,30 +7,38 @@ Defines various abstract base classes that can be subclassed to create powerful
__docformat__ = "restructuredtext en"
import os, time, traceback, re, sys, io
import io
import os
import re
import sys
import time
import traceback
from collections import defaultdict
from contextlib import closing
from urllib.parse import urlparse, urlsplit
from calibre import (browser, __appname__, iswindows, force_unicode,
strftime, preferred_encoding, as_unicode, random_user_agent)
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.web import Recipe
from calibre.ebooks.metadata.toc import TOC
from calibre import (
__appname__, as_unicode, browser, force_unicode, iswindows, preferred_encoding,
random_user_agent, strftime
)
from calibre.ebooks.BeautifulSoup import BeautifulSoup, CData, NavigableString, Tag
from calibre.ebooks.metadata import MetaInformation
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.web.fetch.simple import option_parser as web2disk_option_parser, RecursiveFetcher, AbortArticle
from calibre.web.fetch.utils import prepare_masthead_image
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import now as nowf
from calibre.utils.icu import numeric_sort_key
from calibre.utils.img import save_cover_data_to, add_borders_to_image, image_to_data
from calibre.utils.img import add_borders_to_image, image_to_data, save_cover_data_to
from calibre.utils.localization import canonicalize_lang
from calibre.utils.logging import ThreadSafeWrapper
from polyglot.builtins import unicode_type, string_or_bytes, getcwd
from polyglot.urllib import urlparse, urlsplit
from calibre.utils.threadpool import NoResultsPending, ThreadPool, WorkRequest
from calibre.web import Recipe
from calibre.web.feeds import Feed, feed_from_xml, feeds_from_index, templates
from calibre.web.fetch.simple import (
AbortArticle, RecursiveFetcher, option_parser as web2disk_option_parser
)
from calibre.web.fetch.utils import prepare_masthead_image
from polyglot.builtins import getcwd, string_or_bytes, unicode_type
def classes(classes):
@ -704,7 +712,9 @@ class BasicNewsRecipe(Recipe):
_raw = self.encoding(_raw)
else:
_raw = _raw.decode(self.encoding, 'replace')
from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
from calibre.ebooks.chardet import (
strip_encoding_declarations, xml_to_unicode
)
from calibre.utils.cleantext import clean_xml_chars
if isinstance(_raw, unicode_type):
_raw = strip_encoding_declarations(_raw)
@ -724,9 +734,9 @@ class BasicNewsRecipe(Recipe):
Extracts the main article content from 'html', cleans it up and returns it as an (article_html, extracted_title) tuple.
Based on the original readability algorithm by Arc90.
'''
from lxml.html import document_fromstring, fragment_fromstring, tostring
from calibre.ebooks.readability import readability
from lxml.html import (fragment_fromstring, tostring,
document_fromstring)
doc = readability.Document(html, self.log, url=url,
keep_elements=self.auto_cleanup_keep)
@ -1097,6 +1107,7 @@ class BasicNewsRecipe(Recipe):
ans = src[:npos+1]
if len(ans) < len(src):
from calibre.utils.cleantext import clean_xml_chars
# Truncating the string can leave a dangling UTF-16 half-surrogate, which lxml rejects, so strip invalid XML characters before appending the ellipsis
ans = clean_xml_chars(ans) + '\u2026'
return ans
@ -1644,6 +1655,7 @@ class BasicNewsRecipe(Recipe):
'''
feeds = self.get_feeds()
parsed_feeds = []
br = self.browser
for obj in feeds:
if isinstance(obj, string_or_bytes):
title, url = None, obj
@ -1657,7 +1669,15 @@ class BasicNewsRecipe(Recipe):
url = 'http'+url[4:]
self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
try:
with closing(self.browser.open(url)) as f:
purl = urlparse(url, allow_fragments=False)
if purl.username or purl.password:
hostname = purl.hostname
if purl.port:
hostname += f':{purl.port}'
url = purl._replace(netloc=hostname).geturl()
if purl.username and purl.password:
br.add_password(url, purl.username, purl.password)
with closing(br.open_novisit(url)) as f:
parsed_feeds.append(feed_from_xml(f.read(),
title=title,
log=self.log,
@ -1846,8 +1866,9 @@ class CalibrePeriodical(BasicNewsRecipe):
zf = ZipFile(f)
zf.extractall()
zf.close()
from calibre.web.feeds.recipes import compile_recipe
from glob import glob
from calibre.web.feeds.recipes import compile_recipe
try:
recipe = compile_recipe(open(glob('*.recipe')[0],
'rb').read())