mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
News download: Allow passing username/password in feed URLs. Fixes #1900874 [[Enhancement] Username password protected support for RSS Custom news source](https://bugs.launchpad.net/calibre/+bug/1900874)
This commit is contained in:
parent
3c4dce3b95
commit
02df6c77fa
@ -7,30 +7,38 @@ Defines various abstract base classes that can be subclassed to create powerful
|
||||
__docformat__ = "restructuredtext en"
|
||||
|
||||
|
||||
import os, time, traceback, re, sys, io
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
from collections import defaultdict
|
||||
from contextlib import closing
|
||||
from urllib.parse import urlparse, urlsplit
|
||||
|
||||
|
||||
from calibre import (browser, __appname__, iswindows, force_unicode,
|
||||
strftime, preferred_encoding, as_unicode, random_user_agent)
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.web import Recipe
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre import (
|
||||
__appname__, as_unicode, browser, force_unicode, iswindows, preferred_encoding,
|
||||
random_user_agent, strftime
|
||||
)
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, CData, NavigableString, Tag
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
|
||||
from calibre.web.fetch.simple import option_parser as web2disk_option_parser, RecursiveFetcher, AbortArticle
|
||||
from calibre.web.fetch.utils import prepare_masthead_image
|
||||
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.date import now as nowf
|
||||
from calibre.utils.icu import numeric_sort_key
|
||||
from calibre.utils.img import save_cover_data_to, add_borders_to_image, image_to_data
|
||||
from calibre.utils.img import add_borders_to_image, image_to_data, save_cover_data_to
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
from calibre.utils.logging import ThreadSafeWrapper
|
||||
from polyglot.builtins import unicode_type, string_or_bytes, getcwd
|
||||
from polyglot.urllib import urlparse, urlsplit
|
||||
from calibre.utils.threadpool import NoResultsPending, ThreadPool, WorkRequest
|
||||
from calibre.web import Recipe
|
||||
from calibre.web.feeds import Feed, feed_from_xml, feeds_from_index, templates
|
||||
from calibre.web.fetch.simple import (
|
||||
AbortArticle, RecursiveFetcher, option_parser as web2disk_option_parser
|
||||
)
|
||||
from calibre.web.fetch.utils import prepare_masthead_image
|
||||
from polyglot.builtins import getcwd, string_or_bytes, unicode_type
|
||||
|
||||
|
||||
def classes(classes):
|
||||
@ -704,7 +712,9 @@ class BasicNewsRecipe(Recipe):
|
||||
_raw = self.encoding(_raw)
|
||||
else:
|
||||
_raw = _raw.decode(self.encoding, 'replace')
|
||||
from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
|
||||
from calibre.ebooks.chardet import (
|
||||
strip_encoding_declarations, xml_to_unicode
|
||||
)
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
if isinstance(_raw, unicode_type):
|
||||
_raw = strip_encoding_declarations(_raw)
|
||||
@ -724,9 +734,9 @@ class BasicNewsRecipe(Recipe):
|
||||
Extracts main article content from 'html', cleans up and returns as a (article_html, extracted_title) tuple.
|
||||
Based on the original readability algorithm by Arc90.
|
||||
'''
|
||||
from lxml.html import document_fromstring, fragment_fromstring, tostring
|
||||
|
||||
from calibre.ebooks.readability import readability
|
||||
from lxml.html import (fragment_fromstring, tostring,
|
||||
document_fromstring)
|
||||
|
||||
doc = readability.Document(html, self.log, url=url,
|
||||
keep_elements=self.auto_cleanup_keep)
|
||||
@ -1097,6 +1107,7 @@ class BasicNewsRecipe(Recipe):
|
||||
ans = src[:npos+1]
|
||||
if len(ans) < len(src):
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
|
||||
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
||||
ans = clean_xml_chars(ans) + '\u2026'
|
||||
return ans
|
||||
@ -1644,6 +1655,7 @@ class BasicNewsRecipe(Recipe):
|
||||
'''
|
||||
feeds = self.get_feeds()
|
||||
parsed_feeds = []
|
||||
br = self.browser
|
||||
for obj in feeds:
|
||||
if isinstance(obj, string_or_bytes):
|
||||
title, url = None, obj
|
||||
@ -1657,7 +1669,15 @@ class BasicNewsRecipe(Recipe):
|
||||
url = 'http'+url[4:]
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
|
||||
try:
|
||||
with closing(self.browser.open(url)) as f:
|
||||
purl = urlparse(url, allow_fragments=False)
|
||||
if purl.username or purl.password:
|
||||
hostname = purl.hostname
|
||||
if purl.port:
|
||||
hostname += f':{purl.port}'
|
||||
url = purl._replace(netloc=hostname).geturl()
|
||||
if purl.username and purl.password:
|
||||
br.add_password(url, purl.username, purl.password)
|
||||
with closing(br.open_novisit(url)) as f:
|
||||
parsed_feeds.append(feed_from_xml(f.read(),
|
||||
title=title,
|
||||
log=self.log,
|
||||
@ -1846,8 +1866,9 @@ class CalibrePeriodical(BasicNewsRecipe):
|
||||
zf = ZipFile(f)
|
||||
zf.extractall()
|
||||
zf.close()
|
||||
from calibre.web.feeds.recipes import compile_recipe
|
||||
from glob import glob
|
||||
|
||||
from calibre.web.feeds.recipes import compile_recipe
|
||||
try:
|
||||
recipe = compile_recipe(open(glob('*.recipe')[0],
|
||||
'rb').read())
|
||||
|
Loading…
x
Reference in New Issue
Block a user