mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
News download: Allow passing username/password in feed URLs. Fixes #1900874 [[Enhancement] Username password protected support for RSS Custom news source](https://bugs.launchpad.net/calibre/+bug/1900874)
This commit is contained in:
parent
3c4dce3b95
commit
02df6c77fa
@ -7,30 +7,38 @@ Defines various abstract base classes that can be subclassed to create powerful
|
|||||||
__docformat__ = "restructuredtext en"
|
__docformat__ = "restructuredtext en"
|
||||||
|
|
||||||
|
|
||||||
import os, time, traceback, re, sys, io
|
import io
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
from urllib.parse import urlparse, urlsplit
|
||||||
|
|
||||||
|
from calibre import (
|
||||||
from calibre import (browser, __appname__, iswindows, force_unicode,
|
__appname__, as_unicode, browser, force_unicode, iswindows, preferred_encoding,
|
||||||
strftime, preferred_encoding, as_unicode, random_user_agent)
|
random_user_agent, strftime
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
|
)
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, CData, NavigableString, Tag
|
||||||
from calibre.web import Recipe
|
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.web.fetch.simple import option_parser as web2disk_option_parser, RecursiveFetcher, AbortArticle
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.web.fetch.utils import prepare_masthead_image
|
|
||||||
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.utils.date import now as nowf
|
from calibre.utils.date import now as nowf
|
||||||
from calibre.utils.icu import numeric_sort_key
|
from calibre.utils.icu import numeric_sort_key
|
||||||
from calibre.utils.img import save_cover_data_to, add_borders_to_image, image_to_data
|
from calibre.utils.img import add_borders_to_image, image_to_data, save_cover_data_to
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
from calibre.utils.logging import ThreadSafeWrapper
|
from calibre.utils.logging import ThreadSafeWrapper
|
||||||
from polyglot.builtins import unicode_type, string_or_bytes, getcwd
|
from calibre.utils.threadpool import NoResultsPending, ThreadPool, WorkRequest
|
||||||
from polyglot.urllib import urlparse, urlsplit
|
from calibre.web import Recipe
|
||||||
|
from calibre.web.feeds import Feed, feed_from_xml, feeds_from_index, templates
|
||||||
|
from calibre.web.fetch.simple import (
|
||||||
|
AbortArticle, RecursiveFetcher, option_parser as web2disk_option_parser
|
||||||
|
)
|
||||||
|
from calibre.web.fetch.utils import prepare_masthead_image
|
||||||
|
from polyglot.builtins import getcwd, string_or_bytes, unicode_type
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
@ -704,7 +712,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
_raw = self.encoding(_raw)
|
_raw = self.encoding(_raw)
|
||||||
else:
|
else:
|
||||||
_raw = _raw.decode(self.encoding, 'replace')
|
_raw = _raw.decode(self.encoding, 'replace')
|
||||||
from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
|
from calibre.ebooks.chardet import (
|
||||||
|
strip_encoding_declarations, xml_to_unicode
|
||||||
|
)
|
||||||
from calibre.utils.cleantext import clean_xml_chars
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
if isinstance(_raw, unicode_type):
|
if isinstance(_raw, unicode_type):
|
||||||
_raw = strip_encoding_declarations(_raw)
|
_raw = strip_encoding_declarations(_raw)
|
||||||
@ -724,9 +734,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
Extracts main article content from 'html', cleans up and returns as a (article_html, extracted_title) tuple.
|
Extracts main article content from 'html', cleans up and returns as a (article_html, extracted_title) tuple.
|
||||||
Based on the original readability algorithm by Arc90.
|
Based on the original readability algorithm by Arc90.
|
||||||
'''
|
'''
|
||||||
|
from lxml.html import document_fromstring, fragment_fromstring, tostring
|
||||||
|
|
||||||
from calibre.ebooks.readability import readability
|
from calibre.ebooks.readability import readability
|
||||||
from lxml.html import (fragment_fromstring, tostring,
|
|
||||||
document_fromstring)
|
|
||||||
|
|
||||||
doc = readability.Document(html, self.log, url=url,
|
doc = readability.Document(html, self.log, url=url,
|
||||||
keep_elements=self.auto_cleanup_keep)
|
keep_elements=self.auto_cleanup_keep)
|
||||||
@ -1097,6 +1107,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
ans = src[:npos+1]
|
ans = src[:npos+1]
|
||||||
if len(ans) < len(src):
|
if len(ans) < len(src):
|
||||||
from calibre.utils.cleantext import clean_xml_chars
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
|
|
||||||
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
||||||
ans = clean_xml_chars(ans) + '\u2026'
|
ans = clean_xml_chars(ans) + '\u2026'
|
||||||
return ans
|
return ans
|
||||||
@ -1644,6 +1655,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
'''
|
'''
|
||||||
feeds = self.get_feeds()
|
feeds = self.get_feeds()
|
||||||
parsed_feeds = []
|
parsed_feeds = []
|
||||||
|
br = self.browser
|
||||||
for obj in feeds:
|
for obj in feeds:
|
||||||
if isinstance(obj, string_or_bytes):
|
if isinstance(obj, string_or_bytes):
|
||||||
title, url = None, obj
|
title, url = None, obj
|
||||||
@ -1657,7 +1669,15 @@ class BasicNewsRecipe(Recipe):
|
|||||||
url = 'http'+url[4:]
|
url = 'http'+url[4:]
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
|
self.report_progress(0, _('Fetching feed')+' %s...'%(title if title else url))
|
||||||
try:
|
try:
|
||||||
with closing(self.browser.open(url)) as f:
|
purl = urlparse(url, allow_fragments=False)
|
||||||
|
if purl.username or purl.password:
|
||||||
|
hostname = purl.hostname
|
||||||
|
if purl.port:
|
||||||
|
hostname += f':{purl.port}'
|
||||||
|
url = purl._replace(netloc=hostname).geturl()
|
||||||
|
if purl.username and purl.password:
|
||||||
|
br.add_password(url, purl.username, purl.password)
|
||||||
|
with closing(br.open_novisit(url)) as f:
|
||||||
parsed_feeds.append(feed_from_xml(f.read(),
|
parsed_feeds.append(feed_from_xml(f.read(),
|
||||||
title=title,
|
title=title,
|
||||||
log=self.log,
|
log=self.log,
|
||||||
@ -1846,8 +1866,9 @@ class CalibrePeriodical(BasicNewsRecipe):
|
|||||||
zf = ZipFile(f)
|
zf = ZipFile(f)
|
||||||
zf.extractall()
|
zf.extractall()
|
||||||
zf.close()
|
zf.close()
|
||||||
from calibre.web.feeds.recipes import compile_recipe
|
|
||||||
from glob import glob
|
from glob import glob
|
||||||
|
|
||||||
|
from calibre.web.feeds.recipes import compile_recipe
|
||||||
try:
|
try:
|
||||||
recipe = compile_recipe(open(glob('*.recipe')[0],
|
recipe = compile_recipe(open(glob('*.recipe')[0],
|
||||||
'rb').read())
|
'rb').read())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user