From 2865bcbb040f435c57de483c3d3e13d1d0a91540 Mon Sep 17 00:00:00 2001
From: Hayden <64056131+hay-kot@users.noreply.github.com>
Date: Sat, 20 Aug 2022 17:54:06 -0800
Subject: [PATCH] fix: missing user agent for scraper (#1586)

* set user agent for requests.get

* bump scraper version
---
 mealie/services/scraper/scraper_strategies.py | 3 ++-
 poetry.lock                                   | 8 ++++----
 pyproject.toml                                | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/mealie/services/scraper/scraper_strategies.py b/mealie/services/scraper/scraper_strategies.py
index 074e760fa546..c79536e4ff1d 100644
--- a/mealie/services/scraper/scraper_strategies.py
+++ b/mealie/services/scraper/scraper_strategies.py
@@ -15,6 +15,7 @@ from mealie.services.scraper.scraped_extras import ScrapedExtras
 
 from . import cleaner
 
+_FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0"
 SCRAPER_TIMEOUT = 15
 
 
@@ -28,7 +29,7 @@ def safe_scrape_html(url: str) -> str:
     if the request takes longer than 15 seconds. This is used to mitigate
     DDOS attacks from users providing a url with arbitrary large content.
     """
-    resp = requests.get(url, timeout=SCRAPER_TIMEOUT, stream=True)
+    resp = requests.get(url, timeout=SCRAPER_TIMEOUT, stream=True, headers={"User-Agent": _FIREFOX_UA})
 
     html_bytes = b""
 
diff --git a/poetry.lock b/poetry.lock
index a1b2bfb9440e..51a04fe7dfc3 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1246,7 +1246,7 @@ rdflib = ">=5.0.0"
 
 [[package]]
 name = "recipe-scrapers"
-version = "14.11.0"
+version = "14.13.0"
 description = "Python package, scraping recipes from all over the internet"
 category = "main"
 optional = false
@@ -1619,7 +1619,7 @@ pgsql = ["psycopg2-binary"]
 [metadata]
 lock-version = "1.1"
 python-versions = "^3.10"
-content-hash = "ac22f90df7cc8fc0e1787a0379d1c4e2f4cd1ab7520956fbd5bbf359c3a81480"
+content-hash = "80bd1bd922ed481e594d5950fb254cedfeeac467d5f3fcb4d241d6d9c689419b"
 
 [metadata.files]
 aiofiles = [
@@ -2381,8 +2381,8 @@ rdflib-jsonld = [
     {file = "rdflib_jsonld-0.6.2-py2.py3-none-any.whl", hash = "sha256:011afe67672353ca9978ab9a4bee964dff91f14042f2d8a28c22a573779d2f8b"},
 ]
 recipe-scrapers = [
-    {file = "recipe_scrapers-14.11.0-py3-none-any.whl", hash = "sha256:992b37ef2c29d66caaec82b2c5a1f9d901a74d2e267e60e505370c59ceadaeef"},
-    {file = "recipe_scrapers-14.11.0.tar.gz", hash = "sha256:85192e976388eeba9bb314c5cf75ac087ec1cfaf4b4aa1ffe580dae4099e2be9"},
+    {file = "recipe_scrapers-14.13.0-py3-none-any.whl", hash = "sha256:4d9eb6d22dbe38976e1853c7d6e8f2060412cefb741ff03818c34b3faf8ea8e9"},
+    {file = "recipe_scrapers-14.13.0.tar.gz", hash = "sha256:b4e08e9e34ff4490025a844c6823d5f269f54e1e866ee86162760a3f5ce0dd22"},
 ]
 requests = []
 requests-oauthlib = [
diff --git a/pyproject.toml b/pyproject.toml
index cbd409363ed5..9120221ef807 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ passlib = "^1.7.4"
 lxml = "^4.7.1"
 Pillow = "^8.2.0"
 apprise = "^0.9.6"
-recipe-scrapers = "^14.11.0"
+recipe-scrapers = "^14.13.0"
 psycopg2-binary = {version = "^2.9.1", optional = true}
 gunicorn = "^20.1.0"
 emails = "^0.6"
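
Why the header matters: requests identifies itself as `python-requests/x.y` by default, and a number of recipe sites and CDNs reject that identity outright (typically with a 403), so sending a mainstream browser User-Agent lets those pages load. Below is a minimal sketch of the patched `safe_scrape_html`, assuming the parts the hunk does not show: the first docstring line is paraphrased, and the chunk loop, the `_MAX_HTML_BYTES` cap (a hypothetical name and value), and the final decode are reconstructions, since the diff only confirms the `requests.get` call, part of the docstring, and the `html_bytes` accumulator.

```python
import requests

# Values confirmed by the patch above.
_FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0"
SCRAPER_TIMEOUT = 15

# Assumed cap; the real constant and its value fall outside the diff context.
_MAX_HTML_BYTES = 5 * 1024 * 1024


def safe_scrape_html(url: str) -> str:
    """Fetch the HTML for ``url``, cancelling the request
    if the request takes longer than 15 seconds. This is used to mitigate
    DDOS attacks from users providing a url with arbitrary large content.
    """
    # stream=True defers the body download so it can be read and capped in
    # chunks; the browser User-Agent avoids 403s from sites that reject the
    # default python-requests identity.
    resp = requests.get(url, timeout=SCRAPER_TIMEOUT, stream=True, headers={"User-Agent": _FIREFOX_UA})

    html_bytes = b""
    # Assumed loop body: accumulate chunks until the (hypothetical) cap.
    for chunk in resp.iter_content(chunk_size=1024):
        html_bytes += chunk
        if len(html_bytes) > _MAX_HTML_BYTES:
            break

    # Decode with the encoding requests detected, falling back to UTF-8.
    return html_bytes.decode(resp.encoding or "utf-8")
```

Note the role of `stream=True` in this design: without it, `requests.get` downloads the entire body before returning, so an arbitrarily large response could not be cut off early by any size check.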