Feature: Pick best image from list (#745)

* Pick largest image from list

* Add a safe value in case all requests fail

* Formatting
This commit is contained in:
cadamswaite 2021-10-20 05:30:06 +01:00 committed by GitHub
parent 3831eef508
commit 18b099b115
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 2 deletions

View File

@ -47,7 +47,21 @@ def scrape_image(image_url: str, slug: str) -> Path:
pass
if isinstance(image_url, list): # Handles List Types
image_url = image_url[0]
# Multiple images have been defined in the schema - usually different resolutions
# Typically would be in smallest->biggest order, but can't be certain so test each.
# 'Google will pick the best image to display in Search results based on the aspect ratio and resolution.'
all_image_requests = []
for url in image_url:
try:
r = requests.get(url, stream=True, headers={"User-Agent": ""})
except Exception:
logger.exception("Image {url} could not be requested")
continue
if r.status_code == 200:
all_image_requests.append((url, r))
image_url, _ = max(all_image_requests, key=lambda url_r: len(url_r[1].content), default=("", 0))
if isinstance(image_url, dict): # Handles Dictionary Types
for key in image_url:

View File

@ -145,7 +145,7 @@ def clean_scraper(scraped_data: SchemaScraperFactory.SchemaScraper, url: str) ->
return Recipe(
name=try_get_default(scraped_data.title, "name", "No Name Found", cleaner.clean_string),
slug="",
image=try_get_default(scraped_data.image, "image", None),
image=try_get_default(None, "image", None),
description=try_get_default(None, "description", "", cleaner.clean_string),
nutrition=try_get_default(None, "nutrition", None, cleaner.clean_nutrition),
recipe_yield=try_get_default(scraped_data.yields, "recipeYield", "1", cleaner.clean_string),