Mirror of https://github.com/mealie-recipes/mealie.git (synced 2025-07-09 03:04:54 -04:00)
fix: Bulk URL Import Fixes (#2796)
* allow exceptions when fetching content
* removed extra bracket on import text
* added more fault tolerance and limited concurrency
* fix entries not being saved to report
* disable clicking into in-progress import
* conditionally render expansion
parent 449bb6f0ce
commit f8ad72ec31
@@ -49,6 +49,10 @@ export default defineComponent({
     ];

     function handleRowClick(item: ReportSummary) {
+      if (item.status === "in-progress") {
+        return;
+      }
+
       router.push(`/group/reports/${item.id}`);
     }

@@ -121,7 +121,7 @@
           <template #icon>
             {{ $globals.icons.database }}
           </template>
-          {{ $t('general.import') }}}
+          {{ $t('general.import') }}
         </BaseButton>
         <BaseButton
           color="info"
@@ -21,7 +21,7 @@
         {{ $d(Date.parse(item.timestamp), "short") }}
       </template>
       <template #expanded-item="{ headers, item }">
-        <td class="pa-6" :colspan="headers.length">{{ item.exception }}</td>
+        <td v-if="item.exception" class="pa-6" :colspan="headers.length">{{ item.exception }}</td>
       </template>
     </v-data-table>
   </v-container>
@@ -12,14 +12,17 @@ from mealie.services._base_service import BaseService
 _FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0"


-async def gather_with_concurrency(n, *coros):
+async def gather_with_concurrency(n, *coros, ignore_exceptions=False):
     semaphore = asyncio.Semaphore(n)

     async def sem_coro(coro):
         async with semaphore:
             return await coro

-    return await asyncio.gather(*(sem_coro(c) for c in coros))
+    results = await asyncio.gather(*(sem_coro(c) for c in coros), return_exceptions=ignore_exceptions)
+    if ignore_exceptions:
+        results = [r for r in results if not isinstance(r, Exception)]
+    return results


 async def largest_content_len(urls: list[str]) -> tuple[str, int]:
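For context, the updated helper can be exercised on its own. The following is a minimal sketch, not part of this commit: the fetch coroutine and example URLs are invented for illustration. It shows that with ignore_exceptions=True a failing task is filtered out of the results instead of aborting the whole gather.

# Standalone sketch of the updated helper (copied from the diff) plus a tiny demo.
import asyncio


async def gather_with_concurrency(n, *coros, ignore_exceptions=False):
    semaphore = asyncio.Semaphore(n)

    async def sem_coro(coro):
        async with semaphore:
            return await coro

    results = await asyncio.gather(*(sem_coro(c) for c in coros), return_exceptions=ignore_exceptions)
    if ignore_exceptions:
        results = [r for r in results if not isinstance(r, Exception)]
    return results


async def fetch(url: str) -> str:
    # Hypothetical stand-in for an HTTP call; fails for one URL on purpose.
    if "bad" in url:
        raise ValueError(f"cannot fetch {url}")
    return url


async def main():
    urls = ["https://example.com/a", "https://example.com/bad", "https://example.com/c"]
    # With ignore_exceptions=True the failing task is dropped rather than
    # cancelling the whole gather, so two results come back.
    results = await gather_with_concurrency(2, *(fetch(u) for u in urls), ignore_exceptions=True)
    print(results)  # ['https://example.com/a', 'https://example.com/c']


asyncio.run(main())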
@@ -31,7 +34,7 @@ async def largest_content_len(urls: list[str]) -> tuple[str, int]:

     async with AsyncClient() as client:
         tasks = [do(client, url) for url in urls]
-        responses: list[Response] = await gather_with_concurrency(10, *tasks)
+        responses: list[Response] = await gather_with_concurrency(10, *tasks, ignore_exceptions=True)
         for response in responses:
             len_int = int(response.headers.get("Content-Length", 0))
             if len_int > largest_len:
@@ -1,10 +1,16 @@
-from asyncio import gather
+import asyncio

 from pydantic import UUID4

 from mealie.repos.repository_factory import AllRepositories
 from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe
-from mealie.schema.reports.reports import ReportCategory, ReportCreate, ReportEntryCreate, ReportSummaryStatus
+from mealie.schema.reports.reports import (
+    ReportCategory,
+    ReportCreate,
+    ReportEntryCreate,
+    ReportEntryOut,
+    ReportSummaryStatus,
+)
 from mealie.schema.user.user import GroupInDB
 from mealie.services._base_service import BaseService
 from mealie.services.recipe.recipe_service import RecipeService
@@ -47,6 +53,7 @@ class RecipeBulkScraperService(BaseService):
         is_success = True
         is_failure = True

+        new_entries: list[ReportEntryOut] = []
         for entry in self.report_entries:
             if is_failure and entry.success:
                 is_failure = False
@@ -54,7 +61,7 @@ class RecipeBulkScraperService(BaseService):
             if is_success and not entry.success:
                 is_success = False

-            self.repos.group_report_entries.create(entry)
+            new_entries.append(self.repos.group_report_entries.create(entry))

         if is_success:
             self.report.status = ReportSummaryStatus.success
@@ -65,25 +72,29 @@ class RecipeBulkScraperService(BaseService):
         if not is_success and not is_failure:
             self.report.status = ReportSummaryStatus.partial

+        self.report.entries = new_entries
         self.repos.group_reports.update(self.report.id, self.report)

     async def scrape(self, urls: CreateRecipeByUrlBulk) -> None:
+        sem = asyncio.Semaphore(3)
+
         async def _do(url: str) -> Recipe | None:
-            try:
-                recipe, _ = await create_from_url(url)
-                return recipe
-            except Exception as e:
-                self.service.logger.error(f"failed to scrape url during bulk url import {b.url}")
-                self.service.logger.exception(e)
-                self._add_error_entry(f"failed to scrape url {url}", str(e))
-                return None
+            async with sem:
+                try:
+                    recipe, _ = await create_from_url(url)
+                    return recipe
+                except Exception as e:
+                    self.service.logger.error(f"failed to scrape url during bulk url import {url}")
+                    self.service.logger.exception(e)
+                    self._add_error_entry(f"failed to scrape url {url}", str(e))
+                    return None

         if self.report is None:
             self.get_report_id()
         tasks = [_do(b.url) for b in urls.imports]
-        results = await gather(*tasks)
+        results = await asyncio.gather(*tasks, return_exceptions=True)
         for b, recipe in zip(urls.imports, results, strict=True):
-            if not recipe:
+            if not recipe or isinstance(recipe, Exception):
                 continue

             if b.tags:
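For reference, the concurrency pattern the new scrape() relies on can be illustrated outside of Mealie. The sketch below is a simplification under stated assumptions: bulk_import and scrape_one are stand-ins invented here (scrape_one plays the role of create_from_url), and none of it is code from this repository.

# Simplified sketch of the pattern: at most three imports run at once, and a
# failure for one URL neither stops the others nor escapes gather().
import asyncio


async def scrape_one(url: str) -> str:
    # Hypothetical stand-in for create_from_url.
    await asyncio.sleep(0.1)
    if url.endswith("broken"):
        raise RuntimeError("scrape failed")
    return f"recipe from {url}"


async def bulk_import(urls: list[str]) -> list[str | None]:
    sem = asyncio.Semaphore(3)  # limit concurrent scrapes, as in the diff

    async def _do(url: str) -> str | None:
        async with sem:
            try:
                return await scrape_one(url)
            except Exception:
                # in the real service this is where an error report entry is added
                return None

    results = await asyncio.gather(*(_do(u) for u in urls), return_exceptions=True)
    # _do already swallows errors, so exceptions here would only come from
    # cancellation or bugs; skip them the same way the service does.
    return [r if not isinstance(r, Exception) else None for r in results]


print(asyncio.run(bulk_import(["https://a.test", "https://b.test/broken", "https://c.test"])))
# ['recipe from https://a.test', None, 'recipe from https://c.test']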
@@ -172,7 +172,7 @@ class RecipeScraperPackage(ABCScraperStrategy):
         try:
             scraped_schema = scrape_html(recipe_html, org_url=self.url)
         except (NoSchemaFoundInWildMode, AttributeError):
-            self.logger.error("Recipe Scraper was unable to extract a recipe.")
+            self.logger.error(f"Recipe Scraper was unable to extract a recipe from {self.url}")
             return None

         except ConnectionError as e: