feat: Remove OCR Support (#2838)

* remove ocr package

* remove tesseract

* remove OCR from app

* remove OCR from tests

* fix docs
This commit is contained in:
Michael Genson 2023-12-14 20:26:43 -06:00 committed by GitHub
parent c48680374d
commit ca9f66ee24
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 29 additions and 1570 deletions

View File

@ -67,7 +67,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install libsasl2-dev libldap2-dev libssl-dev tesseract-ocr-all
sudo apt-get install libsasl2-dev libldap2-dev libssl-dev
poetry install
poetry add "psycopg2-binary==2.8.6"
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' || steps.cache-validate.outputs.cache-hit-success != 'true'

View File

@ -50,7 +50,6 @@ RUN apt-get update \
build-essential \
libpq-dev \
libwebp-dev \
tesseract-ocr-all \
# LDAP Dependencies
libsasl2-dev libldap2-dev libssl-dev \
gnupg gnupg2 gnupg1 \
@ -89,7 +88,6 @@ RUN apt-get update \
&& apt-get install --no-install-recommends -y \
gosu \
iproute2 \
tesseract-ocr-all \
libldap-common \
&& rm -rf /var/lib/apt/lists/*

File diff suppressed because one or more lines are too long

View File

@ -102,7 +102,6 @@ const SAVE_EVENT = "save";
const DELETE_EVENT = "delete";
const CLOSE_EVENT = "close";
const JSON_EVENT = "json";
const OCR_EVENT = "ocr";
export default defineComponent({
components: { RecipeContextMenu, RecipeFavoriteBadge, RecipeTimerMenu, RecipeTimelineBadge },
@ -139,10 +138,6 @@ export default defineComponent({
type: Boolean,
default: false,
},
showOcrButton: {
type: Boolean,
default: false,
},
},
setup(props, context) {
const deleteDialog = ref(false);
@ -175,15 +170,6 @@ export default defineComponent({
},
];
if (props.showOcrButton) {
editorButtons.splice(2, 0, {
text: i18n.t("ocr-editor.ocr-editor"),
icon: $globals.icons.eye,
event: OCR_EVENT,
color: "accent",
});
}
function emitHandler(event: string) {
switch (event) {
case CLOSE_EVENT:

View File

@ -1,390 +0,0 @@
<template>
<v-container
v-if="recipe && recipe.slug && recipe.settings && recipe.recipeIngredient"
:class="{
'pa-0': $vuetify.breakpoint.smAndDown,
}"
>
<BannerExperimental />
<div v-if="loading">
<v-spacer />
<v-progress-circular indeterminate class="" color="primary"> </v-progress-circular>
{{ loadingText }}
<v-spacer />
</div>
<v-row v-if="!loading">
<v-col cols="12" sm="7" md="7" lg="7">
<RecipeOcrEditorPageCanvas
:image="canvasImage"
:tsv="tsv"
@setText="canvasSetText"
@update-recipe="updateRecipe"
@close-editor="closeEditor"
@text-selected="updateSelectedText"
>
</RecipeOcrEditorPageCanvas>
<RecipeOcrEditorPageHelp />
</v-col>
<v-col cols="12" sm="5" md="5" lg="5">
<v-tabs v-model="tab" fixed-tabs>
<v-tab key="header">
{{ $t("general.recipe") }}
</v-tab>
<v-tab key="ingredients">
{{ $t("recipe.ingredients") }}
</v-tab>
<v-tab key="instructions">
{{ $t("recipe.instructions") }}
</v-tab>
</v-tabs>
<v-tabs-items v-model="tab">
<v-tab-item key="header">
<v-text-field
v-model="recipe.name"
class="my-3"
:label="$t('recipe.recipe-name')"
:rules="[validators.required]"
@focus="selectedRecipeField = 'name'"
>
</v-text-field>
<div class="d-flex flex-wrap">
<v-text-field
v-model="recipe.totalTime"
class="mx-2"
:label="$t('recipe.total-time')"
@click="selectedRecipeField = 'totalTime'"
></v-text-field>
<v-text-field
v-model="recipe.prepTime"
class="mx-2"
:label="$t('recipe.prep-time')"
@click="selectedRecipeField = 'prepTime'"
></v-text-field>
<v-text-field
v-model="recipe.performTime"
class="mx-2"
:label="$t('recipe.perform-time')"
@click="selectedRecipeField = 'performTime'"
></v-text-field>
</div>
<v-textarea
v-model="recipe.description"
auto-grow
min-height="100"
:label="$t('recipe.description')"
@click="selectedRecipeField = 'description'"
>
</v-textarea>
<v-text-field
v-model="recipe.recipeYield"
dense
:label="$t('recipe.servings')"
@click="selectedRecipeField = 'recipeYield'"
>
</v-text-field>
</v-tab-item>
<v-tab-item key="ingredients">
<div class="d-flex justify-end mt-2">
<RecipeDialogBulkAdd class="ml-1 mr-1" :input-text-prop="canvasSelectedText" @bulk-data="addIngredient" />
<BaseButton @click="addIngredient"> {{ $t("general.new") }} </BaseButton>
</div>
<draggable
v-if="recipe.recipeIngredient.length > 0"
v-model="recipe.recipeIngredient"
handle=".handle"
v-bind="{
animation: 200,
group: 'description',
disabled: false,
ghostClass: 'ghost',
}"
@start="drag = true"
@end="drag = false"
>
<TransitionGroup type="transition" :name="!drag ? 'flip-list' : ''">
<RecipeIngredientEditor
v-for="(ingredient, index) in recipe.recipeIngredient"
:key="ingredient.referenceId"
v-model="recipe.recipeIngredient[index]"
class="list-group-item"
:disable-amount="recipe.settings.disableAmount"
@delete="recipe.recipeIngredient.splice(index, 1)"
@clickIngredientField="setSingleIngredient($event, index)"
/>
</TransitionGroup>
</draggable>
</v-tab-item>
<v-tab-item key="instructions">
<div class="d-flex justify-end mt-2">
<RecipeDialogBulkAdd class="ml-1 mr-1" :input-text-prop="canvasSelectedText" @bulk-data="addStep" />
<BaseButton @click="addStep()"> {{ $t("general.new") }}</BaseButton>
</div>
<RecipePageInstructions
v-model="recipe.recipeInstructions"
:ingredients="recipe.recipeIngredient"
:disable-amount="recipe.settings.disableAmount"
:edit="true"
:recipe="recipe"
:assets.sync="recipe.assets"
@click-instruction-field="setSingleStep"
/>
</v-tab-item>
</v-tabs-items>
</v-col>
</v-row>
</v-container>
</template>
<script lang="ts">
import { defineComponent, ref, onMounted, reactive, toRefs, useContext, useRouter, computed, useRoute } from "@nuxtjs/composition-api";
import { until } from "@vueuse/core";
import { invoke } from "@vueuse/shared";
import draggable from "vuedraggable";
import RecipePageInstructions from "~/components/Domain/Recipe/RecipePage/RecipePageParts/RecipePageInstructions.vue";
import { useUserApi, useStaticRoutes } from "~/composables/api";
import { OcrTsvResponse as NullableOcrTsvResponse } from "~/lib/api/types/ocr";
import { validators } from "~/composables/use-validators";
import { Recipe, RecipeIngredient, RecipeStep } from "~/lib/api/types/recipe";
import { Paths, Leaves, SelectedRecipeLeaves } from "~/types/ocr-types";
import BannerExperimental from "~/components/global/BannerExperimental.vue";
import RecipeDialogBulkAdd from "~/components/Domain/Recipe/RecipeDialogBulkAdd.vue";
import RecipeIngredientEditor from "~/components/Domain/Recipe/RecipeIngredientEditor.vue";
import RecipeOcrEditorPageCanvas from "~/components/Domain/Recipe/RecipeOcrEditorPage/RecipeOcrEditorPageParts/RecipeOcrEditorPageCanvas.vue";
import RecipeOcrEditorPageHelp from "~/components/Domain/Recipe/RecipeOcrEditorPage/RecipeOcrEditorPageParts/RecipeOcrEditorPageHelp.vue";
import { uuid4 } from "~/composables/use-utils";
import { NoUndefinedField } from "~/lib/api/types/non-generated";
// Temporary Shim until we have a better solution
// https://github.com/phillipdupuis/pydantic-to-typescript/issues/28
type OcrTsvResponse = NoUndefinedField<NullableOcrTsvResponse>;
export default defineComponent({
components: {
RecipeIngredientEditor,
draggable,
BannerExperimental,
RecipeDialogBulkAdd,
RecipePageInstructions,
RecipeOcrEditorPageCanvas,
RecipeOcrEditorPageHelp,
},
props: {
recipe: {
type: Object as () => NoUndefinedField<Recipe>,
required: true,
},
},
setup(props) {
const { $auth } = useContext();
const route = useRoute();
const groupSlug = computed(() => route.value.params.groupSlug || $auth.user?.groupSlug || "");
const router = useRouter();
const api = useUserApi();
const tsv = ref<OcrTsvResponse[]>([]);
const drag = ref(false);
const { i18n } = useContext();
const { recipeAssetPath } = useStaticRoutes();
function assetURL(assetName: string) {
return recipeAssetPath(props.recipe.id, assetName);
}
const state = reactive({
loading: true,
loadingText: i18n.tc("general.loading-recipe"),
tab: null,
selectedRecipeField: "" as SelectedRecipeLeaves | "",
canvasSelectedText: "",
canvasImage: new Image(),
});
const setPropertyValueByPath = function <T extends Recipe>(object: T, path: Paths<T>, value: any) {
const a = path.split(".");
let nextProperty: any = object;
for (let i = 0, n = a.length - 1; i < n; ++i) {
const k = a[i];
if (k in nextProperty) {
nextProperty = nextProperty[k];
} else {
return;
}
}
nextProperty[a[a.length - 1]] = value;
};
/**
* This function will find the title of a recipe with the assumption that the title
* has the biggest ratio of surface area / number of words on the image.
* @return Returns the text parts of the block with the highest score.
*/
function findRecipeTitle() {
const filtered = tsv.value.filter((element) => element.level === 2 || element.level === 5);
const blocks = [[]] as OcrTsvResponse[][];
let blockNum = 1;
filtered.forEach((element, index, array) => {
if (index !== 0 && array[index - 1].blockNum !== element.blockNum) {
blocks.push([]);
blockNum = element.blockNum;
}
blocks[blockNum - 1].push(element);
});
let bestScore = 0;
let bestBlock = blocks[0];
blocks.forEach((element) => {
// element[0] is the block declaration line containing the blocks total dimensions
// element.length is the number of words (+ 2) contained in that block
const elementScore = (element[0].height * element[0].width) / element.length; // Prettier is adding useless parenthesis for a mysterious reason
const elementText = element.map((element) => element.text).join(""); // Identify empty blocks and don't count them
if (elementScore > bestScore && elementText !== "") {
bestBlock = element;
bestScore = elementScore;
}
});
return bestBlock
.filter((element) => element.level === 5 && element.conf >= 40)
.map((element) => {
return element.text.trim();
})
.join(" ");
}
onMounted(() => {
invoke(async () => {
await until(props.recipe).not.toBeNull();
state.loadingText = i18n.tc("general.loading-ocr-data");
const assetName = props.recipe.assets[0].fileName;
const imagesrc = assetURL(assetName);
state.canvasImage.src = imagesrc;
const res = await api.ocr.assetToTsv(props.recipe.slug, assetName);
tsv.value = res.data as OcrTsvResponse[];
state.loading = false;
if (props.recipe.name.match(/New\sOCR\sRecipe(\s\([0-9]+\))?/g)) {
props.recipe.name = findRecipeTitle();
}
});
});
function addIngredient(ingredients: Array<string> | null = null) {
if (ingredients?.length) {
const newIngredients = ingredients.map((x) => {
return {
referenceId: uuid4(),
title: "",
note: x,
unit: undefined,
food: undefined,
disableAmount: true,
quantity: 1,
originalText: "",
};
});
if (newIngredients) {
// @ts-expect-error - prop can be null-type by NoUndefinedField type forces it to be set
props.recipe.recipeIngredient.push(...newIngredients);
}
} else {
props.recipe.recipeIngredient.push({
referenceId: uuid4(),
title: "",
note: "",
// @ts-expect-error - prop can be null-type by NoUndefinedField type forces it to be set
unit: undefined,
// @ts-expect-error - prop can be null-type by NoUndefinedField type forces it to be set
food: undefined,
disableAmount: true,
quantity: 1,
});
}
}
function addStep(steps: Array<string> | null = null) {
if (!props.recipe.recipeInstructions) {
return;
}
if (steps) {
const cleanedSteps = steps.map((step) => {
return { id: uuid4(), text: step, title: "", ingredientReferences: [] };
});
props.recipe.recipeInstructions.push(...cleanedSteps);
} else {
props.recipe.recipeInstructions.push({ id: uuid4(), text: "", title: "", ingredientReferences: [] });
}
}
// EVENT HANDLERS
// Canvas component event handlers
async function updateRecipe() {
const { data } = await api.recipes.updateOne(props.recipe.slug, props.recipe);
if (data?.slug) {
router.push(`/g/${groupSlug.value}/r/${data.slug}`);
}
}
function closeEditor() {
router.push(`/g/${groupSlug.value}/r/${props.recipe.slug}`);
}
const canvasSetText = function () {
if (state.selectedRecipeField !== "") {
setPropertyValueByPath<Recipe>(props.recipe, state.selectedRecipeField, state.canvasSelectedText);
}
};
function updateSelectedText(value: string) {
state.canvasSelectedText = value;
}
// Recipe field selection event handlers
function setSingleIngredient(f: keyof RecipeIngredient, index: number) {
state.selectedRecipeField = `recipeIngredient.${index}.${f}` as SelectedRecipeLeaves;
}
// Leaves<RecipeStep[]> will return some function types making eslint very unhappy
type RecipeStepsLeaves = `${number}.${Leaves<RecipeStep>}`;
function setSingleStep(path: RecipeStepsLeaves) {
state.selectedRecipeField = `recipeInstructions.${path}` as SelectedRecipeLeaves;
}
return {
...toRefs(state),
addIngredient,
addStep,
drag,
assetURL,
updateRecipe,
closeEditor,
updateSelectedText,
tsv,
validators,
setSingleIngredient,
setSingleStep,
canvasSetText,
};
},
});
</script>
<style lang="css">
.ghost {
opacity: 0.5;
}
</style>

View File

@ -1,488 +0,0 @@
<template>
<v-card flat tile>
<v-toolbar v-for="(section, idx) in toolbarIcons" :key="section.sectionTitle" dense style="float: left">
<v-toolbar-title bottom>
{{ section.sectionTitle }}
</v-toolbar-title>
<v-tooltip v-for="icon in section.icons" :key="icon.name" bottom>
<template #activator="{ on, attrs }">
<v-btn icon @click="section.eventHandler(icon.name)">
<v-icon :color="section.highlight === icon.name ? 'primary' : 'default'" v-bind="attrs" v-on="on">
{{ icon.icon }}
</v-icon>
</v-btn>
</template>
<span>{{ icon.tooltip }}</span>
</v-tooltip>
<v-divider v-if="idx != toolbarIcons.length - 1" vertical class="mx-2" />
</v-toolbar>
<v-toolbar dense style="float: right">
<BaseButton class="ml-1 mr-1" save @click="updateRecipe()">
{{ $t("general.save") }}
</BaseButton>
<BaseButton cancel @click="closeEditor()">
{{ $t("general.close") }}
</BaseButton>
</v-toolbar>
<canvas
ref="canvas"
@mousedown="handleMouseDown"
@mouseup="handleMouseUp"
@mousemove="handleMouseMove"
@wheel="handleMouseScroll"
>
</canvas>
<span style="white-space: pre-wrap">
{{ selectedText.trim() }}
</span>
</v-card>
</template>
<script lang="ts">
import { defineComponent, reactive, useContext, ref, toRefs, watch, onMounted } from "@nuxtjs/composition-api";
import { NoUndefinedField } from "~/lib/api/types/non-generated";
import { OcrTsvResponse as NullableOcrTsvResponse } from "~/lib/api/types/ocr";
import { CanvasModes, SelectedTextSplitModes, ImagePosition, Mouse, CanvasRect, ToolbarIcons } from "~/types/ocr-types";
// Temporary Shim until we have a better solution
// https://github.com/phillipdupuis/pydantic-to-typescript/issues/28
type OcrTsvResponse = NoUndefinedField<NullableOcrTsvResponse>;
export default defineComponent({
props: {
image: {
type: HTMLImageElement,
required: true,
},
tsv: {
type: Array as () => OcrTsvResponse[],
required: true,
},
},
setup(props, context) {
const state = reactive({
canvas: null as HTMLCanvasElement | null,
ctx: null as CanvasRenderingContext2D | null,
canvasRect: null as DOMRect | null,
rect: {
startX: 0,
startY: 0,
w: 0,
h: 0,
},
mouse: {
current: {
x: 0,
y: 0,
},
down: false,
},
selectedText: "",
canvasMode: "selection" as CanvasModes,
imagePosition: {
sx: 0,
sy: 0,
sWidth: 0,
sHeight: 0,
dx: 0,
dy: 0,
dWidth: 0,
dHeight: 0,
scale: 1,
panStartPoint: {
x: 0,
y: 0,
},
} as ImagePosition,
isImageSmallerThanCanvas: false,
selectedTextSplitMode: "lineNum" as SelectedTextSplitModes,
});
watch(
() => state.selectedText,
(value) => {
context.emit("text-selected", value);
}
);
onMounted(() => {
if (state.canvas === null) return; // never happens because the ref "canvas" is in the template
state.ctx = state.canvas.getContext("2d") as CanvasRenderingContext2D;
state.ctx.imageSmoothingEnabled = false;
state.canvasRect = state.canvas.getBoundingClientRect();
state.canvas.width = state.canvasRect.width;
if (props.image.width < state.canvas.width) {
state.isImageSmallerThanCanvas = true;
}
state.imagePosition.dWidth = state.canvas.width;
updateImageScale();
state.canvas.height = Math.min(props.image.height * state.imagePosition.scale, 700); // Max height of 700px
state.imagePosition.sWidth = props.image.width;
state.imagePosition.sHeight = props.image.height;
state.imagePosition.dWidth = state.canvas.width;
drawImage(state.ctx);
drawWordBoxesOnCanvas(props.tsv);
});
function handleMouseDown(event: MouseEvent) {
if (state.canvasRect === null || state.canvas === null || state.ctx === null) return;
state.mouse.down = true;
updateMousePos(event);
if (state.canvasMode === "selection") {
if (isMouseInRect(state.mouse, state.rect)) {
context.emit("setText", state.selectedText);
} else {
state.ctx.fillStyle = "rgb(255, 255, 255)";
state.ctx.fillRect(0, 0, state.canvas.width, state.canvas.height);
drawImage(state.ctx);
state.rect.startX = state.mouse.current.x;
state.rect.startY = state.mouse.current.y;
resetSelection();
}
return;
}
if (state.canvasMode === "panAndZoom") {
state.imagePosition.panStartPoint.x = state.mouse.current.x - state.imagePosition.dx;
state.imagePosition.panStartPoint.y = state.mouse.current.y - state.imagePosition.dy;
resetSelection();
}
}
function handleMouseUp(_event: MouseEvent) {
if (state.canvasRect === null) return;
state.mouse.down = false;
state.selectedText = getWordsInSelection(props.tsv, state.rect);
}
function handleMouseMove(event: MouseEvent) {
if (state.canvasRect === null || state.canvas === null || state.ctx === null) return;
updateMousePos(event);
if (state.mouse.down) {
if (state.canvasMode === "selection") {
state.rect.w = state.mouse.current.x - state.rect.startX;
state.rect.h = state.mouse.current.y - state.rect.startY;
draw();
return;
}
if (state.canvasMode === "panAndZoom") {
state.canvas.style.cursor = "move";
state.imagePosition.dx = state.mouse.current.x - state.imagePosition.panStartPoint.x;
state.imagePosition.dy = state.mouse.current.y - state.imagePosition.panStartPoint.y;
keepImageInCanvas();
state.ctx.fillStyle = "rgb(255, 255, 255)";
state.ctx.fillRect(0, 0, state.canvas.width, state.canvas.height);
drawImage(state.ctx);
return;
}
}
if (isMouseInRect(state.mouse, state.rect) && state.canvasMode === "selection") {
state.canvas.style.cursor = "pointer";
} else {
state.canvas.style.cursor = "default";
}
}
const scrollSensitivity = 0.05;
function handleMouseScroll(event: WheelEvent) {
if (state.isImageSmallerThanCanvas) return;
if (state.canvasRect === null || state.canvas === null || state.ctx === null) return;
if (state.canvasMode === "panAndZoom") {
event.preventDefault();
updateMousePos(event);
const m = Math.sign(event.deltaY);
const ndx = state.imagePosition.dx + m * state.imagePosition.dWidth * scrollSensitivity;
const ndy = state.imagePosition.dy + m * state.imagePosition.dHeight * scrollSensitivity;
const ndw = state.imagePosition.dWidth + -m * state.imagePosition.dWidth * scrollSensitivity * 2;
const ndh = state.imagePosition.dHeight + -m * state.imagePosition.dHeight * scrollSensitivity * 2;
if (ndw < props.image.width) {
state.imagePosition.dx = ndx;
state.imagePosition.dy = ndy;
state.imagePosition.dWidth = ndw;
state.imagePosition.dHeight = ndh;
}
keepImageInCanvas();
updateImageScale();
state.ctx.fillStyle = "rgb(255, 255, 255)";
state.ctx.fillRect(0, 0, state.canvas.width, state.canvas.height);
drawImage(state.ctx);
}
}
function draw() {
if (state.canvasRect === null || state.canvas === null || state.ctx === null) return;
if (state.mouse.down) {
state.ctx.imageSmoothingEnabled = false;
state.ctx.fillStyle = "rgb(255, 255, 255)";
state.ctx.fillRect(0, 0, state.canvas.width, state.canvas.height);
drawImage(state.ctx);
state.ctx.fillStyle = "rgba(255, 255, 255, 0.1)";
state.ctx.setLineDash([6]);
state.ctx.fillRect(state.rect.startX, state.rect.startY, state.rect.w, state.rect.h);
state.ctx.strokeRect(state.rect.startX, state.rect.startY, state.rect.w, state.rect.h);
}
}
function drawImage(ctx: CanvasRenderingContext2D) {
ctx.drawImage(
props.image,
state.imagePosition.sx,
state.imagePosition.sy,
state.imagePosition.sWidth,
state.imagePosition.sHeight,
state.imagePosition.dx,
state.imagePosition.dy,
state.imagePosition.dWidth,
state.imagePosition.dHeight
);
}
function keepImageInCanvas() {
if (state.canvasRect === null || state.canvas === null) return;
// Prevent image from being smaller than the canvas width
if (state.imagePosition.dWidth - state.canvas.width < 0) {
state.imagePosition.dWidth = state.canvas.width;
}
// Prevent image from being smaller than the canvas height
if (state.imagePosition.dHeight - state.canvas.height < 0) {
state.imagePosition.dHeight = props.image.height * state.imagePosition.scale;
}
// Prevent to move the image too much to the left
if (state.canvas.width - state.imagePosition.dx - state.imagePosition.dWidth > 0) {
state.imagePosition.dx = state.canvas.width - state.imagePosition.dWidth;
}
// Prevent to move the image too much to the top
if (state.canvas.height - state.imagePosition.dy - state.imagePosition.dHeight > 0) {
state.imagePosition.dy = state.canvas.height - state.imagePosition.dHeight;
}
// Prevent to move the image too much to the right
if (state.imagePosition.dx > 0) {
state.imagePosition.dx = 0;
}
// Prevent to move the image too much to the bottom
if (state.imagePosition.dy > 0) {
state.imagePosition.dy = 0;
}
}
function updateImageScale() {
state.imagePosition.scale = state.imagePosition.dWidth / props.image.width;
// force the original ratio to be respected
state.imagePosition.dHeight = props.image.height * state.imagePosition.scale;
// Don't let images bigger than the canvas be zoomed in more than 1:1 scale
// Meaning only let images smaller than the canvas to have a scale > 1
if (!state.isImageSmallerThanCanvas && state.imagePosition.scale > 1) {
state.imagePosition.scale = 1;
}
}
function resetSelection() {
if (state.canvasRect === null) return;
state.rect.w = 0;
state.rect.h = 0;
state.selectedText = "";
}
function updateMousePos<T extends MouseEvent>(event: T) {
if (state.canvas === null) return;
state.canvasRect = state.canvas.getBoundingClientRect();
state.mouse.current = {
x: event.clientX - state.canvasRect.left,
y: event.clientY - state.canvasRect.top,
};
}
function isMouseInRect(mouse: Mouse, rect: CanvasRect) {
if (state.canvasRect === null) return;
const correctRect = correctRectCoordinates(rect);
return (
mouse.current.x > correctRect.startX &&
mouse.current.x < correctRect.startX + correctRect.w &&
mouse.current.y > correctRect.startY &&
mouse.current.y < correctRect.startY + correctRect.h
);
}
/**
* Returns rectangle coordinates with positive dimensions
* @param rect A rectangle
* @returns An equivalent rectangle with width and height > 0
*/
function correctRectCoordinates(rect: CanvasRect) {
if (rect.w < 0) {
rect.startX = rect.startX + rect.w;
rect.w = -rect.w;
}
if (rect.h < 0) {
rect.startY = rect.startY + rect.h;
rect.h = -rect.h;
}
return rect;
}
function drawWordBoxesOnCanvas(tsv: OcrTsvResponse[]) {
if (state.canvasRect === null || state.canvas === null || state.ctx === null) return;
state.ctx.fillStyle = "rgb(255, 255, 255, 0.3)";
tsv
.filter((element) => element.level === 5)
.forEach((element) => {
if (state.canvasRect === null || state.canvas === null || state.ctx === null) return;
state.ctx.fillRect(
element.left * state.imagePosition.scale,
element.top * state.imagePosition.scale,
element.width * state.imagePosition.scale,
element.height * state.imagePosition.scale
);
});
}
// Event emitters
const updateRecipe = function () {
context.emit("update-recipe");
};
const closeEditor = function () {
context.emit("close-editor");
};
// TOOLBAR STUFF
const { $globals, i18n } = useContext();
const toolbarIcons = ref<ToolbarIcons<CanvasModes | SelectedTextSplitModes>>([
{
sectionTitle: i18n.tc("ocr-editor.toolbar"),
eventHandler: switchCanvasMode,
highlight: state.canvasMode,
icons: [
{
name: "selection",
icon: $globals.icons.selectMode,
tooltip: i18n.tc("ocr-editor.selection-mode"),
},
{
name: "panAndZoom",
icon: $globals.icons.panAndZoom,
tooltip: i18n.tc("ocr-editor.pan-and-zoom-picture"),
},
],
},
{
sectionTitle: i18n.tc("ocr-editor.split-text"),
eventHandler: switchSplitTextMode,
highlight: state.selectedTextSplitMode,
icons: [
{
name: "lineNum",
icon: $globals.icons.preserveLines,
tooltip: i18n.tc("ocr-editor.preserve-line-breaks"),
},
{
name: "blockNum",
icon: $globals.icons.preserveBlocks,
tooltip: i18n.tc("ocr-editor.split-by-block"),
},
{
name: "flatten",
icon: $globals.icons.flatten,
tooltip: i18n.tc("ocr-editor.flatten"),
},
],
},
]);
function switchCanvasMode(mode: CanvasModes) {
if (state.canvasRect === null || state.canvas === null) return;
state.canvasMode = mode;
toolbarIcons.value[0].highlight = mode;
if (mode === "panAndZoom") {
state.canvas.style.cursor = "pointer";
} else {
state.canvas.style.cursor = "default";
}
}
function switchSplitTextMode(mode: SelectedTextSplitModes) {
if (state.canvasRect === null) return;
state.selectedTextSplitMode = mode;
toolbarIcons.value[1].highlight = mode;
state.selectedText = getWordsInSelection(props.tsv, state.rect);
}
/**
* Using rectangle coordinates, filters the tsv to get text elements contained
* inside the rectangle
* Additionaly adds newlines depending on the current "text split" mode
* @param tsv An Object containing tesseracts tsv fields
* @param rect Coordinates of a rectangle
* @returns Text from tsv contained in the rectangle
*/
function getWordsInSelection(tsv: OcrTsvResponse[], rect: CanvasRect) {
const correctedRect = correctRectCoordinates(rect);
return tsv
.filter(
(element) =>
element.level === 5 &&
correctedRect.startY - state.imagePosition.dy < element.top * state.imagePosition.scale &&
correctedRect.startX - state.imagePosition.dx < element.left * state.imagePosition.scale &&
correctedRect.startX + correctedRect.w >
(element.left + element.width) * state.imagePosition.scale + state.imagePosition.dx &&
correctedRect.startY + correctedRect.h >
(element.top + element.height) * state.imagePosition.scale + state.imagePosition.dy
)
.map((element, index, array) => {
let separator = " ";
if (
state.selectedTextSplitMode !== "flatten" &&
index !== array.length - 1 &&
element[state.selectedTextSplitMode] !== array[index + 1][state.selectedTextSplitMode]
) {
separator = "\n";
}
return element.text + separator;
})
.join("");
}
return {
...toRefs(state),
handleMouseDown,
handleMouseUp,
handleMouseMove,
handleMouseScroll,
toolbarIcons,
updateRecipe,
closeEditor,
};
},
});
</script>

View File

@ -1,54 +0,0 @@
<template>
<v-card>
<v-app-bar dense dark color="primary" class="mb-2">
<v-icon large left>
{{ $globals.icons.help }}
</v-icon>
<v-toolbar-title class="headline"> {{ $t("ocr-editor.help.help") }} </v-toolbar-title>
<v-spacer></v-spacer>
</v-app-bar>
<v-card-text>
<h1> {{ $t("ocr-editor.help.mouse-modes") }}</h1>
<v-divider class="mb-2 mt-1" />
<h2 class="my-2">
<v-icon> {{ $globals.icons.selectMode }} </v-icon>{{ $t("ocr-editor.help.selection-mode") }}
</h2>
<p class="my-1">{{ $t("ocr-editor.help.selection-mode") }}</p>
<ol>
<li>{{ $t("ocr-editor.help.selection-mode-steps.draw") }}</li>
<li>{{ $t("ocr-editor.help.selection-mode-steps.click") }}</li>
<li>{{ $t("ocr-editor.help.selection-mode-steps.result") }}</li>
</ol>
<h2 class="my-2">
<v-icon> {{ $globals.icons.panAndZoom }} </v-icon>{{ $t("ocr-editor.help.pan-and-zoom-mode") }}
</h2>
{{ $t("ocr-editor.help.pan-and-zoom-desc") }}
<h1 class="mt-5">{{ $t("ocr-editor.help.split-text-mode") }}</h1>
<v-divider class="mb-2 mt-1" />
<h2 class="my-2">
<v-icon> {{ $globals.icons.preserveLines }} </v-icon>
{{ $t("ocr-editor.help.split-modes.line-mode") }}
</h2>
<p>
{{ $t("ocr-editor.help.split-modes.line-mode-desc") }}
</p>
<h2 class="my-2">
<v-icon> {{ $globals.icons.preserveBlocks }} </v-icon>
{{ $t("ocr-editor.help.split-modes.block-mode") }}
</h2>
<p>
{{ $t("ocr-editor.help.split-modes.block-mode-desc") }}
</p>
<h2 class="my-2">
<v-icon> {{ $globals.icons.flatten }} </v-icon> {{ $t("ocr-editor.help.split-modes.flat-mode") }}
</h2>
<p>{{ $t("ocr-editor.help.split-modes.flat-mode-desc") }}</p>
</v-card-text>
</v-card>
</template>
<script lang="ts">
import { defineComponent } from "@nuxtjs/composition-api";
export default defineComponent({});
</script>

View File

@ -1,3 +0,0 @@
import RecipeOcrEditorPage from "./RecipeOcrEditorPage.vue";
export default RecipeOcrEditorPage;

View File

@ -50,7 +50,6 @@
:logged-in="isOwnGroup"
:open="isEditMode"
:recipe-id="recipe.id"
:show-ocr-button="recipe.isOcrRecipe"
class="ml-auto mt-n8 pb-4"
@close="setMode(PageMode.VIEW)"
@json="toggleEditMode()"
@ -58,13 +57,12 @@
@save="$emit('save')"
@delete="$emit('delete')"
@print="printRecipe"
@ocr="goToOcrEditor"
/>
</div>
</template>
<script lang="ts">
import { defineComponent, useContext, computed, ref, watch, useRouter, useRoute } from "@nuxtjs/composition-api";
import { defineComponent, useContext, computed, ref, watch } from "@nuxtjs/composition-api";
import { useLoggedInState } from "~/composables/use-logged-in-state";
import RecipeRating from "~/components/Domain/Recipe/RecipeRating.vue";
import RecipeLastMade from "~/components/Domain/Recipe/RecipeLastMade.vue";
@ -96,16 +94,12 @@ export default defineComponent({
},
},
setup(props) {
const { $auth, $vuetify } = useContext();
const { $vuetify } = useContext();
const { recipeImage } = useStaticRoutes();
const { imageKey, pageMode, editMode, setMode, toggleEditMode, isEditMode } = usePageState(props.recipe.slug);
const { user } = usePageUser();
const { isOwnGroup } = useLoggedInState();
const route = useRoute();
const groupSlug = computed(() => route.value.params.groupSlug || $auth.user?.groupSlug || "");
const router = useRouter();
function printRecipe() {
window.print();
}
@ -119,10 +113,6 @@ export default defineComponent({
return recipeImage(props.recipe.id, props.recipe.image, imageKey.value);
});
function goToOcrEditor() {
router.push(`/g/${groupSlug.value}/r/${props.recipe.slug}/ocr-editor`);
}
watch(
() => recipeImageUrl.value,
() => {
@ -146,7 +136,6 @@ export default defineComponent({
hideImage,
isEditMode,
recipeImageUrl,
goToOcrEditor,
};
},
});

View File

@ -23,7 +23,6 @@ import { MultiPurposeLabelsApi } from "./user/group-multiple-purpose-labels";
import { GroupEventNotifierApi } from "./user/group-event-notifier";
import { MealPlanRulesApi } from "./user/group-mealplan-rules";
import { GroupDataSeederApi } from "./user/group-seeder";
import { OcrAPI } from "./user/ocr";
import { ApiRequestInstance } from "~/lib/api/types/non-generated";
export class UserApiClient {
@ -52,7 +51,6 @@ export class UserApiClient {
public groupEventNotifier: GroupEventNotifierApi;
public upload: UploadFile;
public seeders: GroupDataSeederApi;
public ocr: OcrAPI;
constructor(requests: ApiRequestInstance) {
// Recipes
@ -91,9 +89,6 @@ export class UserApiClient {
this.bulk = new BulkActionsAPI(requests);
this.groupEventNotifier = new GroupEventNotifierApi(requests);
// ocr
this.ocr = new OcrAPI(requests);
Object.freeze(this);
}
}

View File

@ -1,25 +0,0 @@
/* tslint:disable */
/* eslint-disable */
/**
/* This file was automatically generated from pydantic models by running pydantic2ts.
/* Do not modify it by hand - just update the pydantic models and then re-run the script
*/
export interface OcrAssetReq {
recipeSlug: string;
assetName: string;
}
export interface OcrTsvResponse {
level?: number;
pageNum?: number;
blockNum?: number;
parNum?: number;
lineNum?: number;
wordNum?: number;
left?: number;
top?: number;
width?: number;
height?: number;
conf?: number;
text?: string;
}

View File

@ -244,7 +244,6 @@ export interface Recipe {
extras?: {
[k: string]: unknown;
};
isOcrRecipe?: boolean;
comments?: RecipeCommentOut[];
}
export interface RecipeTool {

View File

@ -1,16 +0,0 @@
import { BaseAPI } from "../base/base-clients";
const prefix = "/api";
export class OcrAPI extends BaseAPI {
// Currently unused in favor for the endpoint using asset names
async fileToTsv(file: File) {
const formData = new FormData();
formData.append("file", file);
return await this.requests.post(`${prefix}/ocr/file-to-tsv`, formData);
}
async assetToTsv(recipeSlug: string, assetName: string) {
return await this.requests.post(`${prefix}/ocr/asset-to-tsv`, { recipeSlug, assetName });
}
}

View File

@ -38,7 +38,6 @@ const routes = {
recipesCategory: `${prefix}/recipes/category`,
recipesParseIngredient: `${prefix}/parser/ingredient`,
recipesParseIngredients: `${prefix}/parser/ingredients`,
recipesCreateFromOcr: `${prefix}/recipes/create-ocr`,
recipesTimelineEvent: `${prefix}/recipes/timeline/events`,
recipesRecipeSlug: (recipe_slug: string) => `${prefix}/recipes/${recipe_slug}`,
@ -159,15 +158,6 @@ export class RecipeAPI extends BaseCRUDAPI<CreateRecipe, Recipe, Recipe> {
return `${routes.recipesRecipeSlugExportZip(recipeSlug)}?token=${token}`;
}
async createFromOcr(file: File, makeFileRecipeImage: boolean) {
const formData = new FormData();
formData.append("file", file);
formData.append("extension", file.name.split(".").pop() ?? "");
formData.append("makefilerecipeimage", String(makeFileRecipeImage));
return await this.requests.post(routes.recipesCreateFromOcr, formData);
}
async updateLastMade(recipeSlug: string, timestamp: string) {
return await this.requests.patch<Recipe, RecipeLastMade>(routes.recipesSlugLastMade(recipeSlug), { timestamp })
}

View File

@ -1,51 +0,0 @@
<template>
<div>
<RecipeOcrEditorPage v-if="recipe" :recipe="recipe" />
</div>
</template>
<script lang="ts">
import { defineComponent, useRoute } from "@nuxtjs/composition-api";
import RecipeOcrEditorPage from "~/components/Domain/Recipe/RecipeOcrEditorPage/RecipeOcrEditorPage.vue";
import { useRecipe } from "~/composables/recipes";
export default defineComponent({
components: { RecipeOcrEditorPage },
setup() {
const route = useRoute();
const slug = route.value.params.slug;
const { recipe, loading } = useRecipe(slug);
return {
recipe,
loading,
};
},
});
</script>
<style lang="css">
.ghost {
opacity: 0.5;
}
body {
background: #eee;
}
canvas {
background: white;
box-shadow: 0px 2px 3px rgba(0, 0, 0, 0.2);
width: 100%;
image-rendering: optimizeQuality;
}
.box {
position: absolute;
border: 2px #90ee90 solid;
background-color: #90ee90;
z-index: 3;
}
</style>

View File

@ -52,11 +52,6 @@ export default defineComponent({
text: i18n.tc("recipe.import-with-zip"),
value: "zip",
},
{
icon: $globals.icons.fileImage,
text: i18n.tc("recipe.create-recipe-from-an-image"),
value: "ocr",
},
{
icon: $globals.icons.link,
text: i18n.tc("recipe.bulk-url-import"),

View File

@ -1,85 +0,0 @@
<template>
<div>
<v-card-title class="headline"> {{ $t('recipe.create-recipe-from-an-image') }} </v-card-title>
<v-card-text>
{{ $t('recipe.create-a-recipe-by-uploading-a-scan') }}
<v-form ref="domCreateByOcr"> </v-form>
</v-card-text>
<v-card-actions class="justify-center">
<v-file-input
v-model="imageUpload"
accept=".png"
label="recipe.png"
filled
clearable
class="rounded-lg mt-2"
rounded
truncate-length="100"
:hint="$t('recipe.upload-a-png-image-from-a-recipe-book')"
persistent-hint
prepend-icon=""
:prepend-inner-icon="$globals.icons.fileImage"
/>
</v-card-actions>
<v-card-actions class="justify-center">
<v-checkbox v-model="makeFileRecipeImage" :label="$t('new-recipe.make-recipe-image')" />
</v-card-actions>
<v-card-actions class="justify-center">
<div style="width: 250px">
<BaseButton :disabled="imageUpload === null" large rounded block :loading="loading" @click="createByOcr" />
</div>
</v-card-actions>
</div>
</template>
<script lang="ts">
import { defineComponent, reactive, toRefs, ref, useRouter, computed, useContext, useRoute } from "@nuxtjs/composition-api";
import { AxiosResponse } from "axios";
import { useUserApi } from "~/composables/api";
import { validators } from "~/composables/use-validators";
import { VForm } from "~/types/vuetify";
export default defineComponent({
setup() {
const state = reactive({
error: false,
loading: false,
makeFileRecipeImage: false,
});
const { $auth } = useContext();
const route = useRoute();
const groupSlug = computed(() => route.value.params.groupSlug || $auth.user?.groupSlug || "");
const api = useUserApi();
const router = useRouter();
const imageUpload = ref<File | null>(null);
function handleResponse(response: AxiosResponse<string> | null) {
if (response?.status !== 201) {
state.error = true;
state.loading = false;
return;
}
router.push(`/g/${groupSlug.value}/r/${response.data}/ocr-editor`);
}
const domCreateByOcr = ref<VForm | null>(null);
async function createByOcr() {
if (imageUpload.value === null) return; // Should never be true due to circumstances
state.loading = true;
const { response } = await api.recipes.createFromOcr(imageUpload.value, state.makeFileRecipeImage);
// @ts-ignore returns a string and not a full Recipe
handleResponse(response);
}
return {
domCreateByOcr,
createByOcr,
...toRefs(state),
validators,
imageUpload,
};
},
});
</script>

View File

@ -1,73 +0,0 @@
import { OcrTsvResponse } from "~/lib/api/types/ocr";
import { Recipe } from "~/lib/api/types/recipe";
export type CanvasRect = {
startX: number;
startY: number;
w: number;
h: number;
};
export type ImagePosition = {
sx: number;
sy: number;
sWidth: number;
sHeight: number;
dx: number;
dy: number;
dWidth: number;
dHeight: number;
scale: number;
panStartPoint: {
x: number;
y: number;
};
};
export type Mouse = {
current: {
x: number;
y: number;
};
down: boolean;
};
// https://stackoverflow.com/questions/58434389/export typescript-deep-keyof-of-a-nested-object/58436959#58436959
type Prev = [never, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...0[]];
type Join<K, P> = K extends string | number
? P extends string | number
? `${K}${"" extends P ? "" : "."}${P}`
: never
: never;
export type Leaves<T, D extends number = 10> = [D] extends [never]
? never
: T extends object
? { [K in keyof T]-?: Join<K, Leaves<T[K], Prev[D]>> }[keyof T]
: "";
export type Paths<T, D extends number = 10> = [D] extends [never]
? never
: T extends object
? {
[K in keyof T]-?: K extends string | number ? `${K}` | Join<K, Paths<T[K], Prev[D]>> : never;
}[keyof T]
: "";
export type SelectedRecipeLeaves = Leaves<Recipe>;
export type CanvasModes = "selection" | "panAndZoom";
export type SelectedTextSplitModes = keyof OcrTsvResponse | "flatten";
export type ToolbarIcons<T extends string> = {
sectionTitle: string;
eventHandler(mode: T): void;
highlight: T;
icons: {
name: T;
icon: string;
tooltip: string;
}[];
}[];

View File

@ -7,7 +7,6 @@ from . import (
comments,
explore,
groups,
ocr,
organizers,
parser,
recipe,
@ -32,4 +31,3 @@ router.include_router(unit_and_foods.router)
router.include_router(admin.router)
router.include_router(validators.router)
router.include_router(explore.router)
router.include_router(ocr.router)

View File

@ -1,7 +0,0 @@
from fastapi import APIRouter
from . import pytesseract
router = APIRouter(prefix="/ocr")
router.include_router(pytesseract.router)

View File

@ -1,37 +0,0 @@
from fastapi import APIRouter, File
from mealie.routes._base import BaseUserController, controller
from mealie.schema.ocr.ocr import OcrAssetReq, OcrTsvResponse
from mealie.services.ocr.pytesseract import OcrService
from mealie.services.recipe.recipe_data_service import RecipeDataService
from mealie.services.recipe.recipe_service import RecipeService
router = APIRouter()
@controller(router)
class OCRController(BaseUserController):
def __init__(self):
self.ocr_service = OcrService()
@router.post("/", response_model=str)
def image_to_string(self, file: bytes = File(...)):
return self.ocr_service.image_to_string(file)
@router.post("/file-to-tsv", response_model=list[OcrTsvResponse])
def file_to_tsv(self, file: bytes = File(...)):
tsv = self.ocr_service.image_to_tsv(file)
return self.ocr_service.format_tsv_output(tsv)
@router.post("/asset-to-tsv", response_model=list[OcrTsvResponse])
def asset_to_tsv(self, req: OcrAssetReq):
recipe_service = RecipeService(self.repos, self.user, self.group)
recipe = recipe_service._get_recipe(req.recipe_slug)
if recipe.id is None:
return []
data_service = RecipeDataService(recipe.id, recipe.group_id)
asset_path = data_service.dir_assets.joinpath(req.asset_name)
file = open(asset_path, "rb")
tsv = self.ocr_service.image_to_tsv(file.read())
return self.ocr_service.format_tsv_output(tsv)

View File

@ -27,10 +27,7 @@ from mealie.schema.make_dependable import make_dependable
from mealie.schema.recipe import Recipe, RecipeImageTypes, ScrapeRecipe
from mealie.schema.recipe.recipe import CreateRecipe, CreateRecipeByUrlBulk, RecipeLastMade, RecipeSummary
from mealie.schema.recipe.recipe_asset import RecipeAsset
from mealie.schema.recipe.recipe_ingredient import RecipeIngredient
from mealie.schema.recipe.recipe_scraper import ScrapeRecipeTest
from mealie.schema.recipe.recipe_settings import RecipeSettings
from mealie.schema.recipe.recipe_step import RecipeStep
from mealie.schema.recipe.request_helpers import RecipeDuplicate, RecipeZipTokenResponse, UpdateImageResponse
from mealie.schema.response import PaginationBase, PaginationQuery
from mealie.schema.response.pagination import RecipeSearchQuery
@ -489,37 +486,3 @@ class RecipeController(BaseRecipeController):
self.mixins.update_one(recipe, slug)
return asset_in
# ==================================================================================================================
# OCR
@router.post("/create-ocr", status_code=201, response_model=str)
def create_recipe_ocr(
self, extension: str = Form(...), file: UploadFile = File(...), makefilerecipeimage: bool = Form(...)
):
"""Takes an image and creates a recipe based on the image"""
slug = self.service.create_one(
Recipe(
name="New OCR Recipe",
recipe_ingredient=[RecipeIngredient(note="", title=None, unit=None, food=None, original_text=None)],
recipe_instructions=[RecipeStep(text="")],
is_ocr_recipe=True,
settings=RecipeSettings(show_assets=True),
id=None,
image=None,
recipe_yield=None,
rating=None,
orgURL=None,
date_added=None,
date_updated=None,
created_at=None,
update_at=None,
nutrition=None,
)
).slug
RecipeController.upload_recipe_asset(self, slug, "Original recipe image", "", extension, file)
if makefilerecipeimage:
# Get the pointer to the beginning of the file to read it once more
file.file.seek(0)
self.update_recipe_image(slug, file.file.read(), extension)
return slug

View File

@ -1,7 +0,0 @@
# This file is auto-generated by gen_schema_exports.py
from .ocr import OcrAssetReq, OcrTsvResponse
__all__ = [
"OcrAssetReq",
"OcrTsvResponse",
]

View File

@ -1,21 +0,0 @@
from mealie.schema._mealie import MealieModel
class OcrTsvResponse(MealieModel):
level: int = 0
page_num: int = 0
block_num: int = 0
par_num: int = 0
line_num: int = 0
word_num: int = 0
left: int = 0
top: int = 0
width: int = 0
height: int = 0
conf: float = 0.0
text: str = ""
class OcrAssetReq(MealieModel):
recipe_slug: str
asset_name: str

View File

@ -128,7 +128,6 @@ class Recipe(RecipeSummary):
assets: list[RecipeAsset] | None = []
notes: list[RecipeNote] | None = []
extras: dict | None = {}
is_ocr_recipe: bool | None = False
comments: list[RecipeCommentOut] | None = []

View File

@ -1,56 +0,0 @@
from io import BytesIO
import pytesseract
from PIL import Image
from mealie.schema.ocr.ocr import OcrTsvResponse
from mealie.services._base_service import BaseService
class OcrService(BaseService):
"""
Class for ocr engines.
"""
def image_to_string(self, image_data):
"""
Returns a plain text translation of an image
"""
return pytesseract.image_to_string(Image.open(image_data))
def image_to_tsv(self, image_data, lang=None):
"""
Returns the pytesseract default tsv output
"""
if lang is not None:
return pytesseract.image_to_data(Image.open(BytesIO(image_data)), lang=lang)
return pytesseract.image_to_data(Image.open(BytesIO(image_data)))
def format_tsv_output(self, tsv: str) -> list[OcrTsvResponse]:
"""
Returns a OcrTsvResponse from a default pytesseract tsv output
"""
lines = tsv.split("\n")
titles = [t.strip() for t in lines[0].split("\t")]
response: list[OcrTsvResponse] = []
for i in range(1, len(lines)):
if lines[i] == "":
continue
line = OcrTsvResponse()
for key, value in zip(titles, lines[i].split("\t"), strict=False):
if key == "text":
setattr(line, key, value.strip())
elif key == "conf":
setattr(line, key, float(value.strip()))
elif key in OcrTsvResponse.__fields__:
setattr(line, key, int(value.strip()))
else:
continue
if isinstance(line, OcrTsvResponse):
response.append(line)
return response

41
poetry.lock generated
View File

@ -605,6 +605,7 @@ files = [
{file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"},
{file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"},
{file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"},
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"},
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"},
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"},
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"},
@ -613,6 +614,7 @@ files = [
{file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"},
{file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"},
{file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"},
{file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"},
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"},
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"},
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"},
@ -642,6 +644,7 @@ files = [
{file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"},
{file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"},
{file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"},
{file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"},
{file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"},
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"},
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"},
@ -650,6 +653,7 @@ files = [
{file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"},
{file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"},
{file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"},
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"},
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"},
{file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"},
{file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"},
@ -1862,21 +1866,6 @@ files = [
html5lib = "*"
rdflib = "*"
[[package]]
name = "pytesseract"
version = "0.3.10"
description = "Python-tesseract is a python wrapper for Google's Tesseract-OCR"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytesseract-0.3.10-py3-none-any.whl", hash = "sha256:8f22cc98f765bf13517ead0c70effedb46c153540d25783e04014f28b55a5fc6"},
{file = "pytesseract-0.3.10.tar.gz", hash = "sha256:f1c3a8b0f07fd01a1085d451f5b8315be6eec1d5577a6796d46dc7a62bd4120f"},
]
[package.dependencies]
packaging = ">=21.3"
Pillow = ">=8.0.0"
[[package]]
name = "pytest"
version = "7.2.2"
@ -2024,6 +2013,7 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@ -2031,8 +2021,15 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@ -2049,6 +2046,7 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@ -2056,6 +2054,7 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@ -2972,6 +2971,16 @@ files = [
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
{file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
{file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@ -3025,4 +3034,4 @@ pgsql = ["psycopg2-binary"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "5e0cc403c1ec022e6a75a4969dd97c55ce312daafd7d2024a3617eca25b2129f"
content-hash = "984d725b667165ebbf82eeea32a38e4f4891192fc8c6be60864d25d5b36e7970"

View File

@ -29,7 +29,6 @@ passlib = "^1.7.4"
psycopg2-binary = { version = "^2.9.1", optional = true }
pydantic = "^1.10.4"
pyhumps = "^3.5.3"
pytesseract = "^0.3.9"
python = "^3.10"
python-dateutil = "^2.8.2"
python-dotenv = "^1.0.0"

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

View File

@ -1,73 +0,0 @@
level page_num block_num par_num line_num word_num left top width height conf text
1 1 0 0 0 0 0 0 640 480 -1
2 1 1 0 0 0 36 92 582 269 -1
3 1 1 1 0 0 36 92 582 92 -1
4 1 1 1 1 0 36 92 544 30 -1
5 1 1 1 1 1 36 92 60 24 87.137558 This
5 1 1 1 1 2 109 92 20 24 87.137558 is
5 1 1 1 1 3 141 98 15 18 87.823906 a
5 1 1 1 1 4 169 92 32 24 87.823906 lot
5 1 1 1 1 5 212 92 28 24 92.965874 of
5 1 1 1 1 6 251 92 31 24 93.247513 12
5 1 1 1 1 7 296 92 68 30 92.734741 point
5 1 1 1 1 8 374 93 53 23 92.996040 text
5 1 1 1 1 9 437 93 26 23 93.160057 to
5 1 1 1 1 10 474 93 52 23 92.312637 test
5 1 1 1 1 11 536 92 44 24 92.312637 the
4 1 1 1 2 0 36 126 582 31 -1
5 1 1 1 2 1 36 132 45 18 90.505524 ocr
5 1 1 1 2 2 91 126 69 24 90.505524 code
5 1 1 1 2 3 172 126 51 24 91.169167 and
5 1 1 1 2 4 236 132 50 18 89.765854 see
5 1 1 1 2 5 299 126 15 24 85.827324 if
5 1 1 1 2 6 325 126 14 24 93.116241 it
5 1 1 1 2 7 348 126 85 24 92.394562 works
5 1 1 1 2 8 445 132 33 18 30.119690 on
5 1 1 1 2 9 500 126 29 24 30.119690 all
5 1 1 1 2 10 541 127 77 30 92.090988 types
4 1 1 1 3 0 36 160 187 24 -1
5 1 1 1 3 1 36 160 28 24 92.476135 of
5 1 1 1 3 2 72 160 41 24 90.919365 file
5 1 1 1 3 3 123 160 100 24 91.360367 format.
3 1 1 2 0 0 36 194 561 167 -1
4 1 1 2 1 0 36 194 549 31 -1
5 1 1 2 1 1 36 194 55 24 89.098892 The
5 1 1 2 1 2 102 194 75 30 89.098892 quick
5 1 1 2 1 3 189 194 85 24 91.415680 brown
5 1 1 2 1 4 287 194 52 31 91.943085 dog
5 1 1 2 1 5 348 194 108 31 92.167969 jumped
5 1 1 2 1 6 468 200 63 18 91.970985 over
5 1 1 2 1 7 540 194 45 24 92.843704 the
4 1 1 2 2 0 37 228 548 31 -1
5 1 1 2 2 1 37 228 55 31 92.262550 lazy
5 1 1 2 2 2 103 228 50 24 92.693161 fox.
5 1 1 2 2 3 165 228 55 24 92.947639 The
5 1 1 2 2 4 232 228 75 30 90.589806 quick
5 1 1 2 2 5 319 228 85 24 91.051247 brown
5 1 1 2 2 6 417 228 51 31 91.925011 dog
5 1 1 2 2 7 478 228 107 31 91.471077 jumped
4 1 1 2 3 0 36 262 561 31 -1
5 1 1 2 3 1 36 268 63 18 90.210129 over
5 1 1 2 3 2 109 262 44 24 90.210129 the
5 1 1 2 3 3 165 262 56 31 91.178192 lazy
5 1 1 2 3 4 231 262 50 24 92.794647 fox.
5 1 1 2 3 5 294 262 55 24 91.388016 The
5 1 1 2 3 6 360 262 75 30 92.525742 quick
5 1 1 2 3 7 447 262 85 24 90.425552 brown
5 1 1 2 3 8 545 262 52 31 90.425552 dog
4 1 1 2 4 0 43 296 518 31 -1
5 1 1 2 4 1 43 296 107 31 91.759590 jumped
5 1 1 2 4 2 162 302 64 18 92.923576 over
5 1 1 2 4 3 235 296 44 24 92.017929 the
5 1 1 2 4 4 292 296 55 31 91.558884 lazy
5 1 1 2 4 5 357 296 50 24 92.687485 fox.
5 1 1 2 4 6 420 296 55 24 91.922661 The
5 1 1 2 4 7 486 296 75 30 91.870224 quick
4 1 1 2 5 0 37 330 524 31 -1
5 1 1 2 5 1 37 330 85 24 92.923935 brown
5 1 1 2 5 2 135 330 52 31 91.468765 dog
5 1 1 2 5 3 196 330 108 31 91.425491 jumped
5 1 1 2 5 4 316 336 63 18 91.489830 over
5 1 1 2 5 5 388 330 45 24 91.740379 the
5 1 1 2 5 6 445 330 55 31 92.110054 lazy
5 1 1 2 5 7 511 330 50 24 93.180054 fox.
1 level page_num block_num par_num line_num word_num left top width height conf text
2 1 1 0 0 0 0 0 0 640 480 -1
3 2 1 1 0 0 0 36 92 582 269 -1
4 3 1 1 1 0 0 36 92 582 92 -1
5 4 1 1 1 1 0 36 92 544 30 -1
6 5 1 1 1 1 1 36 92 60 24 87.137558 This
7 5 1 1 1 1 2 109 92 20 24 87.137558 is
8 5 1 1 1 1 3 141 98 15 18 87.823906 a
9 5 1 1 1 1 4 169 92 32 24 87.823906 lot
10 5 1 1 1 1 5 212 92 28 24 92.965874 of
11 5 1 1 1 1 6 251 92 31 24 93.247513 12
12 5 1 1 1 1 7 296 92 68 30 92.734741 point
13 5 1 1 1 1 8 374 93 53 23 92.996040 text
14 5 1 1 1 1 9 437 93 26 23 93.160057 to
15 5 1 1 1 1 10 474 93 52 23 92.312637 test
16 5 1 1 1 1 11 536 92 44 24 92.312637 the
17 4 1 1 1 2 0 36 126 582 31 -1
18 5 1 1 1 2 1 36 132 45 18 90.505524 ocr
19 5 1 1 1 2 2 91 126 69 24 90.505524 code
20 5 1 1 1 2 3 172 126 51 24 91.169167 and
21 5 1 1 1 2 4 236 132 50 18 89.765854 see
22 5 1 1 1 2 5 299 126 15 24 85.827324 if
23 5 1 1 1 2 6 325 126 14 24 93.116241 it
24 5 1 1 1 2 7 348 126 85 24 92.394562 works
25 5 1 1 1 2 8 445 132 33 18 30.119690 on
26 5 1 1 1 2 9 500 126 29 24 30.119690 all
27 5 1 1 1 2 10 541 127 77 30 92.090988 types
28 4 1 1 1 3 0 36 160 187 24 -1
29 5 1 1 1 3 1 36 160 28 24 92.476135 of
30 5 1 1 1 3 2 72 160 41 24 90.919365 file
31 5 1 1 1 3 3 123 160 100 24 91.360367 format.
32 3 1 1 2 0 0 36 194 561 167 -1
33 4 1 1 2 1 0 36 194 549 31 -1
34 5 1 1 2 1 1 36 194 55 24 89.098892 The
35 5 1 1 2 1 2 102 194 75 30 89.098892 quick
36 5 1 1 2 1 3 189 194 85 24 91.415680 brown
37 5 1 1 2 1 4 287 194 52 31 91.943085 dog
38 5 1 1 2 1 5 348 194 108 31 92.167969 jumped
39 5 1 1 2 1 6 468 200 63 18 91.970985 over
40 5 1 1 2 1 7 540 194 45 24 92.843704 the
41 4 1 1 2 2 0 37 228 548 31 -1
42 5 1 1 2 2 1 37 228 55 31 92.262550 lazy
43 5 1 1 2 2 2 103 228 50 24 92.693161 fox.
44 5 1 1 2 2 3 165 228 55 24 92.947639 The
45 5 1 1 2 2 4 232 228 75 30 90.589806 quick
46 5 1 1 2 2 5 319 228 85 24 91.051247 brown
47 5 1 1 2 2 6 417 228 51 31 91.925011 dog
48 5 1 1 2 2 7 478 228 107 31 91.471077 jumped
49 4 1 1 2 3 0 36 262 561 31 -1
50 5 1 1 2 3 1 36 268 63 18 90.210129 over
51 5 1 1 2 3 2 109 262 44 24 90.210129 the
52 5 1 1 2 3 3 165 262 56 31 91.178192 lazy
53 5 1 1 2 3 4 231 262 50 24 92.794647 fox.
54 5 1 1 2 3 5 294 262 55 24 91.388016 The
55 5 1 1 2 3 6 360 262 75 30 92.525742 quick
56 5 1 1 2 3 7 447 262 85 24 90.425552 brown
57 5 1 1 2 3 8 545 262 52 31 90.425552 dog
58 4 1 1 2 4 0 43 296 518 31 -1
59 5 1 1 2 4 1 43 296 107 31 91.759590 jumped
60 5 1 1 2 4 2 162 302 64 18 92.923576 over
61 5 1 1 2 4 3 235 296 44 24 92.017929 the
62 5 1 1 2 4 4 292 296 55 31 91.558884 lazy
63 5 1 1 2 4 5 357 296 50 24 92.687485 fox.
64 5 1 1 2 4 6 420 296 55 24 91.922661 The
65 5 1 1 2 4 7 486 296 75 30 91.870224 quick
66 4 1 1 2 5 0 37 330 524 31 -1
67 5 1 1 2 5 1 37 330 85 24 92.923935 brown
68 5 1 1 2 5 2 135 330 52 31 91.468765 dog
69 5 1 1 2 5 3 196 330 108 31 91.425491 jumped
70 5 1 1 2 5 4 316 336 63 18 91.489830 over
71 5 1 1 2 5 5 388 330 45 24 91.740379 the
72 5 1 1 2 5 6 445 330 55 31 92.110054 lazy
73 5 1 1 2 5 7 511 330 50 24 93.180054 fox.

View File

@ -1,9 +0,0 @@
This is a lot of 12 point text to test the
ocr code and see if it works on all types
of file format.
The quick brown dog jumped over the
lazy fox. The quick brown dog jumped
over the lazy fox. The quick brown dog
jumped over the lazy fox. The quick
brown dog jumped over the lazy fox.

View File

@ -1,58 +0,0 @@
from pathlib import Path
import pytest
from mealie.services.ocr.pytesseract import OcrService
ocr_service = OcrService()
@pytest.mark.skip("Tesseract is not reliable between environments")
def test_image_to_string():
with open(Path("tests/data/images/test-ocr.png"), "rb") as image:
result = ocr_service.image_to_string(image)
with open(Path("tests/data/text/test-ocr.txt"), encoding="utf-8") as expected_result:
assert result == expected_result.read()
@pytest.mark.skip("Tesseract is not reliable between environments")
def test_image_to_tsv():
with open(Path("tests/data/images/test-ocr.png"), "rb") as image:
result = ocr_service.image_to_tsv(image.read())
with open(Path("tests/data/text/test-ocr.tsv"), encoding="utf-8") as expected_result:
assert result == expected_result.read()
def test_format_tsv_output():
tsv = " level\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext \n1\t1\t0\t0\t0\t0\t0\t0\t640\t480\t-1\t\n5\t1\t1\t1\t1\t1\t36\t92\t60\t24\t87.137558\tThis"
expected_result = [
{
"level": 1,
"page_num": 1,
"block_num": 0,
"par_num": 0,
"line_num": 0,
"word_num": 0,
"left": 0,
"top": 0,
"width": 640,
"height": 480,
"conf": -1.0,
"text": "",
},
{
"level": 5,
"page_num": 1,
"block_num": 1,
"par_num": 1,
"line_num": 1,
"word_num": 1,
"left": 36,
"top": 92,
"width": 60,
"height": 24,
"conf": 87.137558,
"text": "This",
},
]
assert ocr_service.format_tsv_output(tsv) == expected_result

View File

@ -113,12 +113,6 @@ groups_webhooks_rerun = "/api/groups/webhooks/rerun"
"""`/api/groups/webhooks/rerun`"""
media_docker_validate_txt = "/api/media/docker/validate.txt"
"""`/api/media/docker/validate.txt`"""
ocr = "/api/ocr/"
"""`/api/ocr/`"""
ocr_asset_to_tsv = "/api/ocr/asset-to-tsv"
"""`/api/ocr/asset-to-tsv`"""
ocr_file_to_tsv = "/api/ocr/file-to-tsv"
"""`/api/ocr/file-to-tsv`"""
organizers_categories = "/api/organizers/categories"
"""`/api/organizers/categories`"""
organizers_categories_empty = "/api/organizers/categories/empty"
@ -151,8 +145,6 @@ recipes_bulk_actions_tag = "/api/recipes/bulk-actions/tag"
"""`/api/recipes/bulk-actions/tag`"""
recipes_create_from_zip = "/api/recipes/create-from-zip"
"""`/api/recipes/create-from-zip`"""
recipes_create_ocr = "/api/recipes/create-ocr"
"""`/api/recipes/create-ocr`"""
recipes_create_url = "/api/recipes/create-url"
"""`/api/recipes/create-url`"""
recipes_create_url_bulk = "/api/recipes/create-url/bulk"