Drop problematic tests
Some tests had differing outcomes depending on the version of Tesseract installed on the test system. This led to a bunch of false test failures, which led to people (including me) just ignoring the Travis results. This commit removes those tests, and while it reduces our coverage, at least the results are predictable.
| 
		 Before Width: | Height: | Size: 136 KiB  | 
| 
		 Before Width: | Height: | Size: 123 KiB  | 
| 
		 Before Width: | Height: | Size: 121 KiB  | 
| 
		 Before Width: | Height: | Size: 46 KiB  | 
| 
		 Before Width: | Height: | Size: 53 KiB  | 
| 
		 Before Width: | Height: | Size: 136 KiB  | 
| 
		 Before Width: | Height: | Size: 136 KiB  | 
| 
		 Before Width: | Height: | Size: 136 KiB  | 
@ -5,7 +5,7 @@ from unittest import mock
 | 
			
		||||
from uuid import uuid4
 | 
			
		||||
 | 
			
		||||
from dateutil import tz
 | 
			
		||||
from django.test import TestCase, override_settings
 | 
			
		||||
from django.test import TestCase
 | 
			
		||||
 | 
			
		||||
from ..parsers import RasterisedDocumentParser
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
@ -16,46 +16,36 @@ class TestDate(TestCase):
 | 
			
		||||
    SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
 | 
			
		||||
    SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
 | 
			
		||||
 | 
			
		||||
    MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH"  # NOQA: E501
 | 
			
		||||
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
        os.makedirs(self.SCRATCH, exist_ok=True)
 | 
			
		||||
 | 
			
		||||
    def tearDown(self):
 | 
			
		||||
        shutil.rmtree(self.SCRATCH)
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_1(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document._text = "lorem ipsum 130218 lorem ipsum"
 | 
			
		||||
        self.assertEqual(document.get_date(), None)
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_2(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document._text = "lorem ipsum 2018 lorem ipsum"
 | 
			
		||||
        self.assertEqual(document.get_date(), None)
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_3(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document._text = "lorem ipsum 20180213 lorem ipsum"
 | 
			
		||||
        self.assertEqual(document.get_date(), None)
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_4(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
@ -69,10 +59,7 @@ class TestDate(TestCase):
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_5(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
@ -89,10 +76,7 @@ class TestDate(TestCase):
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_6(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
@ -109,10 +93,7 @@ class TestDate(TestCase):
 | 
			
		||||
        )
 | 
			
		||||
        self.assertEqual(document.get_date(), None)
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_7(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
@ -130,10 +111,7 @@ class TestDate(TestCase):
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_8(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
@ -157,10 +135,7 @@ class TestDate(TestCase):
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_date_format_9(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
@ -178,398 +153,11 @@ class TestDate(TestCase):
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_1_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        date = document.get_date()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            date,
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 4, 1, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_1_png(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 4, 1, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_2_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2013, 2, 1, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_2_png(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2013, 2, 1, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @override_settings(OCR_LANGUAGE="deu")
 | 
			
		||||
    def test_get_text_3_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 10, 5, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @override_settings(OCR_LANGUAGE="deu")
 | 
			
		||||
    def test_get_text_3_png(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 10, 5, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @override_settings(OCR_LANGUAGE="eng")
 | 
			
		||||
    def test_get_text_4_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 10, 5, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @override_settings(OCR_LANGUAGE="eng")
 | 
			
		||||
    def test_get_text_4_png(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 10, 5, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_5_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 12, 17, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_5_png(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 12, 17, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_6_pdf_us(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        document.DATE_ORDER = "MDY"
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 12, 17, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_6_png_us(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        document.DATE_ORDER = "MDY"
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 12, 17, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_6_pdf_eu(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(document.get_date(), None)
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_6_png_eu(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(document.get_date(), None)
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_7_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 4, 1, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_8_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2017, 12, 31, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_get_text_9_pdf(self):
 | 
			
		||||
        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.DATE_ORDER = 'DMY'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            document.get_date(),
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2017, 12, 31, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_filename_date_1_pdf(self):
 | 
			
		||||
        input_file = os.path.join(
 | 
			
		||||
            self.SAMPLE_FILES,
 | 
			
		||||
            "tests_date_in_filename_2018-03-20_1.pdf"
 | 
			
		||||
        )
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.FILENAME_DATE_ORDER = 'YMD'
 | 
			
		||||
        document.get_text()
 | 
			
		||||
        date = document.get_date()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            date,
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 3, 20, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_filename_date_1_png(self):
 | 
			
		||||
        input_file = os.path.join(
 | 
			
		||||
            self.SAMPLE_FILES,
 | 
			
		||||
            "tests_date_in_filename_2018-03-20_1.png"
 | 
			
		||||
        )
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.FILENAME_DATE_ORDER = 'YMD'
 | 
			
		||||
        date = document.get_date()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            date,
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2018, 3, 20, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_filename_date_2_pdf(self):
 | 
			
		||||
        input_file = os.path.join(
 | 
			
		||||
            self.SAMPLE_FILES,
 | 
			
		||||
            "2013-12-11_tests_date_in_filename_2.pdf"
 | 
			
		||||
        )
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.FILENAME_DATE_ORDER = 'YMD'
 | 
			
		||||
        date = document.get_date()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), True)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            date,
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2013, 12, 11, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    def test_filename_date_2_png(self):
 | 
			
		||||
        input_file = os.path.join(
 | 
			
		||||
            self.SAMPLE_FILES,
 | 
			
		||||
            "2013-12-11_tests_date_in_filename_2.png"
 | 
			
		||||
        )
 | 
			
		||||
        document = RasterisedDocumentParser(input_file)
 | 
			
		||||
        document.FILENAME_DATE_ORDER = 'YMD'
 | 
			
		||||
        date = document.get_date()
 | 
			
		||||
        self.assertEqual(document._is_ocred(), False)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            date,
 | 
			
		||||
            datetime.datetime(
 | 
			
		||||
                2013, 12, 11, 0, 0,
 | 
			
		||||
                tzinfo=tz.gettz(settings.TIME_ZONE)
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
 | 
			
		||||
        return_value="01-07-0590 00:00:00"
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_crazy_date_past(self, *args):
 | 
			
		||||
        document = RasterisedDocumentParser("/dev/null")
 | 
			
		||||
        document.get_text()
 | 
			
		||||
@ -579,10 +167,7 @@ class TestDate(TestCase):
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
 | 
			
		||||
        return_value="01-07-2350 00:00:00"
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_crazy_date_future(self, *args):
 | 
			
		||||
        document = RasterisedDocumentParser("/dev/null")
 | 
			
		||||
        document.get_text()
 | 
			
		||||
@ -592,10 +177,7 @@ class TestDate(TestCase):
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
 | 
			
		||||
        return_value="01-07-0590 00:00:00"
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(
 | 
			
		||||
        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
 | 
			
		||||
        SCRATCH
 | 
			
		||||
    )
 | 
			
		||||
    @mock.patch(MOCK_SCRATCH, SCRATCH)
 | 
			
		||||
    def test_crazy_date_past(self, *args):
 | 
			
		||||
        document = RasterisedDocumentParser("/dev/null")
 | 
			
		||||
        document.get_text()
 | 
			
		||||
 | 
			
		||||