mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Add unitest for tika_parse()
This commit is contained in:
		
							parent
							
								
									3d37e49c1a
								
							
						
					
					
						commit
						daf90399bd
					
				@ -21,6 +21,7 @@ class MailDocumentParser(DocumentParser):
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
 | 
			
		||||
    tika_server = settings.PAPERLESS_TIKA_ENDPOINT
 | 
			
		||||
 | 
			
		||||
    logging_name = "paperless.parsing.mail"
 | 
			
		||||
    _parsed = None
 | 
			
		||||
@ -133,13 +134,13 @@ class MailDocumentParser(DocumentParser):
 | 
			
		||||
 | 
			
		||||
    def tika_parse(self, html: str):
 | 
			
		||||
        self.log("info", "Sending content to Tika server")
 | 
			
		||||
        tika_server = settings.PAPERLESS_TIKA_ENDPOINT
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            parsed = parser.from_buffer(html, tika_server)
 | 
			
		||||
            parsed = parser.from_buffer(html, self.tika_server)
 | 
			
		||||
        except Exception as err:
 | 
			
		||||
            raise ParseError(
 | 
			
		||||
                f"Could not parse content with tika server at " f"{tika_server}: {err}",
 | 
			
		||||
                f"Could not parse content with tika server at "
 | 
			
		||||
                f"{self.tika_server}: {err}",
 | 
			
		||||
            )
 | 
			
		||||
        if parsed["content"]:
 | 
			
		||||
            return parsed["content"]
 | 
			
		||||
@ -246,7 +247,7 @@ class MailDocumentParser(DocumentParser):
 | 
			
		||||
 | 
			
		||||
        html = StringIO()
 | 
			
		||||
 | 
			
		||||
        with open(html_file, "r") as html_template_handle:
 | 
			
		||||
        with open(html_file) as html_template_handle:
 | 
			
		||||
            for line in html_template_handle.readlines():
 | 
			
		||||
                for placeholder in placeholder_pattern.findall(line):
 | 
			
		||||
                    line = re.sub(
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										197
									
								
								src/paperless_mail/tests/samples/html.eml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								src/paperless_mail/tests/samples/html.eml
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,197 @@
 | 
			
		||||
Return-Path: <someone@example.de>
 | 
			
		||||
Delivered-To: someoneelse@example.de
 | 
			
		||||
Received: from mail.example.de
 | 
			
		||||
	by mail.example.de with LMTP id KDcHIQh8fmPHVQAAFx6lBw
 | 
			
		||||
	for <someoneelse@example.de>; Sat, 15 Oct 2022 09:23:20 +0000
 | 
			
		||||
Content-Type: multipart/alternative;
 | 
			
		||||
 boundary="------------0UhSOOwwiiuLCrPveGIa7UzZ"
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=example.de;
 | 
			
		||||
	s=2018; t=1665825800;
 | 
			
		||||
	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
 | 
			
		||||
	 to:to:cc:mime-version:mime-version:content-type:content-type;
 | 
			
		||||
	bh=/6OzHOWcwCHrfo1mlk+KcsiTCkt9lN5CEU2AETZBM/M=;
 | 
			
		||||
	b=AM/Q8Xlmh5jmccjofuedENG9dk1K9ItOL7CBtRhQlTEkjJqb1e1WgrT86SZmU5K9WTVerX
 | 
			
		||||
	b0GgndG9xavsCSsaKrZX9rIbozFVY1+pr80sl+sZB/UbUFlr2C4/CALwUBveC6H+HcAJUR
 | 
			
		||||
	uRQycv5zuGm8XAXdo28oFWxCKcAsE0Vs+b8UNs5Qd0VJY9inquLKXHlvLYx+ivnkg/yPCZ
 | 
			
		||||
	ZiOfv4+Ljfxh3oq6vjN0G7pHmANn1U3MmTLivgGLocl+PPxOCCzHeRp38gJQi3NC75JA/B
 | 
			
		||||
	4bSJxwjV0ghnq5z7RG/Yo8d9zlB8l7z31PwCNzbPy/bJVC2EFBvHdhVqow==
 | 
			
		||||
Message-ID: <a9215c39-5464-8dbf-bb8a-c9fa95eee30f@example.de>
 | 
			
		||||
Date: Sat, 15 Oct 2022 11:23:19 +0200
 | 
			
		||||
MIME-Version: 1.0
 | 
			
		||||
Content-Language: en-US
 | 
			
		||||
To: someone@example.de
 | 
			
		||||
From: Name <someone@example.de>
 | 
			
		||||
Subject: HTML Message
 | 
			
		||||
Authentication-Results: ORIGINATING;
 | 
			
		||||
	auth=pass smtp.auth=someoneelse@example.de smtp.mailfrom=someone@example.de
 | 
			
		||||
 | 
			
		||||
This is a multi-part message in MIME format.
 | 
			
		||||
--------------0UhSOOwwiiuLCrPveGIa7UzZ
 | 
			
		||||
Content-Type: text/plain; charset=UTF-8; format=flowed
 | 
			
		||||
Content-Transfer-Encoding: 7bit
 | 
			
		||||
 | 
			
		||||
Some Text
 | 
			
		||||
 | 
			
		||||
and an embedded image.
 | 
			
		||||
 | 
			
		||||
--------------0UhSOOwwiiuLCrPveGIa7UzZ
 | 
			
		||||
Content-Type: multipart/related;
 | 
			
		||||
 boundary="------------fyEsKoz3fdzPxAaSslESHcHz"
 | 
			
		||||
 | 
			
		||||
--------------fyEsKoz3fdzPxAaSslESHcHz
 | 
			
		||||
Content-Type: text/html; charset=UTF-8
 | 
			
		||||
Content-Transfer-Encoding: 7bit
 | 
			
		||||
 | 
			
		||||
<html>
 | 
			
		||||
  <head>
 | 
			
		||||
 | 
			
		||||
    <meta http-equiv="content-type" content="text/html; charset=UTF-8">
 | 
			
		||||
  </head>
 | 
			
		||||
  <body>
 | 
			
		||||
    <p>Some Text</p>
 | 
			
		||||
    <p><img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt=""></p>
 | 
			
		||||
    <p>and an embedded image.<br>
 | 
			
		||||
    </p>
 | 
			
		||||
  </body>
 | 
			
		||||
</html>
 | 
			
		||||
--------------fyEsKoz3fdzPxAaSslESHcHz
 | 
			
		||||
Content-Type: image/png; name="IntM6gnXFm00FEV5.png"
 | 
			
		||||
Content-Disposition: inline; filename="IntM6gnXFm00FEV5.png"
 | 
			
		||||
Content-Id: <part1.pNdUSz0s.D3NqVtPg@example.de>
 | 
			
		||||
Content-Transfer-Encoding: base64
 | 
			
		||||
 | 
			
		||||
iVBORw0KGgoAAAANSUhEUgAAAF0AAABdCAIAAABIE/2UAAAACXBIWXMAAA7EAAAOxAGVKw4b
 | 
			
		||||
AAAbQ0lEQVR4nO18eZxcVZX/+Z773qvq6iUbWSCsgsiu4BIEcQGRURAEHXWcGcdlXGB+I86o
 | 
			
		||||
g/7QQUHA+aGiAdFxHFTAARRlU0RBRFRAcRRkE4iShLBFkpBOd1fVe/ee7++Pe191JzRJE0Bi
 | 
			
		||||
Zk76U13dqXr93vede873fM+5BZLyv/YY02f6BDZR+19cJrfsmT6Bp9MohIDC+AMgQqEg/UAR
 | 
			
		||||
xB8E674V/4PiS7zQx0AwqW3u/iICEYLjTpHcAEIKgPR93bdu7vElXrBBSBJCCg0iYiKW/hOT
 | 
			
		||||
LZjN0F9IAiB7cSVaDCiMjiIgkByFmGRtbW649EABIHXkjaE1RWGICE1UIPr4oXVzw6UHSswn
 | 
			
		||||
8QnFAMRwIwRVjKQIImb/Q/wlPjEzVfXeC6RkmalzVBHVuJ5EhICIQDBZnt4c4m7yg9oAmJmI
 | 
			
		||||
mBkAoy1dvmRNNUqIRCdixCFFoEkX0+aAS1wHqK0XXMwMgm7VvfDSb3VDVxSAigkoTsQJYrB5
 | 
			
		||||
bJKWzQMXxn8kY4YhIQBQ+VKdXvvza7/z/UtNQQqNgJgwQpNS9GQOs1ngQgqEwoohEThKVXa8
 | 
			
		||||
lo/45V+46AuLV9/74xuvycRZMIJQo5CW3j4pf/mzx2U8MQscNF6kWaAz18hO+cKpdy65UxwX
 | 
			
		||||
L1kM1IQXlhhwSlCTHPbPHhcAwphwoSIgjBwNY4289b2ff/+Cyy7QlsuK/IHlD3bNuywTQUrQ
 | 
			
		||||
EE6ei0T+fHGJ+TglICFjciGM7LLKGsWNS276zFdOtz5YIaJYtepRA4MZhca6un78KvLPlb9M
 | 
			
		||||
5G8pPlBCYDt0WcjN99xy7Mf/cenK+/qm9XVDaUIoKKCQgNU5qKbAfzb8JS59Su0IFFIkPZKk
 | 
			
		||||
TCS10VSlstI13aiMffKMk+/9433N6QMdX4m6omgsWbpkrNPO8ywlr1QyCmVyn9k0cUlENAaC
 | 
			
		||||
+osQIHIT9IATqVdTtyyzhi5bvvSYD//DbUvubM7s74SuukKoAhkZGTNv0cmICdXk45zBpohL
 | 
			
		||||
nSqiWyQXMYLkhCuLcUJIGq1TdfIiW75y+Uf/7aM//eV1zFlZgLooKZhRVIIE9nLQ49Dcnm2K
 | 
			
		||||
uPRuJ0jCjDRIEAZIIE3EIoMjQmU+hOFypGjkP7vr568//q+uvPVHfXP7jSEXOC/iTUSoEJFc
 | 
			
		||||
FYAjMxG3Id1uU4y7URQwiUUdYmh0IqQoCVULBohAg1hp5WDf4CU/veS4U457NBsbmjWtqioR
 | 
			
		||||
MlABAQyJByMqLoyEuLbH0Xc3RVykV/sTESMhRKgiIqBRI6MNpSvyYN1P/sep37zywrZWzb5m
 | 
			
		||||
VfrobQB6eWqC7CAWZZkJQExaH22KuFDiNcGEEmOt0EhAKEYRQEsri6J564N3fPJzJ193889c
 | 
			
		||||
v+NAFoLH2oEjiv5rKXgkNMb19dmmiEtMPSY0Ee3xN+89SDWK0AHOXfzTS04589SlK5b1zerv
 | 
			
		||||
sPIMOUAjXK2wQMREVMwCoEZuINhOsGcSF46H2FjsMarzIkIQJqI9lQ2auXbVrsz6mv0r/YoT
 | 
			
		||||
Tzvxe9de0c18MbPZDl0qANBEVU2oUWegAUrSOReCJ8m0tjjxr0/qOM8kLhh/TN2KqBOYiFJs
 | 
			
		||||
Ys8LUlaVZciy/p/ddtPC80+/9sZr+6f3qZNSSqrAnBMVMQpRF4WoYSY5ffqMPMshEICgmrDu
 | 
			
		||||
kEzaJ3lm/aVuB1JiAEhn2cvTJhAxM4JaZA8+8vDCr5518VWXjbjVfbNapVRmXjLV4JRQgoCp
 | 
			
		||||
qYlo7S+iCvHeb731/L6+hnkCAJEoENJffuy5PaO4MN5ReFBIF6MJSaEJQVWvnh5NF4Q/+PUP
 | 
			
		||||
P/rpExY/fG9r5mAfmiSVTuEkEKIAidpZ6guNoKsoTDJozkyMVApJBZKbYJPQvdeqa2rdGRRI
 | 
			
		||||
yr6kCDQETwmCqtlo3vPwH/79/C9f9MPvdLNyYM600kpnDkxLIh02ZvXHXGBK955bzd1SBcFX
 | 
			
		||||
WUPZKzYpCtpkHvOnxiWeQjoRMlZwWcyfFKScKt3KN1t9XrrfvemK077wmdsX3dGc0UKuPhjU
 | 
			
		||||
xUtJEnY8LCE14xl3AApBB7XKD7UGVRDqmAZhlG02lXW0lr+QUKUZIDRCIm9hJ1SDrdZvl961
 | 
			
		||||
8ILP/uAnV1bm++cOdqwUIJgJJVNEajbuIOPfJ/wOoDAEm94/bZ899yGo0DTFkNzNLAL8jPvL
 | 
			
		||||
Wv3ACBAQfCVGwjyD5FmujQt/fNkpnzt1SXtR/7RWJkXHOnQgxDlHMzGJBGf93Cz6Qqj87P7Z
 | 
			
		||||
L9j9BUKIquiEOlyAmvut894/BS6saWvU6evkk5xFRES1sqq0smg2lq5c9sWvf/nbV14aMhuY
 | 
			
		||||
PhBYCQOgqeUTTEmI1m2x+vpjdhaRqDbVvROBBB+222G7vqxJi0FdKFAhhCaqYozZ60+PS13r
 | 
			
		||||
pP5eTeKSmmjGThUCOK057bKfX/6ps/7tdw/c09iiZWrqTUWZQrOIGOtQndRL1MevBVtJ4ZdC
 | 
			
		||||
CEWhZVnt+uxdBlwrlMFBCRKkxHmG5L2PJTB/Yn8Z76PHhjHIEEo0tKr8Z88940vnfXEU7YG5
 | 
			
		||||
Q6M2QkUfGhGKyE9Rt4rWaSr3/CX+MvpLzFAKsRDmDM0GaWaZcxG5GHQt6izPWN0YuazQ6CkO
 | 
			
		||||
VIj40ouYIDSK1k33/Phf/9+Jt//+rmyoxTzrBHNosQpBpBco41FiJotQTUg7AtIgJpZR1NQg
 | 
			
		||||
ldIUIfhpzf49tt2NInA0NQicZLGTnwDZOFweq6ROtIl9z8d7FwGN6ppmkcKqgGKVVK2ieeGP
 | 
			
		||||
zj/+08ePWac1c6BDH2KeDXCiomZMIalmOzUcE0piCAWa2iUWfwQFCkhgf6O1x3P2AACo0ZSq
 | 
			
		||||
qmKaBIaN1l/WYhxrX/bEF6zvXawnuiiAIIi3MkjVZvuz55xx5jlfzFvImwMd86aRvMchHsbU
 | 
			
		||||
AY4XmD35fnxwUOqLi4sVUfg0QB3Ud8Jzdn3OjJkz4h1yzkn8X01N/HUgfgK4TOov6/xm/f6S
 | 
			
		||||
bmfUUARmFVyAyr+c9OHzr754aKuZlVnHShcvuZ6gjDM8E7xxrUtYt/lDYcpRZNTjyBx5Z3T4
 | 
			
		||||
uTvv1cpawXt1Lg5/RKgx6f2cIi4TL368UVOP3PTmBx7vvQkdYdSog7Ebyizn8pUPfui0j1x3
 | 
			
		||||
6w1DW03vqhcycwhmGhupvXZ6T3HbgIpUZyzEcsIAKoVl6HetvXbeM5bRE3GYdO1PGRcmR0hA
 | 
			
		||||
KIQSR0seC5asXR/LBCfqDaRUvps387seuPt9H3vfbUvvKGa2Ar0GQ4wHoJrFBWNKAmpQwrAB
 | 
			
		||||
/lbXE9pjALECYhm2mT3/gH0OoJiq9q5FdcNq//pekY7em1UzETIuUQji1M1agUZSfToRzQiS
 | 
			
		||||
cyoimWL12PD//fQJNy29PZ830GaZCxolc0JpWRBHRsYFIUFB7IlsQGVjLR1EIhBzdZZrp9M9
 | 
			
		||||
7FWHzhuaU1Zl4lATurcbj0sSeCgkzSyIXzmy6re3/1ZSdFxrkq0HQapWo7xcI9gtSx+qDjvH
 | 
			
		||||
nfqhG377i/7ZgyO+SxUEayCHUQkVUUIsBs9YYyMV2+u11FLqddwgFAllGGi0XnPga0Tg1EU/
 | 
			
		||||
wgTbeFzGPaVk5Suv4b8X33z6BQuHuSbQKycmCpH6OymhLo4liFUWLFTircBJ55580U++M21W
 | 
			
		||||
SztlXylNy03QVYooRAM0KKggoKKZAWuVho+PSyLTdCwF3sQy5DJmu87feadtdgpGVbehY0wN
 | 
			
		||||
l3F9JGVK8eYrC5dfdfkNv7mhHTqaaWrycvz1iMw+gRWZOI3mEQaK/qtv/NG5F/3XjLkzu1VX
 | 
			
		||||
gUxUzAQwxFlaEOkLQNTfxgn+eq2nXRJmMIo6c9VoddjBr5meDQTvVcfHMyelHVPFZfxtEFC8
 | 
			
		||||
WLPRt3TF0ht/84vhsZHLr/4u4EytDrB12oZEMh4viUJRGCyDu+ehe888+wsuzwKJzJnFSkfq
 | 
			
		||||
GEJJi4dITfn0THphfD1WMz8PJTIn6rth+y23ffmCl3WqTu5y1sVHL7JsZHwZfxtFVAga+K3L
 | 
			
		||||
vr34gfvQ1G9c9I37Rx6opOpVxnWgETC12euBAQYNlfqTzjjp1ntuzZq5kSEEOGf1yUbBIabS
 | 
			
		||||
3lfv2ZT8JVVNoIOZOMn9muo9b/n7PbfezXyFEGdkesn6SfjL+J80hhAIGbH2Tbf8d8gsG8zv
 | 
			
		||||
XPy7q268Ond5t+z44M3MzJjIVFxIkaRKYMiy/Cc3/+y6X/20mN5XiQVlarAiMfA0l4LJ59ym
 | 
			
		||||
YjWdZzDLXVaNls/acvvXv/qoQN8oGqIT+P6U7XH9hazHW0nNdNGSRYuWLEJDK3jX78779nlL
 | 
			
		||||
Hr0vi52HtdedUMRIEwAm7IbOOd88Zw1Hq8IHkACdBJJJeAbrcnnDp91bVfVXGoyJSxJw1IzK
 | 
			
		||||
sfC6Q47YophFI8TBaWwtTd5wfUK4pEulxJyTqfvd3b9bPTactbK2tRuDzVvuuHnh2QuLrKjK
 | 
			
		||||
SiZuYorMkwQQLBRa3HLXrb+65abGYNHVMoiZiAmpNQFMNGVq/pIWQx2TUpgeJ0q5Oj9Wzuwf
 | 
			
		||||
evUrXiUUB0doSK+UJ+SQjxt3U7YnBOLpb7njlsoqgi5z7W53aIvp3/nuxd/78RWNZsPoRWK9
 | 
			
		||||
HEdUxgckDXbeReeOVKOSa7AQG8P1zg6wDiw9dW0D1pt1SoBEeKhQEZFABFoV3v32d++67XNC
 | 
			
		||||
8IDGSoipdf9U+EsPIFWMofPw8P0h76og90VmRQXrDvlPfOXk+9sPBvH0XgkxUlCJmFogTeX2
 | 
			
		||||
h+685pZrbcgCrQjNzCOjOBMXM9ETOM8aGJG4sklHc0IHceZNzRqqYbi73x77vuP175SQZ5JJ
 | 
			
		||||
ECWyuFrjwn2SuNR5lzHjlFX3/gfvLxqF95WESLkNuS5btezE009aMbJSnVpitwIRHzyFCtx9
 | 
			
		||||
z91ruiOSqxmVqgRItQmhBfJE/CVdGcREaDAqPUOWxx6s9KN1zNuO7pOm+B7qBDxoeIJ3Yf3+
 | 
			
		||||
AhFRaFVWyx9erqrJY+MfpBX9xQWXXbDwnDOQZZ5lZIBqdHAWghL3LP79aLsNOMT6IMZd1KOl
 | 
			
		||||
mBBBZQr5IomTAjEgCCyIRwZPE3GrVw6/6dVvfOmeL/VVlasm/QrBGJhU/yftLz0+Ek/6oRUP
 | 
			
		||||
Rbmorsok3uIuyplbz/qvSy74zk8vdnnR9aUQsOi3Uor9fsm9LssMluqX1JdAVJ6Sj0zdXxI6
 | 
			
		||||
EUSjBCIYg4p2R8v9n/+S97/9WOn4gjmgBAWpA1Dnu6covkAkhADghuuvX7FyRV4UKUhGwFQM
 | 
			
		||||
VqJyQ/kHTzzuwh9+M28UY2U3mEFYFHlJv/yRR5g7E6lHfZ4iS+UCBZKLSsfm9M0+9YOnzBma
 | 
			
		||||
o3SZZiKisSqX3hinbLAun2jr85dEnIWddjelJ60HMkQgWnkTl83bfv5qP/z5s89YumqpNlwQ
 | 
			
		||||
T9I5HWmPLXlgmeaZ1Z2JqdraPIVpLpP1hSbnomgmmQtORnns3/6f5265R6iYFQ1RiKZ5BYqK
 | 
			
		||||
OHL9w91TxqVX70TCkDdyVUUcuYg3QMSMuWusWTM6fdbM/V6+/+/uu+vDJx73yIqHVZOr+8pX
 | 
			
		||||
wTRTSx2aKUPTawelod1UTI7LD8xIBzgIQjt88L3//I7XvbXqVA4qEqOYpQ2vcEAGKKcQvjaM
 | 
			
		||||
i9QUVqEkd9zhWQP9A+1OR1Wjqh7jgxKNZvPW2297z9+9d8G+Cy770fe+dOGXszzrdjshmMud
 | 
			
		||||
OoQQIHW3cYpW+wssSWP1o8S2WIAZmMGtfvjRV+530NFvfI/zmklGBZUCM6FJTVpSmJyKYjEF
 | 
			
		||||
XOr/h4B77bFbf2tQJDd1Qb2pN1FlQ0NXmvLomjUXn3fBsW89eto2c8++6vxLb7rE9bluGAMr
 | 
			
		||||
xyqn5Ua1JzIpnJiwqDkx7Q0Mgsh85kx9s501QvePo39z0FtOOvoTmc8oJoVBCSjonKgDHKmo
 | 
			
		||||
uWQ87lOFCygki7wYHBgSi3WhReJhVDqIDzP6hy75/uVrxtpvOOzIPz704Jlnf2l1dw0yV/mS
 | 
			
		||||
PmQSIwKmvrxZl1yExhBrMdbBTA1gXrnVD6x+7UGHL/zY57eeNd9o6mDiI8mOyp3GtScTqdJT
 | 
			
		||||
6C8AaQPNwW223NoC03iEQEXEsatommsSHGhccu2V73rzW1+4216/vvWW0876rFOHQmfNmukr
 | 
			
		||||
n0JDGimcAk1JFRcJMUhQmpppsKR3qq7WN73qrz7+/hN85b33RZalida6/qD01k8iRk+wnN6g
 | 
			
		||||
vwjN2GBzzozZoRNg0NQwp5l5p85TqsoNFj/59fVzZsw46pWvNeC711xx6723DbWmz5u7Jb04
 | 
			
		||||
giKmScbbYMJOW5ujBCc9MOE0U8qqP658yyF//cUPn7lFcws1NFwuAojWLSXUmWECV6rhfrK4
 | 
			
		||||
TJCyxcwy5HvsvKeDc3BOtCauNWeFUOXRNSuvvuaHh73iNbNnzV4+uuIHN1zVb83B1oCE6Nsw
 | 
			
		||||
WEpLGzrB+mMmaDB1AOGCNiRHx9DmP77lmOOPOR5BG5L15UWETjVOtvSUnSdr69MxKVTVTDML
 | 
			
		||||
fsELXjRzYLpVIW3hp0CYBa1UO8qcbGX51y44d9ZWc56/+15jvn3Vz68p1XbcYcdQeo20B/Wk
 | 
			
		||||
JWpqsrakWMuME0cVzFdVDteUolrVHeLAmScsPOk9H29qAxRYzZsRY5JLLaSngkCuDxfU9VHw
 | 
			
		||||
Yaetdtxh22eVnTKr291qKIJ0M2nnLIxDLr9z2R9uvueOQ/Y/MGtldy9ZdN1tP9tj9z1bfX3m
 | 
			
		||||
TUCrpXBhr1SqOy2S4KhbGD01x5qNLHS77dWjBy448KunfeXwBYeFTmi6LFMwdrRq+p3G5aag
 | 
			
		||||
e248LmvJwgDJftc68rVHWmWMXUASwszgAa8QY6Z4tL3mljtv323H5wwMtoZHVv/qV7/cZaed
 | 
			
		||||
m1lfhARJGlICSULpQUOoKkmrHUYgqtrUorumM60xdNzRHzrrkwv33WVBOVIVzOOJOxenR3qV
 | 
			
		||||
ci+uPwUOs/5+QNyXLLm6KnSOfOlhe+/+/DVj3ZA7L95QebU+r31V1nYcbpRFQx9aumyrrbYb
 | 
			
		||||
zPty86MjK+b2z9lxm12rtmbIxLqMG2YIo0aaF7OUOvXew2mc3BV14tzo6Fjn4fCK3V75vbOu
 | 
			
		||||
+MBR7x+yweAtbzWsIalvWwvjIoDGoflxN39acOnhk5BR573NbEx/+5F/4ypRE8lQihdHNVET
 | 
			
		||||
UTWFiN1//zLAOZcVWfHwQw81WOz7wn19p1JKrnASlEFJpWn8fBqaU/G+cqpq0pCswRxj1nlk
 | 
			
		||||
9MW7L/jsJ0474+TTt91iK+9LFXXOCUit9ySlRYfxSbCNSDwbgUtqdkBEtVE0u1X70P0P3nv7
 | 
			
		||||
3apHOy7LvaZQIXGcv/JZo1i8bOnw2KjmDR9k7uwtFe6Nr3vDlrPnhU4FEzGv4pVeEQReJACB
 | 
			
		||||
DArNoCjNr6nCqnLr1tx/OOpdZ5/472942RF9LFh5BydU1opwIm+1OE/W22KnSJCmYOubZ4hD
 | 
			
		||||
9BYnQokGGwNF/yeOPf5t//Lu0XqcxIkBDqLOudLK0sogQZAFL/NmbQVi1tDM5+6259U3XNls
 | 
			
		||||
9XVCGSTEcl2gACiaaSZd6451C8mfv/PeRxx8+MEHHLj1jPndbkfKsuUKJRgIx+QdQgp04vx6
 | 
			
		||||
XErj9/NpxyVReAMdJXNZqPz+e7zsUx855b0nvb9vehHQIQFk0XPUKR3vuW/R6tUjc2bOPfCA
 | 
			
		||||
V4xVo4PF4KsP+otrfnl16avgYIJcMwhoqLqV0R4dXrNla9bhBx121CFH7rPH3rPcDJMQur4P
 | 
			
		||||
/ZKJAjRIHBekSJxuiel4YxTipwKXWJEaqRIAiKgia3faB7/4kHce8Xdfu/jrnB4ELjaMBAy0
 | 
			
		||||
aVtMX/LA0lXLVx74isN23mbnUJZmnDNvbt5slOzm2hS6ql3SW+iGmYPTd91pl7133/ule+/3
 | 
			
		||||
0r1ekonrWln5KkOWuUJSey4l8tTDnDBs+/SBsgFcYmsGYk5KwpE5FCEAyE445qNDfc1PfuuU
 | 
			
		||||
6TNmS4dZlokTs87MLWb94NqrChSHH3xYgXys7LSazSu//8PVo6OtGY01y9c00JzRP23reVu+
 | 
			
		||||
4TVHHfySA+fNmjfkhoJYtyxNrHBF7LFDIGneS6zu9kqq/0CKYryj9qfGRYSxQqyjW1zZ6qBZ
 | 
			
		||||
sA+8458W29Lzzr9wxsBsOPUhZI1i0eJFK+9b+YLdn3vAC/frdjqtwWnfv/4Hl115WaPRGOqb
 | 
			
		||||
9qbD//LZW+304hct2GX+swezQRHSe9/pClxDCxGhxQ+fSDNhJmJIH0WhiaykD8t6OjERWatV
 | 
			
		||||
OBkuHJ9HjqeSij4LIVjouuqsr33p65edt9I/qgMu0HdH2/Onz/vGyf+557Z7hUrvXX3/mz/w
 | 
			
		||||
xgdX3T+AgYUf+/yhL/qLeFyTYD6NACK1NnsH71W/WOcH1Lv8pH7+9C2l9c4FpcoDSPpOTRFI
 | 
			
		||||
AKpa+OLD7/zgCf/0r9PywfaKsT7p0wqvP/T1e2//vM5YRxvZ+Zeff9+KZYHhr498y6tfdEhZ
 | 
			
		||||
llVVmZkYXJZDHTSNGgqk7qv2dEsBRCc8Sl0N9Z4/jcb1mVl6TBMLNsFCCN12OTy2Zphjv1z8
 | 
			
		||||
m6M/c+z8g7Y/9INHLKmWDY+u9t5fd/v1Ox6+y6zD5u79thf8fsWScixUpbfoaSFMPBTX+Z6e
 | 
			
		||||
0IzGdR9p6bTqquFpsfXHlwk3pp5P6607CDJRoBgb7ey13e5n/fPnXFvuXHRnA40iawyXaz7/
 | 
			
		||||
tc8NV6s63fabD33js2ZuW414V7g0V8U0F1kP1PaCRc8HJjgD1n3EY17ylNsTUF3XauZHdSzT
 | 
			
		||||
LM+n9Q1pJVWojn3X+0YeGVn0h0WNonH6uQuv/sVVLtf99ljwple+ueyWTtMgZ6wSpzif80zZ
 | 
			
		||||
E9s3MZ4voyymQooZM2Sefv4W8+fN2+ruuxbNnDXnPy/+6rTZQ6jklPd/arvp21RjFfJ4F9b6
 | 
			
		||||
nMbNBBcR6cklIhJgAGCg0QIbRb7D9jv86Pprr/jtVZ28Ld1w+L5H7LPD3n7Eq4PXbiaF1Buo
 | 
			
		||||
yLUA2tRsY/bZkFSoWQgSnDqIKmAM0GLbHbb/6ufPcXO0GCzQ5nv/9hilCJUwbz7Topfi4qE2
 | 
			
		||||
TVBk4z7/JfINAK6ONRSqywI5Z/vZ/XP6p08bGFk2/Jcvf/M+2z4vBEMTcK7p+uUJdR2fUdu4
 | 
			
		||||
z8VJLWJAUReX3gcADy9/2MrQWdU+4Hn7Hf/ej3S7nbg1MT70ujlP7TU8HbZxuPSEw3pUnswz
 | 
			
		||||
N9wdvvSiS9kO2ai++03vmtOYqaZS97nTfvjx6Z5N2p7UPj4gfuiZCMWpsgoHLNh/oNX3rHnb
 | 
			
		||||
vWrBq7plp3A5VEUosPSZI3BPL4F/imz99dHklmqW3vtoQqUEcxTBsB9poVloJiagxrJYEMQo
 | 
			
		||||
6kA3YUP8pmsbh0vaSJbKOaaNpSahpBenBXOkycm6/6kWBU+JnGez9JeEQvrG1F1VE5HY1gcE
 | 
			
		||||
kjZWACJpJ1eadd9kOctE+/+PsA04/7ZXkgAAAABJRU5ErkJggg==
 | 
			
		||||
 | 
			
		||||
--------------fyEsKoz3fdzPxAaSslESHcHz--
 | 
			
		||||
 | 
			
		||||
--------------0UhSOOwwiiuLCrPveGIa7UzZ--
 | 
			
		||||
@ -6,6 +6,7 @@ import pytest
 | 
			
		||||
from django.test import TestCase
 | 
			
		||||
from documents.parsers import ParseError
 | 
			
		||||
from paperless_mail.parsers import MailDocumentParser
 | 
			
		||||
from paperless_mail.parsers import settings
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestParser(TestCase):
 | 
			
		||||
@ -201,3 +202,26 @@ class TestParser(TestCase):
 | 
			
		||||
            }
 | 
			
		||||
            in metadata,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch("documents.loggers.LoggingMixin.log")  # Disable log output
 | 
			
		||||
    def test_tika_parse(self, m):
 | 
			
		||||
        html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
 | 
			
		||||
        expected_text = "\n\n\n\n\n\n\n\n\nSome Text\n"
 | 
			
		||||
 | 
			
		||||
        parser = MailDocumentParser(None)
 | 
			
		||||
        tika_server_original = parser.tika_server
 | 
			
		||||
 | 
			
		||||
        # Check if exception is raised when Tika cannot be reached.
 | 
			
		||||
        with pytest.raises(ParseError):
 | 
			
		||||
            parser.tika_server = ""
 | 
			
		||||
            parser.tika_parse(html)
 | 
			
		||||
 | 
			
		||||
        # Check unsuccessful parsing
 | 
			
		||||
        parser.tika_server = tika_server_original
 | 
			
		||||
 | 
			
		||||
        parsed = parser.tika_parse(None)
 | 
			
		||||
        self.assertEqual("", parsed)
 | 
			
		||||
 | 
			
		||||
        # Check successful parsing
 | 
			
		||||
        parsed = parser.tika_parse(html)
 | 
			
		||||
        self.assertEqual(expected_text, parsed)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user