mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-25 15:52:31 -04:00 
			
		
		
		
	[refactor] duration strings: move parsing logic to utils.py
This commit is contained in:
		
							parent
							
								
									c28d35c7fc
								
							
						
					
					
						commit
						4dfc47584d
					
				| @ -56,18 +56,6 @@ def request(query, params): | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| # Format the video duration | ||||
def format_duration(duration):
    """Normalise a ``M:SS`` duration string to zero-padded ``MM:SS``.

    :param duration: duration text, expected as two colon-separated integers
        (minutes and seconds).
    :returns: ``None`` when *duration* contains no colon or cannot be read as
        exactly two integers; an empty string when the total is negative or
        one hour or more; otherwise the ``MM:SS`` text.
    """
    if ":" not in duration:
        return None

    try:
        minutes, seconds = map(int, duration.split(":"))
    except ValueError:
        # Wrong number of fields (e.g. "1:02:03") or non-numeric parts:
        # treat like any other unparsable value instead of raising.
        return None

    total_seconds = minutes * 60 + seconds
    if not 0 <= total_seconds < 3600:
        return ""

    # str(timedelta) renders as "H:MM:SS"; below one hour that is "0:MM:SS",
    # so dropping the first two characters leaves "MM:SS".
    return str(timedelta(seconds=total_seconds))[2:]
| 
 | ||||
| 
 | ||||
| def response(resp): | ||||
|     search_res = resp.json() | ||||
| 
 | ||||
| @ -83,7 +71,12 @@ def response(resp): | ||||
|         unix_date = item["pubdate"] | ||||
| 
 | ||||
|         formatted_date = datetime.fromtimestamp(unix_date) | ||||
|         formatted_duration = format_duration(item["duration"]) | ||||
| 
 | ||||
|         # the duration only seems to be valid if the video is less than 60 mins | ||||
|         duration = utils.parse_duration_string(item["duration"]) | ||||
|         if duration and duration > timedelta(minutes=60): | ||||
|             duration = None | ||||
| 
 | ||||
|         iframe_url = f"https://player.bilibili.com/player.html?aid={video_id}&high_quality=1&autoplay=false&danmaku=0" | ||||
| 
 | ||||
|         results.append( | ||||
| @ -93,7 +86,7 @@ def response(resp): | ||||
|                 "content": description, | ||||
|                 "author": author, | ||||
|                 "publishedDate": formatted_date, | ||||
|                 "length": formatted_duration, | ||||
|                 "length": duration, | ||||
|                 "thumbnail": thumbnail, | ||||
|                 "iframe_src": iframe_url, | ||||
|                 "template": "videos.html", | ||||
|  | ||||
| @ -2,9 +2,10 @@ | ||||
| """iQiyi: A search engine for retrieving videos from iQiyi.""" | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from datetime import datetime, timedelta | ||||
| from datetime import datetime | ||||
| 
 | ||||
| from searx.exceptions import SearxEngineAPIException | ||||
| from searx.utils import parse_duration_string | ||||
| 
 | ||||
| about = { | ||||
|     "website": "https://www.iqiyi.com/", | ||||
| @ -55,20 +56,7 @@ def response(resp): | ||||
|             except (ValueError, TypeError): | ||||
|                 pass | ||||
| 
 | ||||
|         length = None | ||||
|         subscript_content = album_info.get("subscriptContent") | ||||
|         if subscript_content: | ||||
|             try: | ||||
|                 time_parts = subscript_content.split(":") | ||||
|                 if len(time_parts) == 2: | ||||
|                     minutes, seconds = map(int, time_parts) | ||||
|                     length = timedelta(minutes=minutes, seconds=seconds) | ||||
|                 elif len(time_parts) == 3: | ||||
|                     hours, minutes, seconds = map(int, time_parts) | ||||
|                     length = timedelta(hours=hours, minutes=minutes, seconds=seconds) | ||||
|             except (ValueError, TypeError): | ||||
|                 pass | ||||
| 
 | ||||
|         length = parse_duration_string(album_info.get("subscriptionContent")) | ||||
|         results.append( | ||||
|             { | ||||
|                 'url': album_info.get("pageUrl", "").replace("http://", "https://"), | ||||
|  | ||||
| @ -6,7 +6,7 @@ | ||||
| 
 | ||||
| import re | ||||
| from urllib.parse import urlencode | ||||
| from datetime import datetime | ||||
| from datetime import datetime, timedelta | ||||
| from dateutil.parser import parse | ||||
| from dateutil.relativedelta import relativedelta | ||||
| 
 | ||||
| @ -50,12 +50,6 @@ safesearch = True | ||||
| safesearch_table = {0: 'both', 1: 'false', 2: 'false'} | ||||
| 
 | ||||
| 
 | ||||
def minute_to_hm(minute):
    """Render an integer as ``Q:RR``, the quotient and zero-padded remainder
    of dividing *minute* by 60.

    Returns ``None`` when *minute* is not an ``int``.
    """
    if not isinstance(minute, int):
        return None

    quotient, remainder = divmod(minute, 60)
    return f"{quotient:d}:{remainder:02d}"
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     """Assemble request for the Peertube API""" | ||||
| 
 | ||||
| @ -117,13 +111,17 @@ def video_response(resp): | ||||
|             if x | ||||
|         ] | ||||
| 
 | ||||
|         duration = result.get('duration') | ||||
|         if duration: | ||||
|             duration = timedelta(seconds=duration) | ||||
| 
 | ||||
|         results.append( | ||||
|             { | ||||
|                 'url': result['url'], | ||||
|                 'title': result['name'], | ||||
|                 'content': html_to_text(result.get('description') or ''), | ||||
|                 'author': result.get('account', {}).get('displayName'), | ||||
|                 'length': minute_to_hm(result.get('duration')), | ||||
|                 'length': duration, | ||||
|                 'views': humanize_number(result['views']), | ||||
|                 'template': 'videos.html', | ||||
|                 'publishedDate': parse(result['publishedAt']), | ||||
|  | ||||
| @ -73,7 +73,7 @@ Implementations | ||||
| from urllib.parse import urlencode, urlparse | ||||
| from searx import locales | ||||
| from searx.network import get | ||||
| from searx.utils import gen_useragent, html_to_text | ||||
| from searx.utils import gen_useragent, html_to_text, parse_duration_string | ||||
| 
 | ||||
| about = { | ||||
|     "website": "https://presearch.io", | ||||
| @ -270,7 +270,7 @@ def response(resp): | ||||
|                     'url': item.get('link'), | ||||
|                     'content': item.get('description', ''), | ||||
|                     'thumbnail': item.get('image'), | ||||
|                     'length': item.get('duration'), | ||||
|                     'length': parse_duration_string(item.get('duration')), | ||||
|                 } | ||||
|             ) | ||||
| 
 | ||||
|  | ||||
| @ -1,7 +1,5 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """Utility functions for the engines | ||||
| 
 | ||||
| """ | ||||
| """Utility functions for the engines""" | ||||
| 
 | ||||
| from __future__ import annotations | ||||
| 
 | ||||
| @ -18,6 +16,7 @@ from random import choice | ||||
| from html.parser import HTMLParser | ||||
| from html import escape | ||||
| from urllib.parse import urljoin, urlparse, parse_qs, urlencode | ||||
| from datetime import timedelta | ||||
| from markdown_it import MarkdownIt | ||||
| 
 | ||||
| from lxml import html | ||||
| @ -831,3 +830,25 @@ def js_variable_to_python(js_variable): | ||||
|     s = s.replace(chr(1), ':') | ||||
|     # load the JSON and return the result | ||||
|     return json.loads(s) | ||||
| 
 | ||||
| 
 | ||||
def parse_duration_string(duration_str: str | None) -> timedelta | None:
    """Parse a time string in format MM:SS or HH:MM:SS and convert it to a
    `timedelta` object.

    :param duration_str: the text to parse; surrounding whitespace is ignored.
        Non-string values (for example ``None`` from a missing dictionary
        key) are accepted and yield ``None``.
    :returns: the parsed duration, or ``None`` if the provided value doesn't
        match any of the supported formats.
    """
    if not isinstance(duration_str, str):
        return None

    duration_str = duration_str.strip()
    if not duration_str:
        return None

    time_parts = duration_str.split(":")
    if len(time_parts) == 2:
        # hours are not provided in MM:SS, so initialise them to 0
        time_parts = ["00"] + time_parts
    elif len(time_parts) != 3:
        return None

    try:
        hours, minutes, seconds = map(int, time_parts)
    except ValueError:
        # at least one field is not an integer
        return None

    return timedelta(hours=hours, minutes=minutes, seconds=seconds)
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user