mirror of
				https://github.com/searxng/searxng.git
				synced 2025-10-26 16:22:30 -04:00 
			
		
		
		
	Merge pull request #1791 from return42/fix-1790
[fix] google-news: origin result does not have a content area
This commit is contained in:
		
						commit
						b1699f18ac
					
				| @ -150,24 +150,12 @@ def response(resp): | |||||||
|         # the first <h3> tag in the <article> contains the title of the link |         # the first <h3> tag in the <article> contains the title of the link | ||||||
|         title = extract_text(eval_xpath(result, './article/h3[1]')) |         title = extract_text(eval_xpath(result, './article/h3[1]')) | ||||||
| 
 | 
 | ||||||
|         # the first <div> tag in the <article> contains the content of the link |  | ||||||
|         content = extract_text(eval_xpath(result, './article/div[1]')) |  | ||||||
| 
 |  | ||||||
|         # the second <div> tag contains origin publisher and the publishing date |  | ||||||
| 
 |  | ||||||
|         pub_date = extract_text(eval_xpath(result, './article/div[2]//time')) |  | ||||||
|         pub_origin = extract_text(eval_xpath(result, './article/div[2]//a')) |  | ||||||
| 
 |  | ||||||
|         pub_info = [] |  | ||||||
|         if pub_origin: |  | ||||||
|             pub_info.append(pub_origin) |  | ||||||
|         if pub_date: |  | ||||||
|         # The pub_date is mostly a string like 'yesterday', not a real |         # The pub_date is mostly a string like 'yesterday', not a real | ||||||
|         # timezone date or time.  Therefore we can't use publishedDate. |         # timezone date or time.  Therefore we can't use publishedDate. | ||||||
|             pub_info.append(pub_date) |         pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time')) | ||||||
|         pub_info = ', '.join(pub_info) |         pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a')) | ||||||
|         if pub_info: | 
 | ||||||
|             content = pub_info + ': ' + content |         content = ' / '.join([x for x in [pub_origin, pub_date] if x]) | ||||||
| 
 | 
 | ||||||
|         # The image URL is located in a preceding sibling <img> tag, e.g.: |         # The image URL is located in a preceding sibling <img> tag, e.g.: | ||||||
|         # "https://lh3.googleusercontent.com/DjhQh7DMszk.....z=-p-h100-w100" |         # "https://lh3.googleusercontent.com/DjhQh7DMszk.....z=-p-h100-w100" | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user