mirror of
				https://github.com/searxng/searxng.git
				synced 2025-11-02 18:47:05 -05:00 
			
		
		
		
	[fix] bing_new engine: fix published date parsing
This commit is contained in:
		
							parent
							
								
									7f1e2ba211
								
							
						
					
					
						commit
						a92e3ba446
					
				@ -56,10 +56,14 @@ def response(resp):
 | 
				
			|||||||
        link = result.xpath('.//div[@class="newstitle"]/a')[0]
 | 
					        link = result.xpath('.//div[@class="newstitle"]/a')[0]
 | 
				
			||||||
        url = link.attrib.get('href')
 | 
					        url = link.attrib.get('href')
 | 
				
			||||||
        title = ' '.join(link.xpath('.//text()'))
 | 
					        title = ' '.join(link.xpath('.//text()'))
 | 
				
			||||||
        content = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')))
 | 
					        contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')
 | 
				
			||||||
 | 
					        if contentXPath != None:
 | 
				
			||||||
 | 
					            content = escape(' '.join(contentXPath))
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
        # parse publishedDate
 | 
					        # parse publishedDate
 | 
				
			||||||
        publishedDate = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_ST"]//span[@class="sn_tm"]//text()')))
 | 
					        publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()')
 | 
				
			||||||
 | 
					        if publishedDateXPath != None:
 | 
				
			||||||
 | 
					            publishedDate = escape(' '.join(publishedDateXPath))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
 | 
					        if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
 | 
				
			||||||
            timeNumbers = re.findall(r'\d+', publishedDate)
 | 
					            timeNumbers = re.findall(r'\d+', publishedDate)
 | 
				
			||||||
@ -74,8 +78,17 @@ def response(resp):
 | 
				
			|||||||
            publishedDate = datetime.now()\
 | 
					            publishedDate = datetime.now()\
 | 
				
			||||||
                - timedelta(hours=int(timeNumbers[0]))\
 | 
					                - timedelta(hours=int(timeNumbers[0]))\
 | 
				
			||||||
                - timedelta(minutes=int(timeNumbers[1]))
 | 
					                - timedelta(minutes=int(timeNumbers[1]))
 | 
				
			||||||
 | 
					        elif re.match("^[0-9]+ day(s|) ago$", publishedDate):
 | 
				
			||||||
 | 
					            timeNumbers = re.findall(r'\d+', publishedDate)
 | 
				
			||||||
 | 
					            publishedDate = datetime.now()\
 | 
				
			||||||
 | 
					                - timedelta(days=int(timeNumbers[0]))
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            publishedDate = parser.parse(publishedDate)  
 | 
					            try:
 | 
				
			||||||
 | 
					                # FIXME use params['language'] to parse either mm/dd or dd/mm
 | 
				
			||||||
 | 
					                publishedDate = parser.parse(publishedDate, dayfirst=False)
 | 
				
			||||||
 | 
					            except TypeError:
 | 
				
			||||||
 | 
					                # FIXME
 | 
				
			||||||
 | 
					                publishedDate = datetime.now()
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
        # append result
 | 
					        # append result
 | 
				
			||||||
        results.append({'url': url, 
 | 
					        results.append({'url': url, 
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user