mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 02:27:01 -04:00 
			
		
		
		
	New recipe for The Houston Chronicle by Kovid Goyal
This commit is contained in:
		
							parent
							
								
									154466cbbc
								
							
						
					
					
						commit
						2682d46cf7
					
				
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/houston_chronicle-2.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/houston_chronicle-2.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 406 B | 
							
								
								
									
										66
									
								
								resources/recipes/houston_chronicle.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								resources/recipes/houston_chronicle.recipe
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | ||||
| from calibre.web.feeds.news import BasicNewsRecipe | ||||
| 
 | ||||
class HoustonChronicle(BasicNewsRecipe):
    """Recipe for The Houston Chronicle, built by scraping the news index.

    Instead of relying on RSS feeds, ``parse_index`` walks the
    http://www.chron.com/news/ page and groups the article links it finds
    under the section headings that appear on that page.
    """

    title          = u'The Houston Chronicle'
    description    = 'News from Houston, Texas'
    __author__     = 'Kovid Goyal'
    # Must be an ISO-639 language code; 'US' is a country, not a language.
    language       = 'en'
    timefmt        = ' [%a, %d %b, %Y]'
    no_stylesheets = True

    # Element ids of the article page that are kept / stripped.
    keep_only_tags = [dict(id=['story-head', 'story'])]
    remove_tags = [dict(id=['share-module', 'resource-box',
        'resource-box-header'])]

    def parse_index(self):
        """Scrape the news index page and return the sections found.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``date`` and ``description``. Sections without any
            articles are omitted.

        Raises:
            ValueError: if the ``body-columns`` table that holds the
                index cannot be found on the page.
        """
        soup = self.index_to_soup('http://www.chron.com/news/')
        container = soup.find('table', attrs={'class':'body-columns'})
        if container is None:
            # find() returns None on a miss; fail with a clear message
            # rather than an AttributeError on the findAll() call below.
            raise ValueError('Could not find the body-columns table on '
                    'http://www.chron.com/news/')

        feeds = []
        # Articles seen before the first section heading go here.
        current_section = 'Top Stories'
        current_articles = []

        self.log('\tFound section:', current_section)

        for div in container.findAll('div'):
            css_class = div.get('class', None)
            if css_class == 'module-mast':
                # Section heading: strip the decorative U+00BB character.
                t = self.tag_to_string(div).replace(u'\xbb', '').strip()
                if t and 'interactives' not in t:
                    # Flush the previous section before starting a new one.
                    if current_section and current_articles:
                        feeds.append((current_section, current_articles))
                    current_section = t
                    current_articles = []
                    self.log('\tFound section:', current_section)
            elif div.get('storyid', False):
                # A div carrying a storyid attribute: use its first link.
                a = div.find('a', href=True)
                if a:
                    title = self.tag_to_string(a)
                    url = a.get('href')
                    if title and url:
                        if url.startswith('/'):
                            url = 'http://www.chron.com'+url
                        self.log('\t\tFound article:', title)
                        self.log('\t\t\t', url)
                        current_articles.append({'title':title, 'url':url,
                            'date':'', 'description':''})
            elif css_class == 'columnbox' and \
                    'special' in current_section.lower():
                # Only consulted while inside a section whose title
                # contains "special"; these boxes have no storyid.
                a = div.find('a')
                if a:
                    title = self.tag_to_string(a)
                    url = a.get('href')
                    if title and url:
                        # Only site-relative links are accepted here.
                        if not url.startswith('/'): continue
                        url = 'http://www.chron.com'+url
                        self.log('\t\tFound article:', title)
                        self.log('\t\t\t', url)
                        # Remove the link so the remaining text of the
                        # box serves as the description.
                        a.extract()
                        desc = self.tag_to_string(div)
                        current_articles.append({'title':title, 'url':url,
                            'date':'', 'description':desc})

        # Flush the final section.
        if current_section and current_articles:
            feeds.append((current_section, current_articles))
        return feeds
| 
 | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user