mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	Cleanup after latest changes to site
This commit is contained in:
		
							parent
							
								
									0ac9ac2e15
								
							
						
					
					
						commit
						2a6bc4f501
					
				@ -59,23 +59,26 @@ class Economist(BasicNewsRecipe):
 | 
			
		||||
    oldest_article = 7.0
 | 
			
		||||
    resolve_internal_links = True
 | 
			
		||||
    remove_tags = [
 | 
			
		||||
        dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
 | 
			
		||||
        dict(
 | 
			
		||||
            attrs={
 | 
			
		||||
        dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
 | 
			
		||||
        dict(attrs={
 | 
			
		||||
                'class': [
 | 
			
		||||
                    'dblClkTrk', 'ec-article-info', 'share_inline_header',
 | 
			
		||||
                    'related-items', 'main-content-container', 'ec-topic-widget'
 | 
			
		||||
                    'related-items', 'main-content-container', 'ec-topic-widget', 
 | 
			
		||||
                    'teaser', 'blog-post__bottom-panel-bottom', 'blog-post__comments-label',
 | 
			
		||||
                    'blog-post__foot-note'
 | 
			
		||||
                ]
 | 
			
		||||
            }
 | 
			
		||||
        ),
 | 
			
		||||
        {
 | 
			
		||||
            'class': lambda x: x and 'share-links-header' in x
 | 
			
		||||
        },
 | 
			
		||||
        dict(attrs={'class': lambda x: x and 'share-links-header' in x.split()}),
 | 
			
		||||
        dict(attrs={'class': lambda x: x and 'teaser--wrapped' in x.split()}),
 | 
			
		||||
    ]
 | 
			
		||||
    keep_only_tags = [dict(name='article', id=lambda x: not x)]
 | 
			
		||||
    no_stylesheets = True
 | 
			
		||||
    preprocess_regexps = [(re.compile('</html>.*', re.DOTALL), lambda x: '</html>')]
 | 
			
		||||
 | 
			
		||||
    preprocess_regexps = [
 | 
			
		||||
      (re.compile('</html>.*', re.DOTALL), lambda x: '</html>'),
 | 
			
		||||
      (re.compile('<h1 class="flytitle-and-title__body".*?><span class="flytitle-and-title__flytitle".*?>',re.DOTALL|re.IGNORECASE), lambda x: '<h2><br />'),
 | 
			
		||||
      (re.compile('</span><span class="flytitle-and-title__title".*?>',re.DOTALL|re.IGNORECASE), lambda x: '</h2><h1><span class="flytitle-and-title__title">')      
 | 
			
		||||
    ]
 | 
			
		||||
    # economist.com has started throttling after about 60% of the total has
 | 
			
		||||
    # downloaded, failing with connection reset by peer (errno 104) errors.
 | 
			
		||||
    delay = 1
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user