From 4375a580dccf13e98113e16d7d06ab352ebe79a4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 2 Jan 2018 09:05:19 +0530
Subject: [PATCH] Update LA Times

---
 recipes/latimes.recipe | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/recipes/latimes.recipe b/recipes/latimes.recipe
index 668659100b..1939603a59 100644
--- a/recipes/latimes.recipe
+++ b/recipes/latimes.recipe
@@ -8,6 +8,15 @@ from pprint import pformat
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
+def classes(classes):  # classes: one string of space-separated class names
+    q = frozenset(classes.split(' '))  # the set of names to look for
+    return dict(  # result has the form {'attrs': {'class': <predicate>}}
+        attrs={  # predicate gets x; a falsy x (None, '') is returned as-is
+            'class': lambda x: x and frozenset(x.split()).intersection(q)
+        }  # otherwise: names shared by x and q (empty frozenset is falsy)
+    )
+
+
 def absurl(url):
     if url.startswith('/'):
         url = 'http://www.latimes.com' + url
@@ -30,13 +39,21 @@ class LATimes(BasicNewsRecipe):
     cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
 
     keep_only_tags = [
-        dict(itemprop='articleBody'),
         dict(name='h1'),
-        dict(attrs={'data-content-type': 'image'}),
+        dict(attrs={
+            'class': 'trb_ar_main'
+        }),
     ]
+
+    remove_tags_after = [
+        dict(itemprop='articleBody'),
+    ]
+
     remove_tags = [
-        dict(attrs={'data-content-type': 'story'}),
-        dict(attrs={'data-load-type': 'commentFrame'}),
+        dict(attrs={
+            'data-content-type': 'blurb'
+        }),
+        classes('trb_ar_cont trb_gptAd trb_filmstrip trb_ar_sponsoredmod'),
     ]
 
     def parse_index(self):
@@ -49,7 +66,11 @@ class LATimes(BasicNewsRecipe):
                 'data-content-slug': True,
             }
         ):
-            a = x.find('a', attrs={'class': lambda x: not x or 'SectionHeading' not in x})
+            a = x.find(
+                'a', attrs={
+                    'class': lambda x: not x or 'SectionHeading' not in x
+                }
+            )
             if a is not None:
                 url = absurl(a['href'])
                 section = x['data-content-section'].capitalize()