Kovid Goyal 2018-07-13 08:16:38 +05:30
parent 17cc008148
commit b7181cfd37
GPG Key ID: 06BC317B515ACE7C (no known key found for this signature in database)
2 changed files with 43 additions and 40 deletions

View File

@@ -5,18 +5,14 @@ __copyright__ = '2018, Dale Furrow dkfurrow@gmail.com'
'''
bloomberg.com contributors
'''
import re
import time
from datetime import datetime, timedelta, date
from datetime import datetime, timedelta
from lxml import html, etree
from StringIO import StringIO
from calibre.web.feeds.recipes import BasicNewsRecipe
import urllib2
import traceback
from collections import OrderedDict
import calendar
import sys
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.BeautifulSoup import Tag
contributors_url = "https://www.bloomberg.com/view/contributors"
@@ -38,6 +34,7 @@ def get_article_parsed(this_url):
parsed = html.parse(StringIO(content), parser)
return parsed
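For orientation, this hunk shows only the tail of get_article_parsed. A minimal sketch of the whole helper, assuming (the fetch is not shown in this diff) that the page is retrieved with urllib2 and parsed with an explicit UTF-8 lxml parser:

import urllib2
from StringIO import StringIO
from lxml import html

def get_article_parsed(this_url):
    # Fetch the raw page (Python 2 idioms, matching the recipe's imports).
    content = urllib2.urlopen(this_url).read()
    # Wrap the string in a file-like object for lxml; utf-8 is an assumption.
    parser = html.HTMLParser(encoding='utf-8')
    parsed = html.parse(StringIO(content), parser)
    return parsed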
class BloombergContributor:
_name = None
_url_name = None
@@ -55,20 +52,23 @@ class BloombergContributor:
return "{0} ({1}): {2:d} articles".format(self._name, self._url_name, len(self._article_list))
def populate_article_list(self):
list_url = "{0}/{1}/{2}/articles".format(contributors_url, self._url_code, self._url_name)
list_url = "{0}/{1}/{2}/articles".format(
contributors_url, self._url_code, self._url_name)
parsed_list = get_article_parsed(list_url)
articles = parsed_list.xpath("//li[contains(@class, 'item_lwCjl')]")
for article in articles:
headline = article.find('a')
link = headline.attrib['href']
title = headline.text.strip()
article_date_eles = article.xpath(".//span[contains(@class, 'time_3qQJR')]")
article_date_eles = article.xpath(
".//span[contains(@class, 'time_3qQJR')]")
if len(article_date_eles) > 0:
article_date_str = article_date_eles[0].text.strip()
article_date = self.parse_date_str(article_date_str)
else:
article_date = None
summary_eles = article.xpath(".//p[contains(@class, 'summary_17SO6')]")
summary_eles = article.xpath(
".//p[contains(@class, 'summary_17SO6')]")
if len(summary_eles) > 0:
summary = summary_eles[0].text.strip()
else:
@@ -82,13 +82,14 @@ class BloombergContributor:
# US DST: second Sunday of March through first Sunday of November
c = calendar.Calendar(firstweekday=calendar.SUNDAY)
march_cal = c.monthdatescalendar(article_date.year, 3)
dst_start = [day for week in march_cal for day in week if \
day.weekday() == calendar.SUNDAY and \
day.month == 3][1]
dst_start = [
day for week in march_cal for day in week
if day.weekday() == calendar.SUNDAY and day.month == 3
][1]
nov_cal = c.monthdatescalendar(article_date.year, 11)
dst_end = [day for week in nov_cal for day in week if \
day.weekday() == calendar.SUNDAY and \
day.month == 11][0]
dst_end = [day for week in nov_cal for day in week
if day.weekday() == calendar.SUNDAY and day.month == 11
][0]
dst_start = datetime(dst_start.year, dst_start.month, dst_start.day, 2)
dst_end = datetime(dst_end.year, dst_end.month, dst_end.day, 1)
if dst_start > article_date > dst_end:
@@ -97,14 +98,13 @@ class BloombergContributor:
shift = timedelta(hours=5)
return float((article_date + shift - datetime.utcfromtimestamp(0)).total_seconds())
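The arithmetic above implements the standard US DST rule: clocks go forward on the second Sunday of March and back on the first Sunday of November. A self-contained sketch of the boundary computation this hunk reformats:

import calendar
from datetime import datetime

def us_dst_bounds(year):
    c = calendar.Calendar(firstweekday=calendar.SUNDAY)
    # monthdatescalendar pads weeks with days from adjacent months,
    # so filter on day.month before indexing the Sundays.
    march = c.monthdatescalendar(year, 3)
    second_sunday = [d for week in march for d in week
                     if d.weekday() == calendar.SUNDAY and d.month == 3][1]
    november = c.monthdatescalendar(year, 11)
    first_sunday = [d for week in november for d in week
                    if d.weekday() == calendar.SUNDAY and d.month == 11][0]
    # DST starts at 2:00 and the offset reverts at 1:00, as in the recipe.
    return (datetime(year, 3, second_sunday.day, 2),
            datetime(year, 11, first_sunday.day, 1))

print(us_dst_bounds(2018))  # March 11 and November 4 for 2018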
def parse_date_str(self, date_str):
parsed = None
for date_format in self.date_formats:
try:
parsed = datetime.strptime(date_str[0:-4], date_format)
break
except Exception as ex:
except Exception:
pass
return parsed
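parse_date_str tries each format in turn; the [0:-4] slice appears to drop a four-character zone suffix such as ' EDT'. A usage sketch in which the format string and sample timestamp are assumptions, since the recipe's date_formats list is not part of this diff:

from datetime import datetime

date_formats = ['%b %d, %Y %I:%M %p']  # assumed; the real list is elided

def parse_date_str(date_str):
    for date_format in date_formats:
        try:
            # Drop a 4-character suffix such as ' EDT' before parsing.
            return datetime.strptime(date_str[0:-4], date_format)
        except ValueError:
            pass
    return None

print(parse_date_str('Jul 13, 2018 8:16 AM EDT'))  # 2018-07-13 08:16:00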
@@ -126,6 +126,7 @@ class BloombergContributor:
def get_name(self):
return self._name
class BloombergContributors(BasicNewsRecipe):
title = u'Bloomberg, Editorial Contributors'
description = 'Articles from Bloomberg.com contributors'
@@ -135,8 +136,9 @@ class BloombergContributors(BasicNewsRecipe):
no_stylesheets = True
remove_attributes = ['style', 'xmlns']
keep_only_tags = [dict(name='article', attrs={'data-type': 'article'})]
remove_tags = [dict(name='div', attrs=
{'class': ['share-article-button ', 'text-to-speech']})]  # note the trailing space in 'share-article-button '
# note the trailing space in 'share-article-button '
remove_tags = [
dict(name='div', attrs={'class': ['share-article-button ', 'text-to-speech']})]
oldest_article = 7.0
ignore_duplicate_articles = {'url'}
recursions = 0
@@ -150,7 +152,8 @@ class BloombergContributors(BasicNewsRecipe):
for el in els:
name = el.find("span").text.strip()  # contributor's display name
contibutor_items = el.attrib['href'].split('/')
contributor = BloombergContributor(name, contibutor_items[4], contibutor_items[3])
contributor = BloombergContributor(
name, contibutor_items[4], contibutor_items[3])
contributor_list.append(contributor)
for contributor in contributor_list:
contributor.populate_article_list()
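The index-based split assumes contributor hrefs shaped like /view/contributors/<code>/<slug>; that shape is inferred from contributors_url and from how _url_code and _url_name are recombined above, not stated in the diff:

# Hypothetical href in the shape this recipe appears to expect.
href = '/view/contributors/ABC123xyz/sample-writer'
parts = href.split('/')
# parts -> ['', 'view', 'contributors', 'ABC123xyz', 'sample-writer']
print(parts[3], parts[4])  # the two path pieces handed to BloombergContributor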
@@ -176,24 +179,26 @@ class BloombergContributors(BasicNewsRecipe):
insert_tag.insert(0, parsed_time)
soup.time.replaceWith(insert_tag)
return soup
def parse_index(self):
self.timefmt = ' [%a, %d %b, %Y]'
self.log('starting parse_index: {0}'.format(datetime.now().strftime("%B %d, %Y %I:%M %p")))
self.log('starting parse_index: {0}'.format(
datetime.now().strftime("%B %d, %Y %I:%M %p")))
feeds = []
feed_dict = OrderedDict()
contributor_list = self.get_contributors_list()
self.log("Found {0:d} contibutors on main page".format(len(contributor_list)))
self.log("Found {0:d} contibutors on main page".format(
len(contributor_list)))
for contributor in contributor_list:
articles = contributor.get_ordered_article_feed()
feed_dict.update(articles)
feed_dict = OrderedDict(sorted(feed_dict.items(), key=lambda t: t[0], reverse=True))
self.log("Found {0:d} linked articles from contributors".format(len(feed_dict)))
feed_dict = OrderedDict(
sorted(feed_dict.items(), key=lambda t: t[0], reverse=True))
self.log("Found {0:d} linked articles from contributors".format(
len(feed_dict)))
feeds.append(("Columns", list(feed_dict.values())))
# self.log("Total of {0:d} {1} articles".format(len(article_list), cat))
self.log('finishing parse_index: {0}'.format(datetime.now().strftime("%B %d, %Y %I:%M %p")))
self.log('finishing parse_index: {0}'.format(
datetime.now().strftime("%B %d, %Y %I:%M %p")))
return feeds
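For what the feed_dict re-sort accomplishes: assuming the keys sort chronologically (e.g. date strings or timestamps), entries come back newest-first. A tiny illustration with made-up keys:

from collections import OrderedDict

feed_dict = OrderedDict([('2018-07-10', ['a']), ('2018-07-12', ['b'])])
feed_dict = OrderedDict(
    sorted(feed_dict.items(), key=lambda t: t[0], reverse=True))
print(list(feed_dict))  # ['2018-07-12', '2018-07-10'] -- newest first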

View File

@@ -9,26 +9,24 @@ import re
import time
import urllib2
from StringIO import StringIO
from datetime import datetime, timedelta, date
from datetime import datetime
import traceback
import sys
from collections import OrderedDict
from datetime import datetime, timedelta, date
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds import Article
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.BeautifulSoup import NavigableString
from calibre.utils.date import dt_factory, utcnow, local_tz
from calibre.utils.date import dt_factory, local_tz
from lxml import html
from lxml import etree
regex_date_only = re.compile("""(?:January|February|March|April|
regex_date_only = re.compile(r"""(?:January|February|March|April|
{8}May|June|July|August|September|October|November|
{8}December)\s[0-9]{1,2},\s20[01][0-9]""")
regex_time_only = re.compile("""[0-9]{1,2}:[0-9]{1,2} \w{2}""")
sentence_regex = re.compile("(\S.+?[.!?])(?=\s+|$)")
blog_regex = re.compile('post-\d+')
regex_time_only = re.compile(r"""[0-9]{1,2}:[0-9]{1,2} \w{2}""")
sentence_regex = re.compile(r"(\S.+?[.!?])(?=\s+|$)")
blog_regex = re.compile(r'post-\d+')
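The r prefixes added above matter because sequences like \d and \S are invalid string escapes: Python 2 happens to pass them through unchanged, while Python 3 emits a DeprecationWarning (and later versions an error). Raw strings hand the pattern to the regex engine verbatim. A minimal illustration:

import re

# Same pattern either way today, but only the raw string is future-proof.
blog_regex = re.compile(r'post-\d+')
assert blog_regex.search('article post-42 comments') is not None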
pages = OrderedDict([('news', ('/news/houston-texas/', ['business', 'sports'])),
('business', ('/business/', ['sports'])),
@@ -40,7 +38,7 @@ base_url = "http://www.chron.com"
xpath_general = """//div[contains(@class, 'centerpiece-tabs') or
contains(@class, 'wrapper') or
contains(@class, 'contentGroups') or
contains(@class, 'headline-list') or
contains(@class, 'core-package sports') or
contains(@class, 'news')]
//a[contains(@class, 'hdn-analytics')]"""
@@ -116,7 +114,7 @@ def get_all_links_from_sections():
article_set = set()
final_dict = OrderedDict()
for item in pages.items():
print "getting links from {0}".format(item[0])
print("getting links from {0}".format(item[0]))
all_sections.append(get_links_from_section_page(item))
for section in all_sections:
section_id = section[0]
@@ -200,8 +198,8 @@ class HoustonChronicle(BasicNewsRecipe):
except ValueError:
return None
el = page_doc.findAll(lambda this_tag: this_tag.name == "time" and
('itemprop', 'datePublished') in this_tag.attrs)
el = page_doc.findAll(
lambda this_tag: this_tag.name == "time" and ('itemprop', 'datePublished') in this_tag.attrs)
if len(el) == 1:
return get_regular_timestamp(el[0].get('datetime'))
else: