From 9a28d7017f63a12fc9dc698a7b32e552a4384428 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 1 Jan 2010 09:55:20 -0700
Subject: [PATCH] New recipes for Cyprus Weekly by kwetal and RTE by Robin Phillips

---
 resources/recipes/cyprus_weekly.recipe | 120 +++++++++++++++++++++++++
 resources/recipes/rte.recipe           |  16 ++++
 src/calibre/devices/usbms/device.py    |   3 +
 src/calibre/gui2/device.py             |   2 +-
 4 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 resources/recipes/cyprus_weekly.recipe
 create mode 100644 resources/recipes/rte.recipe

diff --git a/resources/recipes/cyprus_weekly.recipe b/resources/recipes/cyprus_weekly.recipe
new file mode 100644
index 0000000000..d8762609a2
--- /dev/null
+++ b/resources/recipes/cyprus_weekly.recipe
@@ -0,0 +1,120 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from datetime import datetime, timedelta
+
+class CyNewsLiveRecipe(BasicNewsRecipe):
+    '''Build feeds by scraping the section listing pages of cyprusweekly.com.cy.'''
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'en_CY'
+    version = 1
+
+    title = u'Cyprus Weekly'
+    publisher = u'The Cyprus Weekly'
+    category = u'News, Newspaper'
+    description = u'News from Cyprus'
+
+    use_embedded_content = False
+    remove_empty_feeds = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    # Scratch state shared between parse_index() and find_articles().
+    pubTime = None
+    minTime = None
+    articleCount = 0
+
+    INDEX = 'http://www.cyprusweekly.com.cy/main/default.aspx'
+
+    feeds = []
+    feeds.append(('News: Cyprus', 'http://www.cyprusweekly.com.cy/main/92,0,0,0-CYPRUS.aspx'))
+    feeds.append(('News: World', 'http://www.cyprusweekly.com.cy/main/78,0,0,0-UKWORLD.aspx'))
+    feeds.append(('Sport: Football', 'http://www.cyprusweekly.com.cy/main/82,0,0,0-FOOTBALL.aspx'))
+    feeds.append(('Sport: Rugby', 'http://www.cyprusweekly.com.cy/main/83,0,0,0-RUGBY.aspx'))
+    feeds.append(('Sport: Cricket', 'http://www.cyprusweekly.com.cy/main/85,0,0,0-CRICKET.aspx'))
+    feeds.append(('Sport: Tennis', 'http://www.cyprusweekly.com.cy/main/84,0,0,0-TENNIS.aspx'))
+    feeds.append(('Sport: Other', 'http://www.cyprusweekly.com.cy/main/86,0,0,0-OTHER.aspx'))
+    feeds.append(('Business: Local', 'http://www.cyprusweekly.com.cy/main/100,0,0,0-LOCAL.aspx'))
+    feeds.append(('Business: Foreign', 'http://www.cyprusweekly.com.cy/main/101,0,0,0-FOREIGN.aspx'))
+    feeds.append(('Whats On: Places of Interest', 'http://www.cyprusweekly.com.cy/main/123,0,0,0-PLACES-OF-INTEREST.aspx'))
+    feeds.append(('Whats On: Going Out', 'http://www.cyprusweekly.com.cy/main/153,0,0,0-GOING-OUT.aspx'))
+    feeds.append(('Whats On: Arts & Entertainment', 'http://www.cyprusweekly.com.cy/main/135,0,0,0-ARTS--and-ENTERTAINMENT.aspx'))
+    feeds.append(('Whats On: Things To Do', 'http://www.cyprusweekly.com.cy/main/136,0,0,0-THINGS-TO-DO.aspx'))
+    feeds.append(('Whats On: Shopping Guide', 'http://www.cyprusweekly.com.cy/main/142,0,0,0-SHOPPING-GUIDE.aspx'))
+    feeds.append(('Culture', 'http://www.cyprusweekly.com.cy/main/208,0,0,0-CULTURE.aspx'))
+    feeds.append(('Environment', 'http://www.cyprusweekly.com.cy/main/93,0,0,0-ENVIRONMENT.aspx'))
+    feeds.append(('Info', 'http://www.cyprusweekly.com.cy/main/91,0,0,0-INFO.aspx'))
+
+    keep_only_tags = []
+    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'ArticleCategories'}))
+
+    extra_css = '''
+                body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
+                '''
+
+    def parse_index(self):
+        '''Return a list of (section title, articles) tuples, one per entry in feeds.'''
+        answer = []
+        for feed in self.feeds:
+            self.articleCount = 0
+            articles = []
+            soup = self.index_to_soup(feed[1])
+
+            table = soup.find('table', attrs = {'id': 'ctl00_cp_ctl01_listp'})
+            if table:
+                self.pubTime = datetime.now()
+                self.minTime = self.pubTime - timedelta(days = self.oldest_article)
+
+                self.find_articles(table, articles)
+
+            answer.append((feed[0], articles))
+
+        return answer
+
+    def postprocess_html(self, soup, first):
+        '''Drop inline style attributes and turn font tags into bare divs.'''
+        for el in soup.findAll(attrs = {'style': True}):
+            del el['style']
+
+        for el in soup.findAll('font'):
+            el.name = 'div'
+            # Walk a copy of the attribute list: iterating the tag itself
+            # yields its children, and del mutates el.attrs as we go.
+            for attr, value in list(el.attrs):
+                del el[attr]
+
+        return soup
+
+    def find_articles(self, table, articles):
+        '''Append an article dict to articles for each recent enough entry in table.
+
+        Stops at the first entry that is older than minTime or once
+        max_articles_per_feed articles have been collected.
+        '''
+        for div in table.findAll('div', attrs = {'class': 'ListArticle'}):
+            el = div.find('div', attrs = {'class': 'ListArticle_T'})
+            title = self.tag_to_string(el.a)
+            url = self.INDEX + el.a['href']
+
+            description = self.tag_to_string(div.find('div', attrs = {'class': 'ListArticle_BODY300'}))
+
+            el = div.find('div', attrs = {'class': 'ListArticle_D'})
+            if el:
+                dateParts = self.tag_to_string(el).split(' ')
+                monthNames = {'January': 1, 'February': 2, 'March': 3, 'April': 4, 'May': 5, 'June': 6,
+                              'July': 7, 'August': 8, 'September': 9, 'October': 10, 'November': 11,
+                              'December': 12}
+                timeParts = dateParts[3].split(':')
+                self.pubTime = datetime(year = int(dateParts[2]), month = int(monthNames[dateParts[1]]),
+                                        day = int(dateParts[0]), hour = int(timeParts[0]),
+                                        minute = int(timeParts[1]))
+
+            if self.pubTime >= self.minTime and self.articleCount < self.max_articles_per_feed:
+                articles.append({'title': title, 'date': self.pubTime, 'url': url, 'description': description})
+                self.articleCount += 1
+            else:
+                return
+
diff --git a/resources/recipes/rte.recipe b/resources/recipes/rte.recipe
new file mode 100644
index 0000000000..eba684115f
--- /dev/null
+++ b/resources/recipes/rte.recipe
@@ -0,0 +1,16 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RTE(BasicNewsRecipe):
+    title = u'RTE News'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    __author__ = u'Robin Phillips'
+    language = 'en_GB'
+
+    remove_tags = [dict(attrs={'class':['topAd','botad','previousNextItem','headline','footerLinks','footernav']})]
+
+    feeds = [(u'News', u'http://www.rte.ie/rss/news.xml'), (u'Sport', u'http://www.rte.ie/rss/sport.xml'), (u'Soccer', u'http://www.rte.ie/rss/soccer.xml'), (u'GAA', u'http://www.rte.ie/rss/gaa.xml'), (u'Rugby', u'http://www.rte.ie/rss/rugby.xml'), (u'Racing', u'http://www.rte.ie/rss/racing.xml'), (u'Business', u'http://www.rte.ie/rss/business.xml'), (u'Entertainment', u'http://www.rte.ie/rss/entertainment.xml')]
+
+    def print_version(self, url):
+        '''Return url with 'http://www' replaced by 'http://m'.'''
+        return url.replace('http://www', 'http://m')
diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py
index 5e74904bc9..095f01ee21 100644
--- a/src/calibre/devices/usbms/device.py
+++ b/src/calibre/devices/usbms/device.py
@@ -865,6 +865,7 @@ class Device(DeviceConfig, DevicePlugin):
         use_subdirs = self.SUPPORTS_SUB_DIRS and settings.use_subdirs
 
         fname = sanitize(fname)
+        ext = os.path.splitext(fname)[1]
 
         if special_tag is None:
             from calibre.library.save_to_disk import get_components
@@ -892,6 +893,8 @@ class Device(DeviceConfig, DevicePlugin):
                 fname = sanitize(self.filename_callback(fname, mdata))
                 extra_components.append(fname)
                 extra_components = [str(x) for x in extra_components]
+            else:
+                extra_components[-1] += ext
 
         def remove_trailing_periods(x):
             ans = x
diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py
index 7f81b99223..a8c30f9532 100644
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@@ -846,7 +846,7 @@ class DeviceGUI(object):
         Upload books to device.
         :param files: List of either paths to files or file like objects
         '''
-        titles = [i['title'] for i in metadata]
+        titles = [i.title for i in metadata]
         job = self.device_manager.upload_books(
                 Dispatcher(self.books_uploaded),
                 files, names, on_card=on_card,