NYTimes Global by Krittika Goyal

2026-02-18 01:00:07 -05:00 · 2011-12-27 18:13:54 +05:30 · 2011-12-27 18:13:54 +05:30 · 0e31649305
commit 0e31649305
parent 89ce33ebc9
2 changed files with 24 additions and 57 deletions
--- a/recipes/iht.recipe
+++ b/recipes/iht.recipe
@@ -1,63 +1,30 @@
-__license__   = 'GPL v3'
-__copyright__ = '2008, Derry FitzGerald'
-'''
-iht.com
-'''
-import re
-
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ptempfile import PersistentTemporaryFile

+class NYTimesGlobal(BasicNewsRecipe):
+    title          = u'NY Times Global'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    use_embedded_content = False

-class InternationalHeraldTribune(BasicNewsRecipe):
-    title          = u'The International Herald Tribune'
-    __author__     = 'Derry FitzGerald'
-    language = 'en'
-
-    oldest_article = 1
-    max_articles_per_feed = 30
    no_stylesheets = True
+    auto_cleanup = True

-    remove_tags    = [dict(name='div', attrs={'class':['footer','header']}),
-                      dict(name=['form'])]
-    preprocess_regexps = [
-            (re.compile(r'<!-- webtrends.*', re.DOTALL),
-             lambda m:'</body></html>')
-                          ]
-    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'

-    remove_empty_feeds = True
-    
    feeds          = [
-                      (u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
-                      (u'Business', u'http://www.iht.com/rss/business.xml'),
-                      (u'Americas', u'http://www.iht.com/rss/america.xml'),
-                      (u'Europe', u'http://www.iht.com/rss/europe.xml'),
-                      (u'Asia', u'http://www.iht.com/rss/asia.xml'),
-                      (u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
-                      (u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
-                      (u'Technology', u'http://www.iht.com/rss/technology.xml'),
-                      (u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
-                      (u'Sports', u'http://www.iht.com/rss/sports.xml'),
-                      (u'Culture', u'http://www.iht.com/rss/arts.xml'),
-                      (u'Style and Design', u'http://www.iht.com/rss/style.xml'),
-                      (u'Travel', u'http://www.iht.com/rss/travel.xml'),
-                      (u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
-                      (u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
-                      (u'Properties', u'http://www.iht.com/rss/properties.xml')
-                    ]
-    temp_files = []
-    articles_are_obfuscated = True
-    
-    masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
-    
-    def get_obfuscated_article(self, url):
-        br = self.get_browser()
-        br.open(url)
-        response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
-        html = response1.read()
-        
-        self.temp_files.append(PersistentTemporaryFile('_iht.html'))
-        self.temp_files[-1].write(html)
-        self.temp_files[-1].close()
-        return self.temp_files[-1].name
+('NYTimes',
+ 'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
+('NYTimes global',
+ 'http://www.nytimes.com/services/xml/rss/nyt/GlobalHome.xml'),
+('World',
+ 'http://www.nytimes.com/services/xml/rss/nyt/World.xml'),
+('U.S.',
+ 'http://www.nytimes.com/services/xml/rss/nyt/US.xml'),
+('Business',
+ 'http://feeds.nytimes.com/nyt/rss/Business'),
+('Sports',
+ 'http://www.nytimes.com/services/xml/rss/nyt/Sports.xml'),
+('Technology',
+ 'http://feeds.nytimes.com/nyt/rss/Technology'),
+]
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@@ -21,7 +21,7 @@ NS = 'http://calibre-ebook.com/recipe_collection'
 E = ElementMaker(namespace=NS, nsmap={None:NS})

 def iterate_over_builtin_recipe_files():
-    exclude = ['craigslist', 'iht', 'toronto_sun',
+    exclude = ['craigslist', 'toronto_sun',
            'livemint']
    d = os.path.dirname
    base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes')