From e7e657d27f3271121e1686458e3832147e2aa570 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 6 Mar 2008 07:17:54 +0000 Subject: [PATCH] Make web2lrf not ignore articles with no pubdate. --- src/libprs500/ebooks/lrf/web/profiles/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libprs500/ebooks/lrf/web/profiles/__init__.py b/src/libprs500/ebooks/lrf/web/profiles/__init__.py index f86d087c9b..79df4e5da1 100644 --- a/src/libprs500/ebooks/lrf/web/profiles/__init__.py +++ b/src/libprs500/ebooks/lrf/web/profiles/__init__.py @@ -19,6 +19,7 @@ particular websites. import tempfile, time, calendar, re, operator, atexit, shutil, os from htmlentitydefs import name2codepoint +from email.utils import formatdate from libprs500 import __appname__, iswindows, browser from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, CData, Tag @@ -324,8 +325,7 @@ class DefaultProfile(object): if not pubdate: pubdate = item.find('dc:date') if not pubdate or not pubdate.string: - self.logger.debug('Skipping article %s as it does not have publication date'%atitle) - continue + pubdate = formatdate() pubdate = self.tag_to_string(pubdate) pubdate = pubdate.replace('+0000', 'GMT') @@ -354,7 +354,7 @@ class DefaultProfile(object): 'title' : atitle, 'url' : purl, 'timestamp': self.strptime(pubdate) if self.use_pubdate else time.time(), - 'date' : pubdate if self.use_pubdate else time.ctime(), + 'date' : pubdate if self.use_pubdate else formatdate(), 'content' : content, } delta = time.time() - d['timestamp']