Make web2lrf keep articles that have no pubdate instead of skipping them, using the current date as a fallback.

This commit is contained in:
Kovid Goyal 2008-03-06 07:17:54 +00:00
parent 8b0e1ff4f0
commit e7e657d27f

View File

@ -19,6 +19,7 @@ particular websites.
import tempfile, time, calendar, re, operator, atexit, shutil, os import tempfile, time, calendar, re, operator, atexit, shutil, os
from htmlentitydefs import name2codepoint from htmlentitydefs import name2codepoint
from email.utils import formatdate
from libprs500 import __appname__, iswindows, browser from libprs500 import __appname__, iswindows, browser
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, CData, Tag from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, CData, Tag
@ -324,8 +325,7 @@ class DefaultProfile(object):
if not pubdate: if not pubdate:
pubdate = item.find('dc:date') pubdate = item.find('dc:date')
if not pubdate or not pubdate.string: if not pubdate or not pubdate.string:
self.logger.debug('Skipping article %s as it does not have publication date'%atitle) pubdate = formatdate()
continue
pubdate = self.tag_to_string(pubdate) pubdate = self.tag_to_string(pubdate)
pubdate = pubdate.replace('+0000', 'GMT') pubdate = pubdate.replace('+0000', 'GMT')
@ -354,7 +354,7 @@ class DefaultProfile(object):
'title' : atitle, 'title' : atitle,
'url' : purl, 'url' : purl,
'timestamp': self.strptime(pubdate) if self.use_pubdate else time.time(), 'timestamp': self.strptime(pubdate) if self.use_pubdate else time.time(),
'date' : pubdate if self.use_pubdate else time.ctime(), 'date' : pubdate if self.use_pubdate else formatdate(),
'content' : content, 'content' : content,
} }
delta = time.time() - d['timestamp'] delta = time.time() - d['timestamp']