Merge from trunk

This commit is contained in:
Charles Haley 2010-06-11 22:10:42 +01:00
commit cfce2dcc94
2 changed files with 19 additions and 21 deletions

View File

@@ -1,5 +1,6 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class AdvancedUserRecipe1268409464(BasicNewsRecipe):
title = u'The Sun'
@@ -14,24 +15,27 @@ class AdvancedUserRecipe1268409464(BasicNewsRecipe):
remove_javascript = True
keep_only_tags = [
dict(name='div', attrs={'class':'medium-centered'})
,dict(name='div', attrs={'class':'article'})
,dict(name='div', attrs={'class':'clear-left'})
,dict(name='div', attrs={'class':'text-center'})
dict(id='column-print')
]
remove_tags = [
dict(name='div', attrs={'class':'slideshow'})
,dict(name='div', attrs={'class':'float-left'})
,dict(name='div', attrs={'class':'ltbx-slideshow ltbx-btn-ss'})
,dict(name='a', attrs={'class':'add_a_comment'})
,dict(name='div', attrs={'id':'vxFlashPlayerContent'})
,dict(name='div', attrs={'id':'k1006094r1c1t5w380h529'})
,dict(name='div', attrs={'id':'tum_login_form_container'})
,dict(name='div', attrs={'class':'discHeader'})
,dict(name='div', attrs={'class':'margin-bottom-neg-2'})
dict(name='div', attrs={'class':[
'clear text-center small padding-left-right-5 text-999 padding-top-5 padding-bottom-10 grey-solid-line',
'clear width-625 bg-fff padding-top-10'
]}),
dict(name='video'),
]
def preprocess_html(self, soup):
    """Swap the page's <h1> for a fresh <h1> holding only its text.

    Looks up an ``h1`` element via ``soup.find``. When one exists, its
    content is flattened with ``self.tag_to_string`` and a newly built
    ``h1`` tag containing just that string replaces the original element.
    The (possibly modified) soup is returned in every case.
    """
    heading = soup.find('h1')
    if heading is None:
        # No heading found: hand the soup back untouched.
        return soup

    heading_text = self.tag_to_string(heading)
    plain_heading = Tag(soup, 'h1')
    plain_heading.insert(0, heading_text)
    heading.replaceWith(plain_heading)
    return soup
feeds = [(u'News', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article312900.ece')
,(u'Sport', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247732.ece')

View File

@@ -12,7 +12,7 @@ from uuid import uuid4
from lxml import etree
from calibre import prints, guess_type, iswindows, islinux
from calibre import prints, guess_type
from calibre.devices.errors import DeviceError
from calibre.devices.usbms.driver import debug_print
from calibre.constants import DEBUG
@@ -47,10 +47,7 @@ def strptime(src):
src[2] = str(MONTH_MAP[src[2]])
return time.strptime(' '.join(src), '%w, %d %m %Y %H:%M:%S %Z')
def strftime(epoch, zone=None):
zone = time.gmtime
if islinux:
zone = time.localtime
def strftime(epoch, zone=time.localtime):
src = time.strftime("%w, %d %m %Y %H:%M:%S GMT", zone(epoch)).split()
src[0] = INVERSE_DAY_MAP[int(src[0][:-1])]+','
src[2] = INVERSE_MONTH_MAP[int(src[2])]
@@ -427,9 +424,6 @@ class XMLCache(object):
def update_text_record(self, record, book, path, bl_index):
timestamp = os.path.getmtime(path)
# Correct for MS DST time 'adjustment'
if iswindows and time.daylight:
timestamp -= time.altzone - time.timezone
date = strftime(timestamp)
if date != record.get('date', None):
record.set('date', date)