Sync to trunk

This commit is contained in:
John Schember 2009-01-15 20:42:23 -05:00
commit 1dfba2c88c
6 changed files with 88 additions and 34 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.4.127' __version__ = '0.4.128'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
''' '''
Various run time constants. Various run time constants.

Binary file not shown.

After

Width:  |  Height:  |  Size: 992 B

View File

@ -7,8 +7,8 @@ var column_titles = {
'rating' : 'Rating', 'rating' : 'Rating',
'date' : 'Date', 'date' : 'Date',
'tags' : 'Tags', 'tags' : 'Tags',
'series' : 'Series', 'series' : 'Series'
} };
String.prototype.format = function() { String.prototype.format = function() {
var pattern = /\{\d+\}/g; var pattern = /\{\d+\}/g;

View File

@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in (
'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes', 'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes',
'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik', 'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet', 'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
)] )]
import re, imp, inspect, time, os import re, imp, inspect, time, os

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
tomshardware.com tomshardware.com
''' '''
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Tomshardware(BasicNewsRecipe): class Tomshardware(BasicNewsRecipe):
@ -50,7 +49,7 @@ class Tomshardware(BasicNewsRecipe):
rmain, rsep, article_id = main.rpartition(',') rmain, rsep, article_id = main.rpartition(',')
tmain, tsep, trest = rmain.rpartition('/reviews/') tmain, tsep, trest = rmain.rpartition('/reviews/')
if tsep: if tsep:
return 'http://www.tomshardware.com/review_print.php?p1=' + article_id return 'http://www.tomshardware.com/review_print.php?p1=' + article_id
return 'http://www.tomshardware.com/news_print.php?p1=' + article_id return 'http://www.tomshardware.com/news_print.php?p1=' + article_id
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -0,0 +1,54 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch the German edition of Tom's Hardware (tomshardware.com/de).
'''
from calibre.web.feeds.news import BasicNewsRecipe
import re
class TomsHardwareDe(BasicNewsRecipe):
    """Recipe settings for the German-language Tom's Hardware RSS feed.

    The class only declares configuration attributes; it defines no methods.
    How each attribute is interpreted is decided by ``BasicNewsRecipe``,
    which is defined outside this file.
    """

    # --- Identification -------------------------------------------------
    title = "Tom's Hardware German"
    description = 'Computer news in german'
    __author__ = 'Oliver Niesner'

    # --- Download / formatting options ----------------------------------
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 50
    no_stylesheets = True
    encoding = 'utf-8'

    # Disabled regex preprocessing, kept for reference. It would have
    # replaced the literal sequences '<84>' and '<93>' with nothing:
    # preprocess_regexps = \
    #     [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
    #         [
    #             (r'<84>', lambda match: ''),
    #             (r'<93>', lambda match: ''),
    #         ]
    #     ]

    # --- Page clean-up selectors ----------------------------------------
    # Each entry selects elements by id, or by tag name plus class.
    # Presumably the base class strips every matching element - confirm
    # against BasicNewsRecipe.
    remove_tags = [
        # Advertising and page chrome, selected by element id.
        {'id': 'outside-advert'},
        {'id': 'advertRightWhite'},
        {'id': 'header-advert'},
        {'id': 'header-banner'},
        {'id': 'header-menu'},
        {'id': 'header-top'},
        {'id': 'header-tools'},
        {'id': 'nbComment'},
        {'id': 'internalSidebar'},
        {'id': 'header-news-infos'},
        {'id': 'breadcrumbs'},
        {'id': ''},
        # Boxes and links, selected by tag name and class.
        {'name': 'div', 'attrs': {'class': 'pyjama'}},
        # NOTE(review): 'href' is an attribute, not an HTML tag name; this
        # selector may have been meant to be name='a' - confirm before
        # changing.
        {'name': 'href', 'attrs': {'class': 'comment'}},
        {'name': 'div', 'attrs': {'class': 'greyBoxR clearfix'}},
        {'name': 'div', 'attrs': {'class': 'greyBoxL clearfix'}},
        {'name': 'div', 'attrs': {'class': 'greyBox clearfix'}},
        # NOTE(review): duplicates the empty-id selector listed above;
        # looks redundant - confirm before removing.
        {'id': ''},
    ]

    # Alternative trimming rules that were tried and left disabled:
    # remove_tags_before = [dict(id='header-news-title')]
    # remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})]
    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'news-elm'}}]

    # --- Feed list: (feed title, feed URL) pairs --------------------------
    feeds = [
        (
            'tomshardware',
            'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml',
        ),
    ]