Sync to trunk.

This commit is contained in:
John Schember 2009-10-19 06:41:51 -04:00
commit 1e3832a204
11 changed files with 125 additions and 52 deletions

View File

@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1255797795(BasicNewsRecipe):
    """News recipe that downloads the RSS feeds published on corren.se.

    Articles are fetched through the site's ``Print.aspx`` page instead of
    the regular ``<section>/artikel.aspx`` page (see ``print_version``).
    """

    title = u'Corren'
    __author__ = 'Jonas Svensson'
    simultaneous_downloads = 1
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 100
    remove_attributes = ['onload']
    timefmt = ''

    # (feed title, feed URL) pairs, one per section of the site.
    feeds = [
        (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'),
        (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'),
        (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'),
        (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'),
        (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'),
        (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'),
        (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'),
        (u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'),
        (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'),
        (u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/'),
    ]

    # Section paths whose "<section>/artikel.aspx" article page is rewritten
    # to "Print.aspx". Each section is listed exactly once; keeping them in a
    # table avoids the copy-paste duplicates a chain of replace() calls invites.
    _PRINT_SECTIONS = (
        "ekonomi",
        "bostad",
        "kultur",
        "motor",
        "mat-dryck",
        "sport",
        "asikter",
        "ostergotland/mjolby",
        "ostergotland/motala",
        "nyheter",
    )

    def print_version(self, url):
        """Return ``url`` with its article page swapped for the print page.

        Every occurrence of ``<section>/artikel.aspx`` (for the sections in
        ``_PRINT_SECTIONS``) is replaced by ``Print.aspx``; the rest of the
        URL, including the query string, is left untouched. URLs that match
        no known section are returned unchanged.
        """
        for section in self._PRINT_SECTIONS:
            url = url.replace(section + "/artikel.aspx", "Print.aspx")
        return url

View File

@ -18,7 +18,6 @@ class Economist(BasicNewsRecipe):
__author__ = "Kovid Goyal"
description = 'Global news and current affairs from a European perspective'
oldest_article = 7.0
needs_subscription = False # Strange but true
INDEX = 'http://www.economist.com/printedition'
cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
remove_tags = [dict(name=['script', 'noscript', 'title'])]

View File

@ -6,9 +6,12 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
politico.com
'''
import re, traceback
from calibre.web.feeds.news import BasicNewsRecipe
class Politico(BasicNewsRecipe):
title = 'Politico'
__author__ = 'Darko Miletic'
description = 'Political news from USA'
@ -55,13 +58,13 @@ class Politico(BasicNewsRecipe):
del item['style']
return soup
def print_url(self, soup, default):
    """Return the href of the first link whose text is "Print".

    Only ``<a>`` tags carrying an ``href`` attribute are considered; when
    none of them has the string "Print", ``default`` is returned instead.
    """
    print_hrefs = (
        anchor['href']
        for anchor in soup.findAll('a', href=True)
        if anchor.string == "Print"
    )
    return next(print_hrefs, default)
url_pat = re.compile(r'<a href="([^"]+printstory\.cfm[^"]+)"')
def print_version(self, url):
soup = self.index_to_soup(url)
return self.print_url(soup, None)
raw = self.index_to_soup(url, raw=True)
try:
url = self.url_pat.search(raw).group(1)
except:
traceback.print_exc()
url = None
return url

View File

@ -17,18 +17,36 @@ class Time(BasicNewsRecipe):
no_stylesheets = True
language = 'en'
extra_css = '''.headline {font-size: large;}
.fact { padding-top: 10pt }
h1 {font-family:Arial,Sans-serif}
.byline{font-family:Arial,Sans-serif; font-size:xx-small ;color:blue}
.timestamp{font-family:Arial,Sans-serif; font-size:x-small ;color:gray}'''
remove_tags_before = dict(id="artHd")
remove_tags_after = {'class':"ltCol"}
remove_tags = [
{'class':['articleTools', 'enlarge', 'search','socialtools','blogtools','moretools','page','nextUp','next','subnav','RSS','line2','first','ybuzz','articlePagination','chiclets','imgcont','createListLink','rlinks','tabsWrap','pagination']},
{'id':['quigoArticle', 'contentTools', 'articleSideBar', 'header', 'navTop','articleTools','feedmodule','feedmodule3','promos','footer','linksFooter','timeArchive','belt','relatedStories','packages','Features']},
{'target':'_blank'},
]
extra_css = ''' h1 {font-family:Arial,Sans-serif;}
h2 {font-family:Arial,Sans-serif;}
.name{font-family:Arial,Sans-serif; font-size:x-small; }
.date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
.byline{font-family:Arial,Sans-serif; font-size:x-small ;}
.photoBkt{ font-size:x-small ;}
.vertPhoto{font-size:x-small ;}
.credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
.artTxt{font-family:georgia,serif;}
#article{font-family:georgia,serif;}
.caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
.credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
a:link{color:#CC0000;}
'''
# remove_tags_before = dict(id="artHd")
# remove_tags_after = {'class':"ltCol"}
# remove_tags = [
# {'class':['articleTools', 'enlarge', 'search','socialtools','blogtools','moretools','page','nextUp','next','subnav','RSS','line2','first','ybuzz','articlePagination','chiclets','imgcont','createListLink','rlinks','tabsWrap','pagination']},
# {'id':['quigoArticle', 'contentTools', 'articleSideBar', 'header', 'navTop','articleTools','feedmodule','feedmodule3','promos','footer','linksFooter','timeArchive','belt','relatedStories','packages','Features']},
# {'target':'_blank'},
# ]
keep_only_tags = [ dict(name ="div",attrs = {"id" :["article",]}) ,
dict(name ="div",attrs = {"class" :["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}) ,]
remove_tags = [ dict(name ="div",attrs = {'class':['articlePagination','nextUp',"rtCol","pagination","enlarge",]}),
dict(name ="span",attrs = {'class':['see']}),
dict(name ="div",attrs = {'id':['articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
dict(name ="a",attrs = {'class':['listLink']}),
]
recursions = 1
match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html']
@ -81,20 +99,3 @@ class Time(BasicNewsRecipe):
else:
ans.append(unicode(t))
return u' '.join(ans).replace(u'\xa0', u'').strip()
# Post-process a downloaded page and return the (mutated) soup.
#   soup       -- parsed page; elements are removed from it in place
#   first_page -- falsy for follow-up pages, which get extra clean-up
# NOTE(review): leading indentation is missing in this view, so the exact
# nesting of the statements below (in particular whether the 'artTxt'
# handling sits under `if not first_page`) cannot be confirmed from here.
def postprocess_html(self, soup, first_page):
# Drop the element with class 'artPag', if one is found.
div = soup.find(attrs={'class':'artPag'})
if div is not None:
div.extract()
# On pages other than the first, also drop the 'photoBkt' and 'artHd'
# elements when present.
if not first_page:
for cls in ('photoBkt', 'artHd'):
div = soup.find(attrs={'class':cls})
if div is not None:
div.extract()
# Remove the <p> returned by find('p') inside the 'artTxt' element, when
# both exist.
div = soup.find(attrs={'class':'artTxt'})
if div is not None:
p = div.find('p')
if p is not None:
p.extract()
return soup

View File

@ -15,7 +15,7 @@ class ZeitDe(BasicNewsRecipe):
language = 'de'
lang = 'de_DE'
__author__ = 'Martin Pitt and Suajta Raman'
__author__ = 'Martin Pitt and Sujata Raman'
use_embedded_content = False
max_articles_per_feed = 40
remove_empty_feeds = True
@ -41,7 +41,8 @@ class ZeitDe(BasicNewsRecipe):
.article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
.headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
'''
filter_regexps = [r'ad.de.doubleclick.net/']
#filter_regexps = [r'ad.de.doubleclick.net/']
keep_only_tags = [
dict(name='div', attrs={'class':["article"]}) ,
]
@ -51,15 +52,32 @@ class ZeitDe(BasicNewsRecipe):
dict(name='div', attrs={'id':["place_5","place_4"]})
]
def get_article_url(self, article):
ans = article.get('guid',None)
try:
self.log('Looking for full story link in', ans)
soup = self.index_to_soup(ans)
x = soup.find(text="Auf einer Seite lesen")
if x is not None:
a = x.parent
if a and a.has_key('href'):
ans = a['href']
self.log('Found full story link', ans)
except:
pass
if 'video' in ans or 'quiz' in ans :
url = article.get('guid', None)
if 'video' in url or 'quiz' in url :
url = None
return url
ans = None
return ans
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
@ -69,6 +87,7 @@ class ZeitDe(BasicNewsRecipe):
return soup
#def print_version(self,url):
# return url.replace('http://www.zeit.de/', 'http://images.zeit.de/text/').replace('?from=rss', '')

View File

@ -46,6 +46,7 @@ class LinuxFreeze(Command):
'/usr/lib/libmng.so.1',
'/usr/lib/libpodofo.so.0.6.99',
'/lib/libz.so.1',
'/lib/libuuid.so.1',
'/usr/lib/libtiff.so.3',
'/lib/libbz2.so.1',
'/usr/lib/libpoppler.so.5',

View File

@ -120,7 +120,7 @@
</Condition>
<InstallExecuteSequence>
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
<RemoveExistingProducts Before="InstallInitialize" />
<RemoveExistingProducts After="InstallFinalize" />
</InstallExecuteSequence>
<InstallUISequence>
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>

View File

@ -165,7 +165,7 @@ def main(args=sys.argv):
sys.argv = args[:1]
exec opts.command
elif opts.exec_file:
sys.argv = args[:1]
sys.argv = args
base = os.path.dirname(os.path.abspath(opts.exec_file))
sys.path.insert(0, base)
g = globals()

View File

@ -141,6 +141,11 @@ Now you should be able to access your books on your iPhone by opening Stanza and
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.
How do I use |app| with my Android phone?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
First install the WordPlayer e-book reading app from the Android Marketplace onto your phone. Then simply plug your phone into the computer with a USB cable. |app| should automatically detect the phone and then you can transfer books to it by clicking the Send to Device button. |app| does not have support for every single Android device out there, so if you would like to have support for your device added, follow the instructions above for getting your device supported in |app|.
I get the error message "Failed to start content server: Port 8080 not free on '0.0.0.0'"?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -501,8 +501,10 @@ class BasicNewsRecipe(Recipe):
if isinstance(self.feeds, basestring):
self.feeds = [self.feeds]
if self.needs_subscription and (self.username is None or self.password is None):
raise ValueError('The %s recipe needs a username and password.'%self.title)
if self.needs_subscription and (\
self.username is None or self.password is None or \
(not self.username and not self.password)):
raise ValueError(_('The "%s" recipe needs a username and password.')%self.title)
self.browser = self.get_browser()
self.image_map, self.image_counter = {}, 1

View File

@ -193,11 +193,16 @@ class SchedulerConfig(object):
def write_scheduler_file(self):
from calibre.utils.lock import ExclusiveFile
self.root.text = '\n\n\t'
for x in self.root:
x.tail = '\n\n\t'
if len(self.root) > 0:
self.root[-1].tail = '\n\n'
with ExclusiveFile(self.conf_path) as f:
f.seek(0)
f.truncate()
f.write(etree.tostring(self.root, encoding='utf-8',
xml_declaration=True, pretty_print=True))
xml_declaration=True, pretty_print=False))
def serialize_schedule(self, typ, schedule):
s = E.schedule({'type':typ})