commit 92e06792f8
Author: GRiker
Date:   2012-12-07 03:33:29 -07:00

9 changed files with 353 additions and 286 deletions

View File

@@ -19,6 +19,50 @@
# new recipes:
# - title:
- version: 0.9.9
  date: 2012-12-07

  new features:
    - title: "64 bit build for Windows"
      type: major
      description: "calibre now has a 64 bit version for Windows, available at: http://calibre-ebook.com/download_windows64. The 64 bit build is not limited to using only 3GB of RAM when converting large/complex documents. It may also be slightly faster for some tasks. You can have both the 32 bit and the 64 bit build installed at the same time; they will use the same libraries, plugins and settings."

    - title: "Content server: Make the identifiers in each book's metadata clickable."
      tickets: [1085726]

  bug fixes:
    - title: "EPUB Input: Fix an infinite loop while trying to recover a damaged EPUB file."
      tickets: [1086917]

    - title: "KF8 Input: Fix handling of links in files that link to the obsolete <a name> tags instead of tags with an id attribute."
      tickets: [1086705]

    - title: "Conversion: Fix a bug in the removal of invalid entries from the spine, where not all invalid entries were removed, causing conversion to fail."
      tickets: [1086054]

    - title: "KF8 Input: Ignore invalid flow references in the KF8 document instead of erroring out on them."
      tickets: [1085306]

    - title: "Fix command line output on Linux systems with incorrect LANG/LC_CTYPE env vars."
      tickets: [1085103]

    - title: "KF8 Input: Fix page breaks specified using the data-AmznPageBreak attribute being ignored by calibre."

    - title: "PDF Output: Fix the custom size field not accepting fractional numbers as sizes."

    - title: "Get Books: Update libre.de and publio for website changes."

    - title: "Wireless driver: Increase the timeout interval, and when allocating a random port, try port 9090 first (see the sketch after this changelog hunk)."

  improved recipes:
    - New York Times
    - Weblogs SL
    - Zaman Gazetesi
    - Aksiyon Dergisi
    - Engadget
    - Metro UK
    - Heise Online

- version: 0.9.8
  date: 2012-11-30
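The wireless-driver entry above describes a port-allocation strategy: prefer port 9090, and only fall back to random ports if it is taken. A minimal sketch of that strategy, assuming a plain TCP listening socket; allocate_port and the retry count are illustrative, not calibre's actual code:

    import random
    import socket

    def allocate_port(preferred=9090, attempts=5):
        # Try the preferred port first, then a handful of random high ports.
        candidates = [preferred] + [random.randint(8192, 65535) for _ in range(attempts)]
        for port in candidates:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.bind(('0.0.0.0', port))
                s.listen(1)
                return s, port
            except socket.error:
                s.close()  # port in use; try the next candidate
        raise socket.error('no free port among %r' % candidates)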

View File

@@ -16,6 +16,7 @@ class Harpers(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = True

    conversion_options = {
        'comment' : description
@@ -31,14 +32,14 @@ class Harpers(BasicNewsRecipe):
        .caption{font-family:Verdana,sans-serif;font-size:x-small;color:#666666;}
    '''
    keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    remove_tags = [
        dict(name='table', attrs={'class':['rcnt','rcnt topline']})
        ,dict(name=['link','object','embed','meta','base'])
    ]
    remove_attributes = ['width','height']
    #keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
    #remove_tags = [
    #    dict(name='table', attrs={'class':['rcnt','rcnt topline']})
    #    ,dict(name=['link','object','embed','meta','base'])
    #]
    #remove_attributes = ['width','height']

    feeds = [(u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')]
    feeds = [(u"Harper's Magazine", u'http://harpers.org/feed/')]

    def get_cover_url(self):
        cover_url = None
@@ -49,9 +50,9 @@ class Harpers(BasicNewsRecipe):
            cover_url = 'http://harpers.org' + link_item['src']
        return cover_url

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(xmlns=True):
            del item['xmlns']
        return soup
    #def preprocess_html(self, soup):
    #    for item in soup.findAll(style=True):
    #        del item['style']
    #    for item in soup.findAll(xmlns=True):
    #        del item['xmlns']
    #    return soup
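The net effect of the Harpers change above: the hand-maintained keep_only_tags/remove_tags lists and the preprocess_html hook are commented out in favour of auto_cleanup, and the feed moves to harpers.org/feed/. A minimal sketch of the resulting recipe shape; the class name is illustrative, not the full recipe:

    from calibre.web.feeds.news import BasicNewsRecipe

    class HarpersSketch(BasicNewsRecipe):
        # auto_cleanup lets calibre's readability heuristics extract the
        # article body, so site-specific tag lists that break whenever
        # harpers.org changes its markup are no longer needed.
        title = u"Harper's Magazine"
        no_stylesheets = True
        auto_cleanup = True
        feeds = [(u"Harper's Magazine", u'http://harpers.org/feed/')]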

View File

@@ -15,23 +15,12 @@ class AdvancedUserRecipe(BasicNewsRecipe):
    timeout = 5
    no_stylesheets = True
    keep_only_tags = [dict(name='div', attrs={'id':'mitte_news'}),
                      dict(name='h1', attrs={'class':'clear'}),
                      dict(name='div', attrs={'class':'meldung_wrapper'})]
    remove_tags_after = dict(name='p', attrs={'class':'editor'})
    remove_tags = [dict(id='navi_top_container'),
                   dict(id='navi_bottom'),
                   dict(id='mitte_rechts'),
                   dict(id='navigation'),
                   dict(id='subnavi'),
                   dict(id='social_bookmarks'),
                   dict(id='permalink'),
                   dict(id='content_foren'),
                   dict(id='seiten_navi'),
                   dict(id='adbottom'),
                   dict(id='sitemap'),
                   dict(name='div', attrs={'id':'sitemap'}),
                   dict(name='ul', attrs={'class':'erste_zeile'}),
                   dict(name='ul', attrs={'class':'zweite_zeile'}),
                   dict(name='div', attrs={'class':'navi_top_container'})]
                   dict(name='p', attrs={'class':'size80'})]

    feeds = [
        ('Newsticker', 'http://www.heise.de/newsticker/heise.rdf'),
@@ -54,5 +43,3 @@ class AdvancedUserRecipe(BasicNewsRecipe):

    def print_version(self, url):
        return url + '?view=print'
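The print_version hook above is the other half of the cleanup: the recipe can strip far less markup because it downloads the printer-friendly page in the first place. A minimal sketch of the hook in isolation; the class name and trimmed-down feed list are illustrative:

    from calibre.web.feeds.news import BasicNewsRecipe

    class HeiseSketch(BasicNewsRecipe):
        title = u'heise online'
        feeds = [('Newsticker', 'http://www.heise.de/newsticker/heise.rdf')]

        def print_version(self, url):
            # Fetch the printer-friendly variant of each article, which
            # carries far less navigation chrome than the normal page.
            return url + '?view=print'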

View File

@@ -1,43 +1,74 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
import re
import datetime
import time

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title = u'Metro UK'
    description = 'Author Dave Asbury : News from The Metro - UK'
    description = 'News as provided by The Metro - UK'
    #timefmt = ''
    __author__ = 'Dave Asbury'
    #last update 9/9/12
    #last update 9/6/12
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
    no_stylesheets = True
    oldest_article = 1
    max_articles_per_feed = 12
    remove_empty_feeds = True
    remove_javascript = True
    #auto_cleanup = True
    auto_cleanup = True
    encoding = 'UTF-8'
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/157897_117118184990145_840702264_n.jpg'
    language = 'en_GB'
    masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:900;font-size:1.6em;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:1.2em;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:1.0em;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:1.0em;}
    '''
    keep_only_tags = [
        #dict(name='h1'),
        #dict(name='h2'),
        #dict(name='div', attrs={'class' : ['row','article','img-cnt figure','clrd']})
        #dict(name='h3'),
        #dict(attrs={'class' : 'BText'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class' : 'art-fd fd-gr1-b clrd'}),
        dict(name='span', attrs={'class' : 'share'}),
        dict(name='li'),
        dict(attrs={'class' : ['twitter-share-button','header-forms','hdr-lnks','close','art-rgt','fd-gr1-b clrd google-article','news m12 clrd clr-b p5t shareBtm','item-ds csl-3-img news','c-1of3 c-last','c-1of1','pd','item-ds csl-3-img sport']}),
        dict(attrs={'id' : ['','sky-left','sky-right','ftr-nav','and-ftr','notificationList','logo','miniLogo','comments-news','metro_extras']})
    ]
    remove_tags_before = dict(name='h1')
    #remove_tags_after = dict(attrs={'id':['topic-buttons']})

    feeds = [
        (u'News', u'http://www.metro.co.uk/rss/news/'),
        (u'Money', u'http://www.metro.co.uk/rss/money/'),
        (u'Sport', u'http://www.metro.co.uk/rss/sport/'),
        (u'Film', u'http://www.metro.co.uk/rss/metrolife/film/'),
        (u'Music', u'http://www.metro.co.uk/rss/metrolife/music/'),
        (u'TV', u'http://www.metro.co.uk/rss/tv/'),
        (u'Showbiz', u'http://www.metro.co.uk/rss/showbiz/'),
        (u'Weird News', u'http://www.metro.co.uk/rss/weird/'),
        (u'Travel', u'http://www.metro.co.uk/rss/travel/'),
        (u'Lifestyle', u'http://www.metro.co.uk/rss/lifestyle/'),
        (u'Books', u'http://www.metro.co.uk/rss/lifestyle/books/'),
        (u'Food', u'http://www.metro.co.uk/rss/lifestyle/restaurants/')]
    def parse_index(self):
        articles = {}
        key = None
        ans = []
        feeds = [('UK', 'http://metro.co.uk/news/uk/'),
                 ('World', 'http://metro.co.uk/news/world/'),
                 ('Weird', 'http://metro.co.uk/news/weird/'),
                 ('Money', 'http://metro.co.uk/news/money/'),
                 ('Sport', 'http://metro.co.uk/sport/'),
                 ('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
                ]
        for key, feed in feeds:
            soup = self.index_to_soup(feed)
            articles[key] = []
            ans.append(key)
            today = datetime.date.today()
            today = time.mktime(today.timetuple()) - 60*60*24
            for a in soup.findAll('a'):
                for name, value in a.attrs:
                    if name == "class" and value == "post":
                        url = a['href']
                        title = a['title']
                        print title
                        description = ''
                        m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
                        skip = 1
                        # Guard against URLs with no embedded date: calling
                        # m.groups() on a failed search would raise AttributeError.
                        if m is not None and len(m.groups()) == 3:
                            g = m.groups()
                            dt = datetime.datetime.strptime(g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d')
                            pubdate = time.strftime('%a, %d %b', dt.timetuple())
                            dt = time.mktime(dt.timetuple())
                            if dt >= today:
                                print pubdate
                                skip = 0
                        else:
                            pubdate = strftime('%a, %d %b')
                        summary = a.find(True, attrs={'class':'excerpt'})
                        if summary:
                            description = self.tag_to_string(summary, use_alt=False)
                        if skip == 0:
                            articles[key].append(
                                dict(title=title, url=url, date=pubdate,
                                     description=description,
                                     content=''))
        #ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
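The freshness filter in parse_index above hinges on the regex recovering year/month/day from the article URL. A standalone check of that pattern, using a made-up URL of the same /YYYY/MM/DD/slug/ shape:

    import datetime
    import re
    import time

    url = 'http://metro.co.uk/2012/12/06/example-story-3305923/'  # hypothetical
    m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
    if m is not None:
        g = m.groups()
        dt = datetime.datetime.strptime(g[0] + '-' + g[1] + '-' + g[2], '%Y-%m-%d')
        print time.strftime('%a, %d %b', dt.timetuple())  # -> Thu, 06 Dec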

View File

@@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 8)
numeric_version = (0, 9, 9)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@@ -13,6 +13,7 @@ const calibre_device_entry_t calibre_mtp_device_table[] = {
    // Amazon Kindle Fire HD
    , { "Amazon", 0x1949, "Fire HD", 0x0007, DEVICE_FLAGS_ANDROID_BUGS}
    , { "Amazon", 0x1949, "Fire HD", 0x000a, DEVICE_FLAGS_ANDROID_BUGS}

    // Nexus 10
    , { "Google", 0x18d1, "Nexus 10", 0x4ee2, DEVICE_FLAGS_ANDROID_BUGS}

View File

@@ -60,9 +60,9 @@ class PublioStore(BasicStoreConfig, StorePlugin):
                        series = ''.join(data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[last()]/a/@title'))
                        title = title + ' (seria ' + series + ')'
                    author = ', '.join(data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[@class="row"][1]/a/@title'))
                    price = ''.join(data.xpath('.//div[@class="priceBoxContener "]/div/ins/text()'))
                    price = ''.join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/ins/text()'))
                    if not price:
                        price = ''.join(data.xpath('.//div[@class="priceBoxContener "]/div/text()'))
                        price = ''.join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/text()')).strip()
                    formats = ', '.join(data.xpath('.//div[@class="formats"]/a/img/@alt'))

                    counter -= 1
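The Publio fix above tracks a site redesign: the price now lives in a single priceBox tk-museo-slab div, with discounted prices wrapped in an ins element. A self-contained check of the two new XPath expressions against mocked-up markup; the HTML snippet and price values are invented:

    from lxml import html

    snippet = html.fromstring(
        '<div><div class="priceBox tk-museo-slab">'
        '<del>39,90 zl</del><ins>19,90 zl</ins></div></div>')
    price = ''.join(snippet.xpath('.//div[@class="priceBox tk-museo-slab"]/ins/text()'))
    if not price:
        # No <ins> means no discount; the price is bare text inside the div.
        price = ''.join(snippet.xpath('.//div[@class="priceBox tk-museo-slab"]/text()')).strip()
    print price  # -> 19,90 zl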

File diff suppressed because it is too large

View File

@@ -48,7 +48,7 @@ class ConnectedWorker(Thread):
        try:
            eintr_retry_call(conn.send, self.args)
            self.res = eintr_retry_call(conn.recv)
        except:
        except BaseException:
            self.tb = traceback.format_exc()

def communicate(ans, worker, listener, args, timeout=300, heartbeat=None,
@@ -77,6 +77,8 @@ def communicate(ans, worker, listener, args, timeout=300, heartbeat=None,
    if cw.tb:
        raise WorkerError('Failed to communicate with worker process')
    if cw.res is None:
        raise WorkerError('Something strange happened. The worker process was aborted without an exception.')
    if cw.res.get('tb', None):
        raise WorkerError('Worker failed', cw.res['tb'])
    ans['result'] = cw.res['result']
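Both hunks above serve the same goal: a failure in the worker should surface in the parent as a WorkerError carrying the formatted traceback, never as a silent None dereference. A minimal sketch of that capture-and-reraise pattern; the function and names are illustrative, not calibre's API:

    import traceback

    def run_guarded(fn, *args):
        # Mirror ConnectedWorker: store the traceback as a string instead of
        # letting the exception die inside the worker thread.
        try:
            return fn(*args), None
        except BaseException:
            return None, traceback.format_exc()

    result, tb = run_guarded(int, 'not a number')
    if tb is not None:
        raise RuntimeError('worker failed:\n' + tb)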