Sync to trunk.

This commit is contained in:
John Schember 2009-12-26 15:18:40 -05:00
commit 028136c7ae
79 changed files with 15150 additions and 10844 deletions

View File

@ -4,6 +4,116 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.6.30
date: 2009-12-26
new features:
- title: "Update graphical toolkit to Qt 4.6 for better integration with Windows 7 and OS X Snow Leopard."
description: >
"The library calibre uses to draw its user interface, Qt, has been updated in all binary builds to
version 4.6. This provides better support for Windows 7 and OS X Snow Leopard, as well as various
speed ups in the user interface and e-book viewer rendering. Note that calibre will still run with
Qt 4.5"
- title: "Device drivers: Support for device specific icons"
- title: "Add menu options to delete specific formats/covers from the library"
tickets: [3509]
- title: "Metadata dialog: Auto-increment the series number when editing the series and validate the input ISBN based on the check digit"
tickets: [4285]
- title: "Add option to swap author first and last names when reading metadata from a file name"
- title: "Replace underscores with spaces when reading metadata"
- title: "Nook driver: Upload covers when sending to device. Also add Output Profile for the Nook"
- title: "Clicking on row numbers in the book list will now open the book in the viewer"
tickets: [4266]
- title: "Driver for the Boox reader"
bug fixes:
- title: "MOBI Metadata reader: Correctly handle non ASCII characters when reading embedded metadata."
tickets: [4223]
- title: "LRF Output: Set category metadata in generated LRF file based on tags"
tickets: [4286]
- title: "News download: Correctly handle URLs with non ASCII characters in them"
- title: "Fix windows only crash when viewing MOBI files"
tickets: [4259]
- title: "Remember selection when sorting and switching between library and device views"
tickets: [4279, 4274]
- title: "Add a retry loop when querying database to workaround intermittent database access problems in windows"
tickets: [4264]
- title: "When adding books, do not add OPF as a separate format"
- title: "Fix initialize method of plugins not being called"
- title: "Handle interrupted system calls during device detection on OS X"
tickets: [4278]
- title: "EPUB Output: Make splitting to size more accurate by moving it after the workarounds"
- title: "When adding books via the add books button to the device, restrict to formats supported by device"
new recipes:
- title: Cyprus News Live
author: kwetal
- title: Kleine Zeitung
author: kwetal
- title: Business Week Magazine
author: Darko Miletic
- title: CNET News
author: Darko Miletic
- title: Dilbert
author: Darko Miletic
- title: Le Temps
author: Sujata Raman
- title: Inc magazine
author: kwetal
- title: SME
author: kwetal
- title: Pravda
author: kwetal
- title: Houston Chronicle
author: Kovid Goyal
- title: Strategy and Business
author: kwetal
- title: Watching America
author: kwetal
- title: Aftenposten
author: davotibarna
improved recipes:
- Sueddeutsche
- Irish Independent
- The Straits Times
- Harvard Business Review
- Wall Street Journal
- version: 0.6.29
date: 2009-12-18

Binary file not shown.

After

Width:  |  Height:  |  Size: 446 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 892 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 952 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 657 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 406 B

View File

@ -0,0 +1,64 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.businessweek.com/magazine/news/articles/business_news.htm
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class BWmagazine(BasicNewsRecipe):
    """Recipe that builds one feed from the BusinessWeek magazine index page.

    ``parse_index`` scrapes the links found under the ``<h3>`` headings of
    the ``div#column2`` element of ``INDEX``; ``print_version`` maps each
    article URL onto its ``/print/magazine/`` counterpart.
    """

    title                 = 'BusinessWeek Magazine'
    __author__            = 'Darko Miletic'
    description           = 'Stay up to date with BusinessWeek magazine articles. Read news on international business, personal finances & the economy in the BusinessWeek online magazine.'
    publisher             = 'Bloomberg L.P.'
    category              = 'news, International Business News, current news in international business,international business articles, personal business, business week magazine, business week magazine articles, business week magazine online, business week online magazine'
    oldest_article        = 10
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'en'
    INDEX                 = 'http://www.businessweek.com/magazine/news/articles/business_news.htm'
    cover_url             = 'http://images.businessweek.com/mz/covers/current_120x160.jpg'

    # The option is spelled 'comments' (as in the Dilbert recipe); the former
    # 'comment' key did not name a conversion option.
    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    # A list of tag specifications, matching how the other recipes declare it.
    keep_only_tags = [dict(name='div', attrs={'id': 'storyBody'})]

    def parse_index(self):
        """Scrape ``INDEX`` and return a one-element list of feeds.

        Returns:
            ``[(feed_title, articles)]`` where ``feed_title`` is the text of
            the index page's ``<title>`` and ``articles`` is a list of dicts
            with the keys ``title``, ``date``, ``url`` and ``description``.
            ``articles`` is empty when ``div#column2`` is not found.
        """
        soup = self.index_to_soup(self.INDEX)
        articles = []
        ditem = soup.find('div', attrs={'id': 'column2'})
        if ditem:
            # Every article gets the same (current) date stamp.
            date = strftime(self.timefmt)
            for item in ditem.findAll('h3'):
                feed_link = item.find('a')
                if feed_link is None or not feed_link.has_key('href'):
                    continue
                # Links are relative ('../../...'); keep only what follows
                # the '../../' prefix and hang it off the magazine root.
                relative = feed_link['href'].partition('../../')[2]
                articles.append({
                    'title'      : self.tag_to_string(feed_link),
                    'date'       : date,
                    'url'        : 'http://www.businessweek.com/magazine/' + relative,
                    'description': '',
                })
        return [(soup.head.title.string, articles)]

    def print_version(self, url):
        """Return the print-layout URL for ``url``.

        Everything from the last ``?`` onwards is dropped (the URL is used
        unchanged when it has no ``?``), then ``.com/magazine/`` is replaced
        with ``.com/print/magazine/``.
        """
        rurl = url.rpartition('?')[0] or url
        return rurl.replace('.com/magazine/', '.com/print/magazine/')

View File

@ -0,0 +1,37 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
news.cnet.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class CnetNews(BasicNewsRecipe):
    """Recipe for CNET News, driven by a single RSS feed listed in ``feeds``."""

    title                 = 'CNET News'
    __author__            = 'Darko Miletic'
    description           = 'Tech news and business reports by CNET News. Focused on information technology, core topics include computers, hardware, software, networking, and Internet media.'
    publisher             = 'CNET'
    category              = 'news, IT, USA'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1252'
    use_embedded_content  = False
    language              = 'en'

    # The option is spelled 'comments' (as in the Dilbert recipe); the former
    # 'comment' key did not name a conversion option.
    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'publisher' : publisher,
        'language'  : language,
    }

    # Elements stripped from the article body.
    remove_tags = [
        dict(name='div', attrs={'id': 'tweetmemeAndFacebook'}),
        dict(name='ul', attrs={'class': 'contentTools'}),
    ]

    # A list of tag specifications, matching how the other recipes declare it.
    keep_only_tags = [dict(name='div', attrs={'class': 'txtWrap'})]

    feeds = [(u'News', u'http://news.cnet.com/2547-1_3-0-20.xml')]

View File

@ -0,0 +1,80 @@
from calibre.web.feeds.news import BasicNewsRecipe
class ColumbusDispatchRecipe(BasicNewsRecipe):
    """Feed-based recipe for The Columbus Dispatch (central Ohio daily)."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 1

    title = u'The Columbus Dispatch'
    publisher = u'The Columbus Dispatch'
    category = u'News, Newspaper'
    description = u'Daily newspaper from central Ohio'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 1.2
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Two parallel downloads seemed to work best for the author; YMMV.
    simultaneous_downloads = 2

    # (title, url) pairs taken from
    # http://www.dispatch.com/live/content/rss/index.html
    feeds = [
        (u'News: Local and state news', u'http://www.dispatch.com/live/static/crt/2_rss_localnews.xml'),
        (u'News: National news', u'http://www.dispatch.com/live/static/crt/2_rss_nationalnews.xml'),
        (u'News: Editorials', u'http://www.dispatch.com/live/static/crt/2_rss_editorials.xml'),
        (u'News: Columnists', u'http://www.dispatch.com/live/static/crt/2_rss_columnists.xml'),
        (u'News: Health news', u'http://www.dispatch.com/live/static/crt/2_rss_health.xml'),
        (u'News: Science news', u'http://www.dispatch.com/live/static/crt/2_rss_science.xml'),
        (u'Sports: OSU football', u'http://www.dispatch.com/live/static/crt/2_rss_osufootball.xml'),
        (u'Sports: OSU men\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osumensbball.xml'),
        (u'Sports: OSU women\'s basketball', u'http://www.dispatch.com/live/static/crt/2_rss_osuwomensbball.xml'),
        (u'Sports: OSU sports', u'http://www.dispatch.com/live/static/crt/2_rss_osusports.xml'),
        (u'Sports: Blue Jackets', u'http://www.dispatch.com/live/static/crt/2_rss_bluejackets.xml'),
        (u'Sports: Crew', u'http://www.dispatch.com/live/static/crt/2_rss_crew.xml'),
        (u'Sports: Clippers', u'http://www.dispatch.com/live/static/crt/2_rss_clippers.xml'),
        (u'Sports: Indians', u'http://www.dispatch.com/live/static/crt/2_rss_indians.xml'),
        (u'Sports: Reds', u'http://www.dispatch.com/live/static/crt/2_rss_reds.xml'),
        (u'Sports: Golf', u'http://www.dispatch.com/live/static/crt/2_rss_golf.xml'),
        (u'Sports: Outdoors', u'http://www.dispatch.com/live/static/crt/2_rss_outdoors.xml'),
        (u'Sports: Cavs/NBA', u'http://www.dispatch.com/live/static/crt/2_rss_cavaliers.xml'),
        (u'Sports: High Schools', u'http://www.dispatch.com/live/static/crt/2_rss_highschools.xml'),
        (u'Sports: Browns', u'http://www.dispatch.com/live/static/crt/2_rss_browns.xml'),
        (u'Sports: Bengals', u'http://www.dispatch.com/live/static/crt/2_rss_bengals.xml'),
        (u'Sports: Auto Racing', u'http://www.dispatch.com/live/static/crt/2_rss_autoracing.xml'),
        (u'Business News', u'http://www.dispatch.com/live/static/crt/2_rss_business.xml'),
        (u'Features: Weekender', u'http://www.dispatch.com/live/static/crt/2_rss_weekender.xml'),
        (u'Features: Life and Arts', u'http://www.dispatch.com/live/static/crt/2_rss_lifearts.xml'),
        (u'Features: Food', u'http://www.dispatch.com/live/static/crt/2_rss_food.xml'),
        (u'Features: NOW! for kids', u'http://www.dispatch.com/live/static/crt/2_rss_now.xml'),
        (u'Features: Travel', u'http://www.dispatch.com/live/static/crt/2_rss_travel.xml'),
        (u'Features: Home and Garden', u'http://www.dispatch.com/live/static/crt/2_rss_homegarden.xml'),
        (u'Features: Faith and Values', u'http://www.dispatch.com/live/static/crt/2_rss_faithvalues.xml'),
    ]

    # Only these <div> classes are kept from an article page.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'colhed'}),
        dict(name='div', attrs={'class': 'hed'}),
        dict(name='div', attrs={'class': 'subhed'}),
        dict(name='div', attrs={'class': 'date'}),
        dict(name='div', attrs={'class': 'byline'}),
        dict(name='div', attrs={'class': 'srcline'}),
        dict(name='div', attrs={'class': 'body'}),
    ]

    # The in-story advertisement container is dropped.
    remove_tags = [dict(name='div', attrs={'id': 'middle-story-ad-container'})]

    # Stylesheet text is kept verbatim (flush-left inside the literal).
    extra_css = '''
body {font-family:verdana,arial,helvetica,geneva,sans-serif ;}
a {text-decoration: none; color: blue;}
div.colhed {font-weight: bold;}
div.hed {font-size: xx-large; font-weight: bold; margin-bottom: 0.2em;}
div.subhed {font-size: large;}
div.date {font-size: x-small; font-style: italic; color: #666666; margin-top: 0.4em; margin-bottom: 0.4em;}
div.byline, div.srcline {font-size: small; color: #696969;}
'''

View File

@ -0,0 +1,101 @@
from calibre.web.feeds.news import BasicNewsRecipe
from datetime import datetime, timedelta
class CyNewsLiveRecipe(BasicNewsRecipe):
    """Recipe for Cyprus News Live that scrapes HTML section listings.

    The ``feeds`` entries are ``(title, url)`` pairs pointing at listing
    pages; ``parse_index`` downloads each page and ``find_articles`` turns
    its ``div.ListArticle`` entries into article dictionaries.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_CY'
    version = 1

    title = u'Cyprus News Live'
    publisher = u'The Cyprus Weekly'
    category = u'News, Newspaper'
    description = u'News from Cyprus'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 7
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True

    # Scratch state shared between parse_index() and find_articles():
    # timestamp of the article being examined, the cut-off timestamp, and
    # the number of articles accepted for the feed currently being built.
    pubTime = None
    minTime = None
    articleCount = 0

    INDEX = 'http://www.cynewslive.com'

    # Month names used in the listing dates, mapped to month numbers.
    _MONTH_NUMBERS = {'January': 1, 'February': 2, 'March': 3, 'April': 4,
                      'May': 5, 'June': 6, 'July': 7, 'August': 8,
                      'September': 9, 'October': 10, 'November': 11,
                      'December': 12}

    feeds = [
        ('News: Cyprus', 'http://www.cynewslive.com/main/92,0,0,0-CYPRUS.aspx'),
        ('News: World', 'http://www.cynewslive.com/main/78,0,0,0-UKWORLD.aspx'),
        ('Sport: Football', 'http://www.cynewslive.com/main/82,0,0,0-FOOTBALL.aspx'),
        ('Sport: Rugby', 'http://www.cynewslive.com/main/83,0,0,0-RUGBY.aspx'),
        ('Sport: Cricket', 'http://www.cynewslive.com/main/85,0,0,0-CRICKET.aspx'),
        ('Sport: Tennis', 'http://www.cynewslive.com/main/84,0,0,0-TENNIS.aspx'),
        ('Sport: Other', 'http://www.cynewslive.com/main/86,0,0,0-OTHER.aspx'),
        ('Business: Local', 'http://www.cynewslive.com/main/100,0,0,0-LOCAL.aspx'),
        ('Business: Foreign', 'http://www.cynewslive.com/main/101,0,0,0-FOREIGN.aspx'),
        ('Environment', 'http://www.cynewslive.com/main/93,0,0,0-ENVIRONMENT.aspx'),
        ('Culture', 'http://www.cynewslive.com/main/208,0,0,0-CULTURE.aspx'),
    ]

    keep_only_tags = [dict(name='div', attrs={'class': 'ArticleCategories'})]

    # Stylesheet text is kept verbatim (flush-left inside the literal).
    extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
'''

    def parse_index(self):
        """Build one feed per entry of ``feeds`` from its listing page.

        Returns:
            A list of ``(feed_title, articles)`` tuples, one per entry in
            ``feeds``. ``articles`` stays empty when the listing table
            (``table#ctl00_cp_ctl01_listp``) is not present on the page.
        """
        answer = []
        for feed_title, feed_url in self.feeds:
            self.articleCount = 0
            articles = []
            soup = self.index_to_soup(feed_url)

            table = soup.find('table', attrs={'id': 'ctl00_cp_ctl01_listp'})
            if table:
                # Until an entry supplies its own date, treat it as "now".
                self.pubTime = datetime.now()
                self.minTime = self.pubTime - timedelta(days=self.oldest_article)
                self.find_articles(table, articles)

            answer.append((feed_title, articles))
        return answer

    def postprocess_html(self, soup, first):
        """Drop inline styles and turn ``<font>`` tags into bare ``<div>``s.

        Args:
            soup: parsed article page; modified in place.
            first: unused here.

        Returns:
            The same ``soup`` object.
        """
        for el in soup.findAll(attrs={'style': True}):
            del el['style']

        for el in soup.findAll('font'):
            el.name = 'div'
            # Iterating the tag itself walks its children, not its
            # attributes, so take the attribute names from ``attrs``.
            # dict() accepts both a list of (name, value) pairs and a
            # mapping; the names are copied into a list first because
            # ``del`` mutates ``attrs`` while we loop.
            for attr in list(dict(el.attrs)):
                del el[attr]
        return soup

    def find_articles(self, table, articles):
        """Append the recent entries of a listing ``table`` to ``articles``.

        Scanning stops at the first entry older than ``self.minTime`` or
        once ``max_articles_per_feed`` articles have been collected.

        Args:
            table: tag containing the ``div.ListArticle`` entries.
            articles: list extended in place with dicts holding ``title``,
                ``date``, ``url`` and ``description``.
        """
        for div in table.findAll('div', attrs={'class': 'ListArticle'}):
            heading = div.find('div', attrs={'class': 'ListArticle_T'})
            link = None
            if heading is not None:
                link = heading.find('a', href=True)
            if link is None:
                # Entry without a usable title link: nothing to download.
                continue

            title = self.tag_to_string(link)
            url = self.INDEX + link['href']
            description = self.tag_to_string(
                div.find('div', attrs={'class': 'ListArticle_BODY300'}))

            stamp = div.find('div', attrs={'class': 'ListArticle_D'})
            if stamp:
                # Parsed as four space-separated fields:
                # day, month name, year, 'hour:minute'.
                dateParts = self.tag_to_string(stamp).split(' ')
                timeParts = dateParts[3].split(':')
                self.pubTime = datetime(year=int(dateParts[2]),
                                        month=self._MONTH_NUMBERS[dateParts[1]],
                                        day=int(dateParts[0]),
                                        hour=int(timeParts[0]),
                                        minute=int(timeParts[1]))

            too_old = self.pubTime < self.minTime
            # '>=' so that at most max_articles_per_feed articles are kept
            # (the previous '<=' test let one extra article through).
            feed_full = self.articleCount >= self.max_articles_per_feed
            if too_old or feed_full:
                return

            articles.append({'title': title, 'date': self.pubTime,
                             'url': url, 'description': description})
            self.articleCount += 1

View File

@ -0,0 +1,41 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
http://www.dilbert.com
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class DosisDiarias(BasicNewsRecipe):
    """Recipe for the Dilbert daily strip, read from its feed's embedded content."""

    title                 = 'Dilbert'
    __author__            = 'Darko Miletic'
    description           = 'Dilbert'
    oldest_article        = 5
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = True
    encoding              = 'utf-8'
    publisher             = 'UNITED FEATURE SYNDICATE, INC.'
    category              = 'comic'
    language              = 'en'

    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'language'  : language,
        'publisher' : publisher,
    }

    feeds = [(u'Dilbert', u'http://feeds.dilbert.com/DilbertDailyStrip')]

    def get_article_url(self, article):
        """Return the entry's ``feedburner_origlink`` value, or ``None`` if absent."""
        return article.get('feedburner_origlink', None)

    def preprocess_html(self, soup):
        """Remove links whose target contains ``http://feedads``.

        Only anchors that actually carry an ``href`` are inspected, so an
        anchor without one no longer raises ``KeyError``.

        Returns:
            The same ``soup`` object, modified in place.
        """
        for tag in soup.findAll(name='a', href=True):
            if 'http://feedads' in tag['href']:
                tag.extract()
        return soup

View File

@ -11,17 +11,15 @@ class HBR(BasicNewsRecipe):
language = 'en'
no_stylesheets = True
LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
INDEX = 'http://hbr.harvardbusiness.org/current'
keep_only_tags = [dict(name='div', id='content')]
remove_tags = [
dict(id=['articleDate', 'subscriptionModule', 'errorArea',
'feedbackForm', 'relatedModule', 'articleSecondaryModule',
'contentRight', 'summaryLink']),
dict(name='form'),
]
LOGIN_URL = 'http://hbr.org/login?request_url=/'
INDEX = 'http://hbr.org/current'
keep_only_tags = [dict(name='div', id='pageContainer')]
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'mailingListTout', 'partnerCenter', 'pageFooter']),
dict(name='iframe')]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
@ -34,14 +32,14 @@ class HBR(BasicNewsRecipe):
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOGIN_URL)
br.select_form(nr=0)
br['ssousername'] = self.username
br['password'] = self.password
br.select_form(name='signInForm')
br['signInForm:username'] = self.username
br['signInForm:password'] = self.password
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='(sign out)')
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
return br
@ -54,56 +52,70 @@ class HBR(BasicNewsRecipe):
if url.endswith('/ar/1'):
return url[:-1]+'pr'
def get_features(self, soup):
div = soup.find('div', id='issueFeatures')
for li in div.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
title = self.tag_to_string(a)
p = li.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
yield {'title':title, 'url':url, 'description':desc}
def get_departments(self, soup):
div = soup.find('div', id='issueDepartmentsContent')
for h4 in div.findAll('h4'):
feed = self.tag_to_string(h4)
articles = []
ul = h4.findNextSibling('ul')
for li in ul.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX)
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
return self.index_to_soup('http://hbr.org'+url)
def hbr_parse_section(self, container, feeds):
current_section = None
current_articles = []
for x in container.findAll(name=['li', 'h3', 'h4']):
if x.name in ['h3', 'h4'] and not x.findAll(True):
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if x.name == 'li':
a = x.find('a', href=True)
if a is not None:
title = self.tag_to_string(a)
p = li.find('p')
url = a.get('href')
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org'+url
url = self.map_url(url)
p = x.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
articles.append({'title':title, 'url':url, 'description':desc})
yield [feed, articles]
if not title or not url:
continue
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
current_articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
if current_section and current_articles:
feeds.append((current_section, current_articles))
def hbr_parse_toc(self, soup):
feeds = []
features = soup.find(id='issueFeaturesContent')
self.hbr_parse_section(features, feeds)
departments = soup.find(id='issueDepartments')
self.hbr_parse_section(departments, feeds)
return feeds
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
feeds = []
feeds.append(('Features', list(self.get_features(soup))))
feeds.extend(self.get_departments(soup))
soup = self.hbr_get_toc()
feeds = self.hbr_parse_toc(soup)
return feeds
def get_cover_url(self):
cover_url = None
index = 'http://hbr.harvardbusiness.org/current'
index = 'http://hbr.org/current'
soup = self.index_to_soup(index)
link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
if link_item:
cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
cover_url = 'http://hbr.org' + link_item['src']
return cover_url

View File

@ -0,0 +1,66 @@
from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe):
    """Recipe that scrapes the Houston Chronicle news page into sections."""

    title = u'The Houston Chronicle'
    description = 'News from Houston, Texas'
    __author__ = 'Kovid Goyal'
    # 'US' is a country code, not a language code; use 'en' like the other
    # English-language recipes.
    language = 'en'
    timefmt = ' [%a, %d %b, %Y]'
    no_stylesheets = True

    keep_only_tags = [dict(id=['story-head', 'story'])]
    remove_tags = [dict(id=['share-module', 'resource-box',
                            'resource-box-header'])]

    def parse_index(self):
        """Walk the news front page and group article links by section.

        Every ``<div>`` inside ``table.body-columns`` is examined in
        document order:

        * ``class="module-mast"`` starts a new section (titles containing
          ``interactives`` are ignored);
        * a div with a ``storyid`` attribute contributes its first link;
        * ``class="columnbox"`` divs contribute a link plus description,
          but only while the current section title contains ``special``.

        Returns:
            A list of ``(section_title, articles)`` tuples; sections with no
            articles are omitted.

        Raises:
            ValueError: if the ``table.body-columns`` container is missing.
        """
        soup = self.index_to_soup('http://www.chron.com/news/')
        container = soup.find('table', attrs={'class': 'body-columns'})
        if container is None:
            raise ValueError('Could not find the body-columns table on '
                             'http://www.chron.com/news/')

        feeds = []
        current_section = 'Top Stories'
        current_articles = []
        self.log('\tFound section:', current_section)

        for div in container.findAll('div'):
            css_class = div.get('class', None)

            if css_class == 'module-mast':
                # u'\xbb' is the raquo character used as heading decoration.
                t = self.tag_to_string(div).replace(u'\xbb', '').strip()
                if t and 'interactives' not in t:
                    if current_section and current_articles:
                        feeds.append((current_section, current_articles))
                    current_section = t
                    current_articles = []
                    self.log('\tFound section:', current_section)

            elif div.get('storyid', False):
                a = div.find('a', href=True)
                if a:
                    title = self.tag_to_string(a)
                    url = a.get('href')
                    if title and url:
                        if url.startswith('/'):
                            url = 'http://www.chron.com' + url
                        self.log('\t\tFound article:', title)
                        self.log('\t\t\t', url)
                        current_articles.append({'title': title, 'url': url,
                                                 'date': '', 'description': ''})

            elif css_class == 'columnbox' and \
                    'special' in current_section.lower():
                a = div.find('a')
                if a:
                    title = self.tag_to_string(a)
                    url = a.get('href')
                    # Only site-relative links are accepted in this branch.
                    if title and url and url.startswith('/'):
                        url = 'http://www.chron.com' + url
                        self.log('\t\tFound article:', title)
                        self.log('\t\t\t', url)
                        # Remove the link so the remaining text of the box
                        # becomes the description.
                        a.extract()
                        desc = self.tag_to_string(div)
                        current_articles.append({'title': title, 'url': url,
                                                 'date': '', 'description': desc})

        # Flush the section that was still open when the page ended.
        if current_section and current_articles:
            feeds.append((current_section, current_articles))
        return feeds

View File

@ -0,0 +1,71 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class IncMagazineRecipe(BasicNewsRecipe):
    """Recipe that scrapes the current Inc Magazine issue page into feeds."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 1

    title = u'Inc Magazine'
    publisher = u'Mansueto Ventures LLC'
    category = u'News, Business'
    description = u'Handbook of the American Entrepeneur'

    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    INDEX = 'http://www.inc.com/magazine'

    remove_tags = [dict(name='div', attrs={'id': 'advt'})]

    # Stylesheet text is kept verbatim (flush-left inside the literal).
    extra_css = '''
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
div#deck {font-weight: bold;}
div.byline {font-size: x-small; color: #696969; margin-top: 0.4em;}
'''

    def parse_index(self):
        """Turn every ``magazinesection*`` div of ``INDEX`` into one feed.

        As a side effect, ``self.title`` is extended with a date taken from
        the URL the browser ended up on after loading ``INDEX``.

        Returns:
            A list of ``(feed_title, articles)`` tuples. Article URLs point
            at the ``_Printer_Friendly.html`` variant of each page.
        """
        soup = self.index_to_soup(self.INDEX)

        # Second-to-last path segment of the final URL; characters 4-5 and
        # 0-3 are appended to the title (presumably month and year of a
        # YYYYMMDD issue id -- TODO confirm against the site).
        url = self.browser.geturl()
        date = url.rpartition('/')[0].rpartition('/')[2]
        self.title = self.title + ' ' + date[4:6] + ', ' + date[0:4]

        answer = []
        for feature in soup.findAll('div', attrs={'class': re.compile('magazinesection.*')}):
            # Section name: the <h2> text, else the alt text of the
            # 'howtohead' image, else a placeholder.
            h2 = feature.find('h2')
            if h2:
                feedTitle = self.tag_to_string(h2)
            else:
                img = feature.find('img', attrs={'class': 'howtohead'})
                if img:
                    feedTitle = img['alt']
                else:
                    feedTitle = 'Unknown Feature'

            articles = []
            for div in feature.findAll('div', attrs={'class': re.compile('article.*|column.*')}):
                h3 = div.find('h3')
                link = None
                if h3 is not None:
                    link = h3.find('a', href=True)
                if link is None:
                    # The class pattern is broad; skip matches that carry no
                    # linked headline instead of failing on them.
                    continue

                title = self.tag_to_string(h3)
                href = link['href'].replace('.html', '_Printer_Friendly.html')
                p = div.find('p', attrs={'class': 'deck'})
                description = self.tag_to_string(p)

                articles.append({'title': title, 'date': u'', 'url': href,
                                 'description': description})

            answer.append((feedTitle, articles))
        return answer

    def preprocess_html(self, soup):
        """Remove the parent element of the low-fi logo image, if present.

        Returns:
            The same ``soup`` object, modified in place.
        """
        img = soup.find('img', attrs={'src': 'http://images.inc.com/nav/lofi_logo.gif'})
        if img:
            img.parent.extract()
        return soup

View File

@ -15,6 +15,7 @@ class IrishIndependent(BasicNewsRecipe):
max_articles_per_feed = 100
remove_tags_before = dict(id='article')
remove_tags_after = [dict(name='div', attrs={'class':'toolsBottom'})]
no_stylesheets = True
remove_tags = [
dict(name='div',attrs={'class':'toolsBottom'}),
dict(name='div',attrs={'class':'toolsTop'}),

View File

@ -0,0 +1,53 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class KleineZeitungRecipe(BasicNewsRecipe):
    """Feed-based recipe for the Austrian newspaper Kleine Zeitung."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'de_AT'
    version = 1

    title = u'Kleine Zeitung'
    publisher = u'Kleine Zeitung GmbH & Co KG'
    category = u'News, Newspaper'
    description = u'Nachrichten aus \u00D6sterreich'

    use_embedded_content = False
    remove_empty_feeds = True
    oldest_article = 2
    max_articles_per_feed = 100

    no_stylesheets = True
    remove_javascript = True

    # (title, url) pairs taken from
    # http://www.kleinezeitung.at/allgemein/multimedia/102434/wichtige-news-immer-sofort-ueber-rss-feed-abrufen.story
    feeds = [
        (u'Chronik', u'http://www.kleinezeitung.at/klon/rss/nachrichten'),
        (u'Wirtschaft', u'http://www.kleinezeitung.at/klon/rss/wirtschaft'),
        (u'Leute', u'http://www.kleinezeitung.at/klon/rss/leute'),
        (u'Sport', u'http://www.kleinezeitung.at/klon/rss/sport'),
        (u'Nachrichten aus der Steiermark', u'http://www.kleinezeitung.at/klon/rss/steiermark'),
        (u'Nachrichten aus Kaernten', u'http://www.kleinezeitung.at/klon/rss/kaernten'),
        (u'Multimedia-News', u'http://www.kleinezeitung.at/klon/rss/multimedia'),
        (u'Kino, Events & Tickets', u'http://www.kleinezeitung.at/klon/rss/events'),
    ]

    # Only the article body container is kept.
    keep_only_tags = [dict(name='div', attrs={'class': 'article_body'})]

    # Elements removed from the kept article body.
    remove_tags = [
        dict(name='a', attrs={'id': 'comment_count'}),
        dict(name='div', attrs={'class': re.compile('adv[0-9]+')}),
        dict(name='div', attrs={'class': 'art_info'}),
        dict(name='div', attrs={'id': re.compile('grafikoverlay_.*')}),
        dict(name='a', attrs={'class': 'zoom'}),
    ]

    # Stylesheet text is kept verbatim (flush-left inside the literal).
    extra_css = '''
body {font-family:verdana,arial,helvetica,geneva,sans-serif ;}
h1 {text-align: left;}
span {margin-left: 0.1em; margin-right: 0.1em;}
span.update {font-size: x-small; color: #666666}
span.update strong {font-weight: normal;}
p.intro {font-size: large;}
div.art_foto_big, div.art_foto {font-size: xx-small; color: #696969; margin-bottom: 0.5em;}
div.art_foto_big span.src {float: right;}
'''

View File

@ -14,75 +14,77 @@ class LeTemps(BasicNewsRecipe):
title = u'Le Temps'
oldest_article = 7
max_articles_per_feed = 100
__author__ = 'Sujata Raman'
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'id':'footer'})]
remove_tags = [dict(name='div', attrs={'class':'box links'})]
remove_tags = [dict(name='script')]
extra_css = '''.heading {font-size: 13px; line-height: 15px;
margin: 20px 0;} \n h2 {font-size: 24px; line-height: 25px; margin-bottom:
14px;} \n .author {font-size: 11px; margin: 0 0 5px 0;} \n .lead {font-
weight: 700; margin: 10px 0;} \n p {margin: 0 0 10px 0;}'''
remove_javascript = True
recursions = 1
encoding = 'UTF-8'
match_regexps = [r'http://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]']
lang = 'fr'
keep_only_tags = [dict(name='div', attrs={'id':'content'}),
dict(name='div', attrs={'class':'story'})
]
remove_tags = [dict(name='div', attrs={'id':['footer','sub']}),
dict(name='div', attrs={'class':['box additional','box function','right','box links','follow']})]
extra_css = '''h1{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;}
.headline{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;color:#990000;}
.summary_gal{color:#777777;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
#capt{color:#1B1B1B;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
#content{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
h2 {font-size: 24px; line-height: 25px; margin-bottom: 14px; text-transform:uppercase;}
.author {font-size:x-small; margin: 0 0 5px 0; color:#797971; font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
.lead {font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;font-weight: bold; margin: 10px 0;font-size:small;}
p {margin: 0 0 10px 0;}
h3{font-size:small;font-weight:bold;}
.heading{color:#940026;font-size:x-small;}
.description{font-size:x-small;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;color:#797971; }
a {color:#1B1B1B; font-size:small;}
.linkbox{font-size:x-small;color:#1B1B1B;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} '''
feeds = [
('Actualité',
'http://www.letemps.ch/rss/site/'),
('Monde',
'http://www.letemps.ch/rss/site/actualite/monde'),
('Suisse & Régions',
'http://www.letemps.ch/rss/site/actualite/suisse_regions'),
('Sciences & Environnement',
'http://www.letemps.ch/rss/site/actualite/sciences_environnement'),
('Société',
'http://www.letemps.ch/rss/site/actualite/societe'),
('Economie & Finance',
'http://www.letemps.ch/rss/site/economie_finance'),
('Economie & Finance - Finance',
'http://www.letemps.ch/rss/site/economie_finance/finance'),
('Economie & Finance - Fonds de placement',
'http://www.letemps.ch/rss/site/economie_finance/fonds_placement'),
('Economie & Finance - Carrières',
'http://www.letemps.ch/rss/site/economie_finance/carrieres'),
('Culture',
'http://www.letemps.ch/rss/site/culture'),
('Culture - Cinéma',
'http://www.letemps.ch/rss/site/culture/cinema'),
('Culture - Musiques',
'http://www.letemps.ch/rss/site/culture/musiques'),
('Culture - Scènes',
'http://www.letemps.ch/rss/site/culture/scenes'),
('Culture - Arts plastiques',
'http://www.letemps.ch/rss/site/culture/arts_plastiques'),
('Livres',
'http://www.letemps.ch/rss/site/culture/livres'),
('Opinions',
'http://www.letemps.ch/rss/site/opinions'),
('Opinions - Editoriaux',
'http://www.letemps.ch/rss/site/opinions/editoriaux'),
('Opinions - Invités',
'http://www.letemps.ch/rss/site/opinions/invites'),
('Opinions - Chroniques',
'http://www.letemps.ch/rss/site/opinions/chroniques'),
('LifeStyle',
'http://www.letemps.ch/rss/site/lifestyle'),
('LifeStyle - Luxe',
'http://www.letemps.ch/rss/site/lifestyle/luxe'),
('LifeStyle - Horlogerie & Joaillerie',
'http://www.letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
('LifeStyle - Design',
'http://www.letemps.ch/rss/site/lifestyle/design'),
('LifeStyle - Voyages',
'http://www.letemps.ch/rss/site/lifestyle/voyages'),
('LifeStyle - Gastronomie',
'http://www.letemps.ch/rss/site/lifestyle/gastronomie'),
('LifeStyle - Architecture & Immobilier',
'http://www.letemps.ch/rss/site/lifestyle/architecture_immobilier'),
('LifeStyle - Automobile',
'http://www.letemps.ch/rss/site/lifestyle/automobile'),
('Sports',
'http://www.letemps.ch/rss/site/actualite/sports'),
(u'Actualit\xe9', 'http://www.letemps.ch/rss/site/'),
('Monde', 'http://www.letemps.ch/rss/site/actualite/monde'),
(u'Suisse & R\xe9gions', 'http://www.letemps.ch/rss/site/actualite/suisse_regions'),
('Sciences & Environnement', 'http://www.letemps.ch/rss/site/actualite/sciences_environnement'),
(u'Soci\xe9t\xe9', 'http://www.letemps.ch/rss/site/actualite/societe'),
('Economie & Finance', 'http://www.letemps.ch/rss/site/economie_finance'),
('Economie & Finance - Finance', 'http://www.letemps.ch/rss/site/economie_finance/finance'),
('Economie & Finance - Fonds de placement', 'http://www.letemps.ch/rss/site/economie_finance/fonds_placement'),
(u'Economie & Finance - Carri\xe9res', 'http://www.letemps.ch/rss/site/economie_finance/carrieres'),
('Culture', 'http://www.letemps.ch/rss/site/culture'),
(u'Culture - Cin\xe9ma', 'http://www.letemps.ch/rss/site/culture/cinema'),
('Culture - Musiques', 'http://www.letemps.ch/rss/site/culture/musiques'),
(u'Culture - Sc\xe9nes', 'http://www.letemps.ch/rss/site/culture/scenes'),
('Culture - Arts plastiques', 'http://www.letemps.ch/rss/site/culture/arts_plastiques'),
('Livres', 'http://www.letemps.ch/rss/site/culture/livres'),
('Opinions', 'http://www.letemps.ch/rss/site/opinions'),
('Opinions - Editoriaux', 'http://www.letemps.ch/rss/site/opinions/editoriaux'),
(u'Opinions - Invit\xe9s', 'http://www.letemps.ch/rss/site/opinions/invites'),
('Opinions - Chroniques', 'http://www.letemps.ch/rss/site/opinions/chroniques'),
('LifeStyle', 'http://www.letemps.ch/rss/site/lifestyle'),
('LifeStyle - Luxe', 'http://www.letemps.ch/rss/site/lifestyle/luxe'),
('LifeStyle - Horlogerie & Joaillerie', 'http://www.letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
('LifeStyle - Design', 'http://www.letemps.ch/rss/site/lifestyle/design'),
('LifeStyle - Voyages', 'http://www.letemps.ch/rss/site/lifestyle/voyages'),
('LifeStyle - Gastronomie', 'http://www.letemps.ch/rss/site/lifestyle/gastronomie'),
('LifeStyle - Architecture & Immobilier', 'http://www.letemps.ch/rss/site/lifestyle/architecture_immobilier'),
('LifeStyle - Automobile', 'http://www.letemps.ch/rss/site/lifestyle/automobile'),
('Sports', 'http://www.letemps.ch/rss/site/actualite/sports'),
]
def print_version(self, url):
    '''
    Return the printer-friendly URL for an article.

    Every occurrence of ``Page`` in ``url`` is replaced by ``Facet/print``.
    '''
    marker, printable = 'Page', 'Facet/print'
    # split/join on a non-empty separator is equivalent to str.replace
    return printable.join(url.split(marker))
def postprocess_html(self, soup, first):
    '''
    Clean up a downloaded page and return the (mutated) soup.

    All ``<div class="box pagination">`` elements are removed. When
    ``first`` is false, the first ``<h1>`` found is removed as well.
    '''
    pagination = {'class': 'box pagination'}
    for box in soup.findAll('div', attrs=pagination):
        box.extract()
    if first:
        return soup
    heading = soup.find('h1')
    if heading is not None:
        heading.extract()
    return soup
# def print_version(self, url):
# return url.replace('Page', 'Facet/print')

View File

@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe
class PravdaSlovakiaRecipe(BasicNewsRecipe):
    '''
    News recipe for the Slovak newspaper Pravda.

    Articles are collected from the RSS feeds listed in ``feeds`` and
    fetched through the URL computed by :meth:`print_version`.
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'sk'
    version = 1

    title = u'Pravda'
    publisher = u''
    category = u'News, Newspaper'
    description = u'News from Slovakia'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    # Feeds from: http://spravy.pravda.sk/info.asp?y=sk_kskop/rssinfo.htm
    feeds = [
        (u'Spravodajstvo', u'http://servis.pravda.sk/rss.asp'),
        (u'N\u00E1zory', u'http://servis.pravda.sk/rss.asp?o=sk_nazory'),
        (u'\u0160port', u'http://servis.pravda.sk/rss.asp?o=sk_sport'),
        (u'Peniaze', u'http://servis.pravda.sk/rss.asp?o=sk_peniaze'),
        (u'Koktail', u'http://servis.pravda.sk/rss.asp?o=sk_koktail'),
        (u'Kult\u00FAra', u'http://servis.pravda.sk/rss.asp?o=sk_kultura'),
        (u'B\u00FDvanie', u'http://servis.pravda.sk/rss.asp?o=sk_byvanie'),
        (u'Veda a Technika', u'http://servis.pravda.sk/rss.asp?o=sk_veda'),
        # U+0148 is the Slovak n-caron; the previous U+00F1 was Spanish n-tilde
        (u'Mozgov\u0148a', u'http://servis.pravda.sk/rss.asp?o=sk_mozgovna'),
        (u'Auto', u'http://servis.pravda.sk/rss.asp?o=sk_autoweb'),
        (u'Cestovanie', u'http://servis.pravda.sk/rss.asp?o=sk_cestovanie'),
        (u'Regi\u00F3ny', u'http://servis.pravda.sk/rss.asp?r=sk_regiony'),
        (u'Profesia', u'http://servis.pravda.sk/rss.asp?o=sk_profesia'),
        (u'Zdravie', u'http://servis.pravda.sk/rss.asp?o=sk_zdravie'),
        (u'\u010C\u00EDtajme de\u0165om', u'http://servis.pravda.sk/rss.asp?o=sk_citajme'),
    ]

    remove_tags = [
        dict(name='p', attrs={'class': 'spatNaClanok'}),
        dict(name='ul'),
    ]

    extra_css = (
        '\n'
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}\n'
        'body {font-family: sans1, serif1;}\n'
        '.art-info {font-size: x-small; color: #696969; margin-bottom: 0.3em;}\n'
        '.img-info {font-size: x-small; color: #696969;}\n'
    )

    def print_version(self, url):
        '''
        Build the print-page URL for an article URL.

        The last path component is split at its last ``?``; the part before
        it (with ``.asp`` removed) becomes the ``r`` parameter and the part
        after it is appended as a further query parameter, e.g.
        ``.../sk_domace.asp?c=A1`` gives
        ``http://sport.pravda.sk/tlac.asp?r=sk_domace&c=A1``.
        '''
        # NOTE(review): the host is always sport.pravda.sk, whatever host the
        # article came from -- presumably tlac.asp serves every section; confirm.
        page = url.rpartition('/')[2]
        section, _sep, query = page.rpartition('?')
        section = section.replace('.asp', '')
        return 'http://sport.pravda.sk/tlac.asp?r=' + section + '&' + query

View File

@ -0,0 +1,69 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class SmeRecipe(BasicNewsRecipe):
    '''
    News recipe for the Slovak newspaper SME.

    Articles are collected from the RSS feeds listed in ``feeds`` and
    fetched through the URL computed by :meth:`print_version`.
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'sk'
    version = 1

    title = u'SME'
    publisher = u''
    category = u'News, Newspaper'
    description = u'News from Slovakia'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    no_stylesheets = True
    remove_javascript = True

    # Feeds from: http://rss.sme.sk/
    feeds = [
        (u'Tituln\u00E1 strana', u'http://rss.sme.sk/rss/rss.asp?id=frontpage'),
        (u'Naj\u010D\u00EDtanej\u0161ie za 4 hodiny', u'http://rss.sme.sk/rss/rss.asp?id=smenajcit4'),
        (u'Naj\u010D\u00EDtanej\u0161ie za 24 hod\u00EDn', u'http://rss.sme.sk/rss/rss.asp?id=smenajcit24'),
        (u'Z domova', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zdom'),
        (u'Zahrani\u010Die', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_zahr'),
        (u'Z domova + zahrani\u010Die', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline'),
        (u'Ekonomika', u'http://rss.sme.sk/rss/rss.asp?sek=ekon'),
        (u'Kult\u00FAra', u'http://rss.sme.sk/rss/rss.asp?sek=kult'),
        (u'Koment\u00E1re', u'http://rss.sme.sk/rss/rss.asp?sek=koment'),
        (u'Volby', u'http://rss.sme.sk/rss/rss.asp?sek=eVolby'),
        # Disabled feed:
        # (u'Press foto', u'http://rss.sme.sk/rss/rss.asp?sek=smeonline&rub=online_foto'),
        (u'\u0160port', u'http://rss.sme.sk/rss/rss.asp?sek=sport'),
        (u'Futbal', u'http://rss.sme.sk/rss/rss.asp?sek=futbal'),
        (u'Hokej', u'http://rss.sme.sk/rss/rss.asp?sek=hokej'),
        (u'Po\u010D\u00EDta\u010De', u'http://rss.sme.sk/rss/rss.asp?sek=pocit'),
        (u'Mobil', u'http://rss.sme.sk/rss/rss.asp?sek=mobil'),
        (u'Veda', u'http://rss.sme.sk/rss/rss.asp?sek=veda'),
        (u'Natankuj', u'http://rss.sme.sk/rss/rss.asp?sek=natankuj'),
        (u'Auto', u'http://rss.sme.sk/rss/rss.asp?sek=auto'),
        (u'Dom\u00E1cnos\u0165', u'http://rss.sme.sk/rss/rss.asp?sek=domac'),
        (u'\u017Dena', u'http://rss.sme.sk/rss/rss.asp?sek=zena'),
        (u'Z\u00E1bava', u'http://rss.sme.sk/rss/rss.asp?sek=zabava'),
        (u'Hry', u'http://rss.sme.sk/rss/rss.asp?sek=hry'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id': 'contenth'}),
        dict(name='div', attrs={'class': 'articlec col'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'id': re.compile('smeplayer.*')}),
    ]

    remove_tags_after = [dict(name='p', attrs={'class': 'autor_line'})]

    extra_css = (
        '\n'
        '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}\n'
        '@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/LiberationSans.ttf)}\n'
        'body {font-family: sans1, serif1;}\n'
    )

    def print_version(self, url):
        '''
        Build the print-page URL for an article URL.

        The fifth ``/``-separated component of ``url`` is used as the ``cl``
        parameter of ``clanok_tlac.asp``. An IndexError is raised when the
        URL has fewer than five components.
        '''
        article_id = url.split('/')[4]
        return u'http://korzar.sme.sk/clanok_tlac.asp?cl=' + str(article_id)

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
@ -6,6 +5,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
www.straitstimes.com
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class StraitsTimes(BasicNewsRecipe):
@ -29,9 +29,21 @@ class StraitsTimes(BasicNewsRecipe):
,'publisher' : publisher
}
remove_tags = [dict(name=['object','link','map'])]
preprocess_regexps = [
(re.compile(
r'<meta name="description" content="[^"]+"\s*/?>',
re.IGNORECASE|re.DOTALL),
lambda m:''),
(re.compile(r'<!--.+?-->', re.IGNORECASE|re.DOTALL),
lambda m: ''),
]
remove_tags = [
dict(name=['object','link','map'])
,dict(name='div',attrs={'align':'left'})
]
keep_only_tags = [dict(name='div', attrs={'class':['top_headline','story_text']})]
keep_only_tags = [dict(name='div', attrs={'class':'stleft'})]
remove_tags_after=dict(name='div',attrs={'class':'hr_thin'})
feeds = [
(u'Singapore' , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
@ -47,4 +59,3 @@ class StraitsTimes(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -22,18 +22,24 @@ class Sueddeutsche(BasicNewsRecipe):
encoding = 'iso-8859-15'
remove_javascript = True
keep_only_tags = [
dict(name='div', attrs={'id':["artikel","contentTable"]}) ,
]
remove_tags = [ dict(name='link'), dict(name='iframe'),
dict(name='div', attrs={'id':["themenbox","artikelfoot","CAD_AD","SKY_AD","NT1_AD","rechteSpalte"]}),
dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg","pages closed"]}),
dict(name='div', attrs={'class':["listHeader","listHeader2","hr2","item","videoBigButton"]}),
dict(name='p', attrs={'class':["ressortartikeln",]}),
dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
"SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
"pages closed","basebox right narrow"]}),
dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
"item","videoBigButton","articlefooter full-column",
"bildbanderolle full-column","footerCopy padleft5"]}),
dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
dict(name='div', attrs={'style':["position:relative;"]}),
dict(name='span', attrs={'class':["nlinkheaderteaserschwarz",]}),
dict(name='table', attrs={'class':["kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
dict(name='ul', attrs={'class':["breadcrumb","articles","activities"]}),
dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav"]}),
dict(name='td', attrs={'class':["artikelDruckenRight"]}),
dict(name='p', text = "ANZEIGE")
]
@ -64,8 +70,8 @@ class Sueddeutsche(BasicNewsRecipe):
(u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss')
]
# def print_version(self, url):
# return url.replace('/text/', '/text/print.html')
def print_version(self, url):
    '''
    Return the print URL for an article: every ``/text/`` in ``url``
    becomes ``/text/print.html``.
    '''
    text_dir = '/text/'
    print_page = text_dir + 'print.html'
    # split/join on a non-empty separator is equivalent to str.replace
    return print_page.join(url.split(text_dir))

View File

@ -16,7 +16,7 @@ class WallStreetJournal(BasicNewsRecipe):
needs_subscription = True
language = 'en'
max_articles_per_feed = 10
max_articles_per_feed = 25
timefmt = ' [%a, %b %d, %Y]'
no_stylesheets = True

View File

@ -23,13 +23,13 @@ class LinuxFreeze(Command):
is64bit = platform.architecture()[0] == '64bit'
arch = 'x86_64' if is64bit else 'i686'
ffi = '/usr/lib/libffi.so.5' if is64bit else '/usr/lib/gcc/i686-pc-linux-gnu/4.4.1/libffi.so.4'
ffi = '/usr/lib/gcc/x86_64-pc-linux-gnu/4.4.2/libffi.so.4' if is64bit else '/usr/lib/gcc/i686-pc-linux-gnu/4.4.1/libffi.so.4'
stdcpp = '/usr/lib/gcc/%s-pc-linux-gnu/%s/libstdc++.so.6'%(arch, '4.4.2'
if is64bit else '4.4.1')
QTDIR = '/usr/lib/qt4'
QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml',
'QtWebKit', 'QtDBus')
'QtWebKit', 'QtDBus', 'QtXmlPatterns')
binary_excludes = ['libGLcore*', 'libGL*', 'libnvidia*']

View File

@ -13,8 +13,8 @@ from setup import Command, modules, functions, basenames, __version__, \
from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
QT_DIR = 'C:\\Qt\\4.5.2'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'phonon']
QT_DIR = 'C:\\Qt\\4.6.0'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'
@ -347,7 +347,6 @@ class Win32Freeze(Command, WixMixIn):
cmd = [msvc.cc] + xflags + ['/Tc'+src, '/Fo'+dest]
self.run_builder(cmd)
exe = self.j(self.base, bname+'.exe')
manifest = exe+'.manifest'
lib = dll.replace('.dll', '.lib')
if self.newer(exe, [dest, lib, self.rc_template, __file__]):
self.info('Linking', bname)

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.29'
__version__ = '0.6.30'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -363,7 +363,7 @@ class NookOutput(OutputProfile):
description = _('This profile is intended for the B&N Nook.')
# Screen size is a best guess
screen_size = (600, 770)
screen_size = (600, 730)
dpi = 167
fbase = 16
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]

View File

@ -253,7 +253,9 @@ run_plugins_on_postprocess = functools.partial(_run_filetype_plugins,
def initialize_plugin(plugin, path_to_zip_file):
try:
return plugin(path_to_zip_file)
p = plugin(path_to_zip_file)
p.initialize()
return p
except Exception:
print 'Failed to initialize plugin:', plugin.name, plugin.version
tb = traceback.format_exc()

View File

@ -6,9 +6,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Embedded console for debugging.
'''
import sys, os, pprint
import sys, os
from calibre.utils.config import OptionParser
from calibre.constants import iswindows, isosx
from calibre.constants import iswindows
from calibre import prints
def option_parser():
@ -61,77 +61,8 @@ def migrate(old, new):
print 'Database migrated to', os.path.abspath(new)
def debug_device_driver():
from calibre.customize.ui import device_plugins
from calibre.devices.scanner import DeviceScanner
s = DeviceScanner()
s.scan()
devices = s.devices
if not iswindows:
devices = [list(x) for x in devices]
for d in devices:
for i in range(3):
d[i] = hex(d[i])
print 'USB devices on system:\n', pprint.pprint(devices)
if iswindows:
wmi = __import__('wmi', globals(), locals(), [], -1)
drives = []
print 'Drives detected:'
print '\t', '(ID, Partitions, Drive letter)'
for drive in wmi.WMI(find_classes=False).Win32_DiskDrive():
if drive.Partitions == 0:
continue
try:
partition = drive.associators("Win32_DiskDriveToDiskPartition")[0]
logical_disk = partition.associators('Win32_LogicalDiskToPartition')[0]
prefix = logical_disk.DeviceID+os.sep
drives.append((str(drive.PNPDeviceID), drive.Index, prefix))
except IndexError:
drives.append((str(drive.PNPDeviceID), 'No mount points found'))
for drive in drives:
print '\t', drive
if isosx:
from calibre.devices.usbms.device import Device
raw = Device.run_ioreg()
open('/tmp/ioreg.txt', 'wb').write(raw)
print 'ioreg output saved to /tmp/ioreg.txt'
connected_devices = []
for dev in device_plugins():
print 'Looking for', dev.__class__.__name__
connected = s.is_device_connected(dev, debug=True)
if connected:
connected_devices.append(dev)
errors = {}
success = False
for dev in connected_devices:
print 'Device possibly connected:', dev.__class__.name
print 'Trying to open device...',
try:
dev.open()
print 'OK'
except:
import traceback
errors[dev] = traceback.format_exc()
print 'failed'
continue
success = True
if hasattr(dev, '_main_prefix'):
print 'Main memory:', repr(dev._main_prefix)
print 'Total space:', dev.total_space()
break
if not success and errors:
print 'Opening of the following devices failed'
for dev,msg in errors.items():
print dev
print msg
print
if isosx and os.path.exists('/tmp/ioreg.txt'):
print
print
print "Don't forget to send the file /tmp/ioreg.txt as well"
print "You can view it by typing the command: open /tmp/ioreg.txt"
from calibre.devices import debug
print debug()
if iswindows:
raw_input('Press Enter to continue...')

View File

@ -76,7 +76,9 @@ def debug():
ioreg = None
if isosx:
from calibre.devices.usbms.device import Device
mount = repr(Device.osx_run_mount())
ioreg = Device.run_ioreg()
ioreg = 'Output from mount:\n\n'+mount+'\n\n'+ioreg
connected_devices = []
for dev in device_plugins():
out('Looking for', dev.__class__.__name__)

View File

@ -55,7 +55,6 @@ class CYBOOKG3(USBMS):
@classmethod
def can_handle(cls, device_info, debug=False):
USBMS.can_handle(device_info, debug)
if islinux:
return device_info[3] == 'Bookeen' and device_info[4] == 'Cybook Gen3'
return True
@ -88,7 +87,6 @@ class CYBOOK_OPUS(CYBOOKG3):
@classmethod
def can_handle(cls, device_info, debug=False):
USBMS.can_handle(device_info, debug)
if islinux:
return device_info[3] == 'Bookeen'
return True

View File

@ -55,7 +55,15 @@ class DevicePlugin(Plugin):
return False
@classmethod
def is_usb_connected_windows(cls, devices_on_system):
def print_usb_device_info(cls, info):
try:
print '\t', repr(info)
except:
import traceback
traceback.print_exc()
@classmethod
def is_usb_connected_windows(cls, devices_on_system, debug=False):
def id_iterator():
if hasattr(cls.VENDOR_ID, 'keys'):
@ -75,8 +83,12 @@ class DevicePlugin(Plugin):
vid, pid = 'vid_%4.4x'%vendor_id, 'pid_%4.4x'%product_id
vidd, pidd = 'vid_%i'%vendor_id, 'pid_%i'%product_id
for device_id in devices_on_system:
if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
if cls.test_bcd_windows(device_id, bcd) and cls.can_handle(device_id):
if (vid in device_id or vidd in device_id) and \
(pid in device_id or pidd in device_id) and \
cls.test_bcd_windows(device_id, bcd):
if debug:
cls.print_usb_device_info(device_id)
if cls.can_handle(device_id):
return True
return False
@ -97,7 +109,7 @@ class DevicePlugin(Plugin):
:param devices_on_system: List of devices currently connected
'''
if iswindows:
return cls.is_usb_connected_windows(devices_on_system)
return cls.is_usb_connected_windows(devices_on_system, debug=debug)
vendors_on_system = set([x[0] for x in devices_on_system])
vendors = cls.VENDOR_ID if hasattr(cls.VENDOR_ID, '__len__') else [cls.VENDOR_ID]
@ -118,8 +130,10 @@ class DevicePlugin(Plugin):
cbcd = cls.VENDOR_ID[vid][pid]
else:
cbcd = cls.BCD
if cls.test_bcd(bcd, cbcd) and cls.can_handle(dev,
debug=debug):
if cls.test_bcd(bcd, cbcd):
if debug:
cls.print_usb_device_info(dev)
if cls.can_handle(dev, debug=debug):
return True
return False
@ -152,12 +166,6 @@ class DevicePlugin(Plugin):
:param device_info: On windows a device ID string. On Unix a tuple of
``(vendor_id, product_id, bcd)``.
'''
try:
if debug:
print '\t', repr(device_info)
except:
import traceback
traceback.print_exc()
return True
def open(self):

View File

@ -204,20 +204,20 @@ class PRS505(CLI, Device):
class PRS700(PRS505):
name = 'PRS-600/700 Device Interface'
description = _('Communicate with the Sony PRS-600/700 eBook reader.')
name = 'PRS-600/700/900 Device Interface'
description = _('Communicate with the Sony PRS-600/700/900 eBook reader.')
author = 'Kovid Goyal and John Schember'
gui_name = 'SONY Touch edition'
gui_name = 'SONY Touch/Daily edition'
supported_platforms = ['windows', 'osx', 'linux']
BCD = [0x31a]
WINDOWS_MAIN_MEM = re.compile('PRS-((700/)|(600&))')
WINDOWS_CARD_A_MEM = re.compile(r'PRS-((700/\S+:)|(600_))MS')
WINDOWS_CARD_B_MEM = re.compile(r'PRS-((700/\S+:)|(600_))SD')
WINDOWS_MAIN_MEM = re.compile('PRS-((700/)|((6|9)00&))')
WINDOWS_CARD_A_MEM = re.compile(r'PRS-((700/\S+:)|((6|9)00_))MS')
WINDOWS_CARD_B_MEM = re.compile(r'PRS-((700/\S+:)|((6|9)00_))SD')
OSX_MAIN_MEM = re.compile(r'Sony PRS-((700/[^:]+)|(600)) Media')
OSX_CARD_A_MEM = re.compile(r'Sony PRS-((700/[^:]+:)|(600 ))MS Media')
OSX_CARD_B_MEM = re.compile(r'Sony PRS-((700/[^:]+:)|(600 ))SD Media')
OSX_MAIN_MEM = re.compile(r'Sony PRS-((700/[^:]+)|((6|9)00)) Media')
OSX_CARD_A_MEM = re.compile(r'Sony PRS-((700/[^:]+:)|((6|9)00 ))MS Media')
OSX_CARD_B_MEM = re.compile(r'Sony PRS-((700/[^:]+:)|((6|9)00 ))SD Media')

View File

@ -40,6 +40,7 @@ class LinuxScanner(object):
prod = os.path.join(base, 'idProduct')
bcd = os.path.join(base, 'bcdDevice')
man = os.path.join(base, 'manufacturer')
serial = os.path.join(base, 'serial')
prod_string = os.path.join(base, 'product')
dev = []
try:
@ -62,6 +63,11 @@ class LinuxScanner(object):
dev.append(open(prod_string).read().strip())
except:
dev.append('')
try:
dev.append(open(serial).read().strip())
except:
dev.append('')
ans.add(tuple(dev))
return ans

View File

@ -323,8 +323,16 @@ class Device(DeviceConfig, DevicePlugin):
ioreg = '/usr/sbin/ioreg'
if not os.access(ioreg, os.X_OK):
ioreg = 'ioreg'
return subprocess.Popen((ioreg+' -w 0 -S -c IOMedia').split(),
cmd = (ioreg+' -w 0 -S -c IOMedia').split()
for i in range(3):
try:
return subprocess.Popen(cmd,
stdout=subprocess.PIPE).communicate()[0]
except IOError: # Probably an interrupted system call
if i == 2:
raise
time.sleep(2)
def osx_sort_names(self, names):
return names
@ -372,14 +380,28 @@ class Device(DeviceConfig, DevicePlugin):
break
return self.osx_sort_names(names)
@classmethod
def osx_run_mount(cls):
    '''
    Run the ``mount`` command and return everything it wrote to stdout.

    An IOError aborts the attempt; up to three attempts are made with a
    two second pause between them, and the IOError from the third attempt
    is re-raised.
    '''
    last_attempt = 2
    for attempt in range(last_attempt + 1):
        try:
            proc = subprocess.Popen('mount', stdout=subprocess.PIPE)
            return proc.communicate()[0]
        except IOError:  # Probably an interrupted system call
            if attempt == last_attempt:
                raise
            time.sleep(2)
def open_osx(self):
mount = subprocess.Popen('mount', shell=True, stdout=subprocess.PIPE).stdout.read()
mount = self.osx_run_mount()
names = self.get_osx_mountpoints()
dev_pat = r'/dev/%s(\w*)\s+on\s+([^\(]+)\s+'
if 'main' not in names.keys():
raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
main_pat = dev_pat % names['main']
self._main_prefix = re.search(main_pat, mount).group(2) + os.sep
main_match = re.search(main_pat, mount)
if main_match is None:
raise DeviceError(_('Unable to detect the %s mount point. Try rebooting.')%self.__class__.__name__)
self._main_prefix = main_match.group(2) + os.sep
card_a_pat = names['carda'] if 'carda' in names.keys() else None
card_b_pat = names['cardb'] if 'cardb' in names.keys() else None

View File

@ -2,11 +2,7 @@ usbobserver.so : usbobserver.o
gcc -arch i386 -arch ppc -bundle usbobserver.o -o usbobserver.so -framework Python -framework IOKit -framework CoreFoundation
usbobserver.o : usbobserver.c
gcc -arch i386 -arch ppc -dynamic -I/Library/Frameworks/Python.framework/Versions/2.5/include/python2.5 -c usbobserver.c -o usbobserver.o
install : usbobserver.so
cp usbobserver.so /Library/Frameworks/Python.framework/Versions/2.5/lib/python2.5/site-packages/
gcc -arch i386 -arch ppc -dynamic -I/Library/Frameworks/Python.framework/Versions/Current/Headers -c usbobserver.c -o usbobserver.o
clean :
rm -f *.o *.so

View File

@ -26,28 +26,34 @@
#include <IOKit/usb/IOUSBLib.h>
#include <IOKit/IOCFPlugIn.h>
#include <IOKit/IOKitLib.h>
#include <mach/mach.h>
CFStringRef USB_PROPS[3] = { CFSTR("USB Vendor Name"), CFSTR("USB Product Name"), CFSTR("USB Serial Number") };
/*
 * Read one string property of an IOKit registry entry and return it as a
 * Python unicode object.
 *
 * dev:  the registry entry to query.
 * prop: index into USB_PROPS selecting the property name.
 *
 * Returns a new reference. An empty string is returned when the property is
 * missing, is not a CFString, or cannot be converted to UTF-8 within the
 * local buffer. Returns NULL (with a Python error set) only if the final
 * decode fails.
 */
static PyObject*
get_iokit_string_property(io_service_t dev, int prop) {
    CFTypeRef PropRef;
    char buf[500];

    buf[0] = '\0';
    PropRef = IORegistryEntryCreateCFProperty(dev, USB_PROPS[prop], kCFAllocatorDefault, 0);
    if (PropRef) {
        /* The property may be of any CF type; only strings can be copied out. */
        if (CFGetTypeID(PropRef) != CFStringGetTypeID() ||
            !CFStringGetCString((CFStringRef)PropRef, buf, sizeof(buf), kCFStringEncodingUTF8))
            buf[0] = '\0';
        /* "Create" functions return an owned reference: release it to avoid a leak. */
        CFRelease(PropRef);
    }
    return PyUnicode_DecodeUTF8(buf, strlen(buf), "replace");
}
static PyObject *
usbobserver_get_usb_devices(PyObject *self, PyObject *args) {
mach_port_t masterPort;
CFMutableDictionaryRef matchingDict;
kern_return_t kr;
/* Create a master port for communication with IOKit */
kr = IOMasterPort(MACH_PORT_NULL, &masterPort);
if (kr || !masterPort) {
PyErr_SetString(PyExc_RuntimeError, "Couldn't create master IOKit port");
return NULL;
}
//Set up matching dictionary for class IOUSBDevice and its subclasses
matchingDict = IOServiceMatching(kIOUSBDeviceClassName);
if (!matchingDict) {
PyErr_SetString(PyExc_RuntimeError, "Couldn't create a USB matching dictionary");
mach_port_deallocate(mach_task_self(), masterPort);
return NULL;
}
@ -58,12 +64,12 @@ usbobserver_get_usb_devices(PyObject *self, PyObject *args) {
SInt32 score;
IOUSBDeviceInterface182 **dev = NULL;
UInt16 vendor, product, bcd;
PyObject *manufacturer, *productn, *serial;
PyObject *devices, *device;
devices = PyList_New(0);
if (devices == NULL) {
PyErr_NoMemory();
mach_port_deallocate(mach_task_self(), masterPort);
return NULL;
}
@ -85,7 +91,15 @@ usbobserver_get_usb_devices(PyObject *self, PyObject *args) {
kr = (*dev)->GetDeviceVendor(dev, &vendor);
kr = (*dev)->GetDeviceProduct(dev, &product);
kr = (*dev)->GetDeviceReleaseNumber(dev, &bcd);
device = Py_BuildValue("(iii)", vendor, product, bcd);
manufacturer = get_iokit_string_property(usbDevice, 0);
if (manufacturer == NULL) manufacturer = Py_None;
productn = get_iokit_string_property(usbDevice, 1);
if (productn == NULL) productn = Py_None;
serial = get_iokit_string_property(usbDevice, 2);
if (serial == NULL) serial = Py_None;
device = Py_BuildValue("(iiiNNN)", vendor, product, bcd, manufacturer, productn, serial);
if (device == NULL) {
IOObjectRelease(usbDevice);
(*plugInInterface)->Release(plugInInterface);
@ -109,11 +123,7 @@ usbobserver_get_usb_devices(PyObject *self, PyObject *args) {
Py_DECREF(device);
}
//Finished with master port
mach_port_deallocate(mach_task_self(), masterPort);
return Py_BuildValue("N", devices);
return devices;
}
static PyMethodDef usbobserver_methods[] = {

View File

@ -33,7 +33,7 @@ class LRFOptions(object):
if unicode(x.file_as):
self.title_sort = unicode(x.file_as)
self.freetext = f2s(m.description)
self.category = f2s(m.tags)
self.category = f2s(m.subject)
self.cover = None
self.use_metadata_cover = True
self.output = output

View File

@ -386,3 +386,36 @@ class MetaInformation(object):
def __nonzero__(self):
return bool(self.title or self.author or self.comments or self.tags)
def check_isbn10(isbn):
    '''
    Validate the check digit of an ISBN-10.

    :param isbn: A string whose first ten characters are the ISBN (nine
                 digits followed by a check character, a digit or ``X``).
    :return: ``isbn`` unchanged if the check character matches, otherwise
             ``None`` (also for input that is too short, contains
             non-digits, or is not a string).
    '''
    try:
        # A list (not map()) so the digits can be re-used on Python 2 and 3
        digits = [int(ch) for ch in isbn[:9]]
        # Weighted sum with weights 1..9, modulo 11; a result of 10 is written 'X'
        check = sum((pos + 1) * digit for pos, digit in enumerate(digits)) % 11
        last = isbn[9]
        if (check == 10 and last == 'X') or check == int(last):
            return isbn
    except (TypeError, ValueError, IndexError):
        # Not a string, a non-digit character, or fewer than ten characters
        pass
    return None
def check_isbn13(isbn):
    '''
    Validate the check digit of an ISBN-13.

    :param isbn: A string whose first thirteen characters are the ISBN
                 digits.
    :return: ``isbn`` unchanged if the thirteenth digit matches the check
             digit computed from the first twelve, otherwise ``None`` (also
             for input that is too short, contains non-digits, or is not a
             string).
    '''
    try:
        # A list (not map()) so the digits can be re-used on Python 2 and 3
        digits = [int(ch) for ch in isbn[:12]]
        # Weights alternate 1, 3, 1, 3, ... starting at the first digit
        total = sum(digit if pos % 2 == 0 else 3 * digit
                    for pos, digit in enumerate(digits))
        check = (10 - total % 10) % 10  # a computed 10 wraps around to 0
        if str(check) == isbn[12]:
            return isbn
    except (TypeError, ValueError, IndexError):
        # Not a string, a non-digit character, or fewer than thirteen characters
        pass
    return None
def check_isbn(isbn):
    '''
    Normalize an ISBN and validate its check digit.

    The input is upper-cased and every character other than ``0-9`` and
    ``X`` is dropped (so hyphens and spaces are allowed). The remainder is
    validated as an ISBN-10 or ISBN-13 according to its length.

    :param isbn: The ISBN text as typed; ``None`` or an empty string is
                 treated as "no ISBN".
    :return: The normalized ISBN if it is valid, otherwise ``None``.
    '''
    if not isbn:
        # Nothing to validate; previously None raised AttributeError
        return None
    isbn = re.sub(r'[^0-9X]', '', isbn.upper())
    if len(isbn) == 10:
        return check_isbn10(isbn)
    if len(isbn) == 13:
        return check_isbn13(isbn)
    return None

View File

@ -121,6 +121,7 @@ def metadata_from_filename(name, pat=None):
mi = MetaInformation(None, None)
if pat is None:
pat = re.compile(prefs.get('filename_pattern'))
name = name.replace('_', ' ')
match = pat.search(name)
if match:
try:
@ -131,6 +132,15 @@ def metadata_from_filename(name, pat=None):
au = match.group('author')
aus = string_to_authors(au)
mi.authors = aus
if prefs['swap_author_names'] and mi.authors:
def swap(a):
parts = a.split()
if len(parts) > 1:
t = parts[-1]
parts = parts[:-1]
parts.insert(0, t)
return ' '.join(parts)
mi.authors = [swap(x) for x in mi.authors]
except (IndexError, ValueError):
pass
try:

View File

@ -435,7 +435,8 @@ class MobiReader(object):
open(os.path.splitext(htmlfile)[0] + '.ncx', 'wb').write(ncx)
def read_embedded_metadata(self, root, elem, guide):
raw = '<package>' + html.tostring(elem, encoding='utf-8') + '</package>'
raw = '<?xml version="1.0" encoding="utf-8" ?>\n<package>' + \
html.tostring(elem, encoding='utf-8') + '</package>'
stream = cStringIO.StringIO(raw)
opf = OPF(stream)
self.embedded_mi = MetaInformation(opf)
@ -602,7 +603,7 @@ class MobiReader(object):
* opf.cover.split('/'))):
opf.cover = None
manifest = [(htmlfile, 'text/x-oeb1-document'),
manifest = [(htmlfile, 'application/xhtml+xml'),
(os.path.abspath('styles.css'), 'text/css')]
bp = os.path.dirname(htmlfile)
for i in getattr(self, 'image_names', []):

View File

@ -385,6 +385,7 @@ def initialize_file_icon_provider():
def file_icon_provider():
    '''
    Return the module-level ``_file_icon_provider`` after calling
    :func:`initialize_file_icon_provider`.
    '''
    # The global is only read here, so no ``global`` declaration is needed.
    initialize_file_icon_provider()
    return _file_icon_provider
class FileDialog(QObject):

View File

@ -24,6 +24,7 @@ class DuplicatesAdder(QThread):
def run(self):
count = 1
for mi, cover, formats in self.duplicates:
formats = [f for f in formats if not f.lower().endswith('.opf')]
id = self.db.create_book_entry(mi, cover=cover,
add_duplicates=True)
self.db_adder.add_formats(id, formats)
@ -139,6 +140,7 @@ class DBAdder(Thread):
if id is None:
self.duplicates.append((mi, cover, formats))
else:
formats = [f for f in formats if not f.lower().endswith('.opf')]
self.add_formats(id, formats)
else:
self.names.append(name)

View File

@ -56,6 +56,7 @@ class AddSave(QTabWidget, Ui_TabWidget):
self.opt_read_metadata_from_filename.setChecked(not prefs['read_file_metadata'])
self.filename_pattern = FilenamePattern(self)
self.metadata_box.layout().insertWidget(0, self.filename_pattern)
self.opt_swap_author_names.setChecked(prefs['swap_author_names'])
def validate(self):
tmpl = preprocess_template(self.opt_template.text())
@ -87,6 +88,7 @@ class AddSave(QTabWidget, Ui_TabWidget):
prefs['read_file_metadata'] = not bool(self.opt_read_metadata_from_filename.isChecked())
pattern = self.filename_pattern.commit()
prefs['filename_pattern'] = pattern
prefs['swap_author_names'] = bool(self.opt_swap_author_names.isChecked())
return True

View File

@ -20,8 +20,8 @@
<attribute name="title">
<string>&amp;Adding books</string>
</attribute>
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<layout class="QGridLayout" name="gridLayout_3">
<item row="0" column="0" colspan="2">
<widget class="QLabel" name="label_6">
<property name="text">
<string>Here you can control how calibre will read metadata from the files you add to it. calibre can either read metadata from the contents of the file, or from the filename.</string>
@ -31,14 +31,24 @@
</property>
</widget>
</item>
<item>
<item row="1" column="0">
<widget class="QCheckBox" name="opt_read_metadata_from_filename">
<property name="text">
<string>Read metadata only from &amp;file name</string>
</property>
</widget>
</item>
<item>
<item row="1" column="1">
<widget class="QCheckBox" name="opt_swap_author_names">
<property name="toolTip">
<string>Swap the firstname and lastname of the author. This affects only metadata read from file names.</string>
</property>
<property name="text">
<string>&amp;Swap author firstname and lastname</string>
</property>
</widget>
</item>
<item row="2" column="0" colspan="2">
<widget class="QGroupBox" name="metadata_box">
<property name="title">
<string>&amp;Configure metadata from file name</string>

View File

@ -23,7 +23,8 @@ from calibre.gui2.dialogs.fetch_metadata import FetchMetadata
from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.gui2.widgets import ProgressIndicator
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, authors_to_string
from calibre.ebooks.metadata import authors_to_sort_string, string_to_authors, \
authors_to_string, check_isbn
from calibre.ebooks.metadata.library_thing import cover_from_isbn
from calibre import islinux
from calibre.ebooks.metadata.meta import get_metadata
@ -336,6 +337,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
isbn = db.isbn(self.id, index_is_id=True)
if not isbn:
isbn = ''
self.isbn.textChanged.connect(self.validate_isbn)
self.isbn.setText(isbn)
aus = self.db.author_sort(row)
self.author_sort.setText(aus if aus else '')
@ -380,6 +382,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.series_index.setValue(1.0)
QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.enable_series_index)
QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.enable_series_index)
self.series.lineEdit().editingFinished.connect(self.increment_series_index)
self.show()
height_of_rest = self.frameGeometry().height() - self.cover.height()
@ -394,6 +397,20 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
self.cover.setPixmap(pm)
self.cover_data = cover
def validate_isbn(self, isbn):
    '''
    Colour the ISBN line edit and set its tooltip according to whether the
    text is a valid ISBN.

    Empty text gets a fully transparent background and the "valid" tooltip;
    a valid ISBN a translucent green background; anything else a
    translucent red background and the "invalid" tooltip.
    '''
    text = unicode(isbn).strip()
    if not text:
        style = 'QLineEdit { background-color: rgba(0,255,0,0%) }'
        valid = True
    elif check_isbn(text):
        style = 'QLineEdit { background-color: rgba(0,255,0,20%) }'
        valid = True
    else:
        style = 'QLineEdit { background-color: rgba(255,0,0,20%) }'
        valid = False
    self.isbn.setStyleSheet(style)
    if valid:
        self.isbn.setToolTip(_('This ISBN number is valid'))
    else:
        self.isbn.setToolTip(_('This ISBN number is invalid'))
def show_format(self, item, *args):
fmt = item.ext
self.emit(SIGNAL('view_format(PyQt_PyObject)'), fmt)
@ -581,6 +598,16 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
def enable_series_index(self, *args):
self.series_index.setEnabled(True)
def increment_series_index(self):
    '''
    Set the series index spin box to the value the database returns from
    ``get_next_series_num_for`` for the series name currently entered.

    Does nothing when there is no database or the series name is empty.
    Any error is printed as a traceback and otherwise ignored.
    '''
    if self.db is None:
        return
    try:
        name = unicode(self.series.text())
        if not name:
            return
        next_index = self.db.get_next_series_num_for(name)
        self.series_index.setValue(next_index)
    except:
        # Best effort only: a failure here must not break the dialog
        traceback.print_exc()
def remove_unused_series(self):
self.db.remove_unused_series()
idx = qstring_to_unicode(self.series.currentText())

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QVBoxLayout, QDialog, QLabel, QDialogButtonBox, Qt, \
QAbstractListModel, QVariant, QListView, QSize
from calibre.gui2 import NONE, file_icon_provider
class Formats(QAbstractListModel):
    '''
    List model over a sorted collection of format names. Each row shows the
    upper-cased name and the icon the file icon provider returns for the
    lower-cased name.
    '''

    def __init__(self, fmts):
        QAbstractListModel.__init__(self)
        self.fmts = sorted(fmts)
        self.fi = file_icon_provider()

    def rowCount(self, parent):
        '''Number of formats in the model.'''
        return len(self.fmts)

    def data(self, index, role):
        '''Return the text or icon for ``index``; NONE for any other role.'''
        row = index.row()
        if role == Qt.DecorationRole:
            icon = self.fi.icon_from_ext(self.fmts[row].lower())
            return QVariant(icon)
        if role == Qt.DisplayRole:
            label = self.fmts[row].upper()
            return QVariant(label)
        return NONE

    def flags(self, index):
        '''Every item is enabled and selectable.'''
        return Qt.ItemIsSelectable|Qt.ItemIsEnabled

    def fmt(self, idx):
        '''Return the format name stored at the row of ``idx``.'''
        row = idx.row()
        return self.fmts[row]
class SelectFormats(QDialog):
    '''
    Dialog that lets the user pick formats from a list.

    :param fmt_list: the format names to offer
    :param msg: text shown above the list
    :param single: if True only one format can be selected, otherwise
                   several
    :param parent: parent widget passed to QDialog

    After the dialog has been accepted the chosen formats are available
    in the set ``selected_formats``.
    '''

    def __init__(self, fmt_list, msg, single=False, parent=None):
        QDialog.__init__(self, parent)
        self._l = QVBoxLayout(self)
        self.setLayout(self._l)
        self.setWindowTitle(_('Choose formats'))

        self._m = QLabel(msg)
        # setWordWrap is a method. Assigning True to it (as was done
        # previously) only replaced the attribute on this instance and
        # never turned word wrapping on.
        self._m.setWordWrap(True)
        self._l.addWidget(self._m)

        self.formats = Formats(fmt_list)
        self.fview = QListView(self)
        self._l.addWidget(self.fview)
        self.fview.setModel(self.formats)
        self.fview.setSelectionMode(self.fview.SingleSelection if single else
                self.fview.MultiSelection)

        self.bbox = \
            QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel,
                    Qt.Horizontal, self)
        self._l.addWidget(self.bbox)
        self.bbox.accepted.connect(self.accept)
        self.bbox.rejected.connect(self.reject)

        self.fview.setIconSize(QSize(48, 48))
        self.fview.setSpacing(2)
        self.resize(350, 500)

        self.selected_formats = set([])

    def accept(self, *args):
        'Record the formats selected in the list, then close the dialog.'
        for idx in self.fview.selectedIndexes():
            self.selected_formats.add(self.formats.fmt(idx))
        QDialog.accept(self, *args)
if __name__ == '__main__':
from PyQt4.Qt import QApplication
app = QApplication([])
d = SelectFormats(['epub', 'lrf', 'lit', 'mobi'], 'Choose a format')
d.exec_()
print d.selected_formats

View File

@ -11,7 +11,7 @@ from PyQt4.QtGui import QTableView, QAbstractItemView, QColor, \
QPen, QStyle, QPainter, \
QImage, QApplication, QMenu, \
QStyledItemDelegate, QCompleter
from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, \
from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, pyqtSignal, \
SIGNAL, QObject, QSize, QModelIndex, QDate
from calibre import strftime
@ -155,6 +155,10 @@ class TagsDelegate(QStyledItemDelegate):
return editor
class BooksModel(QAbstractTableModel):
about_to_be_sorted = pyqtSignal(object, name='aboutToBeSorted')
sorting_done = pyqtSignal(object, name='sortingDone')
headers = {
'title' : _("Title"),
'authors' : _("Author(s)"),
@ -285,13 +289,14 @@ class BooksModel(QAbstractTableModel):
def sort(self, col, order, reset=True):
# Sort the database on the field that column_map gives for column
# position `col`; `order` is compared against Qt.AscendingOrder.
# Emits about_to_be_sorted before and sorting_done after the sort.
# Does nothing when there is no database.
if not self.db:
return
# Hand listeners the db.id callable before the row order changes
self.about_to_be_sorted.emit(self.db.id)
ascending = order == Qt.AscendingOrder
self.db.sort(self.column_map[col], ascending)
# NOTE(review): indentation is lost in this listing; presumably both
# clear_caches() and reset() are guarded by `reset` - confirm
if reset:
self.clear_caches()
self.reset()
# Remember the field name and Qt sort order that were used
self.sorted_on = (self.column_map[col], order)
# Hand listeners the db.index callable now that the new order is in place
self.sorting_done.emit(self.db.index)
def refresh(self, reset=True):
try:
@ -631,12 +636,16 @@ class BooksModel(QAbstractTableModel):
val *= 2
self.db.set_rating(id, val)
elif column == 'series':
val = val.strip()
pat = re.compile(r'\[([.0-9]+)\]')
match = pat.search(val)
if match is not None:
self.db.set_series_index(id, float(match.group(1)))
val = pat.sub('', val)
val = val.strip()
elif val:
ni = self.db.get_next_series_num_for(val)
if ni != 1:
self.db.set_series_index(id, ni)
if val:
self.db.set_series(id, val)
elif column == 'timestamp':
@ -696,6 +705,22 @@ class BooksView(TableView):
hv = self.verticalHeader()
hv.setClickable(True)
hv.setCursor(Qt.PointingHandCursor)
self.selected_ids = []
self._model.about_to_be_sorted.connect(self.about_to_be_sorted)
self._model.sorting_done.connect(self.sorting_done)
def about_to_be_sorted(self, idc):
    '''
    Store in ``selected_ids`` the result of calling `idc` on the row
    number of every currently selected row.
    '''
    row_numbers = []
    for index in self.selectionModel().selectedRows():
        row_numbers.append(index.row())
    self.selected_ids = list(map(idc, row_numbers))
def sorting_done(self, indexc):
# Re-select the rows remembered in selected_ids. `indexc` is called
# on each remembered value to obtain the row to select in column 0.
if self.selected_ids:
indices = [self.model().index(indexc(i), 0) for i in
self.selected_ids]
sm = self.selectionModel()
for idx in indices:
# Select the whole row that the index belongs to
sm.select(idx, sm.Select|sm.Rows)
# Forget the remembered values once they have been used.
# NOTE(review): indentation is lost in this listing, so it is unclear
# whether this reset sits inside or after the `if` - confirm
self.selected_ids = []
def columns_sorted(self):
for i in range(self.model().columnCount(None)):

View File

@ -314,6 +314,16 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.view_menu.addAction(_('View'))
self.view_menu.addAction(_('View specific format'))
self.action_view.setMenu(self.view_menu)
self.delete_menu = QMenu()
self.delete_menu.addAction(_('Remove selected books'))
self.delete_menu.addAction(
_('Remove files of a specific format from selected books..'))
self.delete_menu.addAction(
_('Remove all formats from selected books, except...'))
self.delete_menu.addAction(
_('Remove covers from selected books'))
self.action_del.setMenu(self.delete_menu)
QObject.connect(self.action_save, SIGNAL("triggered(bool)"),
self.save_to_disk)
QObject.connect(self.save_menu.actions()[0], SIGNAL("triggered(bool)"),
@ -330,6 +340,11 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
SIGNAL("triggered(bool)"), self.view_specific_format)
self.connect(self.action_open_containing_folder,
SIGNAL('triggered(bool)'), self.view_folder)
self.delete_menu.actions()[0].triggered.connect(self.delete_books)
self.delete_menu.actions()[1].triggered.connect(self.delete_selected_formats)
self.delete_menu.actions()[2].triggered.connect(self.delete_all_but_selected_formats)
self.delete_menu.actions()[3].triggered.connect(self.delete_covers)
self.action_open_containing_folder.setShortcut(Qt.Key_O)
self.addAction(self.action_open_containing_folder)
self.action_sync.setShortcut(Qt.Key_D)
@ -376,6 +391,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
setPopupMode(QToolButton.MenuButtonPopup)
self.tool_bar.widgetForAction(self.action_view).\
setPopupMode(QToolButton.MenuButtonPopup)
self.tool_bar.widgetForAction(self.action_del).\
setPopupMode(QToolButton.MenuButtonPopup)
self.tool_bar.widgetForAction(self.action_preferences).\
setPopupMode(QToolButton.MenuButtonPopup)
self.tool_bar.setContextMenuPolicy(Qt.PreventContextMenu)
@ -987,7 +1004,72 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
############################################################################
############################### Delete books ###############################
def delete_books(self, checked):
def _get_selected_formats(self, msg):
    '''
    Ask the user to choose among all formats reported by the library
    database, showing `msg` in the dialog.

    :return: the dialog's ``selected_formats`` if it was accepted,
             otherwise None
    '''
    from calibre.gui2.dialogs.select_formats import SelectFormats
    db = self.library_view.model().db
    choices = [fmt.lower() for fmt in db.all_formats()]
    dialog = SelectFormats(choices, msg, parent=self)
    if dialog.exec_() == dialog.Accepted:
        return dialog.selected_formats
    return None
def _get_selected_ids(self, err_title=_('Cannot delete')):
    '''
    Return the set of values the library model's ``id`` gives for the
    rows selected in the library view.

    If no row is selected, an error dialog titled `err_title` is shown
    and an empty set is returned.
    '''
    view = self.library_view
    selected = view.selectionModel().selectedRows()
    if selected:
        to_id = view.model().id
        return set([to_id(row) for row in selected])
    error_dialog(self, err_title, _('No book selected')).exec_()
    return set([])
def delete_selected_formats(self, *args):
    '''
    Remove the formats chosen by the user from every selected book.

    Returns without doing anything if no books are selected or no
    formats are chosen.
    '''
    ids = self._get_selected_ids()
    if not ids:
        return
    prompt = _('Choose formats to be deleted')
    fmts = self._get_selected_formats(prompt)
    if not fmts:
        return
    model = self.library_view.model()
    for id in ids:
        for fmt in fmts:
            model.db.remove_format(id, fmt, index_is_id=True,
                    notify=False)
    # Update the affected rows, then re-announce the current row
    model.refresh_ids(ids)
    current = self.library_view.currentIndex()
    model.current_changed(current, current)
def delete_all_but_selected_formats(self, *args):
    '''
    Remove from every selected book all formats except the ones chosen
    by the user.

    Returns without doing anything if no books are selected or the
    format chooser returns None. An empty choice is acted upon: no
    format is kept.
    '''
    ids = self._get_selected_ids()
    if not ids:
        return
    prompt = '<p>'+_('Choose formats <b>not</b> to be deleted')
    keep = self._get_selected_formats(prompt)
    if keep is None:
        return
    keep = set(keep)
    model = self.library_view.model()
    for id in ids:
        present = model.db.formats(id, index_is_id=True)
        if present is None:
            continue
        # formats() gives a comma separated string; compare lower-cased
        present = set([x.lower() for x in present.split(',')])
        for fmt in present - keep:
            model.db.remove_format(id, fmt, index_is_id=True,
                    notify=False)
    # Update the affected rows, then re-announce the current row
    model.refresh_ids(ids)
    current = self.library_view.currentIndex()
    model.current_changed(current, current)
def delete_covers(self, *args):
    '''
    Remove the cover of every selected book.

    Returns without doing anything if no books are selected.
    '''
    ids = self._get_selected_ids()
    if not ids:
        return
    model = self.library_view.model()
    for id in ids:
        model.db.remove_cover(id)
    # Update the affected rows, then re-announce the current row
    model.refresh_ids(ids)
    current = self.library_view.currentIndex()
    model.current_changed(current, current)
def delete_books(self, *args):
'''
Delete selected books from device or library.
'''
@ -1591,7 +1673,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
view.resizeColumnsToContents()
view.resize_on_select = False
self.status_bar.reset_info()
self.current_view().clearSelection()
if location == 'library':
self.action_edit.setEnabled(True)
self.action_convert.setEnabled(True)
@ -1600,6 +1681,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.action_sync.setEnabled(True)
self.status_bar.tag_view_button.setEnabled(True)
self.status_bar.cover_flow_button.setEnabled(True)
for action in list(self.delete_menu.actions())[1:]:
action.setEnabled(True)
else:
self.action_edit.setEnabled(False)
self.action_convert.setEnabled(False)
@ -1608,6 +1691,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.action_sync.setEnabled(False)
self.status_bar.tag_view_button.setEnabled(False)
self.status_bar.cover_flow_button.setEnabled(False)
for action in list(self.delete_menu.actions())[1:]:
action.setEnabled(False)
def device_job_exception(self, job):

View File

@ -568,8 +568,8 @@ class DocumentView(QWebView):
delta_y = self.document.window_height - 25
if self.document.at_top:
if self.manager is not None:
self.manager.previous_document()
self.to_bottom = True
self.manager.previous_document()
else:
opos = self.document.ypos
upper_limit = opos - delta_y

View File

@ -10,6 +10,7 @@ import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \
itertools, functools, traceback
from itertools import repeat
from datetime import datetime
from math import floor
from PyQt4.QtCore import QThread, QReadWriteLock
try:
@ -864,6 +865,11 @@ class LibraryDatabase2(LibraryDatabase):
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
return os.access(path, os.R_OK)
def remove_cover(self, id):
    '''
    Delete the file ``cover.jpg`` from the folder of the book with the
    given id, if that file exists.
    '''
    parts = (self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
    cover = os.path.join(*parts)
    if not os.path.exists(cover):
        return
    os.remove(cover)
def set_cover(self, id, data):
'''
Set the cover for this book.
@ -1080,7 +1086,18 @@ class LibraryDatabase2(LibraryDatabase):
if tags and tag in tags.lower():
yield r[FIELD_MAP['id']]
def get_next_series_num_for(self, series):
    '''
    Return the series index to use for the next book added to `series`.

    The result is 1.0 when no series of that name exists or when the
    query for the highest index yields None; otherwise it is the highest
    series index among the books linked to the series, plus one,
    rounded down.
    '''
    first_index = 1.0
    sid = self.conn.get('SELECT id from series WHERE name=?',
            (series,), all=False)
    if sid is None:
        return first_index
    query = ('SELECT MAX(series_index) FROM books WHERE id IN '
            '(SELECT book FROM books_series_link where series=?)')
    highest = self.conn.get(query, (sid,), all=False)
    if highest is None:
        return first_index
    return floor(highest+1)
def set(self, row, column, val):
'''

View File

@ -8,7 +8,7 @@ Wrapper for multi-threaded access to a single sqlite database connection. Serial
all calls.
'''
import sqlite3 as sqlite, traceback, time, uuid
from sqlite3 import IntegrityError
from sqlite3 import IntegrityError, OperationalError
from threading import Thread
from Queue import Queue
from threading import RLock
@ -138,8 +138,17 @@ class DBThread(Thread):
ok, res = False, (err, traceback.format_exc())
else:
func = getattr(self.conn, func)
try:
for i in range(3):
try:
ok, res = True, func(*args, **kwargs)
break
except OperationalError, err:
# Retry if unable to open db file
if 'unable to open' not in str(err) or i == 2:
raise
traceback.print_exc()
time.sleep(0.5)
except Exception, err:
ok, res = False, (err, traceback.format_exc())
self.results.put((ok, res))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -649,6 +649,8 @@ def _prefs():
help=_('Read metadata from files'))
c.add_opt('worker_process_priority', default='normal',
help=_('The priority of worker processes'))
c.add_opt('swap_author_names', default=False,
help=_('Swap author first and last names when reading metadata'))
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
return c

View File

@ -100,6 +100,8 @@ _extra_lang_codes = {
'en_CA' : _('English (CA)'),
'en_IN' : _('English (IND)'),
'en_TH' : _('English (TH)'),
'en_CY' : _('English (CY)'),
'de_AT' : _('German (AT)'),
'nl' : _('Dutch (NL)'),
'nl_BE' : _('Dutch (BE)'),
'und' : _('Unknown')

View File

@ -38,6 +38,9 @@ class RecipeInput(InputFormatPlugin):
OptionRecommendation(name='password', recommended_value=None,
help=_('Password for sites that require a login to access '
'content.')),
OptionRecommendation(name='dont_download_recipe',
recommended_value=False,
help=_('Download latest version of builtin recipes')),
OptionRecommendation(name='lrf', recommended_value=False,
help='Optimize fetching for subsequent conversion to LRF.'),
])
@ -52,7 +55,8 @@ class RecipeInput(InputFormatPlugin):
else:
title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
title = os.path.basename(title).rpartition('.')[0]
raw = get_builtin_recipe_by_title(title, log=log, download_recipe=True)
raw = get_builtin_recipe_by_title(title, log=log,
download_recipe=not opts.dont_download_recipe)
builtin = False
try:
recipe = compile_recipe(raw)
@ -68,6 +72,8 @@ class RecipeInput(InputFormatPlugin):
if builtin:
raw = get_builtin_recipe_by_title(title, log=log,
download_recipe=False)
if raw is None:
raise ValueError('Failed to find builtin recipe: '+title)
recipe = compile_recipe(raw)

View File

@ -20,7 +20,7 @@ NS = 'http://calibre-ebook.com/recipe_collection'
E = ElementMaker(namespace=NS, nsmap={None:NS})
def iterate_over_builtin_recipe_files():
exclude = ['craigslist', 'iht', 'le_temps', 'outlook_india', 'toronto_sun']
exclude = ['craigslist', 'iht', 'outlook_india', 'toronto_sun']
d = os.path.dirname
base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'resources', 'recipes')
for x in os.walk(base):

View File

@ -188,7 +188,8 @@ class RecursiveFetcher(object):
delta = time.time() - self.last_fetch_at
if delta < self.delay:
time.sleep(delta)
if re.search(r'\s+|,', url) is not None:
if isinstance(url, unicode):
url = url.encode('utf-8')
purl = list(urlparse.urlparse(url))
for i in range(2, 6):
purl[i] = quote(purl[i])

View File

@ -17,7 +17,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
A Python package to parse and build CSS Cascading Style Sheets. DOM only, not any rendering facilities!
A Python package to parse and build CSS Cascading Style Sheets. DOM only, not
any rendering facilities!
Based upon and partly implementing the following specifications :
@ -26,30 +27,47 @@ Based upon and partly implementing the following specifications :
`CSS 2.1 Errata <http://www.w3.org/Style/css2-updates/CR-CSS21-20070719-errata.html>`__
A few errata, mainly the definition of CHARSET_SYM tokens
`CSS3 Module: Syntax <http://www.w3.org/TR/css3-syntax/>`__
Used in parts since cssutils 0.9.4. cssutils tries to use the features from CSS 2.1 and CSS 3 with preference to CSS3 but as this is not final yet some parts are from CSS 2.1
Used in parts since cssutils 0.9.4. cssutils tries to use the features from
CSS 2.1 and CSS 3 with preference to CSS3 but as this is not final yet some
parts are from CSS 2.1
`MediaQueries <http://www.w3.org/TR/css3-mediaqueries/>`__
MediaQueries are part of ``stylesheets.MediaList`` since v0.9.4, used in @import and @media rules.
MediaQueries are part of ``stylesheets.MediaList`` since v0.9.4, used in
@import and @media rules.
`Namespaces <http://dev.w3.org/csswg/css3-namespace/>`__
Added in v0.9.1, updated to definition in CSSOM in v0.9.4, updated in 0.9.5 for dev version
Added in v0.9.1, updated to definition in CSSOM in v0.9.4, updated in 0.9.5
for dev version
`Selectors <http://www.w3.org/TR/css3-selectors/>`__
The selector syntax defined here (and not in CSS 2.1) should be parsable with cssutils (*should* mind though ;) )
The selector syntax defined here (and not in CSS 2.1) should be parsable
with cssutils (*should* mind though ;) )
`DOM Level 2 Style CSS <http://www.w3.org/TR/DOM-Level-2-Style/css.html>`__
DOM for package css
`DOM Level 2 Style Stylesheets <http://www.w3.org/TR/DOM-Level-2-Style/stylesheets.html>`__
DOM for package stylesheets
`CSSOM <http://dev.w3.org/csswg/cssom/>`__
A few details (mainly the NamespaceRule DOM) is taken from here. Plan is to move implementation to the stuff defined here which is newer but still no REC so might change anytime...
A few details (mainly the NamespaceRule DOM) are taken from here. The plan
is to move the implementation to what is defined here, which is newer but
still not a REC and so might change at any time...
The cssutils tokenizer is a customized implementation of `CSS3 Module: Syntax (W3C Working Draft 13 August 2003) <http://www.w3.org/TR/css3-syntax/>`__ which itself is based on the CSS 2.1 tokenizer. It tries to be as compliant as possible but uses some (helpful) parts of the CSS 2.1 tokenizer.
The cssutils tokenizer is a customized implementation of `CSS3 Module: Syntax
(W3C Working Draft 13 August 2003) <http://www.w3.org/TR/css3-syntax/>`__ which
itself is based on the CSS 2.1 tokenizer. It tries to be as compliant as
possible but uses some (helpful) parts of the CSS 2.1 tokenizer.
I guess cssutils is neither CSS 2.1 nor CSS 3 compliant but tries to at least be able to parse both grammars including some more real world cases (some CSS hacks are actually parsed and serialized). Both official grammars are not final nor bugfree but still feasible. cssutils aim is not to be fully compliant to any CSS specification (the specifications seem to be in a constant flow anyway) but cssutils *should* be able to read and write as many as possible CSS stylesheets "in the wild" while at the same time implement the official APIs which are well documented. Some minor extensions are provided as well.
I guess cssutils is neither CSS 2.1 nor CSS 3 compliant but tries to at least
be able to parse both grammars including some more real world cases (some CSS
hacks are actually parsed and serialized). Both official grammars are not final
nor bugfree but still feasible. cssutils aim is not to be fully compliant to
any CSS specification (the specifications seem to be in a constant flow anyway)
but cssutils *should* be able to read and write as many as possible CSS
stylesheets "in the wild" while at the same time implement the official APIs
which are well documented. Some minor extensions are provided as well.
Please visit http://cthedot.de/cssutils/ for more details.
Tested with Python 2.5 on Windows Vista mainly.
Tested with Python 2.6 on Windows 7 mainly.
This library may be used ``from cssutils import *`` which
@ -70,13 +88,16 @@ Usage may be::
__all__ = ['css', 'stylesheets', 'CSSParser', 'CSSSerializer']
__docformat__ = 'restructuredtext'
__author__ = 'Christof Hoeke with contributions by Walter Doerwald'
__date__ = '$LastChangedDate:: 2009-10-17 15:12:28 -0600 #$:'
__date__ = '$LastChangedDate:: 2009-11-26 16:31:32 -0700 #$:'
VERSION = '0.9.7a1'
__version__ = '%s $Id: __init__.py 1877 2009-10-17 21:12:28Z cthedot $' % VERSION
__version__ = '%s $Id: __init__.py 1892 2009-11-26 23:31:32Z cthedot $' % VERSION
import codec
import os.path
import urllib
import urlparse
import xml.dom
# order of imports is important (partly circular)
@ -230,17 +251,20 @@ def getUrls(sheet):
if u is not None:
yield u
def replaceUrls(sheet, replacer):
def replaceUrls(sheet, replacer, ignoreImportRules=False):
"""Replace all URLs in :class:`cssutils.css.CSSImportRule` or
:class:`cssutils.css.CSSValue` objects of given `sheet`.
:param sheet:
:class:`cssutils.css.CSSStyleSheet` which is changed
:param replacer:
a function which is called with a single argument `urlstring` which is
the current value of each url() excluding ``url(`` and ``)`` and
a function which is called with a single argument `urlstring` which
is the current value of each url() excluding ``url(`` and ``)`` and
surrounding single or double quotes.
:param ignoreImportRules:
if ``True`` does not call `replacer` with URLs from @import rules.
"""
if not ignoreImportRules:
for importrule in (r for r in sheet if r.type == r.IMPORT_RULE):
importrule.href = replacer(importrule.href)
@ -273,7 +297,7 @@ def resolveImports(sheet, target=None):
@import rules which use media information are tried to be wrapped into
@media rules so keeping the media information. This may not work in
all instances (if e.g. an @import rule itself contains an @import rule
with different media infos or if it is contains rules which may not be
with different media infos or if it contains rules which may not be
used inside an @media block like @namespace rules.). In these cases
the @import rule is kept as in the original sheet and a WARNING is issued.
@ -281,43 +305,110 @@ def resolveImports(sheet, target=None):
in this given :class:`cssutils.css.CSSStyleSheet` all import rules are
resolved and added to a resulting *flat* sheet.
:param target:
A :class:`cssutils.css.CSSStyleSheet` object which will be the resulting
*flat* sheet if given
:returns: given `target` or a new :class:`cssutils.css.CSSStyleSheet` object
A :class:`cssutils.css.CSSStyleSheet` object which will be the
resulting *flat* sheet if given
:returns: given `target` or a new :class:`cssutils.css.CSSStyleSheet`
object
"""
if not target:
target = css.CSSStyleSheet()
target = css.CSSStyleSheet(href=sheet.href,
media=sheet.media,
title=sheet.title)
def getReplacer(targetbase):
"Return a replacer which uses base to return adjusted URLs"
basesch, baseloc, basepath, basequery, basefrag = urlparse.urlsplit(targetbase)
basepath, basepathfilename = os.path.split(basepath)
def replacer(url):
scheme, location, path, query, fragment = urlparse.urlsplit(url)
if not scheme and not location and not path.startswith(u'/'):
# relative
path, filename = os.path.split(path)
combined = os.path.normpath(os.path.join(basepath, path, filename))
return urllib.pathname2url(combined)
else:
# keep anything absolute
return url
return replacer
#target.add(css.CSSComment(cssText=u'/* START %s */' % sheet.href))
for rule in sheet.cssRules:
if rule.type == rule.CHARSET_RULE:
pass
elif rule.type == rule.IMPORT_RULE:
log.info(u'Processing @import %r' % rule.href, neverraise=True)
if rule.styleSheet:
target.add(css.CSSComment(cssText=u'/* START @import "%s" */' % rule.href))
if rule.media.mediaText == 'all':
t = target
else:
log.info(u'Replacing @import media with @media: %s' %
rule.media.mediaText, neverraise=True)
t = css.CSSMediaRule(rule.media.mediaText)
# add all rules of @import to current sheet
target.add(css.CSSComment(cssText=u'/* START @import "%s" */'
% rule.href))
try:
resolveImports(rule.styleSheet, t)
# nested imports
importedSheet = resolveImports(rule.styleSheet)
except xml.dom.HierarchyRequestErr, e:
log.warn(u'Cannot resolve @import: %s' %
e, neverraise=True)
log.warn(u'@import: Cannot resolve target, keeping rule: %s'
% e, neverraise=True)
target.add(rule)
else:
if t != target:
target.add(t)
t.add(css.CSSComment(cssText=u'/* END "%s" */' % rule.href))
# adjust relative URI references
log.info(u'@import: Adjusting paths for %r' % rule.href,
neverraise=True)
replaceUrls(importedSheet,
getReplacer(rule.href),
ignoreImportRules=True)
# might have to wrap rules in @media if media given
if rule.media.mediaText == u'all':
mediaproxy = None
else:
log.error(u'Cannot get referenced stylesheet %r' %
rule.href, neverraise=True)
keepimport = False
for r in importedSheet:
# check if rules present which may not be
# combined with media
if r.type not in (r.COMMENT,
r.STYLE_RULE,
r.IMPORT_RULE):
keepimport = True
break
if keepimport:
log.warn(u'Cannot combine imported sheet with'
u' given media as other rules then'
u' comments or stylerules found %r,'
u' keeping %r' % (r,
rule.cssText),
neverraise=True)
target.add(rule)
continue
# wrap in @media if media is not `all`
log.info(u'@import: Wrapping some rules in @media '
u' to keep media: %s'
% rule.media.mediaText, neverraise=True)
mediaproxy = css.CSSMediaRule(rule.media.mediaText)
for r in importedSheet:
if mediaproxy:
mediaproxy.add(r)
else:
# add to top sheet directly but are difficult anyway
target.add(r)
if mediaproxy:
target.add(mediaproxy)
else:
# keep @import as it is
log.error(u'Cannot get referenced stylesheet %r, keeping rule'
% rule.href, neverraise=True)
target.add(rule)
else:
target.add(rule)
return target

View File

@ -51,7 +51,7 @@ TODO:
"""
__all__ = ['CSSStyleDeclaration', 'Property']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssstyledeclaration.py 1870 2009-10-17 19:56:59Z cthedot $'
__version__ = '$Id: cssstyledeclaration.py 1879 2009-11-17 20:35:04Z cthedot $'
from cssproperties import CSS2Properties
from property import Property
@ -581,6 +581,9 @@ class CSSStyleDeclaration(CSS2Properties, cssutils.util.Base2):
if isinstance(name, Property):
newp = name
name = newp.literalname
elif not value:
# empty string or None effectively removed property
return self.removeProperty(name)
else:
newp = Property(name, value, priority)
if not newp.wellformed:

View File

@ -1,7 +1,7 @@
"""CSSUnknownRule implements DOM Level 2 CSS CSSUnknownRule."""
__all__ = ['CSSUnknownRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssunknownrule.py 1638 2009-01-13 20:39:33Z cthedot $'
__version__ = '$Id: cssunknownrule.py 1897 2009-12-17 22:09:06Z cthedot $'
import cssrule
import cssutils
@ -98,6 +98,20 @@ class CSSUnknownRule(cssrule.CSSRule):
token=token)
return expected
def FUNCTION(expected, seq, token, tokenizer=None):
    "A FUNCTION token is handled like an opening ``(``"
    type_, val, line, col = token
    val = self._tokenvalue(token)
    if expected == 'EOF':
        # nothing may follow once the end of the rule is expected
        new['wellformed'] = False
        self._log.error(u'CSSUnknownRule: Expected end of rule.',
                        token=token)
    else:
        new['nesting'].append(u'(')
        seq.append(val, type_, line=line, col=col)
    return expected
def EOF(expected, seq, token, tokenizer=None):
"close all blocks and return 'EOF'"
for x in reversed(new['nesting']):
@ -154,6 +168,7 @@ class CSSUnknownRule(cssrule.CSSRule):
seq=newseq, tokenizer=tokenizer,
productions={'CHAR': CHAR,
'EOF': EOF,
'FUNCTION': FUNCTION,
'INVALID': INVALID,
'STRING': STRING,
'URI': URI,

View File

@ -1,7 +1,7 @@
"""Property is a single CSS property in a CSSStyleDeclaration."""
__all__ = ['Property']
__docformat__ = 'restructuredtext'
__version__ = '$Id: property.py 1868 2009-10-17 19:36:54Z cthedot $'
__version__ = '$Id: property.py 1878 2009-11-17 20:16:26Z cthedot $'
from cssutils.helper import Deprecated
from cssvalue import CSSValue
@ -68,13 +68,10 @@ class Property(cssutils.util.Base):
self.__nametoken = None
self._name = u''
self._literalname = u''
self.seqs[1] = CSSValue(parent=self)
if name:
self.name = name
if value:
self.cssValue = value
else:
self.seqs[1] = CSSValue(parent=self)
self._priority = u''
self._literalpriority = u''
@ -246,14 +243,14 @@ class Property(cssutils.util.Base):
if self._mediaQuery and not cssText:
self.seqs[1] = CSSValue(parent=self)
else:
#if not self.seqs[1]:
# self.seqs[1] = CSSValue(parent=self)
self.seqs[1] = CSSValue(parent=self)
oldvalue = self.seqs[1].cssText
try:
self.seqs[1].cssText = cssText
except:
self.seqs[1].cssText = oldvalue
raise
self.wellformed = self.wellformed and self.seqs[1].wellformed
# self.valid = self.valid and self.cssValue.valid
cssValue = property(_getCSSValue, _setCSSValue,
doc="(cssutils) CSSValue object of this property")

428
src/cssutils/sac.py Normal file
View File

@ -0,0 +1,428 @@
#!/usr/bin/env python
"""A validating CSSParser"""
__all__ = ['CSSParser']
__docformat__ = 'restructuredtext'
__version__ = '$Id: parse.py 1754 2009-05-30 14:50:13Z cthedot $'
import helper
import codecs
import errorhandler
import os
import tokenize2
import urllib
import sys
class ErrorHandler(object):
    """Basic class for CSS error handlers.

    This class provides a default implementation which forwards warnings,
    errors and fatal errors to an ``errorhandler.ErrorHandler`` log.
    Warnings and errors are forwarded with ``neverraise=True``; fatal
    errors are forwarded without it.

    If a CSS application needs to implement customized error handling, it
    must extend this class and then register an instance with the CSS
    parser using the parser's setErrorHandler method. The parser will then
    report all errors and warnings through this interface.

    The parser shall use this class instead of throwing an exception: it
    is up to the application whether to throw an exception for different
    types of errors and warnings. Note, however, that there is no
    requirement that the parser continue to provide useful information
    after a call to the fatal error callback (in other words, a CSS driver
    class could catch an exception and report a fatal error).
    """

    def __init__(self):
        self._log = errorhandler.ErrorHandler()

    def error(self, exception, token=None):
        "Report an error; logged with ``neverraise=True``."
        self._log.error(exception, token, neverraise=True)

    def fatal(self, exception, token=None):
        "Report a fatal error; logged without ``neverraise``."
        self._log.fatal(exception, token)

    def warn(self, exception, token=None):
        "Report a warning; logged with ``neverraise=True``."
        self._log.warn(exception, token, neverraise=True)
class DocumentHandler(object):
    """Base class for handlers receiving parse events of a style sheet.

    Every callback implemented here only writes one ``INFO`` line
    describing the event to ``sys.stderr``; subclasses override the
    callbacks they are interested in.

    The following callbacks are listed for this interface but are not
    implemented yet:

    ``endFontFace()``
        Receive notification of the end of a font face statement.
    ``endMedia(media)``
        Receive notification of the end of a media statement.
    ``endPage(name, pseudo_page)``
        Receive notification of the end of a page statement.
    ``startFontFace()``
        Receive notification of the beginning of a font face statement.
    ``startMedia(media)``
        Receive notification of the beginning of a media statement.
    ``startPage(name, pseudo_page)``
        Receive notification of the beginning of a page statement.
    """

    def __init__(self):
        def log(msg):
            sys.stderr.write('INFO\t%s\n' % msg)
        self._log = log

    def comment(self, text, line=None, col=None):
        "Receive notification of a comment."
        self._log("comment %r at [%s, %s]" % (text, line, col))

    def startDocument(self, encoding):
        "Receive notification of the beginning of a style sheet."
        # source
        self._log("startDocument encoding=%s" % encoding)

    def endDocument(self, source=None, line=None, col=None):
        "Receive notification of the end of a document."
        self._log("endDocument EOF")

    def importStyle(self, uri, media, name, line=None, col=None):
        "Receive notification of a import statement in the style sheet."
        # defaultNamespaceURI???
        self._log("importStyle at [%s, %s]" % (line, col))

    def namespaceDeclaration(self, prefix, uri, line=None, col=None):
        "Receive notification of a namespace declaration."
        # prefix might be None!
        self._log("namespaceDeclaration at [%s, %s]" % (line, col))

    def startSelector(self, selectors=None, line=None, col=None):
        "Receive notification of the beginning of a rule statement."
        # TODO selectorList!
        self._log("startSelector at [%s, %s]" % (line, col))

    def endSelector(self, selectors=None, line=None, col=None):
        "Receive notification of the end of a rule statement."
        self._log("endSelector at [%s, %s]" % (line, col))

    def property(self, name, value='TODO', important=False, line=None, col=None):
        "Receive notification of a declaration."
        # TODO: value is LexicalValue?
        self._log("property %r at [%s, %s]" % (name, line, col))

    def ignorableAtRule(self, atRule, line=None, col=None):
        "Receive notification of an unknown at-rule not supported by this parser."
        self._log("ignorableAtRule %r at [%s, %s]" % (atRule, line, col))
class EchoHandler(DocumentHandler):
    """Echos all input to property `out`.

    Every notification is first passed on to `DocumentHandler` (which logs
    it) and then appended, as CSS text, to an internal list of fragments.
    The concatenated text is available as `out`.
    """

    def __init__(self):
        "Set up the base handler and start with an empty output buffer."
        super(EchoHandler, self).__init__()
        self._out = []

    # Everything echoed so far, joined into one unicode string.
    # NOTE: this assignment must stay above the ``property`` method defined
    # further down; below it the name ``property`` would refer to that
    # method instead of the builtin.
    out = property(lambda self: u''.join(self._out))

    def startDocument(self, encoding):
        """Echo an ``@charset`` rule unless `encoding` is ``utf-8``."""
        super(EchoHandler, self).startDocument(encoding)
        if u'utf-8' != encoding:
            self._out.append(u'@charset "%s";\n' % encoding)

    # Comments are currently not echoed, ``comment`` is not overridden:
    # def comment(self, text, line=None, col=None):
    #     self._out.append(u'/*%s*/' % text)

    def importStyle(self, uri, media, name, line=None, col=None):
        """Echo an ``@import`` rule.

        `media` and `name` are optional; each one that is given is written
        after the URI, separated from what precedes it by a single space.
        """
        # defaultNamespaceURI???
        super(EchoHandler, self).importStyle(uri, media, name, line, col)
        # The separator has to lead (not trail) the optional parts, else the
        # result reads ``@import "x"screen ;``.
        self._out.append(u'@import %s%s%s;\n' % (
            helper.string(uri),
            u' %s' % media if media else u'',
            u' %s' % name if name else u''))

    def namespaceDeclaration(self, prefix, uri, line=None, col=None):
        """Echo an ``@namespace`` rule; `prefix` may be None or empty."""
        super(EchoHandler, self).namespaceDeclaration(prefix, uri, line, col)
        self._out.append(u'@namespace %s%s;\n' % (
            u'%s ' % prefix if prefix else u'',
            helper.string(uri)))

    def startSelector(self, selectors=None, line=None, col=None):
        """Echo the comma separated `selectors` and the opening brace."""
        super(EchoHandler, self).startSelector(selectors, line, col)
        if selectors:
            self._out.append(u', '.join(selectors))
        self._out.append(u' {\n')

    def endSelector(self, selectors=None, line=None, col=None):
        """Echo the closing brace of a rule."""
        self._out.append(u' }')

    def property(self, name, value, important=False, line=None, col=None):
        """Echo one declaration, followed by ``!important`` if `important`."""
        # `important` must be forwarded as well: the base method takes
        # (name, value, important, line, col), so leaving it out shifts
        # `line` into `important` and `col` into `line`.
        super(EchoHandler, self).property(name, value, important, line, col)
        self._out.append(u' %s: %s%s;\n' % (
            name, value, u' !important' if important else u''))
class Parser(object):
    """
    Event based CSS parser: the input is tokenized and every construct
    found is reported to the registered document handler, problems are
    reported to the registered error handler.

    Reference listing of the Java style parser interface; of these only
    `parseString`, `setDocumentHandler` and `setErrorHandler` are
    implemented by this class:

    java.lang.String getParserVersion()
        Returns a string about which CSS language is supported by this parser.
    boolean parsePriority(InputSource source)
        Parse a CSS priority value (e.g. "!important").
    LexicalUnit parsePropertyValue(InputSource source)
        Parse a CSS property value.
    void parseRule(InputSource source)
        Parse a CSS rule.
    SelectorList parseSelectors(InputSource source)
        Parse a comma separated list of selectors.
    void parseStyleDeclaration(InputSource source)
        Parse a CSS style declaration (without '{' and '}').
    void parseStyleSheet(InputSource source)
        Parse a CSS document.
    void parseStyleSheet(java.lang.String uri)
        Parse a CSS document from a URI.
    void setConditionFactory(ConditionFactory conditionFactory)
    void setDocumentHandler(DocumentHandler handler)
        Allow an application to register a document event handler.
    void setErrorHandler(ErrorHandler handler)
        Allow an application to register an error event handler.
    void setLocale(java.util.Locale locale)
        Allow an application to request a locale for errors and warnings.
    void setSelectorFactory(SelectorFactory selectorFactory)
    """

    def __init__(self, documentHandler=None, errorHandler=None):
        """
        `documentHandler`
            receives the parse events, a new `DocumentHandler` if omitted
        `errorHandler`
            receives errors, a new `ErrorHandler` if omitted
        """
        self._tokenizer = tokenize2.Tokenizer()
        self.setDocumentHandler(documentHandler or DocumentHandler())
        self.setErrorHandler(errorHandler or ErrorHandler())

    def parseString(self, cssText, encoding=None):
        """Parse `cssText` and report its content to the handlers.

        `cssText`
            the style sheet; a byte string (``str``) is decoded with the
            ``css`` codec first, anything else is handed to the tokenizer
            unchanged
        `encoding`
            passed to the ``css`` codec when `cssText` has to be decoded

        The document handler receives ``startDocument`` first, then one
        call per comment, at-rule, rule start, declaration and rule end,
        and ``endDocument`` when the EOF token is reached. Parsing stops
        as soon as the tokenizer is exhausted, even in the middle of a
        rule.
        """
        if isinstance(cssText, str):
            cssText = codecs.getdecoder('css')(cssText, encoding=encoding)[0]

        tokens = self._tokenizer.tokenize(cssText, fullsheet=True)

        # START PARSING
        t = next(tokens)
        type_, val, line, col = t

        # encoding reported to the handler, NOT the `encoding` argument
        encoding = 'utf-8'
        if 'CHARSET_SYM' == type_:
            # @charset "encoding";
            encodingtoken = next(tokens)
            semicolontoken = next(tokens)
            # the rule is only valid as STRING directly followed by ";"
            if 'STRING' == encodingtoken[0] and u';' == semicolontoken[1]:
                encoding = helper.stringvalue(encodingtoken[1])
            else:
                self._errorHandler.fatal(u'Invalid @charset')

            t = next(tokens)
            type_, val, line, col = t

        self._handler.startDocument(encoding)

        while True:
            # position of the first token of the construct parsed next
            start = (line, col)
            try:
                if self._simple(t):
                    pass
                elif ('ATKEYWORD' == type_
                      or type_ in ('PAGE_SYM', 'MEDIA_SYM', 'FONT_FACE_SYM')):
                    self._ignorableAtRule(tokens, val, start)
                elif 'IMPORT_SYM' == type_:
                    self._importRule(tokens, start)
                elif 'NAMESPACE_SYM' == type_:
                    self._namespaceRule(tokens, start)
                else:
                    self._styleRule(tokens, t, start)

                t = next(tokens)
                type_, val, line, col = t
            except StopIteration:
                # no more tokens, possibly in the middle of a construct
                break

    def _simple(self, t):
        "Handle a COMMENT, S or EOF token; return False for any other `t`."
        type_, val, line, col = t
        if 'COMMENT' == type_:
            # strip the leading "/*" and the trailing "*/"
            self._handler.comment(val[2:-2], line, col)
        elif 'EOF' == type_:
            self._handler.endDocument(val, line, col)
        elif 'S' != type_:
            return False
        return True

    def _ignorableAtRule(self, tokens, keyword, start):
        "Consume an at-rule up to its ``;`` or its block and report its text."
        atRule = [keyword]
        braces = 0
        while True:
            # read till the ending ";" or the "}" closing the outermost block
            type_, val, line, col = next(tokens)
            atRule.append(val)
            if u';' == val and not braces:
                break
            elif u'{' == val:
                braces += 1
            elif u'}' == val:
                braces -= 1
                if braces == 0:
                    break
        self._handler.ignorableAtRule(u''.join(atRule), *start)

    def _importRule(self, tokens, start):
        "Consume ``@import URI-or-STRING ... ;`` and report it."
        # media and name are not parsed yet and always reported as None
        uri, media, name = None, None, None
        while True:
            type_, val, line, col = next(tokens)
            if 'STRING' == type_:
                uri = helper.stringvalue(val)
            elif 'URI' == type_:
                uri = helper.urivalue(val)
            elif u';' == val:
                break
        if uri:
            # report the position like every other rule callback does
            self._handler.importStyle(uri, media, name, *start)
        else:
            self._errorHandler.error(u'Invalid @import'
                                     u' declaration at %r'
                                     % (start,))

    def _namespaceRule(self, tokens, start):
        "Consume ``@namespace prefix? URI-or-STRING ;`` and report it."
        prefix, uri = None, None
        while True:
            type_, val, line, col = next(tokens)
            if 'IDENT' == type_:
                prefix = val
            elif 'STRING' == type_:
                uri = helper.stringvalue(val)
            elif 'URI' == type_:
                uri = helper.urivalue(val)
            elif u';' == val:
                break
        if uri:
            self._handler.namespaceDeclaration(prefix, uri, *start)
        else:
            self._errorHandler.error(u'Invalid @namespace'
                                     u' declaration at %r'
                                     % (start,))

    def _styleRule(self, tokens, t, start):
        "Consume a style rule which begins with token `t` and report it."
        type_, val, line, col = t
        selector = []
        selectors = []
        while True:
            # selectors[, selector]* {
            if 'S' == type_:
                selector.append(u' ')
            elif self._simple(t):
                pass
            elif u',' == val:
                selectors.append(u''.join(selector).strip())
                selector = []
            elif u'{' == val:
                selectors.append(u''.join(selector).strip())
                self._handler.startSelector(selectors, *start)
                break
            else:
                selector.append(val)

            t = next(tokens)
            type_, val, line, col = t

        while True:
            # name: value [!important][;name: value [!important]]*;?
            name, value, important, end, line, col = self._declaration(tokens)
            if name and value:
                self._handler.property(name, value, important)
            if u'}' == end:
                # line, col are the position of the closing "}"
                self._handler.endSelector(selectors, line=line, col=col)
                break

    def _declaration(self, tokens):
        """Consume one declaration up to and including its ``;`` or ``}``.

        Returns ``(name, value, important, end, line, col)``: `end` is the
        terminating ``;`` or ``}`` and `line`, `col` its position. `name`
        is None and `value` empty if they could not be read.
        """
        error = self._errorHandler.error
        name, value, important, end = None, [], False, None

        while True:
            # name:
            t = next(tokens)
            type_, val, line, col = t
            if 'S' == type_ or self._simple(t):
                pass
            elif 'IDENT' == type_:
                if name:
                    error('more than one property name', t)
                else:
                    name = val
            elif u':' == val:
                if not name:
                    error('no property name', t)
                break
            elif u';' == val:
                error('premature end of property', t)
                end = val
                break
            elif u'}' == val:
                if name:
                    error('premature end of property', t)
                end = val
                break
            else:
                error('unexpected property name token %r' % val, t)

        while u';' != end and u'}' != end:
            # value !;}
            t = next(tokens)
            type_, val, line, col = t
            if 'S' == type_:
                value.append(u' ')
            elif self._simple(t):
                pass
            elif u'!' == val or u';' == val or u'}' == val:
                value = ''.join(value).strip()
                if not value:
                    error('premature end of property (no value)', t)
                end = val
                break
            else:
                value.append(val)

        while u'!' == end:
            # !important
            t = next(tokens)
            type_, val, line, col = t
            if self._simple(t):
                pass
            elif u'IDENT' == type_ and not important:
                # NOTE(review): any identifier is accepted here, not only
                # "important" - confirm whether this is intended
                important = True
            elif u';' == val or u'}' == val:
                end = val
                break
            else:
                error('unexpected priority token %r' % val, t)

        return name, value, important, end, line, col

    def setDocumentHandler(self, handler):
        "Allow an application to register a document event `handler`."
        self._handler = handler

    def setErrorHandler(self, handler):
        "Allow an application to register an error event `handler`."
        self._errorHandler = handler

View File

@ -318,6 +318,8 @@ def csscombine(path=None, url=None,
`path` or `url`
path or URL to a CSSStyleSheet which imports other sheets which
are then combined into one sheet
`sourceencoding`
explicit encoding of the source proxysheet, default 'utf-8'
`targetencoding`
encoding of the combined stylesheet, default 'utf-8'
`minify`
@ -350,4 +352,3 @@ def csscombine(path=None, url=None,
cssText = result.cssText
return cssText

View File

@ -3,7 +3,7 @@
"""cssutils serializer"""
__all__ = ['CSSSerializer', 'Preferences']
__docformat__ = 'restructuredtext'
__version__ = '$Id: serialize.py 1872 2009-10-17 21:00:40Z cthedot $'
__version__ = '$Id: serialize.py 1898 2009-12-19 12:17:04Z cthedot $'
import codecs
import cssutils
@ -191,7 +191,6 @@ class Out(object):
add ``*spacer`` except ``space=False``
"""
prefspace = self.ser.prefs.spacer
if val or typ in ('STRING', 'URI'):
# PRE
if 'COMMENT' == typ:
@ -230,7 +229,10 @@ class Out(object):
if indent:
self.out.append(self.ser._indentblock(val, self.ser._level+1))
else:
if val.endswith(u' '):
self._remove_last_if_S()
self.out.append(val)
# POST
if lineSeparator:
# Property , ...
@ -238,6 +240,9 @@ class Out(object):
elif val in u'+>~': # enclose selector combinator
self.out.insert(-1, self.ser.prefs.selectorCombinatorSpacer)
self.out.append(self.ser.prefs.selectorCombinatorSpacer)
elif u')' == val and not keepS: # CHAR funcend
# TODO: pref?
self.out.append(u' ')
elif u',' == val: # list
self.out.append(self.ser.prefs.listItemSpacer)
elif u':' == val: # prop