Sync to trunk.

This commit is contained in:
John Schember 2011-08-24 18:10:41 -04:00
commit 140b1181d1
63 changed files with 2203 additions and 343 deletions

View File

@ -0,0 +1,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Android_com_pl(BasicNewsRecipe):
    """Fetch news from Android.com.pl via its frontpage RSS feed."""

    title                 = u'Android.com.pl'
    __author__            = 'fenuks'
    description           = 'Android.com.pl - biggest polish Android site'
    category              = 'Android, mobile'
    language              = 'pl'
    oldest_article        = 8
    max_articles_per_feed = 100
    # Generic Android robot logo used as the periodical cover.
    cover_url = u'http://upload.wikimedia.org/wikipedia/commons/thumb/d/d7/Android_robot.svg/220px-Android_robot.svg.png'

    feeds = [
        (u'Android',
         u'http://android.com.pl/component/content/frontpage/frontpage.feed?type=rss'),
    ]

View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Bash_org_pl(BasicNewsRecipe):
    """Download funny IRC quotations from bash.org.pl."""

    title          = u'Bash.org.pl'
    __author__     = 'fenuks'
    description    = 'Bash.org.pl - funny quotations from IRC discussions'
    category       = 'funny quotations, humour'
    language       = 'pl'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png'

    # Keep just the quote body; everything else on the page is chrome.
    keep_only_tags = [dict(name='div', attrs={'class': 'quote post-content post-body'})]

    feeds = [(u'Cytaty', u'http://bash.org.pl/rss')]

View File

@ -36,8 +36,9 @@ class BBC(BasicNewsRecipe):
]
remove_tags = [
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper', \
'story-feature wide ', 'story-feature narrow']})
dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
'story-feature wide ', 'story-feature narrow']}),
dict(id=['hypertab', 'comment-form']),
]
remove_attributes = ['width','height']

View File

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class BrasilDeFato(BasicNewsRecipe):
    """Recipe for Brasil de Fato (brasildefato.com.br)."""

    news        = True
    title       = u'Brasil de Fato'
    __author__  = 'Alex Mitrani'
    description = u'Uma visão popular do Brasil e do mundo.'
    publisher   = u'SOCIEDADE EDITORIAL BRASIL DE FATO'
    category    = 'news, politics, Brazil, rss, Portuguese'
    language    = 'pt_BR'

    oldest_article        = 10
    max_articles_per_feed = 100
    summary_length        = 1000
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False
    remove_empty_feeds    = True
    masthead_url = 'http://www.brasildefato.com.br/sites/default/files/zeropoint_logo.jpg'

    # Article body lives in #main; the trailing 'links' block is noise,
    # and also marks the end of the useful content.
    keep_only_tags    = [dict(name='div', attrs={'id': 'main'})]
    remove_tags       = [dict(name='div', attrs={'class': 'links'})]
    remove_tags_after = [dict(name='div', attrs={'class': 'links'})]

    feeds = [
        (u'Nacional',      u'http://www.brasildefato.com.br/rss_nacional'),
        (u'Internacional', u'http://www.brasildefato.com.br/rss_internacional'),
        (u'Entrevista',    u'http://www.brasildefato.com.br/rss_entrevista'),
        (u'Cultura',       u'http://www.brasildefato.com.br/rss_cultura'),
        (u'Análise',       u'http://www.brasildefato.com.br/rss_analise'),
    ]

View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Bugun(BasicNewsRecipe):
    """Recipe for the Turkish daily newspaper BUGÜN (bugun.com.tr)."""

    title      = u'BUGÜN Gazetesi'
    __author__ = u'thomass'
    publisher  = 'thomass'
    category   = 'news, haberler,TR,gazete'
    language   = 'tr'
    # FIX: was 'newspaper ' (trailing space) — normalized so the value
    # matches calibre's standard publication-type identifier.
    publication_type = 'newspaper'

    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    encoding              = 'UTF-8'
    remove_empty_feeds    = True

    # FIX: the original line carried stray, half-commented CSS fragments
    # after the closing quote; only the actual string is kept.
    extra_css = ' div{font-size: small} h2{font-size: small;font-weight: bold} #ctl00_ortayer_haberBaslik{font-size:20px;font-weight: bold} '

    conversion_options = {
        'tags'             : category,
        'language'         : language,
        'publisher'        : publisher,
        'linearize_tables' : True,
    }

    cover_img_url = 'http://www.bugun.com.tr/images/bugunLogo2011.png'
    masthead_url  = 'http://www.bugun.com.tr/images/bugunLogo2011.png'

    # Headline, summary, info bar and article text.
    keep_only_tags = [
        dict(name='h1',  attrs={'class': ['haberBaslik']}),
        dict(name='h2',  attrs={'class': ['haberOzet']}),
        dict(name='div', attrs={'class': ['haberGriDivvvv']}),
        dict(name='div', attrs={'id':    ['haberTextDiv']}),
    ]

    feeds = [
        (u'Son Dakika', u'http://www.bugun.com.tr/haberler.xml'),
        (u'Yazarlar',   u'http://www.bugun.com.tr/rss/yazarlar.xml'),
        (u'Gündem',     u'http://www.bugun.com.tr/rss/gundem.xml'),
        (u'Ekonomi',    u'http://www.bugun.com.tr/rss/ekonomi.xml'),
        (u'Spor',       u'http://www.bugun.com.tr/rss/spor.xml'),
        (u'Magazin',    u'http://www.bugun.com.tr/rss/magazin.xml'),
        (u'Teknoloji',  u'http://www.bugun.com.tr/rss/teknoloji.xml'),
        (u'Yaşam',      u'http://www.bugun.com.tr/rss/yasam.xml'),
        (u'Medya',      u'http://www.bugun.com.tr/rss/medya.xml'),
        (u'Dünya',      u'http://www.bugun.com.tr/rss/dunya.xml'),
        (u'Politika',   u'http://www.bugun.com.tr/rss/politika.xml'),
        (u'Sağlık',     u'http://www.bugun.com.tr/rss/saglik.xml'),
        (u'Tarifler',   u'http://www.bugun.com.tr/rss/yemek-tarifi.xml'),
    ]

16
recipes/cd_action.recipe Normal file
View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class CD_Action(BasicNewsRecipe):
    """Recipe for the Polish gaming magazine CD-Action (cdaction.pl)."""

    title          = u'CD-Action'
    __author__     = 'fenuks'
    description    = 'cdaction.pl - polish magazine about games site'
    category       = 'games'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = u'http://s.cdaction.pl/obrazki/logo-CD-Action_172k9.JPG'

    # Article text sits in #news_content; drop everything after the body.
    keep_only_tags    = dict(id='news_content')
    remove_tags_after = dict(name='div', attrs={'class': 'tresc'})

    feeds = [(u'Newsy', u'http://www.cdaction.pl/rss_newsy.xml')]

47
recipes/cvecezla.recipe Normal file
View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
cvecezla.wordpress.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class CveceZla(BasicNewsRecipe):
    """Recipe for the Serbian blog 'Cvece zla i naopakog'."""

    title       = 'Cvece zla i naopakog'
    __author__  = 'Darko Miletic'
    description = 'Haoticnost razmisljanja poradja haoticnost pisanja. Muzika, stripovi, igre, knjige, generalno glupiranje...'
    language    = 'sr'
    encoding    = 'utf-8'
    publication_type = 'blog'

    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False

    extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: "Trebuchet MS",Trebuchet,Verdana,sans1,sans-serif} .article_description{font-family: sans1, sans-serif} img{display: block } '

    conversion_options = {
        'comment'   : description,
        'tags'      : 'igre, muzika, film, blog, Srbija',
        'publisher' : 'Mehmet Krljic',
        'language'  : language,
    }

    # Map U+0110 (Đ) to U+00D0 (Ð) in the downloaded text.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags_before = dict(attrs={'class': 'navigation'})
    remove_tags_after  = dict(attrs={'class': 'commentlist'})
    remove_tags = [
        dict(attrs={'class': ['postmetadata alt', 'sharedaddy sharedaddy-dark sd-like-enabled sd-sharing-enabled', 'reply', 'navigation']}),
        dict(attrs={'id': 'respond'}),
    ]

    feeds = [(u'Clanci', u'http://cvecezla.wordpress.com/feed/')]

    def preprocess_html(self, soup):
        """Strip inline style attributes from every tag in the article."""
        for tag in soup.findAll(style=True):
            del tag['style']
        return soup

View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Dobreprogramy_pl(BasicNewsRecipe):
    """Recipe for dobreprogramy.pl news and reader blogs."""

    title       = 'Dobreprogramy.pl'
    __author__  = 'fenuks'
    __licence__ = 'GPL v3'  # NOTE(review): nonstandard spelling (__license__ is conventional); kept as-is
    category    = 'IT'
    # FIX: 'language' was assigned twice with the same value; one copy removed.
    language    = 'pl'
    description = u'Aktualności i blogi z dobreprogramy.pl'
    encoding    = 'utf-8'
    cover_url   = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'

    no_stylesheets        = True
    oldest_article        = 8
    max_articles_per_feed = 100
    extra_css = '.title {font-size:22px;}'

    keep_only_tags = [dict(name='div', attrs={'class': ['mainBar', 'newsContent', 'postTitle title', 'postInfo', 'contentText', 'content']})]
    remove_tags    = [dict(name='div', attrs={'class': ['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]

    feeds = [
        (u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
        ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow'),
    ]

View File

@ -0,0 +1,128 @@
#import re # Provides preprocess_regexps re.compile
from calibre.web.feeds.news import BasicNewsRecipe
class FairbanksDailyNewsminer(BasicNewsRecipe):
    """Recipe for the Fairbanks Daily News-Miner (newsminer.com)."""

    title      = u'Fairbanks Daily News-miner'
    __author__ = 'Roger'
    # FIX: was ''''The voice...''' which produced a string with an
    # unbalanced leading apostrophe; the slogan is now quoted symmetrically.
    description = "'The voice of interior Alaska since 1903'"
    publisher   = 'http://www.newsminer.com/'
    category    = 'news, Alaska, Fairbanks'
    # FIX: 'language' was assigned twice with the same value; one removed.
    language    = 'en'

    oldest_article        = 7
    max_articles_per_feed = 100
    remove_javascript     = True
    use_embedded_content  = False
    no_stylesheets        = True
    encoding              = 'utf8'
    conversion_options    = {'linearize_tables': True}

    # TODO: I don't see any photos in my Mobi file with this masthead_url!
    masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'

    # The page's "signature_line" div mixes the article date (the
    # "story_item_date updated" span) with view counts, comment counts and
    # share buttons; a regex or manual processing would be needed to keep
    # just the date. Left as a future improvement.

    # Keep only the article container, headline and body.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'hnews hentry item'}),
        dict(name='div', attrs={'class': 'story_item_headline entry-title'}),
        dict(name='div', attrs={'class': 'full_story'}),
    ]

    # Strip sharing widgets, counters, related content and comments.
    # FIX: an exact duplicate of the 'addthis_toolbox addthis_default_style'
    # entry was removed (the single-string and list forms both remain, as
    # they match different class attributes).
    remove_tags = [
        dict(name='img',  attrs={'class': 'dont_touch_me'}),
        dict(name='span', attrs={'class': 'number_recommendations'}),
        dict(name='div',  attrs={'class': 'addthis_toolbox addthis_default_style'}),
        dict(name='div',  attrs={'class': ['addthis_toolbox', 'addthis_default_style']}),
        dict(name='span', attrs={'class': 'addthis_separator'}),
        dict(name='div',  attrs={'class': 'related_content'}),
        dict(name='div',  attrs={'class': 'comments_container'}),
        dict(name='div',  attrs={'id': 'comments_container'}),
    ]

    # Comment-out or uncomment any of the following RSS feeds according to
    # your liking.
    #
    # TODO: With more than one RSS feed, the newline is omitted for entries
    # within the Table of Contents / Index of Articles.
    # TODO: Some random bits of public posts and comments trail the last
    # page (or the TOC on MOBI files) and still need to be removed.
    feeds = [
        (u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'),
        (u'Local News', u'http://newsminer.com/rss/rss_feeds/local_news?content_type=article&tags=local_news&page_name=rss_feeds&offset=0&instance=local_news'),
        (u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'),
        (u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
        (u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
        # (u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),
        (u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),
        # (u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
        # (u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),
        (u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),
        # (u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
        # (u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
        # (u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),
        # (u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),
        (u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin'),
    ]

40
recipes/film_web.recipe Normal file
View File

@ -0,0 +1,40 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Filmweb_pl(BasicNewsRecipe):
    """Recipe for FilmWeb (filmweb.pl), the biggest Polish movie site."""

    title       = u'FilmWeb'
    __author__  = 'fenuks'
    description = 'FilmWeb - biggest polish movie site'
    cover_url   = 'http://userlogos.org/files/logos/crudus/filmweb.png'
    category    = 'movies'
    language    = 'pl'
    oldest_article        = 8
    max_articles_per_feed = 100
    no_stylesheets        = True
    extra_css = '.hdrBig {font-size:22px;}'

    remove_tags    = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'})]
    keep_only_tags = [dict(name='h1', attrs={'class':'hdrBig'}), dict(name='div', attrs={'class':['newsInfo', 'reviewContent fontSizeCont description']})]

    feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'),
        (u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
        (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
        (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
        (u'News / Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'),
        (u'News / Multimedia', u'http://www.filmweb.pl/feed/news/category/multimedia'),
        (u'News / Dystrybucja dvd / blu-ray', u'http://www.filmweb.pl/feed/news/category/video'),
        (u'News / Dystrybucja kinowa', u'http://www.filmweb.pl/feed/news/category/cinema'),
        (u'News / off', u'http://www.filmweb.pl/feed/news/category/off'),
        (u'News / Gry wideo', u'http://www.filmweb.pl/feed/news/category/game'),
        (u'News / Organizacje branżowe', u'http://www.filmweb.pl/feed/news/category/organizations'),
        (u'News / Internet', u'http://www.filmweb.pl/feed/news/category/internet'),
        (u'News / Różne', u'http://www.filmweb.pl/feed/news/category/other'),
        (u'News / Kino polskie', u'http://www.filmweb.pl/feed/news/category/polish.cinema'),
        (u'News / Telewizja', u'http://www.filmweb.pl/feed/news/category/tv'),
        (u'Recenzje redakcji', u'http://www.filmweb.pl/feed/reviews/latest'),
        (u'Recenzje użytkowników', u'http://www.filmweb.pl/feed/user-reviews/latest')]

    def skip_ad_pages(self, soup):
        """Follow the welcome-screen interstitial through to the article.

        FIX: the original subscripted soup.find(...)['href'] before the
        None check, so pages without the welcome button raised TypeError.
        Returns the target page's raw soup, or None to keep this page.
        """
        link = soup.find('a', attrs={'class': 'welcomeScreenButton'})
        if link is not None and link.get('href'):
            return self.index_to_soup(link['href'], raw=True)
        return None

View File

@ -5,6 +5,7 @@ www.ft.com/uk-edition
'''
import datetime
from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -22,6 +23,8 @@ class FinancialTimes(BasicNewsRecipe):
needs_subscription = True
encoding = 'utf8'
publication_type = 'newspaper'
articles_are_obfuscated = True
temp_files = []
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
LOGIN2 = 'http://media.ft.com/h/subs3.html'
@ -47,7 +50,12 @@ class FinancialTimes(BasicNewsRecipe):
br.submit()
return br
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
keep_only_tags = [
dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']})
,dict(name='div', attrs={'class':'standfirst'})
,dict(name='div', attrs={'id' :'storyContent'})
,dict(name='div', attrs={'class':['ft-story-body','index-detail']})
]
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])
@ -69,18 +77,23 @@ class FinancialTimes(BasicNewsRecipe):
def get_artlinks(self, elem):
articles = []
count = 0
for item in elem.findAll('a',href=True):
count = count + 1
if self.test and count > 2:
return articles
rawlink = item['href']
if rawlink.startswith('http://'):
url = rawlink
else:
url = self.PREFIX + rawlink
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
title = self.tag_to_string(item)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'url' :urlverified
,'description':''
})
return articles
@ -97,7 +110,11 @@ class FinancialTimes(BasicNewsRecipe):
st = wide.find('h4',attrs={'class':'section-no-arrow'})
if st:
strest.insert(0,st)
count = 0
for item in strest:
count = count + 1
if self.test and count > 2:
return feeds
ftitle = self.tag_to_string(item)
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
feedarts = self.get_artlinks(item.parent.ul)
@ -136,4 +153,19 @@ class FinancialTimes(BasicNewsRecipe):
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
def get_obfuscated_article(self, url):
count = 0
while (count < 10):
try:
response = self.browser.open(url)
html = response.read()
count = 10
except:
print "Retrying download..."
count += 1
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name

39
recipes/fluter_de.recipe Normal file
View File

@ -0,0 +1,39 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch fluter.de
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1313693926(BasicNewsRecipe):
    """fluter.de — magazine of the Bundeszentrale für politische Bildung."""

    title       = u'Fluter'
    description = 'fluter.de Magazin der Bundeszentrale für politische Bildung/bpb'
    language    = 'de'
    encoding    = 'UTF-8'
    __author__  = 'Armin Geller'  # 2011-08-19

    oldest_article        = 7
    max_articles_per_feed = 50

    keep_only_tags = [
        dict(name='div', attrs={'class': ["grid_8 articleText"]}),
        dict(name='div', attrs={'class': ["articleTextInnerText"]}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': ["comments"]}),
        dict(attrs={'class': ['commentlink']}),
    ]

    feeds = [
        (u'Inhalt:', u'http://www.fluter.de/de/?tpl=907'),
    ]

    extra_css = '.cs_img {margin-right: 10pt;}'

16
recipes/gram_pl.recipe Normal file
View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Gram_pl(BasicNewsRecipe):
    """Recipe for gram.pl, a Polish computer-games site."""

    title          = u'Gram.pl'
    __author__     = 'fenuks'
    description    = 'Gram.pl - site about computer games'
    category       = 'games'
    language       = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = u'http://www.gram.pl/www/01/img/grampl_zima.png'

    keep_only_tags = [
        dict(name='div', attrs={'class': ['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}),
        dict(attrs={'class': ['contentheading', 'contentpaneopen']}),
    ]
    remove_tags = [
        dict(name='p', attrs={'class': ['extraText', 'must-log-in']}),
        dict(attrs={'class': ['el', 'headline', 'post-info']}),
        dict(name='div', attrs={'class': ['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}),
        dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title']),
    ]

    feeds = [
        (u'gram.pl - informacje', u'http://www.gram.pl/feed_news.asp'),
        (u'gram.pl - publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
    ]

View File

@ -13,6 +13,8 @@ class HBR(BasicNewsRecipe):
no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/'
LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
INDEX = 'http://hbr.org/archive-toc/BR'
keep_only_tags = [dict(name='div', id='pageContainer')]
@ -34,6 +36,9 @@ class HBR(BasicNewsRecipe):
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
self.logout_url = None
#'''
br.open(self.LOGIN_URL)
br.select_form(name='signin-form')
br['signin-form:username'] = self.username
@ -41,10 +46,13 @@ class HBR(BasicNewsRecipe):
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
try:
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
except:
self.logout_url = self.LOGOUT_URL
#'''
return br
def cleanup(self):
@ -57,6 +65,8 @@ class HBR(BasicNewsRecipe):
def hbr_get_toc(self):
#return self.index_to_soup(open('/t/hbr.html').read())
today = date.today()
future = today + timedelta(days=30)
for x in [x.strftime('%y%m') for x in (future, today)]:
@ -66,53 +76,43 @@ class HBR(BasicNewsRecipe):
return soup
raise Exception('Could not find current issue')
def hbr_parse_section(self, container, feeds):
current_section = None
current_articles = []
for x in container.findAll(name=['li', 'h3', 'h4']):
if x.name in ['h3', 'h4'] and not x.findAll(True):
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if x.name == 'li':
a = x.find('a', href=True)
if a is not None:
title = self.tag_to_string(a)
url = a.get('href')
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org'+url
url = self.map_url(url)
p = x.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
if not title or not url:
continue
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
current_articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
if current_section and current_articles:
feeds.append((current_section, current_articles))
def hbr_parse_toc(self, soup):
feeds = []
features = soup.find(id='issueFeaturesContent')
self.hbr_parse_section(features, feeds)
departments = soup.find(id='issueDepartments')
self.hbr_parse_section(departments, feeds)
current_section = None
articles = []
for x in soup.find(id='archiveToc').findAll(['h3', 'h4']):
if x.name == 'h3':
if current_section is not None and articles:
feeds.append((current_section, articles))
current_section = self.tag_to_string(x).capitalize()
articles = []
self.log('\tFound section:', current_section)
else:
a = x.find('a', href=True)
if a is None: continue
title = self.tag_to_string(a)
url = a['href']
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org' + url
url = self.map_url(url)
p = x.parent.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
articles.append({'title':title, 'url':url, 'description':desc,
'date':''})
return feeds
def parse_index(self):
soup = self.hbr_get_toc()
#open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
feeds = self.hbr_parse_toc(soup)
return feeds

View File

@ -6,33 +6,21 @@ class HBR(BasicNewsRecipe):
title = 'Harvard Business Review Blogs'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal, enhanced by BrianG'
__author__ = 'Kovid Goyal'
language = 'en'
no_stylesheets = True
LOGIN_URL = 'http://hbr.org/login?request_url=/'
LOGOUT_URL = 'http://hbr.org/logout?request_url=/'
INDEX = 'http://hbr.org/current'
#
# Blog Stuff
#
INCLUDE_BLOGS = True
INCLUDE_ARTICLES = False
# option-specific settings.
if INCLUDE_BLOGS == True:
remove_tags_after = dict(id='articleBody')
remove_tags_before = dict(id='pageFeature')
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
oldest_article = 30
max_articles_per_feed = 100
use_embedded_content = False
else:
timefmt = ' [%B %Y]'
remove_tags_after = dict(id='articleBody')
remove_tags_before = dict(id='pageFeature')
feeds = [('Blog','http://feeds.harvardbusiness.org/harvardbusiness')]
oldest_article = 30
max_articles_per_feed = 100
use_embedded_content = False
keep_only_tags = [ dict(name='div', id='pageContainer')
]
@ -41,21 +29,15 @@ class HBR(BasicNewsRecipe):
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
'articleToolbarTop','articleToolbarBottom', 'articleToolbarRD',
'mailingListTout', 'partnerCenter', 'pageFooter']),
dict(name='iframe')]
'mailingListTout', 'partnerCenter', 'pageFooter', 'shareWidgetTop']),
dict(name=['iframe', 'style'])]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleBody{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''
#-------------------------------------------------------------------------------------------------
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
self.logout_url = None
#'''
br.open(self.LOGIN_URL)
br.select_form(name='signin-form')
br['signin-form:username'] = self.username
@ -63,11 +45,15 @@ class HBR(BasicNewsRecipe):
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
try:
link = br.find_link(text='Sign out')
if link:
self.logout_url = link.absolute_url
except:
self.logout_url = self.LOGOUT_URL
#'''
return br
#-------------------------------------------------------------------------------------------------
def cleanup(self):
if self.logout_url is not None:
@ -76,99 +62,7 @@ class HBR(BasicNewsRecipe):
def map_url(self, url):
if url.endswith('/ar/1'):
return url[:-1]+'pr'
#-------------------------------------------------------------------------------------------------
def hbr_get_toc(self):
soup = self.index_to_soup(self.INDEX)
url = soup.find('a', text=lambda t:'Full Table of Contents' in t).parent.get('href')
return self.index_to_soup('http://hbr.org'+url)
#-------------------------------------------------------------------------------------------------
def hbr_parse_section(self, container, feeds):
current_section = None
current_articles = []
for x in container.findAll(name=['li', 'h3', 'h4']):
if x.name in ['h3', 'h4'] and not x.findAll(True):
if current_section and current_articles:
feeds.append((current_section, current_articles))
current_section = self.tag_to_string(x)
current_articles = []
self.log('\tFound section:', current_section)
if x.name == 'li':
a = x.find('a', href=True)
if a is not None:
title = self.tag_to_string(a)
url = a.get('href')
if '/ar/' not in url:
continue
if url.startswith('/'):
url = 'http://hbr.org'+url
url = self.map_url(url)
p = x.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
if not title or not url:
continue
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', desc)
current_articles.append({'title':title, 'url':url,
'description':desc, 'date':''})
if current_section and current_articles:
feeds.append((current_section, current_articles))
#-------------------------------------------------------------------------------------------------
def hbr_parse_toc(self, soup):
    # Build the feed list from the ToC soup: the issue's feature
    # articles first, then the department articles.
    feeds = []
    for container_id in ('issueFeaturesContent', 'issueDepartments'):
        self.hbr_parse_section(soup.find(id=container_id), feeds)
    return feeds
#-------------------------------------------------------------------------------------------------
def feed_to_index_append(self, feedObject, masterFeed):
    # Convert each downloaded Feed object into the plain
    # (title, list-of-article-dicts) tuple form that an index uses,
    # appending the results to masterFeed (mutated in place).
    for feed in feedObject:
        entries = [{
            'title': art.title,
            'url': art.url,
            'date': art.date,
            'description': art.text_summary,
        } for art in feed.articles]
        # Tag the feed title with the date span covered by its articles.
        startDate, endDate = self.get_feed_dates(feed, '%d-%b')
        labelled = feed.title + ' (' + startDate + ' thru ' + endDate + ')'
        masterFeed.append((labelled, entries))
#-------------------------------------------------------------------------------------------------
def get_feed_dates(self, feedObject, dateMask):
    # Return (oldest, newest) article dates of a feed, formatted with
    # dateMask. Articles are ordered newest-first, so the oldest entry
    # is the last one and the newest is the first.
    articles = feedObject.articles
    oldest = articles[-1].localtime
    newest = articles[0].localtime
    return oldest.strftime(dateMask), newest.strftime(dateMask)
#-------------------------------------------------------------------------------------------------
def parse_index(self):
    """Return the issue's table of contents.

    When INCLUDE_ARTICLES is set, scrape the HBR ToC page; otherwise
    fall back to the default RSS-driven index.
    """
    # Idiom fix: test the flag's truthiness instead of '== True'
    # (PEP 8); behaviour for the boolean flag is unchanged. The
    # redundant feeds temporary and trailing return are folded away.
    if self.INCLUDE_ARTICLES:
        soup = self.hbr_get_toc()
        return self.hbr_parse_toc(soup)
    return BasicNewsRecipe.parse_index(self)
#-------------------------------------------------------------------------------------------------
def get_cover_url(self):
cover_url = None
index = 'http://hbr.org/current'

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
import string, pprint
from calibre.web.feeds.news import BasicNewsRecipe
class HoustonChronicle(BasicNewsRecipe):
@ -13,53 +11,28 @@ class HoustonChronicle(BasicNewsRecipe):
language = 'en'
timefmt = ' [%a, %d %b, %Y]'
no_stylesheets = True
use_embedded_content = False
remove_attributes = ['style']
keep_only_tags = [
dict(id=['story-head', 'story'])
]
remove_tags = [
dict(id=['share-module', 'resource-box',
'resource-box-header'])
]
extra_css = '''
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
h2{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#666666;}
h3{font-family :Arial,Helvetica,sans-serif; font-size:medium; color:#000000;}
h4{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
p{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
#story-head h1{font-family :Arial,Helvetica,sans-serif; font-size: xx-large;}
#story-head h2{font-family :Arial,Helvetica,sans-serif; font-size: small; color:#000000;}
#story-head h3{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story-head h4{font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
#story{font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
#Text-TextSubhed BoldCond PoynterAgateZero h3{color:#444444;font-family :Arial,Helvetica,sans-serif; font-size:small;}
.p260x p{font-family :Arial,Helvetica,serif; font-size:x-small;font-style:italic;}
.p260x h6{color:#777777;font-family :Arial,Helvetica,sans-serif; font-size:xx-small;}
'''
def parse_index(self):
    # Build the index by scraping each section landing page of
    # chron.com; articles are identified by elements carrying
    # comptype='story' and a storyid attribute.
    categories = ['news', 'sports', 'business', 'entertainment', 'life',
            'travel']
    feeds = []
    for cat in categories:
        articles = []
        soup = self.index_to_soup('http://www.chron.com/%s/'%cat)
        for elem in soup.findAll(comptype='story', storyid=True):
            a = elem.find('a', href=True)
            if a is None: continue
            url = a['href']
            # Relative links are anchored to the site root.
            if not url.startswith('http://'):
                url = 'http://www.chron.com'+url
            articles.append({'title':self.tag_to_string(a), 'url':url,
                'description':'', 'date':''})
            # NOTE(review): debug print left in — dumps every article
            # dict to stdout; consider removing.
            pprint.pprint(articles[-1])
        if articles:
            # Section name title-cased for the feed label.
            feeds.append((string.capwords(cat), articles))
    return feeds
oldest_article = 2.0
keep_only_tags = {'class':lambda x: x and ('hst-articletitle' in x or
'hst-articletext' in x or 'hst-galleryitem' in x)}
feeds = [
('News', "http://www.chron.com/rss/feed/News-270.php"),
('Sports',
'http://www.chron.com/sports/headlines/collectionRss/Sports-Headlines-Staff-Stories-10767.php'),
('Neighborhood',
'http://www.chron.com/rss/feed/Neighborhood-305.php'),
('Business', 'http://www.chron.com/rss/feed/Business-287.php'),
('Entertainment',
'http://www.chron.com/rss/feed/Entertainment-293.php'),
('Editorials',
'http://www.chron.com/opinion/editorials/collectionRss/Opinion-Editorials-Headline-List-10567.php'),
('Life', 'http://www.chron.com/rss/feed/Life-297.php'),
('Science & Tech',
'http://www.chron.com/rss/feed/AP-Technology-and-Science-266.php'),
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 391 B

BIN
recipes/icons/cd_action.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 972 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
recipes/icons/film_web.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

BIN
recipes/icons/gram_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 795 B

BIN
recipes/icons/wnp.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 576 B

View File

@ -6,11 +6,13 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
msdn.microsoft.com/en-us/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
class MSDNMagazine_en(BasicNewsRecipe):
title = 'MSDN Magazine'
__author__ = 'Darko Miletic'
description = 'The Microsoft Journal for Developers'
masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
publisher = 'Microsoft Press'
category = 'news, IT, Microsoft, programming, windows'
oldest_article = 31
@ -20,24 +22,44 @@ class MSDNMagazine_en(BasicNewsRecipe):
encoding = 'utf-8'
language = 'en'
base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'
feeds = [(u'Articles', u'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1')]
keep_only_tags = [dict(name='div', attrs={'class':'navpage'})]
keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})]
remove_tags = [
dict(name=['object','link','base','table'])
,dict(name='div', attrs={'class':'MTPS_CollapsibleRegion'})
dict(name='div', attrs={'class':'DivRatingsOnly'})
,dict(name='div', attrs={'class':'ShareThisButton4'})
]
remove_tags_after = dict(name='div', attrs={'class':'navpage'})
def preprocess_html(self, soup):
    # Promote the magazine's styled <div> headings to real heading
    # tags so the conversion pipeline recognises the structure.
    promotions = (
        ('h2', {'class': ['FeatureSmallHead', 'ColumnTypeSubTitle']}),
        ('h1', {'class': ['FeatureHeadline', 'ColumnTypeTitle']}),
        ('h3', {'class': 'ArticleTypeTitle'}),
    )
    for new_name, attrs in promotions:
        for div in soup.findAll('div', attrs=attrs):
            div.name = new_name
    return soup
def find_articles(self):
    # Generator yielding one article dict per <item> in the issue's
    # RSS feed. The description is parsed twice because the feed
    # embeds HTML-escaped markup inside the description element.
    idx_contents = self.browser.open(self.rss_url).read()
    idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    for article in idx.findAll('item'):
        desc_html = self.tag_to_string(article.find('description'))
        description = self.tag_to_string(BeautifulSoup(desc_html))
        a = {
            'title': self.tag_to_string(article.find('title')),
            'url': self.tag_to_string(article.find('link')),
            'description': description,
            # 'pubdate' is lower-case — presumably because the soup
            # parser lowercases tag names; verify before changing.
            'date' : self.tag_to_string(article.find('pubdate')),
        }
        yield a
def parse_index(self):
    # Single-section index: scrape the issue name and cover image from
    # the magazine landing page, then list the articles from the RSS
    # feed via find_articles().
    soup = self.index_to_soup(self.base_url)
    # The page's first <h1> holds the issue name, e.g. "August 2011".
    issue_name = self.tag_to_string(soup.find('h1'))
    # The cover image uses the issue name as its alt text.
    img = soup.find('img',attrs ={'alt':issue_name})
    if img is not None:
        self.cover_url = img['src']
    return [(issue_name, list(self.find_articles()))]

View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Niebezpiecznik_pl(BasicNewsRecipe):
    # Recipe for niebezpiecznik.pl, a Polish IT-security news site.
    # Data-only recipe: all behaviour comes from BasicNewsRecipe.
    title = u'Niebezpiecznik.pl'
    __author__ = 'fenuks'
    description = 'Niebezpiecznik.pl'
    category = 'hacking, IT'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url =u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
    # Strip share buttons and related-post boxes; keep only the title
    # and article body containers.
    remove_tags=[dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
    keep_only_tags= [dict(name='div', attrs={'class':['title', 'entry']})]
    feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
            ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]

View File

@ -5,7 +5,6 @@ class PolitiFactCom(BasicNewsRecipe):
__author__ = u'Michael Heinz'
oldest_article = 21
max_articles_per_feed = 100
recursion = 0
language = 'en'
no_stylesheets = True

View File

@ -40,11 +40,11 @@ class SVD_se(BasicNewsRecipe):
,(u'Kultur' , u'http://www.svd.se/kulturnoje/nyheter/?service=rss')
]
keep_only_tags = [dict(name='div', attrs={'id':'articlecontent'})]
remove_tags_after = dict(name='div',attrs={'class':'articlebody normal'})
keep_only_tags = [dict(name='div', attrs={'id':['article-content', 'articlecontent']})]
remove_tags_after = dict(name='div',attrs={'class':'articlebody'})
remove_tags = [
dict(name=['object','link','base'])
,dict(name='div',attrs={'class':['articlead','factcolumn']})
,dict(name='div',attrs={'class':['articlead','factcolumn', 'article-ad']})
,dict(name='ul', attrs={'class':'toolbar articletop clearfix'})
,dict(name='p', attrs={'class':'more'})
]

21
recipes/wnp.recipe Normal file
View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1312886443(BasicNewsRecipe):
    # Recipe for wnp.pl (Wirtualny Nowy Przemysł), a Polish
    # business/industry news portal.
    title = u'WNP'
    cover_url = 'http://k.wnp.pl/images/wnpLogo.gif'
    __author__ = 'fenuks'
    description = u'Wirtualny Nowy Przemysł'
    category = 'economy'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    # Consistency fix: keep_only_tags was a bare dict; every other
    # recipe supplies a list of tag specs, so wrap it in a one-element
    # list (backward compatible — same tag is matched).
    keep_only_tags = [dict(name='div', attrs={'id':'contentText'})]
    feeds = [(u'Wiadomości gospodarcze', u'http://www.wnp.pl/rss/serwis_rss.xml'),
        (u'Serwis Energetyka - Gaz', u'http://www.wnp.pl/rss/serwis_rss_1.xml'),
        (u'Serwis Nafta - Chemia', u'http://www.wnp.pl/rss/serwis_rss_2.xml'),
        (u'Serwis Hutnictwo', u'http://www.wnp.pl/rss/serwis_rss_3.xml'),
        (u'Serwis Górnictwo', u'http://www.wnp.pl/rss/serwis_rss_4.xml'),
        (u'Serwis Logistyka', u'http://www.wnp.pl/rss/serwis_rss_5.xml'),
        (u'Serwis IT', u'http://www.wnp.pl/rss/serwis_rss_6.xml')]

View File

@ -53,6 +53,12 @@ class WallStreetJournal(BasicNewsRecipe):
return soup
def abs_wsj_url(self, href):
    # Return href unchanged when it is already absolute; otherwise
    # anchor the site-relative path to the WSJ host.
    return href if href.startswith('http') else 'http://online.wsj.com' + href
def wsj_get_index(self):
    # Fetch and parse the WSJ issue index page.
    return self.index_to_soup('http://online.wsj.com/itp')
@ -83,14 +89,14 @@ class WallStreetJournal(BasicNewsRecipe):
pageone = a['href'].endswith('pageone')
if pageone:
title = 'Front Section'
url = 'http://online.wsj.com' + a['href']
url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url)
title = 'What''s News'
url = url.replace('pageone','whatsnews')
feeds = self.wsj_add_feed(feeds,title,url)
else:
title = self.tag_to_string(a)
url = 'http://online.wsj.com' + a['href']
url = self.abs_wsj_url(a['href'])
feeds = self.wsj_add_feed(feeds,title,url)
return feeds
@ -146,7 +152,7 @@ class WallStreetJournal(BasicNewsRecipe):
title = self.tag_to_string(a).strip() + ' [%s]'%meta
else:
title = self.tag_to_string(a).strip()
url = 'http://online.wsj.com'+a['href']
url = self.abs_wsj_url(a['href'])
desc = ''
for p in container.findAll('p'):
desc = self.tag_to_string(p)

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Yagmur(BasicNewsRecipe):
    # Recipe for Yağmur, a Turkish quarterly language/culture/literature
    # magazine, fed through an open.dapper.net screen-scraped feed.
    title = u'Yagmur Dergisi'
    __author__ = u'thomass'
    description = 'Üç Aylık Dil, Kültür ve Edebiyat Dergisi'
    oldest_article = 90
    max_articles_per_feed =100
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    #publisher = ' '
    category = 'dergi, ilim, kültür, edebiyat,Türkçe'
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'ISO 8859-9'
    publisher = 'thomass'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '

    conversion_options = {
        'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
    #remove_attributes = ['aria-describedby']
    #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
    masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d2.gif'
    #remove_tags_before = dict(id='content-right')
    #remove_empty_feeds= True
    #remove_attributes = ['width','height']

    feeds = [
        ( u'Yagmur', u'http://open.dapper.net/services/yagmur'),
    ]

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)

    def print_version(self, url): # table-based article layout causes a problem; use the print-format page instead
        return url.replace('http://www.yagmurdergisi.com.tr/konu_goster.php?konu_id=', 'http://www.yagmurdergisi.com.tr/yazformati.php?konu_id=')

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class YeniUmit(BasicNewsRecipe):
    # Recipe for Yeni Ümit, a Turkish monthly religious-science and
    # culture magazine, fed through an open.dapper.net scraped feed.
    title = u'Yeni Umit Dergisi'
    __author__ = u'thomass'
    description = 'Aylık Dini İlimler ve Kültür Dergisi'
    oldest_article = 45
    max_articles_per_feed =100
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    #publisher = ' '
    category = 'dergi, ilim, kültür, edebiyat,Türkçe'
    language = 'tr'
    publication_type = 'magazine'
    encoding = 'ISO 8859-9'
    publisher = 'thomass'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '

    conversion_options = {
        'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='h1', attrs={'class':['georgia_30']})]
    #remove_attributes = ['aria-describedby']
    #remove_tags = [dict(name='div', attrs={'id':['renk10']}) ]
    cover_img_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
    masthead_url = 'http://www.sizinti.com.tr/images/dergiler/d1.gif'
    #remove_tags_before = dict(id='content-right')
    #remove_empty_feeds= True
    #remove_attributes = ['width','height']

    feeds = [
        ( u'Yeni Umit', u'http://open.dapper.net/services/yeniumit'),
    ]

    #def preprocess_html(self, soup):
    #    return self.adeify_images(soup)

    def print_version(self, url): # table-based article layout causes a problem; use the print-format page instead
        return url.replace('http://www.yeniumit.com.tr/konular', 'http://www.yeniumit.com.tr/yazdir')

View File

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Bugun (BasicNewsRecipe):
    # Recipe for the Turkish daily Yeni Şafak.
    # NOTE(review): the class name 'Bugun' looks like a copy-paste
    # leftover from another recipe; the recipe itself is for Yeni Şafak.
    title = u'Yenişafak Gazetesi'
    __author__ = u'thomass'
    oldest_article = 2
    max_articles_per_feed =100
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'ISO 8859-9' #'UTF-8'
    publisher = 'thomass'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    publication_type = 'newspaper '
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    conversion_options = {
        'tags' : category
        ,'language' : language
        ,'publisher' : publisher
        ,'linearize_tables': True
    }
    cover_img_url = 'http://yenisafak.com.tr/resim/logo.gif'
    masthead_url = 'http://yenisafak.com.tr/resim/logo.gif'
    # Keep only the article-detail container of the 2010 site layout.
    keep_only_tags = [dict(name='div', attrs={'id':[ 'ctghaberdetay2010']}) ]
    extra_css = ' h1{font-size:20px;font-weight: bold}h2{font-size: small;font-weight: bold}div{font-size: small} '#h1{ font-size:10%;font-weight: bold} '#ctl00_ortayer_haberBaslik{ 'font-size:10%;font-weight: bold'}
    #keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
    # Drop legal-notice and related-links boxes plus the date stamp.
    remove_tags = [ dict(name='div', attrs={'id':['yasaluyari2010','divhaberdetayilisik2010']}),dict(name='font', attrs={'class':['haberdetaytarih']})]#,'news-detail-gallery','news-detail-news-bottom-social']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})]
    #remove_attributes = ['width','height']
    remove_empty_feeds= True

    feeds = [
        ( u'SonDakika', u'http://yenisafak.com.tr/rss/?xml=anasayfa'),
        ( u'Gündem', u'http://yenisafak.com.tr/rss/?xml=gundem'),
        ( u'Politika', u'http://yenisafak.com.tr/rss/?xml=politika'),
        ( u'Ekonomi', u'http://yenisafak.com.tr/rss/?xml=ekonomi'),
        ( u'Dünya', u'http://yenisafak.com.tr/rss/?xml=dunya'),
        ( u'Aktüel', u'http://yenisafak.com.tr/rss/?xml=aktuel'),
        ( u'Eğitim', u'http://yenisafak.com.tr/rss/?xml=egitim'),
        ( u'Spor', u'http://yenisafak.com.tr/rss/?xml=spor'),
        ( u'Yazarlar', u'http://yenisafak.com.tr/rss/?xml=yazarlar'),
        ( u'Televizyon', u'http://yenisafak.com.tr/rss/?xml=televizyon'),
        ( u'Sağlık', u'http://yenisafak.com.tr/rss/?xml=saglik'),
        ( u'Yurt Haberler', u'http://yenisafak.com.tr/rss/?xml=yurthaberler'),
        ( u'Bilişim', u'http://yenisafak.com.tr/rss/?xml=bilisim'),
        ( u'Diziler', u'http://yenisafak.com.tr/rss/?xml=diziler'),
        ( u'Kültür-Sanat', u'http://yenisafak.com.tr/rss/?xml=kultursanat'),
        ( u'Röportaj', u'http://yenisafak.com.tr/rss/?xml=roportaj'),
        ( u'Sinema', u'http://yenisafak.com.tr/rss/?xml=sinema'),
        ( u'Yorum', u'http://yenisafak.com.tr/rss/?xml=yorum'),
        ( u' Yeni Şafak Pazar', u'http://yenisafak.com.tr/rss/?xml=pazar'),
        ( u'Yeni Şafak Kitap', u'http://yenisafak.com.tr/rss/?xml=kitap'),
        ( u'Yeni Şafak English', u'http://yenisafak.com.tr/rss/?xml=english'),
    ]

View File

@ -62,10 +62,16 @@ authors_completer_append_separator = False
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled.
# The author name copy words are a set of words which if they occur in an
# author name cause the automatically geenrated author sort string to be
# identical to the author name. This means that the sort for a string like Acme
# Inc. will be Acme Inc. instead of Inc., Acme
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV',
'Junior', 'Senior')
author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
#: Use author sort in Tag Browser
# Set which author field to display in the tags pane (the list of authors,

View File

@ -17,8 +17,8 @@ class GUI(Command):
@classmethod
def find_forms(cls):
from calibre.gui2 import find_forms
return find_forms(cls.SRC)
# We do not use the calibre function find_forms as
# mporting calibre.gui2 may not work
forms = []
for root, _, files in os.walk(cls.PATH):
for name in files:
@ -29,8 +29,9 @@ class GUI(Command):
@classmethod
def form_to_compiled_form(cls, form):
from calibre.gui2 import form_to_compiled_form
return form_to_compiled_form(form)
# We do not use the calibre function form_to_compiled_form as
# importing calibre.gui2 may not work
return form.rpartition('.')[0]+'_ui.py'
def run(self, opts):
self.build_forms()

View File

@ -55,7 +55,7 @@ class Develop(Command):
short_description = 'Setup a development environment for calibre'
MODE = 0755
sub_commands = ['build', 'resources', 'gui']
sub_commands = ['build', 'resources', 'iso639', 'gui',]
def add_postinstall_options(self, parser):
parser.add_option('--make-errors-fatal', action='store_true', default=False,

View File

@ -219,12 +219,17 @@ class Resources(Command):
json.dump(function_dict, open(dest, 'wb'), indent=4)
def clean(self):
for x in ('scripts', 'recipes', 'ebook-convert-complete'):
for x in ('scripts', 'ebook-convert-complete'):
x = self.j(self.RESOURCES, x+'.pickle')
if os.path.exists(x):
os.remove(x)
from setup.commands import kakasi
kakasi.clean()
for x in ('builtin_recipes.xml', 'builtin_recipes.zip',
'template-functions.json'):
x = self.j(self.RESOURCES, x)
if os.path.exists(x):
os.remove(x)

View File

@ -206,6 +206,10 @@ class Translations(POT): # {{{
for x in (i, j, d):
if os.path.exists(x):
os.remove(x)
zf = self.DEST + '.zip'
if os.path.exists(zf):
os.remove(zf)
# }}}
class GetTranslations(Translations):
@ -273,13 +277,14 @@ class GetTranslations(Translations):
class ISO639(Command):
description = 'Compile translations for ISO 639 codes'
DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
'iso639.pickle')
def run(self, opts):
src = self.j(self.d(self.SRC), 'setup', 'iso639.xml')
if not os.path.exists(src):
raise Exception(src + ' does not exist')
dest = self.j(self.d(self.SRC), 'resources', 'localization',
'iso639.pickle')
dest = self.DEST
if not self.newer(dest, src):
self.info('Pickled code is up to date')
return
@ -322,3 +327,8 @@ class ISO639(Command):
'3to2':m3to2, '3bto3t':m3bto3t, 'name_map':nm}
dump(x, open(dest, 'wb'), -1)
def clean(self):
if os.path.exists(self.DEST):
os.remove(self.DEST)

File diff suppressed because it is too large Load Diff

View File

@ -36,8 +36,15 @@ def author_to_author_sort(author, method=None):
return author
if method is None:
method = tweaks['author_sort_copy_method']
ltoks = frozenset(x.lower() for x in tokens)
copy_words = frozenset(x.lower() for x in tweaks['author_name_copywords'])
if ltoks.intersection(copy_words):
method = u'copy'
if method == u'copy':
return author
suffixes = set([x.lower() for x in tweaks['author_name_suffixes']])
suffixes |= set([x+u'.' for x in suffixes])

View File

@ -1312,7 +1312,7 @@ class OPFCreator(Metadata):
ncx_stream.flush()
def metadata_to_opf(mi, as_string=True):
def metadata_to_opf(mi, as_string=True, default_lang=None):
from lxml import etree
import textwrap
from calibre.ebooks.oeb.base import OPF, DC
@ -1328,7 +1328,8 @@ def metadata_to_opf(mi, as_string=True):
'[http://calibre-ebook.com]'
if not mi.languages:
lang = get_lang().replace('_', '-').partition('-')[0]
lang = (get_lang().replace('_', '-').partition('-')[0] if default_lang
is None else default_lang)
mi.languages = [lang]
root = etree.fromstring(textwrap.dedent(

View File

@ -481,7 +481,7 @@ def identify(log, abort, # {{{
log('The identify phase took %.2f seconds'%(time.time() - start_time))
log('The longest time (%f) was taken by:'%longest, lp)
log('Merging results from different sources and finding earliest',
'publication dates')
'publication dates from the xisbn service')
start_time = time.time()
results = merge_identify_results(results, log)

View File

@ -33,7 +33,7 @@ def serialize_metadata_for(formats, tdir, id_):
if not mi.application_id:
mi.application_id = '__calibre_dummy__'
with open(os.path.join(tdir, '%s.opf'%id_), 'wb') as f:
f.write(metadata_to_opf(mi))
f.write(metadata_to_opf(mi, default_lang='und'))
if cdata:
with open(os.path.join(tdir, str(id_)), 'wb') as f:
f.write(cdata)

View File

@ -308,6 +308,11 @@ class MobiMLizer(object):
istate = copy.copy(istates[-1])
istate.rendered = False
istate.list_num = 0
if tag == 'ol' and 'start' in elem.attrib:
try:
istate.list_num = int(elem.attrib['start'])-1
except:
pass
istates.append(istate)
left = 0
display = style['display']

View File

@ -504,6 +504,9 @@ class Indexer(object): # {{{
else:
self.indices = self.create_book_index()
if not self.indices:
raise ValueError('No valid entries in TOC, cannot generate index')
self.records.append(self.create_index_record())
self.records.insert(0, self.create_header())
self.records.extend(self.cncx.records)

View File

@ -590,7 +590,7 @@ class MobiWriter(object):
Write the PalmDB header
'''
title = ascii_filename(unicode(self.oeb.metadata.title[0])).replace(
' ', '_')
' ', '_')[:32]
title = title + (b'\0' * (32 - len(title)))
now = int(time.time())
nrecords = len(self.records)

View File

@ -116,6 +116,12 @@ class Serializer(object):
buf.write(b'</html>')
self.end_offset = buf.tell()
self.fixup_links()
if self.start_offset is None:
# If we don't set a start offset, the stupid Kindle will
# open the book at the location of the first IndexEntry, which
# could be anywhere. So ensure the book is always opened at the
# beginning, instead.
self.start_offset = self.body_start_offset
return buf.getvalue()
def serialize_head(self):

View File

@ -27,6 +27,7 @@ from calibre import force_unicode
from calibre.ebooks import unit_convert
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
from calibre.ebooks.cssselect import css_to_xpath_no_case
cssutils_log.setLevel(logging.WARN)
@ -98,32 +99,71 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
'x-large', 'xx-large'])
class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
class CSSSelector(object):
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
def __init__(self, css, namespaces=XPNSMAP):
css = self.MIN_SPACE_RE.sub(r'\1', css)
if isinstance(css, unicode):
# Workaround for bug in lxml on windows/OS X that causes a massive
# memory leak with non ASCII selectors
css = css.encode('ascii', 'ignore').decode('ascii')
try:
path = css_to_xpath(css)
except UnicodeEncodeError: # Bug in css_to_xpath
path = '/'
except NotImplementedError: # Probably a subselect like :hover
path = '/'
path = self.LOCAL_NAME_RE.sub(r"local-name() = '", path)
etree.XPath.__init__(self, path, namespaces=namespaces)
path = self.LOCAL_NAME_RE.sub(r"local-name() = '", css_to_xpath(css))
self.sel1 = etree.XPath(css_to_xpath(css), namespaces=namespaces)
except:
self.sel1 = lambda x: []
try:
path = self.LOCAL_NAME_RE.sub(r"local-name() = '",
css_to_xpath_no_case(css))
self.sel2 = etree.XPath(path, namespaces=namespaces)
except:
self.sel2 = lambda x: []
self.sel2_use_logged = False
self.css = css
def __call__(self, node, log):
    # Evaluate the selector on node. Try the case-sensitive XPath
    # first; only when it matches nothing, fall back to the
    # case-insensitive variant, warning once per selector when the
    # fallback actually produces matches.
    try:
        ans = self.sel1(node)
    except (AssertionError, ExpressionError, etree.XPathSyntaxError,
        NameError, # thrown on OS X instead of SelectorSyntaxError
        SelectorSyntaxError):
        return []
    if not ans:
        try:
            ans = self.sel2(node)
        except:
            return []
        else:
            if ans and not self.sel2_use_logged:
                self.sel2_use_logged = True
                log.warn('Interpreting class and tag selectors case'
                    ' insensitively in the CSS selector: %s'%self.css)
    return ans
def __repr__(self):
return '<%s %s for %r>' % (
self.__class__.__name__,
hex(abs(id(self)))[2:],
self.css)
_selector_cache = {}
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
def get_css_selector(raw_selector):
    # Return a (cached) CSSSelector for raw_selector. Combinator
    # whitespace is normalised first so equivalent selectors share a
    # cache entry.
    css = MIN_SPACE_RE.sub(r'\1', raw_selector)
    if isinstance(css, unicode):
        # Workaround for bug in lxml on windows/OS X that causes a massive
        # memory leak with non ASCII selectors
        css = css.encode('ascii', 'ignore').decode('ascii')
    ans = _selector_cache.get(css, None)
    if ans is None:
        ans = CSSSelector(css)
        _selector_cache[css] = ans
    return ans
class Stylizer(object):
STYLESHEETS = WeakKeyDictionary()
@ -223,41 +263,12 @@ class Stylizer(object):
rules.sort()
self.rules = rules
self._styles = {}
class_sel_pat = re.compile(r'\.[a-z]+', re.IGNORECASE)
capital_sel_pat = re.compile(r'h|[A-Z]+')
for _, _, cssdict, text, _ in rules:
fl = ':first-letter' in text
if fl:
text = text.replace(':first-letter', '')
try:
selector = CSSSelector(text)
except (AssertionError, ExpressionError, etree.XPathSyntaxError,
NameError, # thrown on OS X instead of SelectorSyntaxError
SelectorSyntaxError):
continue
try:
matches = selector(tree)
except etree.XPathEvalError:
continue
if not matches:
ntext = capital_sel_pat.sub(lambda m: m.group().lower(), text)
if ntext != text:
self.logger.warn('Transformed CSS selector', text, 'to',
ntext)
selector = CSSSelector(ntext)
matches = selector(tree)
if not matches and class_sel_pat.match(text) and text.lower() != text:
found = False
ltext = text.lower()
for x in tree.xpath('//*[@class]'):
if ltext.endswith('.'+x.get('class').lower()):
matches.append(x)
found = True
if found:
self.logger.warn('Ignoring case mismatches for CSS selector: %s in %s'
%(text, item.href))
selector = get_css_selector(text)
matches = selector(tree, self.logger)
if fl:
from lxml.builder import ElementMaker
E = ElementMaker(namespace=XHTML_NS)

View File

@ -320,7 +320,8 @@ class CSSFlattener(object):
if self.context.insert_blank_line:
cssdict['margin-top'] = cssdict['margin-bottom'] = \
'%fem'%self.context.insert_blank_line_size
if self.context.remove_paragraph_spacing:
if (self.context.remove_paragraph_spacing and
cssdict.get('text-align', None) not in ('center', 'right')):
cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size
if cssdict:

View File

@ -98,6 +98,7 @@ gprefs.defaults['book_display_fields'] = [
]
gprefs.defaults['default_author_link'] = 'http://en.wikipedia.org/w/index.php?search={author}'
gprefs.defaults['preserve_date_on_ctl'] = True
gprefs.defaults['cb_fullscreen'] = False
# }}}
@ -173,6 +174,8 @@ def _config(): # {{{
help='Search history for the plugin preferences')
c.add_opt('shortcuts_search_history', default=[],
help='Search history for the keyboard preferences')
c.add_opt('tweaks_search_history', default=[],
help='Search history for tweaks')
c.add_opt('worker_limit', default=6,
help=_(
'Maximum number of simultaneous conversion/news download jobs. '
@ -186,7 +189,9 @@ def _config(): # {{{
c.add_opt('enforce_cpu_limit', default=True,
help=_('Limit max simultaneous jobs to number of CPUs'))
c.add_opt('gui_layout', choices=['wide', 'narrow'],
help=_('The layout of the user interface'), default='wide')
help=_('The layout of the user interface. Wide has the '
'book details panel on the right and narrow has '
'it at the bottom.'), default='wide')
c.add_opt('show_avg_rating', default=True,
help=_('Show the average rating per item indication in the tag browser'))
c.add_opt('disable_animations', default=False,

View File

@ -17,7 +17,7 @@ from calibre.gui2.actions import InterfaceAction
class GenerateCatalogAction(InterfaceAction):
name = 'Generate Catalog'
action_spec = (_('Create a catalog of the books in your calibre library'), 'catalog.png', 'Catalog builder', None)
action_spec = (_('Create catalog'), 'catalog.png', 'Catalog builder', None)
dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
def genesis(self):

View File

@ -9,8 +9,8 @@ Module to implement the Cover Flow feature
import sys, os, time
from PyQt4.Qt import (QImage, QSizePolicy, QTimer, QDialog, Qt, QSize,
QStackedLayout, QLabel, QByteArray, pyqtSignal)
from PyQt4.Qt import (QImage, QSizePolicy, QTimer, QDialog, Qt, QSize, QAction,
QStackedLayout, QLabel, QByteArray, pyqtSignal, QKeySequence)
from calibre import plugins
from calibre.gui2 import config, available_height, available_width, gprefs
@ -150,12 +150,39 @@ class CBDialog(QDialog):
if not self.restoreGeometry(geom):
h, w = available_height()-60, int(available_width()/1.5)
self.resize(w, h)
self.action_fs_toggle = a = QAction(self)
self.addAction(a)
a.setShortcuts([QKeySequence('F11', QKeySequence.PortableText),
QKeySequence('Ctrl+Shift+F', QKeySequence.PortableText)])
a.triggered.connect(self.toggle_fullscreen)
self.action_esc_fs = a = QAction(self)
a.triggered.connect(self.show_normal)
self.addAction(a)
a.setShortcuts([QKeySequence('Esc', QKeySequence.PortableText)])
self.pre_fs_geom = None
def closeEvent(self, *args):
geom = bytearray(self.saveGeometry())
gprefs['cover_browser_dialog_geometry'] = geom
if not self.isFullScreen():
geom = bytearray(self.saveGeometry())
gprefs['cover_browser_dialog_geometry'] = geom
self.closed.emit()
def show_normal(self):
    # Leave fullscreen and, if we saved the window geometry before
    # entering fullscreen, put the window back where it was.
    self.showNormal()
    geom = self.pre_fs_geom
    if geom is not None:
        self.restoreGeometry(geom)
        self.pre_fs_geom = None
def toggle_fullscreen(self, *args):
    # Switch between fullscreen and normal mode, remembering the
    # current geometry so it can be restored on the way back.
    if not self.isFullScreen():
        self.pre_fs_geom = bytearray(self.saveGeometry())
        self.showFullScreen()
    else:
        self.show_normal()
class CoverFlowMixin(object):
def __init__(self):
@ -228,7 +255,7 @@ class CoverFlowMixin(object):
d.addAction(self.cb_splitter.action_toggle)
self.cover_flow.setVisible(True)
self.cover_flow.setFocus(Qt.OtherFocusReason)
d.show()
d.showFullScreen() if gprefs['cb_fullscreen'] else d.show()
self.cb_splitter.button.set_state_to_hide()
d.closed.connect(self.cover_browser_closed)
self.cb_dialog = d

View File

@ -9,18 +9,20 @@ __docformat__ = 'restructuredtext en'
from calibre.gui2.complete import MultiCompleteComboBox
from calibre.utils.localization import lang_map
from calibre.utils.icu import sort_key
from calibre.utils.icu import sort_key, lower
class LanguagesEdit(MultiCompleteComboBox):
def __init__(self, parent=None):
MultiCompleteComboBox.__init__(self, parent)
self.setSizeAdjustPolicy(self.AdjustToMinimumContentsLengthWithIcon)
self.setMinimumContentsLength(20)
self._lang_map = lang_map()
self.names_with_commas = [x for x in self._lang_map.itervalues() if ',' in x]
self.comma_map = {k:k.replace(',', '|') for k in self.names_with_commas}
self.comma_rmap = {v:k for k, v in self.comma_map.iteritems()}
self._rmap = {v:k for k,v in self._lang_map.iteritems()}
self._rmap = {lower(v):k for k,v in self._lang_map.iteritems()}
all_items = sorted(self._lang_map.itervalues(),
key=sort_key)
@ -44,7 +46,7 @@ class LanguagesEdit(MultiCompleteComboBox):
ans = []
for name in vals:
if name:
code = self._rmap.get(name, None)
code = self._rmap.get(lower(name), None)
if code is not None:
ans.append(code)
return ans
@ -64,7 +66,7 @@ class LanguagesEdit(MultiCompleteComboBox):
bad = []
for name in vals:
if name:
code = self._rmap.get(name, None)
code = self._rmap.get(lower(name), None)
if code is None:
bad.append(name)
return bad

View File

@ -308,7 +308,7 @@ class AuthorSortEdit(EnLineEdit):
LABEL = _('Author s&ort:')
def __init__(self, parent, authors_edit, autogen_button, db,
copy_a_to_as_action, copy_as_to_a_action):
copy_a_to_as_action, copy_as_to_a_action, a_to_as, as_to_a):
EnLineEdit.__init__(self, parent)
self.authors_edit = authors_edit
self.db = db
@ -333,6 +333,8 @@ class AuthorSortEdit(EnLineEdit):
autogen_button.clicked.connect(self.auto_generate)
copy_a_to_as_action.triggered.connect(self.auto_generate)
copy_as_to_a_action.triggered.connect(self.copy_to_authors)
a_to_as.triggered.connect(self.author_to_sort)
as_to_a.triggered.connect(self.sort_to_author)
self.update_state()
@dynamic_property
@ -389,10 +391,21 @@ class AuthorSortEdit(EnLineEdit):
def auto_generate(self, *args):
au = unicode(self.authors_edit.text())
au = re.sub(r'\s+et al\.$', '', au)
au = re.sub(r'\s+et al\.$', '', au).strip()
authors = string_to_authors(au)
self.current_val = self.db.author_sort_from_authors(authors)
def author_to_sort(self, *args):
    # Copy the authors field verbatim into the author-sort field,
    # after stripping any trailing "et al." marker.
    text = re.sub(r'\s+et al\.$', '', unicode(self.authors_edit.text())).strip()
    if text:
        self.current_val = text
def sort_to_author(self, *args):
    # Copy the author-sort value back into the authors field as a
    # single author entry (only when non-empty).
    val = self.current_val
    if val:
        self.authors_edit.current_val = [val]
def initialize(self, db, id_):
self.current_val = db.author_sort(id_, index_is_id=True)

View File

@ -130,10 +130,15 @@ class MetadataSingleDialogBase(ResizableDialog):
ac = m.addAction(QIcon(I('forward.png')), _('Set author sort from author'))
ac2 = m.addAction(QIcon(I('back.png')), _('Set author from author sort'))
ac3 = m.addAction(QIcon(I('user_profile.png')), _('Manage authors'))
ac4 = m.addAction(QIcon(I('next.png')),
_('Copy author to author sort'))
ac5 = m.addAction(QIcon(I('previous.png')),
_('Copy author sort to author'))
b.setMenu(m)
self.authors = AuthorsEdit(self, ac3)
self.author_sort = AuthorSortEdit(self, self.authors, b, self.db, ac,
ac2)
ac2, ac4, ac5)
self.basic_metadata_widgets.extend([self.authors, self.author_sort])
self.swap_title_author_button = QToolButton(self)
@ -723,7 +728,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
tl.addWidget(self.manage_authors_button, 2, 0, 1, 1)
tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)
tl.addWidget(self.paste_isbn_button, 12, 0, 1, 1)
create_row(0, self.title, self.title_sort,
button=self.deduce_title_sort_button, span=2,
@ -859,7 +864,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
tl.addWidget(self.swap_title_author_button, 0, 0, 2, 1)
tl.addWidget(self.manage_authors_button, 2, 0, 2, 1)
tl.addWidget(self.paste_isbn_button, 11, 0, 1, 1)
tl.addWidget(self.paste_isbn_button, 12, 0, 1, 1)
create_row(0, self.title, self.title_sort,
button=self.deduce_title_sort_button, span=2,

View File

@ -6,16 +6,15 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QApplication, QFont, QFontInfo, QFontDialog,
QAbstractListModel, Qt, QIcon)
QAbstractListModel, Qt, QIcon, QKeySequence)
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, CommaSeparatedList
from calibre.gui2.preferences.look_feel_ui import Ui_Form
from calibre.gui2 import config, gprefs, qt_app
from calibre.gui2 import config, gprefs, qt_app, NONE
from calibre.utils.localization import (available_translations,
get_language, get_lang)
from calibre.utils.config import prefs
from calibre.utils.icu import sort_key
from calibre.gui2 import NONE
from calibre.gui2.book_details import get_field_list
from calibre.gui2.preferences.coloring import EditRules
@ -130,6 +129,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('disable_tray_notification', config)
r('use_roman_numerals_for_series_number', config)
r('separate_cover_flow', config, restart_required=True)
r('cb_fullscreen', gprefs)
choices = [(_('Off'), 'off'), (_('Small'), 'small'),
(_('Medium'), 'medium'), (_('Large'), 'large')]
@ -171,6 +171,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.tabWidget.addTab(self.edit_rules,
QIcon(I('format-fill-color.png')), _('Column coloring'))
self.tabWidget.setCurrentIndex(0)
keys = [QKeySequence('F11', QKeySequence.PortableText), QKeySequence(
'Ctrl+Shift+F', QKeySequence.PortableText)]
keys = [unicode(x.toString(QKeySequence.NativeText)) for x in keys]
self.fs_help_msg.setText(unicode(self.fs_help_msg.text())%(
_(' or ').join(keys)))
def initialize(self):
ConfigWidgetBase.initialize(self)

View File

@ -417,7 +417,7 @@ then the tags will be displayed each on their own line.</string>
<item row="1" column="1">
<widget class="QSpinBox" name="opt_cover_flow_queue_length"/>
</item>
<item row="2" column="0" colspan="2">
<item row="4" column="0" colspan="2">
<spacer name="verticalSpacer_4">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -430,6 +430,26 @@ then the tags will be displayed each on their own line.</string>
</property>
</spacer>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_cb_fullscreen">
<property name="text">
<string>When showing cover browser in separate window, show it &amp;fullscreen</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QLabel" name="fs_help_msg">
<property name="styleSheet">
<string notr="true">margin-left: 1.5em</string>
</property>
<property name="text">
<string>You can press the %s keys to toggle full screen mode.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
</widget>

View File

@ -9,14 +9,19 @@ import textwrap
from calibre.gui2.preferences import ConfigWidgetBase, test_widget, AbortCommit
from calibre.gui2.preferences.tweaks_ui import Ui_Form
from calibre.gui2 import error_dialog, NONE
from calibre.gui2 import error_dialog, NONE, info_dialog
from calibre.utils.config import read_raw_tweaks, write_tweaks
from calibre.gui2.widgets import PythonHighlighter
from calibre import isbytestring
from calibre.utils.icu import lower
from calibre.utils.search_query_parser import (ParseException,
SearchQueryParser)
from PyQt4.Qt import (QAbstractListModel, Qt, QStyledItemDelegate, QStyle,
QStyleOptionViewItem, QFont, QDialogButtonBox, QDialog,
QVBoxLayout, QPlainTextEdit, QLabel)
QVBoxLayout, QPlainTextEdit, QLabel, QModelIndex)
ROOT = QModelIndex()
class Delegate(QStyledItemDelegate): # {{{
def __init__(self, view):
@ -35,7 +40,7 @@ class Delegate(QStyledItemDelegate): # {{{
class Tweak(object): # {{{
def __init__(self, name, doc, var_names, defaults, custom):
translate = __builtins__['_']
translate = _
self.name = translate(name)
self.doc = translate(doc.strip())
self.var_names = var_names
@ -87,10 +92,11 @@ class Tweak(object): # {{{
# }}}
class Tweaks(QAbstractListModel): # {{{
class Tweaks(QAbstractListModel, SearchQueryParser): # {{{
def __init__(self, parent=None):
QAbstractListModel.__init__(self, parent)
SearchQueryParser.__init__(self, ['all'])
raw_defaults, raw_custom = read_raw_tweaks()
self.parse_tweaks(raw_defaults, raw_custom)
@ -223,6 +229,54 @@ class Tweaks(QAbstractListModel): # {{{
def set_plugin_tweaks(self, d):
self.plugin_tweaks = d
def universal_set(self):
    # All rows of the model are candidates for a search.
    return {row for row in xrange(self.rowCount())}
def get_matches(self, location, query, candidates=None):
    # Return the set of candidate rows whose tweak name contains
    # *query* (case-insensitive via ICU lower()).
    if candidates is None:
        candidates = self.universal_set()
    matches = set()
    if not query:
        return matches
    q = lower(query)
    for row in candidates:
        tweak = self.data(self.index(row), Qt.UserRole)
        # Deliberately matching only the name; doc matching was
        # disabled in the original: or q in lower(tweak.doc)
        if q in lower(tweak.name):
            matches.add(row)
    return matches
def find(self, query):
    # Return the index of the first row matching *query*, or the
    # invalid ROOT index when the query is empty or has no matches.
    query = query.strip()
    if not query:
        return ROOT
    matches = self.parse(query)
    if not matches:
        return ROOT
    return self.index(min(matches))
def find_next(self, idx, query, backwards=False):
    # Return the index of the next (or previous) match after *idx*,
    # wrapping around at either end of the match list.
    query = query.strip()
    if not query:
        return idx
    matches = self.parse(query)
    if not matches:
        return idx
    loc = idx.row()
    if loc not in matches:
        # Current row is not a match: restart from the first match.
        return self.find(query)
    if len(matches) == 1:
        # Only match is the current row; nothing to move to.
        return ROOT
    ordered = list(sorted(matches))
    pos = ordered.index(loc)
    step = -1 if backwards else 1
    # Modulo arithmetic implements the wrap-around in both directions.
    return self.index(ordered[(pos + step) % len(ordered)])
# }}}
class PluginTweaks(QDialog): # {{{
@ -257,12 +311,18 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.delegate = Delegate(self.tweaks_view)
self.tweaks_view.setItemDelegate(self.delegate)
self.tweaks_view.currentChanged = self.current_changed
self.view = self.tweaks_view
self.highlighter = PythonHighlighter(self.edit_tweak.document())
self.restore_default_button.clicked.connect(self.restore_to_default)
self.apply_button.clicked.connect(self.apply_tweak)
self.plugin_tweaks_button.clicked.connect(self.plugin_tweaks)
self.splitter.setStretchFactor(0, 1)
self.splitter.setStretchFactor(1, 100)
self.next_button.clicked.connect(self.find_next)
self.previous_button.clicked.connect(self.find_previous)
self.search.initialize('tweaks_search_history', help_text=
_('Search for tweak'))
self.search.search.connect(self.find)
def plugin_tweaks(self):
@ -290,7 +350,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.changed_signal.emit()
def initialize(self):
self.tweaks = Tweaks()
self.tweaks = self._model = Tweaks()
self.tweaks_view.setModel(self.tweaks)
def restore_to_default(self, *args):
@ -338,6 +398,45 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
ConfigWidgetBase.commit(self)
return True
def find(self, query):
    # Search the tweaks model for *query* and highlight the first hit.
    # Reports parse failures through the search box and shows an info
    # dialog when nothing matches.
    if not query:
        return
    try:
        idx = self._model.find(query)
    except ParseException:
        self.search.search_done(False)
        return
    self.search.search_done(True)
    if not idx.isValid():
        # Fixed copy-paste error: this panel searches tweaks, not
        # keyboard shortcuts.
        info_dialog(self, _('No matches'),
                _('Could not find any tweaks matching %s')%query,
                show=True, show_copy_button=False)
        return
    self.highlight_index(idx)
def highlight_index(self, idx):
    # Scroll to, select and focus the given model index; invalid
    # indices are ignored.
    if not idx.isValid():
        return
    self.view.scrollTo(idx)
    sel = self.view.selectionModel()
    sel.select(idx, sel.ClearAndSelect)
    self.view.setCurrentIndex(idx)
def find_next(self, *args):
    # Advance the selection to the next tweak matching the current
    # search text, starting from row 0 when nothing is selected.
    cur = self.view.currentIndex()
    if not cur.isValid():
        cur = self._model.index(0)
    nxt = self._model.find_next(cur, unicode(self.search.currentText()))
    self.highlight_index(nxt)
def find_previous(self, *args):
    # Move the selection to the previous tweak matching the current
    # search text, starting from row 0 when nothing is selected.
    cur = self.view.currentIndex()
    if not cur.isValid():
        cur = self._model.index(0)
    prev = self._model.find_next(cur, unicode(self.search.currentText()),
            backwards=True)
    self.highlight_index(prev)
if __name__ == '__main__':
from PyQt4.Qt import QApplication

View File

@ -6,7 +6,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>660</width>
<width>756</width>
<height>531</height>
</rect>
</property>
@ -14,8 +14,24 @@
<string>Form</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_4">
<item>
<widget class="QLabel" name="label_18">
<property name="text">
<string>Values for the tweaks are shown below. Edit them to change the behavior of calibre. Your changes will only take effect &lt;b&gt;after a restart&lt;/b&gt; of calibre.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QSplitter" name="splitter">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>10</verstretch>
</sizepolicy>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
@ -24,16 +40,6 @@
</property>
<widget class="QWidget" name="layoutWidget">
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QLabel" name="label_18">
<property name="text">
<string>Values for the tweaks are shown below. Edit them to change the behavior of calibre. Your changes will only take effect &lt;b&gt;after a restart&lt;/b&gt; of calibre.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QListView" name="tweaks_view">
<property name="sizePolicy">
@ -72,8 +78,8 @@
</layout>
</widget>
<widget class="QWidget" name="layoutWidget">
<layout class="QVBoxLayout" name="verticalLayout_3">
<item>
<layout class="QGridLayout" name="gridLayout_3">
<item row="1" column="0" colspan="3">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Help</string>
@ -92,7 +98,7 @@
</layout>
</widget>
</item>
<item>
<item row="2" column="0" colspan="3">
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>Edit tweak</string>
@ -128,12 +134,59 @@
</layout>
</widget>
</item>
<item row="0" column="0">
<widget class="SearchBox2" name="search">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
<horstretch>10</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="sizeAdjustPolicy">
<enum>QComboBox::AdjustToMinimumContentsLength</enum>
</property>
<property name="minimumContentsLength">
<number>10</number>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QPushButton" name="next_button">
<property name="text">
<string>&amp;Next</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-down.png</normaloff>:/images/arrow-down.png</iconset>
</property>
</widget>
</item>
<item row="0" column="2">
<widget class="QPushButton" name="previous_button">
<property name="text">
<string>&amp;Previous</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<normaloff>:/images/arrow-up.png</normaloff>:/images/arrow-up.png</iconset>
</property>
</widget>
</item>
</layout>
</widget>
</widget>
</item>
</layout>
</widget>
<resources/>
<customwidgets>
<customwidget>
<class>SearchBox2</class>
<extends>QComboBox</extends>
<header>calibre/gui2/search_box.h</header>
</customwidget>
</customwidgets>
<resources>
<include location="../../../../resources/images.qrc"/>
</resources>
<connections/>
</ui>

View File

@ -15,7 +15,7 @@ from calibre.utils.config import tweaks, prefs
from calibre.utils.date import parse_date, now, UNDEFINED_DATE
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.utils.pyparsing import ParseException
from calibre.utils.localization import canonicalize_lang
from calibre.utils.localization import canonicalize_lang, lang_map
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre import prints
@ -728,7 +728,9 @@ class ResultCache(SearchQueryParser): # {{{
elif loc == db_col['languages']:
q = canonicalize_lang(query)
if q is None:
q = query
lm = lang_map()
rm = {v.lower():k for k,v in lm.iteritems()}
q = rm.get(query, query)
else:
q = query

View File

@ -290,7 +290,10 @@ class DatabaseException(Exception):
def __init__(self, err, tb):
tb = '\n\t'.join(('\tRemote'+tb).splitlines())
msg = unicode(err) +'\n' + tb
try:
msg = unicode(err) +'\n' + tb
except:
msg = repr(err) + '\n' + tb
Exception.__init__(self, msg)
self.orig_err = err
self.orig_tb = tb

View File

@ -35,7 +35,7 @@ def load_icu():
if _icu is None:
print plugins['icu'][1]
else:
if not _icu.ok:
if not getattr(_icu, 'ok', False):
print 'icu not ok'
_icu = None
return _icu

View File

@ -28,6 +28,7 @@ from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.date import now as nowf
from calibre.utils.magick.draw import save_cover_data_to, add_borders_to_image
from calibre.utils.localization import canonicalize_lang
class LoginFailed(ValueError):
pass
@ -1117,6 +1118,9 @@ class BasicNewsRecipe(Recipe):
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
mi.timestamp = nowf()
mi.comments = self.description
language = canonicalize_lang(self.language)
if language is not None:
mi.language = language
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
mi.pubdate = nowf()