This commit is contained in:
GRiker 2012-10-19 03:51:20 -06:00
commit 41e40fbd5b
150 changed files with 72451 additions and 28225 deletions

View File

@ -19,6 +19,55 @@
# new recipes: # new recipes:
# - title: # - title:
- version: 0.9.3
date: 2012-10-19
new features:
- title: "Conversion: Add support for CSS 3 selectors by switching to the new cssselect module"
- title: "Drivers for the WayteQ xBook and the Topwise Android tablet"
tickets: [1066083,1067105]
- title: "Add an output profile for the Kindle PaperWhite"
- title: "Kobo driver: Improve performance when a large number of shelves are created on the device."
tickets: [1066633]
- title: "Catalogs: Put catalog sections at top level of ToC for EPUB/AZW3 catalogs."
- title: "Adding books: When duplicates are found, show the list of possible duplicates in calibre with title and author."
bug fixes:
- title: "KF8 Input: Fix conversion/viewing of KF8 files generated with the unreleased? kindlegen 2.7."
tickets: [1067310]
- title: "Kindle driver: Increase the size of the cover thumbnails sent by calibre to the device. This fixes the problem of cover thumbnails not showing up on the PaperWhite"
- title: "MTP driver: Fix sorting on the title column of the device view."
tickets: [1067562]
- title: "Catalogs: Fix regression that caused authors and titles to be incorrectly listed under symbols on OSX < 10.8."
- title: "Catalogs: Fix error when generating catalog in non English locale and the user has specified a prefix rule using a Yes/no column."
tickets: [1065452]
- title: "E-book viewer: Remove the reload entry from the context menu as it is not supported in paged mode."
tickets: [1065615]
improved recipes:
- Richmond Times Dispatch
- Various Polish news sources
- Aksiyon dergisi
- Spektrum der Wissenschaft
- Zeit Online
- Baltimore Sun
- LWN Weekly
- The Sun
new recipes:
- title: Various Polish news sources
author: fenuks
- version: 0.9.2 - version: 0.9.2
date: 2012-10-11 date: 2012-10-11

View File

@ -3,7 +3,7 @@ import re
class Adventure_zone(BasicNewsRecipe): class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone' title = u'Adventure Zone'
__author__ = 'fenuks' __author__ = 'fenuks'
description = 'Adventure zone - adventure games from A to Z' description = u'Adventure zone - adventure games from A to Z'
category = 'games' category = 'games'
language = 'pl' language = 'pl'
no_stylesheets = True no_stylesheets = True
@ -11,7 +11,9 @@ class Adventure_zone(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
index='http://www.adventure-zone.info/fusion/' index='http://www.adventure-zone.info/fusion/'
use_embedded_content=False use_embedded_content=False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')] preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
(re.compile(r'\<table .*?\>'), lambda match: ''),
(re.compile(r'\<tbody\>'), lambda match: '')]
remove_tags_before= dict(name='td', attrs={'class':'main-bg'}) remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})] remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
remove_tags_after= dict(id='comments') remove_tags_after= dict(id='comments')
@ -52,6 +54,11 @@ class Adventure_zone(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
footer=soup.find(attrs={'class':'news-footer middle-border'}) footer=soup.find(attrs={'class':'news-footer middle-border'})
r = soup.find(name='td', attrs={'class':'capmain'})
if r:
r.name='h1'
for item in soup.findAll(name=['tr', 'td']):
item.name='div'
if footer and len(footer('a'))>=2: if footer and len(footer('a'))>=2:
footer('a')[1].extract() footer('a')[1].extract()
for item in soup.findAll(style=True): for item in soup.findAll(style=True):

View File

@ -17,18 +17,13 @@ class Aksiyon (BasicNewsRecipe):
category = 'news, haberler,TR,gazete' category = 'news, haberler,TR,gazete'
language = 'tr' language = 'tr'
publication_type = 'magazine' publication_type = 'magazine'
#extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' auto_cleanup = True
#keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
remove_tags = [dict(name='img', attrs={'src':[ 'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp','/aksiyon/images/template/green/baslik0.gif','mobile/home.jpg']}) ]
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg' cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg' masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
remove_empty_feeds= True remove_empty_feeds= True
remove_attributes = ['width','height']
feeds = [ feeds = [
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
( u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'), ( u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
( u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'), ( u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
( u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'), ( u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
( u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'), ( u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
@ -37,17 +32,15 @@ class Aksiyon (BasicNewsRecipe):
( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'), ( u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
( u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'), ( u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
( u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'), ( u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
( u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'), ( u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
( u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'), ( u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'), ( u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
( u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'), ( u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'), ( u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'), ( u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
] ]
def print_version(self, url): #def print_version(self, url):
return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?') #return url.replace('http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&', 'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')

View File

@ -11,10 +11,9 @@ class BaltimoreSun(BasicNewsRecipe):
title = 'The Baltimore Sun' title = 'The Baltimore Sun'
__author__ = 'Josh Hall' __author__ = 'Josh Hall'
description = 'Complete local news and blogs from Baltimore' description = 'Complete local news and blogs from Baltimore'
language = 'en' language = 'en'
version = 2 version = 2.1
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
use_embedded_content = False use_embedded_content = False
@ -22,6 +21,7 @@ class BaltimoreSun(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
recursions = 1 recursions = 1
ignore_duplicate_articles = {'title'}
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}), keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}), dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
] ]
@ -201,3 +201,5 @@ class BaltimoreSun(BasicNewsRecipe):
tag.extract() tag.extract()
for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})): for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})):
tag.extract() tag.extract()
return soup

View File

@ -12,9 +12,9 @@ class BenchmarkPl(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets=True no_stylesheets=True
preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')] preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})] keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
remove_tags_after=dict(name='div', attrs={'class':'body'}) remove_tags_after=dict(name='div', attrs={'class':'body'})
remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})] remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery', 'breadcrumb', 'footer', 'moreTopics']}), dict(name='table', attrs={'background':'http://www.benchmark.pl/uploads/backend_img/a/fotki_newsy/opinie_dnia/bg.png'}), dict(name='table', attrs={'width':'210', 'cellspacing':'1', 'cellpadding':'4', 'border':'0', 'align':'right'})]
INDEX= 'http://www.benchmark.pl' INDEX= 'http://www.benchmark.pl'
feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'), feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
(u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')] (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]

View File

@ -13,6 +13,7 @@ class Biolog_pl(BasicNewsRecipe):
masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png' masthead_url= 'http://www.biolog.pl/naukowy,portal,biolog.png'
cover_url='http://www.biolog.pl/naukowy,portal,biolog.png' cover_url='http://www.biolog.pl/naukowy,portal,biolog.png'
no_stylesheets = True no_stylesheets = True
ignore_duplicate_articles = {'title', 'url'}
#keeps_only_tags=[dict(id='main')] #keeps_only_tags=[dict(id='main')]
remove_tags_before=dict(id='main') remove_tags_before=dict(id='main')
remove_tags_after=dict(name='a', attrs={'name':'komentarze'}) remove_tags_after=dict(name='a', attrs={'name':'komentarze'})

View File

@ -13,11 +13,11 @@ class CGM(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
remove_empty_feeds= True remove_empty_feeds= True
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheers=True no_stylesheets = True
extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}' extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}'
remove_tags_before=dict(id='mainContent') remove_tags_before=dict(id='mainContent')
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'}) remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
remove_tags=[dict(name='div', attrs={'class':'fbContainer'}), remove_tags=[dict(name='div', attrs={'class':['fbContainer', 'socials']}),
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}), dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
dict(id=['movieShare', 'container'])] dict(id=['movieShare', 'container'])]
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'), feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),

View File

@ -19,7 +19,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ] preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
keep_only_tags=[dict(attrs={'class':['news', 'entry single']})] keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
remove_tags = [dict(name='div', attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']})] remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze')]
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})] #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]

View File

@ -12,9 +12,8 @@ class Dzieje(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript=True remove_javascript=True
no_stylesheets= True no_stylesheets= True
remove_tags_before= dict(name='h1', attrs={'class':'title'}) keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')]
remove_tags_after= dict(id='dogory') remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')]
remove_tags=[dict(id='dogory')]
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')] feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]

View File

@ -15,6 +15,7 @@ class Dziennik_pl(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript=True remove_javascript=True
remove_empty_feeds=True remove_empty_feeds=True
ignore_duplicate_articles = {'title', 'url'}
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}' extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')] preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('<p><strong><a href=".*?">&gt;&gt;&gt; CZYTAJ TAKŻE: ".*?"</a></strong></p>'), lambda m: '')]
keep_only_tags=[dict(id='article')] keep_only_tags=[dict(id='article')]
@ -59,8 +60,6 @@ class Dziennik_pl(BasicNewsRecipe):
appendtag.find('div', attrs={'class':'article_paginator'}).extract() appendtag.find('div', attrs={'class':'article_paginator'}).extract()
def preprocess_html(self, soup): def preprocess_html(self, soup):
self.append_page(soup, soup.body) self.append_page(soup, soup.body)
return soup return soup

View File

@ -13,12 +13,12 @@ class FilmWebPl(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets= True no_stylesheets= True
remove_empty_feeds=True remove_empty_feeds=True
ignore_duplicate_articles = {'title', 'url'}
preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')] preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}' extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})] remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})] keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
feeds = [(u'Wszystkie newsy', u'http://www.filmweb.pl/feed/news/latest'), feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
(u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'), (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
(u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'), (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
(u'News / Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'), (u'News / Box office', u'http://www.filmweb.pl/feed/news/category/boxoffice'),
@ -41,7 +41,6 @@ class FilmWebPl(BasicNewsRecipe):
if skip_tag is not None: if skip_tag is not None:
return self.index_to_soup(skip_tag['href'], raw=True) return self.index_to_soup(skip_tag['href'], raw=True)
def preprocess_html(self, soup): def preprocess_html(self, soup):
for a in soup('a'): for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:

23
recipes/forsal.recipe Normal file
View File

@ -0,0 +1,23 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class ForsalPL(BasicNewsRecipe):
    """Recipe for Forsal.pl, a Polish financial news portal.

    Articles are collected from the site's Atom feeds (one per tag) and,
    whenever the article URL carries a numeric id, fetched through the
    site's print view (see ``print_version``).
    """

    title = u'Forsal.pl'
    __author__ = 'fenuks'
    description = u'Na portalu finansowym Forsal.pl znajdziesz najświeższe wiadomości finansowe i analizy. Kliknij i poznaj aktualne kursy walut, notowania giełdowe oraz inne wiadomości ze świata finansów.'
    category = 'economy, finance'
    language = 'pl'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    # The tag feeds below overlap (e.g. 'komentarz' vs 'komentarz;gielda'),
    # so the same article can show up in several of them.
    ignore_duplicate_articles = {'title', 'url'}
    cover_url = 'http://www.bizneswnieruchomosciach.pl/wp-content/uploads/2010/07/logo_forsal.jpg'
    no_stylesheets = True
    remove_tags = [
        dict(name='div', attrs={'class': 'related'}),
        dict(name='img', attrs={'title': 'Forsal'}),
    ]
    feeds = [
        (u'Najnowsze', u'http://forsal.pl/atom/najnowsze'),
        (u'Tylko na forsal.pl', u'http://forsal.pl/atom/tagi/forsal'),
        (u'Publicystyka', u'http://forsal.pl/atom/tagi/opinia'),
        (u'Bloomberg', u'http://forsal.pl/atom/tagi/bloomberg'),
        (u'Financial Times', u'http://forsal.pl/atom/tagi/financial_times'),
        (u'Gie\u0142da', u'http://forsal.pl/atom/tagi/gielda'),
        (u'Waluty', u'http://forsal.pl/atom/tagi/waluty'),
        (u'Surowce', u'http://forsal.pl/atom/tagi/surowce'),
        # Title was misspelled as 'Komenarze finasnowe' in the original.
        (u'Komentarze finansowe', u'http://forsal.pl/atom/tagi/komentarz'),
        (u'Komentarze gie\u0142dowe', u'http://forsal.pl/atom/tagi/komentarz;gielda'),
        (u'Komentarze walutowe', u'http://forsal.pl/atom/tagi/komentarz;waluty'),
        (u'Makroekonomia', u'http://forsal.pl/atom/tagi/makroekonomia'),
        (u'Handel', u'http://forsal.pl/atom/tagi/handel'),
        (u'Nieruchomo\u015bci', u'http://forsal.pl/atom/tagi/nieruchomosci'),
        (u'Motoryzacja', u'http://forsal.pl/atom/tagi/motoryzacja'),
        (u'Finanse', u'http://forsal.pl/atom/tagi/finanse'),
        (u'Transport', u'http://forsal.pl/atom/tagi/transport'),
        (u'Media', u'http://forsal.pl/atom/tagi/media'),
        (u'Telekomunikacja', u'http://forsal.pl/atom/tagi/telekomunikacja'),
        (u'Energetyka', u'http://forsal.pl/atom/tagi/energetyka'),
        (u'Przemys\u0142', u'http://forsal.pl/atom/tagi/przemysl'),
        (u'Moja firma', u'http://forsal.pl/atom/tagi/moja_firma'),
    ]

    def print_version(self, url):
        """Return the print-view URL for *url*, or *url* itself as a fallback.

        The first ``/<digits>,`` segment found in *url* is taken, its
        trailing comma is dropped, and the remaining ``/<digits>`` is
        appended to ``http://forsal.pl/drukowanie``. When the URL has no
        such segment it is returned unchanged.
        """
        # Plain raw string instead of the Python-2-only ur'' prefix; the
        # pattern is pure ASCII, so matching behaviour is unchanged.
        url_id = re.search(r'/[0-9]+,', url)
        if not url_id:
            return url
        # group(0) looks like '/123456,' -- strip the trailing comma.
        return 'http://forsal.pl/drukowanie' + url_id.group(0)[:-1]

View File

@ -15,15 +15,28 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_javascript=True remove_javascript=True
no_stylesheets=True no_stylesheets=True
remove_tags_before=dict(id='k0') ignore_duplicate_articles = {'title', 'url'}
remove_tags_after=dict(id='banP4') keep_only_tags = dict(id=['gazeta_article', 'article'])
remove_tags=[dict(name='div', attrs={'class':'rel_box'}), dict(attrs={'class':['date', 'zdjP', 'zdjM', 'pollCont', 'rel_video', 'brand', 'txt_upl']}), dict(name='div', attrs={'id':'footer'})] remove_tags_after = dict(id='gazeta_article_share')
remove_tags = [dict(attrs={'class':['artReadMore', 'gazeta_article_related_new', 'txt_upl']}), dict(id=['gazeta_article_likes', 'gazeta_article_tools', 'rel', 'gazeta_article_tags', 'gazeta_article_share', 'gazeta_article_brand', 'gazeta_article_miniatures'])]
feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'), feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),
(u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'), (u'Wyborcza.biz', u'http://wyborcza.biz/pub/rss/wyborcza_biz_wiadomosci.htm'),
(u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'), (u'Komentarze', u'http://rss.feedsportal.com/c/32739/f/530312/index.rss'),
(u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'), (u'Kultura', u'http://rss.gazeta.pl/pub/rss/gazetawyborcza_kultura.xml'),
(u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'), (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'), (u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'), (u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'), (u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'), (u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'), (u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'), (u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'), (u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'), (u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'), (u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'), (u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss'), (u'Nekrologi', u'http://rss.feedsportal.com/c/32739/f/530358/index.rss') (u'Nauka', u'http://rss.feedsportal.com/c/32739/f/530269/index.rss'),
] (u'Opinie', u'http://rss.gazeta.pl/pub/rss/opinie.xml'),
(u'Gazeta \u015awi\u0105teczna', u'http://rss.feedsportal.com/c/32739/f/530431/index.rss'),
#(u'Du\u017cy Format', u'http://rss.feedsportal.com/c/32739/f/530265/index.rss'),
(u'Witamy w Polsce', u'http://rss.feedsportal.com/c/32739/f/530476/index.rss'),
(u'M\u0119ska Muzyka', u'http://rss.feedsportal.com/c/32739/f/530337/index.rss'),
(u'Lata Lec\u0105', u'http://rss.feedsportal.com/c/32739/f/530326/index.rss'),
(u'Solidarni z Tybetem', u'http://rss.feedsportal.com/c/32739/f/530461/index.rss'),
(u'W pon. - \u017bakowski', u'http://rss.feedsportal.com/c/32739/f/530491/index.rss'),
(u'We wt. - Kolenda-Zalewska', u'http://rss.feedsportal.com/c/32739/f/530310/index.rss'),
(u'\u015aroda w \u015brod\u0119', u'http://rss.feedsportal.com/c/32739/f/530428/index.rss'),
(u'W pi\u0105tek - Olejnik', u'http://rss.feedsportal.com/c/32739/f/530364/index.rss')
]
def skip_ad_pages(self, soup): def skip_ad_pages(self, soup):
tag=soup.find(name='a', attrs={'class':'btn'}) tag=soup.find(name='a', attrs={'class':'btn'})

View File

@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class Gildia(BasicNewsRecipe): class Gildia(BasicNewsRecipe):
title = u'Gildia.pl' title = u'Gildia.pl'
@ -11,6 +12,8 @@ class Gildia(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_empty_feeds=True remove_empty_feeds=True
no_stylesheets=True no_stylesheets=True
ignore_duplicate_articles = {'title', 'url'}
preprocess_regexps = [(re.compile(ur'</?sup>'), lambda match: '') ]
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})] remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
keep_only_tags=dict(name='div', attrs={'class':'widetext'}) keep_only_tags=dict(name='div', attrs={'class':'widetext'})
feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')] feeds = [(u'Gry', u'http://www.gry.gildia.pl/rss'), (u'Literatura', u'http://www.literatura.gildia.pl/rss'), (u'Film', u'http://www.film.gildia.pl/rss'), (u'Horror', u'http://www.horror.gildia.pl/rss'), (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'), (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'), (u'Manga i anime', u'http://www.manga.gildia.pl/rss'), (u'Star Wars', u'http://www.starwars.gildia.pl/rss'), (u'Techno', u'http://www.techno.gildia.pl/rss'), (u'Historia', u'http://www.historia.gildia.pl/rss'), (u'Magia', u'http://www.magia.gildia.pl/rss'), (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'), (u'RPG', u'http://www.rpg.gildia.pl/rss'), (u'LARP', u'http://www.larp.gildia.pl/rss'), (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'), (u'Nauka', u'http://www.nauka.gildia.pl/rss')]
@ -18,10 +21,9 @@ class Gildia(BasicNewsRecipe):
def skip_ad_pages(self, soup): def skip_ad_pages(self, soup):
content = soup.find('div', attrs={'class':'news'}) content = soup.find('div', attrs={'class':'news'})
skip_tag= content.findAll(name='a') if 'recenzj' in soup.title.string.lower():
if skip_tag is not None: for link in content.findAll(name='a'):
for link in skip_tag: if 'recenzj' in link['href']:
if 'recenzja' in link['href']:
self.log.warn('odnosnik') self.log.warn('odnosnik')
self.log.warn(link['href']) self.log.warn(link['href'])
return self.index_to_soup(link['href'], raw=True) return self.index_to_soup(link['href'], raw=True)

View File

@ -12,7 +12,7 @@ class Gram_pl(BasicNewsRecipe):
no_stylesheets= True no_stylesheets= True
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])] remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent', 'sharedaddy sd-sharing-enabled']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])]
keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')] keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')]
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'), feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'), (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),

View File

@ -12,8 +12,8 @@ class GryOnlinePl(BasicNewsRecipe):
cover_url='http://www.gry-online.pl/im/gry-online-logo.png' cover_url='http://www.gry-online.pl/im/gry-online-logo.png'
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets= True no_stylesheets= True
keep_only_tags=[dict(name='div', attrs={'class':'gc660'})] keep_only_tags=[dict(name='div', attrs={'class':['gc660', 'gc660 S013']})]
remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})] remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')] feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]

BIN
recipes/icons/forsal.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 660 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 322 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 450 B

View File

@ -15,7 +15,6 @@ class Konflikty(BasicNewsRecipe):
keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')] keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]
feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
(u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'), (u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'), (u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),

View File

@ -5,9 +5,9 @@ class Lomza(BasicNewsRecipe):
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'4Łomża - regional site' description = u'4Łomża - regional site'
cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg' cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
language = 'pl' language = 'pl'
oldest_article = 15 oldest_article = 15
no_styleseets=True no_stylesheets = True
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})] remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})] keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})]

View File

@ -8,15 +8,18 @@ lwn.net
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re import re
import sys
class WeeklyLWN(BasicNewsRecipe): class WeeklyLWN(BasicNewsRecipe):
title = 'LWN.net Weekly Edition' title = 'LWN.net Weekly Edition'
description = 'Weekly summary of what has happened in the free software world.' description = 'Weekly summary of what has happened in the free software world.'
__author__ = 'Davide Cavalca' __author__ = 'Davide Cavalca'
language = 'en' language = 'en'
site_url = 'http://lwn.net' site_url = u'http://lwn.net'
extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n' extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n'
no_stylesheets = True
remove_javascript = True
cover_url = site_url + '/images/lcorner.png' cover_url = site_url + '/images/lcorner.png'
#masthead_url = 'http://lwn.net/images/lcorner.png' #masthead_url = 'http://lwn.net/images/lcorner.png'
@ -28,10 +31,13 @@ class WeeklyLWN(BasicNewsRecipe):
preprocess_regexps = [ preprocess_regexps = [
# Remove the <hr> and "Log in to post comments" # Remove the <hr> and "Log in to post comments"
(re.compile(r'<hr.*?comments[)]', re.DOTALL), lambda m: ''), (re.compile(r'<hr [^>]+>\s*\n\s*.*?comments[)]'), lambda m: ''),
] ]
conversion_options = { 'linearize_tables' : True } conversion_options = {
'linearize_tables' : True,
'no_inline_navbars': True,
}
oldest_article = 7.0 oldest_article = 7.0
needs_subscription = 'optional' needs_subscription = 'optional'
@ -60,8 +66,6 @@ class WeeklyLWN(BasicNewsRecipe):
if url[-len(print_param):] != print_param: if url[-len(print_param):] != print_param:
url += print_param url += print_param
#import sys
#print >>sys.stderr, "*** print_version(url):", url
return url return url
def parse_index(self): def parse_index(self):
@ -70,61 +74,69 @@ class WeeklyLWN(BasicNewsRecipe):
else: else:
index_url = self.print_version('/free/bigpage') index_url = self.print_version('/free/bigpage')
soup = self.index_to_soup(index_url) soup = self.index_to_soup(index_url)
body = soup.body curr = soup.body
articles = {} articles = {}
ans = [] ans = []
url_re = re.compile('^/Articles/')
section = soup.title.string
subsection = None
while True: while True:
tag_title = body.findNext(attrs={'class':'SummaryHL'}) curr = curr.findNext(attrs = {'class': ['SummaryHL', 'Cat1HL', 'Cat2HL'] })
if tag_title == None:
if curr == None:
break break
tag_section = tag_title.findPrevious(attrs={'class':'Cat1HL'}) text = curr.contents[0].string
if tag_section == None:
section = 'Front Page'
else:
section = tag_section.string
tag_section2 = tag_title.findPrevious(attrs={'class':'Cat2HL'}) if 'Cat2HL' in curr.attrMap['class']:
if tag_section2 != None: subsection = text
if tag_section2.findPrevious(attrs={'class':'Cat1HL'}) == tag_section:
section = "%s: %s" %(section, tag_section2.string)
if section not in articles.keys(): elif 'Cat1HL' in curr.attrMap['class']:
articles[section] = [] section = text
if section not in ans: subsection = None
ans.append(section)
body = tag_title elif 'SummaryHL' in curr.attrMap['class']:
while True: article_title = text
tag_url = body.findNext(name='a', attrs={'href':url_re})
if tag_url == None: if subsection:
break section_title = "%s: %s" % (section, subsection)
body = tag_url
if tag_url.string == None:
continue
elif tag_url.string == 'Full Story':
break
elif tag_url.string.startswith('Comments ('):
break
else: else:
section_title = section
# Most articles have anchors in their titles, *except* the security vulnerabilities
article_anchor = curr.findNext(name = 'a', attrs = { 'href': re.compile('^/Articles/') } )
if article_anchor:
article_url = article_anchor.get('href')
if not article_url:
print >>sys.stderr, 'article_url is None for article_anchor "%s": "%s"' \
% (str(article_anchor), article_title)
continue
else:
print >>sys.stderr, 'article_anchor is None for "%s"; skipping' % article_title
article_url = None
continue continue
if tag_url == None: if section_title not in articles:
break articles[section_title] = []
if section_title not in ans:
ans.append(section_title)
articles[section_title].append({
'url': article_url,
'title': article_title,
'description': '', 'content': '', 'date': '',
})
article = dict( else:
title=self.tag_to_string(tag_title), print >>sys.stderr, "lwn_weekly.recipe: something bad happened; should not be able to reach this"
url=tag_url['href'],
description='', content='', date='')
articles[section].append(article)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(section, articles[section]) for section in ans if section in articles]
if not ans: #from pprint import pprint
raise Exception('Could not find any articles.') #pprint(ans)
return ans return ans

16
recipes/nowy_ekran.recipe Normal file
View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class NowyEkran(BasicNewsRecipe):
    """Calibre recipe for the Nowy ekran blogging portal (nowyekran.pl).

    The recipe is pure configuration: it declares one RSS feed and the
    tag filters used to cut each downloaded page down to the post body.
    """

    # --- Metadata ---
    title = u'Nowy ekran'
    __author__ = 'fenuks'
    description = u'Niezależny serwis społeczności blogerów'
    category = 'blog'
    language = 'pl'

    # --- Artwork: the site logo is used for both masthead and cover ---
    masthead_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'
    cover_url = 'http://s.nowyekran.pl/gfx/ekran-big.gif'

    # --- Download limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # --- Page clean-up ---
    # Content is bounded by the 'post_detal' and 'post_footer' divs; the
    # footer itself is listed in remove_tags as well, together with the
    # comment-count icon and the "get PDF" link.
    remove_tags_before = dict(name='div', attrs={'class': 'post_detal'})
    remove_tags_after = dict(name='div', attrs={'class': 'post_footer'})
    remove_tags = [
        dict(name='span', attrs={'class': 'ico ico_comments'}),
        dict(name='div', attrs={'class': 'post_footer'}),
        dict(name='a', attrs={'class': 'getpdf'}),
    ]

    # --- Feeds ---
    feeds = [
        (u'Najnowsze notki', u'http://www.nowyekran.pl/RSS/'),
    ]

View File

@ -1,5 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Polska_times(BasicNewsRecipe): class PolskaTimes(BasicNewsRecipe):
title = u'Polska Times' title = u'Polska Times'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.' description = u'Internetowe wydanie dziennika ogólnopolskiego Polska The Times. Najświeższe informacje: wydarzenia w kraju i na świecie, reportaże, poradniki, opinie.'
@ -10,6 +10,7 @@ class Polska_times(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
remove_emty_feeds= True remove_emty_feeds= True
no_stylesheets = True no_stylesheets = True
ignore_duplicate_articles = {'title', 'url'}
#preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ] #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})] remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
@ -23,6 +24,7 @@ class Polska_times(BasicNewsRecipe):
nexturl=soup.find('a')['href'] nexturl=soup.find('a')['href']
return self.index_to_soup(nexturl, raw=True) return self.index_to_soup(nexturl, raw=True)
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/') soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
self.cover_url=soup.find(id='pojemnik').img['src'] self.cover_url=soup.find(id='pojemnik').img['src']

View File

@ -0,0 +1,31 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class PB_PL(BasicNewsRecipe):
    """Calibre recipe for Puls Biznesu (pb.pl), a Polish business daily.

    Articles are collected from the site's Atom feeds and downloaded in
    their print layout (see ``print_version``). The cover image is looked
    up on the archive page each time the recipe runs (see
    ``get_cover_url``).
    """

    title = u'Puls Biznesu'
    __author__ = 'fenuks'
    language = 'pl'
    description = u'Puls Biznesu - biznes, ekonomia, giełda, inwestycje'
    category = u'newspaper'
    publication_type = u'newspaper'
    encoding = 'utf-8'
    #masthead_url = 'http://www.pb.pl/img/pb.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    # The same story is published in several feeds; drop repeats by
    # title or URL.
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(name='div', attrs={'class':'news_content'})

    feeds = [
        (u'Wszystkie', u'http://www.pb.pl/atom'),
        (u'Puls inwestora', u'http://pulsinwestora.pb.pl/atom'),
        (u'Puls Firmy', u'http://firma.pb.pl/atom'),
        (u'PB Weekend', u'http://weekend.pb.pl/atom'),
        # Label corrected from 'Forum MPS'; the feed host is forummsp.
        (u'Forum MSP', u'http://forummsp.pb.pl/atom'),
        (u'Moto', u'http://moto.pb.pl/atom'),
        (u'Kariera i praca', u'http://kariera.pb.pl/atom'),
        (u'Nieruchomości', u'http://nieruchomosci.pb.pl/atom'),
        (u'Samorządy', u'http://samorzady.pb.pl/atom'),
        (u'Tech', u'http://tech.pb.pl/atom'),
        (u'Energetyka', u'http://energetyka.pb.pl/atom'),
        (u'Retailing', u'http://retailing.pb.pl/atom'),
        (u'Puls medycyny', u'http://pulsmedycyny.pl/atom'),
        (u'Logistyka', u'http://logistyka.pb.pl/atom'),
    ]

    def print_version(self, url):
        """Return the print-layout URL for an article.

        The first ``<digits>,<digits>`` token found in ``url`` is taken as
        the article id and appended to the ``actionprint`` endpoint. URLs
        without such a token are returned unchanged.
        """
        article_id = re.search(r'(?P<id>\d+,\d+)', url)
        if article_id:
            return 'http://www.pb.pl/actionprint/' + article_id.group('id')
        return url

    def get_cover_url(self):
        """Return the cover image URL scraped from the archive page.

        Looks for ``<img class="cover_picture">`` on archiwum.pb.pl and
        stores its ``src`` in ``self.cover_url``. If the image (or its
        ``src``) is missing, no exception is raised: whatever ``cover_url``
        is already set is returned, or ``None`` when there is none.
        """
        soup = self.index_to_soup('http://archiwum.pb.pl/')
        cover = soup.find(name='img', attrs={'class':'cover_picture'})
        # soup.find() returns None when the page layout changes; guard so a
        # missing cover does not abort the whole download with a TypeError.
        if cover is not None and cover.get('src'):
            self.cover_url = cover['src']
        return getattr(self, 'cover_url', None)

View File

@ -9,13 +9,14 @@ class RichmondTimesDispatch(BasicNewsRecipe):
and is also a default paper for rural regions of the state. \ and is also a default paper for rural regions of the state. \
The RTD has published in some form for more than 150 years." The RTD has published in some form for more than 150 years."
__author__ = '_reader' __author__ = '_reader'
__date__ = '05 July 2012' __date__ = '17 October 2012'
__version__ = '1.4' __version__ = '1.6'
cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png' cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
masthead_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png' masthead_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
language = 'en' language = 'en'
oldest_article = 1.5 #days oldest_article = 1.5 #days
max_articles_per_feed = 100 max_articles_per_feed = 100
ignore_duplicate_articles = { 'title', 'url' }
needs_subscription = False needs_subscription = False
publisher = 'timesdispatch.com' publisher = 'timesdispatch.com'
category = 'news, commentary' category = 'news, commentary'
@ -70,6 +71,7 @@ class RichmondTimesDispatch(BasicNewsRecipe):
('Local Business', 'http://www2.timesdispatch.com/list/feed/rss/local-business'), ('Local Business', 'http://www2.timesdispatch.com/list/feed/rss/local-business'),
('Politics', 'http://www2.timesdispatch.com/list/feed/rss/politics'), ('Politics', 'http://www2.timesdispatch.com/list/feed/rss/politics'),
('Virginia Politics', 'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'), ('Virginia Politics', 'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'),
('History', 'http://www2.timesdispatch.com/feed/rss/special_section/news/history'),
('Sports', 'http://www2.timesdispatch.com/list/feed/rss/sports2'), ('Sports', 'http://www2.timesdispatch.com/list/feed/rss/sports2'),
('Health', 'http://www2.timesdispatch.com/feed/rss/lifestyles/health_med_fit/'), ('Health', 'http://www2.timesdispatch.com/feed/rss/lifestyles/health_med_fit/'),
('Entertainment/Life', 'http://www2.timesdispatch.com/list/feed/rss/entertainment'), ('Entertainment/Life', 'http://www2.timesdispatch.com/list/feed/rss/entertainment'),
@ -78,13 +80,15 @@ class RichmondTimesDispatch(BasicNewsRecipe):
('Music', 'http://www2.timesdispatch.com/list/feed/rss/music'), ('Music', 'http://www2.timesdispatch.com/list/feed/rss/music'),
('Dining & Food', 'http://www2.timesdispatch.com/list/feed/rss/dining'), ('Dining & Food', 'http://www2.timesdispatch.com/list/feed/rss/dining'),
('Home & Garden', 'http://www2.timesdispatch.com/list/feed/rss/home-and-garden/'), ('Home & Garden', 'http://www2.timesdispatch.com/list/feed/rss/home-and-garden/'),
#inactive('Travel', 'http://www2.timesdispatch.com/feed/rss/travel/'), ('Travel', 'http://www2.timesdispatch.com/feed/rss/travel/'),
('Opinion', 'http://www2.timesdispatch.com/feed/rss/news/opinion/'), ('Opinion', 'http://www2.timesdispatch.com/feed/rss/news/opinion/'),
('Editorials', 'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'), ('Editorials', 'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'),
('Columnists and Blogs', 'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'), ('Columnists and Blogs', 'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'),
('Opinion Columnists', 'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'), ('Opinion Columnists', 'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'),
('Letters to the Editor', 'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'), ('Letters to the Editor', 'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'),
('Traffic', 'http://www2.timesdispatch.com/list/feed/rss/traffic'), ('Traffic', 'http://www2.timesdispatch.com/list/feed/rss/traffic'),
('Drives', 'http://www2.timesdispatch.com/feed/rss/classifieds/transportation/'),
] ]
def print_version(self,url): def print_version(self,url):

View File

@ -1,28 +1,57 @@
__license__ = 'GPL v3'
__copyright__ = ''
'''
Fetch RSS-Feeds spektrum.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class AdvancedUserRecipe1303841067(BasicNewsRecipe): class AdvancedUserRecipe1303841067(BasicNewsRecipe):
title = u'Spektrum (der Wissenschaft)' title = u'Spektrum der Wissenschaft'
__author__ = 'schuster' __author__ = 'Armin Geller, Bratzzo, Rainer Zenz' # Update Bratzzo & AGE 2012-10-12
oldest_article = 7 description = u'German online portal of Spektrum der Wissenschaft'
publisher = 'Spektrum der Wissenschaft Verlagsgesellschaft mbH'
category = 'science news, Germany'
oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'de' no_stylesheets = True
cover_url = 'http://upload.wikimedia.org/wikipedia/de/3/3b/Spektrum_der_Wissenschaft_Logo.svg' remove_javascript = True
remove_empty_feeds = True
language = 'de_DE'
remove_tags = [dict(attrs={'class':['hauptnaviPkt gainlayout', 'hauptnaviButton', 'suchButton', 'suchbegriffKasten', 'loginButton', 'subnavigation', 'artikelInfoLeiste gainlayout', 'artikelTools', 'nurLetzteSeite', 'link', 'boxUnterArtikel', 'leserbriefeBlock', 'boxTitel', 'boxInhalt', 'sehrklein', 'boxabstand', 'werbeboxinhalt', 'rbabstand', 'bildlinks', 'rechtebox', 'denkmalbox', 'denkmalfrage']}), #conversion_options = {'base_font_size': 20}
dict(id=['pflip', 'verlagsleiste', 'bereich', 'bannerVertikal', 'headerLogoLink', 'kopf', 'topNavi', 'headerSchnellsuche', 'headerSchnellsucheWarten', 'navigation', 'navigationL', 'navigationR', 'inhalt', 'rechtespalte', 'sdwboxenshop', 'shopboxen', 'fuss']),
dict(name=['naservice'])]
def print_version(self,url): # cover_url = 'http://upload.wikimedia.org/wikipedia/de/3/3b/Spektrum_der_Wissenschaft_Logo.svg' # old logo
newurl = url.replace('artikel/', 'sixcms/detail.php?id=') cover_url = 'http://upload.wikimedia.org/wikipedia/de/5/59/Spektrum-cover.jpg' # from Rainer Zenz
return newurl + '&_druckversion=1'
masthead_url = 'http://www.spektrum.de/fm/861/spektrum.de.png'
extra_css = '''
h1 {font-size: 1.6em; text-align: left}
h2 {font-size: 1em; font-style: italic; font-weight: normal}
h3 {font-size: 1.3em;text-align: left}
h4, h5, h6, .heading, .hgroup {font-size: 1em;text-align: left}
'''
feeds = [(u'Spektrum der Wissenschaft', u'http://www.spektrum.de/artikel/982623'), feeds = [
(u'SpektrumDirekt', u'http://www.spektrumdirekt.de/artikel/996406'), (u'Spektrum.de', u'http://www.spektrum.de/alias/rss/spektrum-de-rss-feed/996406'),
(u'Sterne und Weltraum', u'http://www.astronomie-heute.de/artikel/865248'), (u'Spektrum der Wissenschaft', u'http://www.spektrum.de/alias/rss/spektrum-der-wissenschaft-rss-feed/982623'),
(u'Gehirn & Geist', u'http://www.gehirn-und-geist.de/artikel/982626'), (u'Gehirn & Geist', u'http://www.spektrum.de/alias/rss/gehirn-geist-rss-feed/982626'),
(u'epoc', u'http://www.epoc.de/artikel/982625') (u'Epoc', u'http://www.spektrum.de/alias/rss/epoc-rss-feed/982625'),
(u'Sterne und Weltraum', u'http://www.spektrum.de/alias/rss/sterne-und-weltraum-rss-feed/865248'),
(u'Editional', u'http://www.spektrum.de/alias/rss/spektrum-de-editorial/996044'),
(u'Pressemitteilungen', u'http://www.spektrum.de/alias/rss/pressemitteilungen/995265'),
]
] keep_only_tags = [
dict(name='div', attrs={'class':'border-h clearfix article-top'}),
dict(name='div', attrs={'class':'clearfix'}),
dict(name='div', attrs={'class':'bilderrahmenlinks'}),
dict(name='div', attrs={'class':'relcontainer'}),
]
filter_regexps = [r'ads\.doubleclick\.net'] remove_tags_after=dict(name='div', attrs={'class':['sidebar-box-head']})
remove_tags = [
dict(attrs={'id':['recommend-article', 'dossierbox', 'cover', 'toc']}),
dict(attrs={'class':['sidebar-box-full clearfix', 'linktotop' ]}),
]

View File

@ -11,5 +11,5 @@ class SpidersWeb(BasicNewsRecipe):
no_stylesheers=True no_stylesheers=True
max_articles_per_feed = 100 max_articles_per_feed = 100
keep_only_tags=[dict(id='Post')] keep_only_tags=[dict(id='Post')]
remove_tags=[dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']})] remove_tags=[dict(name='div', attrs={'class':['Comments', 'Shows', 'Post-Tags']}), dict(id='Author-Column')]
feeds = [(u'Wpisy', u'http://www.spidersweb.pl/feed')] feeds = [(u'Wpisy', u'http://www.spidersweb.pl/feed')]

25
recipes/stopklatka.recipe Normal file
View File

@ -0,0 +1,25 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Stopklatka(BasicNewsRecipe):
title = u'Stopklatka'
__author__ = 'fenuks'
description = u'Stopklatka.pl to najdłużej działający polski portal filmowy. Baza filmów, seriali i aktorów, repertuar kin, program tv, wydarzenia ze świata filmu'
category = 'movies'
language = 'pl'
oldest_article = 7
masthead_url= 'http://img.stopklatka.pl/logo/logo-3.gif'
cover_url= 'http://img.stopklatka.pl/logo/logo-3.gif'
max_articles_per_feed = 100
no_stylesheets = True
preprocess_regexps = [(re.compile(ur'Wersja internetowa dostępna jest pod adresem:.*</body>', re.DOTALL), lambda match: '</body>'), (re.compile(ur'</?font.*?>', re.DOTALL), lambda match: '') ]
remove_empty_feeds = True
remove_tags = [dict(name='img', attrs={'alt':'logo'})]
feeds = [(u'Wydarzenia', u'http://rss.stopklatka.pl/wydarzenia.rss')]
def print_version(self, url):
link_id = re.search(r'wi=(?P<id>\d+)', url)
if link_id:
return 'http://www.stopklatka.pl/narzedzia/drukuj.asp?typ=wydarzenie&id=' + link_id.group('id')
else:
return url

View File

@ -19,7 +19,7 @@ class Swiat_Obrazu(BasicNewsRecipe):
return url + ',drukuj' return url + ',drukuj'
def image_url_processor(self, baseurl, url): def image_url_processor(self, baseurl, url):
if 'http://' not in url or 'https://' not in url: if 'http://' not in url and 'https://' not in url:
return 'http://www.swiatobrazu.pl' + url[5:] return 'http://www.swiatobrazu.pl' + url[5:]
else: else:
return url return url

View File

@ -8,7 +8,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'The Sun UK' title = u'The Sun UK'
description = 'Articles from The Sun tabloid UK' description = 'Articles from The Sun tabloid UK'
__author__ = 'Dave Asbury' __author__ = 'Dave Asbury'
# last updated 6/10/12 added starsons remove article code # last updated 12/10/12 added starsons remove article code
language = 'en_GB' language = 'en_GB'
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 15 max_articles_per_feed = 15
@ -19,6 +19,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
ignore_duplicate_articles = {'title'}
extra_css = ''' extra_css = '''
@ -51,8 +52,10 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
feeds = BasicNewsRecipe.parse_feeds(self) feeds = BasicNewsRecipe.parse_feeds(self)
for feed in feeds: for feed in feeds:
for article in feed.articles[:]: for article in feed.articles[:]:
# print 'article.title is: ', article.title print 'article.title is: ', article.title
if 'Web porn harms kids' in article.title.upper() or 'The-Sun-says' in article.url: if 'Try out The Sun' in article.title.upper() or 'Try-out-The-Suns' in article.url:
feed.articles.remove(article)
if 'Web porn harms kids' in article.title.upper() or 'Sun-says-Web-porn' in article.url:
feed.articles.remove(article) feed.articles.remove(article)
return feeds return feeds
@ -72,7 +75,6 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
cov2 = str(cov) cov2 = str(cov)
cov2=cov2[27:-18] cov2=cov2[27:-18]
#cov2 now is pic url, now go back to original function #cov2 now is pic url, now go back to original function
# print "**** cov2 =",cov2,"****"
br = browser() br = browser()
br.set_handle_redirect(False) br.set_handle_redirect(False)
try: try:

View File

@ -7,19 +7,28 @@ class tvn24(BasicNewsRecipe):
description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata' description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
category = 'news' category = 'news'
language = 'pl' language = 'pl'
masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' #masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' cover_url= 'http://www.userlogos.org/files/logos/Struna/TVN24.jpg'
extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}' extra_css = 'ul {list-style:none;} \
li {list-style:none; float: left; margin: 0 0.15em;} \
h2 {font-size: medium} \
.date60m {float: left; margin: 0 10px 0 5px;}'
remove_empty_feeds = True remove_empty_feeds = True
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
keep_only_tags=[dict(name='h1', attrs={'class':'standardHeader1'}), dict(attrs={'class':['date60m rd5', 'imageBackground fl rd7', 'contentFromCMS']}), dict(attrs={'class':'mainLeftColumn'})] use_embedded_content = False
remove_tags=[dict(attrs={'class':['commentsInfo', 'textSize', 'related newsNews align-right', 'box', 'watchMaterial text']})] ignore_duplicate_articles = {'title', 'url'}
#remove_tags_after= dict(attrs={'class':'articleAuthors mb30 mt5 grey_v6'}) keep_only_tags=[dict(name='h1', attrs={'class':['size30 mt10 pb10', 'size38 mt10 pb15']}), dict(name='figure', attrs={'class':'articleMainPhoto articleMainPhotoWide'}), dict(name='article', attrs={'class':['mb20', 'mb20 textArticleDefault']}), dict(name='ul', attrs={'class':'newsItem'})]
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'), ] remove_tags = [dict(name='aside', attrs={'class':['innerArticleModule onRight cols externalContent', 'innerArticleModule center']}), dict(name='div', attrs={'class':['thumbsGallery', 'articleTools', 'article right rd7', 'heading', 'quizContent']}), dict(name='a', attrs={'class':'watchMaterial text'}), dict(name='section', attrs={'class':['quiz toCenter', 'quiz toRight']})]
#(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
feeds = [(u'Najnowsze', u'http://www.tvn24.pl/najnowsze.xml'),
(u'Polska', u'www.tvn24.pl/polska.xml'), (u'\u015awiat', u'http://www.tvn24.pl/swiat.xml'), (u'Sport', u'http://www.tvn24.pl/sport.xml'), (u'Biznes', u'http://www.tvn24.pl/biznes.xml'), (u'Meteo', u'http://www.tvn24.pl/meteo.xml'), (u'Micha\u0142ki', u'http://www.tvn24.pl/michalki.xml'), (u'Kultura', u'http://www.tvn24.pl/kultura.xml')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
tag = soup.find(name='ul', attrs={'class':'newsItem'})
if tag:
tag.name='div'
tag.li.name='div'
return soup return soup

View File

@ -1,46 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
class webhosting_pl(BasicNewsRecipe):
    """Calibre recipe for Webhosting.pl, a Polish hosting/web news site.

    Articles come from the site's RSS feeds and are fetched in their
    print layout. Anchors whose href contains no http(s) scheme get the
    site root (``index``) prepended.
    """

    # --- Metadata ---
    title = u'Webhosting.pl'
    __author__ = 'fenuks'
    description = 'Webhosting.pl to pierwszy na polskim rynku serwis poruszający w szerokim aspekcie tematy związane z hostingiem, globalną Siecią i usługami internetowymi. Głównym celem przedsięwzięcia jest dostarczanie przydatnej i bogatej merytorycznie wiedzy osobom, które chcą tworzyć i efektywnie wykorzystywać współczesny Internet.'
    category = 'web'
    language = 'pl'

    # --- Artwork and site root ---
    cover_url = 'http://webhosting.pl/images/logo.png'
    masthead_url = 'http://webhosting.pl/images/logo.png'
    index = 'http://webhosting.pl'

    # --- Download limits ---
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True

    #keep_only_tags= [dict(name='div', attrs={'class':'content_article'}), dict(attrs={'class':'paging'})]
    #remove_tags=[dict(attrs={'class':['tags', 'wykop', 'facebook_button_count', 'article_bottom']})]

    # --- Feeds ---
    feeds = [
        (u'Newsy', u'http://webhosting.pl/feed/rss/an'),
        (u'Artyku\u0142y', u'http://webhosting.pl/feed/rss/aa'),
        (u'Software', u'http://webhosting.pl/feed/rss/n/12'),
        (u'Internet', u'http://webhosting.pl/feed/rss/n/9'),
        (u'Biznes', u'http://webhosting.pl/feed/rss/n/13'),
        (u'Bezpiecze\u0144stwo', u'http://webhosting.pl/feed/rss/n/10'),
        (u'Blogi', u'http://webhosting.pl/feed/rss/ab'),
        (u'Programowanie', u'http://webhosting.pl/feed/rss/n/8'),
        (u'Kursy', u'http://webhosting.pl/feed/rss/n/11'),
        (u'Tips&Tricks', u'http://webhosting.pl/feed/rss/n/15'),
        (u'Imprezy', u'http://webhosting.pl/feed/rss/n/22'),
        (u'Wywiady', u'http://webhosting.pl/feed/rss/n/24'),
        (u'Porady', u'http://webhosting.pl/feed/rss/n/3027'),
        (u'Znalezione w sieci', u'http://webhosting.pl/feed/rss/n/6804'),
        (u'Dev area', u'http://webhosting.pl/feed/rss/n/24504'),
        (u"Webmaster's blog", u'http://webhosting.pl/feed/rss/n/29195'),
        (u'Domeny', u'http://webhosting.pl/feed/rss/n/11513'),
        (u'Praktyka', u'http://webhosting.pl/feed/rss/n/2'),
        (u'Serwery', u'http://webhosting.pl/feed/rss/n/11514'),
        (u'Inne', u'http://webhosting.pl/feed/rss/n/24811'),
        (u'Marketing', u'http://webhosting.pl/feed/rss/n/11535'),
    ]

    def print_version(self, url):
        """Return ``url`` with every 'webhosting.pl' replaced by 'webhosting.pl/print'."""
        printable = url.replace('webhosting.pl', 'webhosting.pl/print')
        return printable

    def preprocess_html(self, soup):
        """Prefix the site root to anchors whose href has no http(s) scheme.

        Anchors without an href, or whose href contains 'http://' or
        'https://' anywhere, are left untouched. Returns the same soup.
        """
        for anchor in soup('a'):
            if not anchor.has_key('href'):
                continue
            target = anchor['href']
            if 'http://' in target or 'https://' in target:
                continue
            anchor['href'] = self.index + target
        return soup

View File

@ -2,74 +2,52 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
Fetch Die Zeit. Fetch Zeit-Online.de
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from datetime import date
class ZeitDe(BasicNewsRecipe): class ZeitDe(BasicNewsRecipe):
title = 'Zeit Online' __author__ = 'Armin Geller' # AGe 2012-10-13
description = 'Zeit Online' title = u'Zeit Online'
language = 'de' description = u'German online portal of newspaper Die Zeit'
encoding = 'UTF-8' publisher = 'ZEIT ONLINE GmbH'
category = 'news, Germany'
timefmt = ' [%a, %d %b %Y]'
publication_type = 'newspaper'
language = 'de_DE'
encoding = 'UTF-8'
__author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing' oldest_article = 7
no_stylesheets = True max_articles_per_feed = 100
remove_empty_feeds = True
auto_cleanup = True
# no_stylesheets = True
# conversion_options = {'base_font_size': 10}
max_articles_per_feed = 40 masthead_url = 'http://images.zeit.de/static/img/logo_247x30.png'
remove_tags = [ year = str(date.today().isocalendar()[0]) # [0]=year [1]=week number [2]=week day
dict(name='iframe'), week = str(date.today().isocalendar()[1]+1)
dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }), cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/titelfluss/' + year + '/0'+ week + '_001.jpg'
dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
dict(name='div', attrs={'id':["place_5","place_4","comments"]})
]
keep_only_tags = [dict(id=['main'])]
feeds = [ feeds = [
('Seite 1', 'http://newsfeed.zeit.de/index_xml'), (u'Startseite Die wichtigsten Themen auf einen Blick', u'http://newsfeed.zeit.de/index_xml'),
('Politik', 'http://newsfeed.zeit.de/politik/index'), (u'Politik Ausland und Deutschland', u'http://newsfeed.zeit.de/politik/index'),
('Wirtschaft', 'http://newsfeed.zeit.de/wirtschaft/index'), (u'Wirtschaft Wirtschaft und Unternehmen', u'http://newsfeed.zeit.de/wirtschaft/index'),
('Meinung', 'http://newsfeed.zeit.de/meinung/index'), (u'Meinung Autoren kommentieren', u'http://newsfeed.zeit.de/meinung/index'),
('Gesellschaft', 'http://newsfeed.zeit.de/gesellschaft/index'), (u'Gesellschaft Gesellschaft und soziales Leben', u'http://newsfeed.zeit.de/gesellschaft/index'),
('Kultur', 'http://newsfeed.zeit.de/kultur/index'), (u'Kultur Literatur, Kunst, Film und Musik', u'http://newsfeed.zeit.de/kultur/index'),
('Wissen', 'http://newsfeed.zeit.de/wissen/index'), (u'Wissen Wissenschaft, Gesundheit, Umwelt und Geschichte', u'http://newsfeed.zeit.de/wissen/index'),
('Digital', 'http://newsfeed.zeit.de/digital/index'), (u'Digital Hardware, Software, Internet, Datenschutz', u'http://newsfeed.zeit.de/digital/index'),
('Studium', 'http://newsfeed.zeit.de/studium/index'), (u'Studium ZEIT ONLINE für Studenten', u'http://newsfeed.zeit.de/studium/index'),
('Karriere', 'http://newsfeed.zeit.de/karriere/index'), (u'Karriere Für Ein-, Um- und Aufsteiger', u'http://newsfeed.zeit.de/karriere/index'),
('Lebensart', 'http://newsfeed.zeit.de/lebensart/index'), (u'Lebensart Freizeit und Leben', u'http://newsfeed.zeit.de/lebensart/index'),
('Reisen', 'http://newsfeed.zeit.de/reisen/index'), (u'Reisen All inclusive und individuell', u'http://newsfeed.zeit.de/reisen/index'),
('Auto', 'http://newsfeed.zeit.de/auto/index'), (u'Auto Modelle und Trends', u'http://newsfeed.zeit.de/auto/index'),
('Sport', 'http://newsfeed.zeit.de/sport/index'), (u'Sport Sieg und Niederlage', u'http://newsfeed.zeit.de/sport/index')
] ]
extra_css = '.excerpt{font-size:1em}.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}' def print_version(self, url):
return url + '/komplettansicht?print=true'
#filter_regexps = [r'ad.de.doubleclick.net/']
def get_article_url(self, article):
    """Return the single-page print URL for a feed entry.

    ``article`` is a mapping-like feed entry; only its 'link' key is read.
    Returns None when the entry has no link, or when the resulting URL
    contains 'video', 'quiz' or 'blog'.
    """
    link = article.get('link', None)
    if not link:
        # Without this guard a missing link raised TypeError (None += str).
        # NOTE(review): presumably the caller skips entries for which None
        # is returned -- confirm against the recipe base class.
        return None
    ans = link + "?page=all&print=true"
    if 'video' in ans or 'quiz' in ans or 'blog' in ans:
        return None
    return ans
def preprocess_html(self, soup):
    """Normalise a downloaded page before conversion and return it.

    Renames every <ul>/<li> element to <div>, stamps the recipe language
    on the <html> element and inserts a Content-Type meta string at the
    start of <head>.
    """
    for node in soup.findAll(name=['ul','li']):
        node.name = 'div'
    # The recipe language uses '_' as separator; the attributes get '-'.
    lang_code = self.language.replace('_', '-')
    root = soup.html
    for attr in ('xml:lang', 'lang'):
        root[attr] = lang_code
    soup.head.insert(
        0,
        '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">')
    return soup
def get_cover_url(self):
    """Return the URL of the current cover image.

    Fetches http://www.zeit.de/inhalt, takes the image inside the
    'singlearchive clearfix' div and strips 'icon_' from its src.
    If anything in that lookup fails, a fixed fallback cover URL is
    returned instead.
    """
    try:
        inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
        return inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
    except Exception:
        # Best effort: download or page-layout problems fall back to a
        # static cover. Exception (rather than a bare except) lets
        # KeyboardInterrupt/SystemExit propagate.
        return 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'

Binary file not shown.

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import subprocess, tempfile, os, time import subprocess, tempfile, os, time, socket
from setup import Command, installer_name from setup import Command, installer_name
from setup.build_environment import HOST, PROJECT from setup.build_environment import HOST, PROJECT
@ -62,7 +62,10 @@ class Push(Command):
r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre':'winxp', r'Owner@winxp:/cygdrive/c/Documents\ and\ Settings/Owner/calibre':'winxp',
'kovid@ox:calibre':None, 'kovid@ox:calibre':None,
r'kovid@win7:/cygdrive/c/Users/kovid/calibre':'Windows 7', r'kovid@win7:/cygdrive/c/Users/kovid/calibre':'Windows 7',
'kovid@getafix:calibre-src':None,
}.iteritems(): }.iteritems():
if '@getafix:' in host and socket.gethostname() == 'getafix':
continue
if vmname is None or is_vm_running(vmname): if vmname is None or is_vm_running(vmname):
rcmd = BASE_RSYNC + EXCLUDES + ['.', host] rcmd = BASE_RSYNC + EXCLUDES + ['.', host]
print '\n\nPushing to:', vmname or host, '\n' print '\n\nPushing to:', vmname or host, '\n'

View File

@ -16,7 +16,7 @@ SITE_PACKAGES = ['PIL', 'dateutil', 'dns', 'PyQt4', 'mechanize',
'sip.so', 'BeautifulSoup.py', 'cssutils', 'encutils', 'lxml', 'sip.so', 'BeautifulSoup.py', 'cssutils', 'encutils', 'lxml',
'sipconfig.py', 'xdg', 'dbus', '_dbus_bindings.so', 'dbus_bindings.py', 'sipconfig.py', 'xdg', 'dbus', '_dbus_bindings.so', 'dbus_bindings.py',
'_dbus_glib_bindings.so', 'netifaces.so', '_psutil_posix.so', '_dbus_glib_bindings.so', 'netifaces.so', '_psutil_posix.so',
'_psutil_linux.so', 'psutil'] '_psutil_linux.so', 'psutil', 'cssselect']
QTDIR = '/usr/lib/qt4' QTDIR = '/usr/lib/qt4'
QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus') QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml', 'QtWebKit', 'QtDBus')

View File

@ -30,7 +30,7 @@ If there are no windows binaries already compiled for the version of python you
Run the following command to install python dependencies:: Run the following command to install python dependencies::
easy_install --always-unzip -U mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto easy_install --always-unzip -U mechanize pyreadline python-dateutil dnspython cssutils clientform pycrypto cssselect
Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly) Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly)

View File

@ -152,7 +152,7 @@ class Translations(POT): # {{{
subprocess.check_call(['msgfmt', '-o', dest, iso639]) subprocess.check_call(['msgfmt', '-o', dest, iso639])
elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', elif locale not in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc',
'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku', 'ltg', 'nds', 'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku',
'fr_CA', 'him', 'jv', 'ka'): 'fr_CA', 'him', 'jv', 'ka', 'fur', 'ber'):
self.warn('No ISO 639 translations for locale:', locale) self.warn('No ISO 639 translations for locale:', locale)
self.write_stats() self.write_stats()

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 9, 2) numeric_version = (0, 9, 3)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -668,7 +668,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
from calibre.devices.sne.driver import SNE from calibre.devices.sne.driver import SNE
from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL, from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G) TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G, WAYTEQ)
from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
from calibre.devices.kobo.driver import KOBO, KOBOTOUCH from calibre.devices.kobo.driver import KOBO, KOBOTOUCH
from calibre.devices.bambook.driver import BAMBOOK from calibre.devices.bambook.driver import BAMBOOK
@ -742,7 +742,7 @@ plugins += [
EEEREADER, EEEREADER,
NEXTBOOK, NEXTBOOK,
ADAM, ADAM,
MOOVYBOOK, COBY, EX124G, MOOVYBOOK, COBY, EX124G, WAYTEQ,
ITUNES, ITUNES,
BOEYE_BEX, BOEYE_BEX,
BOEYE_BDX, BOEYE_BDX,

View File

@ -654,6 +654,17 @@ class KindleDXOutput(OutputProfile):
return u'%s <br/><span style="color: white">%s</span>' % (', '.join(tags), return u'%s <br/><span style="color: white">%s</span>' % (', '.join(tags),
'ttt '.join(tags)+'ttt ') 'ttt '.join(tags)+'ttt ')
class KindlePaperWhiteOutput(KindleOutput):

    # Output profile for the Kindle PaperWhite. Only the identification
    # strings and the screen geometry below are overridden; everything else
    # comes from KindleOutput.
    name = 'Kindle PaperWhite'
    short_name = 'kindle_pw'
    description = _('This profile is intended for the Amazon Kindle PaperWhite')
    # Screen size is a best guess
    screen_size = (658, 940)
    dpi = 212.0
    # Comics use the same dimensions as regular content.
    comic_screen_size = screen_size
class KindleFireOutput(KindleDXOutput): class KindleFireOutput(KindleDXOutput):
name = 'Kindle Fire' name = 'Kindle Fire'
@ -766,6 +777,6 @@ output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput, SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput, IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
BambookOutput, NookColorOutput, PocketBook900Output, GenericEink, BambookOutput, NookColorOutput, PocketBook900Output, GenericEink,
GenericEinkLarge, KindleFireOutput] GenericEinkLarge, KindleFireOutput, KindlePaperWhiteOutput]
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower())) output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))

View File

@ -447,7 +447,8 @@ def plugin_for_catalog_format(fmt):
# }}} # }}}
def device_plugins(include_disabled=False): # {{{ # Device plugins {{{
def device_plugins(include_disabled=False):
for plugin in _initialized_plugins: for plugin in _initialized_plugins:
if isinstance(plugin, DevicePlugin): if isinstance(plugin, DevicePlugin):
if include_disabled or not is_disabled(plugin): if include_disabled or not is_disabled(plugin):
@ -456,6 +457,13 @@ def device_plugins(include_disabled=False): # {{{
False): False):
plugin.do_delayed_plugin_initialization() plugin.do_delayed_plugin_initialization()
yield plugin yield plugin
def disabled_device_plugins():
    """Yield every initialized device plugin that is disabled.

    Only plugins whose ``supported_platforms`` contains the current
    ``platform`` are yielded.
    """
    for candidate in _initialized_plugins:
        if not isinstance(candidate, DevicePlugin):
            continue
        if not is_disabled(candidate):
            continue
        if platform not in candidate.supported_platforms:
            continue
        yield candidate
# }}} # }}}
# epub fixers {{{ # epub fixers {{{

View File

@ -55,7 +55,8 @@ def get_connected_device():
break break
return dev return dev
def debug(ioreg_to_tmp=False, buf=None, plugins=None): def debug(ioreg_to_tmp=False, buf=None, plugins=None,
disabled_plugins=None):
''' '''
If plugins is None, then this method calls startup and shutdown on the If plugins is None, then this method calls startup and shutdown on the
device plugins. So if you are using it in a context where startup could device plugins. So if you are using it in a context where startup could
@ -63,7 +64,7 @@ def debug(ioreg_to_tmp=False, buf=None, plugins=None):
device plugins as the plugins parameter. device plugins as the plugins parameter.
''' '''
import textwrap import textwrap
from calibre.customize.ui import device_plugins from calibre.customize.ui import device_plugins, disabled_device_plugins
from calibre.debug import print_basic_debug_info from calibre.debug import print_basic_debug_info
from calibre.devices.scanner import DeviceScanner, win_pnp_drives from calibre.devices.scanner import DeviceScanner, win_pnp_drives
from calibre.constants import iswindows, isosx from calibre.constants import iswindows, isosx
@ -85,6 +86,9 @@ def debug(ioreg_to_tmp=False, buf=None, plugins=None):
except: except:
out('Startup failed for device plugin: %s'%d) out('Startup failed for device plugin: %s'%d)
if disabled_plugins is None:
disabled_plugins = list(disabled_device_plugins())
try: try:
print_basic_debug_info(out=buf) print_basic_debug_info(out=buf)
s = DeviceScanner() s = DeviceScanner()
@ -113,9 +117,10 @@ def debug(ioreg_to_tmp=False, buf=None, plugins=None):
ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n' ioreg += 'Output from osx_get_usb_drives:\n'+drives+'\n\n'
ioreg += Device.run_ioreg() ioreg += Device.run_ioreg()
connected_devices = [] connected_devices = []
out('Available plugins:', textwrap.fill(' '.join([x.__class__.__name__ for x in if disabled_plugins:
devplugins]))) out('\nDisabled plugins:', textwrap.fill(' '.join([x.__class__.__name__ for x in
out(' ') disabled_plugins])))
out(' ')
found_dev = False found_dev = False
for dev in devplugins: for dev in devplugins:
if not dev.MANAGES_DEVICE_PRESENCE: continue if not dev.MANAGES_DEVICE_PRESENCE: continue

View File

@ -168,7 +168,7 @@ class ANDROID(USBMS):
# Xperia # Xperia
0x13d3 : { 0x3304 : [0x0001, 0x0002] }, 0x13d3 : { 0x3304 : [0x0001, 0x0002] },
# CREEL?? Also Nextbook # CREEL?? Also Nextbook and Wayteq
0x5e3 : { 0x726 : [0x222] }, 0x5e3 : { 0x726 : [0x222] },
# ZTE # ZTE
@ -212,7 +212,7 @@ class ANDROID(USBMS):
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP', 'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD', 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD',
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY', 'SXZ', 'USB_2.0', 'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY', 'SXZ', 'USB_2.0',
'COBY_MID', 'VS', 'AINOL'] 'COBY_MID', 'VS', 'AINOL', 'TOPWISE']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -243,7 +243,7 @@ class ANDROID(USBMS):
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875', 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727', 'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E', 'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
'NOVO7'] 'NOVO7', 'ADVANCED']
OSX_MAIN_MEM = 'Android Device Main Memory' OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -285,8 +285,8 @@ class KINDLE(USBMS):
class KINDLE2(KINDLE): class KINDLE2(KINDLE):
name = 'Kindle 2/3/4/Touch Device Interface' name = 'Kindle 2/3/4/Touch/PaperWhite Device Interface'
description = _('Communicate with the Kindle 2/3/4/Touch eBook reader.') description = _('Communicate with the Kindle 2/3/4/Touch/PaperWhite eBook reader.')
FORMATS = ['azw', 'mobi', 'azw3', 'prc', 'azw1', 'tpz', 'azw4', 'pobi', 'pdf', 'txt'] FORMATS = ['azw', 'mobi', 'azw3', 'prc', 'azw1', 'tpz', 'azw4', 'pobi', 'pdf', 'txt']
DELETE_EXTS = KINDLE.DELETE_EXTS + ['.mbp1', '.mbs', '.sdr', '.han'] DELETE_EXTS = KINDLE.DELETE_EXTS + ['.mbp1', '.mbs', '.sdr', '.han']
@ -327,7 +327,9 @@ class KINDLE2(KINDLE):
OPT_APNX = 0 OPT_APNX = 0
OPT_APNX_ACCURATE = 1 OPT_APNX_ACCURATE = 1
OPT_APNX_CUST_COL = 2 OPT_APNX_CUST_COL = 2
THUMBNAIL_HEIGHT = 180 # x330 on the PaperWhite
THUMBNAIL_HEIGHT = 330
# x262 on the Touch. Doesn't choke on x330, though.
def formats_to_scan_for(self): def formats_to_scan_for(self):
ans = USBMS.formats_to_scan_for(self) | {'azw3'} ans = USBMS.formats_to_scan_for(self) | {'azw3'}

View File

@ -58,6 +58,7 @@ class Book(Book_):
self.datetime = time.gmtime() self.datetime = time.gmtime()
self.contentID = None self.contentID = None
self.current_collections = []
if thumbnail_name is not None: if thumbnail_name is not None:
self.thumbnail = ImageWrapper(thumbnail_name) self.thumbnail = ImageWrapper(thumbnail_name)
@ -250,4 +251,3 @@ class KTCollectionsBookList(CollectionsBookList):
# debug_print("KTCollectionsBookList:is_debugging - is_debugging=", is_debugging) # debug_print("KTCollectionsBookList:is_debugging - is_debugging=", is_debugging)
return is_debugging return is_debugging

View File

@ -33,11 +33,11 @@ class KOBO(USBMS):
gui_name = 'Kobo Reader' gui_name = 'Kobo Reader'
description = _('Communicate with the Kobo Reader') description = _('Communicate with the Kobo Reader')
author = 'Timothy Legge and David Forrester' author = 'Timothy Legge and David Forrester'
version = (2, 0, 1) version = (2, 0, 2)
dbversion = 0 dbversion = 0
fwversion = 0 fwversion = 0
supported_dbversion = 62 supported_dbversion = 65
has_kepubs = False has_kepubs = False
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
@ -217,7 +217,7 @@ class KOBO(USBMS):
# print 'update_metadata_item returned true' # print 'update_metadata_item returned true'
changed = True changed = True
else: else:
debug_print(" Strange: The file: ", prefix, lpath, " does mot exist!") debug_print(" Strange: The file: ", prefix, lpath, " does mot exist!")
if lpath in playlist_map and \ if lpath in playlist_map and \
playlist_map[lpath] not in bl[idx].device_collections: playlist_map[lpath] not in bl[idx].device_collections:
bl[idx].device_collections = playlist_map.get(lpath,[]) bl[idx].device_collections = playlist_map.get(lpath,[])
@ -841,6 +841,14 @@ class KOBO(USBMS):
# debug_print('Finished update_device_database_collections', collections_attributes) # debug_print('Finished update_device_database_collections', collections_attributes)
def get_collections_attributes(self):
    """Return the configured collection field names.

    Reads the comma separated value stored at index ``OPT_COLLECTIONS`` of
    the driver's ``extra_customization`` settings and returns the entries
    lower-cased and stripped. An empty list is returned when nothing is
    configured.
    """
    opts = self.settings()
    if not opts.extra_customization:
        return []
    raw = opts.extra_customization[self.OPT_COLLECTIONS]
    if not raw:
        # Covers both '' and an unset (None) option; len(None) used to raise.
        return []
    cleaned = (field.lower().strip() for field in raw.split(','))
    # Drop blank entries ("a,,b", trailing commas, whitespace-only values)
    # so callers never receive an empty attribute name.
    return [field for field in cleaned if field]
def sync_booklists(self, booklists, end_session=True): def sync_booklists(self, booklists, end_session=True):
# debug_print('KOBO: started sync_booklists') # debug_print('KOBO: started sync_booklists')
paths = self.get_device_paths() paths = self.get_device_paths()
@ -853,12 +861,7 @@ class KOBO(USBMS):
blists[i] = booklists[i] blists[i] = booklists[i]
except IndexError: except IndexError:
pass pass
opts = self.settings() collections = self.get_collections_attributes()
if opts.extra_customization:
collections = [x.lower().strip() for x in
opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
else:
collections = []
#debug_print('KOBO: collection fields:', collections) #debug_print('KOBO: collection fields:', collections)
for i, blist in blists.items(): for i, blist in blists.items():
@ -1447,6 +1450,7 @@ class KOBOTOUCH(KOBO):
if lpath in playlist_map: if lpath in playlist_map:
bl[idx].device_collections = playlist_map.get(lpath,[]) bl[idx].device_collections = playlist_map.get(lpath,[])
bl[idx].current_collections = bl[idx].device_collections
changed = True changed = True
if show_debug: if show_debug:
@ -1483,6 +1487,7 @@ class KOBOTOUCH(KOBO):
# print 'Update booklist' # print 'Update booklist'
book.device_collections = playlist_map.get(lpath,[])# if lpath in playlist_map else [] book.device_collections = playlist_map.get(lpath,[])# if lpath in playlist_map else []
book.current_collections = bl[idx].device_collections
book.contentID = ContentID book.contentID = ContentID
# debug_print('KoboTouch:update_booklist - title=', title, 'book.device_collections', book.device_collections) # debug_print('KoboTouch:update_booklist - title=', title, 'book.device_collections', book.device_collections)
@ -1944,6 +1949,7 @@ class KOBOTOUCH(KOBO):
if self.supports_bookshelves(): if self.supports_bookshelves():
debug_print("KoboTouch:update_device_database_collections - managing bookshelves.") debug_print("KoboTouch:update_device_database_collections - managing bookshelves.")
if bookshelf_attribute: if bookshelf_attribute:
debug_print("KoboTouch:update_device_database_collections - bookshelf_attribute=", bookshelf_attribute)
for book in booklists: for book in booklists:
if book.application_id is not None: if book.application_id is not None:
# debug_print("KoboTouch:update_device_database_collections - about to remove a book from shelves book.title=%s" % book.title) # debug_print("KoboTouch:update_device_database_collections - about to remove a book from shelves book.title=%s" % book.title)
@ -1958,11 +1964,7 @@ class KOBOTOUCH(KOBO):
def rebuild_collections(self, booklist, oncard): def rebuild_collections(self, booklist, oncard):
debug_print("KoboTouch:rebuild_collections") debug_print("KoboTouch:rebuild_collections")
collections_attributes = [] collections_attributes = self.get_collections_attributes()
opts = self.settings()
if opts.extra_customization:
collections_attributes = [x.strip() for x in
opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
debug_print('KoboTouch:rebuild_collections: collection fields:', collections_attributes) debug_print('KoboTouch:rebuild_collections: collection fields:', collections_attributes)
self.update_device_database_collections(booklist, collections_attributes, oncard) self.update_device_database_collections(booklist, collections_attributes, oncard)
@ -2086,12 +2088,17 @@ class KOBOTOUCH(KOBO):
def remove_book_from_device_bookshelves(self, connection, book): def remove_book_from_device_bookshelves(self, connection, book):
show_debug = self.is_debugging_title(book.title)# or True show_debug = self.is_debugging_title(book.title)# or True
remove_shelf_list = set(book.current_collections) - set(book.device_collections) - set(["Im_Reading", "Read", "Closed"])
if show_debug: if show_debug:
debug_print('KoboTouch:remove_book_from_device_bookshelves - book.in_library="%s"'%book.application_id) debug_print('KoboTouch:remove_book_from_device_bookshelves - book.application_id="%s"'%book.application_id)
debug_print('KoboTouch:remove_book_from_device_bookshelves - book.contentID="%s"'%book.contentID) debug_print('KoboTouch:remove_book_from_device_bookshelves - book.contentID="%s"'%book.contentID)
debug_print('KoboTouch:remove_book_from_device_bookshelves - book.device_collections=', book.device_collections) debug_print('KoboTouch:remove_book_from_device_bookshelves - book.device_collections=', book.device_collections)
debug_print('KoboTouch:remove_book_from_device_bookshelves - remove_shelf_list=', remove_shelf_list)
if len(remove_shelf_list) == 0:
return
query = 'DELETE FROM ShelfContent WHERE ContentId = ?' query = 'DELETE FROM ShelfContent WHERE ContentId = ?'

View File

@ -407,4 +407,59 @@ class EX124G(USBMS):
return 'eBooks' return 'eBooks'
return self.EBOOK_DIR_CARD_A return self.EBOOK_DIR_CARD_A
class WAYTEQ(USBMS):

    # Driver for the WayteQ xBook. It overrides the identification settings
    # of the USBMS base class and the per-platform hooks that decide which
    # detected drive is treated as main memory and which as card A.

    name           = 'WayteQ device interface'
    gui_name = 'WayteQ xBook'
    description    = _('Communicate with the WayteQ Reader')
    author         = 'Kovid Goyal'
    supported_platforms = ['windows', 'osx', 'linux']

    # Ordered list of supported formats
    FORMATS     = ['epub', 'mobi', 'prc', 'fb2', 'txt', 'pdf', 'html', 'rtf', 'chm', 'djvu', 'doc']

    VENDOR_ID   = [0x05e3]
    PRODUCT_ID  = [0x0726]
    BCD         = [0x0222]

    EBOOK_DIR_MAIN = 'Documents'
    SCAN_FROM_ROOT = True

    VENDOR_NAME = 'ROCKCHIP'
    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'RK28_SDK_DEMO'
    SUPPORTS_SUB_DIRS = True

    def get_carda_ebook_dir(self, for_upload=False):
        # Uploads to card A always go to 'Documents'; any other use gets
        # the class-level EBOOK_DIR_CARD_A.
        if for_upload:
            return 'Documents'
        return self.EBOOK_DIR_CARD_A

    def windows_sort_drives(self, drives):
        # Swap the 'main' and 'carda' entries of the drives mapping when
        # both are present (truthy); otherwise return it unchanged.
        if len(drives) < 2:
            return drives
        main = drives.get('main', None)
        carda = drives.get('carda', None)
        if main and carda:
            drives['main'] = carda
            drives['carda'] = main
        return drives

    def linux_swap_drives(self, drives):
        # Swap the first two entries and return a tuple; input sequences
        # that do not qualify are returned unchanged.
        #
        # The guard used to be ``len(drives) < 2``, which let a 2-element
        # sequence through and then raised IndexError on drives[2]. Such a
        # sequence is now returned unchanged; every input that worked
        # before behaves exactly as it did.
        # NOTE(review): the check inspects slots 1 and 2 while the swap
        # touches slots 0 and 1 -- confirm this is intended.
        if len(drives) < 3 or not drives[1] or not drives[2]:
            return drives
        drives = list(drives)
        drives[0], drives[1] = drives[1], drives[0]
        return tuple(drives)

    def osx_sort_names(self, names):
        # Swap the 'main' and 'carda' entries of the names mapping when
        # both are set (not None); otherwise return it unchanged.
        if len(names) < 2:
            return names
        main = names.get('main', None)
        card = names.get('carda', None)
        if main is not None and card is not None:
            names['main'] = card
            names['carda'] = main
        return names

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import os import os
from calibre.devices.interface import BookList as BL from calibre.devices.interface import BookList as BL
from calibre.ebooks.metadata import title_sort
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.book.json_codec import JsonCodec from calibre.ebooks.metadata.book.json_codec import JsonCodec
from calibre.utils.date import utcnow from calibre.utils.date import utcnow
@ -62,6 +63,12 @@ class Book(Metadata):
def __hash__(self): def __hash__(self):
return hash((self.storage_id, self.mtp_relpath)) return hash((self.storage_id, self.mtp_relpath))
@property
def title_sorter(self):
    """Sort key for the title column.

    Uses the stored ``title_sort`` value when it is set, not null and not
    the translated 'Unknown' placeholder; otherwise computes one from the
    title with ``title_sort()``.
    """
    stored = getattr(self, 'title_sort', None)
    unusable = (not stored or self.is_null('title_sort')
                or stored == _('Unknown'))
    if unusable:
        return title_sort(self.title or '')
    return stored
class JSONCodec(JsonCodec): class JSONCodec(JsonCodec):
pass pass

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
class MobiError(Exception): class MobiError(Exception):
pass pass
# That might be a bit small on the PW, but Amazon/KG 2.5 still uses these values, even when delivered to a PW
MAX_THUMB_SIZE = 16 * 1024 MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240) MAX_THUMB_DIMEN = (180, 240)

View File

@ -172,12 +172,9 @@ class BookHeader(object):
self.codec = 'cp1252' if not user_encoding else user_encoding self.codec = 'cp1252' if not user_encoding else user_encoding
log.warn('Unknown codepage %d. Assuming %s' % (self.codepage, log.warn('Unknown codepage %d. Assuming %s' % (self.codepage,
self.codec)) self.codec))
# There exists some broken DRM removal tool that removes DRM but # Some KF8 files have header length == 256 (generated by kindlegen
# leaves the DRM fields in the header yielding a header size of # 2.7?). See https://bugs.launchpad.net/bugs/1067310
# 0xF8. The actual value of max_header_length should be 0xE8 but max_header_length = 0x100
# it's changed to accommodate this silly tool. Hopefully that will
# not break anything else.
max_header_length = 0xF8
if (ident == 'TEXTREAD' or self.length < 0xE4 or if (ident == 'TEXTREAD' or self.length < 0xE4 or
self.length > max_header_length or self.length > max_header_length or

View File

@ -753,13 +753,13 @@ class MobiReader(object):
processed_records = list(range(offset-1, self.book_header.records + processed_records = list(range(offset-1, self.book_header.records +
offset)) offset))
self.mobi_html = '' self.mobi_html = b''
if self.book_header.compression_type == 'DH': if self.book_header.compression_type == 'DH':
huffs = [self.sections[i][0] for i in huffs = [self.sections[i][0] for i in
range(self.book_header.huff_offset, xrange(self.book_header.huff_offset,
self.book_header.huff_offset + self.book_header.huff_number)] self.book_header.huff_offset + self.book_header.huff_number)]
processed_records += list(range(self.book_header.huff_offset, processed_records += list(xrange(self.book_header.huff_offset,
self.book_header.huff_offset + self.book_header.huff_number)) self.book_header.huff_offset + self.book_header.huff_number))
huff = HuffReader(huffs) huff = HuffReader(huffs)
unpack = huff.unpack unpack = huff.unpack

View File

@ -20,16 +20,17 @@ except ImportError:
from cssutils import (profile as cssprofiles, parseString, parseStyle, log as from cssutils import (profile as cssprofiles, parseString, parseStyle, log as
cssutils_log, CSSParser, profiles, replaceUrls) cssutils_log, CSSParser, profiles, replaceUrls)
from lxml import etree from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError from cssselect import HTMLTranslator
from calibre import force_unicode from calibre import force_unicode
from calibre.ebooks import unit_convert from calibre.ebooks import unit_convert
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
from calibre.ebooks.cssselect import css_to_xpath_no_case
cssutils_log.setLevel(logging.WARN) cssutils_log.setLevel(logging.WARN)
_html_css_stylesheet = None _html_css_stylesheet = None
css_to_xpath = HTMLTranslator().css_to_xpath
def html_css_stylesheet(): def html_css_stylesheet():
global _html_css_stylesheet global _html_css_stylesheet
@ -96,70 +97,86 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large', FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
'x-large', 'xx-large']) 'x-large', 'xx-large'])
def xpath_lower_case(arg):
    '''Wrap the XPath expression ``arg`` in a translate() call that maps
    the ASCII letters A-Z to a-z.'''
    template = ("translate(%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
                "'abcdefghijklmnopqrstuvwxyz')")
    return template % arg
is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
class CaseInsensitiveAttributesTranslator(HTMLTranslator):
    'Translator that compares class and id values after ASCII lower-casing'

    def xpath_class(self, class_selector):
        """Translate a class selector, matching the class name
        case-insensitively."""
        xp = self.xpath(class_selector.selector)
        wanted = class_selector.class_name
        if not is_non_whitespace(wanted):
            # Empty names, or names containing whitespace, get a condition
            # of '0'.
            xp.add_condition('0')
            return xp
        lowered_attr = xpath_lower_case('@class')
        needle = self.xpath_literal(' ' + wanted.lower() + ' ')
        xp.add_condition(
            "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
            % ('@class', lowered_attr, needle))
        return xp

    def xpath_hash(self, id_selector):
        """Translate an ID selector, matching the id case-insensitively."""
        xp = self.xpath(id_selector.selector)
        lowered_attr = xpath_lower_case('@id')
        return self.xpath_attrib_equals(xp, lowered_attr,
                id_selector.id.lower())
ci_css_to_xpath = CaseInsensitiveAttributesTranslator().css_to_xpath
class CSSSelector(object): class CSSSelector(object):
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:") def __init__(self, css, log=None, namespaces=XPNSMAP):
self.namespaces = namespaces
def __init__(self, css, namespaces=XPNSMAP): self.sel = self.build_selector(css, log)
if isinstance(css, unicode):
# Workaround for bug in lxml on windows/OS X that causes a massive
# memory leak with non ASCII selectors
css = css.encode('ascii', 'ignore').decode('ascii')
try:
path = self.LOCAL_NAME_RE.sub(r"local-name() = '", css_to_xpath(css))
self.sel1 = etree.XPath(css_to_xpath(css), namespaces=namespaces)
except:
self.sel1 = lambda x: []
try:
path = self.LOCAL_NAME_RE.sub(r"local-name() = '",
css_to_xpath_no_case(css))
self.sel2 = etree.XPath(path, namespaces=namespaces)
except:
self.sel2 = lambda x: []
self.sel2_use_logged = False
self.css = css self.css = css
self.used_ci_sel = False
def build_selector(self, css, log, func=css_to_xpath):
try:
return etree.XPath(func(css), namespaces=self.namespaces)
except:
if log is not None:
log.exception('Failed to parse CSS selector: %r'%css)
return None
def __call__(self, node, log): def __call__(self, node, log):
if self.sel is None:
return []
try: try:
ans = self.sel1(node) ans = self.sel(node)
except (AssertionError, ExpressionError, etree.XPathSyntaxError, except:
NameError, # thrown on OS X instead of SelectorSyntaxError log.exception(u'Failed to run CSS selector: %s'%self.css)
SelectorSyntaxError):
return [] return []
if not ans: if not ans:
try: # Try a case insensitive version
ans = self.sel2(node) if not hasattr(self, 'ci_sel'):
except: self.ci_sel = self.build_selector(self.css, log, ci_css_to_xpath)
return [] if self.ci_sel is not None:
else: try:
if ans and not self.sel2_use_logged: ans = self.ci_sel(node)
self.sel2_use_logged = True except:
log.warn('Interpreting class and tag selectors case' log.exception(u'Failed to run case-insensitive CSS selector: %s'%self.css)
' insensitively in the CSS selector: %s'%self.css) return []
if ans:
if not self.used_ci_sel:
log.warn('Interpreting class and id values '
'case-insensitively in selector: %s'%self.css)
self.used_ci_sel = True
return ans return ans
def __repr__(self):
return '<%s %s for %r>' % (
self.__class__.__name__,
hex(abs(id(self)))[2:],
self.css)
_selector_cache = {} _selector_cache = {}
MIN_SPACE_RE = re.compile(r' *([>~+]) *') MIN_SPACE_RE = re.compile(r' *([>~+]) *')
def get_css_selector(raw_selector): def get_css_selector(raw_selector, log):
css = MIN_SPACE_RE.sub(r'\1', raw_selector) css = MIN_SPACE_RE.sub(r'\1', raw_selector)
if isinstance(css, unicode):
# Workaround for bug in lxml on windows/OS X that causes a massive
# memory leak with non ASCII selectors
css = css.encode('ascii', 'ignore').decode('ascii')
ans = _selector_cache.get(css, None) ans = _selector_cache.get(css, None)
if ans is None: if ans is None:
ans = CSSSelector(css) ans = CSSSelector(css, log)
_selector_cache[css] = ans _selector_cache[css] = ans
return ans return ans
@ -272,7 +289,7 @@ class Stylizer(object):
fl = pseudo_pat.search(text) fl = pseudo_pat.search(text)
if fl is not None: if fl is not None:
text = text.replace(fl.group(), '') text = text.replace(fl.group(), '')
selector = get_css_selector(text) selector = get_css_selector(text, self.oeb.log)
matches = selector(tree, self.logger) matches = selector(tree, self.logger)
if fl is not None: if fl is not None:
fl = fl.group(1) fl = fl.group(1)

View File

@ -13,7 +13,7 @@ import os, math, functools, collections, re, copy
from lxml.etree import XPath as _XPath from lxml.etree import XPath as _XPath
from lxml import etree from lxml import etree
from lxml.cssselect import CSSSelector from cssselect import HTMLTranslator
from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
urldefrag, rewrite_links, urlunquote, barename, XHTML, urlnormalize) urldefrag, rewrite_links, urlunquote, barename, XHTML, urlnormalize)
@ -73,6 +73,7 @@ class Split(object):
def find_page_breaks(self, item): def find_page_breaks(self, item):
if self.page_break_selectors is None: if self.page_break_selectors is None:
css_to_xpath = HTMLTranslator().css_to_xpath
self.page_break_selectors = set([]) self.page_break_selectors = set([])
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
OEB_STYLES] OEB_STYLES]
@ -83,7 +84,7 @@ class Split(object):
'page-break-after'), 'cssText', '').strip().lower() 'page-break-after'), 'cssText', '').strip().lower()
try: try:
if before and before not in {'avoid', 'auto', 'inherit'}: if before and before not in {'avoid', 'auto', 'inherit'}:
self.page_break_selectors.add((CSSSelector(rule.selectorText), self.page_break_selectors.add((XPath(css_to_xpath(rule.selectorText)),
True)) True))
if self.remove_css_pagebreaks: if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-before') rule.style.removeProperty('page-break-before')
@ -91,7 +92,7 @@ class Split(object):
pass pass
try: try:
if after and after not in {'avoid', 'auto', 'inherit'}: if after and after not in {'avoid', 'auto', 'inherit'}:
self.page_break_selectors.add((CSSSelector(rule.selectorText), self.page_break_selectors.add((XPath(css_to_xpath(rule.selectorText)),
False)) False))
if self.remove_css_pagebreaks: if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-after') rule.style.removeProperty('page-break-after')

View File

@ -64,8 +64,12 @@ def shorten_title(doc):
if e.text_content(): if e.text_content():
add_match(candidates, e.text_content(), orig) add_match(candidates, e.text_content(), orig)
for item in ['#title', '#head', '#heading', '.pageTitle', '.news_title', '.title', '.head', '.heading', '.contentheading', '.small_header_red']: from cssselect import HTMLTranslator
for e in doc.cssselect(item): css_to_xpath = HTMLTranslator().css_to_xpath
for item in ('#title', '#head', '#heading', '.pageTitle', '.news_title',
'.title', '.head', '.heading', '.contentheading',
'.small_header_red'):
for e in doc.xpath(css_to_xpath(item)):
if e.text: if e.text:
add_match(candidates, e.text, orig) add_match(candidates, e.text, orig)
if e.text_content(): if e.text_content():

View File

@ -93,7 +93,8 @@ class ShareConnMenu(QMenu): # {{{
get_external_ip()) get_external_ip())
try : try :
cs_port = content_server_config().parse().port cs_port = content_server_config().parse().port
ip_text = _(' [%s, port %d]')%(listen_on, cs_port) ip_text = _(' [%(ip)s, port %(port)d]')%dict(ip=listen_on,
port=cs_port)
except: except:
ip_text = ' [%s]'%listen_on ip_text = ' [%s]'%listen_on
text = _('Stop Content Server') + ip_text text = _('Stop Content Server') + ip_text

View File

@ -11,7 +11,7 @@ from calibre.gui2.dialogs.progress import ProgressDialog
from calibre.gui2 import (question_dialog, error_dialog, info_dialog, gprefs, from calibre.gui2 import (question_dialog, error_dialog, info_dialog, gprefs,
warning_dialog, available_width) warning_dialog, available_width)
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG from calibre.constants import preferred_encoding, filesystem_encoding, DEBUG
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre import prints, force_unicode, as_unicode from calibre import prints, force_unicode, as_unicode
@ -382,12 +382,25 @@ class Adder(QObject): # {{{
if not duplicates: if not duplicates:
return self.duplicates_processed() return self.duplicates_processed()
self.pd.hide() self.pd.hide()
files = [_('%(title)s by %(author)s')%dict(title=x[0].title, duplicate_message = []
author=x[0].format_field('authors')[1]) for x in duplicates] for x in duplicates:
duplicate_message.append(_('Already in calibre:'))
matching_books = self.db.books_with_same_title(x[0])
for book_id in matching_books:
aut = [a.replace('|', ',') for a in (self.db.authors(book_id,
index_is_id=True) or '').split(',')]
duplicate_message.append('\t'+ _('%(title)s by %(author)s')%
dict(title=self.db.title(book_id, index_is_id=True),
author=authors_to_string(aut)))
duplicate_message.append(_('You are trying to add:'))
duplicate_message.append('\t'+_('%(title)s by %(author)s')%
dict(title=x[0].title,
author=x[0].format_field('authors')[1]))
duplicate_message.append('')
if question_dialog(self._parent, _('Duplicates found!'), if question_dialog(self._parent, _('Duplicates found!'),
_('Books with the same title as the following already ' _('Books with the same title as the following already '
'exist in the database. Add them anyway?'), 'exist in calibre. Add them anyway?'),
'\n'.join(files)): '\n'.join(duplicate_message)):
pd = QProgressDialog(_('Adding duplicates...'), '', 0, len(duplicates), pd = QProgressDialog(_('Adding duplicates...'), '', 0, len(duplicates),
self._parent) self._parent)
pd.setCancelButton(None) pd.setCancelButton(None)

View File

@ -11,7 +11,7 @@ from PyQt4.Qt import (QMenu, QAction, QActionGroup, QIcon, SIGNAL,
QDialogButtonBox) QDialogButtonBox)
from calibre.customize.ui import (available_input_formats, available_output_formats, from calibre.customize.ui import (available_input_formats, available_output_formats,
device_plugins) device_plugins, disabled_device_plugins)
from calibre.devices.interface import DevicePlugin from calibre.devices.interface import DevicePlugin
from calibre.devices.errors import (UserFeedback, OpenFeedback, OpenFailed, from calibre.devices.errors import (UserFeedback, OpenFeedback, OpenFailed,
InitialConnectionError) InitialConnectionError)
@ -130,6 +130,7 @@ class DeviceManager(Thread): # {{{
self.setDaemon(True) self.setDaemon(True)
# [Device driver, Showing in GUI, Ejected] # [Device driver, Showing in GUI, Ejected]
self.devices = list(device_plugins()) self.devices = list(device_plugins())
self.disabled_device_plugins = list(disabled_device_plugins())
self.managed_devices = [x for x in self.devices if self.managed_devices = [x for x in self.devices if
not x.MANAGES_DEVICE_PRESENCE] not x.MANAGES_DEVICE_PRESENCE]
self.unmanaged_devices = [x for x in self.devices if self.unmanaged_devices = [x for x in self.devices if
@ -425,7 +426,8 @@ class DeviceManager(Thread): # {{{
def _debug_detection(self): def _debug_detection(self):
from calibre.devices import debug from calibre.devices import debug
raw = debug(plugins=self.devices) raw = debug(plugins=self.devices,
disabled_plugins=self.disabled_device_plugins)
return raw return raw
def debug_detection(self, done): def debug_detection(self, done):

View File

@ -29,7 +29,7 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
SearchQueryParser.__init__(self, ['all']) SearchQueryParser.__init__(self, ['all'])
self.show_only_user_plugins = show_only_user_plugins self.show_only_user_plugins = show_only_user_plugins
self.icon = QVariant(QIcon(I('plugins.png'))) self.icon = QVariant(QIcon(I('plugins.png')))
p = QIcon(self.icon).pixmap(32, 32, QIcon.Disabled, QIcon.On) p = QIcon(self.icon).pixmap(64, 64, QIcon.Disabled, QIcon.On)
self.disabled_icon = QVariant(QIcon(p)) self.disabled_icon = QVariant(QIcon(p))
self._p = p self._p = p
self.populate() self.populate()
@ -194,17 +194,20 @@ class PluginModel(QAbstractItemModel, SearchQueryParser): # {{{
dict(plugin_type=category, plugins=_('plugins'))) dict(plugin_type=category, plugins=_('plugins')))
else: else:
plugin = self.index_to_plugin(index) plugin = self.index_to_plugin(index)
disabled = is_disabled(plugin)
if role == Qt.DisplayRole: if role == Qt.DisplayRole:
ver = '.'.join(map(str, plugin.version)) ver = '.'.join(map(str, plugin.version))
desc = '\n'.join(textwrap.wrap(plugin.description, 100)) desc = '\n'.join(textwrap.wrap(plugin.description, 100))
ans='%s (%s) %s %s\n%s'%(plugin.name, ver, _('by'), plugin.author, desc) ans='%s (%s) %s %s\n%s'%(plugin.name, ver, _('by'), plugin.author, desc)
c = plugin_customization(plugin) c = plugin_customization(plugin)
if c: if c and not disabled:
ans += _('\nCustomization: ')+c ans += _('\nCustomization: ')+c
if disabled:
ans += _('\n\nThis plugin has been disabled')
return QVariant(ans) return QVariant(ans)
if role == Qt.DecorationRole: if role == Qt.DecorationRole:
return self.disabled_icon if is_disabled(plugin) else self.icon return self.disabled_icon if disabled else self.icon
if role == Qt.ForegroundRole and is_disabled(plugin): if role == Qt.ForegroundRole and disabled:
return QVariant(QBrush(Qt.gray)) return QVariant(QBrush(Qt.gray))
if role == Qt.UserRole: if role == Qt.UserRole:
return plugin return plugin

View File

@ -975,7 +975,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
else: else:
r = getattr(worker.exception, 'reason', worker.exception) r = getattr(worker.exception, 'reason', worker.exception)
error_dialog(self, _('Could not open ebook'), error_dialog(self, _('Could not open ebook'),
as_unicode(r), det_msg=worker.traceback, show=True) as_unicode(r) or _('Unknown error'),
det_msg=worker.traceback, show=True)
self.close_progress_indicator() self.close_progress_indicator()
else: else:
self.metadata.show_opf(self.iterator.opf, self.metadata.show_opf(self.iterator.opf,

View File

@ -85,7 +85,7 @@ class Kindle(Device):
output_profile = 'kindle' output_profile = 'kindle'
output_format = 'MOBI' output_format = 'MOBI'
name = 'Kindle Paperwhite/Touch/1-4' name = 'Kindle Touch/1-4'
manufacturer = 'Amazon' manufacturer = 'Amazon'
id = 'kindle' id = 'kindle'
@ -118,6 +118,11 @@ class KindleFire(KindleDX):
output_profile = 'kindle_fire' output_profile = 'kindle_fire'
supports_color = True supports_color = True
class KindlePW(Kindle):
name = 'Kindle PaperWhite'
id = 'kindle_pw'
output_profile = 'kindle_pw'
class Sony505(Device): class Sony505(Device):
output_profile = 'sony' output_profile = 'sony'
@ -550,7 +555,7 @@ class DevicePage(QWizardPage, DeviceUI):
def nextId(self): def nextId(self):
idx = list(self.device_view.selectionModel().selectedIndexes())[0] idx = list(self.device_view.selectionModel().selectedIndexes())[0]
dev = self.dev_model.data(idx, Qt.UserRole) dev = self.dev_model.data(idx, Qt.UserRole)
if dev in (Kindle, KindleDX): if dev in (Kindle, KindleDX, KindleFire, KindlePW):
return KindlePage.ID return KindlePage.ID
if dev is iPhone: if dev is iPhone:
return StanzaPage.ID return StanzaPage.ID

View File

@ -649,6 +649,7 @@ class CatalogBuilder(object):
cl_list = [None] * len(item_list) cl_list = [None] * len(item_list)
last_ordnum = 0 last_ordnum = 0
last_c = u''
for idx, item in enumerate(item_list): for idx, item in enumerate(item_list):
if key: if key:

View File

@ -692,7 +692,22 @@ datatype is one of: {0}
help=_('A dictionary of options to customize how ' help=_('A dictionary of options to customize how '
'the data in this column will be interpreted. This is a JSON ' 'the data in this column will be interpreted. This is a JSON '
' string. For enumeration columns, use ' ' string. For enumeration columns, use '
'--display=\'{"enum_values":["val1", "val2"]}\'')) '--display="{\\"enum_values\\":[\\"val1\\", \\"val2\\"]}"'
'\n'
'There are many options that can go into the display variable.'
'The options by column type are:\n'
'composite: composite_template, composite_sort, make_category,'
'contains_html, use_decorations\n'
'datetime: date_format\n'
'enumeration: enum_values, enum_colors, use_decorations\n'
'int, float: number_format\n'
'text: is_names, use_decorations\n'
'\n'
'The best way to find legal combinations is to create a custom'
'column of the appropriate type in the GUI then look at the'
'backup OPF for a book (ensure that a new OPF has been created'
'since the column was added). You will see the JSON for the'
'"display" for the new column in the OPF.'))
return parser return parser

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More