Merge from trunk

This commit is contained in:
Charles Haley 2010-12-01 08:58:00 +00:00
commit c7bb32dc40
35 changed files with 2267 additions and 232 deletions

View File

@ -36,22 +36,37 @@
/*
** Title
*/
.cbj_title {
table.cbj_header td.cbj_title {
font-size: x-large;
font-style: italic;
text-align: center;
}
/*
** Series
*/
table.cbj_header td.cbj_series {
font-size: medium;
text-align: center;
}
/*
** Author
*/
.cbj_author {
table.cbj_header td.cbj_author {
font-size: medium;
text-align: center;
margin-bottom: 1ex;
}
/*
** Table containing Series, Publication Year, Rating and Tags
** Publisher/published
*/
table.cbj_header td.cbj_pubdata {
text-align: center;
}
/*
** Table containing Rating and Tags
*/
table.cbj_header {
width: 100%;
@ -62,9 +77,8 @@ table.cbj_header {
*/
table.cbj_header td.cbj_label {
font-family: sans-serif;
font-weight: bold;
text-align: right;
width: 40%;
width: 33%;
}
/*
@ -73,9 +87,23 @@ table.cbj_header td.cbj_label {
table.cbj_header td.cbj_content {
font-family: sans-serif;
text-align: left;
width:60%;
width:67%;
}
/*
** Metadata divider
*/
hr.metadata_divider {
width:90%;
margin-left:5%;
border-top: solid white 0px;
border-right: solid white 0px;
border-bottom: solid black 1px;
border-left: solid white 0px;
}
/*
** To skip a banner item (Series|Published|Rating|Tags),
** edit the appropriate CSS rule below.

View File

@ -6,17 +6,24 @@
</head>
<body>
<div class="cbj_banner">
<div class="cbj_title">{title}</div>
<div class="cbj_author">{author}</div>
<table class="cbj_header">
<tr class="cbj_series">
<td class="cbj_label">{series_label}:</td>
<td class="cbj_content">{series}</td>
<tr>
<td class="cbj_title" colspan="2">{title}</td>
</tr>
<tr class="cbj_pubdate">
<td class="cbj_label">{pubdate_label}:</td>
<td class="cbj_content">{pubdate}</td>
<tr>
<td class="cbj_series" colspan="2">{series}</td>
</tr>
<tr>
<td class="cbj_author" colspan="2">{author}</td>
</tr>
<tr>
<td class="cbj_pubdata" colspan="2">{publisher} ({pubdate})</td>
</tr>
<tr>
<td class="cbj_author" colspan="2"><hr class="metadata_divider" /></td>
</tr>
<tr class="cbj_rating">
<td class="cbj_label">{rating_label}:</td>
<td class="cbj_content">{rating}</td>

1381
resources/mime.types Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,54 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class ABCNews(BasicNewsRecipe):
    """Recipe that builds a news download from abc.net.au/news RSS feeds."""

    title = 'ABC News'
    __author__ = 'Dean Cording'
    description = 'News from Australia'
    masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
    # The masthead logo is reused as the cover image.
    cover_url = masthead_url
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'ABC News'
    category = 'News, Australia, World'
    language = 'en_AU'
    publication_type = 'newsportal'

    # Remove HTML comments (possibly spanning several lines) from the
    # downloaded markup.
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }

    # Given as a list of tag specifications, matching the form used for
    # remove_tags below and by the sibling recipes.
    keep_only_tags = [dict(id='article')]
    remove_tags = [
        dict(attrs={'class': ['related', 'tags']}),
        dict(id='statepromo'),
    ]
    remove_attributes = ['width', 'height']

    # (section title, feed URL) pairs.
    feeds = [
        ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
        ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
        ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
        ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
        ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
        ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
        ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
        ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
        ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
        ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
    ]

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
businessspectator.com.au
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class BusinessSpectator(BasicNewsRecipe):
    """Recipe that builds a download from the Business Spectator RSS feeds."""

    # --- publication metadata -------------------------------------------
    title = 'Business Spectator'
    __author__ = 'Dean Cording'
    description = 'Australian Business News & commentary delivered the way you want it.'
    publisher = 'Business Spectator'
    category = 'News, Australia, Business'
    language = 'en_AU'
    publication_type = 'newsportal'
    masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif'
    # The masthead image is also used as the cover.
    cover_url = masthead_url

    # --- download settings ----------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'

    # --- markup clean-up ------------------------------------------------
    # HTML comments (including multi-line ones) are replaced by nothing.
    preprocess_regexps = [
        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
    ]
    keep_only_tags = [
        dict(id='storyHeader'),
        dict(id='body-html'),
    ]
    remove_tags = [
        dict(attrs={'class': 'hql'}),
    ]
    remove_attributes = ['width', 'height', 'style']

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }

    # (section title, feed URL) pairs.
    feeds = [
        ('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
        ('Alan Kohler', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'),
        ('Robert Gottliebsen', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'),
        ('Stephen Bartholomeusz', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'),
        ('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
        ('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
    ]

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Esensja(BasicNewsRecipe):
    """Recipe for the magazine published at esensja.pl/magazyn.

    ``parse_index`` scrapes the table of contents of the issue linked from
    the magazine front page and follows the "next page" links of
    multi-page articles.
    """

    title = u'Esensja'
    __author__ = 'matek09'
    description = 'Monthly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    # Base URL that article links are relative to; set by parse_index().
    HREF = '0'

    #keep_only_tags =[]
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'})

    remove_tags_before = dict(name='div', attrs={'class': 't-title'})
    remove_tags_after = dict(name='img', attrs={'src': '../../../2000/01/img/tab_bot.gif'})

    remove_tags = [
        dict(name='img', attrs={'src': '../../../2000/01/img/tab_top.gif'}),
        dict(name='img', attrs={'src': '../../../2000/01/img/tab_bot.gif'}),
        dict(name='div', attrs={'class': 't-title2 nextpage'}),
    ]

    extra_css = '''
        .t-title {font-size: x-large; font-weight: bold; text-align: left}
        .t-author {font-size: x-small; text-align: left}
        .t-title2 {font-size: x-small; font-style: italic; text-align: left}
        .text {font-size: small; text-align: left}
        .annot-ref {font-style: italic; text-align: left}
    '''

    # Strip every alt="..." attribute from the downloaded markup.
    preprocess_regexps = [(re.compile(r'alt="[^"]*"'), lambda match: '')]

    def _article(self, title, href):
        """Return the article record for *href* (relative to ``self.HREF``)."""
        return {'title': title, 'url': self.HREF + href, 'date': '', 'description': ''}

    @staticmethod
    def _section_name(chapter, subchapter):
        """Return 'chapter' or 'chapter - subchapter' when a subchapter is set."""
        if subchapter:
            return chapter + ' - ' + subchapter
        return chapter

    def parse_index(self):
        """Build the list of ``(section title, articles)`` tuples for the issue.

        Sets ``self.HREF`` and ``self.cover_url`` as a side effect.
        """
        soup = self.index_to_soup('http://www.esensja.pl/magazyn/')
        a = soup.find('a', attrs={'href' : re.compile('.*/index.html')})
        # The link is relative and starts with "<year>/<month>/".
        parts = a['href'].split('/')
        year = parts[0]
        month = parts[1]
        issue_url = 'http://www.esensja.pl/magazyn/' + year + '/' + month
        self.HREF = issue_url + '/iso/'
        soup = self.index_to_soup(self.HREF + '01.html')
        self.cover_url = issue_url + '/img/ilustr/cover_b.jpg'

        feeds = []
        intro = soup.find('div', attrs={'class' : 'n-title'})
        chapter = 'Wprowadzenie'
        subchapter = ''
        articles = [self._article(self.tag_to_string(intro.a), intro.a['href'])]

        for tag in intro.findAllNext(attrs={'class': ['chapter', 'subchapter', 'n-title']}):
            if tag.name == 'td':
                # A heading cell starts a new (sub)chapter: close the section
                # collected so far before switching titles.
                if articles:
                    feeds.append((self._section_name(chapter, subchapter), articles))
                    articles = []
                if tag['class'] == 'chapter':
                    chapter = self.tag_to_string(tag).capitalize()
                    subchapter = ''
                else:
                    subchapter = self.tag_to_string(tag)
                continue

            title = self.tag_to_string(tag.a)
            articles.append(self._article(title, tag.a['href']))

            # Follow "next page" links; every continuation page becomes its
            # own entry titled "<title> c. d. <n>".
            page = self.index_to_soup(self.HREF + tag.a['href'])
            part = 1
            while True:
                more = page.find('div', attrs={'class' : 't-title2 nextpage'})
                if more is None:
                    break
                href = more.a['href']
                page = self.index_to_soup(self.HREF + href)
                articles.append(self._article(title + ' c. d. ' + str(part), href))
                part += 1

        # Sections are only closed when the next heading is met, so the last
        # one has to be flushed explicitly or its articles would be dropped.
        if articles:
            feeds.append((self._section_name(chapter, subchapter), articles))

        return feeds

View File

@ -1,67 +1,61 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Justus Bisser <justus.bisser at gmail.com>'
__copyright__ = '2010, Christian Schmitt'
'''
fr-online.de
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.recipes import BasicNewsRecipe
class Spiegel_ger(BasicNewsRecipe):
class FROnlineRecipe(BasicNewsRecipe):
title = 'Frankfurter Rundschau'
__author__ = 'Justus Bisser'
description = "Dies ist die Online-Ausgabe der Frankfurter Rundschau. Um die abgerufenen individuell einzustellen bearbeiten sie die Liste im erweiterten Modus. Die Feeds findet man auf http://www.fr-online.de/verlagsservice/fr_newsreader/?em_cnt=574255"
__author__ = 'maccs'
description = 'Nachrichten aus D und aller Welt'
encoding = 'utf-8'
masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
category = 'FR Online, Frankfurter Rundschau, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget'
oldest_article = 7
max_articles_per_feed = 100
category = 'news, germany, world'
language = 'de'
lang = 'de-DE'
no_stylesheets = True
publication_type = 'newspaper'
use_embedded_content = False
#encoding = 'cp1252'
remove_javascript = True
no_stylesheets = True
oldest_article = 1 # Increase this number if you're interested in older articles
max_articles_per_feed = 50 # Seems a reasonable number to me
extra_css = '''
body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
.imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
.p--heading-1 {font-weight: bold;}
.calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
'''
remove_tags = [dict(name='div', attrs={'id':'Logo'})]
cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
cover_margins = (100, 150, '#ffffff')
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
}
recursions = 0
max_articles_per_feed = 100
#keep_only_tags = [dict(name='div', attrs={'class':'text'})]
#tags_remove = [dict(name='div', attrs={'style':'text-align: left; margin: 4px 0px 0px 4px; width: 200px; float: right;'})]
remove_attributes = ['style']
feeds = []
#remove_tags_before = [dict(name='div', attrs={'style':'padding-left: 0px;'})]
#remove_tags_after = [dict(name='div', attrs={'class':'box_head_text'})]
feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))
# enable for all news
allNews = 0
if allNews:
feeds = [(u'Frankfurter Rundschau', u'http://www.fr-online.de/rss/sport/index.xml')]
else:
#select the feeds you like
feeds = [(u'Nachrichten', u'http://www.fr-online.de/rss/politik/index.xml')]
feeds.append((u'Kommentare und Analysen', u'http://www.fr-online.de/rss/meinung/index.xml'))
feeds.append((u'Dokumentationen', u'http://www.fr-online.de/rss/dokumentation/index.xml'))
feeds.append((u'Deutschlandtrend', u'http://www.fr-online.de/rss/deutschlandtrend/index.xml'))
feeds.append((u'Wirtschaft', u'http://www.fr-online.de/rss/wirtschaft/index.xml'))
feeds.append((u'Sport', u'http://www.fr-online.de/rss/sport/index.xml'))
feeds.append((u'Feuilleton', u'http://www.fr-online.de/rss/feuilleton/index.xml'))
feeds.append((u'Panorama', u'http://www.fr-online.de/rss/panorama/index.xml'))
feeds.append((u'Rhein Main und Hessen', u'http://www.fr-online.de/rss/hessen/index.xml'))
feeds.append((u'Fitness und Gesundheit', u'http://www.fr-online.de/rss/fit/index.xml'))
feeds.append((u'Multimedia', u'http://www.fr-online.de/rss/multimedia/index.xml'))
feeds.append((u'Wissen und Bildung', u'http://www.fr-online.de/rss/wissen/index.xml'))
def get_article_url(self, article):
url = article.link
regex = re.compile("0C[0-9]{6,8}0A?")
def print_version(self, url):
return url.replace('index.html', 'view/printVersion/-/index.html')
liste = regex.findall(url)
string = liste.pop(0)
string = string[2:len(string)-1]
return "http://www.fr-online.de/_em_cms/_globals/print.php?em_cnt=" + string

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Histmag(BasicNewsRecipe):
    """Recipe that collects articles from three histmag.org listing pages."""

    title = u'Histmag'
    __author__ = 'matek09'
    description = u"Artykuly historyczne i publicystyczne"
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    #max_articles_per_feed = 1

    remove_tags_before = dict(name='div', attrs={'id': 'article'})
    remove_tags_after = dict(name='h2', attrs={'class': 'komentarze'})

    #keep_only_tags =[]
    #keep_only_tags.append(dict(name = 'h2'))
    #keep_only_tags.append(dict(name = 'p'))

    remove_tags = [
        dict(name='p', attrs={'class': 'podpis'}),
        dict(name='h2', attrs={'class': 'komentarze'}),
        dict(name='img', attrs={'src': 'style/buttons/wesprzyjnas-1.jpg'}),
    ]

    # Surround every <span>...</span> with a pair of line breaks.
    preprocess_regexps = [
        (re.compile(r'</span>'), lambda match: '</span><br><br>'),
        (re.compile(r'<span>'), lambda match: '<br><br><span>'),
    ]

    extra_css = '''
        .left {font-size: x-small}
        .right {font-size: x-small}
    '''

    def find_articles(self, soup):
        """Return one article record per ``<div class="text">`` in *soup*."""
        # NOTE(review): ``div.next('p')`` calls the node that follows the div
        # rather than searching from the div itself; whether that yields the
        # intended paragraph depends on the page markup -- confirm against a
        # live page before changing it.
        return [
            {
                'title': self.tag_to_string(div.h3.a),
                'url': 'http://www.histmag.org/' + div.h3.a['href'],
                'date': self.tag_to_string(div.next('p')).split('|')[0],
                'description': self.tag_to_string(div.next('p', podpis=False)),
            }
            for div in soup.findAll('div', attrs={'class': 'text'})
        ]

    def parse_index(self):
        """Return ``(section title, articles)`` tuples, one per listing page."""
        listings = (
            (u"Artykuly historyczne", 'http://histmag.org/?arc=4&dx=0'),
            (u"Artykuly publicystyczne", 'http://histmag.org/?arc=5&dx=0'),
            (u"Wydarzenia", 'http://histmag.org/?arc=1&dx=0'),
        )
        feeds = []
        for section, url in listings:
            listing_soup = self.index_to_soup(url)
            feeds.append((section, self.find_articles(listing_soup)))
        return feeds

View File

@ -1,19 +1,22 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
class Newsweek(BasicNewsRecipe):
EDITION = 0
FIND_LAST_FULL_ISSUE = True
EDITION = '0'
EXCLUDE_LOCKED = True
LOCKED_ICO = 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'
title = u'Newsweek Polska'
__author__ = 'Mateusz Kielar'
__author__ = 'matek09'
description = 'Weekly magazine'
encoding = 'utf-8'
no_stylesheets = True
language = 'en'
language = 'pl'
remove_javascript = True
keep_only_tags =[]
@ -33,24 +36,42 @@ class Newsweek(BasicNewsRecipe):
def print_version(self, url):
return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'
def is_locked(self, a):
    """Return True when the first <img> following *a* is the lock icon.

    The icon URL is taken from the class constant ``LOCKED_ICO`` instead of
    being repeated here.
    """
    return a.findNext('img')['src'] == self.LOCKED_ICO
def is_full(self, issue_soup):
    """Return True when *issue_soup* contains at most one lock icon.

    The icon URL is taken from the class constant ``LOCKED_ICO`` instead of
    being repeated here.
    """
    locked_icons = issue_soup.findAll('img', attrs={'src' : self.LOCKED_ICO})
    return len(locked_icons) <= 1
def find_last_full_issue(self):
page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
page = self.index_to_soup(issue)
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
page = self.index_to_soup(issue)
self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
frame_url = 'http://www.newsweek.pl/Frames/IssueCover.aspx'
while True:
frame_soup = self.index_to_soup(frame_url)
self.EDITION = frame_soup.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
if self.is_full(issue_soup):
break
frame_url = 'http://www.newsweek.pl/Frames/' + frame_soup.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
def parse_index(self):
if self.FIND_LAST_FULL_ISSUE:
self.find_last_full_issue()
soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
self.cover_url = img['src']
feeds = []
parent = soup.find(id='content-left-big')
for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
section = self.tag_to_string(txt).capitalize()
articles = list(self.find_articles(txt))
if len(articles) > 0:
section = self.tag_to_string(txt).capitalize()
feeds.append((section, articles))
return feeds
@ -58,6 +79,8 @@ class Newsweek(BasicNewsRecipe):
for a in txt.findAllNext( attrs={'class':['strong','hr']}):
if a.name in "div":
break
if (not self.FIND_LAST_FULL_ISSUE) & self.EXCLUDE_LOCKED & self.is_locked(a):
continue
yield {
'title' : self.tag_to_string(a),
'url' : 'http://www.newsweek.pl' + a['href'],

View File

@ -8,12 +8,15 @@ www.nin.co.rs
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from contextlib import nested, closing
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre import entity_to_unicode
class Nin(BasicNewsRecipe):
title = 'NIN online'
__author__ = 'Darko Miletic'
description = 'Nedeljne Informativne Novine'
publisher = 'NIN d.o.o.'
publisher = 'NIN d.o.o. - Ringier d.o.o.'
category = 'news, politics, Serbia'
no_stylesheets = True
delay = 1
@ -26,18 +29,29 @@ class Nin(BasicNewsRecipe):
use_embedded_content = False
language = 'sr'
publication_type = 'magazine'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} '
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Verdana, Lucida, sans1, sans-serif}
.article_description{font-family: Verdana, Lucida, sans1, sans-serif}
.artTitle{font-size: x-large; font-weight: bold; color: #900}
.izjava{font-size: x-large; font-weight: bold}
.columnhead{font-size: small; font-weight: bold;}
img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
b{margin-top: 1em}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
, 'linearize_tables' : True
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_attributes = ['height','width']
preprocess_regexps = [
(re.compile(r'</body>.*?<html>', re.DOTALL|re.IGNORECASE),lambda match: '</body>')
,(re.compile(r'</html>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</html>')
,(re.compile(u'\u0110'), lambda match: u'\u00D0')
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
@ -50,7 +64,10 @@ class Nin(BasicNewsRecipe):
return br
keep_only_tags =[dict(name='td', attrs={'width':'520'})]
remove_tags_before =dict(name='span', attrs={'class':'izjava'})
remove_tags_after =dict(name='html')
remove_tags = [dict(name=['object','link','iframe','meta','base'])]
remove_attributes=['border','background','height','width','align','valign']
def get_cover_url(self):
cover_url = None
@ -63,7 +80,7 @@ class Nin(BasicNewsRecipe):
def parse_index(self):
articles = []
count = 0
soup = self.index_to_soup(self.PREFIX)
soup = self.index_to_soup(self.INDEX)
for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
count = count +1
if self.test and count > 2:
@ -90,3 +107,45 @@ class Nin(BasicNewsRecipe):
articles.append((section,inarts))
return articles
def index_to_soup(self, url_or_raw, raw=False):
    """Fetch or accept markup and return it parsed as BeautifulSoup.

    *url_or_raw* is treated as a URL when it starts with ``scheme://``;
    otherwise it is used as the markup itself. With ``raw=True`` the
    markup is returned as-is, without decoding or parsing.

    Raises RuntimeError when a URL yields an empty response.
    """
    if re.match(r'\w+://', url_or_raw):
        # Use open_novisit when the browser provides it, else plain open.
        opener = getattr(self.browser, 'open_novisit', self.browser.open)
        with closing(opener(url_or_raw)) as stream:
            data = stream.read()
        if not data:
            raise RuntimeError('Could not fetch index from %s'%url_or_raw)
    else:
        data = url_or_raw

    if raw:
        return data

    if self.encoding and not isinstance(data, unicode):
        if callable(self.encoding):
            data = self.encoding(data)
        else:
            data = data.decode(self.encoding, 'replace')

    # Entity conversion falls back to cp1252 when no usable encoding name
    # is configured (encoding is a callable or None).
    if callable(self.encoding) or self.encoding is None:
        enc = 'cp1252'
    else:
        enc = self.encoding

    massage = list(BeautifulSoup.MARKUP_MASSAGE)
    massage.append((
        re.compile(r'&(\S+?);'),
        lambda match: entity_to_unicode(match, encoding=enc),
    ))
    # Drop control characters in the range 0x00-0x08.
    massage.append((re.compile(r'[\x00-\x08]+'), lambda match: ''))
    return BeautifulSoup(data, markupMassage=massage)
def preprocess_html(self, soup):
    """Simplify the article markup in place and return *soup*.

    Steps, in order: drop inline styles, remove empty <div>s, turn table
    cells and rows into <div>s, give every image an alt text, and replace
    each table that contains an image with its first image.
    """
    for styled in soup.findAll(style=True):
        del styled['style']

    for div in soup.findAll('div'):
        if not div.contents:
            div.extract()

    for cell in soup.findAll(['td','tr']):
        cell.name = 'div'

    for image in soup.findAll('img'):
        if not image.has_key('alt'):
            image['alt'] = 'image'

    for table in soup.findAll('table'):
        picture = table.find('img')
        if picture:
            # Detach the image first so it survives the table's removal.
            picture.extract()
            table.replaceWith(picture)

    return soup

View File

@ -1,18 +1,18 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
class Polityka(BasicNewsRecipe):
title = u'Polityka'
__author__ = 'Mateusz Kielar'
__author__ = 'matek09'
description = 'Weekly magazine. Last archive issue'
encoding = 'utf-8'
no_stylesheets = True
language = 'en'
language = 'pl'
remove_javascript = True
remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
@ -48,7 +48,6 @@ class Polityka(BasicNewsRecipe):
for div in box.findAll('div', attrs={'class': 'list_tresc'}):
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
print section
if not articles.has_key(section):
articles[section] = []
articles[section].append( {

View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Wprost(BasicNewsRecipe):
    """Recipe for the magazine published at wprost.pl.

    The issue to download is picked from the archive page by
    ``find_last_issue``; ``parse_index`` then scrapes that issue's table of
    contents.
    """

    # Issue identifier appended to the table-of-contents URL; overwritten by
    # find_last_issue() with the value scraped from the archive page.
    EDITION = 0
    # When true, choose the issue linked right after the last "blocked" icon
    # on the archive page instead of the first issue listed.
    FIND_LAST_FULL_ISSUE = True
    # When true, articles for which is_blocked() is true are skipped.
    EXCLUDE_LOCKED = True
    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'

    title = u'Wprost'
    __author__ = 'matek09'
    description = 'Weekly magazine'
    encoding = 'ISO-8859-2'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True

    remove_tags_before = dict(name='div', attrs={'id': 'print-layer'})
    remove_tags_after = dict(name='div', attrs={'id': 'print-layer'})

    # Alternative selection, currently disabled:
    #keep_only_tags =[]
    #keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))

    # Remove the inline styles that hide content.
    preprocess_regexps = [
        (re.compile(r'style="display: none;"'), lambda match: ''),
        (re.compile(r'display: block;'), lambda match: ''),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'def element-date'}),
        dict(name='div', attrs={'class': 'def silver'}),
        dict(name='div', attrs={'id': 'content-main-column-right'}),
    ]

    extra_css = '''
        .div-header {font-size: x-small; font-weight: bold}
    '''
    #h2 {font-size: x-large; font-weight: bold}

    def is_blocked(self, a):
        """Return True when *a* has a following <img> sibling."""
        return a.findNextSibling('img') is not None

    def find_last_issue(self):
        """Pick the issue to download; set ``EDITION`` and ``cover_url``."""
        soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
        toc_link = {'title' : re.compile('Zobacz spis tre.ci')}
        a = None
        if self.FIND_LAST_FULL_ISSUE:
            ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
            if ico_blocked:
                a = ico_blocked[-1].findNext('a', attrs=toc_link)
        if not self.FIND_LAST_FULL_ISSUE or not ico_blocked:
            # Either the first listed issue was requested, or no issue on the
            # archive page carries the blocked icon (indexing [-1] would
            # raise IndexError), so the first listed issue is used.
            a = soup.find('a', attrs=toc_link)
        self.EDITION = a['href'].replace('/tygodnik/?I=', '')
        self.cover_url = a.img['src']

    def parse_index(self):
        """Return ``(section title, articles)`` tuples for the chosen issue."""
        self.find_last_issue()
        soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
        feeds = []
        for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
            articles = list(self.find_articles(main_block))
            # Sections without any usable article are left out.
            if articles:
                section = self.tag_to_string(main_block)
                feeds.append((section, articles))
        return feeds

    def find_articles(self, main_block):
        """Yield article records for the links that follow *main_block*.

        Iteration stops at the first <td> element met; blocked articles are
        skipped when ``EXCLUDE_LOCKED`` is set.
        """
        for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
            if a.name == 'td':
                break
            if self.EXCLUDE_LOCKED and self.is_blocked(a):
                continue
            yield {
                'title' : self.tag_to_string(a),
                'url' : 'http://www.wprost.pl' + a['href'],
                'date' : '',
                'description' : ''
            }

View File

@ -340,6 +340,8 @@ class LinuxFreeze(Command):
__builtin__.help = _Helper()
def set_qt_plugin_path():
import uuid
uuid.uuid4() # Workaround for libuuid/PyQt conflict
from PyQt4.Qt import QCoreApplication
paths = list(map(unicode, QCoreApplication.libraryPaths()))
paths.insert(0, sys.frozen_path + '/lib/qt_plugins')

View File

@ -3,7 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import uuid, sys, os, re, logging, time, mimetypes, \
import uuid, sys, os, re, logging, time, \
__builtin__, warnings, multiprocessing
from urllib import getproxies
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
@ -19,43 +19,18 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
__appname__, __version__, __author__, \
win32event, win32api, winerror, fcntl, \
filesystem_encoding, plugins, config_dir
from calibre.startup import winutil, winutilerror
from calibre.startup import winutil, winutilerror, guess_type
uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
if islinux and not getattr(sys, 'frozen', False):
# Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
uuid.uuid4()
if False:
# Prevent pyflakes from complaining
winutil, winutilerror, __appname__, islinux, __version__
fcntl, win32event, isfrozen, __author__, terminal_controller
winerror, win32api, isfreebsd
winerror, win32api, isfreebsd, guess_type
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('text/fb2+xml', '.fb2')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-sony-bbeb', '.lrx')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
mimetypes.add_type('application/x-font-opentype', '.otf')
mimetypes.add_type('application/x-font-truetype', '.ttf')
mimetypes.add_type('application/oebps-package+xml', '.opf')
mimetypes.add_type('application/vnd.palm', '.pdb')
mimetypes.add_type('application/x-mobipocket-ebook', '.mobi')
mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
mimetypes.add_type('application/x-cbz', '.cbz')
mimetypes.add_type('application/x-cbr', '.cbr')
mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
mimetypes.add_type('image/wmf', '.wmf')
mimetypes.add_type('image/jpeg', '.jpg')
mimetypes.add_type('image/jpeg', '.jpeg')
mimetypes.add_type('image/png', '.png')
mimetypes.add_type('image/gif', '.gif')
mimetypes.add_type('image/bmp', '.bmp')
mimetypes.add_type('image/svg+xml', '.svg')
guess_type = mimetypes.guess_type
import cssutils
cssutils.log.setLevel(logging.WARN)

View File

@ -19,7 +19,7 @@ class ANDROID(USBMS):
VENDOR_ID = {
# HTC
0x0bb4 : { 0x0c02 : [0x100, 0x0227], 0x0c01 : [0x100, 0x0227], 0x0ff9
0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
: [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
0xc92 : [0x100]},

View File

@ -91,6 +91,10 @@ class FB2MLizer(object):
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
def clean_text(self, text):
text = re.sub(r'(?miu)<section>\s*</section>', '', text)
text = re.sub(r'(?miu)\s+</section>', '</section>', text)
text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
text = re.sub(r'(?miu)<p>\s*</p>', '', text)
text = re.sub(r'(?miu)\s+</p>', '</p>', text)
text = re.sub(r'(?miu)</p><p>', '</p>\n\n<p>', text)
@ -166,11 +170,15 @@ class FB2MLizer(object):
def get_text(self):
    """Convert every spine item to FictionBook2 markup and return it joined.

    When ``sectionize_chapters_using_file_structure`` is enabled, a
    ``<section>`` is opened before every item except the first and a
    ``</section>`` is emitted after every item except the last
    (presumably the enclosing body header/footer supply the outermost
    pair -- TODO confirm).
    """
    sectionize = self.opts.sectionize_chapters_using_file_structure
    last_index = len(self.oeb_book.spine) - 1
    text = []
    for i, item in enumerate(self.oeb_book.spine):
        # Compare indices by value: identity tests on ints only work by
        # accident for small numbers and break on long spines.
        if sectionize and i != 0:
            text.append('<section>')
        self.log.debug('Converting %s to FictionBook2 XML' % item.href)
        stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
        text.append(self.add_page_anchor(item))
        text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
        if sectionize and i != last_index:
            text.append('</section>')
    return ''.join(text)
def fb2_body_footer(self):
@ -258,6 +266,10 @@ class FB2MLizer(object):
if id_name:
fb2_text.append(self.get_anchor(page, id_name))
if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
fb2_text.append('<title>')
tags.append('title')
fb2_tag = TAG_MAP.get(tag, None)
if fb2_tag == 'p':
if 'p' in tag_stack+tags:

View File

@ -25,6 +25,20 @@ class FB2Output(OutputFormatPlugin):
'WARNING: ' \
'This option is experimental. It can cause conversion ' \
'to fail. It can also produce unexpected output.')),
OptionRecommendation(name='sectionize_chapters_using_file_structure',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Try to turn chapters into individual sections using the ' \
'internal structure of the ebook. This works well for EPUB ' \
'books that have been internally split by chapter.')),
OptionRecommendation(name='h1_to_title',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Wrap all h1 tags with fb2 title elements.')),
OptionRecommendation(name='h2_to_title',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Wrap all h2 tags with fb2 title elements.')),
OptionRecommendation(name='h3_to_title',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Wrap all h3 tags with fb2 title elements.')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):

View File

@ -504,6 +504,9 @@ class MobiReader(object):
'x-large': '5',
'xx-large': '6',
}
def barename(x):
    # Keep only the part of the name after the last ':'; a name without
    # any ':' is returned unchanged.
    _prefix, _sep, local_name = x.rpartition(':')
    return local_name
mobi_version = self.book_header.mobi_version
for x in root.xpath('//ncx'):
x.getparent().remove(x)
@ -512,7 +515,8 @@ class MobiReader(object):
for x in tag.attrib:
if ':' in x:
del tag.attrib[x]
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
if tag.tag and barename(tag.tag.lower()) in \
('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content', 'form'):
tag.tag = 'div' if tag.tag in ('content', 'form') else 'span'
for key in tag.attrib.keys():

View File

@ -93,7 +93,7 @@ class Jacket(object):
# Render Jacket {{{
def get_rating(rating, rchar):
def get_rating(rating, rchar, e_rchar):
ans = ''
try:
num = float(rating)/2
@ -104,12 +104,12 @@ def get_rating(rating, rchar):
if num < 1:
return ans
ans = rchar * int(num)
ans = ("%s%s") % (rchar * int(num), e_rchar * (5 - int(num)))
return ans
def render_jacket(mi, output_profile,
alt_title=_('Unknown'), alt_tags=[], alt_comments=''):
alt_title=_('Unknown'), alt_tags=[], alt_comments='',
alt_publisher=('Unknown publisher')):
css = P('jacket/stylesheet.css', data=True).decode('utf-8')
try:
@ -124,12 +124,17 @@ def render_jacket(mi, output_profile,
if not mi.series:
series = ''
try:
publisher = mi.publisher if mi.publisher else alt_publisher
except:
publisher = _('Unknown publisher')
try:
pubdate = strftime(u'%Y', mi.pubdate.timetuple())
except:
pubdate = ''
rating = get_rating(mi.rating, output_profile.ratings_char)
rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char)
tags = mi.tags if mi.tags else alt_tags
if tags:
@ -154,6 +159,7 @@ def render_jacket(mi, output_profile,
css=css,
title=title,
author=author,
publisher=publisher,
pubdate_label=_('Published'), pubdate=pubdate,
series_label=_('Series'), series=series,
rating_label=_('Rating'), rating=rating,
@ -168,16 +174,16 @@ def render_jacket(mi, output_profile,
# Post-process the generated html to strip out empty header items
soup = BeautifulSoup(generated_html)
if not series:
series_tag = soup.find('tr', attrs={'class':'cbj_series'})
series_tag = soup.find(attrs={'class':'cbj_series'})
series_tag.extract()
if not rating:
rating_tag = soup.find('tr', attrs={'class':'cbj_rating'})
rating_tag = soup.find(attrs={'class':'cbj_rating'})
rating_tag.extract()
if not tags:
tags_tag = soup.find('tr', attrs={'class':'cbj_tags'})
tags_tag = soup.find(attrs={'class':'cbj_tags'})
tags_tag.extract()
if not pubdate:
pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'})
pubdate_tag = soup.find(attrs={'class':'cbj_pubdate'})
pubdate_tag.extract()
if output_profile.short_name != 'kindle':
hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'})

View File

@ -216,7 +216,9 @@ class PMLMLizer(object):
w = '\\w'
width = elem.get('width')
if width:
w += '="%s%%"' % width
if not width.endswith('%'):
width += '%'
w += '="%s"' % width
else:
w += '="50%"'
text.append(w)

View File

@ -37,7 +37,8 @@ class GenerateCatalogAction(InterfaceAction):
dbspec[id] = {'ondevice': db.ondevice(id, index_is_id=True)}
# Calling gui2.tools:generate_catalog()
ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager)
ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager,
db)
if ret is None:
return

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QMenu
from calibre.constants import isosx
from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
open_local_file
open_local_file, info_dialog
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.utils.config import prefs
from calibre.ptempfile import PersistentTemporaryFile
@ -89,18 +89,34 @@ class ViewAction(InterfaceAction):
self._launch_viewer(name, viewer, internal)
def view_specific_format(self, triggered):
    '''
    Ask the user for a format and open every selected book that is
    available in that format.

    The format chooser offers the union of the formats of all selected
    books. Books that lack the chosen format are skipped, and an
    informational dialog is shown when at least one book was skipped.

    :param triggered: Unused; accepted so the method can be connected
                      directly to an action's ``triggered`` signal.
    '''
    rows = list(self.gui.library_view.selectionModel().selectedRows())
    if not rows:
        d = error_dialog(self.gui, _('Cannot view'), _('No book selected'))
        d.exec_()
        return

    db = self.gui.library_view.model().db
    rows = [r.row() for r in rows]

    # One entry per selected row: the list of upper-cased format names, or
    # None when the database reports no formats for that book.
    formats = []
    for row in rows:
        f = db.formats(row)
        formats.append(f.upper().split(',') if f else None)

    all_fmts = set()
    for fmts in formats:
        # Books without any format have a None entry; iterating it would
        # raise TypeError, so skip them here.
        if fmts:
            all_fmts.update(fmts)

    d = ChooseFormatDialog(self.gui, _('Choose the format to view'),
            sorted(all_fmts))
    if d.exec_() == d.Accepted:
        fmt = d.format()
        orig_num = len(rows)
        rows = [row for row, fmts in zip(rows, formats)
                if fmts and fmt in fmts]
        if self._view_check(len(rows)):
            for row in rows:
                self.view_format(row, fmt)
            if len(rows) < orig_num:
                info_dialog(self.gui, _('Format unavailable'),
                        _('Not all the selected books were available in'
                            ' the %s format. You should convert'
                            ' them first.')%fmt, show=True)
def _view_check(self, num, max_=3):
if num <= max_:

View File

@ -208,7 +208,8 @@ class BookInfo(QWebView):
rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
k, t in rows])
comments = data.get(_('Comments'), '')
if comments and comments != u'None':
if not comments or comments == u'None':
comments = ''
self.renderer.queue.put((rows, comments))
self._show_data(rows, '')

View File

@ -34,7 +34,7 @@ class PluginWidget(QWidget, Ui_Form):
self.all_fields.append(x)
QListWidgetItem(x, self.db_fields)
def initialize(self, name): #not working properly to update
def initialize(self, name, db): #not working properly to update
self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated db_fields from last use

View File

@ -28,7 +28,7 @@ class PluginWidget(QWidget, Ui_Form):
self.all_fields.append(x)
QListWidgetItem(x, self.db_fields)
def initialize(self, name):
def initialize(self, name, db):
self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated fields from last use

View File

@ -7,10 +7,11 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.gui2 import gprefs
from catalog_epub_mobi_ui import Ui_Form
from calibre.ebooks.conversion.config import load_defaults
from PyQt4.Qt import QWidget
from calibre.gui2 import gprefs
from catalog_epub_mobi_ui import Ui_Form
from PyQt4.Qt import QWidget, QLineEdit
class PluginWidget(QWidget,Ui_Form):
@ -23,7 +24,8 @@ class PluginWidget(QWidget,Ui_Form):
('generate_recently_added', True),
('note_tag','*'),
('numbers_as_text', False),
('read_tag','+'),
('read_pattern','+'),
('read_source_field_cb','Tag'),
('wishlist_tag','Wishlist'),
]
@ -38,16 +40,54 @@ class PluginWidget(QWidget,Ui_Form):
QWidget.__init__(self, parent)
self.setupUi(self)
def initialize(self, name, db):
    '''
    Prepare the widget for the catalog plugin called ``name``.

    Fills the "read book" source combo box with the built-in ``Tag`` entry
    plus every custom field of ``db`` whose datatype is bool, composite,
    datetime or text, then restores each control listed in
    ``OPTION_FIELDS`` from the values stored in ``gprefs``.
    '''
    self.name = name

    # Populate the 'Read book' source fields
    eligible_datatypes = ['bool','composite','datetime','text']
    sources = {'Tag': {'field':'tag', 'datatype':u'text'}}
    for key in db.custom_field_keys():
        md = db.metadata_for_field(key)
        datatype = md['datatype']
        if datatype in eligible_datatypes:
            sources[md['name']] = {'field':key, 'datatype':datatype}

    # Add the sorted eligible fields to the combo box
    combo = self.read_source_field_cb
    for label in sorted(sources):
        combo.addItem(label)
    self.read_source_fields = sources
    combo.currentIndexChanged.connect(self.read_source_field_changed)

    # Update dialog fields from stored options
    checkbox_options = (
        'generate_recently_added',
        'generate_series',
        'generate_titles',
        'numbers_as_text',
    )
    for opt in self.OPTION_FIELDS:
        opt_name = opt[0]
        stored = gprefs.get(self.name + '_' + opt_name, opt[1])
        if opt_name in checkbox_options:
            getattr(self, opt_name).setChecked(stored)
        elif opt_name == 'read_source_field_cb':
            # Combo box: look for the last-stored value, falling back to
            # the 'Tag' entry when it is no longer offered.
            index = combo.findText(stored)
            if index == -1:
                index = combo.findText('Tag')
            combo.setCurrentIndex(index)
        else:
            # Text fields
            getattr(self, opt_name).setText(stored)

    # Init self.read_source_field from whatever the combo box now shows
    current = unicode(combo.currentText())
    self.read_source_field = self.read_source_fields[current]['field']
def options(self):
# Save/return the current options
# exclude_genre stores literally
@ -55,16 +95,60 @@ class PluginWidget(QWidget,Ui_Form):
# others store as lists
opts_dict = {}
for opt in self.OPTION_FIELDS:
if opt[0] in ['numbers_as_text','generate_titles','generate_series','generate_recently_added']:
# Save values to gprefs
if opt[0] in [
'generate_recently_added',
'generate_series',
'generate_titles',
'numbers_as_text',
]:
opt_value = getattr(self,opt[0]).isChecked()
# Combo box uses .currentText()
elif opt[0] in ['read_source_field_cb']:
opt_value = unicode(getattr(self, opt[0]).currentText())
# text fields use .text()
else:
opt_value = unicode(getattr(self, opt[0]).text())
gprefs.set(self.name + '_' + opt[0], opt_value)
if opt[0] in ['exclude_genre','numbers_as_text','generate_titles','generate_series','generate_recently_added']:
# Construct opts
if opt[0] in [
'exclude_genre',
'generate_recently_added',
'generate_series',
'generate_titles',
'numbers_as_text',
]:
opts_dict[opt[0]] = opt_value
else:
opts_dict[opt[0]] = opt_value.split(',')
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
# Generate read_book_marker
opts_dict['read_book_marker'] = "%s:%s" % (self.read_source_field, self.read_pattern.text())
# Append the output profile
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
return opts_dict
def read_source_field_changed(self,new_index):
    '''
    Process changes in the read_source_field combo box.

    Looks up the spec of the newly selected source in
    ``self.read_source_fields`` and records its field in
    ``self.read_source_field``.

    Currently a QLineEdit is used as the pattern widget for all field
    types; the widget is only replaced when the current one is not a
    QLineEdit. This is the place to switch widget types per datatype.

    :param new_index: Index reported by the combo box signal (unused; the
                      current text is read from the combo box instead).
    '''
    # Use unicode(), as initialize() does for the same lookup: the keys are
    # field names, and str() fails on non-ASCII text under Python 2.
    new_source = unicode(self.read_source_field_cb.currentText())
    read_source_spec = self.read_source_fields[new_source]
    self.read_source_field = read_source_spec['field']

    # Change pattern input widget to match the source field datatype
    if read_source_spec['datatype'] in ['bool','composite','datetime','text']:
        if not isinstance(self.read_pattern, QLineEdit):
            self.read_spec_hl.removeWidget(self.read_pattern)
            dw = QLineEdit(self)
            dw.setObjectName('read_pattern')
            dw.setToolTip('Pattern for read book')
            self.read_pattern = dw
            self.read_spec_hl.addWidget(dw)

View File

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>579</width>
<height>411</height>
<width>627</width>
<height>549</height>
</rect>
</property>
<property name="windowTitle">
@ -28,42 +28,28 @@
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>'Mark this book as read' tag:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="read_tag">
<property name="toolTip">
<string extracomment="Default: +"/>
</property>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<widget class="QLabel" name="label_4">
<property name="text">
<string>Additional note tag prefix:</string>
</property>
</widget>
</item>
<item row="3" column="1">
<item row="4" column="1">
<widget class="QLineEdit" name="note_tag">
<property name="toolTip">
<string extracomment="Default: *"/>
</property>
</widget>
</item>
<item row="5" column="1">
<item row="6" column="1">
<widget class="QLineEdit" name="exclude_genre">
<property name="toolTip">
<string extracomment="Default: \[[\w]*\]"/>
</property>
</widget>
</item>
<item row="5" column="0">
<item row="6" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Regex pattern describing tags to exclude as genres:</string>
@ -76,7 +62,7 @@
</property>
</widget>
</item>
<item row="6" column="1">
<item row="7" column="1">
<widget class="QLabel" name="label_6">
<property name="text">
<string>Regex tips:
@ -88,7 +74,7 @@
</property>
</widget>
</item>
<item row="7" column="0">
<item row="8" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -101,44 +87,84 @@
</property>
</spacer>
</item>
<item row="9" column="0">
<item row="10" column="0">
<widget class="QCheckBox" name="generate_titles">
<property name="text">
<string>Include 'Titles' Section</string>
</property>
</widget>
</item>
<item row="11" column="0">
<item row="12" column="0">
<widget class="QCheckBox" name="generate_recently_added">
<property name="text">
<string>Include 'Recently Added' Section</string>
</property>
</widget>
</item>
<item row="12" column="0">
<item row="13" column="0">
<widget class="QCheckBox" name="numbers_as_text">
<property name="text">
<string>Sort numbers as text</string>
</property>
</widget>
</item>
<item row="10" column="0">
<item row="11" column="0">
<widget class="QCheckBox" name="generate_series">
<property name="text">
<string>Include 'Series' Section</string>
</property>
</widget>
</item>
<item row="2" column="1">
<item row="3" column="1">
<widget class="QLineEdit" name="wishlist_tag"/>
</item>
<item row="2" column="0">
<item row="3" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>Wishlist tag:</string>
</property>
</widget>
</item>
<item row="2" column="1">
<layout class="QHBoxLayout" name="read_spec_hl">
<property name="sizeConstraint">
<enum>QLayout::SetMinimumSize</enum>
</property>
<item>
<widget class="QComboBox" name="read_source_field_cb">
<property name="sizePolicy">
<sizepolicy hsizetype="MinimumExpanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="toolTip">
<string>Source column for read book</string>
</property>
<property name="statusTip">
<string/>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="read_pattern">
<property name="toolTip">
<string>Pattern for read book</string>
</property>
<property name="statusTip">
<string/>
</property>
</widget>
</item>
</layout>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Books marked as read:</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>

View File

@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form):
ICON = I('mimetypes/fb2.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters'])
Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters',
'sectionize_chapters_using_file_structure', 'h1_to_title',
'h2_to_title', 'h3_to_title'])
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,7 +14,7 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="2" column="0">
<item row="6" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -41,6 +41,34 @@
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_sectionize_chapters_using_file_structure">
<property name="text">
<string>Sectionize Chapters using file structure</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="opt_h1_to_title">
<property name="text">
<string>Wrap h1 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_h2_to_title">
<property name="text">
<string>Wrap h2 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="opt_h3_to_title">
<property name="text">
<string>Wrap h3 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
</layout>
</widget>
<resources/>

View File

@ -19,7 +19,7 @@ from calibre.customize.ui import catalog_plugins
class Catalog(QDialog, Ui_Dialog):
''' Catalog Dialog builder'''
def __init__(self, parent, dbspec, ids):
def __init__(self, parent, dbspec, ids, db):
import re, cStringIO
from calibre import prints as info
from PyQt4.uic import compileUi
@ -51,7 +51,7 @@ class Catalog(QDialog, Ui_Dialog):
catalog_widget = __import__('calibre.gui2.catalog.'+name,
fromlist=[1])
pw = catalog_widget.PluginWidget()
pw.initialize(name)
pw.initialize(name, db)
pw.ICON = I('forward.png')
self.widgets.append(pw)
[self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]

View File

@ -29,10 +29,6 @@ class SearchLineEdit(QLineEdit):
QLineEdit.mouseReleaseEvent(self, event)
QLineEdit.selectAll(self)
def focusInEvent(self, event):
QLineEdit.focusInEvent(self, event)
QLineEdit.selectAll(self)
def dropEvent(self, ev):
self.parent().normalize_state()
return QLineEdit.dropEvent(self, ev)
@ -256,7 +252,11 @@ class SavedSearchBox(QComboBox):
def initialize(self, _search_box, colorize=False, help_text=_('Search')):
self.search_box = _search_box
try:
self.line_edit.setPlaceholderText(help_text)
except:
# Using Qt < 4.7
pass
self.colorize = colorize
self.clear()
@ -350,14 +350,17 @@ class SearchBoxMixin(object):
shortcuts = QKeySequence.keyBindings(QKeySequence.Find)
shortcuts = list(shortcuts) + [QKeySequence('/'), QKeySequence('Alt+S')]
self.action_focus_search.setShortcuts(shortcuts)
self.action_focus_search.triggered.connect(lambda x:
self.search.setFocus(Qt.OtherFocusReason))
self.action_focus_search.triggered.connect(self.focus_search_box)
self.addAction(self.action_focus_search)
self.search.setStatusTip(re.sub(r'<\w+>', ' ',
unicode(self.search.toolTip())))
self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
self.clear_button.setStatusTip(self.clear_button.toolTip())
def focus_search_box(self, *args):
    '''
    Give keyboard focus to the search box and select all of its text.
    Any arguments passed by the triggering signal are ignored.
    '''
    box = self.search
    box.setFocus(Qt.OtherFocusReason)
    box.lineEdit().selectAll()
def search_box_cleared(self):
self.tags_view.clear()
self.saved_search.clear()

View File

@ -245,11 +245,11 @@ def fetch_scheduled_recipe(arg):
return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt]
def generate_catalog(parent, dbspec, ids, device_manager):
def generate_catalog(parent, dbspec, ids, device_manager, db):
from calibre.gui2.dialogs.catalog import Catalog
# Build the Catalog dialog in gui2.dialogs.catalog
d = Catalog(parent, dbspec, ids)
d = Catalog(parent, dbspec, ids, db)
if d.exec_() != d.Accepted:
return None

View File

@ -606,12 +606,12 @@ class EPUB_MOBI(CatalogPlugin):
help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--read-tag',
default='+',
dest='read_tag',
Option('--read-book-marker',
default='tag:+',
dest='read_book_marker',
action = None,
help=_("Tag indicating book has been read.\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
help=_("field:pattern indicating book has been read.\n" "Default: '%default'\n"
"Applies to ePub, MOBI output formats")),
Option('--wishlist-tag',
default='Wishlist',
dest='wishlist_tag',
@ -898,6 +898,8 @@ class EPUB_MOBI(CatalogPlugin):
self.__plugin = plugin
self.__progressInt = 0.0
self.__progressString = ''
f, _, p = opts.read_book_marker.partition(':')
self.__read_book_marker = {'field':f, 'pattern':p}
self.__reporter = report_progress
self.__stylesheet = stylesheet
self.__thumbs = None
@ -936,7 +938,6 @@ class EPUB_MOBI(CatalogPlugin):
if self.opts.generate_series:
self.__totalSteps += 2
# Accessors
if True:
'''
@ -1210,7 +1211,7 @@ class EPUB_MOBI(CatalogPlugin):
def READING_SYMBOL(self):
def fget(self):
return '<span style="color:black">&#x25b7;</span>' if self.generateForKindle else \
'<span style="color:white">%s</span>' % self.opts.read_tag
'<span style="color:white">+</span>'
return property(fget=fget)
@dynamic_property
def READ_SYMBOL(self):
@ -1401,8 +1402,7 @@ class EPUB_MOBI(CatalogPlugin):
if record['cover']:
this_title['cover'] = re.sub('&amp;', '&', record['cover'])
# This may be updated in self.processSpecialTags()
this_title['read'] = False
this_title['read'] = self.discoverReadStatus(record)
if record['tags']:
this_title['tags'] = self.processSpecialTags(record['tags'],
@ -2675,13 +2675,7 @@ class EPUB_MOBI(CatalogPlugin):
pBookTag = Tag(soup, "p")
ptc = 0
# book with read/reading/unread symbol
for tag in book['tags']:
if tag == self.opts.read_tag:
book['read'] = True
break
else:
book['read'] = False
book['read'] = self.discoverReadStatus(book)
# book with read|reading|unread symbol or wishlist item
if self.opts.wishlist_tag in book.get('tags', []):
@ -2689,7 +2683,7 @@ class EPUB_MOBI(CatalogPlugin):
pBookTag.insert(ptc,NavigableString(self.MISSING_SYMBOL))
ptc += 1
else:
if book['read']:
if book.get('read', False):
# check mark
pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
pBookTag['class'] = "read_book"
@ -4027,6 +4021,34 @@ class EPUB_MOBI(CatalogPlugin):
if not os.path.isdir(images_path):
os.makedirs(images_path)
def discoverReadStatus(self, record):
    '''
    Given a field:pattern spec, discover if this book is marked as read.

    if field == tag, scan tags for pattern
    if custom field, try regex match for pattern
    This allows maximum flexibility with fields of type
     datatype bool: #field_name:True
     datatype text: #field_name:<string>
     datatype datetime: #field_name:.*

    :param record: Mapping describing the book; ``record['id']`` is
                   required, ``record['tags']`` may be missing or None.
    :return: True if the book matches the read-book marker, else False.
    '''
    field = self.__read_book_marker['field']
    pat = self.__read_book_marker['pattern']

    # Legacy handling of special 'read' tag. Records are passed in before
    # their tags have been validated, so tolerate a missing/None tag list.
    tags = record.get('tags') or []
    if field == 'tag' and pat in tags:
        return True

    field_contents = self.__db.get_field(record['id'],
                                field,
                                index_is_id=True)
    if field_contents:
        try:
            if re.search(pat, unicode(field_contents),
                    re.IGNORECASE) is not None:
                return True
        except re.error:
            # The user-supplied pattern is not a valid regular expression
            # (e.g. the legacy '+' marker); treat the book as unread
            # instead of aborting catalog generation.
            pass

    return False
def filterDbTags(self, tags):
# Remove the special marker tags from the database's tag list,
# return sorted list of normalized genre tags
@ -4519,7 +4541,6 @@ class EPUB_MOBI(CatalogPlugin):
markerTags = []
markerTags.extend(self.opts.exclude_tags.split(','))
markerTags.extend(self.opts.note_tag.split(','))
markerTags.extend(self.opts.read_tag.split(','))
return markerTags
def letter_or_symbol(self,char):
@ -4629,6 +4650,7 @@ class EPUB_MOBI(CatalogPlugin):
if open_pTag:
result.insert(rtc, pTag)
rtc += 1
paras = result.findAll('p')
for p in paras:
@ -4647,10 +4669,12 @@ class EPUB_MOBI(CatalogPlugin):
tag = self.convertHTMLEntities(tag)
if tag.startswith(opts.note_tag):
this_title['notes'] = tag[len(self.opts.note_tag):]
elif tag == opts.read_tag:
this_title['read'] = True
elif re.search(opts.exclude_genre, tag):
continue
elif self.__read_book_marker['field'] == 'tag' and \
tag == self.__read_book_marker['pattern']:
# remove 'read' tag
continue
else:
tag_list.append(tag)
return tag_list
@ -4759,7 +4783,7 @@ class EPUB_MOBI(CatalogPlugin):
for key in keys:
if key in ['catalog_title','authorClip','connected_kindle','descriptionClip',
'exclude_genre','exclude_tags','note_tag','numbers_as_text',
'output_profile','read_tag',
'output_profile','read_book_marker',
'search_text','sort_by','sort_descriptions_by_author','sync',
'wishlist_tag']:
build_log.append(" %s: %s" % (key, opts_dict[key]))

View File

@ -640,7 +640,7 @@ def catalog_option_parser(args):
log = Log()
parser = get_parser(_(
'''
%prog catalog /path/to/destination.(csv|epub|mobi|xml ...) [options]
%prog catalog /path/to/destination.(CSV|EPUB|MOBI|XML ...) [options]
Export a catalog in format specified by path/to/destination extension.
Options control how entries are displayed in the generated catalog ouput.

View File

@ -199,6 +199,11 @@ if not _run_once:
__builtin__.__dict__['lopen'] = local_open
import mimetypes
mimetypes.init([P('mime.types')])
guess_type = mimetypes.guess_type
def test_lopen():
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir