Merge from trunk

This commit is contained in:
Charles Haley 2010-12-01 08:58:00 +00:00
commit c7bb32dc40
35 changed files with 2267 additions and 232 deletions

View File

@ -36,22 +36,37 @@
/* /*
** Title ** Title
*/ */
.cbj_title { table.cbj_header td.cbj_title {
font-size: x-large; font-size: x-large;
font-style: italic;
text-align: center;
}
/*
** Series
*/
table.cbj_header td.cbj_series {
font-size: medium;
text-align: center; text-align: center;
} }
/* /*
** Author ** Author
*/ */
.cbj_author { table.cbj_header td.cbj_author {
font-size: medium; font-size: medium;
text-align: center; text-align: center;
margin-bottom: 1ex;
} }
/* /*
** Table containing Series, Publication Year, Rating and Tags ** Publisher/published
*/
table.cbj_header td.cbj_pubdata {
text-align: center;
}
/*
** Table containing Rating and Tags
*/ */
table.cbj_header { table.cbj_header {
width: 100%; width: 100%;
@ -62,9 +77,8 @@ table.cbj_header {
*/ */
table.cbj_header td.cbj_label { table.cbj_header td.cbj_label {
font-family: sans-serif; font-family: sans-serif;
font-weight: bold;
text-align: right; text-align: right;
width: 40%; width: 33%;
} }
/* /*
@ -73,9 +87,23 @@ table.cbj_header td.cbj_label {
table.cbj_header td.cbj_content { table.cbj_header td.cbj_content {
font-family: sans-serif; font-family: sans-serif;
text-align: left; text-align: left;
width:60%; width:67%;
} }
/*
** Metadata divider
*/
hr.metadata_divider {
width:90%;
margin-left:5%;
border-top: solid white 0px;
border-right: solid white 0px;
border-bottom: solid black 1px;
border-left: solid white 0px;
}
/* /*
** To skip a banner item (Series|Published|Rating|Tags), ** To skip a banner item (Series|Published|Rating|Tags),
** edit the appropriate CSS rule below. ** edit the appropriate CSS rule below.

View File

@ -6,17 +6,24 @@
</head> </head>
<body> <body>
<div class="cbj_banner"> <div class="cbj_banner">
<div class="cbj_title">{title}</div>
<div class="cbj_author">{author}</div>
<table class="cbj_header"> <table class="cbj_header">
<tr class="cbj_series"> <tr>
<td class="cbj_label">{series_label}:</td> <td class="cbj_title" colspan="2">{title}</td>
<td class="cbj_content">{series}</td>
</tr> </tr>
<tr class="cbj_pubdate"> <tr>
<td class="cbj_label">{pubdate_label}:</td> <td class="cbj_series" colspan="2">{series}</td>
<td class="cbj_content">{pubdate}</td>
</tr> </tr>
<tr>
<td class="cbj_author" colspan="2">{author}</td>
</tr>
<tr>
<td class="cbj_pubdata" colspan="2">{publisher} ({pubdate})</td>
</tr>
<tr>
<td class="cbj_author" colspan="2"><hr class="metadata_divider" /></td>
</tr>
<tr class="cbj_rating"> <tr class="cbj_rating">
<td class="cbj_label">{rating_label}:</td> <td class="cbj_label">{rating_label}:</td>
<td class="cbj_content">{rating}</td> <td class="cbj_content">{rating}</td>

1381
resources/mime.types Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,54 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
abc.net.au/news
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class ABCNews(BasicNewsRecipe):
    """Recipe for ABC News (abc.net.au/news).

    Purely declarative: the RSS feeds listed in ``feeds`` are downloaded and
    each article page is reduced to the element with id ``article``.
    """

    title = 'ABC News'
    __author__ = 'Dean Cording'
    description = 'News from Australia'
    masthead_url = 'http://www.abc.net.au/news/assets/v5/images/common/logo-news.png'
    # The masthead logo is also used as the cover image.
    cover_url = masthead_url
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'
    publisher = 'ABC News'
    category = 'News, Australia, World'
    language = 'en_AU'
    publication_type = 'newsportal'

    # Drop HTML comments from the raw page before it is parsed.
    preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': False,
    }

    # keep_only_tags is a list of tag specifications; wrap the single spec
    # in a list instead of assigning a bare dict.
    keep_only_tags = [dict(id='article')]

    remove_tags = [
        dict(attrs={'class': ['related', 'tags']}),
        dict(id='statepromo'),
    ]

    remove_attributes = ['width', 'height']

    feeds = [
        ('Top Stories', 'http://www.abc.net.au/news/syndicate/topstoriesrss.xml'),
        ('Canberra', 'http://www.abc.net.au/news/indexes/idx-act/rss.xml'),
        ('Sydney', 'http://www.abc.net.au/news/indexes/sydney/rss.xml'),
        ('Melbourne', 'http://www.abc.net.au/news/indexes/melbourne/rss.xml'),
        ('Brisbane', 'http://www.abc.net.au/news/indexes/brisbane/rss.xml'),
        ('Perth', 'http://www.abc.net.au/news/indexes/perth/rss.xml'),
        ('Australia', 'http://www.abc.net.au/news/indexes/idx-australia/rss.xml'),
        ('World', 'http://www.abc.net.au/news/indexes/world/rss.xml'),
        ('Business', 'http://www.abc.net.au/news/indexes/business/rss.xml'),
        ('Science and Technology', 'http://www.abc.net.au/news/tag/science-and-technology/rss.xml'),
    ]

View File

@ -0,0 +1,48 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Dean Cording'
'''
businessspectator.com.au
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class BusinessSpectator(BasicNewsRecipe):
    """Recipe for Business Spectator (businessspectator.com.au).

    Declarative only: the RSS feeds in ``feeds`` are fetched and every
    article is trimmed to its ``storyHeader`` and ``body-html`` elements.
    """

    title = 'Business Spectator'
    __author__ = 'Dean Cording'
    description = 'Australian Business News & commentary delivered the way you want it.'
    publisher = 'Business Spectator'
    category = 'News, Australia, Business'
    language = 'en_AU'
    publication_type = 'newsportal'
    encoding = 'utf8'

    # The site logo serves as both masthead and cover.
    masthead_url = 'http://www.businessspectator.com.au/bs.nsf/logo-business-spectator.gif'
    cover_url = masthead_url

    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    #delay = 1

    # Remove HTML comments from the raw markup before parsing.
    preprocess_regexps = [
        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
    ]

    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
        linearize_tables=False,
    )

    keep_only_tags = [
        {'id': 'storyHeader'},
        {'id': 'body-html'},
    ]
    remove_tags = [
        {'attrs': {'class': 'hql'}},
    ]
    remove_attributes = ['width', 'height', 'style']

    feeds = [
        ('Top Stories',
         'http://www.businessspectator.com.au/top-stories.rss'),
        ('Alan Kohler',
         'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Alan%20Kohler'),
        ('Robert Gottliebsen',
         'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Robert%20Gottliebsen'),
        ('Stephen Bartholomeusz',
         'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=spectators&cat=Stephen%20Bartholomeusz'),
        ('Daily Dossier',
         'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
        ('Australia',
         'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
    ]

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Esensja(BasicNewsRecipe):
    """Recipe for the magazine Esensja (esensja.pl).

    The table of contents of the current issue is scraped by
    ``parse_index``; multi-page articles are followed through their
    "nextpage" links so that every page becomes its own entry.
    """

    title = u'Esensja'
    __author__ = 'matek09'
    description = 'Monthly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True
    # Base URL of the issue's pages; placeholder until parse_index() sets it.
    HREF = '0'

    #keep_only_tags =[]
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'})
    remove_tags_before = dict(name='div', attrs={'class': 't-title'})
    remove_tags_after = dict(name='img', attrs={'src': '../../../2000/01/img/tab_bot.gif'})

    remove_tags = [
        dict(name='img', attrs={'src': '../../../2000/01/img/tab_top.gif'}),
        dict(name='img', attrs={'src': '../../../2000/01/img/tab_bot.gif'}),
        dict(name='div', attrs={'class': 't-title2 nextpage'}),
    ]

    extra_css = (
        '\n'
        '.t-title {font-size: x-large; font-weight: bold; text-align: left}\n'
        '.t-author {font-size: x-small; text-align: left}\n'
        '.t-title2 {font-size: x-small; font-style: italic; text-align: left}\n'
        '.text {font-size: small; text-align: left}\n'
        '.annot-ref {font-style: italic; text-align: left}\n'
    )

    # Strip alt attributes from the raw markup.
    preprocess_regexps = [(re.compile(r'alt="[^"]*"'),
                           lambda match: '')]

    def parse_index(self):
        """Build the feed list for the current issue.

        Sets ``self.HREF`` and ``self.cover_url`` as a side effect.

        Returns a list of ``(section title, articles)`` tuples where
        ``articles`` is a list of dicts with the keys ``title``, ``url``,
        ``date`` and ``description``.
        """
        magazine_url = 'http://www.esensja.pl/magazyn/'
        soup = self.index_to_soup(magazine_url)
        a = soup.find('a', attrs={'href' : re.compile('.*/index.html')})
        # The link has the form "<year>/<month>/...".
        href_parts = a['href'].split('/')
        year = href_parts[0]
        month = href_parts[1]
        issue_url = magazine_url + year + '/' + month
        self.HREF = issue_url + '/iso/'
        soup = self.index_to_soup(self.HREF + '01.html')
        self.cover_url = issue_url + '/img/ilustr/cover_b.jpg'

        feeds = []
        intro = soup.find('div', attrs={'class' : 'n-title'})
        chapter = 'Wprowadzenie'
        subchapter = ''
        articles = [{
            'title' : self.tag_to_string(intro.a),
            'url' : self.HREF + intro.a['href'],
            'date' : '',
            'description' : '',
        }]

        def section_title():
            # "<chapter>" or "<chapter> - <subchapter>"
            if subchapter:
                return chapter + ' - ' + subchapter
            return chapter

        for tag in intro.findAllNext(attrs={'class': ['chapter', 'subchapter', 'n-title']}):
            if tag.name == 'td':
                # A heading cell closes the section collected so far.
                if articles:
                    feeds.append((section_title(), articles))
                    articles = []
                if tag['class'] == 'chapter':
                    chapter = self.tag_to_string(tag).capitalize()
                    subchapter = ''
                else:
                    subchapter = self.tag_to_string(tag)
                continue

            title = self.tag_to_string(tag.a)
            href = tag.a['href']
            articles.append({
                'title' : title,
                'url' : self.HREF + href,
                'date' : '',
                'description' : '',
            })

            # Follow the "nextpage" links; every further page becomes an
            # extra entry titled "<title> c. d. <n>".
            page = self.index_to_soup(self.HREF + href)
            part = 1
            while True:
                nextpage = page.find('div', attrs={'class' : 't-title2 nextpage'})
                if nextpage is None:
                    break
                href = nextpage.a['href']
                page = self.index_to_soup(self.HREF + href)
                articles.append({
                    'title' : title + ' c. d. ' + str(part),
                    'url' : self.HREF + href,
                    'date' : '',
                    'description' : '',
                })
                part += 1

        # Articles listed after the last heading cell were previously
        # dropped; emit them as the final section.
        if articles:
            feeds.append((section_title(), articles))

        return feeds

View File

@ -1,67 +1,61 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Justus Bisser <justus.bisser at gmail.com>' __copyright__ = '2010, Christian Schmitt'
''' '''
fr-online.de fr-online.de
''' '''
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Spiegel_ger(BasicNewsRecipe): class FROnlineRecipe(BasicNewsRecipe):
title = 'Frankfurter Rundschau' title = 'Frankfurter Rundschau'
__author__ = 'Justus Bisser' __author__ = 'maccs'
description = "Dies ist die Online-Ausgabe der Frankfurter Rundschau. Um die abgerufenen individuell einzustellen bearbeiten sie die Liste im erweiterten Modus. Die Feeds findet man auf http://www.fr-online.de/verlagsservice/fr_newsreader/?em_cnt=574255" description = 'Nachrichten aus D und aller Welt'
encoding = 'utf-8'
masthead_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH' publisher = 'Druck- und Verlagshaus Frankfurt am Main GmbH'
category = 'FR Online, Frankfurter Rundschau, Nachrichten, News,Dienste, RSS, RSS, Feedreader, Newsfeed, iGoogle, Netvibes, Widget' category = 'news, germany, world'
oldest_article = 7
max_articles_per_feed = 100
language = 'de' language = 'de'
lang = 'de-DE' publication_type = 'newspaper'
no_stylesheets = True
use_embedded_content = False use_embedded_content = False
#encoding = 'cp1252' remove_javascript = True
no_stylesheets = True
oldest_article = 1 # Increase this number if you're interested in older articles
max_articles_per_feed = 50 # Seems a reasonable number to me
extra_css = '''
body { font-family: "arial", "verdana", "geneva", sans-serif; font-size: 12px; margin: 0px; background-color: #ffffff;}
.imgSubline{background-color: #f4f4f4; font-size: 0.8em;}
.p--heading-1 {font-weight: bold;}
.calibre_navbar {font-size: 0.8em; font-family: "arial", "verdana", "geneva", sans-serif;}
'''
remove_tags = [dict(name='div', attrs={'id':'Logo'})]
cover_url = 'http://www.fr-online.de/image/view/-/1474018/data/823552/-/logo.png'
cover_margins = (100, 150, '#ffffff')
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : lang
}
recursions = 0
max_articles_per_feed = 100
#keep_only_tags = [dict(name='div', attrs={'class':'text'})]
#tags_remove = [dict(name='div', attrs={'style':'text-align: left; margin: 4px 0px 0px 4px; width: 200px; float: right;'})]
remove_attributes = ['style']
feeds = [] feeds = []
#remove_tags_before = [dict(name='div', attrs={'style':'padding-left: 0px;'})] feeds.append(('Startseite', u'http://www.fr-online.de/home/-/1472778/1472778/-/view/asFeed/-/index.xml'))
#remove_tags_after = [dict(name='div', attrs={'class':'box_head_text'})] feeds.append(('Politik', u'http://www.fr-online.de/politik/-/1472596/1472596/-/view/asFeed/-/index.xml'))
feeds.append(('Meinung', u'http://www.fr-online.de/politik/meinung/-/1472602/1472602/-/view/asFeed/-/index.xml'))
feeds.append(('Wirtschaft', u'http://www.fr-online.de/wirtschaft/-/1472780/1472780/-/view/asFeed/-/index.xml'))
feeds.append(('Sport', u'http://www.fr-online.de/sport/-/1472784/1472784/-/view/asFeed/-/index.xml'))
feeds.append(('Eintracht Frankfurt', u'http://www.fr-online.de/sport/eintracht-frankfurt/-/1473446/1473446/-/view/asFeed/-/index.xml'))
feeds.append(('Kultur und Medien', u'http://www.fr-online.de/kultur/-/1472786/1472786/-/view/asFeed/-/index.xml'))
feeds.append(('Panorama', u'http://www.fr-online.de/panorama/-/1472782/1472782/-/view/asFeed/-/index.xml'))
feeds.append(('Frankfurt', u'http://www.fr-online.de/frankfurt/-/1472798/1472798/-/view/asFeed/-/index.xml'))
feeds.append(('Rhein-Main', u'http://www.fr-online.de/rhein-main/-/1472796/1472796/-/view/asFeed/-/index.xml'))
feeds.append(('Hanau', u'http://www.fr-online.de/rhein-main/hanau/-/1472866/1472866/-/view/asFeed/-/index.xml'))
feeds.append(('Darmstadt', u'http://www.fr-online.de/rhein-main/darmstadt/-/1472858/1472858/-/view/asFeed/-/index.xml'))
feeds.append(('Wiesbaden', u'http://www.fr-online.de/rhein-main/wiesbaden/-/1472860/1472860/-/view/asFeed/-/index.xml'))
feeds.append(('Offenbach', u'http://www.fr-online.de/rhein-main/offenbach/-/1472856/1472856/-/view/asFeed/-/index.xml'))
feeds.append(('Bad Homburg', u'http://www.fr-online.de/rhein-main/bad-homburg/-/1472864/1472864/-/view/asFeed/-/index.xml'))
feeds.append(('Digital', u'http://www.fr-online.de/digital/-/1472406/1472406/-/view/asFeed/-/index.xml'))
feeds.append(('Wissenschaft', u'http://www.fr-online.de/wissenschaft/-/1472788/1472788/-/view/asFeed/-/index.xml'))
# enable for all news
allNews = 0
if allNews:
feeds = [(u'Frankfurter Rundschau', u'http://www.fr-online.de/rss/sport/index.xml')]
else:
#select the feeds you like
feeds = [(u'Nachrichten', u'http://www.fr-online.de/rss/politik/index.xml')]
feeds.append((u'Kommentare und Analysen', u'http://www.fr-online.de/rss/meinung/index.xml'))
feeds.append((u'Dokumentationen', u'http://www.fr-online.de/rss/dokumentation/index.xml'))
feeds.append((u'Deutschlandtrend', u'http://www.fr-online.de/rss/deutschlandtrend/index.xml'))
feeds.append((u'Wirtschaft', u'http://www.fr-online.de/rss/wirtschaft/index.xml'))
feeds.append((u'Sport', u'http://www.fr-online.de/rss/sport/index.xml'))
feeds.append((u'Feuilleton', u'http://www.fr-online.de/rss/feuilleton/index.xml'))
feeds.append((u'Panorama', u'http://www.fr-online.de/rss/panorama/index.xml'))
feeds.append((u'Rhein Main und Hessen', u'http://www.fr-online.de/rss/hessen/index.xml'))
feeds.append((u'Fitness und Gesundheit', u'http://www.fr-online.de/rss/fit/index.xml'))
feeds.append((u'Multimedia', u'http://www.fr-online.de/rss/multimedia/index.xml'))
feeds.append((u'Wissen und Bildung', u'http://www.fr-online.de/rss/wissen/index.xml'))
def get_article_url(self, article): def print_version(self, url):
url = article.link return url.replace('index.html', 'view/printVersion/-/index.html')
regex = re.compile("0C[0-9]{6,8}0A?")
liste = regex.findall(url)
string = liste.pop(0)
string = string[2:len(string)-1]
return "http://www.fr-online.de/_em_cms/_globals/print.php?em_cnt=" + string

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Histmag(BasicNewsRecipe):
    """Recipe for histmag.org.

    Three listing pages are scraped, each becoming one feed; see
    ``parse_index``.
    """

    title = u'Histmag'
    __author__ = 'matek09'
    description = u"Artykuly historyczne i publicystyczne"
    language = 'pl'
    encoding = 'utf-8'
    no_stylesheets = True
    remove_javascript = True
    #max_articles_per_feed = 1

    remove_tags_before = {'name': 'div', 'attrs': {'id': 'article'}}
    remove_tags_after = {'name': 'h2', 'attrs': {'class': 'komentarze'}}

    #keep_only_tags =[]
    #keep_only_tags.append(dict(name = 'h2'))
    #keep_only_tags.append(dict(name = 'p'))

    remove_tags = [
        {'name': 'p', 'attrs': {'class': 'podpis'}},
        {'name': 'h2', 'attrs': {'class': 'komentarze'}},
        {'name': 'img', 'attrs': {'src': 'style/buttons/wesprzyjnas-1.jpg'}},
    ]

    # Surround every <span>...</span> with a pair of line breaks.
    preprocess_regexps = [
        (re.compile(r'</span>'), lambda match: '</span><br><br>'),
        (re.compile(r'<span>'), lambda match: '<br><br><span>'),
    ]

    extra_css = (
        '\n'
        '.left {font-size: x-small}\n'
        '.right {font-size: x-small}\n'
    )

    def find_articles(self, soup):
        """Return one article dict per ``div.text`` element found in *soup*.

        Each dict has the keys ``title``, ``url``, ``date`` and
        ``description``; title and URL come from the div's ``h3`` link.
        """
        def describe(block):
            link = block.h3.a
            entry = {}
            entry['title'] = self.tag_to_string(link)
            entry['url'] = 'http://www.histmag.org/' + link['href']
            # Date is the text before the first '|'.
            entry['date'] = self.tag_to_string(block.next('p')).split('|')[0]
            entry['description'] = self.tag_to_string(block.next('p', podpis=False))
            return entry

        return [describe(block)
                for block in soup.findAll('div', attrs={'class' : 'text'})]

    def parse_index(self):
        """Return the feeds as a list of ``(title, articles)`` tuples."""
        listings = (
            (u"Artykuly historyczne", 'http://histmag.org/?arc=4&dx=0'),
            (u"Artykuly publicystyczne", 'http://histmag.org/?arc=5&dx=0'),
            (u"Wydarzenia", 'http://histmag.org/?arc=1&dx=0'),
        )
        feeds = []
        for feed_title, listing_url in listings:
            listing = self.index_to_soup(listing_url)
            feeds.append((feed_title, self.find_articles(listing)))
        return feeds

View File

@ -1,19 +1,22 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com' __copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Newsweek(BasicNewsRecipe): class Newsweek(BasicNewsRecipe):
EDITION = 0 FIND_LAST_FULL_ISSUE = True
EDITION = '0'
EXCLUDE_LOCKED = True
LOCKED_ICO = 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'
title = u'Newsweek Polska' title = u'Newsweek Polska'
__author__ = 'Mateusz Kielar' __author__ = 'matek09'
description = 'Weekly magazine' description = 'Weekly magazine'
encoding = 'utf-8' encoding = 'utf-8'
no_stylesheets = True no_stylesheets = True
language = 'en' language = 'pl'
remove_javascript = True remove_javascript = True
keep_only_tags =[] keep_only_tags =[]
@ -33,24 +36,42 @@ class Newsweek(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print' return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'
def is_locked(self, a):
if a.findNext('img')['src'] == 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif':
return True
else:
return False
def is_full(self, issue_soup):
if len(issue_soup.findAll('img', attrs={'src' : 'http://www.newsweek.pl/bins/media/static/newsweek/img/ico_locked.gif'})) > 1:
return False
else:
return True
def find_last_full_issue(self): def find_last_full_issue(self):
page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx') frame_url = 'http://www.newsweek.pl/Frames/IssueCover.aspx'
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href'] while True:
page = self.index_to_soup(issue) frame_soup = self.index_to_soup(frame_url)
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href'] self.EDITION = frame_soup.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
page = self.index_to_soup(issue) issue_soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','') if self.is_full(issue_soup):
break
frame_url = 'http://www.newsweek.pl/Frames/' + frame_soup.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
def parse_index(self): def parse_index(self):
if self.FIND_LAST_FULL_ISSUE:
self.find_last_full_issue() self.find_last_full_issue()
soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION)) soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + self.EDITION)
img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True) img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
self.cover_url = img['src'] self.cover_url = img['src']
feeds = [] feeds = []
parent = soup.find(id='content-left-big') parent = soup.find(id='content-left-big')
for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}): for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
section = self.tag_to_string(txt).capitalize()
articles = list(self.find_articles(txt)) articles = list(self.find_articles(txt))
if len(articles) > 0:
section = self.tag_to_string(txt).capitalize()
feeds.append((section, articles)) feeds.append((section, articles))
return feeds return feeds
@ -58,6 +79,8 @@ class Newsweek(BasicNewsRecipe):
for a in txt.findAllNext( attrs={'class':['strong','hr']}): for a in txt.findAllNext( attrs={'class':['strong','hr']}):
if a.name in "div": if a.name in "div":
break break
if (not self.FIND_LAST_FULL_ISSUE) & self.EXCLUDE_LOCKED & self.is_locked(a):
continue
yield { yield {
'title' : self.tag_to_string(a), 'title' : self.tag_to_string(a),
'url' : 'http://www.newsweek.pl' + a['href'], 'url' : 'http://www.newsweek.pl' + a['href'],

View File

@ -8,12 +8,15 @@ www.nin.co.rs
import re import re
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from contextlib import nested, closing
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre import entity_to_unicode
class Nin(BasicNewsRecipe): class Nin(BasicNewsRecipe):
title = 'NIN online' title = 'NIN online'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = 'Nedeljne Informativne Novine' description = 'Nedeljne Informativne Novine'
publisher = 'NIN d.o.o.' publisher = 'NIN d.o.o. - Ringier d.o.o.'
category = 'news, politics, Serbia' category = 'news, politics, Serbia'
no_stylesheets = True no_stylesheets = True
delay = 1 delay = 1
@ -26,18 +29,29 @@ class Nin(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
language = 'sr' language = 'sr'
publication_type = 'magazine' publication_type = 'magazine'
extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} ' extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Verdana, Lucida, sans1, sans-serif}
.article_description{font-family: Verdana, Lucida, sans1, sans-serif}
.artTitle{font-size: x-large; font-weight: bold; color: #900}
.izjava{font-size: x-large; font-weight: bold}
.columnhead{font-size: small; font-weight: bold;}
img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
b{margin-top: 1em}
"""
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'tags' : category , 'tags' : category
, 'publisher' : publisher , 'publisher' : publisher
, 'language' : language , 'language' : language
, 'linearize_tables' : True
} }
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] preprocess_regexps = [
remove_attributes = ['height','width'] (re.compile(r'</body>.*?<html>', re.DOTALL|re.IGNORECASE),lambda match: '</body>')
,(re.compile(r'</html>.*?</html>', re.DOTALL|re.IGNORECASE),lambda match: '</html>')
,(re.compile(u'\u0110'), lambda match: u'\u00D0')
]
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
@ -50,7 +64,10 @@ class Nin(BasicNewsRecipe):
return br return br
keep_only_tags =[dict(name='td', attrs={'width':'520'})] keep_only_tags =[dict(name='td', attrs={'width':'520'})]
remove_tags_before =dict(name='span', attrs={'class':'izjava'})
remove_tags_after =dict(name='html') remove_tags_after =dict(name='html')
remove_tags = [dict(name=['object','link','iframe','meta','base'])]
remove_attributes=['border','background','height','width','align','valign']
def get_cover_url(self): def get_cover_url(self):
cover_url = None cover_url = None
@ -63,7 +80,7 @@ class Nin(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
articles = [] articles = []
count = 0 count = 0
soup = self.index_to_soup(self.PREFIX) soup = self.index_to_soup(self.INDEX)
for item in soup.findAll('a',attrs={'class':'lmeninavFont'}): for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
count = count +1 count = count +1
if self.test and count > 2: if self.test and count > 2:
@ -90,3 +107,45 @@ class Nin(BasicNewsRecipe):
articles.append((section,inarts)) articles.append((section,inarts))
return articles return articles
def index_to_soup(self, url_or_raw, raw=False):
if re.match(r'\w+://', url_or_raw):
open_func = getattr(self.browser, 'open_novisit', self.browser.open)
with closing(open_func(url_or_raw)) as f:
_raw = f.read()
if not _raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else:
_raw = url_or_raw
if raw:
return _raw
if not isinstance(_raw, unicode) and self.encoding:
if callable(self.encoding):
_raw = self.encoding(_raw)
else:
_raw = _raw.decode(self.encoding, 'replace')
massage = list(BeautifulSoup.MARKUP_MASSAGE)
enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
massage.append((re.compile(r'&(\S+?);'), lambda match:
entity_to_unicode(match, encoding=enc)))
massage.append((re.compile(r'[\x00-\x08]+'), lambda match:
''))
return BeautifulSoup(_raw, markupMassage=massage)
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('div'):
if len(item.contents) == 0:
item.extract()
for item in soup.findAll(['td','tr']):
item.name='div'
for item in soup.findAll('img'):
if not item.has_key('alt'):
item['alt'] = 'image'
for tbl in soup.findAll('table'):
img = tbl.find('img')
if img:
img.extract()
tbl.replaceWith(img)
return soup

View File

@ -1,18 +1,18 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com' __copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Polityka(BasicNewsRecipe): class Polityka(BasicNewsRecipe):
title = u'Polityka' title = u'Polityka'
__author__ = 'Mateusz Kielar' __author__ = 'matek09'
description = 'Weekly magazine. Last archive issue' description = 'Weekly magazine. Last archive issue'
encoding = 'utf-8' encoding = 'utf-8'
no_stylesheets = True no_stylesheets = True
language = 'en' language = 'pl'
remove_javascript = True remove_javascript = True
remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'})) remove_tags_before = dict(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
@ -48,7 +48,6 @@ class Polityka(BasicNewsRecipe):
for div in box.findAll('div', attrs={'class': 'list_tresc'}): for div in box.findAll('div', attrs={'class': 'list_tresc'}):
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],) article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip() section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
print section
if not articles.has_key(section): if not articles.has_key(section):
articles[section] = [] articles[section] = []
articles[section].append( { articles[section].append( {

View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, matek09, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Wprost(BasicNewsRecipe):
    """Recipe for the weekly magazine Wprost (wprost.pl).

    ``find_last_issue`` selects an issue from the archive page and
    ``parse_index`` turns that issue's table of contents into feeds.
    """

    # Issue id; placeholder until find_last_issue() stores the real value
    # (a string taken from the issue link).
    EDITION = 0
    # When true, pick the issue linked after the last "blocked" icon on the
    # archive page instead of the first issue listed.
    FIND_LAST_FULL_ISSUE = True
    # When true, skip articles that is_blocked() reports as blocked.
    EXCLUDE_LOCKED = True
    ICO_BLOCKED = 'http://www.wprost.pl/G/icons/ico_blocked.gif'

    title = u'Wprost'
    __author__ = 'matek09'
    description = 'Weekly magazine'
    encoding = 'ISO-8859-2'
    no_stylesheets = True
    language = 'pl'
    remove_javascript = True

    remove_tags_before = dict(name='div', attrs={'id': 'print-layer'})
    remove_tags_after = dict(name='div', attrs={'id': 'print-layer'})

    #keep_only_tags =[]
    #keep_only_tags.append(dict(name = 'table', attrs = {'id' : 'title-table'}))
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-header'}))
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'div-content'}))
    #keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'def element-autor'}))

    # Remove inline "display" rules from the raw markup.
    preprocess_regexps = [(re.compile(r'style="display: none;"'), lambda match: ''),
                          (re.compile(r'display: block;'), lambda match: '')]

    remove_tags = [
        dict(name='div', attrs={'class': 'def element-date'}),
        dict(name='div', attrs={'class': 'def silver'}),
        dict(name='div', attrs={'id': 'content-main-column-right'}),
    ]

    extra_css = (
        '\n'
        '.div-header {font-size: x-small; font-weight: bold}\n'
    )
    #h2 {font-size: x-large; font-weight: bold}

    def is_blocked(self, a):
        """Return True when the link *a* is followed by a sibling ``img``."""
        return a.findNextSibling('img') is not None

    def find_last_issue(self):
        """Pick the issue to download; set ``self.EDITION`` and ``self.cover_url``."""
        soup = self.index_to_soup('http://www.wprost.pl/archiwum/')
        toc_link = {'title' : re.compile('Zobacz spis tre.ci')}
        ico_blocked = []
        if self.FIND_LAST_FULL_ISSUE:
            ico_blocked = soup.findAll('img', attrs={'src' : self.ICO_BLOCKED})
        if ico_blocked:
            a = ico_blocked[-1].findNext('a', attrs=toc_link)
        else:
            # Either the newest issue was requested or the archive page
            # shows no blocked icon at all (previously an IndexError):
            # take the first issue link on the page.
            a = soup.find('a', attrs=toc_link)
        self.EDITION = a['href'].replace('/tygodnik/?I=', '')
        self.cover_url = a.img['src']

    def parse_index(self):
        """Return the selected issue as a list of ``(section, articles)`` tuples.

        Sections that yield no articles are left out.
        """
        self.find_last_issue()
        soup = self.index_to_soup('http://www.wprost.pl/tygodnik/?I=' + self.EDITION)
        feeds = []
        for main_block in soup.findAll(attrs={'class':'main-block-s3 s3-head head-red3'}):
            articles = list(self.find_articles(main_block))
            if articles:
                section = self.tag_to_string(main_block)
                feeds.append((section, articles))
        return feeds

    def find_articles(self, main_block):
        """Yield article dicts for the links that follow *main_block*.

        Iteration stops at the first matching ``td`` element; blocked
        articles are skipped when ``EXCLUDE_LOCKED`` is set.
        """
        for a in main_block.findAllNext( attrs={'style':['','padding-top: 15px;']}):
            # Exact comparison: `in "td"` was a substring test.
            if a.name == 'td':
                break
            if self.EXCLUDE_LOCKED and self.is_blocked(a):
                continue
            yield {
                'title' : self.tag_to_string(a),
                'url' : 'http://www.wprost.pl' + a['href'],
                'date' : '',
                'description' : ''
            }

View File

@ -340,6 +340,8 @@ class LinuxFreeze(Command):
__builtin__.help = _Helper() __builtin__.help = _Helper()
def set_qt_plugin_path(): def set_qt_plugin_path():
import uuid
uuid.uuid4() # Workaround for libuuid/PyQt conflict
from PyQt4.Qt import QCoreApplication from PyQt4.Qt import QCoreApplication
paths = list(map(unicode, QCoreApplication.libraryPaths())) paths = list(map(unicode, QCoreApplication.libraryPaths()))
paths.insert(0, sys.frozen_path + '/lib/qt_plugins') paths.insert(0, sys.frozen_path + '/lib/qt_plugins')

View File

@ -3,7 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import uuid, sys, os, re, logging, time, mimetypes, \ import uuid, sys, os, re, logging, time, \
__builtin__, warnings, multiprocessing __builtin__, warnings, multiprocessing
from urllib import getproxies from urllib import getproxies
__builtin__.__dict__['dynamic_property'] = lambda(func): func(None) __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
@ -19,43 +19,18 @@ from calibre.constants import iswindows, isosx, islinux, isfreebsd, isfrozen, \
__appname__, __version__, __author__, \ __appname__, __version__, __author__, \
win32event, win32api, winerror, fcntl, \ win32event, win32api, winerror, fcntl, \
filesystem_encoding, plugins, config_dir filesystem_encoding, plugins, config_dir
from calibre.startup import winutil, winutilerror from calibre.startup import winutil, winutilerror, guess_type
uuid.uuid4() # Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo if islinux and not getattr(sys, 'frozen', False):
# Imported before PyQt4 to workaround PyQt4 util-linux conflict on gentoo
uuid.uuid4()
if False: if False:
# Prevent pyflakes from complaining
winutil, winutilerror, __appname__, islinux, __version__ winutil, winutilerror, __appname__, islinux, __version__
fcntl, win32event, isfrozen, __author__, terminal_controller fcntl, win32event, isfrozen, __author__, terminal_controller
winerror, win32api, isfreebsd winerror, win32api, isfreebsd, guess_type
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('text/fb2+xml', '.fb2')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-sony-bbeb', '.lrx')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
mimetypes.add_type('application/x-font-opentype', '.otf')
mimetypes.add_type('application/x-font-truetype', '.ttf')
mimetypes.add_type('application/oebps-package+xml', '.opf')
mimetypes.add_type('application/vnd.palm', '.pdb')
mimetypes.add_type('application/x-mobipocket-ebook', '.mobi')
mimetypes.add_type('application/x-mobipocket-ebook', '.prc')
mimetypes.add_type('application/x-mobipocket-ebook', '.azw')
mimetypes.add_type('application/x-cbz', '.cbz')
mimetypes.add_type('application/x-cbr', '.cbr')
mimetypes.add_type('application/x-koboreader-ebook', '.kobo')
mimetypes.add_type('image/wmf', '.wmf')
mimetypes.add_type('image/jpeg', '.jpg')
mimetypes.add_type('image/jpeg', '.jpeg')
mimetypes.add_type('image/png', '.png')
mimetypes.add_type('image/gif', '.gif')
mimetypes.add_type('image/bmp', '.bmp')
mimetypes.add_type('image/svg+xml', '.svg')
guess_type = mimetypes.guess_type
import cssutils import cssutils
cssutils.log.setLevel(logging.WARN) cssutils.log.setLevel(logging.WARN)

View File

@ -19,7 +19,7 @@ class ANDROID(USBMS):
VENDOR_ID = { VENDOR_ID = {
# HTC # HTC
0x0bb4 : { 0x0c02 : [0x100, 0x0227], 0x0c01 : [0x100, 0x0227], 0x0ff9 0x0bb4 : { 0x0c02 : [0x100, 0x0227, 0x0226], 0x0c01 : [0x100, 0x0227], 0x0ff9
: [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226], : [0x0100, 0x0227, 0x0226], 0x0c87: [0x0100, 0x0227, 0x0226],
0xc92 : [0x100]}, 0xc92 : [0x100]},

View File

@ -91,6 +91,10 @@ class FB2MLizer(object):
return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True) return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode, pretty_print=True)
def clean_text(self, text): def clean_text(self, text):
text = re.sub(r'(?miu)<section>\s*</section>', '', text)
text = re.sub(r'(?miu)\s+</section>', '</section>', text)
text = re.sub(r'(?miu)</section><section>', '</section>\n\n<section>', text)
text = re.sub(r'(?miu)<p>\s*</p>', '', text) text = re.sub(r'(?miu)<p>\s*</p>', '', text)
text = re.sub(r'(?miu)\s+</p>', '</p>', text) text = re.sub(r'(?miu)\s+</p>', '</p>', text)
text = re.sub(r'(?miu)</p><p>', '</p>\n\n<p>', text) text = re.sub(r'(?miu)</p><p>', '</p>\n\n<p>', text)
@ -166,11 +170,15 @@ class FB2MLizer(object):
def get_text(self): def get_text(self):
text = [] text = []
for item in self.oeb_book.spine: for i, item in enumerate(self.oeb_book.spine):
if self.opts.sectionize_chapters_using_file_structure and i is not 0:
text.append('<section>')
self.log.debug('Converting %s to FictionBook2 XML' % item.href) self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
text.append(self.add_page_anchor(item)) text.append(self.add_page_anchor(item))
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
if self.opts.sectionize_chapters_using_file_structure and i is not len(self.oeb_book.spine) - 1:
text.append('</section>')
return ''.join(text) return ''.join(text)
def fb2_body_footer(self): def fb2_body_footer(self):
@ -258,6 +266,10 @@ class FB2MLizer(object):
if id_name: if id_name:
fb2_text.append(self.get_anchor(page, id_name)) fb2_text.append(self.get_anchor(page, id_name))
if tag == 'h1' and self.opts.h1_to_title or tag == 'h2' and self.opts.h2_to_title or tag == 'h3' and self.opts.h3_to_title:
fb2_text.append('<title>')
tags.append('title')
fb2_tag = TAG_MAP.get(tag, None) fb2_tag = TAG_MAP.get(tag, None)
if fb2_tag == 'p': if fb2_tag == 'p':
if 'p' in tag_stack+tags: if 'p' in tag_stack+tags:

View File

@ -25,6 +25,20 @@ class FB2Output(OutputFormatPlugin):
'WARNING: ' \ 'WARNING: ' \
'This option is experimental. It can cause conversion ' \ 'This option is experimental. It can cause conversion ' \
'to fail. It can also produce unexpected output.')), 'to fail. It can also produce unexpected output.')),
OptionRecommendation(name='sectionize_chapters_using_file_structure',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Try to turn chapters into individual sections using the ' \
'internal structure of the ebook. This works well for EPUB ' \
'books that have been internally split by chapter.')),
OptionRecommendation(name='h1_to_title',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Wrap all h1 tags with fb2 title elements.')),
OptionRecommendation(name='h2_to_title',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Wrap all h2 tags with fb2 title elements.')),
OptionRecommendation(name='h3_to_title',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Wrap all h3 tags with fb2 title elements.')),
]) ])
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):

View File

@ -504,6 +504,9 @@ class MobiReader(object):
'x-large': '5', 'x-large': '5',
'xx-large': '6', 'xx-large': '6',
} }
def barename(x):
    # Drop any 'prefix:' part, keeping only what follows the last colon
    return x.rsplit(':', 1)[-1]
mobi_version = self.book_header.mobi_version mobi_version = self.book_header.mobi_version
for x in root.xpath('//ncx'): for x in root.xpath('//ncx'):
x.getparent().remove(x) x.getparent().remove(x)
@ -512,7 +515,8 @@ class MobiReader(object):
for x in tag.attrib: for x in tag.attrib:
if ':' in x: if ':' in x:
del tag.attrib[x] del tag.attrib[x]
if tag.tag in ('country-region', 'place', 'placetype', 'placename', if tag.tag and barename(tag.tag.lower()) in \
('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content', 'form'): 'state', 'city', 'street', 'address', 'content', 'form'):
tag.tag = 'div' if tag.tag in ('content', 'form') else 'span' tag.tag = 'div' if tag.tag in ('content', 'form') else 'span'
for key in tag.attrib.keys(): for key in tag.attrib.keys():

View File

@ -93,7 +93,7 @@ class Jacket(object):
# Render Jacket {{{ # Render Jacket {{{
def get_rating(rating, rchar): def get_rating(rating, rchar, e_rchar):
ans = '' ans = ''
try: try:
num = float(rating)/2 num = float(rating)/2
@ -104,12 +104,12 @@ def get_rating(rating, rchar):
if num < 1: if num < 1:
return ans return ans
ans = rchar * int(num) ans = ("%s%s") % (rchar * int(num), e_rchar * (5 - int(num)))
return ans return ans
def render_jacket(mi, output_profile, def render_jacket(mi, output_profile,
alt_title=_('Unknown'), alt_tags=[], alt_comments=''): alt_title=_('Unknown'), alt_tags=[], alt_comments='',
alt_publisher=('Unknown publisher')):
css = P('jacket/stylesheet.css', data=True).decode('utf-8') css = P('jacket/stylesheet.css', data=True).decode('utf-8')
try: try:
@ -124,12 +124,17 @@ def render_jacket(mi, output_profile,
if not mi.series: if not mi.series:
series = '' series = ''
try:
publisher = mi.publisher if mi.publisher else alt_publisher
except:
publisher = _('Unknown publisher')
try: try:
pubdate = strftime(u'%Y', mi.pubdate.timetuple()) pubdate = strftime(u'%Y', mi.pubdate.timetuple())
except: except:
pubdate = '' pubdate = ''
rating = get_rating(mi.rating, output_profile.ratings_char) rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char)
tags = mi.tags if mi.tags else alt_tags tags = mi.tags if mi.tags else alt_tags
if tags: if tags:
@ -154,6 +159,7 @@ def render_jacket(mi, output_profile,
css=css, css=css,
title=title, title=title,
author=author, author=author,
publisher=publisher,
pubdate_label=_('Published'), pubdate=pubdate, pubdate_label=_('Published'), pubdate=pubdate,
series_label=_('Series'), series=series, series_label=_('Series'), series=series,
rating_label=_('Rating'), rating=rating, rating_label=_('Rating'), rating=rating,
@ -168,16 +174,16 @@ def render_jacket(mi, output_profile,
# Post-process the generated html to strip out empty header items # Post-process the generated html to strip out empty header items
soup = BeautifulSoup(generated_html) soup = BeautifulSoup(generated_html)
if not series: if not series:
series_tag = soup.find('tr', attrs={'class':'cbj_series'}) series_tag = soup.find(attrs={'class':'cbj_series'})
series_tag.extract() series_tag.extract()
if not rating: if not rating:
rating_tag = soup.find('tr', attrs={'class':'cbj_rating'}) rating_tag = soup.find(attrs={'class':'cbj_rating'})
rating_tag.extract() rating_tag.extract()
if not tags: if not tags:
tags_tag = soup.find('tr', attrs={'class':'cbj_tags'}) tags_tag = soup.find(attrs={'class':'cbj_tags'})
tags_tag.extract() tags_tag.extract()
if not pubdate: if not pubdate:
pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'}) pubdate_tag = soup.find(attrs={'class':'cbj_pubdate'})
pubdate_tag.extract() pubdate_tag.extract()
if output_profile.short_name != 'kindle': if output_profile.short_name != 'kindle':
hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'}) hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'})

View File

@ -216,7 +216,9 @@ class PMLMLizer(object):
w = '\\w' w = '\\w'
width = elem.get('width') width = elem.get('width')
if width: if width:
w += '="%s%%"' % width if not width.endswith('%'):
width += '%'
w += '="%s"' % width
else: else:
w += '="50%"' w += '="50%"'
text.append(w) text.append(w)

View File

@ -37,7 +37,8 @@ class GenerateCatalogAction(InterfaceAction):
dbspec[id] = {'ondevice': db.ondevice(id, index_is_id=True)} dbspec[id] = {'ondevice': db.ondevice(id, index_is_id=True)}
# Calling gui2.tools:generate_catalog() # Calling gui2.tools:generate_catalog()
ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager) ret = generate_catalog(self.gui, dbspec, ids, self.gui.device_manager,
db)
if ret is None: if ret is None:
return return

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import Qt, QMenu
from calibre.constants import isosx from calibre.constants import isosx
from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \ from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
open_local_file open_local_file, info_dialog
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
@ -89,18 +89,34 @@ class ViewAction(InterfaceAction):
self._launch_viewer(name, viewer, internal) self._launch_viewer(name, viewer, internal)
def view_specific_format(self, triggered): def view_specific_format(self, triggered):
rows = self.gui.library_view.selectionModel().selectedRows() rows = list(self.gui.library_view.selectionModel().selectedRows())
if not rows or len(rows) == 0: if not rows or len(rows) == 0:
d = error_dialog(self.gui, _('Cannot view'), _('No book selected')) d = error_dialog(self.gui, _('Cannot view'), _('No book selected'))
d.exec_() d.exec_()
return return
row = rows[0].row() db = self.gui.library_view.model().db
formats = self.gui.library_view.model().db.formats(row).upper().split(',') rows = [r.row() for r in rows]
d = ChooseFormatDialog(self.gui, _('Choose the format to view'), formats) formats = [db.formats(row) for row in rows]
formats = [list(f.upper().split(',')) if f else None for f in formats]
all_fmts = set([])
for x in formats:
for f in x: all_fmts.add(f)
d = ChooseFormatDialog(self.gui, _('Choose the format to view'),
list(sorted(all_fmts)))
if d.exec_() == d.Accepted: if d.exec_() == d.Accepted:
format = d.format() fmt = d.format()
self.view_format(row, format) orig_num = len(rows)
rows = [rows[i] for i in range(len(rows)) if formats[i] and fmt in
formats[i]]
if self._view_check(len(rows)):
for row in rows:
self.view_format(row, fmt)
if len(rows) < orig_num:
info_dialog(self.gui, _('Format unavailable'),
_('Not all the selected books were available in'
' the %s format. You should convert'
' them first.')%fmt, show=True)
def _view_check(self, num, max_=3): def _view_check(self, num, max_=3):
if num <= max_: if num <= max_:

View File

@ -208,7 +208,8 @@ class BookInfo(QWebView):
rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for rows = u'\n'.join([u'<tr><td valign="top"><b>%s:</b></td><td valign="top">%s</td></tr>'%(k,t) for
k, t in rows]) k, t in rows])
comments = data.get(_('Comments'), '') comments = data.get(_('Comments'), '')
if comments and comments != u'None': if not comments or comments == u'None':
comments = ''
self.renderer.queue.put((rows, comments)) self.renderer.queue.put((rows, comments))
self._show_data(rows, '') self._show_data(rows, '')

View File

@ -34,7 +34,7 @@ class PluginWidget(QWidget, Ui_Form):
self.all_fields.append(x) self.all_fields.append(x)
QListWidgetItem(x, self.db_fields) QListWidgetItem(x, self.db_fields)
def initialize(self, name): #not working properly to update def initialize(self, name, db): #not working properly to update
self.name = name self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields) fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated db_fields from last use # Restore the activated db_fields from last use

View File

@ -28,7 +28,7 @@ class PluginWidget(QWidget, Ui_Form):
self.all_fields.append(x) self.all_fields.append(x)
QListWidgetItem(x, self.db_fields) QListWidgetItem(x, self.db_fields)
def initialize(self, name): def initialize(self, name, db):
self.name = name self.name = name
fields = gprefs.get(name+'_db_fields', self.all_fields) fields = gprefs.get(name+'_db_fields', self.all_fields)
# Restore the activated fields from last use # Restore the activated fields from last use

View File

@ -7,10 +7,11 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.gui2 import gprefs
from catalog_epub_mobi_ui import Ui_Form
from calibre.ebooks.conversion.config import load_defaults from calibre.ebooks.conversion.config import load_defaults
from PyQt4.Qt import QWidget from calibre.gui2 import gprefs
from catalog_epub_mobi_ui import Ui_Form
from PyQt4.Qt import QWidget, QLineEdit
class PluginWidget(QWidget,Ui_Form): class PluginWidget(QWidget,Ui_Form):
@ -23,7 +24,8 @@ class PluginWidget(QWidget,Ui_Form):
('generate_recently_added', True), ('generate_recently_added', True),
('note_tag','*'), ('note_tag','*'),
('numbers_as_text', False), ('numbers_as_text', False),
('read_tag','+'), ('read_pattern','+'),
('read_source_field_cb','Tag'),
('wishlist_tag','Wishlist'), ('wishlist_tag','Wishlist'),
] ]
@ -38,16 +40,54 @@ class PluginWidget(QWidget,Ui_Form):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.setupUi(self) self.setupUi(self)
def initialize(self, name): def initialize(self, name, db):
self.name = name self.name = name
# Populate the 'Read book' source fields
all_custom_fields = db.custom_field_keys()
custom_fields = {}
custom_fields['Tag'] = {'field':'tag', 'datatype':u'text'}
for custom_field in all_custom_fields:
field_md = db.metadata_for_field(custom_field)
if field_md['datatype'] in ['bool','composite','datetime','text']:
custom_fields[field_md['name']] = {'field':custom_field,
'datatype':field_md['datatype']}
# Add the sorted eligible fields to the combo box
for cf in sorted(custom_fields):
self.read_source_field_cb.addItem(cf)
self.read_source_fields = custom_fields
self.read_source_field_cb.currentIndexChanged.connect(self.read_source_field_changed)
# Update dialog fields from stored options # Update dialog fields from stored options
for opt in self.OPTION_FIELDS: for opt in self.OPTION_FIELDS:
opt_value = gprefs.get(self.name + '_' + opt[0], opt[1]) opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
if opt[0] in ['numbers_as_text','generate_titles','generate_series','generate_recently_added']: if opt[0] in [
'generate_recently_added',
'generate_series',
'generate_titles',
'numbers_as_text',
]:
getattr(self, opt[0]).setChecked(opt_value) getattr(self, opt[0]).setChecked(opt_value)
# Combo box
elif opt[0] in ['read_source_field_cb']:
# Look for last-stored combo box value
index = self.read_source_field_cb.findText(opt_value)
if index == -1:
index = self.read_source_field_cb.findText('Tag')
self.read_source_field_cb.setCurrentIndex(index)
# Text fields
else: else:
getattr(self, opt[0]).setText(opt_value) getattr(self, opt[0]).setText(opt_value)
# Init self.read_source_field
cs = unicode(self.read_source_field_cb.currentText())
read_source_spec = self.read_source_fields[cs]
self.read_source_field = read_source_spec['field']
def options(self): def options(self):
# Save/return the current options # Save/return the current options
# exclude_genre stores literally # exclude_genre stores literally
@ -55,16 +95,60 @@ class PluginWidget(QWidget,Ui_Form):
# others store as lists # others store as lists
opts_dict = {} opts_dict = {}
for opt in self.OPTION_FIELDS: for opt in self.OPTION_FIELDS:
if opt[0] in ['numbers_as_text','generate_titles','generate_series','generate_recently_added']: # Save values to gprefs
if opt[0] in [
'generate_recently_added',
'generate_series',
'generate_titles',
'numbers_as_text',
]:
opt_value = getattr(self,opt[0]).isChecked() opt_value = getattr(self,opt[0]).isChecked()
# Combo box uses .currentText()
elif opt[0] in ['read_source_field_cb']:
opt_value = unicode(getattr(self, opt[0]).currentText())
# text fields use .text()
else: else:
opt_value = unicode(getattr(self, opt[0]).text()) opt_value = unicode(getattr(self, opt[0]).text())
gprefs.set(self.name + '_' + opt[0], opt_value) gprefs.set(self.name + '_' + opt[0], opt_value)
if opt[0] in ['exclude_genre','numbers_as_text','generate_titles','generate_series','generate_recently_added']: # Construct opts
if opt[0] in [
'exclude_genre',
'generate_recently_added',
'generate_series',
'generate_titles',
'numbers_as_text',
]:
opts_dict[opt[0]] = opt_value opts_dict[opt[0]] = opt_value
else: else:
opts_dict[opt[0]] = opt_value.split(',') opts_dict[opt[0]] = opt_value.split(',')
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
# Generate read_book_marker
opts_dict['read_book_marker'] = "%s:%s" % (self.read_source_field, self.read_pattern.text())
# Append the output profile
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
return opts_dict return opts_dict
def read_source_field_changed(self,new_index):
    '''
    Process changes in the read_source_field combo box
    Currently using QLineEdit for all field types
    Possible to modify to switch QWidget type

    new_index is the index passed by the combo box signal; it is not used,
    the current text of the combo box is read instead.
    '''
    # unicode() rather than str(): this is the same conversion initialize()
    # applies before looking up self.read_source_fields, and under Python 2
    # str() cannot represent a column name containing non-ASCII characters.
    new_source = unicode(self.read_source_field_cb.currentText())
    read_source_spec = self.read_source_fields[new_source]
    self.read_source_field = read_source_spec['field']

    # Change pattern input widget to match the source field datatype
    if read_source_spec['datatype'] in ['bool','composite','datetime','text']:
        if not isinstance(self.read_pattern, QLineEdit):
            # Swap whatever widget is currently in the layout for a line edit
            self.read_spec_hl.removeWidget(self.read_pattern)
            dw = QLineEdit(self)
            dw.setObjectName('read_pattern')
            dw.setToolTip('Pattern for read book')
            self.read_pattern = dw
            self.read_spec_hl.addWidget(dw)

View File

@ -6,8 +6,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>579</width> <width>627</width>
<height>411</height> <height>549</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -28,42 +28,28 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="0"> <item row="4" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>'Mark this book as read' tag:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="read_tag">
<property name="toolTip">
<string extracomment="Default: +"/>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QLabel" name="label_4"> <widget class="QLabel" name="label_4">
<property name="text"> <property name="text">
<string>Additional note tag prefix:</string> <string>Additional note tag prefix:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="1"> <item row="4" column="1">
<widget class="QLineEdit" name="note_tag"> <widget class="QLineEdit" name="note_tag">
<property name="toolTip"> <property name="toolTip">
<string extracomment="Default: *"/> <string extracomment="Default: *"/>
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="1"> <item row="6" column="1">
<widget class="QLineEdit" name="exclude_genre"> <widget class="QLineEdit" name="exclude_genre">
<property name="toolTip"> <property name="toolTip">
<string extracomment="Default: \[[\w]*\]"/> <string extracomment="Default: \[[\w]*\]"/>
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="0"> <item row="6" column="0">
<widget class="QLabel" name="label"> <widget class="QLabel" name="label">
<property name="text"> <property name="text">
<string>Regex pattern describing tags to exclude as genres:</string> <string>Regex pattern describing tags to exclude as genres:</string>
@ -76,7 +62,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="6" column="1"> <item row="7" column="1">
<widget class="QLabel" name="label_6"> <widget class="QLabel" name="label_6">
<property name="text"> <property name="text">
<string>Regex tips: <string>Regex tips:
@ -88,7 +74,7 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="7" column="0"> <item row="8" column="0">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -101,44 +87,84 @@
</property> </property>
</spacer> </spacer>
</item> </item>
<item row="9" column="0"> <item row="10" column="0">
<widget class="QCheckBox" name="generate_titles"> <widget class="QCheckBox" name="generate_titles">
<property name="text"> <property name="text">
<string>Include 'Titles' Section</string> <string>Include 'Titles' Section</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="11" column="0"> <item row="12" column="0">
<widget class="QCheckBox" name="generate_recently_added"> <widget class="QCheckBox" name="generate_recently_added">
<property name="text"> <property name="text">
<string>Include 'Recently Added' Section</string> <string>Include 'Recently Added' Section</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="12" column="0"> <item row="13" column="0">
<widget class="QCheckBox" name="numbers_as_text"> <widget class="QCheckBox" name="numbers_as_text">
<property name="text"> <property name="text">
<string>Sort numbers as text</string> <string>Sort numbers as text</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="10" column="0"> <item row="11" column="0">
<widget class="QCheckBox" name="generate_series"> <widget class="QCheckBox" name="generate_series">
<property name="text"> <property name="text">
<string>Include 'Series' Section</string> <string>Include 'Series' Section</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1"> <item row="3" column="1">
<widget class="QLineEdit" name="wishlist_tag"/> <widget class="QLineEdit" name="wishlist_tag"/>
</item> </item>
<item row="2" column="0"> <item row="3" column="0">
<widget class="QLabel" name="label_5"> <widget class="QLabel" name="label_5">
<property name="text"> <property name="text">
<string>Wishlist tag:</string> <string>Wishlist tag:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1">
<layout class="QHBoxLayout" name="read_spec_hl">
<property name="sizeConstraint">
<enum>QLayout::SetMinimumSize</enum>
</property>
<item>
<widget class="QComboBox" name="read_source_field_cb">
<property name="sizePolicy">
<sizepolicy hsizetype="MinimumExpanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="toolTip">
<string>Source column for read book</string>
</property>
<property name="statusTip">
<string/>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="read_pattern">
<property name="toolTip">
<string>Pattern for read book</string>
</property>
<property name="statusTip">
<string/>
</property>
</widget>
</item>
</layout>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Books marked as read:</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>

View File

@ -17,6 +17,8 @@ class PluginWidget(Widget, Ui_Form):
ICON = I('mimetypes/fb2.png') ICON = I('mimetypes/fb2.png')
def __init__(self, parent, get_option, get_help, db=None, book_id=None): def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters']) Widget.__init__(self, parent, ['inline_toc', 'sectionize_chapters',
'sectionize_chapters_using_file_structure', 'h1_to_title',
'h2_to_title', 'h3_to_title'])
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id) self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,7 +14,7 @@
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="2" column="0"> <item row="6" column="0">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -41,6 +41,34 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_sectionize_chapters_using_file_structure">
<property name="text">
<string>Sectionize Chapters using file structure</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="opt_h1_to_title">
<property name="text">
<string>Wrap h1 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_h2_to_title">
<property name="text">
<string>Wrap h2 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="opt_h3_to_title">
<property name="text">
<string>Wrap h3 tags with &lt;title&gt; elements</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>

View File

@ -19,7 +19,7 @@ from calibre.customize.ui import catalog_plugins
class Catalog(QDialog, Ui_Dialog): class Catalog(QDialog, Ui_Dialog):
''' Catalog Dialog builder''' ''' Catalog Dialog builder'''
def __init__(self, parent, dbspec, ids): def __init__(self, parent, dbspec, ids, db):
import re, cStringIO import re, cStringIO
from calibre import prints as info from calibre import prints as info
from PyQt4.uic import compileUi from PyQt4.uic import compileUi
@ -51,7 +51,7 @@ class Catalog(QDialog, Ui_Dialog):
catalog_widget = __import__('calibre.gui2.catalog.'+name, catalog_widget = __import__('calibre.gui2.catalog.'+name,
fromlist=[1]) fromlist=[1])
pw = catalog_widget.PluginWidget() pw = catalog_widget.PluginWidget()
pw.initialize(name) pw.initialize(name, db)
pw.ICON = I('forward.png') pw.ICON = I('forward.png')
self.widgets.append(pw) self.widgets.append(pw)
[self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types] [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]

View File

@ -29,10 +29,6 @@ class SearchLineEdit(QLineEdit):
QLineEdit.mouseReleaseEvent(self, event) QLineEdit.mouseReleaseEvent(self, event)
QLineEdit.selectAll(self) QLineEdit.selectAll(self)
def focusInEvent(self, event):
QLineEdit.focusInEvent(self, event)
QLineEdit.selectAll(self)
def dropEvent(self, ev): def dropEvent(self, ev):
self.parent().normalize_state() self.parent().normalize_state()
return QLineEdit.dropEvent(self, ev) return QLineEdit.dropEvent(self, ev)
@ -256,7 +252,11 @@ class SavedSearchBox(QComboBox):
def initialize(self, _search_box, colorize=False, help_text=_('Search')): def initialize(self, _search_box, colorize=False, help_text=_('Search')):
self.search_box = _search_box self.search_box = _search_box
try:
self.line_edit.setPlaceholderText(help_text) self.line_edit.setPlaceholderText(help_text)
except:
# Using Qt < 4.7
pass
self.colorize = colorize self.colorize = colorize
self.clear() self.clear()
@ -350,14 +350,17 @@ class SearchBoxMixin(object):
shortcuts = QKeySequence.keyBindings(QKeySequence.Find) shortcuts = QKeySequence.keyBindings(QKeySequence.Find)
shortcuts = list(shortcuts) + [QKeySequence('/'), QKeySequence('Alt+S')] shortcuts = list(shortcuts) + [QKeySequence('/'), QKeySequence('Alt+S')]
self.action_focus_search.setShortcuts(shortcuts) self.action_focus_search.setShortcuts(shortcuts)
self.action_focus_search.triggered.connect(lambda x: self.action_focus_search.triggered.connect(self.focus_search_box)
self.search.setFocus(Qt.OtherFocusReason))
self.addAction(self.action_focus_search) self.addAction(self.action_focus_search)
self.search.setStatusTip(re.sub(r'<\w+>', ' ', self.search.setStatusTip(re.sub(r'<\w+>', ' ',
unicode(self.search.toolTip()))) unicode(self.search.toolTip())))
self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip()) self.advanced_search_button.setStatusTip(self.advanced_search_button.toolTip())
self.clear_button.setStatusTip(self.clear_button.toolTip()) self.clear_button.setStatusTip(self.clear_button.toolTip())
def focus_search_box(self, *args):
    '''
    Give keyboard focus to the search box and select all of its text.

    Any positional arguments are accepted and ignored.
    '''
    box = self.search
    box.setFocus(Qt.OtherFocusReason)
    box.lineEdit().selectAll()
def search_box_cleared(self): def search_box_cleared(self):
self.tags_view.clear() self.tags_view.clear()
self.saved_search.clear() self.saved_search.clear()

View File

@ -245,11 +245,11 @@ def fetch_scheduled_recipe(arg):
return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt] return 'gui_convert', args, _('Fetch news from ')+arg['title'], fmt.upper(), [pt]
def generate_catalog(parent, dbspec, ids, device_manager): def generate_catalog(parent, dbspec, ids, device_manager, db):
from calibre.gui2.dialogs.catalog import Catalog from calibre.gui2.dialogs.catalog import Catalog
# Build the Catalog dialog in gui2.dialogs.catalog # Build the Catalog dialog in gui2.dialogs.catalog
d = Catalog(parent, dbspec, ids) d = Catalog(parent, dbspec, ids, db)
if d.exec_() != d.Accepted: if d.exec_() != d.Accepted:
return None return None

View File

@ -606,12 +606,12 @@ class EPUB_MOBI(CatalogPlugin):
help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n" help=_("Specifies the output profile. In some cases, an output profile is required to optimize the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured Table of Contents with Sections and Articles.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--read-tag', Option('--read-book-marker',
default='+', default='tag:+',
dest='read_tag', dest='read_book_marker',
action = None, action = None,
help=_("Tag indicating book has been read.\n" "Default: '%default'\n" help=_("field:pattern indicating book has been read.\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to ePub, MOBI output formats")),
Option('--wishlist-tag', Option('--wishlist-tag',
default='Wishlist', default='Wishlist',
dest='wishlist_tag', dest='wishlist_tag',
@ -898,6 +898,8 @@ class EPUB_MOBI(CatalogPlugin):
self.__plugin = plugin self.__plugin = plugin
self.__progressInt = 0.0 self.__progressInt = 0.0
self.__progressString = '' self.__progressString = ''
f, _, p = opts.read_book_marker.partition(':')
self.__read_book_marker = {'field':f, 'pattern':p}
self.__reporter = report_progress self.__reporter = report_progress
self.__stylesheet = stylesheet self.__stylesheet = stylesheet
self.__thumbs = None self.__thumbs = None
@ -936,7 +938,6 @@ class EPUB_MOBI(CatalogPlugin):
if self.opts.generate_series: if self.opts.generate_series:
self.__totalSteps += 2 self.__totalSteps += 2
# Accessors # Accessors
if True: if True:
''' '''
@ -1210,7 +1211,7 @@ class EPUB_MOBI(CatalogPlugin):
def READING_SYMBOL(self): def READING_SYMBOL(self):
def fget(self): def fget(self):
return '<span style="color:black">&#x25b7;</span>' if self.generateForKindle else \ return '<span style="color:black">&#x25b7;</span>' if self.generateForKindle else \
'<span style="color:white">%s</span>' % self.opts.read_tag '<span style="color:white">+</span>'
return property(fget=fget) return property(fget=fget)
@dynamic_property @dynamic_property
def READ_SYMBOL(self): def READ_SYMBOL(self):
@ -1401,8 +1402,7 @@ class EPUB_MOBI(CatalogPlugin):
if record['cover']: if record['cover']:
this_title['cover'] = re.sub('&amp;', '&', record['cover']) this_title['cover'] = re.sub('&amp;', '&', record['cover'])
# This may be updated in self.processSpecialTags() this_title['read'] = self.discoverReadStatus(record)
this_title['read'] = False
if record['tags']: if record['tags']:
this_title['tags'] = self.processSpecialTags(record['tags'], this_title['tags'] = self.processSpecialTags(record['tags'],
@ -2675,13 +2675,7 @@ class EPUB_MOBI(CatalogPlugin):
pBookTag = Tag(soup, "p") pBookTag = Tag(soup, "p")
ptc = 0 ptc = 0
# book with read/reading/unread symbol book['read'] = self.discoverReadStatus(book)
for tag in book['tags']:
if tag == self.opts.read_tag:
book['read'] = True
break
else:
book['read'] = False
# book with read|reading|unread symbol or wishlist item # book with read|reading|unread symbol or wishlist item
if self.opts.wishlist_tag in book.get('tags', []): if self.opts.wishlist_tag in book.get('tags', []):
@ -2689,7 +2683,7 @@ class EPUB_MOBI(CatalogPlugin):
pBookTag.insert(ptc,NavigableString(self.MISSING_SYMBOL)) pBookTag.insert(ptc,NavigableString(self.MISSING_SYMBOL))
ptc += 1 ptc += 1
else: else:
if book['read']: if book.get('read', False):
# check mark # check mark
pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL)) pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL))
pBookTag['class'] = "read_book" pBookTag['class'] = "read_book"
@ -4027,6 +4021,34 @@ class EPUB_MOBI(CatalogPlugin):
if not os.path.isdir(images_path): if not os.path.isdir(images_path):
os.makedirs(images_path) os.makedirs(images_path)
def discoverReadStatus(self, record):
    '''
    Return True if the book described by record is marked as read.

    The marker is the field:pattern spec held in self.__read_book_marker
    (parsed from the read_book_marker option)
        if field == tag, scan record['tags'] for an exact match on pattern
        otherwise (or if the tag scan finds nothing), fetch the field from
        the database by record['id'] and try a case-insensitive regex
        search for pattern in its contents
    This allows maximum flexibility with fields of type
        datatype bool:      #field_name:True
        datatype text:      #field_name:<string>
        datatype datetime:  #field_name:.*

    A missing or empty tag list, empty field contents, or a pattern that
    is not a valid regular expression all yield False
    '''
    field = self.__read_book_marker['field']
    pat = self.__read_book_marker['pattern']

    # Legacy handling of special 'read' tag
    if field == 'tag':
        # Tags may be absent or None for a book; treat both as 'no tags'
        try:
            tags = record['tags'] or ()
        except KeyError:
            tags = ()
        if pat in tags:
            return True

    field_contents = self.__db.get_field(record['id'],
                                         field,
                                         index_is_id=True)
    if not field_contents:
        return False

    try:
        matched = re.search(pat, unicode(field_contents), re.IGNORECASE)
    except re.error:
        # The pattern is user supplied and need not be a valid regex;
        # e.g. the default marker 'tag:+' yields the pattern '+'
        return False
    return matched is not None
def filterDbTags(self, tags): def filterDbTags(self, tags):
# Remove the special marker tags from the database's tag list, # Remove the special marker tags from the database's tag list,
# return sorted list of normalized genre tags # return sorted list of normalized genre tags
@ -4519,7 +4541,6 @@ class EPUB_MOBI(CatalogPlugin):
markerTags = [] markerTags = []
markerTags.extend(self.opts.exclude_tags.split(',')) markerTags.extend(self.opts.exclude_tags.split(','))
markerTags.extend(self.opts.note_tag.split(',')) markerTags.extend(self.opts.note_tag.split(','))
markerTags.extend(self.opts.read_tag.split(','))
return markerTags return markerTags
def letter_or_symbol(self,char): def letter_or_symbol(self,char):
@ -4629,6 +4650,7 @@ class EPUB_MOBI(CatalogPlugin):
if open_pTag: if open_pTag:
result.insert(rtc, pTag) result.insert(rtc, pTag)
rtc += 1
paras = result.findAll('p') paras = result.findAll('p')
for p in paras: for p in paras:
@ -4647,10 +4669,12 @@ class EPUB_MOBI(CatalogPlugin):
tag = self.convertHTMLEntities(tag) tag = self.convertHTMLEntities(tag)
if tag.startswith(opts.note_tag): if tag.startswith(opts.note_tag):
this_title['notes'] = tag[len(self.opts.note_tag):] this_title['notes'] = tag[len(self.opts.note_tag):]
elif tag == opts.read_tag:
this_title['read'] = True
elif re.search(opts.exclude_genre, tag): elif re.search(opts.exclude_genre, tag):
continue continue
elif self.__read_book_marker['field'] == 'tag' and \
tag == self.__read_book_marker['pattern']:
# remove 'read' tag
continue
else: else:
tag_list.append(tag) tag_list.append(tag)
return tag_list return tag_list
@ -4759,7 +4783,7 @@ class EPUB_MOBI(CatalogPlugin):
for key in keys: for key in keys:
if key in ['catalog_title','authorClip','connected_kindle','descriptionClip', if key in ['catalog_title','authorClip','connected_kindle','descriptionClip',
'exclude_genre','exclude_tags','note_tag','numbers_as_text', 'exclude_genre','exclude_tags','note_tag','numbers_as_text',
'output_profile','read_tag', 'output_profile','read_book_marker',
'search_text','sort_by','sort_descriptions_by_author','sync', 'search_text','sort_by','sort_descriptions_by_author','sync',
'wishlist_tag']: 'wishlist_tag']:
build_log.append(" %s: %s" % (key, opts_dict[key])) build_log.append(" %s: %s" % (key, opts_dict[key]))

View File

@ -640,7 +640,7 @@ def catalog_option_parser(args):
log = Log() log = Log()
parser = get_parser(_( parser = get_parser(_(
''' '''
%prog catalog /path/to/destination.(csv|epub|mobi|xml ...) [options] %prog catalog /path/to/destination.(CSV|EPUB|MOBI|XML ...) [options]
Export a catalog in format specified by path/to/destination extension. Export a catalog in format specified by path/to/destination extension.
Options control how entries are displayed in the generated catalog ouput. Options control how entries are displayed in the generated catalog ouput.

View File

@ -199,6 +199,11 @@ if not _run_once:
__builtin__.__dict__['lopen'] = local_open __builtin__.__dict__['lopen'] = local_open
import mimetypes
mimetypes.init([P('mime.types')])
guess_type = mimetypes.guess_type
def test_lopen(): def test_lopen():
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir from calibre import CurrentDir