diff --git a/resources/images/news/joop.png b/resources/images/news/joop.png new file mode 100644 index 0000000000..0ea5e422e1 Binary files /dev/null and b/resources/images/news/joop.png differ diff --git a/resources/images/news/kitsapun.png b/resources/images/news/kitsapun.png new file mode 100644 index 0000000000..4b7b883d52 Binary files /dev/null and b/resources/images/news/kitsapun.png differ diff --git a/resources/images/news/nrcnext.png b/resources/images/news/nrcnext.png new file mode 100644 index 0000000000..1349755925 Binary files /dev/null and b/resources/images/news/nrcnext.png differ diff --git a/resources/quick_start.epub b/resources/quick_start.epub new file mode 100644 index 0000000000..d340d40996 Binary files /dev/null and b/resources/quick_start.epub differ diff --git a/resources/recipes/amspec.recipe b/resources/recipes/amspec.recipe index 62bec5ae18..e5a76a4f86 100644 --- a/resources/recipes/amspec.recipe +++ b/resources/recipes/amspec.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' spectator.org ''' @@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe class TheAmericanSpectator(BasicNewsRecipe): title = 'The American Spectator' __author__ = 'Darko Miletic' - language = 'en' - description = 'News from USA' + category = 'news, politics, USA, world' + publisher = 'The American Spectator' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + language = 'en' INDEX = 'http://spectator.org' - html2lrf_options = [ - '--comment' , description - , '--category' , 'news, politics, USA' - , '--publisher' , title - ] + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } keep_only_tags = [ dict(name='div', attrs={'class':'post inner'}) @@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe): remove_tags = [ 
dict(name='object') - ,dict(name='div', attrs={'class':'col3' }) - ,dict(name='div', attrs={'class':'post-options' }) - ,dict(name='p' , attrs={'class':'letter-editor'}) - ,dict(name='div', attrs={'class':'social' }) + ,dict(name='div', attrs={'class':['col3','post-options','social']}) + ,dict(name='p' , attrs={'class':['letter-editor','meta']}) ] - feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')] + feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')] def get_cover_url(self): cover_url = None @@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe): def print_version(self, url): return url + '/print' + + def get_article_url(self, article): + return article.get('guid', None) + diff --git a/resources/recipes/drivelry.recipe b/resources/recipes/drivelry.recipe new file mode 100644 index 0000000000..9e001ba530 --- /dev/null +++ b/resources/recipes/drivelry.recipe @@ -0,0 +1,41 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + +class drivelrycom(BasicNewsRecipe): + title = u'drivelry.com' + language = 'en' + description = 'A blog by Mike Abrahams' + __author__ = 'Krittika Goyal' + oldest_article = 60 #days + max_articles_per_feed = 25 + #encoding = 'latin1' + + remove_stylesheets = True + #remove_tags_before = dict(name='h1', attrs={'class':'heading'}) + remove_tags_after = dict(name='div', attrs={'id':'bookmark'}) + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['sidebar']}), + dict(name='div', attrs={'id':['bookmark']}), + #dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}), + #dict(name='ul', attrs={'class':'articleTools'}), + ] + + feeds = [ +('drivelry.com', + 'http://feeds.feedburner.com/drivelry'), + +] + + def preprocess_html(self, soup): + story = soup.find(name='div', attrs={'id':'main'}) + #td = heading.findParent(name='td') + #td.extract() + soup = BeautifulSoup(''' +t +

To donate to this blog: click here

+ +''') + body = soup.find(name='body') + body.insert(0, story) + return soup diff --git a/resources/recipes/fokkeensukke.recipe b/resources/recipes/fokkeensukke.recipe index 3ddbe1cfe5..76a4aa39b9 100644 --- a/resources/recipes/fokkeensukke.recipe +++ b/resources/recipes/fokkeensukke.recipe @@ -1,23 +1,29 @@ -#!/usr/bin/python from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class FokkeEnSukkeRecipe(BasicNewsRecipe) : __license__ = 'GPL v3' __author__ = 'kwetal' language = 'nl' - description = u'Popular Dutch daily cartoon Fokke en Sukke' + country = 'NL' + version = 2 title = u'Fokke en Sukke' - no_stylesheets = True - # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the and not in the . My reader (Sony PRS-600) has a serious issue - # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me. - template_css = '' - INDEX = u'http://foksuk.nl' + publisher = u'Reid, Geleijnse & Van Tol' + category = u'News, Cartoons' + description = u'Popular Dutch daily cartoon Fokke en Sukke' - # This cover is not as nice as it could be, needs some work - #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif' + conversion_options = {'comments': description, 'language': language, 'publisher': publisher} + + no_stylesheets = True + extra_css = ''' + body{font-family: verdana, arial, helvetica, geneva, sans-serif ; margin: 0em; padding: 0em;} + div.title {text-align: center; margin-bottom: 1em;} + ''' + + INDEX = u'http://foksuk.nl' + cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif' keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})] @@ -31,15 +37,14 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) : links = index.findAll('a') maxIndex = len(links) - 1 articles = [] - for i in range(len(links)) : - # The first link does not interest us, as it 
points to no cartoon. A begin_at parameter in the range() function would be nice. - if i == 0 : - continue - - # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname. - # If there are two, there are three links in sequence: dayname 1 2. In that case we're interested in the last two. + for i in range(1, len(links)) : + # There can be more than one cartoon for a given day (currently either one or two). + # If there's only one, there is just a link with the dayname. + # If there are two, there are three links in sequence: dayname 1 2. + # In that case we're interested in the last two. if links[i].renderContents() in dayNames : - # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content + # If the link is not in daynames, we processed it already, but if it is, let's see + # if the next one has '1' as content if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') : # Got you! Add it to the list article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''} @@ -59,29 +64,31 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) : return [[week, articles]] def preprocess_html(self, soup) : - # This method is called for every page, be it cartoon or TOC. We need to process each in their own way cartoon = soup.find('div', attrs={'class' : 'cartoon'}) - if cartoon : - # It is a cartoon. Extract the title. - title = '' - img = soup.find('img', attrs = {'alt' : True}) - if img : - title = img['alt'] - # Using the 'extra_css' displays it in the and not in the . See comment at the top of this class. Setting the style this way solves that. 
- tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')]) - tag.insert(0, title) - cartoon.insert(0, tag) + title = '' + img = soup.find('img', attrs = {'alt' : True}) + if img : + title = img['alt'] - # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier, - # and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook. - select = cartoon.find('div', attrs={'class' : 'selectcartoon'}) - if select : - select.extract() + tag = Tag(soup, 'div', [('class', 'title')]) + tag.insert(0, title) + cartoon.insert(0, tag) - return cartoon - else : - # It is a TOC. Just return the whole lot. - return soup + # We only want the cartoon, so throw out the index + select = cartoon.find('div', attrs={'class' : 'selectcartoon'}) + if select : + select.extract() + + freshSoup = self.getFreshSoup(soup) + freshSoup.body.append(cartoon) + + return freshSoup + + def getFreshSoup(self, oldSoup): + freshSoup = BeautifulSoup('') + if oldSoup.head.title: + freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title)) + return freshSoup diff --git a/resources/recipes/ftd.recipe b/resources/recipes/ftd.recipe index db53a3ed19..d18f9bdc56 100644 --- a/resources/recipes/ftd.recipe +++ b/resources/recipes/ftd.recipe @@ -9,16 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe class FTDe(BasicNewsRecipe): - + title = 'FTD' description = 'Financial Times Deutschland' __author__ = 'Oliver Niesner' use_embedded_content = False timefmt = ' [%d %b %Y]' - language = 'de' + language = _('German') max_articles_per_feed = 40 no_stylesheets = True - + remove_tags = [dict(id='navi_top'), dict(id='topbanner'), dict(id='seitenkopf'), @@ -28,8 +28,13 @@ class FTDe(BasicNewsRecipe): dict(id='ADS_Top'), dict(id='spinner'), dict(id='ftd-contentad'), + dict(id='ftd-promo'), dict(id='nava-50009007-1-0'), dict(id='navli-50009007-1-0'), + 
dict(id='Box5000534-0-0-0'), + dict(id='ExpV-1-0-0-1'), + dict(id='ExpV-1-0-0-0'), + dict(id='PollExpV-2-0-0-0'), dict(id='starRating'), dict(id='saveRating'), dict(id='yLayer'), @@ -44,14 +49,19 @@ class FTDe(BasicNewsRecipe): dict(name='ul', attrs={'class':'nav'}), dict(name='p', attrs={'class':'articleOptionHead'}), dict(name='p', attrs={'class':'articleOptionFoot'}), + dict(name='p', attrs={'class':'moreInfo'}), dict(name='div', attrs={'class':'chartBox'}), dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}), dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}), - dict(name='div', attrs={'class':'box boxNavTabs '}), + dict(name='div', attrs={'class':'box boxNavTabs'}), + dict(name='div', attrs={'class':'boxMMRgtLow'}), dict(name='span', attrs={'class':'vote_455857'}), dict(name='div', attrs={'class':'relatedhalb'}), dict(name='div', attrs={'class':'box boxListScrollOutline'}), + dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}), + dict(name='div', attrs={'class':'box boxTeaser'}), dict(name='div', attrs={'class':'tagCloud'}), + dict(name='div', attrs={'class':'pollView'}), dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}), dict(name='div', attrs={'class':'ftdHpNav'}), dict(name='div', attrs={'class':'ftdHead'}), @@ -67,11 +77,12 @@ class FTDe(BasicNewsRecipe): dict(name='div', attrs={'class':'wertungoben'}), dict(name='div', attrs={'class':'artikelfuss'}), dict(name='a', attrs={'class':'rating'}), + dict(name='a', attrs={'href':'#rt'}), dict(name='div', attrs={'class':'articleOptionFootFrame'}), dict(name='div', attrs={'class':'artikelsplitfaq'})] - remove_tags_after = [dict(name='a', attrs={'class':'more'})] - - feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), + #remove_tags_after = [dict(name='a', attrs={'class':'more'})] + + feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), ('Meinungshungrige', 
'http://www.ftd.de/rss2/meinungshungrige'), ('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'), ('Politik', 'http://www.ftd.de/rss2/politik'), @@ -82,8 +93,8 @@ class FTDe(BasicNewsRecipe): ('Auto', 'http://www.ftd.de/rss2/auto'), ('Lifestyle', 'http://www.ftd.de/rss2/lifestyle') - ] - + ] + def print_version(self, url): - return url + '?mode=print' + return url.replace('.html', '.html?mode=print') diff --git a/resources/recipes/greader_uber.recipe b/resources/recipes/greader_uber.recipe new file mode 100644 index 0000000000..ee48e7069d --- /dev/null +++ b/resources/recipes/greader_uber.recipe @@ -0,0 +1,38 @@ +import urllib, re, mechanize +from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre import __appname__ + +class GoogleReaderUber(BasicNewsRecipe): + title = 'Google Reader Uber' + description = 'This recipe downloads all unread feedsfrom your Google Reader account.' + needs_subscription = True + __author__ = 'rollercoaster, davec' + base_url = 'http://www.google.com/reader/atom/' + oldest_article = 365 + max_articles_per_feed = 250 + get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed + use_embedded_content = True + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + + if self.username is not None and self.password is not None: + request = urllib.urlencode([('Email', self.username), ('Passwd', self.password), + ('service', 'reader'), ('source', __appname__)]) + response = br.open('https://www.google.com/accounts/ClientLogin', request) + sid = re.search('SID=(\S*)', response.read()).group(1) + + cookies = mechanize.CookieJar() + br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies)) + cookies.set_cookie(mechanize.Cookie(None, 'SID', sid, None, False, '.google.com', True, True, '/', True, False, None, True, '', '', None)) + return br + + + def get_feeds(self): + feeds = [] + soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list') + for id in soup.findAll(True, 
attrs={'name':['id']}): + url = id.contents[0].replace('broadcast','reading-list') + feeds.append((re.search('/([^/]*)$', url).group(1), + self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options)) + return feeds diff --git a/resources/recipes/joop.recipe b/resources/recipes/joop.recipe new file mode 100644 index 0000000000..a913328b9b --- /dev/null +++ b/resources/recipes/joop.recipe @@ -0,0 +1,91 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag +import re + +class JoopRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'nl' + country = 'NL' + version = 1 + + title = u'Joop' + publisher = u'Vara' + category = u'News, Politics, Discussion' + description = u'Political blog from the Netherlands' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + + no_stylesheets = True + remove_javascript = True + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'div', attrs = {'class': 'author_head clearfix photo'})) + keep_only_tags.append(dict(name = 'h2', attrs = {'class': 'columnhead smallline'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class': re.compile('article.*')})) + + extra_css = ''' + body {font-family: verdana, arial, helvetica, geneva, sans-serif;} + img {margin-right: 0.4em;} + h3 {font-size: medium; font-style: italic; font-weight: normal;} + h2 {font-size: xx-large; font-weight: bold} + sub {color: #666666; font-size: x-small; font-weight: normal;} + div.joop_byline {font-size: large} + div.joop_byline_job {font-size: small; color: #696969;} + div.joop_date {font-size: x-small; font-style: italic; margin-top: 0.6em} + ''' + + INDEX = 'http://www.joop.nl' + + conversion_options = {'comments': description, 'tags': category, 'language': language, + 'publisher': publisher} + + def parse_index(self): + sections = ['Politiek', 'Wereld', 'Economie', 'Groen', 'Media', 'Leven', 'Show', 'Opinies'] + soup = 
self.index_to_soup(self.INDEX) + answer = [] + + div = soup.find('div', attrs = {'id': 'footer'}) + for section in sections: + articles = [] + h2 = div.find(lambda tag: tag.name == 'h2' and tag.renderContents() == section) + if h2: + ul = h2.findNextSibling('ul', 'linklist') + if ul: + for li in ul.findAll('li'): + title = self.tag_to_string(li.a) + url = self.INDEX + li.a['href'] + articles.append({'title': title, 'date': None, 'url': url, 'description': ''}) + + answer.append((section, articles)) + + return answer + + def preprocess_html(self, soup): + div = soup.find('div', 'author_head clearfix photo') + if div: + h2 = soup.find('h2') + if h2: + h2.name = 'div' + h2['class'] = 'joop_byline' + span = h2.find('span') + if span: + span.name = 'div' + span['class'] = 'joop_byline_job' + div.replaceWith(h2) + + h2 = soup.find('h2', attrs = {'class': 'columnhead smallline'}) + if h2: + txt = None + span = h2.find('span', 'info') + if span: + txt = span.find(text = True) + div = Tag(soup, 'div', attrs = [('class', 'joop_date')]) + div.append(txt) + h2.replaceWith(div) + + return soup + + diff --git a/resources/recipes/kitsapun.recipe b/resources/recipes/kitsapun.recipe new file mode 100644 index 0000000000..e9a7c42f06 --- /dev/null +++ b/resources/recipes/kitsapun.recipe @@ -0,0 +1,44 @@ + +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +www.kitsapun.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Kitsapsun(BasicNewsRecipe): + title = 'Kitsap Sun' + __author__ = 'Darko Miletic' + description = 'News from Kitsap County' + publisher = 'Scripps Interactive Newspapers Group' + category = 'news, Kitsap county, USA' + language = 'en' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'cp1252' + use_embedded_content = False + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher': publisher + } + + + keep_only_tags = [dict(name='div', 
attrs={'id':['story_meta','story_content']})] + + remove_tags = [dict(name=['object','link','embed','form','iframe'])] + + feeds = [ + (u'News' , u'http://www.kitsapsun.com/rss/headlines/news/' ) + ,(u'Business' , u'http://www.kitsapsun.com/rss/headlines/business/' ) + ,(u'Communities' , u'http://www.kitsapsun.com/rss/headlines/communities/' ) + ,(u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/') + ,(u'Lifestyles' , u'http://www.kitsapsun.com/rss/headlines/lifestyles/' ) + ] + + def print_version(self, url): + return url.rpartition('/')[0] + '/?print=1' diff --git a/resources/recipes/ledevoir.recipe b/resources/recipes/ledevoir.recipe new file mode 100644 index 0000000000..97b33c43a7 --- /dev/null +++ b/resources/recipes/ledevoir.recipe @@ -0,0 +1,79 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '14, January 2010' +__description__ = 'Canadian Paper ' + +''' +http://www.ledevoir.com/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ledevoir(BasicNewsRecipe): + author = 'Lorenzo Vigentini' + description = 'Canadian Paper' + + cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif' + title = u'Le Devoir' + publisher = 'leDevoir.com' + category = 'News, finance, economy, politics' + + language = 'fr' + encoding = 'utf-8' + timefmt = '[%a, %d %b, %Y]' + + max_articles_per_feed = 50 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [ + dict(name='div', attrs={'id':'article'}), + dict(name='ul', attrs={'id':'ariane'}) + ] + + remove_tags = [ + dict(name='div', attrs={'id':'dialog'}), + dict(name='div', attrs={'class':['interesse_actions','reactions']}), + dict(name='ul', attrs={'class':'mots_cles'}), + dict(name='a', attrs={'class':'haut'}), + dict(name='h5', attrs={'class':'interesse_actions'}) + ] + + feeds = [ + (u'A la 
une', 'http://www.ledevoir.com/rss/manchettes.xml'), + (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'), + (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'), + (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'), + (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'), + (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'), + (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'), + (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'), + (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'), + (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'), + (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'), + (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50') + ] + + extra_css = ''' + h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;} + h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;} + h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} + h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } + h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} + .specs {line-height:1em;margin:1px 0;} + .specs span.auteur {font:0.85em/1.1em Arial, 
Verdana, sans-serif;color:#787878;} + .specs span.auteur a, + .specs span.auteur span {text-transform:uppercase;color:#787878;} + .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;} + ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;} + ul#ariane li {display:inline;} + ul#ariane a {color:#2E2E2E;text-decoration:underline;} + .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;} + .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;} + ''' diff --git a/resources/recipes/national_post.recipe b/resources/recipes/national_post.recipe index d9743d5980..4fe188934c 100644 --- a/resources/recipes/national_post.recipe +++ b/resources/recipes/national_post.recipe @@ -70,11 +70,28 @@ class NYTimes(BasicNewsRecipe): feeds.append((current_section, current_articles)) return feeds + def preprocess_html(self, soup): story = soup.find(name='div', attrs={'class':'triline'}) - #td = heading.findParent(name='td') - #td.extract() + page2_link = soup.find('p','pagenav') + if page2_link: + atag = page2_link.find('a',href=True) + if atag: + page2_url = atag['href'] + if page2_url.startswith('story'): + page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url + elif page2_url.startswith( '/todays-paper/story.html'): + page2_url = 'http://www.nationalpost.com/'+page2_url + page2_soup = self.index_to_soup(page2_url) + if page2_soup: + page2_content = page2_soup.find('div','story-content') + if page2_content: + full_story = BeautifulSoup('
') + full_story.insert(0,story) + full_story.insert(1,page2_content) + story = full_story soup = BeautifulSoup('t') body = soup.find(name='body') body.insert(0, story) return soup + diff --git a/resources/recipes/ncrnext.recipe b/resources/recipes/ncrnext.recipe index d8a51e62c8..e03da301fa 100644 --- a/resources/recipes/ncrnext.recipe +++ b/resources/recipes/ncrnext.recipe @@ -1,29 +1,38 @@ from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class NrcNextRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' + __license__ = 'GPL v3' __author__ = 'kwetal' - version = 1 language = 'nl' + country = 'NL' + version = 2 + + title = u'nrcnext' + publisher = u'NRC Media' + category = u'News, Opinion, the Netherlands' description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.' - title = u'nrcnext' + + conversion_options = {'comments': description, 'language': language, 'publisher': publisher} no_stylesheets = True - template_css = '' + remove_javascript = True - # I want to do some special processing on the articles. I could not solve it with the 'extra_css' property . So we do it the hard way. keep_only_tags = [dict(name='div', attrs={'id' : 'main'})] - # If that's overkill for you comment out the previous line and uncomment the next. Then get rid of the preprocess_html() method. 
- #keep_only_tags = [dict(name='div', attrs={'class' : 'post'}), dict(name='div', attrs={'class' : 'vlag'}) ] - remove_tags = [dict(name = 'div', attrs = {'class' : 'meta'}), - dict(name = 'div', attrs = {'class' : 'datumlabel'}), - dict(name = 'ul', attrs = {'class' : 'cats single'}), - dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}), - dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})] + remove_tags = [] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'meta'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'datumlabel'})) + remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats single'})) + remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'})) + remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})) - use_embedded_content = False + extra_css = ''' + body {font-family: verdana, arial, helvetica, geneva, sans-serif; text-align: left;} + p.wp-caption-text {font-size: x-small; color: #666666;} + h2.sub_title {font-size: medium; color: #696969;} + h2.vlag {font-size: small; font-weight: bold;} + ''' def parse_index(self) : # Use the wesbite as an index. Their RSS feeds can be out of date. @@ -44,10 +53,11 @@ class NrcNextRecipe(BasicNewsRecipe): # Find the links to the actual articles and rember the location they're pointing to and the title a = post.find('a', attrs={'rel' : 'bookmark'}) href = a['href'] - title = a.renderContents() + title = self.tag_to_string(a) if index == 'columnisten' : - # In this feed/page articles can be written by more than one author. It is nice to see their names in the titles. + # In this feed/page articles can be written by more than one author. + # It is nice to see their names in the titles. 
flag = post.find('h2', attrs = {'class' : 'vlag'}) author = flag.contents[0].renderContents() completeTitle = u''.join([author, u': ', title]) @@ -71,44 +81,46 @@ class NrcNextRecipe(BasicNewsRecipe): return answer def preprocess_html(self, soup) : - # This method is called for every page, be it cartoon or TOC. We need to process each in their own way - if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}) : - # It's an article, find the interesting part + if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}): tag = soup.find('div', attrs = {'class' : 'post'}) - if tag : - # And replace any links with their text, so they don't show up underlined on my reader. - for link in tag.findAll('a') : - link.replaceWith(link.renderContents()) + if tag: + h2 = tag.find('h2', 'vlag') + if h2: + new_h2 = Tag(soup, 'h2', attrs = [('class', 'vlag')]) + new_h2.append(self.tag_to_string(h2)) + h2.replaceWith(new_h2) + else: + h2 = tag.find('h2') + if h2: + new_h2 = Tag(soup, 'h2', attrs = [('class', 'sub_title')]) + new_h2.append(self.tag_to_string(h2)) + h2.replaceWith(new_h2) - # Slows down my Sony reader; feel free to comment out - for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}) : + h1 = tag.find('h1') + if h1: + new_h1 = Tag(soup, 'h1') + new_h1.append(self.tag_to_string(h1)) + h1.replaceWith(new_h1) + + # Slows down my reader. + for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}): movie.extract() - for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}) : + for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}): movie.extract() + for iframe in tag.findAll('iframe') : + iframe.extract() - homeMadeSoup = BeautifulSoup('') - body = homeMadeSoup.find('body') - body.append(tag) + fresh_soup = self.getFreshSoup(soup) + fresh_soup.body.append(tag) - return homeMadeSoup - else : + return fresh_soup + else: # This should never happen and other famous last words... 
return soup - else : - # It's a TOC, return the whole lot. - return soup - - def postproces_html(self, soup) : - # Should not happen, but it does. Slows down my Sony eReader - for img in soup.findAll('img') : - if img['src'].startswith('http://') : - img.extract() - - # Happens for some movies which we are not able to view anyway - for iframe in soup.findAll('iframe') : - if iframe['src'].startswith('http://') : - iframe.extract() - - + def getFreshSoup(self, oldSoup): + freshSoup = BeautifulSoup('') + if oldSoup.head.title: + freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title)) + return freshSoup diff --git a/resources/recipes/yementimes.recipe b/resources/recipes/yementimes.recipe new file mode 100644 index 0000000000..426c9a748c --- /dev/null +++ b/resources/recipes/yementimes.recipe @@ -0,0 +1,125 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag + +class YemenTimesRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'en_YE' + country = 'YE' + version = 1 + + title = u'Yemen Times' + publisher = u'yementimes.com' + category = u'News, Opinion, Yemen' + description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.' 
+ + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + encoding = 'utf-8' + + remove_empty_feeds = True + no_stylesheets = True + remove_javascript = True + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1', + 'class': 'DMAIN2'})) + remove_attributes = ['style'] + + INDEX = 'http://www.yementimes.com/' + feeds = [] + feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT')) + feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news')) + feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News')) + feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report')) + feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health')) + feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview')) + feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion')) + feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business')) + feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed')) + feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture')) + feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View')) + feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety')) + feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education')) + + extra_css = ''' + body {font-family:verdana, arial, helvetica, geneva, sans-serif;} + div.yemen_byline {font-size: medium; font-weight: bold;} + div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;} + .yemen_caption {font-size: x-small; font-style: italic; color: #696969;} + ''' + + 
conversion_options = {'comments': description, 'tags': category, 'language': 'en', + 'publisher': publisher, 'linearize_tables': True} + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.set_handle_gzip(True) + + return br + + def parse_index(self): + answer = [] + for feed_title, feed in self.feeds: + soup = self.index_to_soup(feed) + + newsbox = soup.find('div', 'newsbox') + main = newsbox.findNextSibling('table') + + articles = [] + for li in main.findAll('li'): + title = self.tag_to_string(li.a) + url = self.INDEX + li.a['href'] + articles.append({'title': title, 'date': None, 'url': url, 'description': '
 '}) + + answer.append((feed_title, articles)) + + return answer + + def preprocess_html(self, soup): + freshSoup = self.getFreshSoup(soup) + + headline = soup.find('div', attrs = {'id': 'DVMTIT'}) + if headline: + div = headline.findNext('div', attrs = {'id': 'DVTOP'}) + img = None + if div: + img = div.find('img') + + headline.name = 'h1' + freshSoup.body.append(headline) + if img is not None: + freshSoup.body.append(img) + + byline = soup.find('div', attrs = {'id': 'DVTIT'}) + if byline: + date_el = byline.find('span') + if date_el: + pub_date = self.tag_to_string(date_el) + date = Tag(soup, 'div', attrs = [('class', 'yemen_date')]) + date.append(pub_date) + date_el.extract() + + raw = '
'.join(['%s' % (part) for part in byline.findAll(text = True)]) + author = BeautifulSoup('') + + if date is not None: + freshSoup.body.append(date) + freshSoup.body.append(author) + + story = soup.find('div', attrs = {'id': 'DVDET'}) + if story: + for table in story.findAll('table'): + if table.find('img'): + table['class'] = 'yemen_caption' + + freshSoup.body.append(story) + + return freshSoup + + def getFreshSoup(self, oldSoup): + freshSoup = BeautifulSoup('') + if oldSoup.head.title: + freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title)) + return freshSoup diff --git a/src/calibre/customize/__init__.py b/src/calibre/customize/__init__.py index f92fa85225..5ab9ac6d1c 100644 --- a/src/calibre/customize/__init__.py +++ b/src/calibre/customize/__init__.py @@ -2,11 +2,12 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -import os, sys, tempfile, zipfile +import atexit, os, shutil, sys, tempfile, zipfile from calibre.constants import numeric_version from calibre.ptempfile import PersistentTemporaryFile + class Plugin(object): ''' A calibre plugin. Useful members include: @@ -225,12 +226,14 @@ class MetadataWriterPlugin(Plugin): ''' pass - + class CatalogPlugin(Plugin): ''' A plugin that implements a catalog generator. 
''' + resources_path = None + #: Output file type for which this plugin should be run #: For example: 'epub' or 'xml' file_types = set([]) @@ -249,22 +252,18 @@ class CatalogPlugin(Plugin): cli_options = [] - def cleanup(self, path): - try: - import os, shutil - if os.path.exists(path): - shutil.rmtree(path) - except: - pass def search_sort_db(self, db, opts): - if opts.search_text: + + # If declared, --ids overrides any declared search criteria + if not opts.ids and opts.search_text: db.search(opts.search_text) + if opts.sort_by: # 2nd arg = ascending db.sort(opts.sort_by, True) - - return db.get_data_as_dict() + + return db.get_data_as_dict(ids=opts.ids) def get_output_fields(self, opts): # Return a list of requested fields, with opts.sort_by first @@ -280,8 +279,10 @@ class CatalogPlugin(Plugin): fields = list(all_fields & requested_fields) else: fields = list(all_fields) + fields.sort() - fields.insert(0,fields.pop(int(fields.index(opts.sort_by)))) + if opts.sort_by: + fields.insert(0,fields.pop(int(fields.index(opts.sort_by)))) return fields def initialize(self): @@ -291,35 +292,27 @@ class CatalogPlugin(Plugin): Tab will be dynamically generated and added to the Catalog Options dialog in calibre.gui2.dialogs.catalog.py:Catalog ''' - import atexit from calibre.customize.builtins import plugins as builtin_plugins + from calibre.customize.ui import config + from calibre.ptempfile import PersistentTemporaryDirectory - if type(self) in builtin_plugins: - print "%s: Built-in Catalog plugin, no init necessary" % self.name - else: - print "%s: User-added plugin" % self.name - print " Copying .ui and .py resources from %s to tmpdir" % self.plugin_path - - # Generate a list of resource files to extract from the zipped plugin - # Copy to tmpdir/calibre_plugin_resources + if not type(self) in builtin_plugins and \ + not self.name in config['disabled_plugins']: files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]] - print " files_to_copy: %s" % 
files_to_copy resources = zipfile.ZipFile(self.plugin_path,'r') - temp_resources_path = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources') - + + if self.resources_path is None: + self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='') + for file in files_to_copy: try: - resources.extract(file, temp_resources_path) - print " %s extracted to %s" % (file, temp_resources_path) + resources.extract(file, self.resources_path) except: - print " %s not found in %s" % (file, os.path.basename(self.plugin_path)) - resources.close() + print " customize:__init__.initialize(): %s not found in %s" % (file, os.path.basename(self.plugin_path)) + continue + resources.close() - # Register temp_resources_path for deletion when calibre exits - atexit.register(self.cleanup, temp_resources_path) - - - def run(self, path_to_output, opts, db): + def run(self, path_to_output, opts, db, ids): ''' Run the plugin. Must be implemented in subclasses. It should generate the catalog in the format specified diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py index d84f3c3e77..d3990e95ac 100644 --- a/src/calibre/devices/eb600/driver.py +++ b/src/calibre/devices/eb600/driver.py @@ -14,6 +14,7 @@ Windows PNP strings: 2W00000&1', 3, u'G:\\') ''' +import re from calibre.devices.usbms.driver import USBMS @@ -108,6 +109,7 @@ class POCKETBOOK360(EB600): OSX_MAIN_MEM = 'Philips Mass Storge Media' OSX_CARD_A_MEM = 'Philips Mass Storge Media' + OSX_MAIN_MEM_VOL_PAT = re.compile(r'/Pocket') @classmethod def can_handle(cls, dev, debug=False): diff --git a/src/calibre/ebooks/metadata/cli.py b/src/calibre/ebooks/metadata/cli.py index e4ea1a3931..5de8b76c43 100644 --- a/src/calibre/ebooks/metadata/cli.py +++ b/src/calibre/ebooks/metadata/cli.py @@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type): mi.title_sort = title_sort(opts.title) if getattr(opts, 'tags', None) is not None: mi.tags = [t.strip() for t in 
opts.tags.split(',')] + if getattr(opts, 'series', None) is not None: + mi.series = opts.series.strip() + if getattr(opts, 'series_index', None) is not None: + mi.series_index = float(opts.series_index.strip()) if getattr(opts, 'cover', None) is not None: ext = os.path.splitext(opts.cover)[1].replace('.', '').upper() diff --git a/src/calibre/ebooks/metadata/rtf.py b/src/calibre/ebooks/metadata/rtf.py index 7f418de8d7..d116ec30fb 100644 --- a/src/calibre/ebooks/metadata/rtf.py +++ b/src/calibre/ebooks/metadata/rtf.py @@ -25,12 +25,14 @@ def get_document_info(stream): while not found: prefix = block[-6:] block = prefix + stream.read(block_size) + actual_block_size = len(block) - len(prefix) if len(block) == len(prefix): break idx = block.find(r'{\info') if idx >= 0: found = True - stream.seek(stream.tell() - block_size + idx - len(prefix)) + pos = stream.tell() - actual_block_size + idx - len(prefix) + stream.seek(pos) else: if block.find(r'\sect') > -1: break diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index 2f52fde371..15e9675aa8 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -90,7 +90,10 @@ class DetectStructure(object): mark = etree.Element(XHTML('div'), style=page_break_after) else: # chapter_mark == 'both': mark = etree.Element(XHTML('hr'), style=page_break_before) - elem.addprevious(mark) + try: + elem.addprevious(mark) + except TypeError: + self.log.exception('Failed to mark chapter') def create_level_based_toc(self): if self.opts.level1_toc is None: diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 1b2149cf3a..f4bdb9c7ac 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -20,6 +20,10 @@ class Font(object): class Column(object): + # A column contains an element is the element bulges out to + # the left or the right by at most HFUZZ*col width. 
+ HFUZZ = 0.2 + def __init__(self): self.left = self.right = self.top = self.bottom = 0 self.width = self.height = 0 @@ -41,6 +45,10 @@ class Column(object): for x in self.elements: yield x + def contains(self, elem): + return elem.left > self.left - self.HFUZZ*self.width and \ + elem.right < self.right + self.HFUZZ*self.width + class Element(object): def __eq__(self, other): @@ -238,11 +246,10 @@ class Page(object): return columns def find_elements_in_row_of(self, x): - interval = Interval(x.top - self.YFUZZ * self.average_text_height, + interval = Interval(x.top, x.top + self.YFUZZ*(1+self.average_text_height)) h_interval = Interval(x.left, x.right) - m = max(0, x.idx-15) - for y in self.elements[m:x.idx+15]: + for y in self.elements[x.idx:x.idx+15]: if y is not x: y_interval = Interval(y.top, y.bottom) x_interval = Interval(y.left, y.right) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 55f42ae4d5..ff20793f39 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -169,6 +169,21 @@ class RTFInput(InputFormatPlugin): with open('styles.css', 'ab') as f: f.write(css) + def preprocess(self, fname): + self.log('\tPreprocessing to convert unicode characters') + try: + data = open(fname, 'rb').read() + from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser + tokenizer = RtfTokenizer(data) + tokens = RtfTokenParser(tokenizer.tokens) + data = tokens.toRTF() + fname = 'preprocessed.rtf' + with open(fname, 'wb') as f: + f.write(data) + except: + self.log.exception( + 'Failed to preprocess RTF to convert unicode sequences, ignoring...') + return fname def convert(self, stream, options, file_ext, log, accelerators): @@ -177,8 +192,9 @@ class RTFInput(InputFormatPlugin): from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException self.log = log self.log('Converting RTF to XML...') + fname = self.preprocess(stream.name) try: - xml = self.generate_xml(stream.name) + xml = 
class tokenControlWordWithNumericArgument():
    """
    An RTF control word that carries a numeric argument, e.g. ``\\uc1``.

    name      -- the control word including its leading backslash
    argument  -- the integer argument
    separator -- the delimiter that followed the word in the source
                 ('' or ' ')
    """

    def __init__(self, name, argument, separator = ''):
        self.name = name
        self.argument = argument
        self.separator = separator

    def toRTF(self):
        # str() rather than repr(): repr() of a Python 2 long appends 'L',
        # which would end up inside the generated RTF.
        return self.name + str(self.argument) + self.separator

    def __repr__(self):
        return self.toRTF()
class tokenUnicode():
    """
    A ``\\uN`` unicode character together with its replacement tokens.

    data        -- the integer code carried by the \\u control word
    separator   -- delimiter that followed the control word in the source
    current_ucn -- the \\uc value in effect (how many replacement items
                   follow the \\u control word)
    eqList      -- replacement tokens; each must provide toRTF().
                   Defaults to a fresh empty list per instance.
    """

    def __init__(self, data, separator = '', current_ucn = 1, eqList = None):
        self.data = data
        self.separator = separator
        self.current_ucn = current_ucn
        # None instead of a [] default, so instances never share one list
        self.eqList = [] if eqList is None else eqList

    def toRTF(self):
        # str() rather than repr(): repr() of a Python 2 long appends 'L'
        result = '\\u' + str(self.data) + ' '
        ucn = self.current_ucn
        if len(self.eqList) < ucn:
            # Fewer replacements available than announced: emit a matching
            # \uc in front so the reader skips the right amount.
            ucn = len(self.eqList)
            result = tokenControlWordWithNumericArgument('\\uc', ucn).toRTF() + result
        # Emit at most ucn replacement tokens (never a negative slice)
        for eq in self.eqList[:max(ucn, 0)]:
            result = result + eq.toRTF()
        return result

    def __repr__(self):
        return '\\u' + str(self.data)
newTokens.append(tokenData(self.tokens[i].data[2:])) + i = i + 1 + continue + + newTokens.append(self.tokens[i]) + i = i + 1 + + self.tokens = list(newTokens) + + def processUnicode(self): + i = 0 + newTokens = [] + ucNbStack = [1] + while i < len(self.tokens): + if isinstance(self.tokens[i], tokenDelimitatorStart): + ucNbStack.append(ucNbStack[len(ucNbStack) - 1]) + newTokens.append(self.tokens[i]) + i = i + 1 + continue + if isinstance(self.tokens[i], tokenDelimitatorEnd): + ucNbStack.pop() + newTokens.append(self.tokens[i]) + i = i + 1 + continue + if isinstance(self.tokens[i], tokenControlWordWithNumericArgument): + if isString(self.tokens[i].name, '\\uc'): + ucNbStack[len(ucNbStack) - 1] = self.tokens[i].argument + newTokens.append(self.tokens[i]) + i = i + 1 + continue + if isString(self.tokens[i].name, '\\u'): + x = i + j = 0 + i = i + 1 + replace = [] + partialData = None + ucn = ucNbStack[len(ucNbStack) - 1] + while (i < len(self.tokens)) and (j < ucn): + if isinstance(self.tokens[i], tokenDelimitatorStart): + break + if isinstance(self.tokens[i], tokenDelimitatorEnd): + break + if isinstance(self.tokens[i], tokenData): + if len(self.tokens[i].data) >= ucn - j: + replace.append(tokenData(self.tokens[i].data[0 : ucn - j])) + if len(self.tokens[i].data) > ucn - j: + partialData = tokenData(self.tokens[i].data[ucn - j:]) + i = i + 1 + break + else: + replace.append(self.tokens[i]) + j = j + len(self.tokens[i].data) + i = i + 1 + continue + if isinstance(self.tokens[i], token8bitChar) or isinstance(self.tokens[i], tokenBinN): + replace.append(self.tokens[i]) + i = i + 1 + j = j + 1 + continue + raise BaseException('Error: incorect utf replacement.') + + #calibre rtf2xml does not support utfreplace + replace = [] + + newTokens.append(tokenUnicode(self.tokens[x].argument, self.tokens[x].separator, ucNbStack[len(ucNbStack) - 1], replace)) + if partialData != None: + newTokens.append(partialData) + continue + + newTokens.append(self.tokens[i]) + i = i + 1 + + 
class RtfTokenizer():
    """
    Split raw RTF into a flat list of token objects.

    The input is tokenized on construction; the result is available as
    ``self.tokens`` and can be serialized again with toRTF().
    Malformed input raises ValueError.
    """

    def __init__(self, rtfData):
        self.rtfData = rtfData
        self.tokens = []
        self.tokenize()

    def _flushData(self, start, end):
        # Emit the pending plain-data run [start:end), if there is one.
        # Returns -1, the "no pending run" marker, for the caller to store.
        if start > -1:
            self.tokens.append(tokenData(self.rtfData[start:end]))
        return -1

    def _readControlWord(self, tokenStart, i):
        # Parse the control word whose backslash is at tokenStart and whose
        # first letter is at i; append its token and return the index of the
        # first character after it (a single space delimiter is consumed).
        data = self.rtfData
        size = len(data)

        while i < size and isAsciiLetter(data[i]):
            i = i + 1
        if i >= size:
            raise ValueError('Error (at:%d): Control Word without end.'%(tokenStart))
        tokenEnd = i

        # Optional numeric argument. A '-' only counts as a sign when a
        # digit follows it; otherwise it is ordinary data after the word.
        negative = data[i] == '-' and i + 1 < size and isDigit(data[i + 1])
        if negative or isDigit(data[i]):
            if negative:
                i = i + 1
            digits = 0
            while i < size and isDigit(data[i]):
                digits = digits + 1
                i = i + 1
                if digits > 10:
                    raise ValueError('Error (at:%d): Too many digits in control word numeric argument.'%(tokenStart,))
            if i >= size:
                raise ValueError('Error (at:%d): Control Word without numeric argument end.'%(tokenStart,))

        separator = ''
        if data[i] == ' ':
            separator = ' '

        controlWord = data[tokenStart:tokenEnd]
        if tokenEnd < i:
            value = int(data[tokenEnd:i])
            if controlWord == "\\bin":
                # The binary payload starts after the delimiter and is
                # exactly `value` bytes long; it must not be tokenized.
                payloadStart = i + len(separator)
                payloadEnd = payloadStart + value
                if value < 0 or payloadEnd > size:
                    raise ValueError('Error (at:%d): Invalid \\bin data length.'%(tokenStart,))
                self.tokens.append(tokenBinN(data[payloadStart:payloadEnd], separator))
                return payloadEnd
            self.tokens.append(tokenControlWordWithNumericArgument(controlWord, value, separator))
        else:
            self.tokens.append(tokenControlWord(controlWord, separator))

        # space delimiter belongs to the control word, skip it
        return i + len(separator)

    def tokenize(self):
        """Scan self.rtfData and append the resulting tokens to self.tokens."""
        data = self.rtfData
        size = len(data)
        i = 0
        lastDataStart = -1
        while i < size:
            char = data[i]

            if char == '{':
                lastDataStart = self._flushData(lastDataStart, i)
                self.tokens.append(tokenDelimitatorStart())
                i = i + 1
                continue

            if char == '}':
                lastDataStart = self._flushData(lastDataStart, i)
                self.tokens.append(tokenDelimitatorEnd())
                i = i + 1
                continue

            if char == '\\':
                if i + 1 >= size:
                    raise ValueError('Error: Control character found at the end of the document.')
                lastDataStart = self._flushData(lastDataStart, i)

                tokenStart = i
                i = i + 1
                if isAsciiLetter(data[i]):
                    #Control Word
                    i = self._readControlWord(tokenStart, i)
                else:
                    #Control Symbol: backslash plus exactly one character
                    self.tokens.append(tokenControlSymbol(data[tokenStart : i + 1]))
                    i = i + 1
                continue

            if lastDataStart < 0:
                lastDataStart = i
            i = i + 1

        # Do not lose plain data that follows the last token
        self._flushData(lastDataStart, size)

    def toRTF(self):
        """Return the RTF text obtained by concatenating all tokens."""
        return "".join([token.toRTF() for token in self.tokens])
a/src/calibre/gui2/convert/gui_conversion.py b/src/calibre/gui2/convert/gui_conversion.py index 95396aa9dd..b951244e71 100644 --- a/src/calibre/gui2/convert/gui_conversion.py +++ b/src/calibre/gui2/convert/gui_conversion.py @@ -4,10 +4,14 @@ __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +import os +from optparse import OptionParser + from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.ebooks.conversion.plumber import Plumber -# ?from calibre.library.catalog import Catalog +from calibre.customize.ui import plugin_for_catalog_format from calibre.utils.logging import Log +from calibre.gui2 import choose_dir, Application def gui_convert(input, output, recommendations, notification=DummyReporter(), abort_after_input_dump=False, log=None): @@ -21,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(), plumber.run() -def gui_catalog(fmt, title, dbspec, ids, out_file_name, +def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options, notification=DummyReporter(), log=None): if log is None: log = Log() @@ -33,19 +37,25 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, else: # To be implemented in the future pass - # Implement the interface to the catalog generating code here - #db - log("gui2.convert.gui_conversion:gui_catalog()") - log("fmt: %s" % fmt) - log("title: %s" % title) - log("dbspec: %s" % dbspec) - log("ids: %s" % ids) - log("out_file_name: %s" % out_file_name) - - # This needs to call the .run() method of the plugin associated with fmt - # Needs to set up options before the call - # catalog = Catalog(out_file_name, options, dbspec) - # Can I call library.cli:catalog_option_parser()? 
+ # Create a minimal OptionParser that we can append to + parser = OptionParser() + args = [] + parser.add_option("--verbose", action="store_true", dest="verbose", default=True) + opts, args = parser.parse_args() + + # Populate opts + opts.ids = ids + opts.search_text = None + opts.sort_by = None + + # Extract the option dictionary to comma-separated lists + for option in fmt_options: + setattr(opts,option, ','.join(fmt_options[option])) + + # Fetch and run the plugin for fmt + plugin = plugin_for_catalog_format(fmt) + plugin.run(out_file_name, opts, db) + diff --git a/src/calibre/gui2/dialogs/catalog.py b/src/calibre/gui2/dialogs/catalog.py index 9108eb0e1b..8407e2c426 100644 --- a/src/calibre/gui2/dialogs/catalog.py +++ b/src/calibre/gui2/dialogs/catalog.py @@ -12,15 +12,18 @@ from PyQt4.Qt import QDialog, QWidget from calibre.customize.ui import config from calibre.gui2.dialogs.catalog_ui import Ui_Dialog -from calibre.gui2 import dynamic +from calibre.gui2 import gprefs, dynamic from calibre.customize.ui import available_catalog_formats, catalog_plugins from calibre.gui2.catalog.catalog_csv_xml import PluginWidget class Catalog(QDialog, Ui_Dialog): + ''' Catalog Dialog builder''' + widgets = [] def __init__(self, parent, dbspec, ids): import re, cStringIO from calibre import prints as info + from calibre.gui2 import dynamic from PyQt4.uic import compileUi QDialog.__init__(self, parent) @@ -42,6 +45,7 @@ class Catalog(QDialog, Ui_Dialog): self.fmts = [] from calibre.customize.builtins import plugins as builtin_plugins + from calibre.customize import CatalogPlugin for plugin in catalog_plugins(): if plugin.name in config['disabled_plugins']: @@ -49,38 +53,30 @@ class Catalog(QDialog, Ui_Dialog): name = plugin.name.lower().replace(' ', '_') if type(plugin) in builtin_plugins: - info("Adding tab for builtin Catalog plugin %s" % plugin.name) + #info("Adding widget for builtin Catalog plugin %s" % plugin.name) try: catalog_widget = 
__import__('calibre.gui2.catalog.'+name, fromlist=[1]) pw = catalog_widget.PluginWidget() - pw.initialize() + pw.initialize(name) pw.ICON = I('forward.svg') - page = self.tabs.addTab(pw,pw.TITLE) - [self.fmts.append([file_type, pw.sync_enabled]) for file_type in plugin.file_types] - info("\tSupported formats: %s" % plugin.file_types) - info("\tsync_enabled: %s" % pw.sync_enabled) - + self.widgets.append(pw) + [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types] except ImportError: info("ImportError with %s" % name) continue else: - # Test to see if .ui and .py files exist in tmpdir/calibre_plugin_resources - form = os.path.join(tempfile.gettempdir(), - 'calibre_plugin_resources','%s.ui' % name) - klass = os.path.join(tempfile.gettempdir(), - 'calibre_plugin_resources','%s.py' % name) - compiled_form = os.path.join(tempfile.gettempdir(), - 'calibre_plugin_resources','%s_ui.py' % name) - plugin_resources = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources') + # Load dynamic tab + form = os.path.join(plugin.resources_path,'%s.ui' % name) + klass = os.path.join(plugin.resources_path,'%s.py' % name) + compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name) if os.path.exists(form) and os.path.exists(klass): - info("Adding tab for user-installed Catalog plugin %s" % plugin.name) + #info("Adding widget for user-installed Catalog plugin %s" % plugin.name) - # Compile the form provided in plugin.zip - if not os.path.exists(compiled_form) or \ - os.stat(form).st_mtime > os.stat(compiled_form).st_mtime: - info('\tCompiling form', form) + # Compile the .ui form provided in plugin.zip + if not os.path.exists(compiled_form): + # info('\tCompiling form', form) buf = cStringIO.StringIO() compileUi(form, buf) dat = buf.getvalue() @@ -88,35 +84,41 @@ class Catalog(QDialog, Ui_Dialog): re.DOTALL).sub(r'_("\1")', dat) open(compiled_form, 'wb').write(dat) - # Import the Catalog class from the dynamic .py file + # Import 
the dynamic PluginWidget() from .py file provided in plugin.zip try: - sys.path.insert(0, plugin_resources) + sys.path.insert(0, plugin.resources_path) catalog_widget = __import__(name, fromlist=[1]) - dpw = catalog_widget.PluginWidget() - dpw.initialize() - dpw.ICON = I('forward.svg') - page = self.tabs.addTab(dpw, dpw.TITLE) - [self.fmts.append([file_type, dpw.sync_enabled]) for file_type in plugin.file_types] - info("\tSupported formats: %s" % plugin.file_types) - info("\tsync_enabled: %s" % dpw.sync_enabled) + pw = catalog_widget.PluginWidget() + pw.initialize(name) + pw.ICON = I('forward.svg') + self.widgets.append(pw) + [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types] except ImportError: info("ImportError with %s" % name) continue finally: - sys.path.remove(plugin_resources) + sys.path.remove(plugin.resources_path) else: info("No dynamic tab resources found for %s" % name) + self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE)) + for pw in self.widgets: + page = self.tabs.addTab(pw,pw.TITLE) + # Generate a sorted list of installed catalog formats/sync_enabled pairs - # Generate a parallel list of sync_enabled[True|False]ß - self.fmts = sorted([x[0].upper() for x in self.fmts]) + fmts = sorted([x[0] for x in self.fmts]) + + self.sync_enabled_formats = [] + for fmt in self.fmts: + if fmt[1]: + self.sync_enabled_formats.append(fmt[0]) # Callback when format changes self.format.currentIndexChanged.connect(self.format_changed) # Add the installed catalog format list to the format QComboBox - self.format.addItems(self.fmts) + self.format.addItems(fmts) pref = dynamic.get('catalog_preferred_format', 'CSV') idx = self.format.findText(pref) @@ -127,9 +129,8 @@ class Catalog(QDialog, Ui_Dialog): self.sync.setChecked(dynamic.get('catalog_sync_to_device', True)) def format_changed(self, idx): - print "format_changed(idx): idx: %d" % idx cf = unicode(self.format.currentText()) - if cf in ('EPUB', 'MOBI'): + if 
def generate_catalog(parent, dbspec, ids):
    '''
    Show the Catalog dialog and build the description of a catalog job.

    :param parent: passed to the Catalog dialog as its parent
    :param dbspec: passed through to the dialog and to the job arguments
    :param ids: passed through to the dialog and to the job arguments
    :return: None when the dialog was not accepted, otherwise the tuple
             ('gui_catalog', args, description, output file name,
             d.catalog_sync, d.catalog_title)
    '''
    from calibre.gui2.dialogs.catalog import Catalog

    # Build the Catalog dialog in gui2.dialogs.catalog
    d = Catalog(parent, dbspec, ids)

    if d.exec_() != d.Accepted:
        return None

    # Create the output file
    out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())

    # Retrieve plugin options: for every tab whose title contains the chosen
    # format, take the options of the entry in d.fmts registered for that
    # format (fmt[0] is the format name, fmt[2] is used as the options source)
    fmt_options = {}
    for x in range(d.tabs.count()):
        if str(d.tabs.tabText(x)).find(str(d.catalog_format)) > -1:
            for fmt in d.fmts:
                if fmt[0] == d.catalog_format:
                    fmt_options = fmt[2].options()
                    # print "gui2.tools:generate_catalog(): options for %s: %s" % (fmt[0], fmt_options)

    args = [
        d.catalog_format,
        d.catalog_title,
        dbspec,
        ids,
        out.name,
        fmt_options
        ]
    # Only the name of the temporary file is handed on, so close it here
    out.close()

    # This calls gui2.convert.gui_conversion:gui_catalog()
    return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
            d.catalog_title
    def generate_catalog(self):
        '''
        Start a job that generates a catalog for the selected books.

        With fewer than two rows selected, every row of the library view
        model is used instead. Returns early, without starting a job, when
        there are no ids or when generate_catalog() returns None.
        '''
        rows = self.library_view.selectionModel().selectedRows()
        # No selection or a single selected row: fall back to all rows
        if not rows or len(rows) < 2:
            rows = xrange(self.library_view.model().rowCount(QModelIndex()))
        ids = map(self.library_view.model().id, rows)

        dbspec = None
        if not ids:
            return error_dialog(self, _('No books selected'),
                    _('No books selected to generate catalog for'),
                    show=True)

        # Calling gui2.tools:generate_catalog()
        ret = generate_catalog(self, dbspec, ids)
        if ret is None:
            return

        func, args, desc, out, sync, title = ret

        # The catalog format is taken from the output file's extension
        fmt = os.path.splitext(out)[1][1:].upper()
        job = self.job_manager.run_job(
                Dispatcher(self.catalog_generated), func, args=args,
                    description=desc)
        # Attach what the catalog_generated callback needs to the job
        job.catalog_file_path = out
        job.fmt = fmt
        job.catalog_sync, job.catalog_title = sync, title
        self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
path_to_output) log(" Output format: %s" % self.fmt) @@ -55,7 +54,7 @@ class CSV_XML(CatalogPlugin): log(" opts:") for key in keys: log(" %s: %s" % (key, opts_dict[key])) - + # Get the sorted, filtered database as a dictionary data = self.search_sort_db(db, opts) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 6e2d672202..ddfb96704c 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -644,6 +644,10 @@ def catalog_option_parser(args): output, fmt = validate_command_line(parser, args, log) # Add options common to all catalog plugins + parser.add_option('-i', '--ids', default=None, dest='ids', + help=_("Comma-separated list of database IDs to catalog.\n" + "If declared, --search is ignored.\n" + "Default: all")) parser.add_option('-s', '--search', default=None, dest='search_text', help=_("Filter the results by the search query. " "For the format of the search query, please see " @@ -656,31 +660,6 @@ def catalog_option_parser(args): # Add options specific to fmt plugin plugin = add_plugin_parser_options(fmt, parser, log) - # Merge options from GUI Preferences - ''' - # Placeholder sample code until we implement GUI preferences - from calibre.library.save_to_disk import config - c = config() - for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']: - opt = c.get_option(pref) - switch = '--dont-'+pref.replace('_', '-') - parser.add_option(switch, default=True, action='store_false', - help=opt.help+' '+_('Specifying this switch will turn ' - 'this behavior off.'), dest=pref) - - for pref in ['timefmt', 'template', 'formats']: - opt = c.get_option(pref) - switch = '--'+pref - parser.add_option(switch, default=opt.default, - help=opt.help, dest=pref) - - for pref in ('replace_whitespace', 'to_lowercase'): - opt = c.get_option(pref) - switch = '--'+pref.replace('_', '-') - parser.add_option(switch, default=False, action='store_true', - help=opt.help) - ''' - return parser, plugin, log def 
command_catalog(args, dbpath): @@ -693,6 +672,9 @@ def command_catalog(args, dbpath): return 1 if opts.verbose: log("library.cli:command_catalog dispatching to plugin %s" % plugin.name) + if opts.ids: + opts.ids = [int(id) for id in opts.ids.split(',')] + with plugin: plugin.run(args[1], opts, get_db(dbpath, opts)) return 0 diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 84638410c7..7b0f7a083e 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase): for i in iter(self): yield i[x] - def get_data_as_dict(self, prefix=None, authors_as_string=False): + def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None): ''' Return all metadata stored in the database as a dict. Includes paths to the cover and each format. :param prefix: The prefix for all paths. By default, the prefix is the absolute path to the library folder. + :param ids: Set of ids to return the data for. If None return data for + all entries in database. ''' if prefix is None: prefix = self.library_path @@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase): data = [] for record in self.data: if record is None: continue + db_id = record[FIELD_MAP['id']] + if ids is not None and db_id not in ids: + continue x = {} for field in FIELDS: x[field] = record[FIELD_MAP[field]] data.append(x) - x['id'] = record[FIELD_MAP['id']] + x['id'] = db_id x['formats'] = [] if not x['authors']: x['authors'] = _('Unknown') diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 697cfbe388..a0e5632cb7 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' ''' Manage application-wide preferences. 
''' -import os, re, cPickle, textwrap, traceback, plistlib +import os, re, cPickle, textwrap, traceback, plistlib, json from copy import deepcopy from functools import partial from optparse import OptionParser as _OptionParser @@ -564,23 +564,31 @@ class XMLConfig(dict): data types. ''' + EXTENSION = '.plist' + def __init__(self, rel_path_to_cf_file): dict.__init__(self) self.file_path = os.path.join(config_dir, *(rel_path_to_cf_file.split('/'))) self.file_path = os.path.abspath(self.file_path) - if not self.file_path.endswith('.plist'): - self.file_path += '.plist' + if not self.file_path.endswith(self.EXTENSION): + self.file_path += self.EXTENSION self.refresh() + def raw_to_object(self, raw): + return plistlib.readPlistFromString(raw) + + def to_raw(self): + return plistlib.writePlistToString(self) + def refresh(self): d = {} if os.path.exists(self.file_path): with ExclusiveFile(self.file_path) as f: raw = f.read() try: - d = plistlib.readPlistFromString(raw) if raw.strip() else {} + d = self.raw_to_object(raw) if raw.strip() else {} except SystemError: pass except: @@ -618,11 +626,21 @@ class XMLConfig(dict): if not os.path.exists(dpath): os.makedirs(dpath, mode=CONFIG_DIR_MODE) with ExclusiveFile(self.file_path) as f: - raw = plistlib.writePlistToString(self) + raw = self.to_raw() f.seek(0) f.truncate() f.write(raw) +class JSONConfig(XMLConfig): + + EXTENSION = '.json' + + def raw_to_object(self, raw): + return json.loads(raw.decode('utf-8')) + + def to_raw(self): + return json.dumps(self, indent=2) + def _prefs(): c = Config('global', 'calibre wide preferences') diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index 1ade012b1f..90f86a8368 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -104,6 +104,7 @@ _extra_lang_codes = { 'en_CY' : _('English (Cyprus)'), 'en_PK' : _('English (Pakistan)'), 'en_SG' : _('English (Singapore)'), + 'en_YE' : _('English (Yemen)'), 'de_AT' : _('German 
(AT)'), 'nl' : _('Dutch (NL)'), 'nl_BE' : _('Dutch (BE)'),