diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index 00b4a8753e..50a980dc92 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -66,4 +66,3 @@ class Adventure_zone(BasicNewsRecipe): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] return soup - diff --git a/recipes/am730.recipe b/recipes/am730.recipe new file mode 100644 index 0000000000..0fac4bea51 --- /dev/null +++ b/recipes/am730.recipe @@ -0,0 +1,290 @@ +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals +__license__ = 'GPL v3' +__copyright__ = '2013, Eddie Lau' +__Date__ = '' +__HiResImg__ = True + +''' +Change Log: +2013/03/30 -- first version +''' + +from calibre import (__appname__, force_unicode, strftime) +from calibre.utils.date import now as nowf +import os, datetime, re +from calibre.web.feeds.recipes import BasicNewsRecipe +from contextlib import nested +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag +from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.metadata import MetaInformation +from calibre.utils.localization import canonicalize_lang + +class AppleDaily(BasicNewsRecipe): + title = u'AM730' + __author__ = 'Eddie Lau' + publisher = 'AM730' + oldest_article = 1 + max_articles_per_feed = 100 + auto_cleanup = False + language = 'zh' + encoding = 'utf-8' + auto_cleanup = False + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + description = 'http://www.am730.com.hk' + category = 'Chinese, News, Hong Kong' + masthead_url = 'http://www.am730.com.hk/images/logo.jpg' + + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}' + keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}), + dict(name='div', attrs={'class':'thecontent wordsnap'}), + dict(name='a', attrs={'class':'lightboximg'})] + remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}), + dict(name='img', attrs={'src':'/images/am_endmark.gif'})] + + def get_dtlocal(self): + dt_utc = datetime.datetime.utcnow() + # convert UTC to local hk time - at HKT 6am, all news are available + return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24) + + def get_fetchdate(self): + if __Date__ <> '': + return __Date__ + else: + return self.get_dtlocal().strftime("%Y%m%d") + + def get_fetchformatteddate(self): + if __Date__ <> '': + return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8] + else: + return self.get_dtlocal().strftime("%Y-%m-%d") + + def get_fetchyear(self): + if __Date__ <> '': + return __Date__[0:4] + else: + return self.get_dtlocal().strftime("%Y") + + def get_fetchmonth(self): + if __Date__ <> '': + return __Date__[4:6] + else: + return self.get_dtlocal().strftime("%m") + + def get_fetchday(self): + if __Date__ <> '': + return __Date__[6:8] + else: + return self.get_dtlocal().strftime("%d") + + # Note: does not work with custom date given by __Date__ + def get_weekday(self): + return self.get_dtlocal().weekday() + + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) + + def 
parse_index(self): + feeds = [] + soup = self.index_to_soup('http://www.am730.com.hk/') + ul = soup.find(attrs={'class':'nav-section'}) + sectionList = [] + for li in ul.findAll('li'): + a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False) + title = li.find('a').get('title', False).strip() + sectionList.append((title, a)) + for title, url in sectionList: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + return feeds + + def parse_section(self, url): + soup = self.index_to_soup(url) + items = soup.findAll(attrs={'style':'padding-bottom: 15px;'}) + current_articles = [] + for item in items: + a = item.find(attrs={'class':'t6 f14'}).find('a', href=True) + articlelink = 'http://www.am730.com.hk/' + a.get('href', False) + title = self.tag_to_string(a) + description = self.tag_to_string(item.find(attrs={'class':'t3 f14'})) + current_articles.append({'title': title, 'url': articlelink, 'description': description}) + return current_articles + + def preprocess_html(self, soup): + multia = soup.findAll('a') + for a in multia: + if a is not None: + image = a.find('img') + if image is not None: + if __HiResImg__: + image['src'] = image.get('src').replace('/thumbs/', '/') + caption = image.get('alt') + tag = Tag(soup, "photo", []) + tag2 = Tag(soup, "photocaption", []) + tag.insert(0, image) + if caption is not None: + tag2.insert(0, caption) + tag.insert(1, tag2) + a.replaceWith(tag) + return soup + + def create_opf(self, feeds, dir=None): + if dir is None: + dir = self.output_dir + title = self.short_title() + if self.output_profile.periodical_date_in_title: + title += strftime(self.timefmt) + mi = MetaInformation(title, [__appname__]) + mi.publisher = __appname__ + mi.author_sort = __appname__ + if self.publication_type: + mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() + mi.timestamp = nowf() + article_titles, aseen = [], set() + for f in feeds: + for a in f: + if a.title and a.title not in aseen: + aseen.add(a.title) + article_titles.append(force_unicode(a.title, 'utf-8')) + + mi.comments = self.description + if not isinstance(mi.comments, unicode): + mi.comments = mi.comments.decode('utf-8', 'replace') + mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' + + '\n\n'.join(article_titles)) + + language = canonicalize_lang(self.language) + if language is not None: + mi.language = language + # This one affects the pub date shown in kindle title + #mi.pubdate = nowf() + # now appears to need the time field to be > 12.00noon as well + mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0) + opf_path = os.path.join(dir, 'index.opf') + ncx_path = os.path.join(dir, 'index.ncx') + + opf = OPFCreator(dir, mi) + # Add mastheadImage entry to section + mp = getattr(self, 'masthead_path', None) + if mp is not None and os.access(mp, os.R_OK): + from calibre.ebooks.metadata.opf2 import Guide + ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) + ref.type = 'masthead' + ref.title = 'Masthead Image' + opf.guide.append(ref) + + manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] + manifest.append(os.path.join(dir, 'index.html')) + manifest.append(os.path.join(dir, 'index.ncx')) + + # Get cover + cpath = getattr(self, 'cover_path', None) + if cpath is None: + pf = open(os.path.join(dir, 'cover.jpg'), 'wb') + if self.default_cover(pf): + cpath = pf.name + if cpath is not None and os.access(cpath, 
os.R_OK): + opf.cover = cpath + manifest.append(cpath) + + # Get masthead + mpath = getattr(self, 'masthead_path', None) + if mpath is not None and os.access(mpath, os.R_OK): + manifest.append(mpath) + + opf.create_manifest_from_files_in(manifest) + for mani in opf.manifest: + if mani.path.endswith('.ncx'): + mani.id = 'ncx' + if mani.path.endswith('mastheadImage.jpg'): + mani.id = 'masthead-image' + + entries = ['index.html'] + toc = TOC(base_path=dir) + self.play_order_counter = 0 + self.play_order_map = {} + + + def feed_index(num, parent): + f = feeds[num] + for j, a in enumerate(f): + if getattr(a, 'downloaded', False): + adir = 'feed_%d/article_%d/'%(num, j) + auth = a.author + if not auth: + auth = None + desc = a.text_summary + if not desc: + desc = None + else: + desc = self.description_limiter(desc) + tt = a.toc_thumbnail if a.toc_thumbnail else None + entries.append('%sindex.html'%adir) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + parent.add_item('%sindex.html'%adir, None, + a.title if a.title else _('Untitled Article'), + play_order=po, author=auth, + description=desc, toc_thumbnail=tt) + last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) + for sp in a.sub_pages: + prefix = os.path.commonprefix([opf_path, sp]) + relp = sp[len(prefix):] + entries.append(relp.replace(os.sep, '/')) + last = sp + + if os.path.exists(last): + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') + soup = BeautifulSoup(src) + body = soup.find('body') + if body is not None: + prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) + templ = self.navbar.generate(True, num, j, len(f), + not self.has_single_feed, + a.orig_url, __appname__, prefix=prefix, + center=self.center_navbar) + elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') + body.insert(len(body.contents), elem) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) + if len(feeds) == 0: + raise Exception('All feeds are empty, aborting.') + + if len(feeds) > 1: + for i, f in enumerate(feeds): + entries.append('feed_%d/index.html'%i) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + auth = getattr(f, 'author', None) + if not auth: + auth = None + desc = getattr(f, 'description', None) + if not desc: + desc = None + feed_index(i, toc.add_item('feed_%d/index.html'%i, None, + f.title, play_order=po, description=desc, author=auth)) + + else: + entries.append('feed_%d/index.html'%0) + feed_index(0, toc) + + for i, p in enumerate(entries): + entries[i] = os.path.join(dir, p.replace('/', os.sep)) + opf.create_spine(entries) + opf.set_toc(toc) + + with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): + opf.render(opf_file, ncx_file) + diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe index 763136c9b0..522427ed6a 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -1,161 +1,275 @@ -# -*- coding: utf-8 -*- -import re +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals +__license__ = 'GPL v3' +__copyright__ = '2013, Eddie Lau' +__Date__ = '' + +from calibre import (__appname__, force_unicode, strftime) +from calibre.utils.date import now as nowf +import os, datetime, re from calibre.web.feeds.recipes import BasicNewsRecipe +from contextlib import nested +from calibre.ebooks.BeautifulSoup import 
BeautifulSoup +from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.metadata.toc import TOC +from calibre.ebooks.metadata import MetaInformation +from calibre.utils.localization import canonicalize_lang class AppleDaily(BasicNewsRecipe): - - title = u'蘋果日報' - __author__ = u'蘋果日報' - __publisher__ = u'蘋果日報' - description = u'蘋果日報' - masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif' - language = 'zh_TW' - encoding = 'UTF-8' - timefmt = ' [%a, %d %b, %Y]' - needs_subscription = False + title = u'蘋果日報 (香港)' + __author__ = 'Eddie Lau' + publisher = '蘋果日報' + oldest_article = 1 + max_articles_per_feed = 100 + auto_cleanup = False + language = 'zh' + encoding = 'utf-8' + auto_cleanup = False remove_javascript = True - remove_tags_before = dict(name=['ul', 'h1']) - remove_tags_after = dict(name='form') - remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}), - dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']), - dict(name=['script', 'noscript', 'style', 'form'])] + use_embedded_content = False no_stylesheets = True - extra_css = ''' - @font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n - body {margin-right: 8pt; font-family: 'uming', serif;} - h1 {font-family: 'uming', serif, sans-serif} - ''' - #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}' + description = 'http://hkm.appledaily.com/' + category = 'Chinese, News, Hong Kong' + masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png' - preprocess_regexps = [ - (re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE), - lambda match: 'http://' + match.group('server') + '/' + match.group('path')), - ] + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' + keep_only_tags = [dict(name='div', attrs={'id':'content-article'})] + remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}), + dict(name='p', attrs={'class':'next'})] + + def get_dtlocal(self): + dt_utc = datetime.datetime.utcnow() + # convert UTC to local hk time - at HKT 6am, all news are available + return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24) + + def get_fetchdate(self): + if __Date__ <> '': + return __Date__ + else: + return self.get_dtlocal().strftime("%Y%m%d") + + def get_fetchformatteddate(self): + if __Date__ <> '': + return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8] + else: + return self.get_dtlocal().strftime("%Y-%m-%d") + + def get_fetchyear(self): + if __Date__ <> '': + return __Date__[0:4] + else: + return self.get_dtlocal().strftime("%Y") + + def get_fetchmonth(self): + if __Date__ <> '': + return __Date__[4:6] + else: + return self.get_dtlocal().strftime("%m") + + def get_fetchday(self): + if __Date__ <> '': + return __Date__[6:8] + else: + return self.get_dtlocal().strftime("%d") + + # Note: does not work with custom date given by __Date__ + def get_weekday(self): + return self.get_dtlocal().weekday() def get_cover_url(self): - return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif' - - - #def get_browser(self): - #br = 
BasicNewsRecipe.get_browser(self) - #if self.username is not None and self.password is not None: - # br.open('http://www.nytimes.com/auth/login') - # br.select_form(name='login') - # br['USERID'] = self.username - # br['PASSWORD'] = self.password - # br.submit() - #return br - - def preprocess_html(self, soup): - #process all the images - for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): - iurl = tag['src'] - #print 'checking image: ' + iurl - - #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+) - p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE) - - m = p.search(iurl) - - if m is not None: - iurl = 'http://' + m.group('server') + '/' + m.group('path') - #print 'working! new url: ' + iurl - tag['src'] = iurl - #else: - #print 'not good' - - for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')): - iurl = tag['href'] - #print 'checking image: ' + iurl - - #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+) - p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE) - - m = p.search(iurl) - - if m is not None: - iurl = 'http://' + m.group('server') + '/' + m.group('path') - #print 'working! new url: ' + iurl - tag['href'] = iurl - #else: - #print 'not good' - - return soup + soup = self.index_to_soup('http://hkm.appledaily.com/') + cover = soup.find(attrs={'class':'top-news'}).get('src', False) + br = BasicNewsRecipe.get_browser(self) + try: + br.open(cover) + except: + cover = None + return cover + def populate_article_metadata(self, article, soup, first): + if first and hasattr(self, 'add_toc_thumbnail'): + picdiv = soup.find('img') + if picdiv is not None: + self.add_toc_thumbnail(article,picdiv['src']) def parse_index(self): - base = 'http://news.hotpot.hk/fruit' - soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php') + feeds = [] + soup = self.index_to_soup('http://hkm.appledaily.com/') + ul = soup.find(attrs={'class':'menu'}) + sectionList = [] + for li in ul.findAll('li'): + a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False) + title = li.find('a', text=True).strip() + if not title == u'動新聞': + sectionList.append((title, a)) + for title, url in sectionList: + articles = self.parse_section(url) + if articles: + feeds.append((title, articles)) + return feeds - #def feed_title(div): - # return ''.join(div.findAll(text=True, recursive=False)).strip() + def parse_section(self, url): + soup = self.index_to_soup(url) + ul = soup.find(attrs={'class':'list'}) + current_articles = [] + for li in ul.findAll('li'): + a = li.find('a', href=True) + title = li.find('p', text=True).strip() + if a is not None: + current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)}) + pass + return current_articles - articles = {} - key = None - ans = [] - for div in soup.findAll('li'): - key = div.find(text=True, recursive=True); - #if key == u'豪情': - # continue; + def create_opf(self, feeds, dir=None): + if dir is None: + dir = self.output_dir + title = self.short_title() + if self.output_profile.periodical_date_in_title: + title += strftime(self.timefmt) + mi = MetaInformation(title, [__appname__]) + mi.publisher = __appname__ + mi.author_sort = __appname__ + if self.publication_type: + mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title() + mi.timestamp = nowf() + article_titles, aseen = [], set() + for f in feeds: + for a in f: + if a.title and a.title not in aseen: + aseen.add(a.title) + 
article_titles.append(force_unicode(a.title, 'utf-8')) - print 'section=' + key + mi.comments = self.description + if not isinstance(mi.comments, unicode): + mi.comments = mi.comments.decode('utf-8', 'replace') + mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' + + '\n\n'.join(article_titles)) - articles[key] = [] + language = canonicalize_lang(self.language) + if language is not None: + mi.language = language + # This one affects the pub date shown in kindle title + #mi.pubdate = nowf() + # now appears to need the time field to be > 12.00noon as well + mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0) + opf_path = os.path.join(dir, 'index.opf') + ncx_path = os.path.join(dir, 'index.ncx') - ans.append(key) + opf = OPFCreator(dir, mi) + # Add mastheadImage entry to section + mp = getattr(self, 'masthead_path', None) + if mp is not None and os.access(mp, os.R_OK): + from calibre.ebooks.metadata.opf2 import Guide + ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) + ref.type = 'masthead' + ref.title = 'Masthead Image' + opf.guide.append(ref) - a = div.find('a', href=True) + manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] + manifest.append(os.path.join(dir, 'index.html')) + manifest.append(os.path.join(dir, 'index.ncx')) - if not a: - continue + # Get cover + cpath = getattr(self, 'cover_path', None) + if cpath is None: + pf = open(os.path.join(dir, 'cover.jpg'), 'wb') + if self.default_cover(pf): + cpath = pf.name + if cpath is not None and os.access(cpath, os.R_OK): + opf.cover = cpath + manifest.append(cpath) - url = base + '/' + a['href'] - print 'url=' + url + # Get masthead + mpath = getattr(self, 'masthead_path', None) + if mpath is not None and os.access(mpath, os.R_OK): + manifest.append(mpath) - if not articles.has_key(key): - articles[key] = [] - else: - # sub page - subSoup = self.index_to_soup(url) + opf.create_manifest_from_files_in(manifest) + for mani in opf.manifest: + if mani.path.endswith('.ncx'): + mani.id = 'ncx' + if mani.path.endswith('mastheadImage.jpg'): + mani.id = 'masthead-image' - for subDiv in subSoup.findAll('li'): - subA = subDiv.find('a', href=True) - subTitle = subDiv.find(text=True, recursive=True) - subUrl = base + '/' + subA['href'] - - print 'subUrl' + subUrl - - articles[key].append( - dict(title=subTitle, - url=subUrl, - date='', - description='', - content='')) + entries = ['index.html'] + toc = TOC(base_path=dir) + self.play_order_counter = 0 + self.play_order_map = {} -# elif div['class'] in ['story', 'story headline']: -# a = div.find('a', href=True) -# if not a: -# continue -# url = re.sub(r'\?.*', '', a['href']) -# url += '?pagewanted=all' -# title = self.tag_to_string(a, use_alt=True).strip() -# description = '' -# pubdate = strftime('%a, %d %b') -# summary = div.find(True, attrs={'class':'summary'}) -# if summary: -# description = self.tag_to_string(summary, use_alt=False) -# -# feed = key if key is not None else 'Uncategorized' -# if not articles.has_key(feed): -# articles[feed] = [] -# if not 'podcasts' in url: -# articles[feed].append( -# dict(title=title, url=url, date=pubdate, -# description=description, -# content='')) -# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2}) - ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)] - return ans + def feed_index(num, parent): + f = feeds[num] + for j, a in enumerate(f): + if getattr(a, 
'downloaded', False): + adir = 'feed_%d/article_%d/'%(num, j) + auth = a.author + if not auth: + auth = None + desc = a.text_summary + if not desc: + desc = None + else: + desc = self.description_limiter(desc) + tt = a.toc_thumbnail if a.toc_thumbnail else None + entries.append('%sindex.html'%adir) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + parent.add_item('%sindex.html'%adir, None, + a.title if a.title else _('Untitled Article'), + play_order=po, author=auth, + description=desc, toc_thumbnail=tt) + last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) + for sp in a.sub_pages: + prefix = os.path.commonprefix([opf_path, sp]) + relp = sp[len(prefix):] + entries.append(relp.replace(os.sep, '/')) + last = sp + if os.path.exists(last): + with open(last, 'rb') as fi: + src = fi.read().decode('utf-8') + soup = BeautifulSoup(src) + body = soup.find('body') + if body is not None: + prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last)))) + templ = self.navbar.generate(True, num, j, len(f), + not self.has_single_feed, + a.orig_url, __appname__, prefix=prefix, + center=self.center_navbar) + elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div') + body.insert(len(body.contents), elem) + with open(last, 'wb') as fi: + fi.write(unicode(soup).encode('utf-8')) + if len(feeds) == 0: + raise Exception('All feeds are empty, aborting.') + + if len(feeds) > 1: + for i, f in enumerate(feeds): + entries.append('feed_%d/index.html'%i) + po = self.play_order_map.get(entries[-1], None) + if po is None: + self.play_order_counter += 1 + po = self.play_order_counter + auth = getattr(f, 'author', None) + if not auth: + auth = None + desc = getattr(f, 'description', None) + if not desc: + desc = None + feed_index(i, toc.add_item('feed_%d/index.html'%i, None, + f.title, play_order=po, description=desc, author=auth)) + + else: + entries.append('feed_%d/index.html'%0) + feed_index(0, toc) + + for i, p in enumerate(entries): + entries[i] = os.path.join(dir, p.replace('/', os.sep)) + opf.create_spine(entries) + opf.set_toc(toc) + + with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): + opf.render(opf_file, ncx_file) diff --git a/recipes/bwmagazine.recipe b/recipes/bwmagazine.recipe index d11861ce08..ae3197da81 100644 --- a/recipes/bwmagazine.recipe +++ b/recipes/bwmagazine.recipe @@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe): , 'language' : language } - #remove_tags = [ - #dict(attrs={'class':'inStory'}) - #,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td']) - #,dict(attrs={'id':['inset','videoDisplay']}) - #] - #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})] - remove_attributes = ['lang'] - match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*'] - feeds = [ - (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'), - (u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ), - (u'Asia', u'http://www.businessweek.com/rss/asia.rss'), - (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'), - (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'), - (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'), - (u'Europe', u'http://www.businessweek.com/rss/europe.rss'), - (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'), - (u'Innovation & Design', 
u'http://www.businessweek.com/rss/innovate.rss'), - (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'), - (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'), - (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'), - (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'), - (u'Technology', u'http://www.businessweek.com/rss/technology.rss'), - (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'), - (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'), - (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'), - (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'), - (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'), - (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'), + (u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'), ] - def get_article_url(self, article): - url = article.get('guid', None) - if 'podcasts' in url: - return None - if 'surveys' in url: - return None - if 'images' in url: - return None - if 'feedroom' in url: - return None - if '/magazine/toc/' in url: - return None - rurl, sep, rest = url.rpartition('?') - if rurl: - return rurl - return rest - def print_version(self, url): - if '/news/' in url or '/blog/ in url': - return url - rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/') - return rurl.replace('/investing/','/investor/') + soup = self.index_to_soup(url) + prntver = soup.find('li', attrs={'class':'print tracked'}) + rurl = prntver.find('a', href=True)['href'] + return rurl + - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup - diff --git a/recipes/dzial_zagraniczny.recipe b/recipes/dzial_zagraniczny.recipe new file mode 100644 index 0000000000..1b8453dd40 --- /dev/null +++ b/recipes/dzial_zagraniczny.recipe @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +dzialzagraniczny.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class dzial_zagraniczny(BasicNewsRecipe): + title = u'Dział Zagraniczny' + __author__ = 'teepel ' + language = 'pl' + description = u'Polskiego czytelnika to nie interesuje' + INDEX = 'http://dzialzagraniczny.pl' + extra_css = 'img {display: block;}' + oldest_article = 7 + cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg' + max_articles_per_feed = 100 + remove_empty_feeds = True + remove_javascript = True + no_stylesheets = True + use_embedded_content = True + + feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')] diff --git a/recipes/el_diplo.recipe b/recipes/el_diplo.recipe index b9ef8268e1..7827cbbdd7 100644 --- a/recipes/el_diplo.recipe +++ b/recipes/el_diplo.recipe @@ -26,7 +26,7 @@ class ElDiplo_Recipe(BasicNewsRecipe): title = u'El Diplo' __author__ = 'Tomas Di Domenico' description = 'Publicacion mensual de Le Monde Diplomatique, edicion Argentina' - langauge = 'es_AR' + language = 'es_AR' needs_subscription = True auto_cleanup = True diff --git a/recipes/equipped.recipe b/recipes/equipped.recipe new file mode 100644 index 0000000000..af74c10523 --- /dev/null +++ b/recipes/equipped.recipe @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel , Artur Stachecki ' + +''' 
+equipped.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +class equipped(BasicNewsRecipe): + title = u'Equipped' + __author__ = 'teepel ' + language = 'pl' + description = u'Wiadomości z equipped.pl' + INDEX = 'http://equipped.pl' + extra_css = '.alignleft {float:left; margin-right:5px;}' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + #keep_only_tags = [dict(name='article')] + #remove_tags = [dict(id='disqus_thread')] + #remove_tags_after = [dict(id='disqus_thread')] + + feeds = [(u'Equipped', u'http://feeds.feedburner.com/Equippedpl?format=xml')] diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe index 66864b8561..bac16ebbd5 100644 --- a/recipes/focus_pl.recipe +++ b/recipes/focus_pl.recipe @@ -1,12 +1,12 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' + import re - from calibre.web.feeds.news import BasicNewsRecipe - class FocusRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'intromatyk ' + __author__ = u'Artur Stachecki ' language = 'pl' version = 1 diff --git a/recipes/gazeta-prawna-calibre-v1.recipe b/recipes/gazeta-prawna-calibre-v1.recipe index 293aa05b0d..f7d2c4935b 100644 --- a/recipes/gazeta-prawna-calibre-v1.recipe +++ b/recipes/gazeta-prawna-calibre-v1.recipe @@ -14,13 +14,14 @@ class gazetaprawna(BasicNewsRecipe): title = u'Gazeta Prawna' __author__ = u'Vroo' publisher = u'Infor Biznes' - oldest_article = 7 + oldest_article = 1 max_articles_per_feed = 20 no_stylesheets = True remove_javascript = True description = 'Polski dziennik gospodarczy' language = 'pl' encoding = 'utf-8' + ignore_duplicate_articles = {'title', 'url'} remove_tags_after = [ dict(name='div', attrs={'class':['data-art']}) @@ -30,7 +31,7 @@ class gazetaprawna(BasicNewsRecipe): ] feeds = [ - (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'), + (u'Z ostatniej chwili', u'http://rss.gazetaprawna.pl/GazetaPrawna'), (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'), (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'), (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'), @@ -51,3 +52,8 @@ class gazetaprawna(BasicNewsRecipe): url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna') url = url.replace('praca.gazetaprawna', 'www.gazetaprawna') return url + + def get_cover_url(self): + soup = self.index_to_soup('http://www.egazety.pl/infor/e-wydanie-dziennik-gazeta-prawna.html') + self.cover_url = soup.find('p', attrs={'class':'covr'}).a['href'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/icons/dzial_zagraniczny.png b/recipes/icons/dzial_zagraniczny.png new file mode 100644 index 0000000000..1982db0462 Binary files /dev/null and b/recipes/icons/dzial_zagraniczny.png differ diff --git a/recipes/icons/equipped.png b/recipes/icons/equipped.png new file mode 100644 index 0000000000..a532b6f6ac Binary files /dev/null and b/recipes/icons/equipped.png differ diff --git a/recipes/icons/gazeta-prawna-calibre-v1.png b/recipes/icons/gazeta-prawna-calibre-v1.png new file mode 100644 index 0000000000..e5c7ae965c Binary files /dev/null and b/recipes/icons/gazeta-prawna-calibre-v1.png differ diff --git a/recipes/icons/ittechblog.png b/recipes/icons/ittechblog.png new file mode 100644 index 0000000000..825e025510 Binary files /dev/null and b/recipes/icons/ittechblog.png differ diff --git 
a/recipes/icons/magazyn_consido.png b/recipes/icons/magazyn_consido.png new file mode 100644 index 0000000000..5d54a337de Binary files /dev/null and b/recipes/icons/magazyn_consido.png differ diff --git a/recipes/icons/media2.png b/recipes/icons/media2.png new file mode 100644 index 0000000000..8e98c4df4e Binary files /dev/null and b/recipes/icons/media2.png differ diff --git a/recipes/icons/mobilna.png b/recipes/icons/mobilna.png new file mode 100644 index 0000000000..30db9287be Binary files /dev/null and b/recipes/icons/mobilna.png differ diff --git a/recipes/icons/mojegotowanie.png b/recipes/icons/mojegotowanie.png new file mode 100644 index 0000000000..b9df6dc6d0 Binary files /dev/null and b/recipes/icons/mojegotowanie.png differ diff --git a/recipes/icons/najwyzszy_czas.png b/recipes/icons/najwyzszy_czas.png new file mode 100644 index 0000000000..bc6812ce0b Binary files /dev/null and b/recipes/icons/najwyzszy_czas.png differ diff --git a/recipes/icons/nowiny_rybnik.png b/recipes/icons/nowiny_rybnik.png new file mode 100644 index 0000000000..6f4b11c1f3 Binary files /dev/null and b/recipes/icons/nowiny_rybnik.png differ diff --git a/recipes/icons/osw.png b/recipes/icons/osw.png new file mode 100644 index 0000000000..0693aee762 Binary files /dev/null and b/recipes/icons/osw.png differ diff --git a/recipes/icons/ppe_pl.png b/recipes/icons/ppe_pl.png new file mode 100644 index 0000000000..42c9b42fa5 Binary files /dev/null and b/recipes/icons/ppe_pl.png differ diff --git a/recipes/icons/presseurop.png b/recipes/icons/presseurop.png new file mode 100644 index 0000000000..9967aac1fb Binary files /dev/null and b/recipes/icons/presseurop.png differ diff --git a/recipes/icons/res_publica.png b/recipes/icons/res_publica.png new file mode 100644 index 0000000000..7c21e9d96e Binary files /dev/null and b/recipes/icons/res_publica.png differ diff --git a/recipes/icons/wolne_media.png b/recipes/icons/wolne_media.png new file mode 100644 index 0000000000..78d72713ab Binary files /dev/null and b/recipes/icons/wolne_media.png differ diff --git a/recipes/ittechblog.recipe b/recipes/ittechblog.recipe new file mode 100644 index 0000000000..3fa557d11e --- /dev/null +++ b/recipes/ittechblog.recipe @@ -0,0 +1,26 @@ +__license__ = 'GPL v3' +__copyright__ = 'MrStefan' + +''' +www.ittechblog.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ittechblog(BasicNewsRecipe): + title = u'IT techblog' + __author__ = 'MrStefan ' + language = 'pl' + description =u'Na naszym blogu technologicznym znajdziesz między innymi: testy sprzętu, najnowsze startupy, technologiczne nowinki, felietony tematyczne.' 
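# A rough standalone sketch of the pruning that the keep_only_tags /
# remove_tags filters set just below ask calibre to perform during cleanup.
# Written against the standalone bs4 package for illustration; calibre
# recipes use the bundled calibre.ebooks.BeautifulSoup and differ in detail.
from bs4 import BeautifulSoup

def prune(html):
    soup = BeautifulSoup(html, 'html.parser')
    body = soup.find(attrs={'class': 'box'})  # keep_only_tags: keep only the 'box' div
    if body is None:
        return html
    # remove_tags: drop asides and the share/tag widgets inside the kept div
    for t in body.find_all('aside'):
        t.decompose()
    for t in body.find_all(attrs={'class': ['tags', 'counter', 'twitter-share-button']}):
        t.decompose()
    return str(body)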
+ extra_css = '.cover > img {display:block;}' + remove_empty_feeds = True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript = True + no_stylesheets = True + use_embedded_content = False + + keep_only_tags =[dict(attrs={'class':'box'})] + remove_tags =[dict(name='aside'), dict(attrs={'class':['tags', 'counter', 'twitter-share-button']})] + + feeds = [(u'Artykuły', u'http://feeds.feedburner.com/ITTechBlog?format=xml')] diff --git a/recipes/kp.recipe b/recipes/kp.recipe index 85bf356b4d..3a2bc62eb0 100644 --- a/recipes/kp.recipe +++ b/recipes/kp.recipe @@ -2,8 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class KrytykaPolitycznaRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'intromatyk ' + __author__ = u'Artur Stachecki ' language = 'pl' version = 1 diff --git a/recipes/list_apart.recipe b/recipes/list_apart.recipe index 35cbaad958..c11956110f 100644 --- a/recipes/list_apart.recipe +++ b/recipes/list_apart.recipe @@ -1,33 +1,23 @@ -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals from calibre.web.feeds.news import BasicNewsRecipe class AListApart (BasicNewsRecipe): - __author__ = u'Marc Busqué ' + __author__ = 'Marc Busqué ' __url__ = 'http://www.lamarciana.com' - __version__ = '1.0' + __version__ = '2.0' __license__ = 'GPL v3' - __copyright__ = u'2012, Marc Busqué ' + __copyright__ = '2012, Marc Busqué ' title = u'A List Apart' - description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.' + description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieves articles and columns.'
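# For context, a minimal BasicNewsRecipe skeleton showing the knobs these
# patches keep adjusting; the feed URL here is a hypothetical placeholder.
from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title = u'Example'
    language = 'en'
    oldest_article = 7            # how many days back to fetch
    max_articles_per_feed = 100
    no_stylesheets = True         # drop the site's own CSS
    use_embedded_content = False  # follow article links instead of feed bodies
    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
    feeds = [(u'All articles', u'http://example.com/feed/')]  # placeholder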
language = 'en' tags = 'web development, software' oldest_article = 120 remove_empty_feeds = True - no_stylesheets = True encoding = 'utf8' cover_url = u'http://alistapart.com/pix/alalogo.gif' - keep_only_tags = [ - dict(name='div', attrs={'id': 'content'}) - ] - remove_tags = [ - dict(name='ul', attrs={'id': 'metastuff'}), - dict(name='div', attrs={'class': 'discuss'}), - dict(name='div', attrs={'class': 'discuss'}), - dict(name='div', attrs={'id': 'learnmore'}), - ] - remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height'] - extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}' + extra_css = u'img {max-width: 100%; display: block; margin: auto;}' feeds = [ - (u'A List Apart', u'http://www.alistapart.com/site/rss'), + (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'), ] diff --git a/recipes/magazyn_consido.recipe b/recipes/magazyn_consido.recipe new file mode 100644 index 0000000000..d24c66d6a4 --- /dev/null +++ b/recipes/magazyn_consido.recipe @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +''' +magazynconsido.pl/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.utils.magick import Image + +class magazynconsido(BasicNewsRecipe): + title = u'Magazyn Consido' + __author__ = 'Artur Stachecki ,teepel ' + language = 'pl' + description =u'Portal dla architektów i projektantów' + masthead_url='http://qualitypixels.pl/wp-content/themes/airlock/advance/inc/timthumb.php?src=http://qualitypixels.pl/wp-content/uploads/2012/01/logotyp-magazynconsido-11.png&w=455&zc=1' + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets = True + use_embedded_content = False + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'h1')) + keep_only_tags.append(dict(name = 'p')) + keep_only_tags.append(dict(attrs = {'class' : 'navigation'})) + remove_tags =[dict(attrs = {'style' : 'font-size: x-small;' })] + + remove_tags_after =[dict(attrs = {'class' : 'navigation' })] + + extra_css=''' img {max-width:30%; max-height:30%; display: block; margin-left: auto; margin-right: auto;} + h1 {text-align: center;}''' + + def parse_index(self): #(kk) + soup = self.index_to_soup('http://feeds.feedburner.com/magazynconsido?format=xml') + feeds = [] + articles = {} + sections = [] + section = '' + + for item in soup.findAll('item') : + section = self.tag_to_string(item.category) + if not articles.has_key(section) : + sections.append(section) + articles[section] = [] + article_url = self.tag_to_string(item.guid) + article_title = self.tag_to_string(item.title) + article_date = self.tag_to_string(item.pubDate) + article_description = self.tag_to_string(item.description) + articles[section].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date, 'description' : article_description }) + + for section in sections : + if section == 'Video': + feeds.append((section, articles[section])) + feeds.pop() + else: + feeds.append((section, articles[section])) + return feeds + + def append_page(self, soup, appendtag): + apage = soup.find('div', attrs={'class':'wp-pagenavi'}) + if apage is not None: + nexturl = soup.find('a', attrs={'class':'nextpostslink'}) + soup2 = self.index_to_soup(nexturl['href']) + pagetext = soup2.findAll('p') + for tag in pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, tag) + + while appendtag.find('div', attrs={'class': ['height: 
35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}) is not None: + appendtag.find('div', attrs={'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}).replaceWith('') + + def preprocess_html(self, soup): #(kk) + self.append_page(soup, soup.body) + return self.adeify_images(soup) + + def postprocess_html(self, soup, first): + #process all the images + for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')): + iurl = tag['src'] + img = Image() + img.open(iurl) + if img < 0: + raise RuntimeError('Out of memory') + img.type = "GrayscaleType" + img.save(iurl) + return soup diff --git a/recipes/media2.recipe b/recipes/media2.recipe new file mode 100644 index 0000000000..135740a62e --- /dev/null +++ b/recipes/media2.recipe @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'teepel' + +''' +media2.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class media2_pl(BasicNewsRecipe): + title = u'Media2' + __author__ = 'teepel ' + language = 'pl' + description =u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' + masthead_url='http://media2.pl/res/logo/www.png' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + simultaneous_downloads = 5 + + extra_css = '''.news-lead{font-weight: bold; }''' + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})) + + remove_tags =[] + remove_tags.append(dict(name = 'span', attrs = {'class' : 'news-comments'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'item-sidebar'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-tags'})) + + feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')] diff --git a/recipes/mobilna.recipe b/recipes/mobilna.recipe new file mode 100644 index 0000000000..68ae011438 --- /dev/null +++ b/recipes/mobilna.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'MrStefan' + +''' +www.mobilna.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class mobilna(BasicNewsRecipe): + title = u'Mobilna.pl' + __author__ = 'MrStefan ' + language = 'pl' + description =u'twoja mobilna strona' + #masthead_url='' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + use_embedded_content = True + #keep_only_tags =[dict(attrs={'class':'Post'})] + + feeds = [(u'Artykuły', u'http://mobilna.pl/feed/')] diff --git a/recipes/mojegotowanie.recipe b/recipes/mojegotowanie.recipe new file mode 100644 index 0000000000..4b0de4a0e1 --- /dev/null +++ b/recipes/mojegotowanie.recipe @@ -0,0 +1,50 @@ +#!usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'MrStefan, teepel' + +''' +www.mojegotowanie.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class mojegotowanie(BasicNewsRecipe): + title = u'Moje Gotowanie' + __author__ = 'MrStefan , teepel ' + language = 'pl' + description =u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.' 
+ masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif' + cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'content'})) + + feeds = [(u'Artykuły', u'http://mojegotowanie.pl/rss/feed/artykuly'), + (u'Przepisy', u'http://mojegotowanie.pl/rss/feed/przepisy')] + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for feed in feeds: + for article in feed.articles[:]: + if 'film' in article.title: + feed.articles.remove(article) + return feeds + + def get_article_url(self, article): + link = article.get('link') + if 'Clayout0Cset0Cprint0' in link: + return link + + def print_version(self, url): + segment = url.split('/') + URLPart = segment[-2] + URLPart = URLPart.replace('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/') + URLPart = URLPart.replace('0I', '_') + URLPart = URLPart.replace('0C', '/') + return 'http://www.mojegotowanie.pl/layout/set/print' + URLPart diff --git a/recipes/najwyzszy_czas.recipe b/recipes/najwyzszy_czas.recipe new file mode 100644 index 0000000000..9c4a82c4ea --- /dev/null +++ b/recipes/najwyzszy_czas.recipe @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +nczas.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class nczas(BasicNewsRecipe): + title = u'Najwy\u017cszy Czas' + __author__ = 'teepel ' + language = 'pl' + description ='Wiadomości z nczas.com' + INDEX='http://nczas.com' + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = True + remove_empty_feeds= True + simultaneous_downloads = 5 + remove_javascript=True + remove_attributes = ['style'] + no_stylesheets=True + + feeds = [(u'Najwyższy Czas', u'http://nczas.com/feed/')] diff --git a/recipes/nowiny_rybnik.recipe b/recipes/nowiny_rybnik.recipe new file mode 100644 index 0000000000..e00a72e09b --- /dev/null +++ b/recipes/nowiny_rybnik.recipe @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class NowinyRybnik(BasicNewsRecipe): + title = u'Nowiny - Rybnik' + __author__ = 'Artur Stachecki ' + language = 'pl' + description = u'Tygodnik Regionalny NOWINY. 
Ogłoszenia drobne, wiadomości i wydarzenia z regionu Rybnika i okolic' + oldest_article = 7 + masthead_url = 'http://www.nowiny.rybnik.pl/logo/logo.jpg' + max_articles_per_feed = 100 + simultaneous_downloads = 5 + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [(dict(name='div', attrs={'id': 'drukuj'}))] + + remove_tags = [] + remove_tags.append(dict(name='div', attrs={'id': 'footer'})) + + feeds = [(u'Wszystkie artykuły', u'http://www.nowiny.rybnik.pl/rss,artykuly,dzial,0,miasto,0,ile,25.xml')] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/osw.recipe b/recipes/osw.recipe new file mode 100644 index 0000000000..8022f3e346 --- /dev/null +++ b/recipes/osw.recipe @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +http://www.osw.waw.pl - Osrodek studiow wschodnich +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class OSW_Recipe(BasicNewsRecipe): + + language = 'pl' + title = u'Ośrodek Studiów Wschodnich' + __author__ = 'teepel ' + INDEX='http://www.osw.waw.pl' + description = u'Ośrodek Studiów Wschodnich im. Marka Karpia. Centre for Eastern Studies.' + category = u'News' + oldest_article = 7 + max_articles_per_feed = 100 + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + remove_javascript = True + simultaneous_downloads = 5 + + keep_only_tags =[] + # this line should show the title of the article, but it doesn't work + keep_only_tags.append(dict(name = 'h1', attrs = {'class' : 'print-title'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'print-submitted'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'print-content'})) + + remove_tags =[] + remove_tags.append(dict(name = 'table', attrs = {'id' : 'attachments'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'print-submitted'})) + + feeds = [(u'OSW', u'http://www.osw.waw.pl/pl/rss.xml')] + + def print_version(self, url): + return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/') diff --git a/recipes/ppe_pl.recipe b/recipes/ppe_pl.recipe new file mode 100644 index 0000000000..2edc611ad7 --- /dev/null +++ b/recipes/ppe_pl.recipe @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ppeRecipe(BasicNewsRecipe): + __author__ = u'Artur Stachecki ' + language = 'pl' + + title = u'ppe.pl' + category = u'News' + description = u'Portal o konsolach i grach wideo.'
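# The osw recipe above rewrites every article URL onto the site's print
# view; a standalone sketch of that rewrite, using a hypothetical article
# path purely for illustration.
def print_version(url):
    return url.replace('http://www.osw.waw.pl/pl/', 'http://www.osw.waw.pl/pl/print/')

print(print_version('http://www.osw.waw.pl/pl/publikacje/analizy'))
# -> http://www.osw.waw.pl/pl/print/publikacje/analizy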
+ cover_url='' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 1 + max_articles_per_feed = 100000 + recursions = 0 + no_stylesheets = True + remove_javascript = True + simultaneous_downloads = 2 + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-heading'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'tresc-poziom'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria1'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria2'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'bateria3'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'news-photo'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'fbl'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'info'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'links'})) + + remove_tags.append(dict(name = 'div', attrs = {'style' : 'padding: 4px'})) + + feeds = [ + ('Newsy', 'feed://ppe.pl/rss/rss.xml'), + ] diff --git a/recipes/presseurop.recipe b/recipes/presseurop.recipe new file mode 100644 index 0000000000..ea06eb0c32 --- /dev/null +++ b/recipes/presseurop.recipe @@ -0,0 +1,33 @@ +#!/usr/bin/env python + +''' +www.presseurop.eu/pl +''' + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class presseurop(BasicNewsRecipe): + title = u'Presseurop' + description = u'Najlepsze artykuły z prasy europejskiej' + language = 'pl' + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + + feeds = [ + (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'), + (u'Społeczeństwo', u'http://www.presseurop.eu/pl/taxonomy/term/2/%2A/feed'), + (u'Gospodarka', u'http://www.presseurop.eu/pl/taxonomy/term/3/%2A/feed'), + (u'Kultura i debaty', u'http://www.presseurop.eu/pl/taxonomy/term/4/%2A/feed'), + (u'UE i Świat', u'http://www.presseurop.eu/pl/taxonomy/term/5/%2A/feed') + ] + + + preprocess_regexps = [ + (re.compile(r'\|.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), +] diff --git a/recipes/res_publica.recipe b/recipes/res_publica.recipe new file mode 100644 index 0000000000..e0d9ebbb56 --- /dev/null +++ b/recipes/res_publica.recipe @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ResPublicaNowaRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = u'Artur Stachecki ' + language = 'pl' + version = 1 + + title = u'Res Publica Nowa' + category = u'News' + description = u'Portal kulturalno-społecznego kwartalnika o profilu liberalnym, wydawany przez Fundację Res Publica' + cover_url='' + remove_empty_feeds= True + no_stylesheets=True + oldest_article = 7 + max_articles_per_feed = 100000 + recursions = 0 + no_stylesheets = True + remove_javascript = True + simultaneous_downloads = 5 + + feeds = [ + ('Artykuly', 'feed://publica.pl/feed'), + ] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/singtaohk.recipe b/recipes/singtaohk.recipe index d830381731..bb76c335a0 100644 --- a/recipes/singtaohk.recipe +++ b/recipes/singtaohk.recipe @@ -1,30 +1,30 @@ +# vim:fileencoding=UTF-8 +from __future__ import unicode_literals __license__ = 'GPL v3' -__copyright__ = '2011, Eddie Lau' +__copyright__ = '2011-2013, Eddie Lau' # data source: normal, mobile __Source__ = 
'mobile' # please replace the following "True" with "False". (Default: True) __MakePeriodical__ = True # Turn below to True if your device supports display of CJK titles (Default: False) -__UseChineseTitle__ = False +__UseChineseTitle__ = True # Set it to False if you want to skip images (Default: True) __KeepImages__ = True # Set it to True if you want to include a summary in Kindle's article view (Default: False) -__IncludeSummary__ = False +__IncludeSummary__ = True # Set it to True if you want thumbnail images in Kindle's article view (Default: True) __IncludeThumbnails__ = True ''' Change Log: +2013/03/31 -- fix cover retrieval code and heading size, and remove   in summary 2011/12/29 -- first version done -TODO: -* use alternative source at http://m.singtao.com/index.php ''' from calibre.utils.date import now as nowf import os, datetime, re -from datetime import date from calibre.web.feeds.recipes import BasicNewsRecipe from contextlib import nested from calibre.ebooks.BeautifulSoup import BeautifulSoup @@ -41,7 +41,7 @@ class STHKRecipe(BasicNewsRecipe): title = 'Sing Tao Daily - Hong Kong' description = 'Hong Kong Chinese Newspaper (http://singtao.com)' category = 'Chinese, News, Hong Kong' - extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}' + extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:200%;}' masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png' if __Source__ == 'normal': keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})] @@ -96,17 +96,13 @@ class STHKRecipe(BasicNewsRecipe): return self.get_dtlocal().strftime("%d") def get_cover_url(self): - #cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29 - base = 2660 - todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday())) - diff = todaydate - date(2011, 12, 29) - base = base + int(diff.total_seconds()/(3600*24)) - cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg' + soup = self.index_to_soup('http://m.singtao.com/') + cover = soup.find(attrs={'class':'special'}).get('src', False) br = BasicNewsRecipe.get_browser(self) try: br.open(cover) except: - cover = 'http://singtao.com/images/stlogo.gif' + cover = None return cover def parse_index(self): @@ -289,11 +285,11 @@ class STHKRecipe(BasicNewsRecipe): # the text may or may not be enclosed in
<p>
tag paras = articlebody.findAll('p') if not paras: - paras = articlebody + paras = articlebody textFound = False for p in paras: if not textFound: - summary_candidate = self.tag_to_string(p).strip() + summary_candidate = self.tag_to_string(p).strip().replace(' ', '') if len(summary_candidate) > 0: summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1) article.summary = article.text_summary = summary_candidate @@ -489,3 +485,4 @@ class STHKRecipe(BasicNewsRecipe): + diff --git a/recipes/sport_pl.recipe b/recipes/sport_pl.recipe index 711fa44126..dd7faccdb0 100644 --- a/recipes/sport_pl.recipe +++ b/recipes/sport_pl.recipe @@ -20,7 +20,7 @@ class sport_pl(BasicNewsRecipe): remove_javascript=True no_stylesheets=True remove_empty_feeds = True - + ignore_duplicate_articles = {'title', 'url'} keep_only_tags =[] keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'})) diff --git a/recipes/wirtualnemedia_pl.recipe b/recipes/wirtualnemedia_pl.recipe index 28278c2e24..ed3b3787f8 100644 --- a/recipes/wirtualnemedia_pl.recipe +++ b/recipes/wirtualnemedia_pl.recipe @@ -1,7 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class WirtualneMedia(BasicNewsRecipe): - title = u'wirtualnemedia.pl' + title = u'Wirtualnemedia.pl' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True diff --git a/recipes/wolne_media.recipe b/recipes/wolne_media.recipe new file mode 100644 index 0000000000..5f8c87a607 --- /dev/null +++ b/recipes/wolne_media.recipe @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel ' + +''' +wolnemedia.net +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class wolne_media(BasicNewsRecipe): + title = u'Wolne Media' + __author__ = 'teepel ' + language = 'pl' + description ='Wiadomości z wolnemedia.net' + INDEX='http://wolnemedia.net' + oldest_article = 1 + max_articles_per_feed = 100 + remove_empty_feeds= True + simultaneous_downloads = 5 + remove_javascript=True + no_stylesheets=True + auto_cleanup = True + + feeds = [(u'Wiadomości z kraju', u'http://wolnemedia.net/category/wiadomosci-z-kraju/feed/'),(u'Wiadomości ze świata', u'http://wolnemedia.net/category/wiadomosci-ze-swiata/feed/'),(u'Edukacja', u'http://wolnemedia.net/category/edukacja/feed/'),(u'Ekologia', u'http://wolnemedia.net/category/ekologia/feed/'),(u'Gospodarka', u'http://wolnemedia.net/category/gospodarka/feed/'),(u'Historia', u'http://wolnemedia.net/category/historia/feed/'),(u'Kultura', u'http://wolnemedia.net/category/kultura/feed/'),(u'Kulturoznawstwo', u'http://wolnemedia.net/category/kulturoznawstwo/feed/'),(u'Media', u'http://wolnemedia.net/category/media/feed/'),(u'Nauka', u'http://wolnemedia.net/category/nauka/feed/'),(u'Opowiadania', u'http://wolnemedia.net/category/opowiadania/feed/'),(u'Paranauka i ezoteryka', u'http://wolnemedia.net/category/ezoteryka/feed/'),(u'Polityka', u'http://wolnemedia.net/category/polityka/feed/'),(u'Prawo', u'http://wolnemedia.net/category/prawo/feed/'),(u'Publicystyka', u'http://wolnemedia.net/category/publicystyka/feed/'),(u'Reportaż', u'http://wolnemedia.net/category/reportaz/feed/'),(u'Seks', u'http://wolnemedia.net/category/seks/feed/'),(u'Społeczeństwo', u'http://wolnemedia.net/category/spoleczenstwo/feed/'),(u'Świat komputerów', u'http://wolnemedia.net/category/swiat-komputerow/feed/'),(u'Wierzenia', u'http://wolnemedia.net/category/wierzenia/feed/'),(u'Zdrowie', u'http://wolnemedia.net/category/zdrowie/feed/')] diff --git a/recipes/wprost.recipe 
b/recipes/wprost.recipe index 90dde251ca..d923f64a3f 100644 --- a/recipes/wprost.recipe +++ b/recipes/wprost.recipe @@ -1,10 +1,9 @@ #!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2010, matek09, matek09@gmail.com' -__copyright__ = 'Modified 2011, Mariusz Wolek ' -__copyright__ = 'Modified 2012, Artur Stachecki ' - +__copyright__ = '''2010, matek09, matek09@gmail.com + Modified 2011, Mariusz Wolek + Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re @@ -16,12 +15,12 @@ class Wprost(BasicNewsRecipe): ICO_BLOCKED = 'http://www.wprost.pl/G/layout2/ico_blocked.png' title = u'Wprost' __author__ = 'matek09' - description = 'Weekly magazine' + description = u'Popularny tygodnik ogólnopolski - Wprost. Najlepszy wśród polskich tygodników - opiniotwórczy - społeczno-informacyjny - społeczno-kulturalny.' encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True - recursions = 0 + recursions = 0 remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) ''' @@ -94,5 +93,3 @@ class Wprost(BasicNewsRecipe): 'description' : '' }) return articles - - diff --git a/recipes/wprost_rss.recipe b/recipes/wprost_rss.recipe index bffbacc474..59c130fc75 100644 --- a/recipes/wprost_rss.recipe +++ b/recipes/wprost_rss.recipe @@ -1,10 +1,9 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2010, matek09, matek09@gmail.com' -__copyright__ = 'Modified 2011, Mariusz Wolek ' -__copyright__ = 'Modified 2012, Artur Stachecki ' +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = '''2010, matek09, matek09@gmail.com + Modified 2011, Mariusz Wolek + Modified 2012, Artur Stachecki ''' from calibre.web.feeds.news import BasicNewsRecipe import re @@ -12,13 +11,14 @@ import re class Wprost(BasicNewsRecipe): title = u'Wprost (RSS)' __author__ = 'matek09' - description = 'Weekly magazine' + description = u'Portal informacyjny. Najświeższe wiadomości, najciekawsze komentarze i opinie. Blogi najlepszych publicystów.' 
encoding = 'ISO-8859-2' no_stylesheets = True language = 'pl' remove_javascript = True recursions = 0 use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} remove_empty_feeds = True remove_tags_before = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) remove_tags_after = dict(dict(name = 'div', attrs = {'id' : 'print-layer'})) @@ -48,20 +48,20 @@ class Wprost(BasicNewsRecipe): #h2 {font-size: x-large; font-weight: bold} feeds = [(u'Tylko u nas', u'http://www.wprost.pl/rss/rss_wprostextra.php'), - (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'), - (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'), - (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'), - (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'), - (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'), - (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'), - (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'), - (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'), - (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'), - (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'), - (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'), - (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'), - (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php') - ] + (u'Wydarzenia', u'http://www.wprost.pl/rss/rss.php'), + (u'Komentarze', u'http://www.wprost.pl/rss/rss_komentarze.php'), + (u'Wydarzenia: Kraj', u'http://www.wprost.pl/rss/rss_kraj.php'), + (u'Komentarze: Kraj', u'http://www.wprost.pl/rss/rss_komentarze_kraj.php'), + (u'Wydarzenia: Świat', u'http://www.wprost.pl/rss/rss_swiat.php'), + (u'Komentarze: Świat', u'http://www.wprost.pl/rss/rss_komentarze_swiat.php'), + (u'Wydarzenia: Gospodarka', u'http://www.wprost.pl/rss/rss_gospodarka.php'), + (u'Komentarze: Gospodarka', u'http://www.wprost.pl/rss/rss_komentarze_gospodarka.php'), + (u'Wydarzenia: Życie', u'http://www.wprost.pl/rss/rss_zycie.php'), + (u'Komentarze: Życie', u'http://www.wprost.pl/rss/rss_komentarze_zycie.php'), + (u'Wydarzenia: Sport', u'http://www.wprost.pl/rss/rss_sport.php'), + (u'Komentarze: Sport', u'http://www.wprost.pl/rss/rss_komentarze_sport.php'), + (u'Przegląd prasy', u'http://www.wprost.pl/rss/rss_prasa.php') + ] def get_cover_url(self): soup = self.index_to_soup('http://www.wprost.pl/tygodnik') diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index ff1a53de96..9851d76af4 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -79,7 +79,7 @@ author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council', # By default, calibre splits a string containing multiple author names on # ampersands and the words "and" and "with". You can customize the splitting # by changing the regular expression below. Strings are split on whatever the -# specified regular expression matches. +# specified regular expression matches, in addition to ampersands. 
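# (Illustrative aside, not one of the shipped defaults: with Python's re
# module, re.split(r'(?i),?\s+(and|with)\s+', 'John Smith and Jane Doe')
# returns ['John Smith', 'and', 'Jane Doe'] -- the capturing group keeps the
# matched separator word in the result list. The ampersand splitting noted
# above is handled separately, regardless of this pattern.)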
# Default: r'(?i),?\s+(and|with)\s+' authors_split_regex = r'(?i),?\s+(and|with)\s+' diff --git a/setup/installer/linux/freeze2.py b/setup/installer/linux/freeze2.py index cbf0363fc9..44b8e81bff 100644 --- a/setup/installer/linux/freeze2.py +++ b/setup/installer/linux/freeze2.py @@ -47,6 +47,10 @@ binary_includes = [ '/usr/lib/libgthread-2.0.so.0', '/usr/lib/libpng14.so.14', '/usr/lib/libexslt.so.0', + # Ensure that libimobiledevice is compiled against openssl, not gnutls + '/usr/lib/libimobiledevice.so.3', + '/usr/lib/libusbmuxd.so.2', + '/usr/lib/libplist.so.1', MAGICK_PREFIX+'/lib/libMagickWand.so.5', MAGICK_PREFIX+'/lib/libMagickCore.so.5', '/usr/lib/libgcrypt.so.11', diff --git a/setup/installer/osx/app/main.py b/setup/installer/osx/app/main.py index 345b75f56f..2182038088 100644 --- a/setup/installer/osx/app/main.py +++ b/setup/installer/osx/app/main.py @@ -399,7 +399,8 @@ class Py2App(object): @flush def add_fontconfig(self): info('\nAdding fontconfig') - for x in ('fontconfig.1', 'freetype.6', 'expat.1'): + for x in ('fontconfig.1', 'freetype.6', 'expat.1', + 'plist.1', 'usbmuxd.2', 'imobiledevice.3'): src = os.path.join(SW, 'lib', 'lib'+x+'.dylib') self.install_dylib(src) dst = os.path.join(self.resources_dir, 'fonts') diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index e157c36c5e..474617c911 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -757,6 +757,7 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB from calibre.ebooks.metadata.sources.overdrive import OverDrive from calibre.ebooks.metadata.sources.douban import Douban from calibre.ebooks.metadata.sources.ozon import Ozon +# from calibre.ebooks.metadata.sources.google_images import GoogleImages plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 849d1a21f4..06fd2784e4 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name): config['enabled_plugins'] = ep default_disabled_plugins = set([ - 'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', + 'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', ]) def is_disabled(plugin): diff --git a/src/calibre/db/view.py b/src/calibre/db/view.py index e9de69e320..e0f99eede0 100644 --- a/src/calibre/db/view.py +++ b/src/calibre/db/view.py @@ -7,7 +7,9 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import weakref from functools import partial +from itertools import izip, imap def sanitize_sort_field_name(field_metadata, field): field = field_metadata.search_term_to_field_key(field.lower().strip()) @@ -15,11 +17,39 @@ def sanitize_sort_field_name(field_metadata, field): field = {'title': 'sort', 'authors':'author_sort'}.get(field, field) return field +class MarkedVirtualField(object): + + def __init__(self, marked_ids): + self.marked_ids = marked_ids + + def iter_searchable_values(self, get_metadata, candidates, default_value=None): + for book_id in candidates: + yield self.marked_ids.get(book_id, default_value), {book_id} + +class TableRow(list): + + def __init__(self, book_id, view): + self.book_id = book_id + self.view = weakref.ref(view) + + def __getitem__(self, obj): + view = self.view() + if isinstance(obj, slice): + return [view._field_getters[c](self.book_id) + for c in xrange(*obj.indices(len(view._field_getters)))] + else: + 
return view._field_getters[obj](self.book_id) + class View(object): + ''' A table view of the database, with rows and columns. Also supports + filtering and sorting. ''' + def __init__(self, cache): self.cache = cache self.marked_ids = {} + self.search_restriction_book_count = 0 + self.search_restriction = '' self._field_getters = {} for col, idx in cache.backend.FIELD_MAP.iteritems(): if isinstance(col, int): @@ -38,16 +68,33 @@ class View(object): except KeyError: self._field_getters[idx] = partial(self.get, col) - self._map = list(self.cache.all_book_ids()) - self._map_filtered = list(self._map) + self._map = tuple(self.cache.all_book_ids()) + self._map_filtered = tuple(self._map) @property def field_metadata(self): return self.cache.field_metadata def _get_id(self, idx, index_is_id=True): - ans = idx if index_is_id else self.index_to_id(idx) - return ans + return idx if index_is_id else self.index_to_id(idx) + + def __getitem__(self, row): + return TableRow(self._map_filtered[row], self) # TableRow keeps a weakref to the View itself, not the Cache + + def __len__(self): + return len(self._map_filtered) + + def __iter__(self): + for book_id in self._map_filtered: + yield self._data[book_id] + + def iterall(self): + for book_id in self._map: + yield self[book_id] + + def iterallids(self): + for book_id in self._map: + yield book_id def get_field_map_field(self, row, col, index_is_id=True): ''' @@ -66,7 +113,7 @@ class View(object): def get_ondevice(self, idx, index_is_id=True, default_value=''): id_ = idx if index_is_id else self.index_to_id(idx) - self.cache.field_for('ondevice', id_, default_value=default_value) + return self.cache.field_for('ondevice', id_, default_value=default_value) def get_marked(self, idx, index_is_id=True, default_value=None): id_ = idx if index_is_id else self.index_to_id(idx) @@ -93,7 +140,7 @@ class View(object): ans.append(self.cache._author_data(id_)) return tuple(ans) - def multisort(self, fields=[], subsort=False): + def multisort(self, fields=[], subsort=False, only_ids=None): fields = [(sanitize_sort_field_name(self.field_metadata, x), bool(y)) for x, y in fields] keys = self.field_metadata.sortable_field_keys() fields = [x for x in fields if x[0] in keys] @@ -102,8 +149,70 @@ class View(object): if not fields: fields = [('timestamp', False)] - sorted_book_ids = self.cache.multisort(fields) - sorted_book_ids - # TODO: change maps + sorted_book_ids = self.cache.multisort(fields, ids_to_sort=only_ids) + if only_ids is None: + self._map = tuple(sorted_book_ids) + if len(self._map_filtered) == len(self._map): + self._map_filtered = tuple(self._map) + else: + fids = frozenset(self._map_filtered) + self._map_filtered = tuple(i for i in self._map if i in fids) + else: + smap = {book_id:i for i, book_id in enumerate(sorted_book_ids)} + only_ids.sort(key=smap.get) + def search(self, query, return_matches=False): + ans = self.search_getting_ids(query, self.search_restriction, + set_restriction_count=True) + if return_matches: + return ans + self._map_filtered = tuple(ans) + + def search_getting_ids(self, query, search_restriction, + set_restriction_count=False): + q = '' + if not query or not query.strip(): + q = search_restriction + else: + q = query + if search_restriction: + q = u'(%s) and (%s)' % (search_restriction, query) + if not q: + if set_restriction_count: + self.search_restriction_book_count = len(self._map) + return list(self._map) + matches = self.cache.search( + query, search_restriction, virtual_fields={'marked':MarkedVirtualField(self.marked_ids)}) + rv = [x for x in self._map if x in matches] + if 
set_restriction_count and q == search_restriction: + self.search_restriction_book_count = len(rv) + return rv + + def set_search_restriction(self, s): + self.search_restriction = s + + def search_restriction_applied(self): + return bool(self.search_restriction) + + def get_search_restriction_book_count(self): + return self.search_restriction_book_count + + def set_marked_ids(self, id_dict): + ''' + ids in id_dict are "marked". They can be searched for by + using the search term ``marked:true``. Pass in an empty dictionary or + set to clear marked ids. + + :param id_dict: Either a dictionary mapping ids to values or a set + of ids. In the latter case, the value is set to 'true' for all ids. If + a mapping is provided, then the search can be used to search for + particular values: ``marked:value`` + ''' + if not hasattr(id_dict, 'items'): + # Simple list. Make it a dict of string 'true' + self.marked_ids = dict.fromkeys(id_dict, u'true') + else: + # Ensure that all the items in the dict are text + self.marked_ids = dict(izip(id_dict.iterkeys(), imap(unicode, + id_dict.itervalues()))) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 95a00a315c..36ab076417 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -239,7 +239,7 @@ class ANDROID(USBMS): 'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID', 'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E', 'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS', - 'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1'] + 'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index a8e15a6d94..3fefe2d886 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -858,7 +858,7 @@ class Amazon(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index e15d11c3c1..41812af8eb 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False # Google covers are often poor quality (scans/errors) but they have high # resolution, so they trump covers from better sources. So make sure they # are only used if no other covers are found. 
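The hunks below add a can_get_multiple_covers flag to Source and thread a new get_best_cover keyword through every download_cover implementation. A minimal sketch of a multi-cover source honoring that contract, mirroring the Google Images plugin added later in this patch (the class name and the find_cover_urls helper are hypothetical):

    from calibre.ebooks.metadata.sources.base import Source

    class ExampleCovers(Source):
        name = 'Example Covers'
        capabilities = frozenset(['cover'])
        can_get_multiple_covers = True

        def download_cover(self, log, result_queue, abort, title=None,
                authors=None, identifiers={}, timeout=30, get_best_cover=False):
            urls = self.find_cover_urls(title, authors)  # hypothetical helper
            if get_best_cover:
                # Caller wants a single result: only return the "best" cover
                urls = urls[:1]
            for url in urls:
                if abort.is_set():
                    break
                try:
                    data = self.browser.open_novisit(url, timeout=timeout).read()
                    result_queue.put((self, data))
                except Exception:
                    log.exception('Failed to download cover from %r' % url)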
-msprefs.defaults['cover_priorities'] = {'Google':2} +msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2} def create_log(ostream=None): from calibre.utils.logging import ThreadSafeLog, FileStream @@ -222,6 +222,9 @@ class Source(Plugin): #: plugin config_help_message = None + #: If True this source can return multiple covers for a given query + can_get_multiple_covers = False + def __init__(self, *args, **kwargs): Plugin.__init__(self, *args, **kwargs) @@ -522,7 +525,7 @@ class Source(Plugin): return None def download_cover(self, log, result_queue, abort, - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): ''' Download a cover and put it into result_queue. The parameters all have the same meaning as for :meth:`identify`. Put (self, cover_data) into @@ -531,6 +534,9 @@ class Source(Plugin): This method should use cached cover URLs for efficiency whenever possible. When cached data is not present, most plugins simply call identify and use its results. + + If the parameter get_best_cover is True and this plugin can get + multiple covers, it should only get the "best" one. ''' pass diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py index d28ce146c6..0fe963e3f7 100644 --- a/src/calibre/ebooks/metadata/sources/covers.py +++ b/src/calibre/ebooks/metadata/sources/covers.py @@ -35,9 +35,14 @@ class Worker(Thread): start_time = time.time() if not self.abort.is_set(): try: - self.plugin.download_cover(self.log, self.rq, self.abort, - title=self.title, authors=self.authors, - identifiers=self.identifiers, timeout=self.timeout) + if self.plugin.can_get_multiple_covers: + self.plugin.download_cover(self.log, self.rq, self.abort, + title=self.title, authors=self.authors, get_best_cover=True, + identifiers=self.identifiers, timeout=self.timeout) + else: + self.plugin.download_cover(self.log, self.rq, self.abort, + title=self.title, authors=self.authors, + identifiers=self.identifiers, timeout=self.timeout) except: self.log.exception('Failed to download cover from', self.plugin.name) diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py index 6857d62d4d..f955fb8a79 100644 --- a/src/calibre/ebooks/metadata/sources/douban.py +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -221,7 +221,7 @@ class Douban(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/edelweiss.py b/src/calibre/ebooks/metadata/sources/edelweiss.py index c86f16ff0d..53ae6c6ee3 100644 --- a/src/calibre/ebooks/metadata/sources/edelweiss.py +++ b/src/calibre/ebooks/metadata/sources/edelweiss.py @@ -320,7 +320,7 @@ class Edelweiss(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index 3962afcb5e..c03f20cd6b 100644 --- 
a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -209,7 +209,7 @@ class GoogleBooks(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.info('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/google_images.py b/src/calibre/ebooks/metadata/sources/google_images.py new file mode 100644 index 0000000000..c755fea192 --- /dev/null +++ b/src/calibre/ebooks/metadata/sources/google_images.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from collections import OrderedDict + +from calibre import as_unicode +from calibre.ebooks.metadata.sources.base import Source, Option + +class GoogleImages(Source): + + name = 'Google Images' + description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.') + capabilities = frozenset(['cover']) + config_help_message = _('Configure the Google Image Search plugin') + can_get_multiple_covers = True + options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'), + _('The maximum number of covers to process from the google search result')), + Option('size', 'choices', 'svga', _('Cover size'), + _('Search for covers larger than the specified size'), + choices=OrderedDict(( + ('any', _('Any size'),), + ('l', _('Large'),), + ('qsvga', _('Larger than %s')%'400x300',), + ('vga', _('Larger than %s')%'640x480',), + ('svga', _('Larger than %s')%'800x600',), + ('xga', _('Larger than %s')%'1024x768',), + ('2mp', _('Larger than %s')%'2 MP',), + ('4mp', _('Larger than %s')%'4 MP',), + ))), + ) + + def download_cover(self, log, result_queue, abort, + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): + if not title: + return + from threading import Thread + import time + timeout = max(60, timeout) # Needs at least a minute + title = ' '.join(self.get_title_tokens(title)) + author = ' '.join(self.get_author_tokens(authors)) + urls = self.get_image_urls(title, author, log, abort, timeout) + if not urls: + log('No images found in Google for title: %r and authors: %r'%(title, author)) + return + urls = urls[:self.prefs['max_covers']] + if get_best_cover: + urls = urls[:1] + workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls] + for w in workers: + w.daemon = True + w.start() + alive = True + start_time = time.time() + while alive and not abort.is_set() and time.time() - start_time < timeout: + alive = False + for w in workers: + if w.is_alive(): + alive = True + break + abort.wait(0.1) + + def download_image(self, url, timeout, log, result_queue): + try: + ans = self.browser.open_novisit(url, timeout=timeout).read() + result_queue.put((self, ans)) + log('Downloaded cover from: %s'%url) + except Exception: + log.exception('Failed to download cover from: %r'%url) # use the logger passed in + + def get_image_urls(self, title, author, log, abort, timeout): + from calibre.utils.ipc.simple_worker import fork_job, WorkerError + try: + return fork_job('calibre.ebooks.metadata.sources.google_images', + 'search', args=(title, author, 
self.prefs['size'], timeout), no_output=True, abort=abort, timeout=timeout)['result'] + except WorkerError as e: + if e.orig_tb: + log.error(e.orig_tb) + log.exception('Searching google failed:' + as_unicode(e)) + except Exception as e: + log.exception('Searching google failed:' + as_unicode(e)) + + return [] + +USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Firefox/3.6.13' + +def find_image_urls(br, ans): + import urlparse + for w in br.page.mainFrame().documentElement().findAll('.images_table a[href]'): + try: + imgurl = urlparse.parse_qs(urlparse.urlparse(unicode(w.attribute('href'))).query)['imgurl'][0] + except: + continue + if imgurl not in ans: + ans.append(imgurl) + +def search(title, author, size, timeout, debug=False): + import time + from calibre.web.jsbrowser.browser import Browser, LoadWatcher, Timeout + ans = [] + start_time = time.time() + br = Browser(user_agent=USER_AGENT, enable_developer_tools=debug) + br.visit('https://www.google.com/advanced_image_search') + f = br.select_form('form[action="/search"]') + f['as_q'] = '%s %s'%(title, author) + if size != 'any': + f['imgsz'] = size + f['imgar'] = 't|xt' + f['as_filetype'] = 'jpg' + br.submit(wait_for_load=False) + + # Loop until the page finishes loading or at least five image urls are + # found + lw = LoadWatcher(br.page, br) + while lw.is_loading and len(ans) < 5: + br.run_for_a_time(0.2) + find_image_urls(br, ans) + if time.time() - start_time > timeout: + raise Timeout('Timed out trying to load google image search page') + find_image_urls(br, ans) + if debug: + br.show_browser() + br.close() + del br # Needed to prevent PyQt from segfaulting + return ans + +def test_google(): + import pprint + pprint.pprint(search('heroes', 'abercrombie', 'svga', 60, debug=True)) + +def test(): + from Queue import Queue + from threading import Event + from calibre.utils.logging import default_log + p = GoogleImages(None) + rq = Queue() + p.download_cover(default_log, rq, Event(), title='The Heroes', + authors=('Joe Abercrombie',)) + print ('Downloaded', rq.qsize(), 'covers') + +if __name__ == '__main__': + test() + diff --git a/src/calibre/ebooks/metadata/sources/openlibrary.py b/src/calibre/ebooks/metadata/sources/openlibrary.py index 4645d2a18a..b0eeb940a5 100644 --- a/src/calibre/ebooks/metadata/sources/openlibrary.py +++ b/src/calibre/ebooks/metadata/sources/openlibrary.py @@ -19,7 +19,7 @@ class OpenLibrary(Source): OPENLIBRARY = 'http://covers.openlibrary.org/b/isbn/%s-L.jpg?default=false' def download_cover(self, log, result_queue, abort, - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): if 'isbn' not in identifiers: return isbn = identifiers['isbn'] diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index 6d6ebd3990..b232c7c9a4 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -75,7 +75,7 @@ class OverDrive(Source): # }}} def download_cover(self, log, result_queue, abort, # {{{ - title=None, authors=None, identifiers={}, timeout=30): + title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): import mechanize cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py index ebb104818f..0f4b0c2c53 100644 --- 
a/src/calibre/ebooks/metadata/sources/ozon.py +++ b/src/calibre/ebooks/metadata/sources/ozon.py @@ -55,7 +55,7 @@ class Ozon(Source): # for ozon.ru search we have to format ISBN with '-' isbn = _format_isbn(log, identifiers.get('isbn', None)) ozonid = identifiers.get('ozon', None) - + unk = unicode(_('Unknown')).upper() if (title and title != unk) or (authors and authors != [unk]) or isbn or not ozonid: qItems = set([isbn, title]) @@ -64,19 +64,19 @@ class Ozon(Source): qItems.discard(None) qItems.discard('') qItems = map(_quoteString, qItems) - + q = u' '.join(qItems).strip() log.info(u'search string: ' + q) - + if isinstance(q, unicode): q = q.encode('utf-8') if not q: return None - + search_url += quote_plus(q) else: search_url = self.ozon_url + '/webservices/OzonWebSvc.asmx/ItemDetail?ID=%s' % ozonid - + log.debug(u'search url: %r'%search_url) return search_url # }}} @@ -250,7 +250,7 @@ class Ozon(Source): return url # }}} - def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30): # {{{ + def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): # {{{ cached_url = self.get_cached_cover_url(identifiers) if cached_url is None: log.debug('No cached cover found, running identify') diff --git a/src/calibre/ebooks/metadata/sources/worker.py b/src/calibre/ebooks/metadata/sources/worker.py index 48f0f99584..51fb883e7d 100644 --- a/src/calibre/ebooks/metadata/sources/worker.py +++ b/src/calibre/ebooks/metadata/sources/worker.py @@ -11,6 +11,7 @@ import os from threading import Event, Thread from Queue import Queue, Empty from io import BytesIO +from collections import Counter from calibre.utils.date import as_utc from calibre.ebooks.metadata.sources.identify import identify, msprefs @@ -113,13 +114,18 @@ def single_covers(title, authors, identifiers, caches, tdir): kwargs=dict(title=title, authors=authors, identifiers=identifiers)) worker.daemon = True worker.start() + c = Counter() while worker.is_alive(): try: plugin, width, height, fmt, data = results.get(True, 1) except Empty: continue else: - name = '%s,,%s,,%s,,%s.cover'%(plugin.name, width, height, fmt) + name = plugin.name + if plugin.can_get_multiple_covers: + name += '{%d}'%c[plugin.name] + c[plugin.name] += 1 + name = '%s,,%s,,%s,,%s.cover'%(name, width, height, fmt) with open(name, 'wb') as f: f.write(data) os.mkdir(name+'.done') diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 3a72b837c8..c84dd1b094 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -262,6 +262,35 @@ def from_links(container): toc.remove(child) return toc +def find_text(node): + LIMIT = 200 + pat = re.compile(r'\s+') + for child in node: + if isinstance(child, etree._Element): + text = xml2text(child).strip() + text = pat.sub(' ', text) + if len(text) < 1: + continue + if len(text) > LIMIT: + # Look for less text in a child of this node, recursively + ntext = find_text(child) + return ntext or (text[:LIMIT] + '...') + else: + return text + +def from_files(container): + toc = TOC() + for spinepath in container.spine_items: + name = container.abspath_to_name(spinepath) + root = container.parsed(name) + body = XPath('//h:body')(root) + if not body: + continue + text = find_text(body[0]) + if text: + toc.add(text, name) + return toc + def add_id(container, name, loc): root = container.parsed(name) body = root.xpath('//*[local-name()="body"]')[0] diff --git 
a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 68db089073..d0474fa7e8 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -333,8 +333,8 @@ class OEBReader(object): guide = self.oeb.guide manifest = self.oeb.manifest for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'): - href = elem.get('href') - path = urlnormalize(urldefrag(href)[0]) + ref_href = elem.get('href') + path = urlnormalize(urldefrag(ref_href)[0]) if path not in manifest.hrefs: corrected_href = None for href in manifest.hrefs: @@ -342,12 +342,12 @@ class OEBReader(object): corrected_href = href break if corrected_href is None: - self.logger.warn(u'Guide reference %r not found' % href) + self.logger.warn(u'Guide reference %r not found' % ref_href) continue - href = corrected_href + ref_href = corrected_href typ = elem.get('type') if typ not in guide: - guide.add(typ, elem.get('title'), href) + guide.add(typ, elem.get('title'), ref_href) def _find_ncx(self, opf): result = xpath(opf, '/o2:package/o2:spine/@toc') diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py index e4a78b674a..ffa83b6ea8 100644 --- a/src/calibre/gui2/metadata/single_download.py +++ b/src/calibre/gui2/metadata/single_download.py @@ -16,13 +16,12 @@ from operator import attrgetter from Queue import Queue, Empty from io import BytesIO -from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt, - QApplication, QDialog, QVBoxLayout, QLabel, - QDialogButtonBox, QStyle, QStackedWidget, QWidget, - QTableView, QGridLayout, QFontInfo, QPalette, QTimer, - pyqtSignal, QAbstractTableModel, QVariant, QSize, - QListView, QPixmap, QAbstractListModel, QColor, QRect, - QTextBrowser, QStringListModel) +from PyQt4.Qt import ( + QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt, QApplication, + QDialog, QVBoxLayout, QLabel, QDialogButtonBox, QStyle, QStackedWidget, + QWidget, QTableView, QGridLayout, QFontInfo, QPalette, QTimer, pyqtSignal, + QAbstractTableModel, QVariant, QSize, QListView, QPixmap, QModelIndex, + QAbstractListModel, QColor, QRect, QTextBrowser, QStringListModel) from PyQt4.QtWebKit import QWebView from calibre.customize.ui import metadata_plugins @@ -654,7 +653,7 @@ class CoversModel(QAbstractListModel): # {{{ for i, plugin in enumerate(metadata_plugins(['cover'])): self.covers.append((plugin.name+'\n'+_('Searching...'), QVariant(self.blank), None, True)) - self.plugin_map[plugin] = i+1 + self.plugin_map[plugin] = [i+1] if do_reset: self.reset() @@ -685,48 +684,82 @@ class CoversModel(QAbstractListModel): # {{{ def plugin_for_index(self, index): row = index.row() if hasattr(index, 'row') else index for k, v in self.plugin_map.iteritems(): - if v == row: + if row in v: return k - def cover_keygen(self, x): - pmap = x[2] - if pmap is None: - return 1 - return pmap.width()*pmap.height() - def clear_failed(self): + # Remove entries that are still waiting good = [] pmap = {} - dcovers = sorted(self.covers[1:], key=self.cover_keygen, reverse=True) - cmap = {x:self.covers.index(x) for x in self.covers} + def keygen(x): + pmap = x[2] + if pmap is None: + return 1 + return pmap.width()*pmap.height() + dcovers = sorted(self.covers[1:], key=keygen, reverse=True) + cmap = {i:self.plugin_for_index(i) for i in xrange(len(self.covers))} for i, x in enumerate(self.covers[0:1] + dcovers): if not x[-1]: good.append(x) - if i > 0: - plugin = self.plugin_for_index(cmap[x]) - pmap[plugin] = len(good) - 1 + plugin = cmap[i] + if plugin is 
not None: + try: + pmap[plugin].append(len(good) - 1) + except KeyError: + pmap[plugin] = [len(good)-1] self.covers = good self.plugin_map = pmap self.reset() - def index_for_plugin(self, plugin): - idx = self.plugin_map.get(plugin, 0) - return self.index(idx) + def pointer_from_index(self, index): + row = index.row() if hasattr(index, 'row') else index + try: + return self.covers[row][2] + except IndexError: + pass + + def index_from_pointer(self, pointer): + for r, (text, scaled, pmap, waiting) in enumerate(self.covers): + if pointer == pmap: + return self.index(r) + return self.index(0) def update_result(self, plugin_name, width, height, data): - idx = None - for plugin, i in self.plugin_map.iteritems(): - if plugin.name == plugin_name: - idx = i - break - if idx is None: - return - pmap = QPixmap() - pmap.loadFromData(data) - if pmap.isNull(): - return - self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False) - self.dataChanged.emit(self.index(idx), self.index(idx)) + if plugin_name.endswith('}'): + # multi cover plugin + plugin_name = plugin_name.partition('{')[0] + plugin = [plugin for plugin in self.plugin_map if plugin.name == plugin_name] + if not plugin: + return + plugin = plugin[0] + last_row = max(self.plugin_map[plugin]) + pmap = QPixmap() + pmap.loadFromData(data) + if pmap.isNull(): + return + self.beginInsertRows(QModelIndex(), last_row, last_row) + for rows in self.plugin_map.itervalues(): + for i in xrange(len(rows)): + if rows[i] >= last_row: + rows[i] += 1 + self.plugin_map[plugin].insert(-1, last_row) + self.covers.insert(last_row, self.get_item(plugin_name, pmap, waiting=False)) + self.endInsertRows() + else: + # single cover plugin + idx = None + for plugin, rows in self.plugin_map.iteritems(): + if plugin.name == plugin_name: + idx = rows[0] + break + if idx is None: + return + pmap = QPixmap() + pmap.loadFromData(data) + if pmap.isNull(): + return + self.covers[idx] = self.get_item(plugin_name, pmap, waiting=False) + self.dataChanged.emit(self.index(idx), self.index(idx)) def cover_pixmap(self, index): row = index.row() @@ -774,9 +807,12 @@ class CoversView(QListView): # {{{ self.m.reset_covers() def clear_failed(self): - plugin = self.m.plugin_for_index(self.currentIndex()) + pointer = self.m.pointer_from_index(self.currentIndex()) self.m.clear_failed() - self.select(self.m.index_for_plugin(plugin).row()) + if pointer is None: + self.select(0) + else: + self.select(self.m.index_from_pointer(pointer).row()) # }}} @@ -852,10 +888,11 @@ class CoversWidget(QWidget): # {{{ if num < 2: txt = _('Could not find any covers for %s')%self.book.title else: - txt = _('Found %(num)d covers of %(title)s. ' - 'Pick the one you like best.')%dict(num=num-1, + txt = _('Found %(num)d possible covers for %(title)s. 
' + 'When the download completes, the covers will be sorted by size.')%dict(num=num-1, title=self.title) self.msg.setText(txt) + self.msg.setWordWrap(True) self.finished.emit() diff --git a/src/calibre/gui2/store/stores/amazon_de_plugin.py b/src/calibre/gui2/store/stores/amazon_de_plugin.py index 7b4027794a..6833bd3710 100644 --- a/src/calibre/gui2/store/stores/amazon_de_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_de_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,13 +18,26 @@ from calibre import browser from calibre.gui2 import open_url from calibre.gui2.store.search_result import SearchResult +class AmazonDEKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' + aff_id = {'tag': 'charhale0a-21'} + store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de' + '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454' + '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') + store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8' + '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de' + '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') + search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. + author_article = 'von ' + + and_word = ' und ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -108,20 +121,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonDEKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'charhale0a-21'} - store_link = ('http://www.amazon.de/gp/redirect.html?ie=UTF8&site-redirect=de' - '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=19454' - '&location=http://www.amazon.de/ebooks-kindle/b?node=530886031') - store_link_details = ('http://www.amazon.de/gp/redirect.html?ie=UTF8' - '&location=http://www.amazon.de/dp/%(asin)s&site-redirect=de' - '&tag=%(tag)s&linkCode=ur2&camp=1638&creative=6742') - search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'von ' - - and_word = ' und ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_es_plugin.py b/src/calibre/gui2/store/stores/amazon_es_plugin.py index 68387ffe11..0b71ae657b 100644 --- a/src/calibre/gui2/store/stores/amazon_es_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_es_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,12 +18,25 @@ from calibre import browser from calibre.gui2 import open_url from calibre.gui2.store.search_result import SearchResult +class AmazonESKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' -# This class is copy/pasted from 
amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. + aff_id = {'tag': 'charhale09-21'} + store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&' + 'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790') + store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s' + '&linkCode=ur2&camp=3626&creative=24790') + search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords=' + + author_article = 'de ' + + and_word = ' y ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -107,19 +120,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonESKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'charhale09-21'} - store_link = ('http://www.amazon.es/ebooks-kindle/b?_encoding=UTF8&' - 'node=827231031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3626&creative=24790') - store_link_details = ('http://www.amazon.es/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.es/dp/%(asin)s&tag=%(tag)s' - '&linkCode=ur2&camp=3626&creative=24790') - search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'de ' - - and_word = ' y ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_fr_plugin.py b/src/calibre/gui2/store/stores/amazon_fr_plugin.py index 9b425a2fc9..4520a3a104 100644 --- a/src/calibre/gui2/store/stores/amazon_fr_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_fr_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,13 +18,22 @@ from calibre import browser from calibre.gui2 import open_url from calibre.gui2.store.search_result import SearchResult +class AmazonFRKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' + aff_id = {'tag': 'charhale-21'} + store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id + store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' + search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. 
+ author_article = 'de ' + + and_word = ' et ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -108,16 +117,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonFRKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'charhale-21'} - store_link = 'http://www.amazon.fr/livres-kindle/b?ie=UTF8&node=695398031&ref_=sa_menu_kbo1&_encoding=UTF8&tag=%(tag)s&linkCode=ur2&camp=1642&creative=19458' % aff_id - store_link_details = 'http://www.amazon.fr/gp/redirect.html?ie=UTF8&location=http://www.amazon.fr/dp/%(asin)s&tag=%(tag)s&linkCode=ur2&camp=1634&creative=6738' - search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'de ' - - and_word = ' et ' diff --git a/src/calibre/gui2/store/stores/amazon_it_plugin.py b/src/calibre/gui2/store/stores/amazon_it_plugin.py index 2493f78ea3..f8a756d1d5 100644 --- a/src/calibre/gui2/store/stores/amazon_it_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_it_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -18,12 +18,25 @@ from calibre import browser from calibre.gui2 import open_url from calibre.gui2.store.search_result import SearchResult +class AmazonITKindleStore(StorePlugin): + ''' + For comments on the implementation, please see amazon_plugin.py + ''' -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. 
+ aff_id = {'tag': 'httpcharles07-21'} + store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&' + 'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322') + store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&' + 'linkCode=ur2&camp=3370&creative=23322') + search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords=' + + author_article = 'di ' + + and_word = ' e ' + + # ---- Copy from here to end -class AmazonEUBase(StorePlugin): ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -106,20 +119,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass - -class AmazonITKindleStore(AmazonEUBase): - ''' - For comments on the implementation, please see amazon_plugin.py - ''' - - aff_id = {'tag': 'httpcharles07-21'} - store_link = ('http://www.amazon.it/ebooks-kindle/b?_encoding=UTF8&' - 'node=827182031&tag=%(tag)s&ie=UTF8&linkCode=ur2&camp=3370&creative=23322') - store_link_details = ('http://www.amazon.it/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.it/dp/%(asin)s&tag=%(tag)s&' - 'linkCode=ur2&camp=3370&creative=23322') - search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'di ' - - and_word = ' e ' \ No newline at end of file diff --git a/src/calibre/gui2/store/stores/amazon_uk_plugin.py b/src/calibre/gui2/store/stores/amazon_uk_plugin.py index 054072824b..f6082ac790 100644 --- a/src/calibre/gui2/store/stores/amazon_uk_plugin.py +++ b/src/calibre/gui2/store/stores/amazon_uk_plugin.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (unicode_literals, division, absolute_import, print_function) -store_version = 2 # Needed for dynamic plugin loading +store_version = 3 # Needed for dynamic plugin loading __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' @@ -19,11 +19,28 @@ from calibre.gui2.store import StorePlugin from calibre.gui2.store.search_result import SearchResult -# This class is copy/pasted from amason_uk_plugin. Do not modify it in any -# other amazon EU plugin. Be sure to paste it into all other amazon EU plugins -# when modified. -class AmazonEUBase(StorePlugin): +class AmazonUKKindleStore(StorePlugin): + aff_id = {'tag': 'calcharles-21'} + store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.co.uk/Kindle-eBooks/b?' + 'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&' + 'linkCode=ur2&camp=1634&creative=19450') + store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' + 'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&' + 'linkCode=ur2&camp=1634&creative=6738') + search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords=' + + author_article = 'by ' + + and_word = ' and ' + + # This code is copy/pasted from here to the other amazon EU plugins. Do not + # modify it in any other amazon EU plugin. Be sure to paste it into all + # other amazon EU plugins when modified. + + # ---- Copy from here to end + ''' For comments on the implementation, please see amazon_plugin.py ''' @@ -107,18 +124,3 @@ class AmazonEUBase(StorePlugin): def get_details(self, search_result, timeout): pass -class AmazonUKKindleStore(AmazonEUBase): - aff_id = {'tag': 'calcharles-21'} - store_link = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.co.uk/Kindle-eBooks/b?' 
- 'ie=UTF8&node=341689031&ref_=sa_menu_kbo2&tag=%(tag)s&' - 'linkCode=ur2&camp=1634&creative=19450') - store_link_details = ('http://www.amazon.co.uk/gp/redirect.html?ie=UTF8&' - 'location=http://www.amazon.co.uk/dp/%(asin)s&tag=%(tag)s&' - 'linkCode=ur2&camp=1634&creative=6738') - search_url = 'http://www.amazon.co.uk/s/?url=search-alias%3Ddigital-text&field-keywords=' - - author_article = 'by ' - - and_word = ' and ' - diff --git a/src/calibre/gui2/toc/main.py b/src/calibre/gui2/toc/main.py index de5ed91bcd..7cb4f9b462 100644 --- a/src/calibre/gui2/toc/main.py +++ b/src/calibre/gui2/toc/main.py @@ -14,11 +14,11 @@ from functools import partial from PyQt4.Qt import (QPushButton, QFrame, QVariant, QMenu, QInputDialog, QDialog, QVBoxLayout, QDialogButtonBox, QSize, QStackedWidget, QWidget, QLabel, Qt, pyqtSignal, QIcon, QTreeWidget, QGridLayout, QTreeWidgetItem, - QToolButton, QItemSelectionModel) + QToolButton, QItemSelectionModel, QCursor) from calibre.ebooks.oeb.polish.container import get_container, AZW3Container from calibre.ebooks.oeb.polish.toc import ( - get_toc, add_id, TOC, commit_toc, from_xpaths, from_links) + get_toc, add_id, TOC, commit_toc, from_xpaths, from_links, from_files) from calibre.gui2 import Application, error_dialog, gprefs from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.toc.location import ItemEdit @@ -126,6 +126,7 @@ class ItemView(QFrame): # {{{ go_to_root = pyqtSignal() create_from_xpath = pyqtSignal(object) create_from_links = pyqtSignal() + create_from_files = pyqtSignal() flatten_toc = pyqtSignal() def __init__(self, parent): @@ -183,6 +184,15 @@ class ItemView(QFrame): # {{{ ))) l.addWidget(b) + self.cfb = b = QPushButton(_('Generate ToC from &files')) + b.clicked.connect(self.create_from_files) + b.setToolTip(textwrap.fill(_( + 'Generate a Table of Contents from individual files in the book.' + ' Each entry in the ToC will point to the start of the file, the' + ' text of the entry will be the "first line" of text from the file.' 
+ ))) + l.addWidget(b) + self.xpb = b = QPushButton(_('Generate ToC from &XPath')) b.clicked.connect(self.create_from_user_xpath) b.setToolTip(textwrap.fill(_( @@ -190,7 +200,7 @@ class ItemView(QFrame): # {{{ ))) l.addWidget(b) - self.fal = b = QPushButton(_('Flatten the ToC')) + self.fal = b = QPushButton(_('&Flatten the ToC')) b.clicked.connect(self.flatten_toc) b.setToolTip(textwrap.fill(_( 'Flatten the Table of Contents, putting all entries at the top level' @@ -339,7 +349,7 @@ class ItemView(QFrame): # {{{ # }}} -class TreeWidget(QTreeWidget): +class TreeWidget(QTreeWidget): # {{{ def __init__(self, parent): QTreeWidget.__init__(self, parent) @@ -357,6 +367,9 @@ class TreeWidget(QTreeWidget): self.setAnimated(True) self.setMouseTracking(True) self.in_drop_event = False + self.root = self.invisibleRootItem() + self.setContextMenuPolicy(Qt.CustomContextMenu) + self.customContextMenuRequested.connect(self.show_context_menu) def iteritems(self, parent=None): if parent is None: @@ -384,6 +397,137 @@ class TreeWidget(QTreeWidget): ans = sorted(ans, key=lambda x:sort_map.get(x, -1), reverse=True) return ans + def highlight_item(self, item): + self.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect) + self.scrollToItem(item) + + def move_left(self): + item = self.currentItem() + if item is not None: + parent = item.parent() + if parent is not None: + is_expanded = item.isExpanded() or item.childCount() == 0 + gp = parent.parent() or self.invisibleRootItem() + idx = gp.indexOfChild(parent) + for gc in [parent.child(i) for i in xrange(parent.indexOfChild(item)+1, parent.childCount())]: + parent.removeChild(gc) + item.addChild(gc) + parent.removeChild(item) + gp.insertChild(idx+1, item) + if is_expanded: + self.expandItem(item) + self.highlight_item(item) + + def move_right(self): + item = self.currentItem() + if item is not None: + parent = item.parent() or self.invisibleRootItem() + idx = parent.indexOfChild(item) + if idx > 0: + is_expanded = item.isExpanded() + np = parent.child(idx-1) + parent.removeChild(item) + np.addChild(item) + if is_expanded: + self.expandItem(item) + self.highlight_item(item) + + def move_down(self): + item = self.currentItem() + if item is None: + if self.root.childCount() == 0: + return + item = self.root.child(0) + self.highlight_item(item) + return + parent = item.parent() or self.root + idx = parent.indexOfChild(item) + if idx == parent.childCount() - 1: + # At end of parent, need to become sibling of parent + if parent is self.root: + return + gp = parent.parent() or self.root + parent.removeChild(item) + gp.insertChild(gp.indexOfChild(parent)+1, item) + else: + sibling = parent.child(idx+1) + parent.removeChild(item) + sibling.insertChild(0, item) + self.highlight_item(item) + + def move_up(self): + item = self.currentItem() + if item is None: + if self.root.childCount() == 0: + return + item = self.root.child(self.root.childCount()-1) + self.highlight_item(item) + return + parent = item.parent() or self.root + idx = parent.indexOfChild(item) + if idx == 0: + # At end of parent, need to become sibling of parent + if parent is self.root: + return + gp = parent.parent() or self.root + parent.removeChild(item) + gp.insertChild(gp.indexOfChild(parent), item) + else: + sibling = parent.child(idx-1) + parent.removeChild(item) + sibling.addChild(item) + self.highlight_item(item) + + def del_items(self): + for item in self.selectedItems(): + p = item.parent() or self.root + p.removeChild(item) + + def title_case(self): + from calibre.utils.titlecase import 
titlecase + for item in self.selectedItems(): + t = unicode(item.data(0, Qt.DisplayRole).toString()) + item.setData(0, Qt.DisplayRole, titlecase(t)) + + def keyPressEvent(self, ev): + if ev.key() == Qt.Key_Left and ev.modifiers() & Qt.CTRL: + self.move_left() + ev.accept() + elif ev.key() == Qt.Key_Right and ev.modifiers() & Qt.CTRL: + self.move_right() + ev.accept() + elif ev.key() == Qt.Key_Up and ev.modifiers() & Qt.CTRL: + self.move_up() + ev.accept() + elif ev.key() == Qt.Key_Down and ev.modifiers() & Qt.CTRL: + self.move_down() + ev.accept() + elif ev.key() in (Qt.Key_Delete, Qt.Key_Backspace): + self.del_items() + ev.accept() + else: + return super(TreeWidget, self).keyPressEvent(ev) + + def show_context_menu(self, point): + item = self.currentItem() + if item is not None: + m = QMenu() + ci = unicode(item.data(0, Qt.DisplayRole).toString()) + p = item.parent() or self.invisibleRootItem() + idx = p.indexOfChild(item) + if idx > 0: + m.addAction(QIcon(I('arrow-up.png')), _('Move "%s" up')%ci, self.move_up) + if idx + 1 < p.childCount(): + m.addAction(QIcon(I('arrow-down.png')), _('Move "%s" down')%ci, self.move_down) + m.addAction(QIcon(I('trash.png')), _('Remove all selected items'), self.del_items) + if item.parent() is not None: + m.addAction(QIcon(I('back.png')), _('Unindent "%s"')%ci, self.move_left) + if idx > 0: + m.addAction(QIcon(I('forward.png')), _('Indent "%s"')%ci, self.move_right) + m.addAction(_('Change all selected items to title case'), self.title_case) + m.exec_(QCursor.pos()) +# }}} + class TOCView(QWidget): # {{{ add_new_item = pyqtSignal(object, object) @@ -393,27 +537,43 @@ class TOCView(QWidget): # {{{ l = self.l = QGridLayout() self.setLayout(l) self.tocw = t = TreeWidget(self) - l.addWidget(t, 0, 0, 5, 3) + l.addWidget(t, 0, 0, 7, 3) self.up_button = b = QToolButton(self) b.setIcon(QIcon(I('arrow-up.png'))) b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) l.addWidget(b, 0, 3) - b.setToolTip(_('Move current entry up')) + b.setToolTip(_('Move current entry up [Ctrl+Up]')) b.clicked.connect(self.move_up) + + self.left_button = b = QToolButton(self) + b.setIcon(QIcon(I('back.png'))) + b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) + l.addWidget(b, 2, 3) + b.setToolTip(_('Unindent the current entry [Ctrl+Left]')) + b.clicked.connect(self.tocw.move_left) + self.del_button = b = QToolButton(self) b.setIcon(QIcon(I('trash.png'))) b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) - l.addWidget(b, 2, 3) + l.addWidget(b, 3, 3) b.setToolTip(_('Remove all selected entries')) b.clicked.connect(self.del_items) + + self.right_button = b = QToolButton(self) + b.setIcon(QIcon(I('forward.png'))) + b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) + l.addWidget(b, 4, 3) + b.setToolTip(_('Indent the current entry [Ctrl+Right]')) + b.clicked.connect(self.tocw.move_right) + self.down_button = b = QToolButton(self) b.setIcon(QIcon(I('arrow-down.png'))) b.setIconSize(QSize(ICON_SIZE, ICON_SIZE)) - l.addWidget(b, 4, 3) - b.setToolTip(_('Move current entry down')) + l.addWidget(b, 6, 3) + b.setToolTip(_('Move current entry down [Ctrl+Down]')) b.clicked.connect(self.move_down) self.expand_all_button = b = QPushButton(_('&Expand all')) - col = 5 + col = 7 l.addWidget(b, col, 0) b.clicked.connect(self.tocw.expandAll) self.collapse_all_button = b = QPushButton(_('&Collapse all')) @@ -427,6 +587,7 @@ class TOCView(QWidget): # {{{ i.add_new_item.connect(self.add_new_item) i.create_from_xpath.connect(self.create_from_xpath) i.create_from_links.connect(self.create_from_links) + 
i.create_from_files.connect(self.create_from_files) i.flatten_item.connect(self.flatten_item) i.flatten_toc.connect(self.flatten_toc) i.go_to_root.connect(self.go_to_root) @@ -444,9 +605,7 @@ class TOCView(QWidget): # {{{ return unicode(item.data(0, Qt.DisplayRole).toString()) def del_items(self): - for item in self.tocw.selectedItems(): - p = item.parent() or self.root - p.removeChild(item) + self.tocw.del_items() def delete_current_item(self): item = self.tocw.currentItem() @@ -484,54 +643,13 @@ class TOCView(QWidget): # {{{ self.tocw.setCurrentItem(None) def highlight_item(self, item): - self.tocw.setCurrentItem(item, 0, QItemSelectionModel.ClearAndSelect) - self.tocw.scrollToItem(item) - - def move_down(self): - item = self.tocw.currentItem() - if item is None: - if self.root.childCount() == 0: - return - item = self.root.child(0) - self.highlight_item(item) - return - parent = item.parent() or self.root - idx = parent.indexOfChild(item) - if idx == parent.childCount() - 1: - # At end of parent, need to become sibling of parent - if parent is self.root: - return - gp = parent.parent() or self.root - parent.removeChild(item) - gp.insertChild(gp.indexOfChild(parent)+1, item) - else: - sibling = parent.child(idx+1) - parent.removeChild(item) - sibling.insertChild(0, item) - self.highlight_item(item) + self.tocw.highlight_item(item) def move_up(self): - item = self.tocw.currentItem() - if item is None: - if self.root.childCount() == 0: - return - item = self.root.child(self.root.childCount()-1) - self.highlight_item(item) - return - parent = item.parent() or self.root - idx = parent.indexOfChild(item) - if idx == 0: - # At end of parent, need to become sibling of parent - if parent is self.root: - return - gp = parent.parent() or self.root - parent.removeChild(item) - gp.insertChild(gp.indexOfChild(parent), item) - else: - sibling = parent.child(idx-1) - parent.removeChild(item) - sibling.addChild(item) - self.highlight_item(item) + self.tocw.move_up() + + def move_down(self): + self.tocw.move_down() def update_status_tip(self, item): c = item.data(0, Qt.UserRole).toPyObject() @@ -671,6 +789,14 @@ class TOCView(QWidget): # {{{ _('No links were found that could be added to the Table of Contents.'), show=True) self.insert_toc_fragment(toc) + def create_from_files(self): + toc = from_files(self.ebook) + if len(toc) == 0: + return error_dialog(self, _('No items found'), + _('No files were found that could be added to the Table of Contents.'), show=True) + self.insert_toc_fragment(toc) + + # }}} class TOCEditor(QDialog): # {{{ diff --git a/src/calibre/translations/de.po b/src/calibre/translations/de.po index b0e512c6bf..0655ceb4ee 100644 --- a/src/calibre/translations/de.po +++ b/src/calibre/translations/de.po @@ -22507,7 +22507,7 @@ msgstr "Autoren beginnend mit '%s'" #: /home/kovid/work/calibre/src/calibre/library/catalogs/epub_mobi_builder.py:3477 #, python-format msgid "Authors beginning with '%s'" -msgstr "Autoren beginnen mit mit %s" +msgstr "Autoren beginnen mit %s" #: /home/kovid/work/calibre/src/calibre/library/catalogs/epub_mobi_builder.py:3518 msgid "NCX for Recently Added"
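The marked-ids machinery added to src/calibre/db/view.py above is driven entirely through View.set_marked_ids and the marked: search prefix, per the docstring in that hunk. A short usage sketch, assuming view is a View instance over the new Cache backend:

    view.set_marked_ids({1, 2, 3})              # plain set: each id gets the value u'true'
    hits = view.search('marked:true', return_matches=True)

    view.set_marked_ids({4: 'red', 5: 'blue'})  # mapping: allows searching for specific values
    hits = view.search('marked:red', return_matches=True)

    view.set_marked_ids(set())                  # empty set/dict clears all marks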