diff --git a/manual/templates/layout.html b/manual/templates/layout.html index 88e1605f92..b8389b0ac9 100644 --- a/manual/templates/layout.html +++ b/manual/templates/layout.html @@ -59,10 +59,10 @@

-
+ - +

diff --git a/recipes/baltimore_sun.recipe b/recipes/baltimore_sun.recipe index ac6906a5e6..7c55bfd5fe 100644 --- a/recipes/baltimore_sun.recipe +++ b/recipes/baltimore_sun.recipe @@ -1,45 +1,37 @@ from __future__ import with_statement __license__ = 'GPL 3' -__copyright__ = 'Original 2009, Kovid Goyal ' -__copyright__= 'Modified 2011, Josh Hall ' +__copyright__ = '2009, Kovid Goyal ' +__copyright__ = '2012 Josh Hall' __docformat__ = 'restructuredtext en' -''' -www.baltimoresun.com -''' - +import urllib, re from calibre.web.feeds.news import BasicNewsRecipe class BaltimoreSun(BasicNewsRecipe): title = 'The Baltimore Sun' __author__ = 'Josh Hall' - description = 'Politics, local and business news from Baltimore' - language = 'en' + + description = 'Complete local news and blogs from Baltimore' + language = 'en' + version = 2 oldest_article = 1 max_articles_per_feed = 100 - remove_empty_feeds = True - use_embedded_content = False - no_stylesheets = True - remove_javascript = True - #masthead_url = 'http://www.baltimoresun.com/images/thirdpartylogo.gif' - - remove_tags_before = dict(name='div', attrs={'class':['story', 'entry']}) - remove_tags_after = [ - {'class':['photo_article',]}, - dict(name='div', attrs={'class':'shirttail-promo right clearfix'}), - ] + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + recursions = 1 keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}), dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}), ] + remove_tags_after = [{'class':['photo_article',]}] + match_regexps = [r'page=[0-9]+'] - remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer","article-promo"]}, - {'class':["entry-footer-left","entry-footer-right","shirttail-promo right clearfix","clearfix","relatedTitle","articleRelates 
module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent","toppaginate","module","module-header","module-content"]}, - dict(name='font',attrs={'id':["cr-other-headlines"]}), - dict(name=['iframe']), - ] + remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']}, + {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']}, + dict(name='font',attrs={'id':["cr-other-headlines"]})] extra_css = ''' h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} @@ -53,8 +45,9 @@ class BaltimoreSun(BasicNewsRecipe): .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' + ''' feeds = [ +## News ## (u'Top Headlines', u'http://www.baltimoresun.com/rss2.0.xml'), (u'Breaking News', u'http://www.baltimoresun.com/news/breaking/rss2.0.xml'), (u'Top Maryland', u'http://www.baltimoresun.com/news/maryland/rss2.0.xml'), @@ -69,10 +62,10 @@ class BaltimoreSun(BasicNewsRecipe): (u'Local Politics', u'http://www.baltimoresun.com/news/maryland/politics/rss2.0.xml'), (u'Weather', u'http://www.baltimoresun.com/news/weather/rss2.0.xml'), #(u'Traffic', u'http://www.baltimoresun.com/features/commuting/rss2.0.xml'), - (u'Nation/world', u'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'), + (u'Nation/world', u'http://feeds.feedburner.com/baltimoresun/news/nationworld/rss2'), (u'Weird 
News', u'http://www.baltimoresun.com/news/offbeat/rss2.0.xml'), - +##Sports## (u'Top Sports', u'http://www.baltimoresun.com/sports/rss2.0.xml'), (u'Orioles/Baseball', u'http://www.baltimoresun.com/sports/orioles/rss2.0.xml'), (u'Ravens/Football', u'http://www.baltimoresun.com/sports/ravens/rss2.0.xml'), @@ -85,6 +78,7 @@ class BaltimoreSun(BasicNewsRecipe): #(u'High School', u'http://www.baltimoresun.com/sports/high-school/rss2.0.xml'), #(u'Outdoors', u'http://www.baltimoresun.com/sports/outdoors/rss2.0.xml'), +## Entertainment ## (u'Celebrity News', u'http://www.baltimoresun.com/entertainment/celebrities/rss2.0.xml'), (u'Arts & Theater', u'http://www.baltimoresun.com/entertainment/arts/rss2.0.xml'), (u'Movies', u'http://www.baltimoresun.com/entertainment/movies/rss2.0.xml'), @@ -92,14 +86,16 @@ class BaltimoreSun(BasicNewsRecipe): (u'Restaurants & Food', u'http://www.baltimoresun.com/entertainment/dining/rss2.0.xml'), (u'TV/Media', u'http://www.baltimoresun.com/entertainment/tv/rss2.0.xml'), +## Life ## (u'Health&Wellness', u'http://www.baltimoresun.com/health/rss2.0.xml'), (u'Home & Garden', u'http://www.baltimoresun.com/features/home-garden/rss2.0.xml'), (u'Living Green', u'http://www.baltimoresun.com/features/green/rss2.0.xml'), (u'Parenting', u'http://www.baltimoresun.com/features/parenting/rss2.0.xml'), (u'Fashion', u'http://www.baltimoresun.com/features/fashion/rss2.0.xml'), (u'Travel', u'http://www.baltimoresun.com/travel/rss2.0.xml'), - (u'Faith', u'http://www.baltimoresun.com/features/faith/rss2.0.xml'), + #(u'Faith', u'http://www.baltimoresun.com/features/faith/rss2.0.xml'), +## Business ## (u'Top Business', u'http://www.baltimoresun.com/business/rss2.0.xml'), (u'Technology', u'http://www.baltimoresun.com/business/technology/rss2.0.xml'), (u'Personal finance', u'http://www.baltimoresun.com/business/money/rss2.0.xml'), @@ -109,12 +105,14 @@ class BaltimoreSun(BasicNewsRecipe): (u'Consumer Safety', 
u'http://www.baltimoresun.com/business/consumer-safety/rss2.0.xml'), (u'Investing', u'http://www.baltimoresun.com/business/money/rss2.0.xml'), +## Opinion## (u'Sun Editorials', u'http://www.baltimoresun.com/news/opinion/editorial/rss2.0.xml'), (u'Op/Ed', u'http://www.baltimoresun.com/news/opinion/oped/rss2.0.xml'), (u'Readers Respond', u'http://www.baltimoresun.com/news/opinion/readersrespond/'), - (u'Kevin Cowherd', 'http://www.baltimoresun.com/sports/bal-columnist-cowherd,0,6829726.columnist-rss2.0.xml'), - (u'Jay Hancock', u'http://www.baltimoresun.com/business/money/bal-columnist-hancock,0,6673611.columnist-rss2.0.xml'), +## Columnists ## + (u'Kevin Cowherd', u'http://www.baltimoresun.com/sports/bal-columnist-cowherd,0,6829726.columnist-rss2.0.xml'), + (u'Robert Ehrlich', u'http://www.baltimoresun.com/news/opinion/columnists/bal-columnist-ehrlich,0,1825227.columnist-rss2.0.xml'), (u'Jacques Kelly', u'http://www.baltimoresun.com/news/maryland/bal-columnist-kelly,0,1154701.columnist-rss2.0.xml'), (u'Marta H. Mossburg', u'http://www.baltimoresun.com/news/opinion/oped/bal-columnist-mossburg,0,7982155.columnist-rss2.0.xml'), (u'Mike Preston', u'http://www.baltimoresun.com/sports/bal-columnist-preston,0,6169796.columnist-rss2.0.xml'), @@ -122,59 +120,80 @@ class BaltimoreSun(BasicNewsRecipe): (u'Dan Rodricks', u'http://www.baltimoresun.com/news/maryland/bal-columnist-rodricks,0,7089843.columnist-rss2.0.xml'), (u'Thomas F. 
Schaller', u'http://www.baltimoresun.com/news/opinion/columnists/bal-columnist-schaller,0,897397.columnist-rss2.0.xml'), (u'Peter Schmuck', u'http://www.baltimoresun.com/sports/bal-columnist-schmuck,0,7485088.columnist-rss2.0.xml'), - (u'Ron Smith', u'http://www.baltimoresun.com/news/opinion/bal-columnist-ronsmith,0,3964803.columnist-rss2.0.xml'), - (u'Baltimore Crime Beat', u'http://weblogs.baltimoresun.com/news/crime/blog/index.xml'), - (u'Getting There', u'http://weblogs.baltimoresun.com/news/traffic/index.xml'), - (u'InsideEd', u'http://weblogs.baltimoresun.com/news/education/blog/index.xml'), - (u'Maryland Politics', u'http://weblogs.baltimoresun.com/news/local/politics/index.xml'), - (u'Maryland Weather', u'http://weblogs.marylandweather.com/index.xml'), - (u'Second Opinion', u'http://weblogs.baltimoresun.com/news/opinion/index.xml'), - (u'You Dont Say', u'http://weblogs.baltimoresun.com/news/mcintyre/blog/index.xml'), +## News Blogs ## + (u'Baltimore Crime Beat', u'http://baltimore.feedsportal.com/c/34255/f/623075/index.rss'), + (u'InsideEd', u'http://www.baltimoresun.com/news/maryland/education/blog/rss2.0.xml'), + (u'Maryland Politics', u'http://www.baltimoresun.com/news/maryland/politics/blog/rss2.0.xml'), + (u'Maryland Weather', u'http://www.baltimoresun.com/news/weather/weather-blog/rss2.0.xml'), + (u'Second Opinion', u'http://www.baltimoresun.com/news/opinion/second-opinion-blog/rss2.0.xml'), + (u'Sun Investigates', u'http://www.baltimoresun.com/news/maryland/sun-investigates/rss2.0.xml'), + (u'You Dont Say', u'http://www.baltimoresun.com/news/language-blog/rss2.0.xml'), - (u'BaltTech', u'http://weblogs.baltimoresun.com/news/technology/index.xml'), - (u'Consuming Interests', u'http://weblogs.baltimoresun.com/business/consuminginterests/blog/index.xml'), - (u'Jay Hancocks Blog', u'http://weblogs.baltimoresun.com/business/hancock/blog/index.xml'), - (u'The Real Estate Wonk', u'http://weblogs.baltimoresun.com/business/realestate/blog/index.xml'), +## 
Business Blogs ## + (u'BaltTech', u'http://www.baltimoresun.com/business/technology/blog/rss2.0.xml'), + (u'Consuming Interests', u'http://www.baltimoresun.com/business/consuming-interests-blog/rss2.0.xml'), + (u'The Real Estate Wonk', u'http://www.baltimoresun.com/business/real-estate/wonk/rss2.0.xml'), - (u'Clef Notes', 'http://weblogs.baltimoresun.com/entertainment/classicalmusic/index.xml'), - (u'Dining at Large', u'http://weblogs.baltimoresun.com/entertainment/dining/reviews/blog/index.xml'), - (u'Midnight Sun', u'http://weblogs.baltimoresun.com/entertainment/midnight_sun/blog/index.xml'), - (u'Mike Sragow Gets Reel', u'http://weblogs.baltimoresun.com/entertainment/movies/blog/index.xml'), - (u'Read Street', u'http://weblogs.baltimoresun.com/entertainment/books/blog/index.xml'), - (u'Reality Check', u'http://weblogs.baltimoresun.com/entertainment/realitycheck/blog/index.xml'), - (u'Z on TV', u'http://weblogs.baltimoresun.com/entertainment/zontv/index.xml'), +## Entertainment Blogs ## + (u'Clef Notes & Drama Queens', 'http://weblogs.baltimoresun.com/entertainment/classicalmusic/index.xml'), + (u'Baltimore Diner', u'http://baltimore.feedsportal.com/c/34255/f/623088/index.rss'), + (u'Midnight Sun', u'http://www.baltimoresun.com/entertainment/music/midnight-sun-blog/rss2.0.xml'), + (u'Read Street', u'http://www.baltimoresun.com/features/books/read-street/rss2.0.xml'), + (u'Z on TV', u'http://www.baltimoresun.com/entertainment/tv/z-on-tv-blog/rss2.0.xml'), +## Life Blogs ## (u'BMore Green', u'http://weblogs.baltimoresun.com/features/green/index.xml'), - (u'Charm City Moms', u'http://weblogs.baltimoresun.com/features/baltimoremomblog/index.xml'), - (u'Exercists', u'http://weblogs.baltimoresun.com/health/fitness/index.xml'), - (u'Garden Variety', 'http://weblogs.baltimoresun.com/features/gardening/index.xml'), - #(u'In Good Faith', u'http://weblogs.baltimoresun.com/news/faith/index.xml'), - (u'Picture of Health', u'http://weblogs.baltimoresun.com/health/index.xml'), 
+ (u'Baltimore Insider',u'http://www.baltimoresun.com/features/baltimore-insider-blog/rss2.0.xml'), + (u'Homefront', u'http://www.baltimoresun.com/features/parenting/homefront/rss2.0.xml'), + (u'Picture of Health', u'http://www.baltimoresun.com/health/blog/rss2.0.xml'), (u'Unleashed', u'http://weblogs.baltimoresun.com/features/mutts/blog/index.xml'), +## b the site blogs ## + (u'Game Cache', u'http://www.baltimoresun.com/entertainment/bthesite/game-cache/rss2.0.xml'), + (u'TV Lust', u'http://www.baltimoresun.com/entertainment/bthesite/tv-lust/rss2.0.xml'), + +## Sports Blogs ## + (u'Baltimore Sports Blitz', u'http://baltimore.feedsportal.com/c/34255/f/623097/index.rss'), #(u'Faceoff', u'http://weblogs.baltimoresun.com/sports/lacrosse/blog/index.xml'), #(u'MMA Stomping Grounds', u'http://weblogs.baltimoresun.com/sports/mma/blog/index.xml'), - (u'Orioles Insider', u'http://weblogs.baltimoresun.com/sports/orioles/blog/index.xml'), - #(u'Outdoors Girl', u'http://weblogs.baltimoresun.com/sports/outdoors/blog/index.xml'), - (u'Ravens Insider', u'http://weblogs.baltimoresun.com/sports/ravens/blog/index.xml'), + (u'Orioles Insider', u'http://baltimore.feedsportal.com/c/34255/f/623100/index.rss'), + (u'Ravens Insider', u'http://www.baltimoresun.com/sports/ravens/ravens-insider/rss2.0.xml'), #(u'Recruiting Report', u'http://weblogs.baltimoresun.com/sports/college/recruiting/index.xml'), #(u'Ring Posts', u'http://weblogs.baltimoresun.com/sports/wrestling/blog/index.xml'), - (u'The Schmuck Stops Here', u'http://weblogs.baltimoresun.com/sports/schmuck/index.xml'), - (u'Toy Department', u'http://weblogs.baltimoresun.com/sports/thetoydepartment/index.xml'), + (u'The Schmuck Stops Here', u'http://www.baltimoresun.com/sports/schmuck-blog/rss2.0.xml'), #(u'Tracking the Terps', u'http://weblogs.baltimoresun.com/sports/college/maryland_terps/blog/index.xml'), #(u'Varsity Letters', u'http://weblogs.baltimoresun.com/sports/highschool/varsityletters/index.xml'), - (u'Virtual Vensanity', 
u'http://weblogs.baltimoresun.com/entertainment/bthesite/vensel/index.xml'), - ] def get_article_url(self, article): - print article.get('feedburner_origlink', article.get('guid', article.get('link'))) - return article.get('feedburner_origlink', article.get('guid', article.get('link'))) + ans = None + try: + s = article.summary + ans = urllib.unquote( + re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1)) + except: + pass + if ans is None: + ans = article.get('feedburner_origlink', article.get('guid', article.get('link'))) + if ans is not None: + return ans.replace('?track=rss', '') + def skip_ad_pages(self, soup): + text = soup.find(text='click here to continue to article') + if text: + a = text.parent + url = a.get('href') + if url: + return self.index_to_soup(url, raw=True) def postprocess_html(self, soup, first_fetch): + # Remove the navigation bar. It was kept until now to be able to follow + # the links to further pages. But now we don't need them anymore. + for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}): + nav.extract() + for t in soup.findAll(['table', 'tr', 'td']): t.name = 'div' @@ -182,5 +201,3 @@ class BaltimoreSun(BasicNewsRecipe): tag.extract() for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})): tag.extract() - - return soup diff --git a/recipes/chronicle_higher_ed.recipe b/recipes/chronicle_higher_ed.recipe index f0188d4d77..15b284cd7a 100644 --- a/recipes/chronicle_higher_ed.recipe +++ b/recipes/chronicle_higher_ed.recipe @@ -1,3 +1,4 @@ +import re from calibre.web.feeds.recipes import BasicNewsRecipe from collections import OrderedDict @@ -14,7 +15,8 @@ class Chronicle(BasicNewsRecipe): dict(name='div', attrs={'class':'article'}), ] remove_tags = [dict(name='div',attrs={'class':['related module1','maintitle']}), - dict(name='div', attrs={'id':['section-nav','icon-row']})] + dict(name='div', attrs={'id':['section-nav','icon-row', 'enlarge-popup']}), + dict(name='a', 
attrs={'class':'show-enlarge enlarge'})] no_javascript = True no_stylesheets = True @@ -31,7 +33,6 @@ class Chronicle(BasicNewsRecipe): return br def parse_index(self): - #Go to the issue soup0 = self.index_to_soup('http://chronicle.com/section/Archives/39/') issue = soup0.find('ul',attrs={'class':'feature-promo-list'}).li @@ -42,9 +43,12 @@ class Chronicle(BasicNewsRecipe): self.timefmt = u' [%s]'%dates #Find cover - cover=soup0.find('div',attrs={'class':'promo'}).findNext('div') - self.cover_url="http://chronicle.com"+cover.find('img')['src'] - + cover=soup0.find('div',attrs={'class':'side-content'}).find(attrs={'src':re.compile("photos/biz/Current")}) + if cover is not None: + if "chronicle.com" in cover['src']: + self.cover_url=cover['src'] + else: + self.cover_url="http://chronicle.com" + cover['src'] #Go to the main body soup = self.index_to_soup(issueurl) div = soup.find ('div', attrs={'id':'article-body'}) @@ -74,8 +78,10 @@ class Chronicle(BasicNewsRecipe): def preprocess_html(self,soup): #process all the images for div in soup.findAll('div', attrs={'class':'tableauPlaceholder'}): + noscripts=div.find('noscript').a div.replaceWith(noscripts) for div0 in soup.findAll('div',text='Powered by Tableau'): div0.extract() return soup + diff --git a/recipes/elpais_impreso.recipe b/recipes/elpais_impreso.recipe index ffa1033477..2dbd79d094 100644 --- a/recipes/elpais_impreso.recipe +++ b/recipes/elpais_impreso.recipe @@ -41,7 +41,7 @@ class ElPais_RSS(BasicNewsRecipe): ,dict(attrs={'class':['firma','columna_texto','entrevista_p_r']}) ] remove_tags = [ - dict(name=['meta','link','base','iframe','embed','object']) + dict(name=['iframe','embed','object']) ,dict(attrs={'class':'disposicion_vertical'}) ] @@ -74,13 +74,14 @@ class ElPais_RSS(BasicNewsRecipe): ,(u'Justicia y Leyes' , u'http://elpais.com/tag/rss/justicia/a/' ) ,(u'Guerras y conflictos' , u'http://elpais.com/tag/rss/conflictos/a/' ) ,(u'Politica' , u'http://ep00.epimg.net/rss/politica/portada.xml' ) - 
,(u'Opinion' , u'http://ep01.epimg.net/rss/politica/opinion.xml' ) + ,(u'Opinion' , u'http://ep01.epimg.net/rss/elpais/opinion.xml' ) ] def get_article_url(self, article): url = BasicNewsRecipe.get_article_url(self, article) if url and (not('/album/' in url) and not('/futbol/partido/' in url)): - return url + urlverified = self.browser.open_novisit(url).geturl() + return urlverified self.log('Skipping non-article', url) return None @@ -107,3 +108,7 @@ class ElPais_RSS(BasicNewsRecipe): for item in soup.findAll('img',alt=False): item['alt'] = 'image' return soup + + def preprocess_raw_html(self, raw, url): + return 'Untitled'+raw[raw.find(''):] + \ No newline at end of file diff --git a/recipes/financial_times_uk.recipe b/recipes/financial_times_uk.recipe index 16295905bc..4e5b522ae9 100644 --- a/recipes/financial_times_uk.recipe +++ b/recipes/financial_times_uk.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010-2011, Darko Miletic ' +__copyright__ = '2010-2012, Darko Miletic ' ''' www.ft.com/uk-edition ''' @@ -51,10 +51,15 @@ class FinancialTimes(BasicNewsRecipe): return br keep_only_tags = [ - dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']}) - ,dict(name='div', attrs={'class':'standfirst'}) - ,dict(name='div', attrs={'id' :'storyContent'}) - ,dict(name='div', attrs={'class':['ft-story-body','index-detail']}) + dict(name='div' , attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']}) + ,dict(name='div' , attrs={'class':'standfirst'}) + ,dict(name='div' , attrs={'id' :'storyContent'}) + ,dict(name='div' , attrs={'class':['ft-story-body','index-detail']}) + ,dict(name='div' , attrs={'class':['ft-story-body','index-detail']}) + ,dict(name='h2' , attrs={'class':'entry-title'} ) + ,dict(name='span', attrs={'class':lambda x: x and 'posted-on' in x.split()} ) + ,dict(name='span', attrs={'class':'author_byline'} ) + ,dict(name='div' , attrs={'class':'entry-content'} ) ] remove_tags = [ dict(name='div', 
attrs={'id':'floating-con'}) @@ -83,10 +88,9 @@ class FinancialTimes(BasicNewsRecipe): if self.test and count > 2: return articles rawlink = item['href'] - if rawlink.startswith('http://'): - url = rawlink - else: - url = self.PREFIX + rawlink + url = rawlink + if not rawlink.startswith('http://'): + url = self.PREFIX + rawlink urlverified = self.browser.open_novisit(url).geturl() # resolve redirect. title = self.tag_to_string(item) date = strftime(self.timefmt) @@ -106,20 +110,20 @@ class FinancialTimes(BasicNewsRecipe): wide = soup.find('div',attrs={'class':'wide'}) if not wide: return feeds - strest = wide.findAll('h3', attrs={'class':'section'}) - if not strest: + allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()}) + if not allsections: return feeds - st = wide.findAll('h4',attrs={'class':'section-no-arrow'}) - if st: - st.extend(strest) count = 0 - for item in st: + for item in allsections: count = count + 1 if self.test and count > 2: return feeds - ftitle = self.tag_to_string(item) + fitem = item.h3 + if not fitem: + fitem = item.h4 + ftitle = self.tag_to_string(fitem) self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle)) - feedarts = self.get_artlinks(item.parent.ul) + feedarts = self.get_artlinks(item.ul) feeds.append((ftitle,feedarts)) return feeds @@ -166,7 +170,8 @@ class FinancialTimes(BasicNewsRecipe): except: print "Retrying download..." 
count += 1 - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name + tfile = PersistentTemporaryFile('_fa.html') + tfile.write(html) + tfile.close() + self.temp_files.append(tfile) + return tfile.name diff --git a/recipes/icons/monitor.png b/recipes/icons/monitor.png new file mode 100644 index 0000000000..89aa1fd399 Binary files /dev/null and b/recipes/icons/monitor.png differ diff --git a/recipes/mac_world.recipe b/recipes/mac_world.recipe index 486aa9cb87..5abbffb6bb 100644 --- a/recipes/mac_world.recipe +++ b/recipes/mac_world.recipe @@ -34,20 +34,21 @@ class macWorld(BasicNewsRecipe): remove_javascript = True no_stylesheets = True + auto_cleanup = True - keep_only_tags = [ - dict(name='div', attrs={'id':'content'}) - ] + #keep_only_tags = [ + #dict(name='div', attrs={'id':'content'}) + #] - remove_tags = [ - {'class':['toolBar','mac_tags','toolBar btmTools','textAds']}, - dict(name='p', attrs={'class':'breadcrumbs'}), - dict(id=['breadcrumb','sidebar','comments','topContentWrapper', - 'rightColumn', 'aboveFootPromo', 'storyCarousel']), - {'class':lambda x: x and ('tools' in x or 'toolBar' - in x)} + #remove_tags = [ + #{'class':['toolBar','mac_tags','toolBar btmTools','textAds']}, + #dict(name='p', attrs={'class':'breadcrumbs'}), + #dict(id=['breadcrumb','sidebar','comments','topContentWrapper', + #'rightColumn', 'aboveFootPromo', 'storyCarousel']), + #{'class':lambda x: x and ('tools' in x or 'toolBar' + #in x)} - ] + #] feeds = [ (u'MacWorld Headlines', u'http://rss.macworld.com/macworld/news'), @@ -82,3 +83,4 @@ class macWorld(BasicNewsRecipe): .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;} img {align:left;} ''' + diff --git a/recipes/maximum_pc.recipe b/recipes/maximum_pc.recipe 
index 3e4d8a58d9..c6e8099fcf 100644 --- a/recipes/maximum_pc.recipe +++ b/recipes/maximum_pc.recipe @@ -1,4 +1,3 @@ -from calibre.ptempfile import PersistentTemporaryFile from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1276930924(BasicNewsRecipe): @@ -14,30 +13,30 @@ class AdvancedUserRecipe1276930924(BasicNewsRecipe): use_embedded_content = False no_stylesheets = True language = 'en' - temp_files = [] - articles_are_obfuscated = True - feeds = [(u'News', u'http://www.maximumpc.com/articles/4/feed'), + auto_cleanup = True + feeds = [#(u'News', u'http://www.maximumpc.com/articles/all/feed'), + (u'News', u'http://www.maximumpc.com/articles/4/feed'), (u'Reviews', u'http://www.maximumpc.com/articles/40/feed'), (u'Editors Blog', u'http://www.maximumpc.com/articles/6/feed'), (u'How-to', u'http://www.maximumpc.com/articles/32/feed'), (u'Features', u'http://www.maximumpc.com/articles/31/feed'), (u'From the Magazine', u'http://www.maximumpc.com/articles/72/feed') ] - keep_only_tags = [ - dict(name='div', attrs={'class':['print-title','article_body']}), - ] - remove_tags = [ - dict(name='div', attrs={'class':'comments-tags-actions'}), - ] - remove_tags_before = dict(name='div', attrs={'class':'print-title'}) - remove_tags_after = dict(name='div', attrs={'class':'meta-content'}) + #keep_only_tags = [ + #dict(name='div', attrs={'class':['print-title','article_body']}), + #] + #remove_tags = [ + #dict(name='div', attrs={'class':'comments-tags-actions'}), + #] + #remove_tags_before = dict(name='div', attrs={'class':'print-title'}) + #remove_tags_after = dict(name='div', attrs={'class':'meta-content'}) - def get_obfuscated_article(self, url): - br = self.get_browser() - br.open(url) - response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0) - html = response.read() - self.temp_files.append(PersistentTemporaryFile('_fa.html')) - self.temp_files[-1].write(html) - self.temp_files[-1].close() - return self.temp_files[-1].name + #def 
get_obfuscated_article(self, url): + #br = self.get_browser() + #br.open(url) + #response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0) + #html = response.read() + #self.temp_files.append(PersistentTemporaryFile('_fa.html')) + #self.temp_files[-1].write(html) + #self.temp_files[-1].close() + #return self.temp_files[-1].name diff --git a/recipes/monitor.recipe b/recipes/monitor.recipe index 8762e68c16..ca594a3b4e 100644 --- a/recipes/monitor.recipe +++ b/recipes/monitor.recipe @@ -1,99 +1,66 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2012, Darko Miletic ' ''' -monitorcg.com +www.monitor.co.me ''' import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class MonitorCG(BasicNewsRecipe): - title = 'Monitor online' + title = 'MONITOR online' __author__ = 'Darko Miletic' - description = 'News from Montenegro' - publisher = 'MONITOR d.o.o. Podgorica' + description = 'Nezavisni nedjeljnik Monitor' + publisher = '"Monitor" D.O.O. 
Podgorica' category = 'news, politics, Montenegro' oldest_article = 15 max_articles_per_feed = 150 no_stylesheets = True encoding = 'utf-8' + auto_cleanup = False use_embedded_content = False - language = 'sr' - - lang ='sr-Latn-Me' - INDEX = 'http://www.monitorcg.com' - - extra_css = ' @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} ' + language = 'sr' + remove_empty_feeds = True + extra_css = """ + @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} + @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} + h2{font-family: Cambria,"Times New Roman",Times,serif1,serif} + body{font-family: Arial,sans1,sans-serif} + img{display: block} + """ conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'pretty_print': True } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + keep_only_tags = [dict(attrs={'class':['contentheading','article-meta','article-content']})] + remove_attributes = ['width','height','font','border','align'] - keep_only_tags = [dict(name='div', attrs={'id':'ja-current-content'})] - - remove_tags = [ dict(name=['object','link','embed']) - , dict(attrs={'class':['buttonheading','article-section']})] - - remove_attributes = ['style','width','height','font','border','align'] - - def adeify_images2(cls, soup): - for item in soup.findAll('img'): - for attrib in ['height','width','border','align','style']: - if item.has_key(attrib): - del item[attrib] - oldParent = item.parent - if oldParent.name == 'a': - oldParent.name == 'p' - myIndex = oldParent.contents.index(item) - brtag = Tag(soup,'br') - oldParent.insert(myIndex+1,brtag) - return soup - - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang 
- soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.html.insert(0,mlang) - return self.adeify_images2(soup) - - def parse_index(self): - totalfeeds = [] - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('div',attrs={'class':'ja-catslwi'}) - if cover_item: - dt = cover_item['onclick'].partition("location.href=")[2] - curl = self.INDEX + dt.strip("'") - lfeeds = [(u'Svi clanci', curl)] - for feedobj in lfeeds: - feedtitle, feedurl = feedobj - self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) - articles = [] - soup = self.index_to_soup(feedurl) - contitem = soup.find('div',attrs={'class':'article-content'}) - if contitem: - img = contitem.find('img') - if img: - self.cover_url = self.INDEX + img['src'] - for item in contitem.findAll('a'): - url = self.INDEX + item['href'] - title = self.tag_to_string(item) - articles.append({ - 'title' :title - ,'date' :'' - ,'url' :url - ,'description':'' - }) - totalfeeds.append((feedtitle, articles)) - return totalfeeds - + feeds = [ + (u'Danas, Sjutra' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=5&Itemid=27&format=feed&type=rss') + ,(u'Duhankesa' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=13&Itemid=37&format=feed&type=rss') + ,(u'Znaci prepoznavanja', u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=6&Itemid=358&format=feed&type=rss') + ,(u'Paralele' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=8&Itemid=359&format=feed&type=rss') + ,(u'Razbijeno ogledalo' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=18&Itemid=354&format=feed&type=rss') + ,(u'Tržište' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=26&Itemid=371&format=feed&type=rss') + ,(u'Feljton' , 
u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=29&Itemid=471&format=feed&type=rss') + ,(u'Monitor' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=1&Itemid=1852&format=feed&type=rss') + ,(u'Altervizija' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=31&Itemid=2623&format=feed&type=rss') + ,(u'Fenomeni' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=35&Itemid=3549&format=feed&type=rss') + ,(u'Fokus' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=19&Itemid=252&format=feed&type=rss') + ,(u'Monitoring' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=19&Itemid=252&format=feed&type=rss') + ,(u'Profil' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=21&Itemid=256&format=feed&type=rss') + ,(u'Intervju' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=27&Itemid=404&format=feed&type=rss') + ,(u'Društvo' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=14&Itemid=2&format=feed&type=rss') + ,(u'Region' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=12&Itemid=53&format=feed&type=rss') + ,(u'Svijet' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=11&Itemid=360&format=feed&type=rss') + ,(u'Kultura' , u'http://www.monitor.co.me/index.php?option=com_content&view=section&layout=blog&id=9&Itemid=361&format=feed&type=rss') + ] diff --git a/recipes/pubblico_giornale.recipe b/recipes/pubblico_giornale.recipe new file mode 100644 index 0000000000..b11b5fb8aa --- /dev/null +++ b/recipes/pubblico_giornale.recipe @@ -0,0 +1,21 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'iusvar' +__description__ = 'Pubblico giornale' + +''' +http://pubblicogiornale.it/ +''' + 
+from calibre.web.feeds.news import BasicNewsRecipe + +class Pubblicogiornale(BasicNewsRecipe): + description = 'Italian newspaper directed by Luca Telese' + cover_url = 'http://pubblicogiornale.it/wp-content/uploads/logo_n.png?84cd58' + title = u'Pubblico giornale' + publisher = 'PUBBLICO EDIZIONI Srl' + category = 'News' + language = 'it' + __author__ = 'iusvar' + + feeds = [(u'Pubblico giornale', u'http://pubblicogiornale.it/feed/')] diff --git a/resources/content_server/button-donate.png b/resources/content_server/button-donate.png index 25ccf3f514..1c15b3d13c 100644 Binary files a/resources/content_server/button-donate.png and b/resources/content_server/button-donate.png differ diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 18b2ab31e9..be3e7e0e03 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -230,7 +230,7 @@ class ANDROID(USBMS): 'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE', 'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID', 'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E', - 'NOVO7'] + 'NOVO7', 'MB526'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', diff --git a/src/calibre/devices/kindle/driver.py b/src/calibre/devices/kindle/driver.py index a657b777f7..7821631e85 100644 --- a/src/calibre/devices/kindle/driver.py +++ b/src/calibre/devices/kindle/driver.py @@ -288,7 +288,7 @@ class KINDLE2(KINDLE): name = 'Kindle 2/3/4/Touch Device Interface' description = _('Communicate with the Kindle 2/3/4/Touch eBook reader.') - FORMATS = ['azw3'] + KINDLE.FORMATS + ['pdf', 'azw4', 'pobi'] + FORMATS = ['azw', 'mobi', 'azw3', 'prc', 'azw1', 'tpz', 'azw4', 'pobi', 'pdf', 'txt'] DELETE_EXTS = KINDLE.DELETE_EXTS + ['.mbp1', '.mbs', '.sdr', '.han'] # On the Touch, there's also .asc 
files, but not using the same basename (for X-Ray & End Actions), azw3f & azw3r files, but all of them are in the .sdr sidecar folder @@ -450,7 +450,7 @@ class KINDLE_DX(KINDLE2): name = 'Kindle DX Device Interface' description = _('Communicate with the Kindle DX eBook reader.') - FORMATS = KINDLE2.FORMATS[1:] + FORMATS = ['azw', 'mobi', 'prc', 'azw1', 'tpz', 'azw4', 'pobi', 'pdf', 'txt'] PRODUCT_ID = [0x0003] BCD = [0x0100] @@ -462,7 +462,7 @@ class KINDLE_FIRE(KINDLE2): name = 'Kindle Fire Device Interface' description = _('Communicate with the Kindle Fire') gui_name = 'Fire' - FORMATS = list(KINDLE2.FORMATS) + FORMATS = ['azw3', 'azw', 'mobi', 'prc', 'azw1', 'tpz', 'azw4', 'pobi', 'pdf', 'txt'] PRODUCT_ID = [0x0006] BCD = [0x216, 0x100] diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index d86262c78b..760113c366 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -169,6 +169,7 @@ class MTP_DEVICE(MTPDeviceBase): try: self.dev = self.create_device(connected_device) except Exception as e: + self.blacklisted_devices.add(connected_device) raise OpenFailed('Failed to open %s: Error: %s'%( connected_device, as_unicode(e))) @@ -195,6 +196,19 @@ class MTP_DEVICE(MTPDeviceBase): self.current_serial_num = snum self.currently_connected_dev = connected_device + @synchronous + def device_debug_info(self): + ans = self.get_gui_name() + ans += '\nSerial number: %s'%self.current_serial_num + ans += '\nManufacturer: %s'%self.dev.manufacturer_name + ans += '\nModel: %s'%self.dev.model_name + ans += '\nids: %s'%(self.dev.ids,) + ans += '\nDevice version: %s'%self.dev.device_version + ans += '\nStorage:\n' + storage = sorted(self.dev.storage_info, key=operator.itemgetter('id')) + ans += pprint.pformat(storage) + return ans + @property def filesystem_cache(self): if self._filesystem_cache is None: diff --git a/src/calibre/devices/mtp/windows/device_enumeration.cpp 
b/src/calibre/devices/mtp/windows/device_enumeration.cpp index 2c9b48d506..9fddd6bb4d 100644 --- a/src/calibre/devices/mtp/windows/device_enumeration.cpp +++ b/src/calibre/devices/mtp/windows/device_enumeration.cpp @@ -84,8 +84,8 @@ PyObject* get_storage_info(IPortableDevice *device) { // {{{ PWSTR object_ids[10]; GUID guid; ULONGLONG capacity, free_space, capacity_objects, free_objects; - ULONG access; - LPWSTR storage_desc = NULL; + ULONG access, storage_type = WPD_STORAGE_TYPE_UNDEFINED; + LPWSTR storage_desc = NULL, st = NULL; storage = PyList_New(0); if (storage == NULL) { PyErr_NoMemory(); goto end; } @@ -116,6 +116,7 @@ PyObject* get_storage_info(IPortableDevice *device) { // {{{ hr = storage_properties->Add(WPD_STORAGE_FREE_SPACE_IN_OBJECTS); hr = storage_properties->Add(WPD_STORAGE_ACCESS_CAPABILITY); hr = storage_properties->Add(WPD_STORAGE_FILE_SYSTEM_TYPE); + hr = storage_properties->Add(WPD_STORAGE_TYPE); hr = storage_properties->Add(WPD_OBJECT_NAME); Py_END_ALLOW_THREADS; if (FAILED(hr)) {hresult_set_exc("Failed to create collection of properties for storage query", hr); goto end; } @@ -145,6 +146,7 @@ PyObject* get_storage_info(IPortableDevice *device) { // {{{ values->GetUnsignedLargeIntegerValue(WPD_STORAGE_CAPACITY_IN_OBJECTS, &capacity_objects); values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_BYTES, &free_space); values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_OBJECTS, &free_objects); + values->GetUnsignedIntegerValue(WPD_STORAGE_TYPE, &storage_type); desc = Py_False; if (SUCCEEDED(values->GetUnsignedIntegerValue(WPD_STORAGE_ACCESS_CAPABILITY, &access)) && access == WPD_STORAGE_ACCESS_CAPABILITY_READWRITE) desc = Py_True; soid = PyUnicode_FromWideChar(object_ids[i], wcslen(object_ids[i])); @@ -167,6 +169,25 @@ PyObject* get_storage_info(IPortableDevice *device) { // {{{ if (desc != NULL) { PyDict_SetItemString(so, "filesystem", desc); Py_DECREF(desc);} CoTaskMemFree(storage_desc); storage_desc = NULL; } + 
switch(storage_type) { + case WPD_STORAGE_TYPE_REMOVABLE_RAM: + st = L"removable_ram"; + break; + case WPD_STORAGE_TYPE_REMOVABLE_ROM: + st = L"removable_rom"; + break; + case WPD_STORAGE_TYPE_FIXED_RAM: + st = L"fixed_ram"; + break; + case WPD_STORAGE_TYPE_FIXED_ROM: + st = L"fixed_rom"; + break; + default: + st = L"unknown_unknown"; + } + desc = PyUnicode_FromWideChar(st, wcslen(st)); + if (desc != NULL) {PyDict_SetItemString(so, "type", desc); Py_DECREF(desc);} + desc = NULL; PyList_Append(storage, so); Py_DECREF(so); } diff --git a/src/calibre/devices/mtp/windows/driver.py b/src/calibre/devices/mtp/windows/driver.py index 22079c287b..202c8dfd6e 100644 --- a/src/calibre/devices/mtp/windows/driver.py +++ b/src/calibre/devices/mtp/windows/driver.py @@ -13,7 +13,7 @@ from future_builtins import zip from itertools import chain from calibre import as_unicode, prints -from calibre.constants import plugins, __appname__, numeric_version +from calibre.constants import plugins, __appname__, numeric_version, isxp from calibre.ptempfile import SpooledTemporaryFile from calibre.devices.errors import OpenFailed, DeviceError, BlacklistedDevice from calibre.devices.mtp.base import MTPDeviceBase, debug @@ -52,10 +52,15 @@ class MTP_DEVICE(MTPDeviceBase): self.start_thread = None self._filesystem_cache = None self.eject_dev_on_next_scan = False + self.current_device_data = {} def startup(self): self.start_thread = threading.current_thread() - self.wpd, self.wpd_error = plugins['wpd'] + if isxp: + self.wpd = None + self.wpd_error = _('MTP devices are not supported on Windows XP') + else: + self.wpd, self.wpd_error = plugins['wpd'] if self.wpd is not None: try: self.wpd.init(__appname__, *(numeric_version[:3])) @@ -196,6 +201,12 @@ class MTP_DEVICE(MTPDeviceBase): if not devdata.get('has_storage', False): return False has_rw_storage = False for s in devdata.get('storage', []): + if s.get('filesystem', None) == 'DCF': + # DCF filesystem indicates a camera or an iPhone + # See 
https://bugs.launchpad.net/calibre/+bug/1054562 + continue + if s.get('type', 'unknown_unknown').split('_')[-1] == 'rom': + continue # Read only storage if s.get('rw', False): has_rw_storage = True break @@ -280,6 +291,8 @@ class MTP_DEVICE(MTPDeviceBase): raise BlacklistedDevice( 'The %s device has been blacklisted by the user'%(connected_device,)) + storage.sort(key=lambda x:x.get('id', 'zzzzz')) + self._main_id = storage[0]['id'] if len(storage) > 1: self._carda_id = storage[1]['id'] @@ -291,6 +304,11 @@ class MTP_DEVICE(MTPDeviceBase): _('Unknown MTP device')) self.currently_connected_pnp_id = connected_device self.current_serial_num = snum + self.current_device_data = devdata.copy() + + def device_debug_info(self): + import pprint + return pprint.pformat(self.current_device_data) @same_thread def get_basic_device_information(self): diff --git a/src/calibre/devices/prst1/driver.py b/src/calibre/devices/prst1/driver.py index 8b76255532..4cbe9b4994 100644 --- a/src/calibre/devices/prst1/driver.py +++ b/src/calibre/devices/prst1/driver.py @@ -687,7 +687,7 @@ class PRST1(USBMS): 'WHERE _id = ?') t = (collectionId,) cursor.execute(query, t) - debug_print('Deleted Collection: ' + collection) + debug_print('Deleted Collection: ' + repr(collection)) connection.commit() cursor.close() diff --git a/src/calibre/devices/smart_device_app/driver.py b/src/calibre/devices/smart_device_app/driver.py index 1931d68b82..b60f2ec7b1 100644 --- a/src/calibre/devices/smart_device_app/driver.py +++ b/src/calibre/devices/smart_device_app/driver.py @@ -902,10 +902,16 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin): return False def get_gui_name(self): - if self.client_device_kind: + if getattr(self, 'client_device_kind', None): return self.gui_name_template%(self.gui_name, self.client_device_kind) return self.gui_name + def config_widget(self): + from calibre.gui2.device_drivers.configwidget import ConfigWidget + cw = ConfigWidget(self.settings(), self.FORMATS, 
self.SUPPORTS_SUB_DIRS, + self.MUST_READ_METADATA, self.SUPPORTS_USE_AUTHOR_SORT, + self.EXTRA_CUSTOMIZATION_MESSAGE, self) + return cw @synchronous('sync_lock') def get_device_information(self, end_session=True): diff --git a/src/calibre/ebooks/conversion/plugins/pdf_output.py b/src/calibre/ebooks/conversion/plugins/pdf_output.py index 35504b31fb..b3eed763ac 100644 --- a/src/calibre/ebooks/conversion/plugins/pdf_output.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_output.py @@ -14,6 +14,8 @@ import os from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation from calibre.ptempfile import TemporaryDirectory +from calibre.constants import iswindows +from calibre import walk UNITS = [ 'millimeter', @@ -148,6 +150,16 @@ class PDFOutput(OutputFormatPlugin): oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) + if iswindows: + # On windows Qt generates an image based PDF if the html uses + # embedded fonts. 
See https://launchpad.net/bugs/1053906 + for f in walk(oeb_dir): + if f.rpartition('.')[-1].lower() in {'ttf', 'otf'}: + self.log.warn('Found embedded font %s, removing it, as ' + 'embedded fonts on windows are not supported by ' + 'the PDF Output plugin'%os.path.basename(f)) + os.remove(f) + opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 966e5caa30..3e5d95f1ce 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -792,19 +792,16 @@ class OPF(object): # {{{ remove = list(self.authors_path(self.metadata)) for elem in remove: elem.getparent().remove(elem) - elems = [] - for author in val: - attrib = {'{%s}role'%self.NAMESPACES['opf']: 'aut'} - elem = self.create_metadata_element('creator', attrib=attrib) + # Ensure new author element is at the top of the list + # for broken implementations that always use the first + # element with no attention to the role + for author in reversed(val): + elem = self.metadata.makeelement('{%s}creator'% + self.NAMESPACES['dc'], nsmap=self.NAMESPACES) + elem.tail = '\n' + self.metadata.insert(0, elem) + elem.set('{%s}role'%self.NAMESPACES['opf'], 'aut') self.set_text(elem, author.strip()) - # Ensure new author element is at the top of the list - # for broken implementations that always use the first - # element with no attention to the role - elems.append(elem) - for elem in reversed(elems): - parent = elem.getparent() - parent.remove(elem) - parent.insert(0, elem) return property(fget=fget, fset=fset) @@ -1020,9 +1017,8 @@ class OPF(object): # {{{ def fset(self, val): matches = self.bkp_path(self.metadata) if not matches: - attrib = {'{%s}role'%self.NAMESPACES['opf']: 'bkp'} - matches = [self.create_metadata_element('contributor', - attrib=attrib)] + matches = [self.create_metadata_element('contributor')] + 
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp') self.set_text(matches[0], unicode(val)) return property(fget=fget, fset=fset) @@ -1155,7 +1151,7 @@ class OPF(object): # {{{ def smart_update(self, mi, replace_metadata=False): for attr in ('title', 'authors', 'author_sort', 'title_sort', 'publisher', 'series', 'series_index', 'rating', - 'isbn', 'tags', 'category', 'comments', + 'isbn', 'tags', 'category', 'comments', 'book_producer', 'pubdate', 'user_categories', 'author_link_map'): val = getattr(mi, attr, None) if val is not None and val != [] and val != (None, None): diff --git a/src/calibre/ebooks/mobi/writer8/skeleton.py b/src/calibre/ebooks/mobi/writer8/skeleton.py index ae8fdf364c..2c3562e87b 100644 --- a/src/calibre/ebooks/mobi/writer8/skeleton.py +++ b/src/calibre/ebooks/mobi/writer8/skeleton.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import re from collections import namedtuple from functools import partial +from xml.sax.saxutils import escape from lxml import etree @@ -289,6 +290,7 @@ class Chunker(object): self.chunk_selector = ('S', aid) def chunk_up_text(self, text): + text = escape(text) text = text.encode('utf-8') ans = [] diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index d2f5704c6d..b56c40d402 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -853,7 +853,7 @@ class DeviceMixin(object): # {{{ self.connect_to_folder_named(tweaks['auto_connect_to_folder']) def allow_connect(self, name, icon): - return question_dialog(self, _('Mange the %s?')%name, + return question_dialog(self, _('Manage the %s?')%name, _('Detected the %s. 
Do you want calibre to manage it?')% name, show_copy_button=False, override_icon=QIcon(icon)) diff --git a/src/calibre/gui2/device_drivers/mtp_config.py b/src/calibre/gui2/device_drivers/mtp_config.py index 9fd59ab124..dbb31a3e3d 100644 --- a/src/calibre/gui2/device_drivers/mtp_config.py +++ b/src/calibre/gui2/device_drivers/mtp_config.py @@ -12,7 +12,8 @@ import weakref from PyQt4.Qt import (QWidget, QListWidgetItem, Qt, QToolButton, QLabel, QTabWidget, QGridLayout, QListWidget, QIcon, QLineEdit, QVBoxLayout, QPushButton, QGroupBox, QScrollArea, QHBoxLayout, QComboBox, - pyqtSignal, QSizePolicy, QDialog, QDialogButtonBox) + pyqtSignal, QSizePolicy, QDialog, QDialogButtonBox, QPlainTextEdit, + QApplication) from calibre.ebooks import BOOK_EXTENSIONS from calibre.gui2 import error_dialog @@ -372,15 +373,19 @@ class MTPConfig(QTabWidget): _('&Ignore the %s in calibre')%device.current_friendly_name, self.base) b.clicked.connect(self.ignore_device) + self.show_debug_button = bd = QPushButton(QIcon(I('debug.png')), + _('Show device information')) + bd.clicked.connect(self.show_debug_info) l.addWidget(b, 0, 0, 1, 2) l.addWidget(la, 1, 0, 1, 1) - l.addWidget(self.formats, 2, 0, 3, 1) + l.addWidget(self.formats, 2, 0, 4, 1) l.addWidget(self.send_to, 2, 1, 1, 1) l.addWidget(self.template, 3, 1, 1, 1) - l.setRowStretch(4, 10) - l.addWidget(r, 5, 0, 1, 2) - l.setRowStretch(5, 100) + l.addWidget(self.show_debug_button, 4, 1, 1, 1) + l.setRowStretch(5, 10) + l.addWidget(r, 6, 0, 1, 2) + l.setRowStretch(6, 100) self.igntab = IgnoredDevices(self.device.prefs['history'], self.device.prefs['blacklist']) @@ -388,6 +393,26 @@ class MTPConfig(QTabWidget): self.setCurrentIndex(1 if msg else 0) + def show_debug_info(self): + info = self.device.device_debug_info() + d = QDialog(self) + d.l = l = QVBoxLayout() + d.setLayout(l) + d.v = v = QPlainTextEdit() + d.setWindowTitle(self.device.get_gui_name()) + v.setPlainText(info) + v.setMinimumWidth(400) + v.setMinimumHeight(350) + 
l.addWidget(v) + bb = d.bb = QDialogButtonBox(QDialogButtonBox.Close) + bb.accepted.connect(d.accept) + bb.rejected.connect(d.reject) + l.addWidget(bb) + bb.addButton(_('Copy to clipboard'), bb.ActionRole) + bb.clicked.connect(lambda : + QApplication.clipboard().setText(v.toPlainText())) + d.exec_() + def ignore_device(self): self.igntab.ignore_device(self.device.current_serial_num) self.base.b.setEnabled(False) diff --git a/src/calibre/gui2/store/stores/ebooks_com_plugin.py b/src/calibre/gui2/store/stores/ebooks_com_plugin.py index 7bf6704d9f..826b59d41d 100644 --- a/src/calibre/gui2/store/stores/ebooks_com_plugin.py +++ b/src/calibre/gui2/store/stores/ebooks_com_plugin.py @@ -97,7 +97,7 @@ class EbookscomStore(BasicStoreConfig, StorePlugin): with closing(br.open(url + id, timeout=timeout)) as nf: pdoc = html.fromstring(nf.read()) - price_l = pdoc.xpath('//span[@class="price"]/text()') + price_l = pdoc.xpath('//div[@class="book-info"]/div[@class="price"]/text()') if price_l: price = price_l[0] search_result.price = price.strip() diff --git a/src/calibre/translations/calibre.pot b/src/calibre/translations/calibre.pot index fa0f160cb0..ccd6b65bc4 100644 --- a/src/calibre/translations/calibre.pot +++ b/src/calibre/translations/calibre.pot @@ -5,8 +5,8 @@ msgid "" msgstr "" "Project-Id-Version: calibre 0.8.70\n" -"POT-Creation-Date: 2012-09-21 09:52+IST\n" -"PO-Revision-Date: 2012-09-21 09:52+IST\n" +"POT-Creation-Date: 2012-09-21 13:10+IST\n" +"PO-Revision-Date: 2012-09-21 13:10+IST\n" "Last-Translator: Automatically generated\n" "Language-Team: LANGUAGE\n" "MIME-Version: 1.0\n" @@ -3485,7 +3485,7 @@ msgid "" "Fetch a cover image/social metadata for the book identified by ISBN from LibraryThing.com\n" msgstr "" -#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1488 +#: /home/kovid/work/calibre/src/calibre/ebooks/metadata/opf2.py:1491 #: /home/kovid/work/calibre/src/calibre/ebooks/oeb/base.py:1279 #: 
/home/kovid/work/calibre/src/calibre/gui2/metadata/single.py:958 #: /home/kovid/work/calibre/src/calibre/gui2/store/search/models.py:41 @@ -7827,7 +7827,7 @@ msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/device.py:856 #, python-format -msgid "Mange the %s?" +msgid "Manage the %s?" msgstr "" #: /home/kovid/work/calibre/src/calibre/gui2/device.py:857 @@ -17967,7 +17967,7 @@ msgid "select(val, key) -- interpret the value as a comma-separated list of item msgstr "" #: /home/kovid/work/calibre/src/calibre/utils/formatter_functions.py:600 -msgid "approximate_formats() -- return a comma-separated list of formats that at one point were associated with the book. There is no guarantee that this list is correct, although it probably is. This function can be called in template program mode using the template \"{:'approximate_formats()'}. Note that format names are always uppercase, as in EPUB." +msgid "approximate_formats() -- return a comma-separated list of formats that at one point were associated with the book. There is no guarantee that this list is correct, although it probably is. This function can be called in template program mode using the template \"{:'approximate_formats()'}\". Note that format names are always uppercase, as in EPUB." 
msgstr "" #: /home/kovid/work/calibre/src/calibre/utils/formatter_functions.py:620
diff --git a/src/calibre/utils/fonts/embedflag.py b/src/calibre/utils/fonts/embedflag.py
new file mode 100644
index 0000000000..0c4e94bae6
--- /dev/null
+++ b/src/calibre/utils/fonts/embedflag.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import sys, struct
+
+class UnsupportedFont(ValueError):
+    '''Raised when the sfnt version is not recognized or the font has no OS/2 table.'''
+    pass
+
+def remove_embed_restriction(raw):
+    '''
+    Return the font data ``raw`` with the fsType field of its OS/2 table set
+    to zero. If fsType is already zero, ``raw`` itself is returned.
+
+    :param raw: The raw bytes of a TrueType or OpenType (sfnt) font
+    :raises UnsupportedFont: If the sfnt version is not recognized or the
+        font has no OS/2 table
+    '''
+    sfnt_version = raw[:4]
+    if sfnt_version not in {b'\x00\x01\x00\x00', b'OTTO'}:
+        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sfnt_version)
+
+    num_tables = struct.unpack_from(b'>H', raw, 4)[0]
+
+    # Find OS/2 table
+    offset = 4 + 4*2 # Start of the Table record entries
+    os2_table_offset = None
+    for i in range(num_tables):
+        table_tag = raw[offset:offset+4]
+        if table_tag == b'OS/2':
+            # A table record is tag, checksum, offset, length (4 bytes each),
+            # so the table's offset is 8 bytes into the current record. Read
+            # it before advancing to the next record.
+            os2_table_offset = struct.unpack_from(b'>I', raw, offset+8)[0]
+            break
+        offset += 16 # Size of a table record
+    if os2_table_offset is None:
+        raise UnsupportedFont('Not a supported font, has no OS/2 table')
+
+    # fsType follows version, xAvgCharWidth, usWeightClass and usWidthClass
+    fs_type_offset = os2_table_offset + struct.calcsize(b'>HhHH')
+    fs_type = struct.unpack_from(b'>H', raw, fs_type_offset)[0]
+    if fs_type == 0:
+        return raw
+
+    return raw[:fs_type_offset] + struct.pack(b'>H', 0) + raw[fs_type_offset+2:]
+
+if __name__ == '__main__':
+    with open(sys.argv[-1], 'rb') as f:
+        remove_embed_restriction(f.read())
+
diff --git a/src/calibre/utils/formatter_functions.py b/src/calibre/utils/formatter_functions.py
index 393ef876c9..67c75bdc79 100644
--- a/src/calibre/utils/formatter_functions.py
+++ b/src/calibre/utils/formatter_functions.py
@@ -602,7 +602,7 @@ class 
BuiltinApproximateFormats(BuiltinFormatterFunction): 'book. There is no guarantee that this list is correct, ' 'although it probably is. ' 'This function can be called in template program mode using ' - 'the template "{:\'approximate_formats()\'}. ' + 'the template "{:\'approximate_formats()\'}". ' 'Note that format names are always uppercase, as in EPUB.' ) diff --git a/src/calibre/utils/windows/winutil.c b/src/calibre/utils/windows/winutil.c index 6b23f47c6d..53ebfcca89 100644 --- a/src/calibre/utils/windows/winutil.c +++ b/src/calibre/utils/windows/winutil.c @@ -295,10 +295,10 @@ get_all_removable_disks(struct tagDrives *g_drives) for(nLoopIndex = 0; nLoopIndex < MAX_DRIVES; nLoopIndex++) { - // if a drive is present (we ignore the A and B drives as they are - // always present (even if no actual floppy is present) and we dont - // care about floppies) - if(nLoopIndex > 1 && dwDriveMask & 1) + // if a drive is present (we cannot ignore the A and B drives as there + // are people out there that think mapping devices to use those letters + // is a good idea, sigh) + if(dwDriveMask & 1) { caDrive[0] = 'A' + nLoopIndex;