Merge from trunk

This commit is contained in:
Charles Haley 2013-01-02 11:22:27 +01:00
commit 05887d353f
36 changed files with 1858 additions and 1079 deletions

10
README
View File

@ -1,7 +1,7 @@
calibre is an e-book library manager. It can view, convert and catalog e-books \
in most of the major e-book formats. It can also talk to e-book reader \
devices. It can go out to the internet and fetch metadata for your books. \
It can download newspapers and convert them into e-books for convenient \
calibre is an e-book library manager. It can view, convert and catalog e-books
in most of the major e-book formats. It can also talk to e-book reader
devices. It can go out to the internet and fetch metadata for your books.
It can download newspapers and convert them into e-books for convenient
reading. It is cross platform, running on Linux, Windows and OS X.
For screenshots: https://calibre-ebook.com/demo
@ -15,5 +15,5 @@ bzr branch lp:calibre
To update your copy of the source code:
bzr merge
Tarballs of the source code for each release are now available \
Tarballs of the source code for each release are now available
at http://code.google.com/p/calibre-ebook

View File

@ -162,7 +162,8 @@ Follow these steps to find the problem:
* If you are connecting an Apple iDevice (iPad, iPod Touch, iPhone), use the 'Connect to iTunes' method in the 'Getting started' instructions in `Calibre + Apple iDevices: Start here <http://www.mobileread.com/forums/showthread.php?t=118559>`_.
* Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website <http://calibre-ebook.com/download>`_.
* Ensure your operating system is seeing the device. That is, the device should show up in Windows Explorer (in Windows) or Finder (in OS X).
* In |app|, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled, the plugin icon next to it should be green when it is enabled.
* In |app|, go to Preferences->Ignored Devices and check that your device
is not being ignored
* If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker <http://bugs.calibre-ebook.com>`_.
My device is non-standard or unusual. What can I do to connect to it?
@ -668,6 +669,9 @@ There are three possible things I know of, that can cause this:
the blacklist of programs inside RoboForm to fix this. Or uninstall
RoboForm.
* The Logitech SetPoint Settings application causes random crashes in
|app| when it is open. Close it before starting |app|.
|app| is not starting on OS X?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -10,14 +10,12 @@ class Alternet(BasicNewsRecipe):
category = 'News, Magazine'
description = 'News magazine and online community'
feeds = [
(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
(u'Front Page', u'http://feeds.feedblitz.com/alternet')
]
remove_attributes = ['width', 'align','cellspacing']
remove_javascript = True
use_embedded_content = False
use_embedded_content = True
no_stylesheets = True
language = 'en'
encoding = 'UTF-8'

View File

@ -11,21 +11,21 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
by Chen Wei weichen302@gmx.com, 2012-02-05'''
__license__ = 'GPL v3'
__author__ = 'kwetal'
__author__ = 'Rick Shang, kwetal'
language = 'en'
version = 1.01
title = u'Foreign Affairs (Subcription or (free) Registration)'
title = u'Foreign Affairs (Subcription)'
publisher = u'Council on Foreign Relations'
category = u'USA, Foreign Affairs'
description = u'The leading forum for serious discussion of American foreign policy and international affairs.'
no_stylesheets = True
remove_javascript = True
needs_subscription = True
INDEX = 'http://www.foreignaffairs.com'
FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
INCLUDE_PREMIUM = False
remove_tags = []
@ -68,43 +68,57 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
def parse_index(self):
answer = []
soup = self.index_to_soup(self.FRONTPAGE)
sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
#get dates
date = re.split('\s\|\s',self.tag_to_string(soup.head.title.string))[0]
self.timefmt = u' [%s]'%date
sec_start = soup.findAll('div', attrs= {'class':'panel-pane'})
for sec in sec_start:
content = sec.nextSibling
if content:
section = self.tag_to_string(content.find('h2'))
articles = []
tags = []
for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
tags.append(div)
for li in content.findAll('li'):
tags.append(li)
for div in tags:
title = url = description = author = None
if self.INCLUDE_PREMIUM:
found_premium = False
else:
found_premium = div.findAll('span', attrs={'class':
'premium-icon'})
if not found_premium:
tag = div.find('div', attrs={'class': 'views-field-title'})
if tag:
a = tag.find('a')
if a:
title = self.tag_to_string(a)
url = self.INDEX + a['href']
author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
description = self.tag_to_string(tag_summary)
articles.append({'title':title, 'date':None, 'url':url,
'description':description, 'author':author})
if articles:
articles = []
section = self.tag_to_string(sec.find('h2'))
if 'Books' in section:
reviewsection=sec.find('div', attrs = {'class': 'item-list'})
for subsection in reviewsection.findAll('div'):
subsectiontitle=self.tag_to_string(subsection.span.a)
subsectionurl=self.INDEX + subsection.span.a['href']
soup1 = self.index_to_soup(subsectionurl)
for div in soup1.findAll('div', attrs = {'class': 'views-field-title'}):
if div.find('a') is not None:
originalauthor=self.tag_to_string(div.findNext('div', attrs = {'class':'views-field-field-article-book-nid'}).div.a)
title=subsectiontitle+': '+self.tag_to_string(div.span.a)+' by '+originalauthor
url=self.INDEX+div.span.a['href']
atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
if atr is not None:
author=self.tag_to_string(atr.span.a)
else:
author=''
desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'})
if desc is not None:
description=self.tag_to_string(desc.div.p)
else:
description=''
articles.append({'title':title, 'date':None, 'url':url, 'description':description, 'author':author})
subsectiontitle=''
else:
for div in sec.findAll('div', attrs = {'class': 'views-field-title'}):
if div.find('a') is not None:
title=self.tag_to_string(div.span.a)
url=self.INDEX+div.span.a['href']
atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
if atr is not None:
author=self.tag_to_string(atr.span.a)
else:
author=''
desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'})
if desc is not None:
description=self.tag_to_string(desc.div.p)
else:
description=''
articles.append({'title':title, 'date':None, 'url':url, 'description':description, 'author':author})
if articles:
answer.append((section, articles))
return answer
@ -115,15 +129,17 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
return soup
needs_subscription = True
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('https://www.foreignaffairs.com/user?destination=home')
br.open('https://www.foreignaffairs.com/user?destination=user%3Fop%3Dlo')
br.select_form(nr = 1)
br['name'] = self.username
br['pass'] = self.password
br.submit()
return br
def cleanup(self):
self.browser.open('http://www.foreignaffairs.com/logout?destination=user%3Fop=lo')

View File

@ -8,7 +8,7 @@ If you have institutional subscription based on access IP you do not need to ent
anything in username/password fields
'''
import time
import time, re
import urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -29,7 +29,6 @@ class Harpers_full(BasicNewsRecipe):
needs_subscription = 'optional'
masthead_url = 'http://harpers.org/wp-content/themes/harpers/images/pheader.gif'
publication_type = 'magazine'
INDEX = strftime('http://harpers.org/archive/%Y/%m')
LOGIN = 'http://harpers.org/wp-content/themes/harpers/ajax_login.php'
extra_css = """
body{font-family: adobe-caslon-pro,serif}
@ -65,17 +64,28 @@ class Harpers_full(BasicNewsRecipe):
return br
def parse_index(self):
#find current issue
soup = self.index_to_soup('http://harpers.org/')
currentIssue=soup.find('div',attrs={'class':'mainNavi'}).find('li',attrs={'class':'curentIssue'})
currentIssue_url=self.tag_to_string(currentIssue.a['href'])
self.log(currentIssue_url)
#go to the current issue
soup1 = self.index_to_soup(currentIssue_url)
date = re.split('\s\|\s',self.tag_to_string(soup1.head.title.string))[0]
self.timefmt = u' [%s]'%date
#get cover
coverurl='http://harpers.org/wp-content/themes/harpers/ajax_microfiche.php?img=harpers-'+re.split('harpers.org/',currentIssue_url)[1]+'gif/0001.gif'
soup2 = self.index_to_soup(coverurl)
self.cover_url = self.tag_to_string(soup2.find('img')['src'])
self.log(self.cover_url)
articles = []
print 'Processing ' + self.INDEX
soup = self.index_to_soup(self.INDEX)
count = 0
for item in soup.findAll('div', attrs={'class':'articleData'}):
for item in soup1.findAll('div', attrs={'class':'articleData'}):
text_links = item.findAll('h2')
for text_link in text_links:
if count == 0:
lcover_url = item.find(attrs={'class':'dwpdf'})
if lcover_url:
self.cover_url = lcover_url.a['href']
count = 1
else:
url = text_link.a['href']
@ -87,7 +97,14 @@ class Harpers_full(BasicNewsRecipe):
,'url' :url
,'description':''
})
return [(soup.head.title.string, articles)]
return [(soup1.head.title.string, articles)]
def print_version(self, url):
return url + '?single=1'
def cleanup(self):
soup = self.index_to_soup('http://harpers.org/')
signouturl=self.tag_to_string(soup.find('li', attrs={'class':'subLogOut'}).findNext('li').a['href'])
self.log(signouturl)
self.browser.open(signouturl)

View File

@ -2,7 +2,7 @@
__license__ = 'GPL v3'
__author__ = 'Gabriele Marini, based on Darko Miletic'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__description__ = 'La Stampa 05/05/2010'
__description__ = 'La Stampa 28/12/2012'
'''
http://www.lastampa.it/
@ -14,10 +14,11 @@ class LaStampa(BasicNewsRecipe):
title = u'La Stampa'
language = 'it'
__author__ = 'Gabriele Marini'
oldest_article = 15
#oldest_article = 15
oldest_articlce = 7 #for daily schedule
max_articles_per_feed = 50
recursion = 100
cover_url = 'http://www.lastampa.it/edicola/PDF/1.pdf'
cover_url = 'http://www1.lastampa.it/edicola/PDF/1.pdf'
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
@ -33,35 +34,41 @@ class LaStampa(BasicNewsRecipe):
if link:
return link[0]['href']
keep_only_tags = [dict(attrs={'class':['boxocchiello2','titoloRub','titologir','catenaccio','sezione','articologirata']}),
keep_only_tags = [dict(attrs={'class':['boxocchiello2','titoloRub','titologir','autore-girata','luogo-girata','catenaccio','sezione','articologirata','bodytext','news-single-img','ls-articoloCorpo','ls-blog-list-1col']}),
dict(name='div', attrs={'id':'corpoarticolo'})
]
remove_tags = [dict(name='div', attrs={'id':'menutop'}),
dict(name='div', attrs={'id':'fwnetblocco'}),
dict(name='table', attrs={'id':'strumenti'}),
dict(name='table', attrs={'id':'imgesterna'}),
dict(name='a', attrs={'class':'linkblu'}),
dict(name='a', attrs={'class':'link'}),
remove_tags = [dict(name='div', attrs={'id':['menutop','fwnetblocco']}),
dict(attrs={'class':['ls-toolbarCommenti','ls-boxCommentsBlog']}),
dict(name='table', attrs={'id':['strumenti','imgesterna']}),
dict(name='a', attrs={'class':['linkblu','link']}),
dict(name='span', attrs={'class':['boxocchiello','boxocchiello2','sezione']})
]
feeds = [
(u'Home', u'http://www.lastampa.it/redazione/rss_home.xml'),
(u'Editoriali', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=25'),
(u'Politica', u'http://www.lastampa.it/redazione/cmssezioni/politica/rss_politica.xml'),
(u'ArciItaliana', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=14'),
(u'Cronache', u'http://www.lastampa.it/redazione/cmssezioni/cronache/rss_cronache.xml'),
(u'Esteri', u'http://www.lastampa.it/redazione/cmssezioni/esteri/rss_esteri.xml'),
(u'Danni Collaterali', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=90'),
(u'Economia', u'http://www.lastampa.it/redazione/cmssezioni/economia/rss_economia.xml'),
(u'Tecnologia ', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=30'),
(u'Spettacoli', u'http://www.lastampa.it/redazione/cmssezioni/spettacoli/rss_spettacoli.xml'),
(u'Sport', u'http://www.lastampa.it/sport/rss_home.xml'),
(u'Torino', u'http://rss.feedsportal.com/c/32418/f/466938/index.rss'),
(u'Motori', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=57'),
(u'Scienza', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=38'),
(u'Fotografia', u'http://rss.feedsportal.com/c/32418/f/478449/index.rss'),
(u'Scuola', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=60'),
(u'Tempo Libero', u'http://www.lastampa.it/tempolibero/rss_home.xml')
feeds = [(u'BuonGiorno',u'http://www.lastampa.it/cultura/opinioni/buongiorno/rss.xml'),
(u'Jena', u'http://www.lastampa.it/cultura/opinioni/jena/rss.xml'),
(u'Editoriali', u'http://www.lastampa.it/cultura/opinioni/editoriali'),
(u'Finestra sull America', u'http://lastampa.feedsportal.com/c/32418/f/625713/index.rss'),
(u'HomePage', u'http://www.lastampa.it/rss.xml'),
(u'Politica Italia', u'http://www.lastampa.it/italia/politica/rss.xml'),
(u'ArciItaliana', u'http://www.lastampa.it/rss/blog/arcitaliana'),
(u'Cronache', u'http://www.lastampa.it/italia/cronache/rss.xml'),
(u'Esteri', u'http://www.lastampa.it/esteri/rss.xml'),
(u'Danni Collaterali', u'http://www.lastampa.it/rss/blog/danni-collaterali'),
(u'Economia', u'http://www.lastampa.it/economia/rss.xml'),
(u'Tecnologia ', u'http://www.lastampa.it/tecnologia/rss.xml'),
(u'Spettacoli', u'http://www.lastampa.it/spettacoli/rss.xml'),
(u'Sport', u'http://www.lastampa.it/sport/rss.xml'),
(u'Torino', u'http://www.lastampa.it/cronaca/rss.xml'),
(u'Motori', u'http://www.lastampa.it/motori/rss.xml'),
(u'Scienza', u'http://www.lastampa.it/scienza/rss.xml'),
(u'Cultura', u'http://www.lastampa.it/cultura/rss.xml'),
(u'Scuola', u'http://www.lastampa.it/cultura/scuola/rss.xml'),
(u'Benessere', u'http://www.lastampa.it/scienza/benessere/rss.xml'),
(u'Cucina', u'http://www.lastampa.it/societa/cucina/rss.xml'),
(u'Casa', u'http://www.lastampa.it/societa/casa/rss.xml'),
(u'Moda',u'http://www.lastampa.it/societa/moda/rss.xml'),
(u'Giochi',u'http://www.lastampa.it/tecnologia/giochi/rss.xml'),
(u'Viaggi',u'http://www.lastampa.it/societa/viaggi/rss.xml'),
(u'Ambiente', u'http://www.lastampa.it/scienza/ambiente/rss.xml')
]

View File

@ -15,6 +15,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
recursions=1 # set this to zero to omit Related articles lists
match_regexps=[r'/[12][0-9][0-9][0-9]/[0-9]+/'] # speeds up processing by preventing index page links from being followed
# set getTechBlogs to True to include the technology blogs
# set tech_oldest_article to control article age
@ -24,6 +25,14 @@ class NYTimes(BasicNewsRecipe):
tech_oldest_article = 14
tech_max_articles_per_feed = 25
# set getPopularArticles to False if you don't want the Most E-mailed and Most Viewed articles
# otherwise you will get up to 20 of the most popular e-mailed and viewed articles (in each category)
getPopularArticles = True
popularPeriod = '1' # set this to the number of days to include in the measurement
# e.g. 7 will get the most popular measured over the last 7 days
# and 30 will get the most popular measured over 30 days.
# you still only get up to 20 articles in each category
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = True
@ -153,7 +162,7 @@ class NYTimes(BasicNewsRecipe):
timefmt = ''
simultaneous_downloads = 1
#simultaneous_downloads = 1 # no longer required to deal with ads
cover_margins = (18,18,'grey99')
@ -204,7 +213,8 @@ class NYTimes(BasicNewsRecipe):
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
re.compile('commentCount')
re.compile('commentCount'),
'credit'
]}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
@ -291,11 +301,11 @@ class NYTimes(BasicNewsRecipe):
del ans[idx]
idx_max = idx_max-1
continue
if self.verbose:
if True: #self.verbose
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
for article in ans[idx][1]:
total_article_count += 1
if self.verbose:
if True: #self.verbose
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
article['url'].encode('cp1252','replace')))
idx = idx+1
@ -351,23 +361,8 @@ class NYTimes(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser()
return br
## This doesn't work (and probably never did). It either gets another serve of the advertisement,
## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
##
## def skip_ad_pages(self, soup):
## # Skip ad pages served before actual article
## skip_tag = soup.find(True, {'name':'skip'})
## if skip_tag is not None:
## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
## url += '?pagewanted=all'
## self.log.warn("Skipping ad to article at '%s'" % url)
## return self.index_to_soup(url, raw=True)
cover_tag = 'NY_NYT'
def get_cover_url(self):
from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -390,6 +385,7 @@ class NYTimes(BasicNewsRecipe):
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
def short_title(self):
return self.title
@ -398,6 +394,7 @@ class NYTimes(BasicNewsRecipe):
from contextlib import closing
import copy
from calibre.ebooks.chardet import xml_to_unicode
print("ARTICLE_TO_SOUP "+url_or_raw)
if re.match(r'\w+://', url_or_raw):
br = self.clone_browser(self.browser)
open_func = getattr(br, 'open_novisit', br.open)
@ -489,6 +486,67 @@ class NYTimes(BasicNewsRecipe):
description=description, author=author,
content=''))
def get_popular_articles(self,ans):
if self.getPopularArticles:
popular_articles = {}
key_list = []
def handleh3(h3tag):
try:
url = h3tag.a['href']
except:
return ('','','','')
url = re.sub(r'\?.*', '', url)
if self.exclude_url(url):
return ('','','','')
url += '?pagewanted=all'
title = self.tag_to_string(h3tag.a,False)
h6tag = h3tag.findNextSibling('h6')
if h6tag is not None:
author = self.tag_to_string(h6tag,False)
else:
author = ''
ptag = h3tag.findNextSibling('p')
if ptag is not None:
desc = self.tag_to_string(ptag,False)
else:
desc = ''
return(title,url,author,desc)
have_emailed = False
emailed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-emailed?period='+self.popularPeriod)
for h3tag in emailed_soup.findAll('h3'):
(title,url,author,desc) = handleh3(h3tag)
if url=='':
continue
if not have_emailed:
key_list.append('Most E-Mailed')
popular_articles['Most E-Mailed'] = []
have_emailed = True
popular_articles['Most E-Mailed'].append(
dict(title=title, url=url, date=strftime('%a, %d %b'),
description=desc, author=author,
content=''))
have_viewed = False
viewed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-viewed?period='+self.popularPeriod)
for h3tag in viewed_soup.findAll('h3'):
(title,url,author,desc) = handleh3(h3tag)
if url=='':
continue
if not have_viewed:
key_list.append('Most Viewed')
popular_articles['Most Viewed'] = []
have_viewed = True
popular_articles['Most Viewed'].append(
dict(title=title, url=url, date=strftime('%a, %d %b'),
description=desc, author=author,
content=''))
viewed_ans = [(k, popular_articles[k]) for k in key_list if popular_articles.has_key(k)]
for x in viewed_ans:
ans.append(x)
return ans
def get_tech_feeds(self,ans):
if self.getTechBlogs:
tech_articles = {}
@ -550,7 +608,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.get_tech_feeds(self.ans))
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_todays_index(self):
@ -583,7 +641,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.get_tech_feeds(self.ans))
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_headline_index(self):
@ -657,7 +715,7 @@ class NYTimes(BasicNewsRecipe):
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.get_tech_feeds(self.ans))
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_index(self):
if self.headlinesOnly:
@ -745,11 +803,12 @@ class NYTimes(BasicNewsRecipe):
def preprocess_html(self, soup):
print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
#print(strftime("%H:%M:%S")+" -- PREPROCESS TITLE="+self.tag_to_string(soup.title))
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
#url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url = 'http://www.nytimes.com' + skip_tag.parent['href']
#url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
sleep(5)
soup = self.handle_tags(self.article_to_soup(url))
@ -920,6 +979,7 @@ class NYTimes(BasicNewsRecipe):
for aside in soup.findAll('div','aside'):
aside.extract()
soup = self.strip_anchors(soup,True)
#print("RECURSIVE: "+self.tag_to_string(soup.title))
if soup.find('div',attrs={'id':'blogcontent'}) is None:
if first_fetch:
@ -969,122 +1029,122 @@ class NYTimes(BasicNewsRecipe):
self.log("ERROR: One picture per article in postprocess_html")
try:
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
except:
self.log("ERROR: Problem in change captions to italic")
self.log("ERROR: Problem in change captions to italic")
try:
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
blogheadline = str(h1) #added for dealbook
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
elif blogheadline.find('entry-title'):#added for dealbook
tag = Tag(soup, "h2")#added for dealbook
tag['class'] = "headline"#added for dealbook
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
h1.replaceWith(tag)#added for dealbook
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
blogheadline = str(h1) #added for dealbook
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
elif blogheadline.find('entry-title'):#added for dealbook
tag = Tag(soup, "h2")#added for dealbook
tag['class'] = "headline"#added for dealbook
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
h1.replaceWith(tag)#added for dealbook
else:
# Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.renderContents()))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
else:
# Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(self.tag_to_string(headline,False)))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
try:
#if this is from a blog (dealbook, fix the byline format
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
if bylineauthor:
tag = Tag(soup, "h6")
tag['class'] = "byline"
tag.insert(0, self.fixChars(bylineauthor.renderContents()))
bylineauthor.replaceWith(tag)
#if this is from a blog (dealbook, fix the byline format
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
if bylineauthor:
tag = Tag(soup, "h6")
tag['class'] = "byline"
tag.insert(0, self.fixChars(self.tag_to_string(bylineauthor,False)))
bylineauthor.replaceWith(tag)
except:
self.log("ERROR: fixing byline author format")
try:
#if this is a blog (dealbook) fix the credit style for the pictures
blogcredit = soup.find('div',attrs={'class':'credit'})
if blogcredit:
tag = Tag(soup, "h6")
tag['class'] = "credit"
tag.insert(0, self.fixChars(blogcredit.renderContents()))
blogcredit.replaceWith(tag)
#if this is a blog (dealbook) fix the credit style for the pictures
blogcredit = soup.find('div',attrs={'class':'credit'})
if blogcredit:
tag = Tag(soup, "h6")
tag['class'] = "credit"
tag.insert(0, self.fixChars(self.tag_to_string(blogcredit,False)))
blogcredit.replaceWith(tag)
except:
self.log("ERROR: fixing credit format")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
#remove the <strong> update tag
blogupdated = soup.find('span', {'class':'update'})
if blogupdated:
blogupdated.replaceWith("")
#remove the <strong> update tag
blogupdated = soup.find('span', {'class':'update'})
if blogupdated:
blogupdated.replaceWith("")
except:
self.log("ERROR: Removing strong tag")
self.log("ERROR: Removing strong tag")
try:
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
except:
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
try:
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
except:
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
#print(strftime("%H:%M:%S")+" -- POSTPROCESS TITLE="+self.tag_to_string(soup.title))
return soup
def populate_article_metadata(self, article, soup, first):

View File

@ -15,6 +15,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
recursions=1 # set this to zero to omit Related articles lists
match_regexps=[r'/[12][0-9][0-9][0-9]/[0-9]+/'] # speeds up processing by preventing index page links from being followed
# set getTechBlogs to True to include the technology blogs
# set tech_oldest_article to control article age
@ -24,6 +25,14 @@ class NYTimes(BasicNewsRecipe):
tech_oldest_article = 14
tech_max_articles_per_feed = 25
# set getPopularArticles to False if you don't want the Most E-mailed and Most Viewed articles
# otherwise you will get up to 20 of the most popular e-mailed and viewed articles (in each category)
getPopularArticles = True
popularPeriod = '1' # set this to the number of days to include in the measurement
# e.g. 7 will get the most popular measured over the last 7 days
# and 30 will get the most popular measured over 30 days.
# you still only get up to 20 articles in each category
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = False
@ -32,7 +41,7 @@ class NYTimes(BasicNewsRecipe):
# number of days old an article can be for inclusion. If oldest_web_article = None all articles
# will be included. Note: oldest_web_article is ignored if webEdition = False
webEdition = False
oldest_web_article = 7
oldest_web_article = None
# download higher resolution images than the small thumbnails typically included in the article
# the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
@ -153,7 +162,7 @@ class NYTimes(BasicNewsRecipe):
timefmt = ''
simultaneous_downloads = 1
#simultaneous_downloads = 1 # no longer required to deal with ads
cover_margins = (18,18,'grey99')
@ -204,7 +213,8 @@ class NYTimes(BasicNewsRecipe):
re.compile('^subNavigation'),
re.compile('^leaderboard'),
re.compile('^module'),
re.compile('commentCount')
re.compile('commentCount'),
'credit'
]}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
@ -291,11 +301,11 @@ class NYTimes(BasicNewsRecipe):
del ans[idx]
idx_max = idx_max-1
continue
if self.verbose:
if True: #self.verbose
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
for article in ans[idx][1]:
total_article_count += 1
if self.verbose:
if True: #self.verbose
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
article['url'].encode('cp1252','replace')))
idx = idx+1
@ -351,23 +361,8 @@ class NYTimes(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser()
return br
## This doesn't work (and probably never did). It either gets another serve of the advertisement,
## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
##
## def skip_ad_pages(self, soup):
## # Skip ad pages served before actual article
## skip_tag = soup.find(True, {'name':'skip'})
## if skip_tag is not None:
## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
## url += '?pagewanted=all'
## self.log.warn("Skipping ad to article at '%s'" % url)
## return self.index_to_soup(url, raw=True)
cover_tag = 'NY_NYT'
def get_cover_url(self):
from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -390,6 +385,7 @@ class NYTimes(BasicNewsRecipe):
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
def short_title(self):
return self.title
@ -398,6 +394,7 @@ class NYTimes(BasicNewsRecipe):
from contextlib import closing
import copy
from calibre.ebooks.chardet import xml_to_unicode
print("ARTICLE_TO_SOUP "+url_or_raw)
if re.match(r'\w+://', url_or_raw):
br = self.clone_browser(self.browser)
open_func = getattr(br, 'open_novisit', br.open)
@ -489,6 +486,67 @@ class NYTimes(BasicNewsRecipe):
description=description, author=author,
content=''))
def get_popular_articles(self,ans):
if self.getPopularArticles:
popular_articles = {}
key_list = []
def handleh3(h3tag):
try:
url = h3tag.a['href']
except:
return ('','','','')
url = re.sub(r'\?.*', '', url)
if self.exclude_url(url):
return ('','','','')
url += '?pagewanted=all'
title = self.tag_to_string(h3tag.a,False)
h6tag = h3tag.findNextSibling('h6')
if h6tag is not None:
author = self.tag_to_string(h6tag,False)
else:
author = ''
ptag = h3tag.findNextSibling('p')
if ptag is not None:
desc = self.tag_to_string(ptag,False)
else:
desc = ''
return(title,url,author,desc)
have_emailed = False
emailed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-emailed?period='+self.popularPeriod)
for h3tag in emailed_soup.findAll('h3'):
(title,url,author,desc) = handleh3(h3tag)
if url=='':
continue
if not have_emailed:
key_list.append('Most E-Mailed')
popular_articles['Most E-Mailed'] = []
have_emailed = True
popular_articles['Most E-Mailed'].append(
dict(title=title, url=url, date=strftime('%a, %d %b'),
description=desc, author=author,
content=''))
have_viewed = False
viewed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-viewed?period='+self.popularPeriod)
for h3tag in viewed_soup.findAll('h3'):
(title,url,author,desc) = handleh3(h3tag)
if url=='':
continue
if not have_viewed:
key_list.append('Most Viewed')
popular_articles['Most Viewed'] = []
have_viewed = True
popular_articles['Most Viewed'].append(
dict(title=title, url=url, date=strftime('%a, %d %b'),
description=desc, author=author,
content=''))
viewed_ans = [(k, popular_articles[k]) for k in key_list if popular_articles.has_key(k)]
for x in viewed_ans:
ans.append(x)
return ans
def get_tech_feeds(self,ans):
if self.getTechBlogs:
tech_articles = {}
@ -550,7 +608,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.get_tech_feeds(self.ans))
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_todays_index(self):
@ -583,7 +641,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.get_tech_feeds(self.ans))
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_headline_index(self):
@ -657,7 +715,7 @@ class NYTimes(BasicNewsRecipe):
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.get_tech_feeds(self.ans))
return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
def parse_index(self):
if self.headlinesOnly:
@ -745,11 +803,12 @@ class NYTimes(BasicNewsRecipe):
def preprocess_html(self, soup):
print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
#print(strftime("%H:%M:%S")+" -- PREPROCESS TITLE="+self.tag_to_string(soup.title))
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
#url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url = 'http://www.nytimes.com' + skip_tag.parent['href']
#url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
sleep(5)
soup = self.handle_tags(self.article_to_soup(url))
@ -920,6 +979,7 @@ class NYTimes(BasicNewsRecipe):
for aside in soup.findAll('div','aside'):
aside.extract()
soup = self.strip_anchors(soup,True)
#print("RECURSIVE: "+self.tag_to_string(soup.title))
if soup.find('div',attrs={'id':'blogcontent'}) is None:
if first_fetch:
@ -969,122 +1029,122 @@ class NYTimes(BasicNewsRecipe):
self.log("ERROR: One picture per article in postprocess_html")
try:
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
# Change captions to italic
for caption in soup.findAll(True, {'class':'caption'}) :
if caption and len(caption) > 0:
cTag = Tag(soup, "p", [("class", "caption")])
c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip()
mp_off = c.find("More Photos")
if mp_off >= 0:
c = c[:mp_off]
cTag.insert(0, c)
caption.replaceWith(cTag)
except:
self.log("ERROR: Problem in change captions to italic")
self.log("ERROR: Problem in change captions to italic")
try:
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
blogheadline = str(h1) #added for dealbook
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
elif blogheadline.find('entry-title'):#added for dealbook
tag = Tag(soup, "h2")#added for dealbook
tag['class'] = "headline"#added for dealbook
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
h1.replaceWith(tag)#added for dealbook
# Change <nyt_headline> to <h2>
h1 = soup.find('h1')
blogheadline = str(h1) #added for dealbook
if h1:
headline = h1.find("nyt_headline")
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag)
elif blogheadline.find('entry-title'):#added for dealbook
tag = Tag(soup, "h2")#added for dealbook
tag['class'] = "headline"#added for dealbook
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
h1.replaceWith(tag)#added for dealbook
else:
# Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.renderContents()))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
else:
# Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
headline = soup.find('title')
if headline:
tag = Tag(soup, "h2")
tag['class'] = "headline"
tag.insert(0, self.fixChars(self.tag_to_string(headline,False)))
soup.insert(0, tag)
hrs = soup.findAll('hr')
for hr in hrs:
hr.extract()
except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
try:
#if this is from a blog (dealbook, fix the byline format
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
if bylineauthor:
tag = Tag(soup, "h6")
tag['class'] = "byline"
tag.insert(0, self.fixChars(bylineauthor.renderContents()))
bylineauthor.replaceWith(tag)
#if this is from a blog (dealbook, fix the byline format
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
if bylineauthor:
tag = Tag(soup, "h6")
tag['class'] = "byline"
tag.insert(0, self.fixChars(self.tag_to_string(bylineauthor,False)))
bylineauthor.replaceWith(tag)
except:
self.log("ERROR: fixing byline author format")
try:
#if this is a blog (dealbook) fix the credit style for the pictures
blogcredit = soup.find('div',attrs={'class':'credit'})
if blogcredit:
tag = Tag(soup, "h6")
tag['class'] = "credit"
tag.insert(0, self.fixChars(blogcredit.renderContents()))
blogcredit.replaceWith(tag)
#if this is a blog (dealbook) fix the credit style for the pictures
blogcredit = soup.find('div',attrs={'class':'credit'})
if blogcredit:
tag = Tag(soup, "h6")
tag['class'] = "credit"
tag.insert(0, self.fixChars(self.tag_to_string(blogcredit,False)))
blogcredit.replaceWith(tag)
except:
self.log("ERROR: fixing credit format")
try:
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
# Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1")
if masthead:
# Nuke the href
if masthead.a:
del(masthead.a['href'])
tag = Tag(soup, "h3")
tag.insert(0, self.fixChars(masthead.contents[0]))
masthead.replaceWith(tag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
# Change <span class="bold"> to <b>
for subhead in soup.findAll(True, {'class':'bold'}) :
if subhead.contents:
bTag = Tag(soup, "b")
bTag.insert(0, subhead.contents[0])
subhead.replaceWith(bTag)
except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
#remove the <strong> update tag
blogupdated = soup.find('span', {'class':'update'})
if blogupdated:
blogupdated.replaceWith("")
#remove the <strong> update tag
blogupdated = soup.find('span', {'class':'update'})
if blogupdated:
blogupdated.replaceWith("")
except:
self.log("ERROR: Removing strong tag")
self.log("ERROR: Removing strong tag")
try:
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
divTag = soup.find('div',attrs={'id':'articleBody'})
if divTag:
divTag['class'] = divTag['id']
except:
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})")
try:
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
# Add class="authorId" to <div> so we can format with CSS
divTag = soup.find('div',attrs={'id':'authorId'})
if divTag and divTag.contents[0]:
tag = Tag(soup, "p")
tag['class'] = "authorId"
tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0],
use_alt=False)))
divTag.replaceWith(tag)
except:
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
#print(strftime("%H:%M:%S")+" -- POSTPROCESS TITLE="+self.tag_to_string(soup.title))
return soup
def populate_article_metadata(self, article, soup, first):

View File

@ -1529,6 +1529,15 @@ class StoreNextoStore(StoreBase):
formats = ['EPUB', 'MOBI', 'PDF']
affiliate = True
class StoreNookUKStore(StoreBase):
name = 'Nook UK'
author = 'John Schember'
description = u'Barnes & Noble S.à r.l, a subsidiary of Barnes & Noble, Inc., a leading retailer of content, digital media and educational products, is proud to bring the award-winning NOOK® reading experience and a leading digital bookstore to the UK.'
actual_plugin = 'calibre.gui2.store.stores.nook_uk_plugin:NookUKStore'
headquarters = 'UK'
formats = ['NOOK']
class StoreOpenBooksStore(StoreBase):
name = 'Open Books'
description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.'
@ -1660,7 +1669,7 @@ plugins += [
StoreAmazonITKindleStore,
StoreAmazonUKKindleStore,
StoreBaenWebScriptionStore,
StoreBNStore, StoreSonyStore,
StoreBNStore,
StoreBeWriteStore,
StoreBiblioStore,
StoreBookotekaStore,
@ -1686,12 +1695,14 @@ plugins += [
StoreMillsBoonUKStore,
StoreMobileReadStore,
StoreNextoStore,
StoreNookUKStore,
StoreOpenBooksStore,
StoreOzonRUStore,
StorePragmaticBookshelfStore,
StorePublioStore,
StoreRW2010Store,
StoreSmashwordsStore,
StoreSonyStore,
StoreVirtualoStore,
StoreWaterstonesUKStore,
StoreWeightlessBooksStore,

View File

@ -48,6 +48,7 @@ class ANDROID(USBMS):
0x2910 : HTC_BCDS,
0xe77 : HTC_BCDS,
0xff9 : HTC_BCDS,
0x0001 : [0x255],
},
# Eken
@ -212,7 +213,8 @@ class ANDROID(USBMS):
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD',
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY', 'SXZ', 'USB_2.0',
'COBY_MID', 'VS', 'AINOL', 'TOPWISE', 'PAD703', 'NEXT8D12']
'COBY_MID', 'VS', 'AINOL', 'TOPWISE', 'PAD703', 'NEXT8D12',
'MEDIATEK']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -232,7 +234,7 @@ class ANDROID(USBMS):
'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE',
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F']
'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',

View File

@ -91,12 +91,14 @@ class PDFOutput(OutputFormatPlugin):
OptionRecommendation(name='pdf_mono_font_size',
recommended_value=16, help=_(
'The default font size for monospaced text')),
# OptionRecommendation(name='old_pdf_engine', recommended_value=False,
# help=_('Use the old, less capable engine to generate the PDF')),
# OptionRecommendation(name='uncompressed_pdf',
# recommended_value=False, help=_(
# 'Generate an uncompressed PDF, useful for debugging, '
# 'only works with the new PDF engine.')),
OptionRecommendation(name='pdf_mark_links', recommended_value=False,
help=_('Surround all links with a red box, useful for debugging.')),
OptionRecommendation(name='old_pdf_engine', recommended_value=False,
help=_('Use the old, less capable engine to generate the PDF')),
OptionRecommendation(name='uncompressed_pdf',
recommended_value=False, help=_(
'Generate an uncompressed PDF, useful for debugging, '
'only works with the new PDF engine.')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
@ -190,13 +192,12 @@ class PDFOutput(OutputFormatPlugin):
val[i].value = family_map[k]
def convert_text(self, oeb_book):
from calibre.utils.config import tweaks
if tweaks.get('new_pdf_engine', False):
from calibre.ebooks.pdf.render.from_html import PDFWriter
from calibre.ebooks.metadata.opf2 import OPF
if self.opts.old_pdf_engine:
from calibre.ebooks.pdf.writer import PDFWriter
PDFWriter
else:
from calibre.ebooks.pdf.writer import PDFWriter
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.pdf.render.from_html import PDFWriter
self.log.debug('Serializing oeb input to disk for processing...')
self.get_cover_data()
@ -231,7 +232,15 @@ class PDFOutput(OutputFormatPlugin):
out_stream.seek(0)
out_stream.truncate()
self.log.debug('Rendering pages to PDF...')
writer.dump(items, out_stream, PDFMetadata(self.metadata))
import time
st = time.time()
if False:
import cProfile
cProfile.runctx('writer.dump(items, out_stream, PDFMetadata(self.metadata))',
globals(), locals(), '/tmp/profile')
else:
writer.dump(items, out_stream, PDFMetadata(self.metadata))
self.log('Rendered PDF in %g seconds:'%(time.time()-st))
if close:
out_stream.close()

View File

@ -17,7 +17,7 @@ from urllib import unquote
from calibre.ebooks.chardet import detect_xml_encoding
from calibre.constants import iswindows
from calibre import unicode_path, as_unicode
from calibre import unicode_path, as_unicode, replace_entities
class Link(object):
'''
@ -147,6 +147,7 @@ class HTMLFile(object):
url = match.group(i)
if url:
break
url = replace_entities(url)
try:
link = self.resolve(url)
except ValueError:

View File

@ -9,7 +9,10 @@ __docformat__ = 'restructuredtext en'
import codecs, zlib
from io import BytesIO
from struct import pack
from calibre.constants import plugins, ispy3
pdf_float = plugins['speedup'][0].pdf_float
EOL = b'\n'
@ -51,15 +54,25 @@ PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
# Basic PDF datatypes {{{
ic = str if ispy3 else unicode
icb = (lambda x: str(x).encode('ascii')) if ispy3 else bytes
def fmtnum(o):
if isinstance(o, float):
return pdf_float(o)
return ic(o)
def serialize(o, stream):
if hasattr(o, 'pdf_serialize'):
if isinstance(o, float):
stream.write_raw(pdf_float(o).encode('ascii'))
elif isinstance(o, (int, long)):
stream.write_raw(icb(o))
elif hasattr(o, 'pdf_serialize'):
o.pdf_serialize(stream)
elif isinstance(o, bool):
stream.write(b'true' if o else b'false')
elif isinstance(o, (int, long, float)):
stream.write(type(u'')(o).encode('ascii'))
elif o is None:
stream.write(b'null')
stream.write_raw(b'null')
elif isinstance(o, bool):
stream.write_raw(b'true' if o else b'false')
else:
raise ValueError('Unknown object: %r'%o)
@ -85,19 +98,13 @@ class String(unicode):
raw = codecs.BOM_UTF16_BE + s.encode('utf-16-be')
stream.write(b'('+raw+b')')
class GlyphIndex(int):
def pdf_serialize(self, stream):
byts = bytearray(pack(b'>H', self))
stream.write('<%s>'%''.join(map(
lambda x: bytes(hex(x)[2:]).rjust(2, b'0'), byts)))
class Dictionary(dict):
def pdf_serialize(self, stream):
stream.write(b'<<' + EOL)
sorted_keys = sorted(self.iterkeys(),
key=lambda x:((' ' if x == 'Type' else '')+x))
key=lambda x:({'Type':'1', 'Subtype':'2'}.get(
x, x)+x))
for k in sorted_keys:
serialize(Name(k), stream)
stream.write(b' ')
@ -161,6 +168,9 @@ class Stream(BytesIO):
super(Stream, self).write(raw if isinstance(raw, bytes) else
raw.encode('ascii'))
def write_raw(self, raw):
BytesIO.write(self, raw)
class Reference(object):
def __init__(self, num, obj):
@ -169,5 +179,11 @@ class Reference(object):
def pdf_serialize(self, stream):
raw = '%d 0 R'%self.num
stream.write(raw.encode('ascii'))
def __repr__(self):
return '%d 0 R'%self.num
def __str__(self):
return repr(self)
# }}}

View File

@ -8,24 +8,27 @@ __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, traceback
from math import sqrt
from collections import namedtuple
from functools import wraps, partial
from future_builtins import map
import sip
from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QApplication, QPainter,
QTransform, QPainterPath, QImage, QByteArray, QBuffer,
qRgba)
from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QTransform, QBrush)
from calibre.constants import plugins
from calibre.ebooks.pdf.render.serialize import (Color, PDFStream, Path)
from calibre.ebooks.pdf.render.common import inch, A4
from calibre.ebooks.pdf.render.serialize import (PDFStream, Path)
from calibre.ebooks.pdf.render.common import inch, A4, fmtnum
from calibre.ebooks.pdf.render.graphics import convert_path, Graphics
from calibre.utils.fonts.sfnt.container import Sfnt
from calibre.utils.fonts.sfnt.metrics import FontMetrics
Point = namedtuple('Point', 'x y')
ColorState = namedtuple('ColorState', 'color opacity do')
def repr_transform(t):
vals = map(fmtnum, (t.m11(), t.m12(), t.m21(), t.m22(), t.dx(), t.dy()))
return '[%s]'%' '.join(vals)
def store_error(func):
@wraps(func)
@ -38,146 +41,6 @@ def store_error(func):
return errh
class GraphicsState(object): # {{{
def __init__(self):
self.ops = {}
self.initial_state = {
'fill': ColorState(Color(0., 0., 0., 1.), 1.0, False),
'transform': QTransform(),
'dash': [],
'line_width': 0,
'stroke': ColorState(Color(0., 0., 0., 1.), 1.0, True),
'line_cap': 'flat',
'line_join': 'miter',
'clip': (Qt.NoClip, QPainterPath()),
}
self.current_state = self.initial_state.copy()
def reset(self):
self.current_state = self.initial_state.copy()
def update_color_state(self, which, color=None, opacity=None,
brush_style=None, pen_style=None):
current = self.ops.get(which, self.current_state[which])
n = ColorState(*current)
if color is not None:
n = n._replace(color=Color(*color.getRgbF()))
if opacity is not None:
n = n._replace(opacity=opacity)
if opacity is not None:
opacity *= n.color.opacity
if brush_style is not None:
if which == 'fill':
do = (False if opacity == 0.0 or brush_style == Qt.NoBrush else
True)
else:
do = (False if opacity == 0.0 or brush_style == Qt.NoBrush or
pen_style == Qt.NoPen else True)
n = n._replace(do=do)
self.ops[which] = n
def read(self, state):
flags = state.state()
if flags & QPaintEngine.DirtyTransform:
self.ops['transform'] = state.transform()
# TODO: Add support for brush patterns
if flags & QPaintEngine.DirtyBrush:
brush = state.brush()
color = brush.color()
self.update_color_state('fill', color=color,
brush_style=brush.style())
if flags & QPaintEngine.DirtyPen:
pen = state.pen()
brush = pen.brush()
color = pen.color()
self.update_color_state('stroke', color, brush_style=brush.style(),
pen_style=pen.style())
ps = {Qt.DashLine:[3], Qt.DotLine:[1,2], Qt.DashDotLine:[3,2,1,2],
Qt.DashDotDotLine:[3, 2, 1, 2, 1, 2]}.get(pen.style(), [])
self.ops['dash'] = ps
self.ops['line_width'] = pen.widthF()
self.ops['line_cap'] = {Qt.FlatCap:'flat', Qt.RoundCap:'round',
Qt.SquareCap:'square'}.get(pen.capStyle(), 'flat')
self.ops['line_join'] = {Qt.MiterJoin:'miter', Qt.RoundJoin:'round',
Qt.BevelJoin:'bevel'}.get(pen.joinStyle(), 'miter')
if flags & QPaintEngine.DirtyOpacity:
self.update_color_state('fill', opacity=state.opacity())
self.update_color_state('stroke', opacity=state.opacity())
if flags & QPaintEngine.DirtyClipPath or flags & QPaintEngine.DirtyClipRegion:
self.ops['clip'] = True
def __call__(self, engine):
if not self.ops:
return
pdf = engine.pdf
ops = self.ops
current_transform = self.current_state['transform']
transform_changed = 'transform' in ops and ops['transform'] != current_transform
reset_stack = transform_changed or 'clip' in ops
if reset_stack:
pdf.restore_stack()
pdf.save_stack()
# Since we have reset the stack we need to re-apply all previous
# operations, that are different from the default value (clip is
# handled separately).
for op in set(self.initial_state) - {'clip'}:
if op in ops: # These will be applied below
self.current_state[op] = self.initial_state[op]
elif self.current_state[op] != self.initial_state[op]:
self.apply(op, self.current_state[op], engine, pdf)
# Now apply the new operations
for op, val in ops.iteritems():
if op != 'clip' and self.current_state[op] != val:
self.apply(op, val, engine, pdf)
self.current_state[op] = val
if 'clip' in ops:
# Get the current clip
path = engine.painter().clipPath()
if not path.isEmpty():
engine.add_clip(path)
self.ops = {}
def apply(self, op, val, engine, pdf):
getattr(self, 'apply_'+op)(val, engine, pdf)
def apply_transform(self, val, engine, pdf):
if not val.isIdentity():
pdf.transform(val)
def apply_stroke(self, val, engine, pdf):
self.apply_color_state('stroke', val, engine, pdf)
def apply_fill(self, val, engine, pdf):
self.apply_color_state('fill', val, engine, pdf)
def apply_color_state(self, which, val, engine, pdf):
color = val.color._replace(opacity=val.opacity*val.color.opacity)
getattr(pdf, 'set_%s_color'%which)(color)
setattr(engine, 'do_%s'%which, val.do)
def apply_dash(self, val, engine, pdf):
pdf.set_dash(val)
def apply_line_width(self, val, engine, pdf):
pdf.set_line_width(val)
def apply_line_cap(self, val, engine, pdf):
pdf.set_line_cap(val)
def apply_line_join(self, val, engine, pdf):
pdf.set_line_join(val)
# }}}
class Font(FontMetrics):
def __init__(self, sfnt):
@ -186,12 +49,21 @@ class Font(FontMetrics):
class PdfEngine(QPaintEngine):
FEATURES = QPaintEngine.AllFeatures & ~(
QPaintEngine.PorterDuff | QPaintEngine.PerspectiveTransform
| QPaintEngine.ObjectBoundingModeGradients
| QPaintEngine.LinearGradientFill
| QPaintEngine.RadialGradientFill
| QPaintEngine.ConicalGradientFill
)
def __init__(self, file_object, page_width, page_height, left_margin,
top_margin, right_margin, bottom_margin, width, height,
errors=print, debug=print, compress=True):
QPaintEngine.__init__(self, self.features)
errors=print, debug=print, compress=True,
mark_links=False):
QPaintEngine.__init__(self, self.FEATURES)
self.file_object = file_object
self.compress = compress
self.compress, self.mark_links = compress, mark_links
self.page_height, self.page_width = page_height, page_width
self.left_margin, self.top_margin = left_margin, top_margin
self.right_margin, self.bottom_margin = right_margin, bottom_margin
@ -210,49 +82,48 @@ class PdfEngine(QPaintEngine):
self.bottom_margin) / self.pixel_height
self.pdf_system = QTransform(sx, 0, 0, -sy, dx, dy)
self.do_stroke = True
self.do_fill = False
self.scale = sqrt(sy**2 + sx**2)
self.xscale, self.yscale = sx, sy
self.graphics_state = GraphicsState()
self.graphics = Graphics()
self.errors_occurred = False
self.errors, self.debug = errors, debug
self.fonts = {}
i = QImage(1, 1, QImage.Format_ARGB32)
i.fill(qRgba(0, 0, 0, 255))
self.alpha_bit = i.constBits().asstring(4).find(b'\xff')
self.current_page_num = 1
self.current_page_inited = False
self.qt_hack, err = plugins['qt_hack']
if err:
raise RuntimeError('Failed to load qt_hack with err: %s'%err)
def init_page(self):
self.pdf.transform(self.pdf_system)
self.pdf.set_rgb_colorspace()
width = self.painter().pen().widthF() if self.isActive() else 0
self.pdf.set_line_width(width)
self.do_stroke = True
self.do_fill = False
self.graphics_state.reset()
self.pdf.save_stack()
self.current_page_inited = True
def apply_graphics_state(self):
self.graphics(self.pdf_system, self.painter())
def resolve_fill(self, rect):
self.graphics.resolve_fill(rect, self.pdf_system,
self.painter().transform())
@property
def features(self):
return (QPaintEngine.Antialiasing | QPaintEngine.AlphaBlend |
QPaintEngine.ConstantOpacity | QPaintEngine.PainterPaths |
QPaintEngine.PaintOutsidePaintEvent |
QPaintEngine.PrimitiveTransform)
def do_fill(self):
return self.graphics.current_state.do_fill
@property
def do_stroke(self):
return self.graphics.current_state.do_stroke
def init_page(self):
self.pdf.transform(self.pdf_system)
self.graphics.reset()
self.pdf.save_stack()
self.current_page_inited = True
def begin(self, device):
if not hasattr(self, 'pdf'):
try:
self.pdf = PDFStream(self.file_object, (self.page_width,
self.page_height),
compress=self.compress)
self.page_height), compress=self.compress,
mark_links=self.mark_links,
debug=self.debug)
self.graphics.begin(self.pdf)
except:
self.errors.append(traceback.format_exc())
self.errors(traceback.format_exc())
self.errors_occurred = True
return False
return True
@ -268,7 +139,8 @@ class PdfEngine(QPaintEngine):
self.end_page()
self.pdf.end()
except:
self.errors.append(traceback.format_exc())
self.errors(traceback.format_exc())
self.errors_occurred = True
return False
finally:
self.pdf = self.file_object = None
@ -277,139 +149,63 @@ class PdfEngine(QPaintEngine):
def type(self):
return QPaintEngine.Pdf
def add_image(self, img, cache_key):
if img.isNull(): return
return self.pdf.add_image(img, cache_key)
@store_error
def drawTiledPixmap(self, rect, pixmap, point):
self.apply_graphics_state()
brush = QBrush(pixmap)
bl = rect.topLeft()
color, opacity, pattern, do_fill = self.graphics.convert_brush(
brush, bl-point, 1.0, self.pdf_system,
self.painter().transform())
self.pdf.save_stack()
self.pdf.apply_fill(color, pattern)
self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(),
stroke=False, fill=True)
self.pdf.restore_stack()
@store_error
def drawPixmap(self, rect, pixmap, source_rect):
self.graphics_state(self)
self.apply_graphics_state()
source_rect = source_rect.toRect()
pixmap = (pixmap if source_rect == pixmap.rect() else
pixmap.copy(source_rect))
image = pixmap.toImage()
ref = self.add_image(image, pixmap.cacheKey())
if ref is not None:
self.pdf.draw_image(rect.x(), rect.height()+rect.y(), rect.width(),
-rect.height(), ref)
self.pdf.draw_image(rect.x(), rect.y(), rect.width(),
rect.height(), ref)
@store_error
def drawImage(self, rect, image, source_rect, flags=Qt.AutoColor):
self.graphics_state(self)
self.apply_graphics_state()
source_rect = source_rect.toRect()
image = (image if source_rect == image.rect() else
image.copy(source_rect))
ref = self.add_image(image, image.cacheKey())
if ref is not None:
self.pdf.draw_image(rect.x(), rect.height()+rect.y(), rect.width(),
-rect.height(), ref)
def add_image(self, img, cache_key):
if img.isNull(): return
ref = self.pdf.get_image(cache_key)
if ref is not None:
return ref
fmt = img.format()
image = QImage(img)
if (image.depth() == 1 and img.colorTable().size() == 2 and
img.colorTable().at(0) == QColor(Qt.black).rgba() and
img.colorTable().at(1) == QColor(Qt.white).rgba()):
if fmt == QImage.Format_MonoLSB:
image = image.convertToFormat(QImage.Format_Mono)
fmt = QImage.Format_Mono
else:
if (fmt != QImage.Format_RGB32 and fmt != QImage.Format_ARGB32):
image = image.convertToFormat(QImage.Format_ARGB32)
fmt = QImage.Format_ARGB32
w = image.width()
h = image.height()
d = image.depth()
if fmt == QImage.Format_Mono:
bytes_per_line = (w + 7) >> 3
data = image.constBits().asstring(bytes_per_line * h)
return self.pdf.write_image(data, w, h, d, cache_key=cache_key)
ba = QByteArray()
buf = QBuffer(ba)
image.save(buf, 'jpeg', 94)
data = bytes(ba.data())
has_alpha = has_mask = False
soft_mask = mask = None
if fmt == QImage.Format_ARGB32:
tmask = image.constBits().asstring(4*w*h)[self.alpha_bit::4]
sdata = bytearray(tmask)
vals = set(sdata)
vals.discard(255)
has_mask = bool(vals)
vals.discard(0)
has_alpha = bool(vals)
if has_alpha:
soft_mask = self.pdf.write_image(tmask, w, h, 8)
elif has_mask:
# dither the soft mask to 1bit and add it. This also helps PDF
# viewers without transparency support
bytes_per_line = (w + 7) >> 3
mdata = bytearray(0 for i in xrange(bytes_per_line * h))
spos = mpos = 0
for y in xrange(h):
for x in xrange(w):
if sdata[spos]:
mdata[mpos + x>>3] |= (0x80 >> (x&7))
spos += 1
mpos += bytes_per_line
mdata = bytes(mdata)
mask = self.pdf.write_image(mdata, w, h, 1)
return self.pdf.write_image(data, w, h, 32, mask=mask, dct=True,
soft_mask=soft_mask, cache_key=cache_key)
self.pdf.draw_image(rect.x(), rect.y(), rect.width(),
rect.height(), ref)
@store_error
def updateState(self, state):
self.graphics_state.read(state)
def convert_path(self, path):
p = Path()
i = 0
while i < path.elementCount():
elem = path.elementAt(i)
em = (elem.x, elem.y)
i += 1
if elem.isMoveTo():
p.move_to(*em)
elif elem.isLineTo():
p.line_to(*em)
elif elem.isCurveTo():
added = False
if path.elementCount() > i+1:
c1, c2 = path.elementAt(i), path.elementAt(i+1)
if (c1.type == path.CurveToDataElement and c2.type ==
path.CurveToDataElement):
i += 2
p.curve_to(em[0], em[1], c1.x, c1.y, c2.x, c2.y)
added = True
if not added:
raise ValueError('Invalid curve to operation')
return p
self.graphics.update_state(state, self.painter())
@store_error
def drawPath(self, path):
self.graphics_state(self)
p = self.convert_path(path)
self.apply_graphics_state()
p = convert_path(path)
fill_rule = {Qt.OddEvenFill:'evenodd',
Qt.WindingFill:'winding'}[path.fillRule()]
self.pdf.draw_path(p, stroke=self.do_stroke,
fill=self.do_fill, fill_rule=fill_rule)
def add_clip(self, path):
p = self.convert_path(path)
fill_rule = {Qt.OddEvenFill:'evenodd',
Qt.WindingFill:'winding'}[path.fillRule()]
self.pdf.add_clip(p, fill_rule=fill_rule)
@store_error
def drawPoints(self, points):
self.graphics_state(self)
self.apply_graphics_state()
p = Path()
for point in points:
p.move_to(point.x(), point.y())
@ -418,11 +214,13 @@ class PdfEngine(QPaintEngine):
@store_error
def drawRects(self, rects):
self.graphics_state(self)
for rect in rects:
bl = rect.topLeft()
self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(),
stroke=self.do_stroke, fill=self.do_fill)
self.apply_graphics_state()
with self.graphics:
for rect in rects:
self.resolve_fill(rect)
bl = rect.topLeft()
self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(),
stroke=self.do_stroke, fill=self.do_fill)
def create_sfnt(self, text_item):
get_table = partial(self.qt_hack.get_sfnt_table, text_item)
@ -438,7 +236,7 @@ class PdfEngine(QPaintEngine):
@store_error
def drawTextItem(self, point, text_item):
# super(PdfEngine, self).drawTextItem(point, text_item)
self.graphics_state(self)
self.apply_graphics_state()
gi = self.qt_hack.get_glyphs(point, text_item)
if not gi.indices:
sip.delete(gi)
@ -469,7 +267,7 @@ class PdfEngine(QPaintEngine):
@store_error
def drawPolygon(self, points, mode):
self.graphics_state(self)
self.apply_graphics_state()
if not points: return
p = Path()
p.move_to(points[0].x(), points[0].y())
@ -484,20 +282,31 @@ class PdfEngine(QPaintEngine):
def set_metadata(self, *args, **kwargs):
self.pdf.set_metadata(*args, **kwargs)
def __enter__(self):
self.pdf.save_stack()
self.saved_ps = (self.do_stroke, self.do_fill)
def add_outline(self, toc):
self.pdf.links.add_outline(toc)
def __exit__(self, *args):
self.do_stroke, self.do_fill = self.saved_ps
self.pdf.restore_stack()
def add_links(self, current_item, start_page, links, anchors):
for pos in anchors.itervalues():
pos['left'], pos['top'] = self.pdf_system.map(pos['left'], pos['top'])
for link in links:
pos = link[1]
llx = pos['left']
lly = pos['top'] + pos['height']
urx = pos['left'] + pos['width']
ury = pos['top']
llx, lly = self.pdf_system.map(llx, lly)
urx, ury = self.pdf_system.map(urx, ury)
link[1] = pos['column'] + start_page
link.append((llx, lly, urx, ury))
self.pdf.links.add(current_item, start_page, links, anchors)
class PdfDevice(QPaintDevice): # {{{
def __init__(self, file_object, page_size=A4, left_margin=inch,
top_margin=inch, right_margin=inch, bottom_margin=inch,
xdpi=1200, ydpi=1200, errors=print, debug=print, compress=True):
xdpi=1200, ydpi=1200, errors=print, debug=print,
compress=True, mark_links=False):
QPaintDevice.__init__(self)
self.xdpi, self.ydpi = xdpi, ydpi
self.page_width, self.page_height = page_size
@ -506,7 +315,10 @@ class PdfDevice(QPaintDevice): # {{{
self.engine = PdfEngine(file_object, self.page_width, self.page_height,
left_margin, top_margin, right_margin,
bottom_margin, self.width(), self.height(),
errors=errors, debug=debug, compress=compress)
errors=errors, debug=debug, compress=compress,
mark_links=mark_links)
self.add_outline = self.engine.add_outline
self.add_links = self.engine.add_links
def paintEngine(self):
return self.engine
@ -553,59 +365,4 @@ class PdfDevice(QPaintDevice): # {{{
# }}}
if __name__ == '__main__':
from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap)
QBrush, QColor, QPoint, QPixmap
app = QApplication([])
p = QPainter()
with open('/tmp/painter.pdf', 'wb') as f:
dev = PdfDevice(f, compress=False)
p.begin(dev)
dev.init_page()
xmax, ymax = p.viewport().width(), p.viewport().height()
try:
p.drawRect(0, 0, xmax, ymax)
# p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax),
# QPoint(0, ymax), QPoint(0, 0))
# pp = QPainterPath()
# pp.addRect(0, 0, xmax, ymax)
# p.drawPath(pp)
# p.save()
# for i in xrange(3):
# col = [0, 0, 0, 200]
# col[i] = 255
# p.setOpacity(0.3)
# p.setBrush(QBrush(QColor(*col)))
# p.drawRect(0, 0, xmax/10, xmax/10)
# p.translate(xmax/10, xmax/10)
# p.scale(1, 1.5)
# p.restore()
# # p.scale(2, 2)
# # p.rotate(45)
# p.drawPixmap(0, 0, 2048, 2048, QPixmap(I('library.png')))
# p.drawRect(0, 0, 2048, 2048)
# p.save()
# p.drawLine(0, 0, 5000, 0)
# p.rotate(45)
# p.drawLine(0, 0, 5000, 0)
# p.restore()
f = p.font()
f.setPointSize(20)
# f.setLetterSpacing(f.PercentageSpacing, 200)
# f.setUnderline(True)
# f.setOverline(True)
# f.setStrikeOut(True)
f.setFamily('Calibri')
p.setFont(f)
# p.setPen(QColor(0, 0, 255))
# p.scale(2, 2)
# p.rotate(45)
p.drawText(QPoint(300, 300), 'Some—text not Bys ū --- Д AV ff ff')
finally:
p.end()
if dev.engine.errors_occurred:
raise SystemExit(1)

View File

@ -20,7 +20,6 @@ from calibre.ebooks.oeb.display.webview import load_html
from calibre.ebooks.pdf.render.common import (inch, cm, mm, pica, cicero,
didot, PAPER_SIZES)
from calibre.ebooks.pdf.render.engine import PdfDevice
from calibre.ebooks.pdf.render.links import Links
def get_page_size(opts, for_comic=False): # {{{
use_profile = not (opts.override_profile_size or
@ -143,7 +142,6 @@ class PDFWriter(QObject):
self.view.page().mainFrame().setScrollBarPolicy(x,
Qt.ScrollBarAlwaysOff)
self.report_progress = lambda x, y: x
self.links = Links()
def dump(self, items, out_stream, pdf_metadata):
opts = self.opts
@ -156,7 +154,8 @@ class PDFWriter(QObject):
top_margin=0, right_margin=mr, bottom_margin=0,
xdpi=xdpi, ydpi=ydpi, errors=self.log.error,
debug=self.log.debug, compress=not
opts.uncompressed_pdf)
opts.uncompressed_pdf,
mark_links=opts.pdf_mark_links)
self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
self.render_queue = items
@ -187,7 +186,9 @@ class PDFWriter(QObject):
QTimer.singleShot(0, self.render_book)
self.loop.exec_()
# TODO: Outline and links
if self.toc is not None and len(self.toc) > 0:
self.doc.add_outline(self.toc)
self.painter.end()
if self.doc.errors_occurred:
@ -261,8 +262,7 @@ class PDFWriter(QObject):
amap = self.bridge_value
if not isinstance(amap, dict):
amap = {'links':[], 'anchors':{}} # Some javascript error occurred
self.links.add(self.current_item, self.current_page_num, amap['links'],
amap['anchors'])
start_page = self.current_page_num
mf = self.view.page().mainFrame()
while True:
@ -278,3 +278,6 @@ class PDFWriter(QObject):
if self.doc.errors_occurred:
break
self.doc.add_links(self.current_item, start_page, amap['links'],
amap['anchors'])

View File

@ -0,0 +1,470 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from math import sqrt
from collections import namedtuple
from PyQt4.Qt import (
QBrush, QPen, Qt, QPointF, QTransform, QPainterPath, QPaintEngine, QImage)
from calibre.ebooks.pdf.render.common import (
Name, Array, fmtnum, Stream, Dictionary)
from calibre.ebooks.pdf.render.serialize import Path
def convert_path(path): # {{{
    '''Convert a QPainterPath into a PDF Path object.'''
    ans = Path()
    pos = 0
    while pos < path.elementCount():
        el = path.elementAt(pos)
        x, y = el.x, el.y
        pos += 1
        if el.isMoveTo():
            ans.move_to(x, y)
        elif el.isLineTo():
            ans.line_to(x, y)
        elif el.isCurveTo():
            # A CurveTo element must be followed by exactly two
            # CurveToData elements carrying the remaining control and
            # end points of the cubic bezier
            consumed = False
            if path.elementCount() > pos + 1:
                d1 = path.elementAt(pos)
                d2 = path.elementAt(pos + 1)
                if (d1.type == path.CurveToDataElement and
                        d2.type == path.CurveToDataElement):
                    pos += 2
                    ans.curve_to(x, y, d1.x, d1.y, d2.x, d2.y)
                    consumed = True
            if not consumed:
                raise ValueError('Invalid curve to operation')
    return ans
# }}}
Brush = namedtuple('Brush', 'origin brush color')
class TilingPattern(Stream):

    '''Base class for PDF tiling pattern streams. Subclasses write the
    content stream that paints a single w x h tile.'''

    def __init__(self, cache_key, matrix, w=8, h=8, paint_type=2, compress=False):
        Stream.__init__(self, compress=compress)
        self.w, self.h = w, h
        self.paint_type = paint_type
        # Store the Qt transform as a flat PDF matrix tuple
        self.matrix = (matrix.m11(), matrix.m12(), matrix.m21(), matrix.m22(),
                       matrix.dx(), matrix.dy())
        self.resources = Dictionary()
        # Key under which identical patterns are shared in the output
        self.cache_key = (self.__class__.__name__, cache_key, self.matrix)

    def add_extra_keys(self, d):
        # Entries required in a tiling pattern dictionary
        d['Type'] = Name('Pattern')
        d['PatternType'] = 1
        d['PaintType'] = self.paint_type
        d['TilingType'] = 1
        d['BBox'] = Array([0, 0, self.w, self.h])
        d['XStep'] = self.w
        d['YStep'] = self.h
        d['Matrix'] = Array(self.matrix)
        d['Resources'] = self.resources
class QtPattern(TilingPattern):

    '''Tiling patterns implementing the predefined Qt brush styles
    (Dense1Pattern through DiagCrossPattern). Each entry in qt_patterns
    below is a PDF content stream that strokes one 8x8 tile.'''

    qt_patterns = ( # {{{
        "0 J\n"
        "6 w\n"
        "[] 0 d\n"
        "4 0 m\n"
        "4 8 l\n"
        "0 4 m\n"
        "8 4 l\n"
        "S\n", # Dense1Pattern

        "0 J\n"
        "2 w\n"
        "[6 2] 1 d\n"
        "0 0 m\n"
        "0 8 l\n"
        "8 0 m\n"
        "8 8 l\n"
        "S\n"
        "[] 0 d\n"
        "2 0 m\n"
        "2 8 l\n"
        "6 0 m\n"
        "6 8 l\n"
        "S\n"
        "[6 2] -3 d\n"
        "4 0 m\n"
        "4 8 l\n"
        "S\n", # Dense2Pattern

        "0 J\n"
        "2 w\n"
        "[6 2] 1 d\n"
        "0 0 m\n"
        "0 8 l\n"
        "8 0 m\n"
        "8 8 l\n"
        "S\n"
        "[2 2] -1 d\n"
        "2 0 m\n"
        "2 8 l\n"
        "6 0 m\n"
        "6 8 l\n"
        "S\n"
        "[6 2] -3 d\n"
        "4 0 m\n"
        "4 8 l\n"
        "S\n", # Dense3Pattern

        "0 J\n"
        "2 w\n"
        "[2 2] 1 d\n"
        "0 0 m\n"
        "0 8 l\n"
        "8 0 m\n"
        "8 8 l\n"
        "S\n"
        "[2 2] -1 d\n"
        "2 0 m\n"
        "2 8 l\n"
        "6 0 m\n"
        "6 8 l\n"
        "S\n"
        "[2 2] 1 d\n"
        "4 0 m\n"
        "4 8 l\n"
        "S\n", # Dense4Pattern

        "0 J\n"
        "2 w\n"
        "[2 6] -1 d\n"
        "0 0 m\n"
        "0 8 l\n"
        "8 0 m\n"
        "8 8 l\n"
        "S\n"
        "[2 2] 1 d\n"
        "2 0 m\n"
        "2 8 l\n"
        "6 0 m\n"
        "6 8 l\n"
        "S\n"
        "[2 6] 3 d\n"
        "4 0 m\n"
        "4 8 l\n"
        "S\n", # Dense5Pattern

        "0 J\n"
        "2 w\n"
        "[2 6] -1 d\n"
        "0 0 m\n"
        "0 8 l\n"
        "8 0 m\n"
        "8 8 l\n"
        "S\n"
        "[2 6] 3 d\n"
        "4 0 m\n"
        "4 8 l\n"
        "S\n", # Dense6Pattern

        "0 J\n"
        "2 w\n"
        "[2 6] -1 d\n"
        "0 0 m\n"
        "0 8 l\n"
        "8 0 m\n"
        "8 8 l\n"
        "S\n", # Dense7Pattern

        "1 w\n"
        "0 4 m\n"
        "8 4 l\n"
        "S\n", # HorPattern

        "1 w\n"
        "4 0 m\n"
        "4 8 l\n"
        "S\n", # VerPattern

        "1 w\n"
        "4 0 m\n"
        "4 8 l\n"
        "0 4 m\n"
        "8 4 l\n"
        "S\n", # CrossPattern

        "1 w\n"
        "-1 5 m\n"
        "5 -1 l\n"
        "3 9 m\n"
        "9 3 l\n"
        "S\n", # BDiagPattern

        "1 w\n"
        "-1 3 m\n"
        "5 9 l\n"
        "3 -1 m\n"
        "9 5 l\n"
        "S\n", # FDiagPattern

        "1 w\n"
        "-1 3 m\n"
        "5 9 l\n"
        "3 -1 m\n"
        "9 5 l\n"
        "-1 5 m\n"
        "5 -1 l\n"
        "3 9 m\n"
        "9 3 l\n"
        "S\n", # DiagCrossPattern
    ) # }}}

    def __init__(self, pattern_num, matrix):
        # pattern_num is the Qt.BrushStyle value; the first entry above
        # corresponds to Dense1Pattern (value 2), hence the -2 offset
        super(QtPattern, self).__init__(pattern_num, matrix)
        self.write(self.qt_patterns[pattern_num-2])
class TexturePattern(TilingPattern):

    '''Tiling pattern whose tile is a pixmap, or a clone of an existing
    texture pattern with a different pattern matrix.'''

    def __init__(self, pixmap, matrix, pdf, clone=None):
        if clone is not None:
            # Re-use the image data of an existing pattern; only the
            # matrix differs
            super(TexturePattern, self).__init__(
                clone.cache_key[1], matrix, w=clone.w, h=clone.h,
                paint_type=clone.paint_type)
            self.resources['XObject'] = Dictionary(clone.resources['XObject'])
            self.write(clone.getvalue())
        else:
            img = pixmap.toImage()
            key = pixmap.cacheKey()
            imgref = pdf.add_image(img, key)
            # Monochrome textures become uncolored patterns (paint type
            # 2), which are painted in the current fill color
            is_mono = img.format() in {QImage.Format_MonoLSB,
                                       QImage.Format_Mono}
            super(TexturePattern, self).__init__(
                key, matrix, w=img.width(), h=img.height(),
                paint_type=(2 if is_mono else 1))
            tile_matrix = (self.w, 0, 0, -self.h, 0, self.h)
            self.resources['XObject'] = Dictionary({'Texture':imgref})
            self.write_line('%s cm /Texture Do'%(' '.join(map(fmtnum, tile_matrix))))
class GraphicsState(object):

    '''Snapshot of the painter state that is relevant to PDF output.'''

    # Attributes compared by __eq__ and duplicated by copy()
    FIELDS = ('fill', 'stroke', 'opacity', 'transform', 'brush_origin',
              'clip', 'do_fill', 'do_stroke')

    def __init__(self):
        self.fill = QBrush()
        self.stroke = QPen()
        self.opacity = 1.0
        self.transform = QTransform()
        self.brush_origin = QPointF()
        self.clip = QPainterPath()
        self.do_fill = False
        self.do_stroke = True
        self.qt_pattern_cache = {}

    def __eq__(self, other):
        # States are equal iff every tracked field compares equal
        for field in self.FIELDS:
            if getattr(other, field) != getattr(self, field):
                return False
        return True

    def copy(self):
        snapshot = GraphicsState()
        snapshot.fill = QBrush(self.fill)
        snapshot.stroke = QPen(self.stroke)
        snapshot.opacity = self.opacity
        snapshot.transform = self.transform * QTransform()
        snapshot.brush_origin = QPointF(self.brush_origin)
        snapshot.clip = self.clip  # note: the clip path object is shared
        snapshot.do_fill, snapshot.do_stroke = self.do_fill, self.do_stroke
        return snapshot
class Graphics(object):

    '''
    Translates Qt painter state into PDF graphics state operators.
    Changes reported by the paint engine are accumulated in
    pending_state and flushed lazily, just before a drawing operation,
    by calling the instance.
    '''

    def __init__(self):
        # base_state: the pristine state at the bottom of the PDF q/Q
        # stack. current_state: what has actually been written out.
        self.base_state = GraphicsState()
        self.current_state = GraphicsState()
        self.pending_state = None

    def begin(self, pdf):
        # Bind this object to the PDF stream being generated
        self.pdf = pdf

    def update_state(self, state, painter):
        # Record changes reported by the QPaintEngine into
        # pending_state; nothing is written to the PDF here
        flags = state.state()
        if self.pending_state is None:
            self.pending_state = self.current_state.copy()
        s = self.pending_state
        if flags & QPaintEngine.DirtyTransform:
            s.transform = state.transform()
        if flags & QPaintEngine.DirtyBrushOrigin:
            s.brush_origin = state.brushOrigin()
        if flags & QPaintEngine.DirtyBrush:
            s.fill = state.brush()
        if flags & QPaintEngine.DirtyPen:
            s.stroke = state.pen()
        if flags & QPaintEngine.DirtyOpacity:
            s.opacity = state.opacity()
        if flags & QPaintEngine.DirtyClipPath or flags & QPaintEngine.DirtyClipRegion:
            s.clip = painter.clipPath()

    def reset(self):
        # Forget all accumulated state (used at page boundaries)
        self.current_state = GraphicsState()
        self.pending_state = None

    def __call__(self, pdf_system, painter):
        # Apply the currently pending state to the PDF
        if self.pending_state is None:
            return
        pdf_state = self.current_state
        ps = self.pending_state
        pdf = self.pdf
        if (ps.transform != pdf_state.transform or ps.clip != pdf_state.clip):
            # Transform and clip cannot be undone incrementally in PDF,
            # so pop back to the clean base state and re-apply
            # everything from there
            pdf.restore_stack()
            pdf.save_stack()
            pdf_state = self.base_state
        if (pdf_state.transform != ps.transform):
            pdf.transform(ps.transform)
        if (pdf_state.opacity != ps.opacity or pdf_state.stroke != ps.stroke):
            self.apply_stroke(ps, pdf_system, painter)
        if (pdf_state.opacity != ps.opacity or pdf_state.fill != ps.fill or
            pdf_state.brush_origin != ps.brush_origin):
            self.apply_fill(ps, pdf_system, painter)
        if (pdf_state.clip != ps.clip):
            p = convert_path(ps.clip)
            fill_rule = {Qt.OddEvenFill:'evenodd',
                        Qt.WindingFill:'winding'}[ps.clip.fillRule()]
            pdf.add_clip(p, fill_rule=fill_rule)
        self.current_state = self.pending_state
        self.pending_state = None

    def convert_brush(self, brush, brush_origin, global_opacity,
                      pdf_system, qt_system):
        # Convert a QBrush to PDF operators. Returns (color, opacity,
        # pattern, do_fill); also stores a Brush record in self.brushobj.
        style = brush.style()
        pdf = self.pdf
        pattern = color = pat = None
        opacity = 1.0
        do_fill = True
        # Map the brush origin from Qt device space into PDF space
        matrix = (QTransform.fromTranslate(brush_origin.x(), brush_origin.y())
                  * pdf_system * qt_system.inverted()[0])
        vals = list(brush.color().getRgbF())  # RGBA floats in [0, 1]
        self.brushobj = None
        if style <= Qt.DiagCrossPattern:
            opacity = global_opacity * vals[-1]
            color = vals[:3]
            if style > Qt.SolidPattern:
                pat = QtPattern(style, matrix)
                pattern = pdf.add_pattern(pat)
            if opacity < 1e-4 or style == Qt.NoBrush:
                do_fill = False
        elif style == Qt.TexturePattern:
            pat = TexturePattern(brush.texture(), matrix, pdf)
            opacity = global_opacity
            if pat.paint_type == 2:
                # Uncolored pattern: painted in the brush color
                opacity *= vals[-1]
                color = vals[:3]
            pattern = pdf.add_pattern(pat)
            if opacity < 1e-4 or style == Qt.NoBrush:
                do_fill = False
        self.brushobj = Brush(brush_origin, pat, color)
        # TODO: Add support for gradient fills
        return color, opacity, pattern, do_fill

    def apply_stroke(self, state, pdf_system, painter):
        # Write the pen (stroking) parameters to the PDF
        # TODO: Support miter limit by using QPainterPathStroker
        pen = state.stroke
        self.pending_state.do_stroke = True
        pdf = self.pdf
        # Width
        w = pen.widthF()
        if pen.isCosmetic():
            # Cosmetic pens are unaffected by the transform; compensate
            t = painter.transform()
            w /= sqrt(t.m11()**2 + t.m22()**2)
        pdf.serialize(w)
        pdf.current_page.write(' w ')
        # Line cap
        cap = {Qt.FlatCap:0, Qt.RoundCap:1, Qt.SquareCap:
               2}.get(pen.capStyle(), 0)
        pdf.current_page.write('%d J '%cap)
        # Line join
        join = {Qt.MiterJoin:0, Qt.RoundJoin:1,
                Qt.BevelJoin:2}.get(pen.joinStyle(), 0)
        pdf.current_page.write('%d j '%join)
        # Dash pattern
        ps = {Qt.DashLine:[3], Qt.DotLine:[1,2], Qt.DashDotLine:[3,2,1,2],
              Qt.DashDotDotLine:[3, 2, 1, 2, 1, 2]}.get(pen.style(), [])
        if ps:
            pdf.serialize(Array(ps))
            pdf.current_page.write(' 0 d ')
        # Stroke fill
        color, opacity, pattern, self.pending_state.do_stroke = self.convert_brush(
            pen.brush(), state.brush_origin, state.opacity, pdf_system,
            painter.transform())
        self.pdf.apply_stroke(color, pattern, opacity)
        if pen.style() == Qt.NoPen:
            self.pending_state.do_stroke = False

    def apply_fill(self, state, pdf_system, painter):
        # Write the brush (non-stroking) parameters to the PDF and
        # remember the brush for resolve_fill() below
        self.pending_state.do_fill = True
        color, opacity, pattern, self.pending_state.do_fill = self.convert_brush(
            state.fill, state.brush_origin, state.opacity, pdf_system,
            painter.transform())
        self.pdf.apply_fill(color, pattern, opacity)
        self.last_fill = self.brushobj

    def __enter__(self):
        self.pdf.save_stack()

    def __exit__(self, *args):
        self.pdf.restore_stack()

    def resolve_fill(self, rect, pdf_system, qt_system):
        '''
        Qt's paint system does not update brushOrigin when using
        TexturePatterns and it also uses TexturePatterns to emulate gradients,
        leading to brokenness. So this method allows the paint engine to update
        the brush origin before painting an object. While not perfect, this is
        better than nothing.
        '''
        if not hasattr(self, 'last_fill') or not self.current_state.do_fill:
            return
        if isinstance(self.last_fill.brush, TexturePattern):
            tl = rect.topLeft()
            if tl == self.last_fill.origin:
                return
            # Re-anchor the pattern at the top-left of the object being
            # painted, mapped into PDF space
            matrix = (QTransform.fromTranslate(tl.x(), tl.y())
                  * pdf_system * qt_system.inverted()[0])
            pat = TexturePattern(None, matrix, self.pdf, clone=self.last_fill.brush)
            pattern = self.pdf.add_pattern(pat)
            self.pdf.apply_fill(self.last_fill.color, pattern)

View File

@ -8,25 +8,115 @@ __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from future_builtins import map
from urlparse import urlparse, urlunparse
from urllib2 import quote, unquote
from calibre.ebooks.pdf.render.common import Array, Name
from calibre.ebooks.pdf.render.common import Array, Name, Dictionary, String
class Destination(Array):
def __init__(self, start_page, pos):
def __init__(self, start_page, pos, get_pageref):
super(Destination, self).__init__(
[start_page + pos['column'], Name('FitH'), pos['y']])
[get_pageref(start_page + pos['column']), Name('XYZ'), pos['left'],
pos['top'], None]
)
class Links(object):
def __init__(self):
def __init__(self, pdf, mark_links, page_size):
self.anchors = {}
self.links = []
self.start = {'top':page_size[1], 'column':0, 'left':0}
self.pdf = pdf
self.mark_links = mark_links
def add(self, base_path, start_page, links, anchors):
path = os.path.normcase(os.path.abspath(base_path))
self.anchors[path] = a = {}
a[None] = Destination(start_page, {'y':0, 'column':0})
a[None] = Destination(start_page, self.start, self.pdf.get_pageref)
for anchor, pos in anchors.iteritems():
a[anchor] = Destination(start_page, pos)
a[anchor] = Destination(start_page, pos, self.pdf.get_pageref)
for link in links:
href, page, rect = link
p, frag = href.partition('#')[0::2]
link = ((path, p, frag or None), self.pdf.get_pageref(page).obj, Array(rect))
self.links.append(link)
def add_links(self):
for link in self.links:
path, href, frag = link[0]
page, rect = link[1:]
combined_path = os.path.abspath(os.path.join(os.path.dirname(path), *href.split('/')))
is_local = not href or combined_path in self.anchors
annot = Dictionary({
'Type':Name('Annot'), 'Subtype':Name('Link'),
'Rect':rect, 'Border':Array([0,0,0]),
})
if self.mark_links:
annot.update({'Border':Array([16, 16, 1]), 'C':Array([1.0, 0,
0])})
if is_local:
path = combined_path if href else path
annot['Dest'] = self.anchors[path][frag]
else:
url = href + (('#'+frag) if frag else '')
purl = urlparse(url)
if purl.scheme and purl.scheme != 'file':
action = Dictionary({
'Type':Name('Action'), 'S':Name('URI'),
})
parts = (x.encode('utf-8') if isinstance(x, type(u'')) else
x for x in purl)
url = urlunparse(map(quote, map(unquote,
parts))).decode('ascii')
action['URI'] = String(url)
annot['A'] = action
if 'A' in annot or 'Dest' in annot:
if 'Annots' not in page:
page['Annots'] = Array()
page['Annots'].append(self.pdf.objects.add(annot))
else:
self.pdf.debug('Could not find destination for link: %s in file %s'%
(href, path))
def add_outline(self, toc):
parent = Dictionary({'Type':Name('Outlines')})
parentref = self.pdf.objects.add(parent)
self.process_children(toc, parentref, parent_is_root=True)
self.pdf.catalog.obj['Outlines'] = parentref
def process_children(self, toc, parentref, parent_is_root=False):
childrefs = []
for child in toc:
childref = self.process_toc_item(child, parentref)
if childref is None:
continue
if childrefs:
childrefs[-1].obj['Next'] = childref
childref.obj['Prev'] = childrefs[-1]
childrefs.append(childref)
if len(child) > 0:
self.process_children(child, childref)
if childrefs:
parentref.obj['First'] = childrefs[0]
parentref.obj['Last'] = childrefs[-1]
if not parent_is_root:
parentref.obj['Count'] = -len(childrefs)
def process_toc_item(self, toc, parentref):
path = toc.abspath or None
frag = toc.fragment or None
if path is None:
return
path = os.path.normcase(os.path.abspath(path))
if path not in self.anchors:
return None
a = self.anchors[path]
dest = a.get(frag, a[None])
item = Dictionary({'Parent':parentref, 'Dest':dest,
'Title':String(toc.text or _('Unknown'))})
return self.pdf.objects.add(item)

View File

@ -9,19 +9,18 @@ __docformat__ = 'restructuredtext en'
import hashlib
from future_builtins import map
from itertools import izip
from collections import namedtuple
from PyQt4.Qt import QBuffer, QByteArray, QImage, Qt, QColor, qRgba
from calibre.constants import (__appname__, __version__)
from calibre.ebooks.pdf.render.common import (
Reference, EOL, serialize, Stream, Dictionary, String, Name, Array,
GlyphIndex)
fmtnum)
from calibre.ebooks.pdf.render.fonts import FontManager
from calibre.ebooks.pdf.render.links import Links
PDFVER = b'%PDF-1.3'
Color = namedtuple('Color', 'red green blue opacity')
class IndirectObjects(object):
def __init__(self):
@ -89,6 +88,7 @@ class Page(Stream):
self.opacities = {}
self.fonts = {}
self.xobjects = {}
self.patterns = {}
def set_opacity(self, opref):
if opref not in self.opacities:
@ -107,6 +107,11 @@ class Page(Stream):
self.xobjects[imgref] = 'Image%d'%len(self.xobjects)
return self.xobjects[imgref]
def add_pattern(self, patternref):
if patternref not in self.patterns:
self.patterns[patternref] = 'Pat%d'%len(self.patterns)
return self.patterns[patternref]
def add_resources(self):
r = Dictionary()
if self.opacities:
@ -124,6 +129,13 @@ class Page(Stream):
for ref, name in self.xobjects.iteritems():
xobjects[name] = ref
r['XObject'] = xobjects
if self.patterns:
r['ColorSpace'] = Dictionary({'PCSp':Array(
[Name('Pattern'), Name('DeviceRGB')])})
patterns = Dictionary()
for ref, name in self.patterns.iteritems():
patterns[name] = ref
r['Pattern'] = patterns
if r:
self.page_dict['Resources'] = r
@ -153,54 +165,6 @@ class Path(object):
def close(self):
self.ops.append(('h',))
class Text(object):
def __init__(self):
self.transform = self.default_transform = [1, 0, 0, 1, 0, 0]
self.font_name = 'Times-Roman'
self.font_path = None
self.horizontal_scale = self.default_horizontal_scale = 100
self.word_spacing = self.default_word_spacing = 0
self.char_space = self.default_char_space = 0
self.glyph_adjust = self.default_glyph_adjust = None
self.size = 12
self.text = ''
def set_transform(self, *args):
if len(args) == 1:
m = args[0]
vals = [m.m11(), m.m12(), m.m21(), m.m22(), m.dx(), m.dy()]
else:
vals = args
self.transform = vals
def pdf_serialize(self, stream, font_name):
if not self.text: return
stream.write_line('BT ')
serialize(Name(font_name), stream)
stream.write(' %g Tf '%self.size)
stream.write(' '.join(map(type(u''), self.transform)) + ' Tm ')
if self.horizontal_scale != self.default_horizontal_scale:
stream.write('%g Tz '%self.horizontal_scale)
if self.word_spacing != self.default_word_spacing:
stream.write('%g Tw '%self.word_spacing)
if self.char_space != self.default_char_space:
stream.write('%g Tc '%self.char_space)
stream.write_line()
if self.glyph_adjust is self.default_glyph_adjust:
serialize(String(self.text), stream)
stream.write(' Tj ')
else:
chars = Array()
frac, widths = self.glyph_adjust
for c, width in izip(self.text, widths):
chars.append(String(c))
chars.append(int(width * frac))
serialize(chars, stream)
stream.write(' TJ ')
stream.write_line('ET')
class Catalog(Dictionary):
def __init__(self, pagetree):
@ -219,6 +183,9 @@ class PageTree(Dictionary):
self['Kids'].append(pageref)
self['Count'] += 1
def get_ref(self, num):
return self['Kids'][num-1]
class HashingStream(object):
def __init__(self, f):
@ -228,7 +195,9 @@ class HashingStream(object):
self.last_char = b''
def write(self, raw):
raw = raw if isinstance(raw, bytes) else raw.encode('ascii')
self.write_raw(raw if isinstance(raw, bytes) else raw.encode('ascii'))
def write_raw(self, raw):
self.f.write(raw)
self.hashobj.update(raw)
if raw:
@ -277,7 +246,8 @@ class PDFStream(object):
( True, True, 'evenodd') : 'B*',
}
def __init__(self, stream, page_size, compress=False):
def __init__(self, stream, page_size, compress=False, mark_links=False,
debug=print):
self.stream = HashingStream(stream)
self.compress = compress
self.write_line(PDFVER)
@ -294,6 +264,12 @@ class PDFStream(object):
self.stroke_opacities, self.fill_opacities = {}, {}
self.font_manager = FontManager(self.objects, self.compress)
self.image_cache = {}
self.pattern_cache = {}
self.debug = debug
self.links = Links(self, mark_links, page_size)
i = QImage(1, 1, QImage.Format_ARGB32)
i.fill(qRgba(0, 0, 0, 255))
self.alpha_bit = i.constBits().asstring(4).find(b'\xff')
@property
def page_tree(self):
@ -303,6 +279,9 @@ class PDFStream(object):
def catalog(self):
return self.objects[1]
def get_pageref(self, pagenum):
return self.page_tree.obj.get_ref(pagenum)
def set_metadata(self, title=None, author=None, tags=None):
if title:
self.info['Title'] = String(title)
@ -321,12 +300,9 @@ class PDFStream(object):
vals = [m.m11(), m.m12(), m.m21(), m.m22(), m.dx(), m.dy()]
else:
vals = args
cm = ' '.join(map(type(u''), vals))
cm = ' '.join(map(fmtnum, vals))
self.current_page.write_line(cm + ' cm')
def set_rgb_colorspace(self):
self.current_page.write_line('/DeviceRGB CS /DeviceRGB cs')
def save_stack(self):
self.current_page.write_line('q')
@ -337,7 +313,7 @@ class PDFStream(object):
self.current_page.write_line('Q q')
def draw_rect(self, x, y, width, height, stroke=True, fill=False):
self.current_page.write('%g %g %g %g re '%(x, y, width, height))
self.current_page.write('%s re '%' '.join(map(fmtnum, (x, y, width, height))))
self.current_page.write_line(self.PATH_OPS[(stroke, fill, 'winding')])
def write_path(self, path):
@ -345,7 +321,8 @@ class PDFStream(object):
if i != 0:
self.current_page.write_line()
for x in op:
self.current_page.write(type(u'')(x) + ' ')
self.current_page.write(
(fmtnum(x) if isinstance(x, (int, long, float)) else x) + ' ')
def draw_path(self, path, stroke=True, fill=False, fill_rule='winding'):
if not path.ops: return
@ -358,67 +335,38 @@ class PDFStream(object):
op = 'W' if fill_rule == 'winding' else 'W*'
self.current_page.write_line(op + ' ' + 'n')
def set_dash(self, array, phase=0):
array = Array(array)
serialize(array, self.current_page)
self.current_page.write(b' ')
serialize(phase, self.current_page)
self.current_page.write_line(' d')
def serialize(self, o):
serialize(o, self.current_page)
def set_line_width(self, width):
serialize(width, self.current_page)
self.current_page.write_line(' w')
def set_line_cap(self, style):
serialize({'flat':0, 'round':1, 'square':2}.get(style),
self.current_page)
self.current_page.write_line(' J')
def set_line_join(self, style):
serialize({'miter':0, 'round':1, 'bevel':2}[style], self.current_page)
self.current_page.write_line(' j')
def set_stroke_color(self, color):
opacity = color.opacity
def set_stroke_opacity(self, opacity):
if opacity not in self.stroke_opacities:
op = Dictionary({'Type':Name('ExtGState'), 'CA': opacity})
self.stroke_opacities[opacity] = self.objects.add(op)
self.current_page.set_opacity(self.stroke_opacities[opacity])
self.current_page.write_line(' '.join(map(type(u''), color[:3])) + ' SC')
def set_fill_color(self, color):
opacity = color.opacity
def set_fill_opacity(self, opacity):
opacity = float(opacity)
if opacity not in self.fill_opacities:
op = Dictionary({'Type':Name('ExtGState'), 'ca': opacity})
self.fill_opacities[opacity] = self.objects.add(op)
self.current_page.set_opacity(self.fill_opacities[opacity])
self.current_page.write_line(' '.join(map(type(u''), color[:3])) + ' sc')
def end_page(self):
pageref = self.current_page.end(self.objects, self.stream)
self.page_tree.obj.add_page(pageref)
self.current_page = Page(self.page_tree, compress=self.compress)
def draw_text(self, text_object):
if text_object.font_path is None:
fontref = self.font_manager.add_standard_font(text_object.font_name)
else:
raise NotImplementedError()
name = self.current_page.add_font(fontref)
text_object.pdf_serialize(self.current_page, name)
def draw_glyph_run(self, transform, size, font_metrics, glyphs):
glyph_ids = {x[-1] for x in glyphs}
fontref = self.font_manager.add_font(font_metrics, glyph_ids)
name = self.current_page.add_font(fontref)
self.current_page.write(b'BT ')
serialize(Name(name), self.current_page)
self.current_page.write(' %g Tf '%size)
self.current_page.write('%s Tm '%' '.join(map(type(u''), transform)))
self.current_page.write(' %s Tf '%fmtnum(size))
self.current_page.write('%s Tm '%' '.join(map(fmtnum, transform)))
for x, y, glyph_id in glyphs:
self.current_page.write('%g %g Td '%(x, y))
serialize(GlyphIndex(glyph_id), self.current_page)
self.current_page.write(' Tj ')
self.current_page.write_raw(('%s %s Td <%04X> Tj '%(
fmtnum(x), fmtnum(y), glyph_id)).encode('ascii'))
self.current_page.write_line(b' ET')
def get_image(self, cache_key):
@ -431,17 +379,109 @@ class PDFStream(object):
self.objects.commit(r, self.stream)
return r
def draw_image(self, x, y, xscale, yscale, imgref):
def add_image(self, img, cache_key):
    '''
    Add the QImage img to the PDF and return a reference to the
    resulting image object. Results are cached by cache_key, so adding
    the same image twice only writes it once.

    Pure black/white 1-bit images are stored as-is. Everything else is
    converted to (A)RGB32 and stored DCT (JPEG) encoded; an alpha
    channel, if present, is stored as an 8-bit soft mask, or dithered to
    a 1-bit stencil mask when it only contains fully on/off values.
    '''
    ref = self.get_image(cache_key)
    if ref is not None:
        return ref  # already written
    fmt = img.format()
    image = QImage(img)
    if (image.depth() == 1 and img.colorTable().size() == 2 and
        img.colorTable().at(0) == QColor(Qt.black).rgba() and
        img.colorTable().at(1) == QColor(Qt.white).rgba()):
        # Already a black/white monochrome image; normalize bit order
        if fmt == QImage.Format_MonoLSB:
            image = image.convertToFormat(QImage.Format_Mono)
            fmt = QImage.Format_Mono
    else:
        if (fmt != QImage.Format_RGB32 and fmt != QImage.Format_ARGB32):
            image = image.convertToFormat(QImage.Format_ARGB32)
            fmt = QImage.Format_ARGB32

    w = image.width()
    h = image.height()
    d = image.depth()

    if fmt == QImage.Format_Mono:
        bytes_per_line = (w + 7) >> 3
        data = image.constBits().asstring(bytes_per_line * h)
        return self.write_image(data, w, h, d, cache_key=cache_key)

    ba = QByteArray()
    buf = QBuffer(ba)
    image.save(buf, 'jpeg', 94)
    data = bytes(ba.data())
    has_alpha = has_mask = False
    soft_mask = mask = None

    if fmt == QImage.Format_ARGB32:
        # Extract the alpha channel, one byte per pixel
        tmask = image.constBits().asstring(4*w*h)[self.alpha_bit::4]
        sdata = bytearray(tmask)
        vals = set(sdata)
        vals.discard(255)  # fully opaque pixels need no mask
        has_mask = bool(vals)
        vals.discard(0)
        has_alpha = bool(vals)  # partially transparent pixels present

    if has_alpha:
        soft_mask = self.write_image(tmask, w, h, 8)
    elif has_mask:
        # dither the soft mask to 1bit and add it. This also helps PDF
        # viewers without transparency support
        bytes_per_line = (w + 7) >> 3
        mdata = bytearray(0 for i in xrange(bytes_per_line * h))
        spos = mpos = 0
        for y in xrange(h):
            for x in xrange(w):
                if sdata[spos]:
                    # The byte for column x in this row is at
                    # mpos + (x >> 3). Parentheses are required: + binds
                    # tighter than >>, so the unparenthesized
                    # mpos + x>>3 would index (mpos + x) >> 3 and
                    # corrupt the mask.
                    mdata[mpos + (x >> 3)] |= (0x80 >> (x&7))
                spos += 1
            mpos += bytes_per_line
        mdata = bytes(mdata)
        mask = self.write_image(mdata, w, h, 1)

    return self.write_image(data, w, h, 32, mask=mask, dct=True,
                            soft_mask=soft_mask, cache_key=cache_key)
def add_pattern(self, pattern):
    '''Register pattern with the PDF (written at most once per
    cache_key) and return the name that references it on the current
    page.'''
    key = pattern.cache_key
    if key not in self.pattern_cache:
        self.pattern_cache[key] = self.objects.add(pattern)
    return self.current_page.add_pattern(self.pattern_cache[key])
def draw_image(self, x, y, width, height, imgref):
name = self.current_page.add_image(imgref)
self.current_page.write('q %g 0 0 %g %g %g cm '%(xscale, yscale, x, y))
self.current_page.write('q %s 0 0 %s %s %s cm '%(fmtnum(width),
fmtnum(-height), fmtnum(x), fmtnum(y+height)))
serialize(Name(name), self.current_page)
self.current_page.write_line(' Do Q')
def apply_color_space(self, color, pattern, stroke=False):
    '''
    Write the operators selecting the current color and/or pattern.
    When stroke is True the stroking (upper-case) operators are used,
    otherwise the non-stroking ones. Does nothing if both color and
    pattern are None.
    '''
    wl = self.current_page.write_line
    cs_op, scn_op, col_op = (('CS', 'SCN', 'RG') if stroke else
                             ('cs', 'scn', 'rg'))
    if pattern is None:
        if color is not None:
            # Plain color in the device RGB space
            wl(' '.join(map(fmtnum, color)) + ' ' + col_op)
    elif color is None:
        # Colored pattern: carries its own colors
        wl('/Pattern %s /%s %s'%(cs_op, pattern, scn_op))
    else:
        # Uncolored pattern painted with the given color
        wl('/PCSp %s %s /%s %s'%(cs_op, ' '.join(map(fmtnum, color)),
                                 pattern, scn_op))
def apply_fill(self, color=None, pattern=None, opacity=None):
    """Select the fill color/pattern, first adjusting fill opacity if given."""
    if opacity is None:
        self.apply_color_space(color, pattern)
        return
    self.set_fill_opacity(opacity)
    self.apply_color_space(color, pattern)
def apply_stroke(self, color=None, pattern=None, opacity=None):
    """Select the stroke color/pattern, first adjusting stroke opacity if given."""
    if opacity is None:
        self.apply_color_space(color, pattern, stroke=True)
        return
    self.set_stroke_opacity(opacity)
    self.apply_color_space(color, pattern, stroke=True)
def end(self):
    # Finalize the document: flush any half-written page, embed the
    # fonts collected during rendering, then serialize every PDF object
    # followed by the cross-reference table.
    # NOTE(review): this view of the method stops right after write_xref;
    # the trailer/startxref emission presumably follows in the full
    # file — confirm before relying on this being the whole method.
    if self.current_page.getvalue():
        self.end_page()
    self.font_manager.embed_fonts()
    # Reference to the document information dictionary (metadata);
    # presumably consumed by the trailer below this view.
    inforef = self.objects.add(self.info)
    self.links.add_links()
    self.objects.pdf_serialize(self.stream)
    self.write_line()
    # Byte offset of the xref table, needed for the startxref entry.
    startxref = self.objects.write_xref(self.stream)

View File

@ -0,0 +1,128 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from tempfile import gettempdir
from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF,
QApplication, QPainter, Qt, QImage, QLinearGradient,
QPointF, QPen)
QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, Qt, QPointF
from calibre.ebooks.pdf.render.engine import PdfDevice
def full(p, xmax, ymax):
    # Exercise a broad mix of QPainter operations on painter *p*, which
    # targets a (xmax x ymax) device. Used to compare PDF output against
    # raster output of the same drawing commands.
    # Outline the full page three different ways (rect, polyline, path)
    # so all three primitives get exercised.
    p.drawRect(0, 0, xmax, ymax)
    p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax),
        QPoint(0, ymax), QPoint(0, 0))
    pp = QPainterPath()
    pp.addRect(0, 0, xmax, ymax)
    p.drawPath(pp)
    p.save()
    # Three translucent colored squares, each translated/scaled relative
    # to the previous one to exercise opacity and CTM changes.
    for i in xrange(3):
        col = [0, 0, 0, 200]
        col[i] = 255  # pure red, green, blue in turn
        p.setOpacity(0.3)
        p.fillRect(0, 0, xmax/10, xmax/10, QBrush(QColor(*col)))
        p.setOpacity(1)
        p.drawRect(0, 0, xmax/10, xmax/10)
        p.translate(xmax/10, xmax/10)
        p.scale(1, 1.5)
    p.restore()
    # p.scale(2, 2)
    # p.rotate(45)
    # Pixmap drawing with an outline to verify placement.
    p.drawPixmap(0, 0, xmax/4, xmax/4, QPixmap(I('library.png')))
    p.drawRect(0, 0, xmax/4, xmax/4)
    # Text rendering: large underlined Calibri with mixed-script
    # characters and ligatures to stress font embedding/subsetting.
    f = p.font()
    f.setPointSize(20)
    # f.setLetterSpacing(f.PercentageSpacing, 200)
    f.setUnderline(True)
    # f.setOverline(True)
    # f.setStrikeOut(True)
    f.setFamily('Calibri')
    p.setFont(f)
    # p.setPen(QColor(0, 0, 255))
    # p.scale(2, 2)
    # p.rotate(45)
    p.drawText(QPoint(xmax/3.9, 30), 'Some—text not Bys ū --- Д AV ff ff')
    # Brush fills: a stock hatch pattern, a pixmap texture, and a tiled
    # pixmap with a non-zero tiling origin.
    b = QBrush(Qt.HorPattern)
    b.setColor(QColor(Qt.blue))
    pix = QPixmap(I('console.png'))
    w = xmax/4
    p.fillRect(0, ymax/3, w, w, b)
    p.fillRect(xmax/3, ymax/3, w, w, QBrush(pix))
    x, y = 2*xmax/3, ymax/3
    p.drawTiledPixmap(QRectF(x, y, w, w), pix, QPointF(10, 10))
    # Linear gradient fill from blue to white across the square.
    x, y = 1, ymax/1.9
    g = QLinearGradient(QPointF(x, y), QPointF(x+w, y+w))
    g.setColorAt(0, QColor('#00f'))
    g.setColorAt(1, QColor('#fff'))
    p.fillRect(x, y, w, w, QBrush(g))
def run(dev, func):
    """Paint with *func* onto *dev*; fail hard if the PDF engine errored.

    *func* receives the painter and the viewport width/height. The
    painter is always ended, even if *func* raises.
    """
    painter = QPainter(dev)
    pdf_target = isinstance(dev, PdfDevice)
    if pdf_target:
        dev.init_page()
    view = painter.viewport()
    wmax, hmax = view.width(), view.height()
    try:
        func(painter, wmax, hmax)
    finally:
        painter.end()
    if pdf_target and dev.engine.errors_occurred:
        raise SystemExit(1)
def brush(p, xmax, ymax):
    """Fill two squares with a pixmap-textured brush."""
    side = xmax/2
    source = QPixmap(I('console.png'))
    p.fillRect(xmax/3, 0, side, side, QBrush(source))
    p.fillRect(0, 0+xmax/1.9, side, side, QBrush(source))
def pen(p, xmax, ymax):
    """Stroke a rectangle using a very wide pen textured with a pixmap."""
    texture = QPixmap(I('console.png'))
    wide_pen = QPen(QBrush(texture), 60)
    p.setPen(wide_pen)
    p.drawRect(0, xmax/3, xmax/3, xmax/2)
def text(p, xmax, ymax):
    """Draw a short text sample one third of the way down the page."""
    anchor = QPoint(0, ymax/3)
    p.drawText(anchor, 'Text')
def main():
    # Render the same drawing function to a PDF device and to a raster
    # QImage at the same DPI, so the two outputs can be compared by eye.
    app = QApplication([])
    app  # keep a live reference so the QApplication is not collected
    tdir = gettempdir()
    pdf = os.path.join(tdir, 'painter.pdf')
    func = full  # which test drawing to run (full/brush/pen/text)
    dpi = 100
    with open(pdf, 'wb') as f:
        dev = PdfDevice(f, xdpi=dpi, ydpi=dpi, compress=False)
        # Raster target matching the PDF device's pixel dimensions.
        img = QImage(dev.width(), dev.height(),
                QImage.Format_ARGB32_Premultiplied)
        # 39.37 inches per meter converts DPI to dots-per-meter.
        img.setDotsPerMeterX(dpi*39.37)
        img.setDotsPerMeterY(dpi*39.37)
        img.fill(Qt.white)
        run(dev, func)
        run(img, func)
    # NOTE(review): indentation of the two run() calls relative to the
    # with-block is reconstructed; both appear adjacent in the source and
    # only the PDF run needs the open file — confirm against the original.
    path = os.path.join(tdir, 'painter.png')
    img.save(path)
    print ('PDF written to:', pdf)
    print ('Image written to:', path)

if __name__ == '__main__':
    main()

View File

@ -33,7 +33,10 @@ from calibre.utils.config import prefs
from calibre.utils.logging import Log
class NoSupportedInputFormats(Exception):
    """Raised when a book has no formats the conversion pipeline accepts.

    ``available_formats`` records the formats the book actually has
    (possibly empty), so callers can tell the user what was found.
    """
    # Bug fix: a redundant ``pass`` statement left over from the previous
    # empty class body (merge residue) has been removed.

    def __init__(self, available_formats):
        Exception.__init__(self)
        self.available_formats = available_formats
def sort_formats_by_preference(formats, prefs):
uprefs = [x.upper() for x in prefs]
@ -86,7 +89,7 @@ def get_supported_input_formats_for_book(db, book_id):
input_formats = set([x.lower() for x in supported_input_formats()])
input_formats = sorted(available_formats.intersection(input_formats))
if not input_formats:
raise NoSupportedInputFormats
raise NoSupportedInputFormats(tuple(x for x in available_formats if x))
return input_formats

View File

@ -372,7 +372,7 @@ class Series(Base):
self.widgets.append(QLabel('&'+self.col_metadata['name']+_(' index:'), parent))
w = QDoubleSpinBox(parent)
w.setRange(-100., float(100000000))
w.setRange(-10000., float(100000000))
w.setDecimals(2)
w.setSingleStep(1)
self.idx_widget=w

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import functools, re, os, traceback, errno
import functools, re, os, traceback, errno, time
from collections import defaultdict
from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage,
@ -1419,7 +1419,11 @@ class DeviceBooksModel(BooksModel): # {{{
return QVariant(human_readable(size))
elif cname == 'timestamp':
dt = self.db[self.map[row]].datetime
dt = dt_factory(dt, assume_utc=True, as_utc=False)
try:
dt = dt_factory(dt, assume_utc=True, as_utc=False)
except OverflowError:
dt = dt_factory(time.gmtime(), assume_utc=True,
as_utc=False)
return QVariant(strftime(TIME_FMT, dt.timetuple()))
elif cname == 'collections':
tags = self.db[self.map[row]].device_collections

View File

@ -1094,6 +1094,9 @@ class RatingEdit(QSpinBox): # {{{
db.set_rating(id_, 2*self.current_val, notify=False, commit=False)
return True
def zero(self):
self.setValue(0)
# }}}
class TagsEdit(EditWithComplete): # {{{

View File

@ -181,6 +181,11 @@ class MetadataSingleDialogBase(ResizableDialog):
self.basic_metadata_widgets.append(self.comments)
self.rating = RatingEdit(self)
self.clear_ratings_button = QToolButton(self)
self.clear_ratings_button.setToolTip(_('Clear rating'))
self.clear_ratings_button.setIcon(QIcon(I('trash.png')))
self.clear_ratings_button.clicked.connect(self.rating.zero)
self.basic_metadata_widgets.append(self.rating)
self.tags = TagsEdit(self)
@ -659,8 +664,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
QSizePolicy.Expanding)
l.addItem(self.tabs[0].spc_one, 1, 0, 1, 3)
sto(self.cover.buttons[-1], self.rating)
create_row2(1, self.rating)
sto(self.rating, self.tags_editor_button)
create_row2(1, self.rating, self.clear_ratings_button)
sto(self.rating, self.clear_ratings_button)
sto(self.clear_ratings_button, self.tags_editor_button)
sto(self.tags_editor_button, self.tags)
create_row2(2, self.tags, self.clear_tags_button, front_button=self.tags_editor_button)
sto(self.clear_tags_button, self.paste_isbn_button)
@ -780,7 +786,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.clear_tags_button)
create_row(7, self.rating, self.pubdate)
create_row(7, self.rating, self.pubdate, button=self.clear_ratings_button)
create_row(8, self.pubdate, self.publisher,
button=self.pubdate.clear_button, icon='trash.png')
create_row(9, self.publisher, self.languages)
@ -917,7 +923,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{
button=self.clear_series_button, icon='trash.png')
create_row(5, self.series_index, self.tags)
create_row(6, self.tags, self.rating, button=self.clear_tags_button)
create_row(7, self.rating, self.pubdate)
create_row(7, self.rating, self.pubdate, button=self.clear_ratings_button)
create_row(8, self.pubdate, self.publisher,
button=self.pubdate.clear_button, icon='trash.png')
create_row(9, self.publisher, self.languages)

View File

@ -7,8 +7,10 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import (QLabel, QVBoxLayout, QListWidget, QListWidgetItem, Qt)
from PyQt4.Qt import (QLabel, QVBoxLayout, QListWidget, QListWidgetItem, Qt,
QIcon)
from calibre.customize.ui import enable_plugin
from calibre.gui2.preferences import ConfigWidgetBase, test_widget
class ConfigWidget(ConfigWidgetBase):
@ -31,6 +33,18 @@ class ConfigWidget(ConfigWidgetBase):
f.itemChanged.connect(self.changed_signal)
f.itemDoubleClicked.connect(self.toggle_item)
self.la2 = la = QLabel(_(
'The list of device plugins you have disabled. Uncheck an entry '
'to enable the plugin. calibre cannot detect devices that are '
'managed by disabled plugins.'))
la.setWordWrap(True)
l.addWidget(la)
self.device_plugins = f = QListWidget(f)
l.addWidget(f)
f.itemChanged.connect(self.changed_signal)
f.itemDoubleClicked.connect(self.toggle_item)
def toggle_item(self, item):
item.setCheckState(Qt.Checked if item.checkState() == Qt.Unchecked else
Qt.Unchecked)
@ -46,6 +60,17 @@ class ConfigWidget(ConfigWidgetBase):
item.setCheckState(Qt.Checked)
self.devices.blockSignals(False)
self.device_plugins.blockSignals(True)
for dev in self.gui.device_manager.disabled_device_plugins:
n = dev.get_gui_name()
item = QListWidgetItem(n, self.device_plugins)
item.setData(Qt.UserRole, dev)
item.setFlags(Qt.ItemIsEnabled|Qt.ItemIsUserCheckable|Qt.ItemIsSelectable)
item.setCheckState(Qt.Checked)
item.setIcon(QIcon(I('plugins.png')))
self.device_plugins.sortItems()
self.device_plugins.blockSignals(False)
def restore_defaults(self):
if self.devices.count() > 0:
self.devices.clear()
@ -63,6 +88,12 @@ class ConfigWidget(ConfigWidgetBase):
for dev, bl in devs.iteritems():
dev.set_user_blacklisted_devices(bl)
for i in xrange(self.device_plugins.count()):
e = self.device_plugins.item(i)
dev = e.data(Qt.UserRole).toPyObject()
if e.checkState() == Qt.Unchecked:
enable_plugin(dev)
return True # Restart required
if __name__ == '__main__':

View File

@ -273,7 +273,7 @@
<widget class="QLabel" name="label_13">
<property name="text">
<string>&lt;p&gt;Remember to leave calibre running as the server only runs as long as calibre is running.
&lt;p&gt;To connect to the calibre server from your device you should use a URL of the form &lt;b&gt;http://myhostname:8080&lt;/b&gt; as a new catalog in the Stanza reader on your iPhone. Here myhostname should be either the fully qualified hostname or the IP address of the computer calibre is running on.</string>
&lt;p&gt;To connect to the calibre server from your device you should use a URL of the form &lt;b&gt;http://myhostname:8080&lt;/b&gt;. Here myhostname should be either the fully qualified hostname or the IP address of the computer calibre is running on. If you want to access the server from anywhere in the world, you will have to setup port forwarding for it on your router.</string>
</property>
<property name="wordWrap">
<bool>true</bool>

View File

@ -6,6 +6,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from contextlib import closing
@ -50,12 +51,17 @@ class BNStore(BasicStoreConfig, StorePlugin):
if not id:
continue
cover_url = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@src'))
cover_url = ''
cover_id = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@id'))
m = re.search(r"%s'.*?srcUrl: '(?P<iurl>.*?)'.*?}" % cover_id, raw)
if m:
cover_url = m.group('iurl')
title = ''.join(data.xpath('descendant::p[@class="title"]//span[@class="name"]//text()')).strip()
if not title: continue
if not title:
continue
author = ', '.join(data.xpath('.//ul[@class="contributors"]//a[@class="subtle"]//text()')).strip()
author = ', '.join(data.xpath('.//ul[contains(@class, "contributors")]//a[contains(@class, "subtle")]//text()')).strip()
price = ''.join(data.xpath('.//a[contains(@class, "bn-price")]//text()'))
counter -= 1

View File

@ -59,7 +59,7 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//ol[@id="rso"]/li'):
for data in doc.xpath('//ol/li'):
if counter <= 0:
break
@ -68,7 +68,7 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin):
continue
title = ''.join(data.xpath('.//h3/a//text()'))
authors = data.xpath('.//div[@class="f"]//a//text()')
authors = data.xpath('.//span[contains(@class, "f")]//a//text()')
while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'):
authors = authors[:-1]
if not authors:

View File

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2012, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re
import urllib
from contextlib import closing
from lxml import html
from PyQt4.Qt import QUrl
from calibre import browser, url_slash_cleaner
from calibre.gui2 import open_url
from calibre.gui2.store import StorePlugin
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.search_result import SearchResult
from calibre.gui2.store.web_store_dialog import WebStoreDialog
class NookUKStore(BasicStoreConfig, StorePlugin):
    # Store plugin for the Barnes & Noble Nook UK web store
    # (http://uk.nook.com).

    def open(self, parent=None, detail_item=None, external=False):
        # Open the store (or a specific book page, if detail_item is a
        # URL) either in the system browser or in calibre's embedded
        # web-store dialog, depending on configuration.
        url = "http://uk.nook.com"
        if external or self.config.get('open_external', False):
            open_url(QUrl(url_slash_cleaner(detail_item if detail_item else url)))
        else:
            d = WebStoreDialog(self.gui, url, parent, detail_item)
            d.setWindowTitle(self.name)
            d.set_tags(self.config.get('tags', ''))
            d.exec_()

    def search(self, query, max_results=10, timeout=60):
        # Scrape the store's search results page, yielding up to
        # max_results SearchResult objects. Results are screen-scraped,
        # so the xpath expressions are tied to the site's markup.
        url = u'http://uk.nook.com/s/%s?s%%5Bdref%%5D=1&s%%5Bkeyword%%5D=%s' % (query.replace(' ', '-'), urllib.quote(query))
        br = browser()
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            raw = f.read()
            doc = html.fromstring(raw)
            for data in doc.xpath('//ul[contains(@class, "product_list")]/li'):
                if counter <= 0:
                    break
                # Relative URL of the book's detail page; used as its id.
                # (``id`` shadows the builtin — pre-existing naming.)
                id = ''.join(data.xpath('.//span[contains(@class, "image")]/a/@href'))
                if not id:
                    continue
                cover_url = ''.join(data.xpath('.//span[contains(@class, "image")]//img/@data-src'))
                title = ''.join(data.xpath('.//div[contains(@class, "title")]//text()')).strip()
                if not title:
                    continue
                author = ', '.join(data.xpath('.//div[contains(@class, "contributor")]//a/text()')).strip()
                price = ''.join(data.xpath('.//div[contains(@class, "action")]//a//text()')).strip()
                # Keep only digits, separators and the pound sign.
                price = re.sub(r'[^\d.,£]', '', price);
                counter -= 1
                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = 'http://uk.nook.com/' + id.strip()
                # DRM status cannot be determined from the listing page.
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'
                yield s

View File

@ -76,7 +76,7 @@ class SmashwordsStore(BasicStoreConfig, StorePlugin):
title = ''.join(data.xpath('//a[@class="bookTitle"]/text()'))
subnote = ''.join(data.xpath('//span[@class="subnote"]/text()'))
author = ''.join(data.xpath('//span[@class="subnote"]/a/text()'))
author = ''.join(data.xpath('//span[@class="subnote"]//a[1]//text()'))
if '$' in subnote:
price = subnote.partition('$')[2]
price = price.split(u'\xa0')[0]

View File

@ -88,20 +88,35 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{
changed = True
d.break_cycles()
except NoSupportedInputFormats:
bad.append(book_id)
except NoSupportedInputFormats as nsif:
bad.append((book_id, nsif.available_formats))
if bad and show_no_format_warning:
res = []
for id in bad:
title = db.title(id, True)
res.append('%s'%title)
if len(bad) == 1 and not bad[0][1]:
title = db.title(bad[0][0], True)
warning_dialog(parent, _('Could not convert'), '<p>'+
_('Could not convert <b>%s</b> as it has no ebook files. If you '
'think it should have files, but calibre is not finding '
'them, that is most likely because you moved the book\'s '
'files around outside of calibre. You will need to find those files '
'and re-add them to calibre.')%title, show=True)
else:
res = []
for id, available_formats in bad:
title = db.title(id, True)
if available_formats:
msg = _('No supported formats (Available formats: %s)')%(
', '.join(available_formats))
else:
msg = _('This book has no actual ebook files')
res.append('%s - %s'%(title, msg))
msg = '%s' % '\n'.join(res)
warning_dialog(parent, _('Could not convert some books'),
_('Could not convert %(num)d of %(tot)d books, because no suitable source'
' format was found.') % dict(num=len(res), tot=total),
msg).exec_()
msg = '%s' % '\n'.join(res)
warning_dialog(parent, _('Could not convert some books'),
_('Could not convert %(num)d of %(tot)d books, because no supported source'
' formats were found.') % dict(num=len(res), tot=total),
msg).exec_()
return jobs, changed, bad
# }}}

View File

@ -17,7 +17,7 @@ def ascii_text(orig):
ascii = udc.decode(orig)
except:
if isinstance(orig, unicode):
ascii = orig.encode('ascii', 'replace')
orig = orig.encode('ascii', 'replace')
ascii = orig.decode(preferred_encoding,
'replace').encode('ascii', 'replace')
return ascii

View File

@ -84,7 +84,7 @@ def do_warn(warnings, *args):
def pdf_subset(sfnt, glyphs):
    """Strip all sfnt tables that a PDF renderer does not use.

    Deletes every table of *sfnt* (in place) whose tag is not one of the
    core outline/metric tables. Note sfnt table tags are exactly four
    bytes, so short tags carry trailing spaces (b'cvt ', b'CFF ').
    """
    # Bug fix: the pre-edit line listing the (wrong) three-byte b'cvt'
    # tag was left in alongside its corrected b'cvt ' replacement (merge
    # residue), duplicating most of the set literal. Only the corrected
    # tags are kept.
    keep = {b'hhea', b'head', b'hmtx', b'maxp',
            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca',
            b'prep', b'CFF ', b'VORG'}
    for tag in tuple(sfnt.tables):
        if tag not in keep:
            # Remove non core tables since they are unused in PDF rendering
            del sfnt[tag]

View File

@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments
"""
__version__ = "1.5.6"
__versionTime__ = "26 June 2011 10:53"
__version__ = "1.5.7"
__versionTime__ = "17 November 2012 16:18"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@ -81,66 +81,51 @@ __all__ = [
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation',
]
"""
Detect if we are running version 3.X and make appropriate changes
Robert A. Clark
"""
_PY3K = sys.version_info[0] > 2
if _PY3K:
_MAX_INT = sys.maxsize
basestring = str
unichr = chr
_ustr = str
alphas = string.ascii_lowercase + string.ascii_uppercase
else:
_MAX_INT = sys.maxint
range = xrange
set = lambda s : dict( [(c,0) for c in s] )
alphas = string.lowercase + string.uppercase
_MAX_INT = sys.maxint
range = xrange
set = lambda s : dict( [(c,0) for c in s] )
def _ustr(obj):
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
then < returns the unicode object | encodes it with the default encoding | ... >.
"""
if isinstance(obj,unicode):
return obj
def _ustr(obj):
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
then < returns the unicode object | encodes it with the default encoding | ... >.
"""
if isinstance(obj,unicode):
return obj
try:
# If this works, then _ustr(obj) has the same behaviour as str(obj), so
# it won't break any existing code.
return str(obj)
try:
# If this works, then _ustr(obj) has the same behaviour as str(obj), so
# it won't break any existing code.
return str(obj)
except UnicodeEncodeError:
# The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
# state that "The return value must be a string object". However, does a
# unicode object (being a subclass of basestring) count as a "string
# object"?
# If so, then return a unicode object:
return unicode(obj)
# Else encode it... but how? There are many choices... :)
# Replace unprintables with escape codes?
#return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
# Replace unprintables with question marks?
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
# ...
alphas = string.lowercase + string.uppercase
except UnicodeEncodeError:
# The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
# state that "The return value must be a string object". However, does a
# unicode object (being a subclass of basestring) count as a "string
# object"?
# If so, then return a unicode object:
return unicode(obj)
# Else encode it... but how? There are many choices... :)
# Replace unprintables with escape codes?
#return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
# Replace unprintables with question marks?
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
# ...
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
import __builtin__
for fname in "sum len enumerate sorted reversed list tuple set any all".split():
for fname in "sum len sorted reversed list tuple set any all min max".split():
try:
singleArgBuiltins.append(getattr(__builtin__,fname))
except AttributeError:
@ -159,7 +144,8 @@ def _xml_escape(data):
class _Constants(object):
pass
nums = string.digits
alphas = string.ascii_lowercase + string.ascii_uppercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
@ -211,7 +197,7 @@ class ParseBaseException(Exception):
return line_str.strip()
def __dir__(self):
return "loc msg pstr parserElement lineno col line " \
"markInputLine __str__ __repr__".split()
"markInputline __str__ __repr__".split()
class ParseException(ParseBaseException):
"""exception thrown when parse expressions don't match class;
@ -228,8 +214,8 @@ class ParseFatalException(ParseBaseException):
pass
class ParseSyntaxException(ParseFatalException):
"""just like C{ParseFatalException}, but thrown internally when an
C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because
"""just like C{L{ParseFatalException}}, but thrown internally when an
C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
an unbacktrackable syntax error has been found"""
def __init__(self, pe):
super(ParseSyntaxException, self).__init__(
@ -444,16 +430,13 @@ class ParseResults(object):
return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
def __str__( self ):
out = "["
sep = ""
out = []
for i in self.__toklist:
if isinstance(i, ParseResults):
out += sep + _ustr(i)
out.append(_ustr(i))
else:
out += sep + repr(i)
sep = ", "
out += "]"
return out
out.append(repr(i))
return '[' + ', '.join(out) + ']'
def _asStringList( self, sep='' ):
out = []
@ -616,7 +599,7 @@ class ParseResults(object):
self.__parent = None
def __dir__(self):
return dir(super(ParseResults,self)) + self.keys()
return dir(super(ParseResults,self)) + list(self.keys())
def col (loc,strg):
"""Returns current column within a string, counting newlines as line separators.
@ -624,7 +607,7 @@ def col (loc,strg):
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column
positions within the parsed string.
"""
@ -636,7 +619,7 @@ def lineno(loc,strg):
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column
positions within the parsed string.
"""
@ -666,33 +649,23 @@ def nullDebugAction(*args):
pass
'decorator to trim function calls to match the arity of the target'
if not _PY3K:
def _trim_arity(func, maxargs=2):
limit = [0]
def wrapper(*args):
while 1:
try:
return func(*args[limit[0]:])
except TypeError:
if limit[0] <= maxargs:
limit[0] += 1
continue
raise
return wrapper
else:
def _trim_arity(func, maxargs=2):
limit = maxargs
def wrapper(*args):
#~ nonlocal limit
while 1:
try:
return func(*args[limit:])
except TypeError:
if limit:
limit -= 1
continue
raise
return wrapper
def _trim_arity(func, maxargs=2):
    # Wrap *func* so it can be called with up to *maxargs* leading
    # arguments trimmed off; pyparsing uses this so parse actions may
    # accept (s, l, t), (l, t), (t,) or no arguments at all.
    if func in singleArgBuiltins:
        # Builtins like len/sum take exactly one argument: pass only the
        # token list.
        return lambda s,l,t: func(t)
    # Mutable cells so the nested wrapper can update state (py2 has no
    # nonlocal).
    limit = [0]
    foundArity = [False]
    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                # Remember that this arity worked, so a later TypeError
                # raised from inside func is re-raised instead of being
                # mistaken for an arity mismatch.
                foundArity[0] = True
                return ret
            except TypeError:
                if limit[0] <= maxargs and not foundArity[0]:
                    limit[0] += 1
                    continue
                raise
    return wrapper
class ParserElement(object):
"""Abstract base level parser element class."""
@ -705,6 +678,13 @@ class ParserElement(object):
ParserElement.DEFAULT_WHITE_CHARS = chars
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
def inlineLiteralsUsing(cls):
"""
Set class to be used for inclusion of string literals into a parser.
"""
ParserElement.literalStringClass = cls
inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
def __init__( self, savelist=False ):
self.parseAction = list()
self.failAction = None
@ -789,14 +769,14 @@ class ParserElement(object):
C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- s = the original string being parsed (see note below)
- loc = the location of the matching substring
- toks = a list of the matched tokens, packaged as a ParseResults object
- toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
If the functions in fns modify the tokens, they can return them as the return
value from fn, and the modified list of tokens will replace the original.
Otherwise, fn does not need to return any value.
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{parseString}<parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column
positions within the parsed string.
"""
@ -818,7 +798,7 @@ class ParserElement(object):
- loc = location where expression match was attempted and failed
- expr = the parse expression that failed
- err = the exception thrown
The function returns no value. It may throw C{ParseFatalException}
The function returns no value. It may throw C{L{ParseFatalException}}
if it is desired to stop parsing immediately."""
self.failAction = fn
return self
@ -872,15 +852,12 @@ class ParserElement(object):
loc,tokens = self.parseImpl( instring, preloc, doActions )
except IndexError:
raise ParseException( instring, len(instring), self.errmsg, self )
except ParseBaseException:
except ParseBaseException, err:
#~ print ("Exception raised:", err)
err = None
if self.debugActions[2]:
err = sys.exc_info()[1]
self.debugActions[2]( instring, tokensStart, self, err )
if self.failAction:
if err is None:
err = sys.exc_info()[1]
self.failAction( instring, tokensStart, self, err )
raise
else:
@ -910,10 +887,9 @@ class ParserElement(object):
self.resultsName,
asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
modal=self.modalResults )
except ParseBaseException:
except ParseBaseException, err:
#~ print "Exception raised in user parse action:", err
if (self.debugActions[2] ):
err = sys.exc_info()[1]
self.debugActions[2]( instring, tokensStart, self, err )
raise
else:
@ -952,8 +928,7 @@ class ParserElement(object):
value = self._parseNoCache( instring, loc, doActions, callPreParse )
ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
return value
except ParseBaseException:
pe = sys.exc_info()[1]
except ParseBaseException, pe:
ParserElement._exprArgCache[ lookup ] = pe
raise
@ -994,7 +969,7 @@ class ParserElement(object):
If you want the grammar to require that the entire input string be
successfully parsed, then set C{parseAll} to True (equivalent to ending
the grammar with C{StringEnd()}).
the grammar with C{L{StringEnd()}}).
Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
in order to report proper column numbers in parse actions.
@ -1023,12 +998,11 @@ class ParserElement(object):
loc = self.preParse( instring, loc )
se = Empty() + StringEnd()
se._parse( instring, loc )
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
else:
return tokens
@ -1076,16 +1050,15 @@ class ParserElement(object):
loc = nextLoc
else:
loc = preloc+1
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
def transformString( self, instring ):
"""Extension to C{scanString}, to modify matching text with modified tokens that may
"""Extension to C{L{scanString}}, to modify matching text with modified tokens that may
be returned from a parse action. To use C{transformString}, define a grammar and
attach a parse action to it that modifies the returned token list.
Invoking C{transformString()} on a target string will then scan for matches,
@ -1110,33 +1083,31 @@ class ParserElement(object):
out.append(instring[lastE:])
out = [o for o in out if o]
return "".join(map(_ustr,_flatten(out)))
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
def searchString( self, instring, maxMatches=_MAX_INT ):
"""Another extension to C{scanString}, simplifying the access to the tokens found
"""Another extension to C{L{scanString}}, simplifying the access to the tokens found
to match the given parse expression. May be called with optional
C{maxMatches} argument, to clip searching after 'n' matches are found.
"""
try:
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
def __add__(self, other ):
"""Implementation of + operator - returns And"""
"""Implementation of + operator - returns C{L{And}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1144,9 +1115,9 @@ class ParserElement(object):
return And( [ self, other ] )
def __radd__(self, other ):
"""Implementation of + operator when left operand is not a C{ParserElement}"""
"""Implementation of + operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1154,9 +1125,9 @@ class ParserElement(object):
return other + self
def __sub__(self, other):
"""Implementation of - operator, returns C{And} with error stop"""
"""Implementation of - operator, returns C{L{And}} with error stop"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1164,9 +1135,9 @@ class ParserElement(object):
return And( [ self, And._ErrorStop(), other ] )
def __rsub__(self, other ):
"""Implementation of - operator when left operand is not a C{ParserElement}"""
"""Implementation of - operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1179,12 +1150,12 @@ class ParserElement(object):
tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
may also include C{None} as in:
- C{expr*(n,None)} or C{expr*(n,)} is equivalent
to C{expr*n + ZeroOrMore(expr)}
to C{expr*n + L{ZeroOrMore}(expr)}
(read as "at least n instances of C{expr}")
- C{expr*(None,n)} is equivalent to C{expr*(0,n)}
(read as "0 to n instances of C{expr}")
- C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
- C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}
- C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
- C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
Note that C{expr*(None,n)} does not raise an exception if
more than n exprs exist in the input stream; that is,
@ -1245,9 +1216,9 @@ class ParserElement(object):
return self.__mul__(other)
def __or__(self, other ):
"""Implementation of | operator - returns C{MatchFirst}"""
"""Implementation of | operator - returns C{L{MatchFirst}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1255,9 +1226,9 @@ class ParserElement(object):
return MatchFirst( [ self, other ] )
def __ror__(self, other ):
"""Implementation of | operator when left operand is not a C{ParserElement}"""
"""Implementation of | operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1265,9 +1236,9 @@ class ParserElement(object):
return other | self
def __xor__(self, other ):
"""Implementation of ^ operator - returns C{Or}"""
"""Implementation of ^ operator - returns C{L{Or}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1275,9 +1246,9 @@ class ParserElement(object):
return Or( [ self, other ] )
def __rxor__(self, other ):
"""Implementation of ^ operator when left operand is not a C{ParserElement}"""
"""Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1285,9 +1256,9 @@ class ParserElement(object):
return other ^ self
def __and__(self, other ):
"""Implementation of & operator - returns C{Each}"""
"""Implementation of & operator - returns C{L{Each}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1295,9 +1266,9 @@ class ParserElement(object):
return Each( [ self, other ] )
def __rand__(self, other ):
"""Implementation of & operator when left operand is not a C{ParserElement}"""
"""Implementation of & operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1305,11 +1276,11 @@ class ParserElement(object):
return other & self
def __invert__( self ):
"""Implementation of ~ operator - returns C{NotAny}"""
"""Implementation of ~ operator - returns C{L{NotAny}}"""
return NotAny( self )
def __call__(self, name):
"""Shortcut for C{setResultsName}, with C{listAllMatches=default}::
"""Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
could be written as::
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
@ -1403,15 +1374,17 @@ class ParserElement(object):
try:
file_contents = file_or_filename.read()
except AttributeError:
f = open(file_or_filename, "rb")
f = open(file_or_filename, "r")
file_contents = f.read()
f.close()
try:
return self.parseString(file_contents, parseAll)
except ParseBaseException:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
raise exc
def getException(self):
return ParseException("",0,self.errmsg,self)
@ -1515,10 +1488,11 @@ class Literal(Token):
exc.pstr = instring
raise exc
_L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token):
"""Token to exactly match a specified string as a keyword, that is, it must be
immediately followed by a non-keyword character. Compare with C{Literal}::
immediately followed by a non-keyword character. Compare with C{L{Literal}}::
Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
Accepts two optional constructor arguments in addition to the keyword string:
@ -1821,9 +1795,9 @@ class QuotedString(Token):
- quoteChar - string of one or more characters defining the quote delimiting string
- escChar - character to escape quotes, typically backslash (default=None)
- escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
- multiline - boolean indicating whether quotes can span multiple lines (default=False)
- unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
- multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
- unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
"""
super(QuotedString,self).__init__()
@ -2003,7 +1977,7 @@ class White(Token):
by pyparsing grammars. This class is included when some whitespace structures
are significant. Define with a string containing the whitespace characters to be
matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
as defined for the C{Word} class."""
as defined for the C{L{Word}} class."""
whiteStrs = {
" " : "<SPC>",
"\t": "<TAB>",
@ -2331,7 +2305,8 @@ class And(ParseExpression):
class _ErrorStop(Empty):
def __init__(self, *args, **kwargs):
super(Empty,self).__init__(*args, **kwargs)
super(And._ErrorStop,self).__init__(*args, **kwargs)
self.name = '-'
self.leaveWhitespace()
def __init__( self, exprs, savelist = True ):
@ -2359,8 +2334,7 @@ class And(ParseExpression):
loc, exprtokens = e._parse( instring, loc, doActions )
except ParseSyntaxException:
raise
except ParseBaseException:
pe = sys.exc_info()[1]
except ParseBaseException, pe:
raise ParseSyntaxException(pe)
except IndexError:
raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
@ -2412,8 +2386,7 @@ class Or(ParseExpression):
for e in self.exprs:
try:
loc2 = e.tryParse( instring, loc )
except ParseException:
err = sys.exc_info()[1]
except ParseException, err:
if err.loc > maxExcLoc:
maxException = err
maxExcLoc = err.loc
@ -2436,7 +2409,7 @@ class Or(ParseExpression):
def __ixor__(self, other ):
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
return self.append( other ) #Or( [ self, other ] )
def __str__( self ):
@ -2495,7 +2468,7 @@ class MatchFirst(ParseExpression):
def __ior__(self, other ):
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
return self.append( other ) #MatchFirst( [ self, other ] )
def __str__( self ):
@ -2916,13 +2889,14 @@ class Forward(ParseElementEnhance):
thereby leaving b and c out as parseable alternatives. It is recommended that you
explicitly group the values inserted into the C{Forward}::
fwdExpr << (a | b | c)
Converting to use the '<<=' operator instead will avoid this problem.
"""
def __init__( self, other=None ):
super(Forward,self).__init__( other, savelist=False )
def __lshift__( self, other ):
if isinstance( other, basestring ):
other = Literal(other)
other = ParserElement.literalStringClass(other)
self.expr = other
self.mayReturnEmpty = other.mayReturnEmpty
self.strRepr = None
@ -2933,7 +2907,8 @@ class Forward(ParseElementEnhance):
self.saveAsList = self.expr.saveAsList
self.ignoreExprs.extend(self.expr.ignoreExprs)
return None
__ilshift__ = __lshift__
def leaveWhitespace( self ):
self.skipWhitespace = False
return self
@ -2993,7 +2968,7 @@ class Upcase(TokenConverter):
DeprecationWarning,stacklevel=2)
def postParse( self, instring, loc, tokenlist ):
return list(map( string.upper, tokenlist ))
return list(map( str.upper, tokenlist ))
class Combine(TokenConverter):
@ -3029,7 +3004,7 @@ class Combine(TokenConverter):
return retToks
class Group(TokenConverter):
"""Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""
"""Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
def __init__( self, expr ):
super(Group,self).__init__( expr )
self.saveAsList = True
@ -3105,8 +3080,7 @@ def traceParseAction(f):
sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
try:
ret = f(*paArgs)
except Exception:
exc = sys.exc_info()[1]
except Exception, exc:
sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
raise
sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
@ -3124,7 +3098,7 @@ def delimitedList( expr, delim=",", combine=False ):
"""Helper to define a delimited list of expressions - the delimiter defaults to ','.
By default, the list elements and delimiters can have intervening whitespace, and
comments, but this can be overridden by passing C{combine=True} in the constructor.
If C{combine} is set to True, the matching tokens are returned as a single token
If C{combine} is set to C{True}, the matching tokens are returned as a single token
string, with the delimiters included; otherwise, the matching tokens are returned
as a list of tokens, with the delimiters suppressed.
"""
@ -3226,7 +3200,7 @@ def _escapeRegexRangeChars(s):
def oneOf( strs, caseless=False, useRegex=True ):
"""Helper to quickly define a set of alternative Literals, and makes sure to do
longest-first testing when there is a conflict, regardless of the input order,
but returns a C{MatchFirst} for best performance.
but returns a C{L{MatchFirst}} for best performance.
Parameters:
- strs - a string of space-delimited literals, or a list of string literals
@ -3284,7 +3258,7 @@ def oneOf( strs, caseless=False, useRegex=True ):
def dictOf( key, value ):
"""Helper to easily and clearly define a dictionary by specifying the respective patterns
for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens
for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
in the proper order. The key pattern can include delimiting markers or punctuation,
as long as they are suppressed, thereby leaving the significant key text. The value
pattern can include named results, so that the C{Dict} results can include named token
@ -3301,7 +3275,7 @@ def originalTextFor(expr, asString=True):
string containing the original parsed text.
If the optional C{asString} argument is passed as C{False}, then the return value is a
C{ParseResults} containing any results names that were originally matched, and a
C{L{ParseResults}} containing any results names that were originally matched, and a
single token containing the original matched text from the input string. So if
the expression passed to C{L{originalTextFor}} contains expressions with defined
results names, you must set C{asString} to C{False} if you want to preserve those
@ -3335,7 +3309,7 @@ stringEnd = StringEnd().setName("stringEnd")
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16)))
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
@ -3376,7 +3350,7 @@ def matchOnlyAtCol(n):
def replaceWith(replStr):
"""Helper method for common parse actions that simply return a literal value. Especially
useful when used with C{transformString()}.
useful when used with C{L{transformString<ParserElement.transformString>}()}.
"""
def _replFunc(*args):
return [replStr]
@ -3398,7 +3372,7 @@ def downcaseTokens(s,l,t):
return [ tt.lower() for tt in map(_ustr,t) ]
def keepOriginalText(s,startLoc,t):
"""DEPRECATED - use new helper method C{originalTextFor}.
"""DEPRECATED - use new helper method C{L{originalTextFor}}.
Helper parse action to preserve original parsed text,
overriding any nested parse actions."""
try:
@ -3464,7 +3438,7 @@ def makeXMLTags(tagStr):
def withAttribute(*args,**attrDict):
"""Helper to create a validating parse action to be used with start tags created
with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag
with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
with a required attribute value, to avoid false matches on common tags such as
C{<TD>} or C{<DIV>}.
@ -3499,7 +3473,7 @@ opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
"""Helper method for constructing grammars of expressions made up of
operators working in a precedence hierarchy. Operators may be unary or
binary, left- or right-associative. Parse actions can also be attached
@ -3518,13 +3492,15 @@ def operatorPrecedence( baseExpr, opList ):
be 1, 2, or 3)
- rightLeftAssoc is the indicator whether the operator is
right or left associative, using the pyparsing-defined
constants opAssoc.RIGHT and opAssoc.LEFT.
constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
- parseAction is the parse action to be associated with
expressions matching this operator expression (the
parse action tuple member may be omitted)
- lpar - expression for matching left-parentheses (default=Suppress('('))
- rpar - expression for matching right-parentheses (default=Suppress(')'))
"""
ret = Forward()
lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
lastExpr = baseExpr | ( lpar + ret + rpar )
for i,operDef in enumerate(opList):
opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
if arity == 3:
@ -3569,6 +3545,7 @@ def operatorPrecedence( baseExpr, opList ):
lastExpr = thisExpr
ret << lastExpr
return ret
operatorPrecedence = infixNotation
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
@ -3715,8 +3692,7 @@ if __name__ == "__main__":
print ("tokens.columns = " + str(tokens.columns))
print ("tokens.tables = " + str(tokens.tables))
print (tokens.asXML("SQL",True))
except ParseBaseException:
err = sys.exc_info()[1]
except ParseBaseException, err:
print (teststring + "->")
print (err.line)
print (" "*(err.column-1) + "^")

View File

@ -3,6 +3,9 @@
#include <stdlib.h>
#define min(x, y) ((x < y) ? x : y)
#define max(x, y) ((x > y) ? x : y)
static PyObject *
speedup_parse_date(PyObject *self, PyObject *args) {
const char *raw, *orig, *tz;
@ -61,11 +64,48 @@ speedup_parse_date(PyObject *self, PyObject *args) {
(tzh*60 + tzm)*sign*60);
}
static PyObject*
speedup_pdf_float(PyObject *self, PyObject *args) {
double f = 0.0, a = 0.0;
char *buf = "0", *dot;
void *free_buf = NULL;
int precision = 6, l = 0;
PyObject *ret;
if(!PyArg_ParseTuple(args, "d", &f)) return NULL;
a = fabs(f);
if (a > 1.0e-7) {
if(a > 1) precision = min(max(0, 6-(int)log10(a)), 6);
buf = PyOS_double_to_string(f, 'f', precision, 0, NULL);
if (buf != NULL) {
free_buf = (void*)buf;
if (precision > 0) {
l = strlen(buf) - 1;
while (l > 0 && buf[l] == '0') l--;
if (buf[l] == ',' || buf[l] == '.') buf[l] = 0;
else buf[l+1] = 0;
if ( (dot = strchr(buf, ',')) ) *dot = '.';
}
} else if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "Float->str failed.");
}
ret = PyUnicode_FromString(buf);
if (free_buf != NULL) PyMem_Free(free_buf);
return ret;
}
static PyMethodDef speedup_methods[] = {
{"parse_date", speedup_parse_date, METH_VARARGS,
"parse_date()\n\nParse ISO dates faster."
},
{"pdf_float", speedup_pdf_float, METH_VARARGS,
"pdf_float()\n\nConvert float to a string representation suitable for PDF"
},
{NULL, NULL, 0, NULL}
};

View File

@ -1,69 +0,0 @@
## Process this file with automake to produce Makefile.in
noinst_LTLIBRARIES = libharfbuzz-1.la
MAINSOURCES = \
harfbuzz-buffer.c \
harfbuzz-stream.c \
harfbuzz-dump.c \
harfbuzz-gdef.c \
harfbuzz-gpos.c \
harfbuzz-gsub.c \
harfbuzz-impl.c \
harfbuzz-open.c \
harfbuzz-shaper.cpp \
harfbuzz-greek.c \
harfbuzz-tibetan.c \
harfbuzz-khmer.c \
harfbuzz-indic.cpp \
harfbuzz-hebrew.c \
harfbuzz-arabic.c \
harfbuzz-hangul.c \
harfbuzz-myanmar.c \
harfbuzz-thai.c
EXTRA_SOURCES = harfbuzz.c
PUBLICHEADERS = \
harfbuzz.h \
harfbuzz-buffer.h \
harfbuzz-dump.h \
harfbuzz-gdef.h \
harfbuzz-gpos.h \
harfbuzz-gsub.h \
harfbuzz-open.h \
harfbuzz-global.h \
harfbuzz-external.h \
harfbuzz-shaper.h \
harfbuzz-stream.h
PRIVATEHEADERS = \
harfbuzz-impl.h \
harfbuzz-buffer-private.h \
harfbuzz-stream-private.h \
harfbuzz-gdef-private.h \
harfbuzz-gpos-private.h \
harfbuzz-gsub-private.h \
harfbuzz-open-private.h \
harfbuzz-shaper-private.h
libharfbuzz_1_la_SOURCES = \
$(MAINSOURCES) \
$(PUBLICHEADERS) \
$(PRIVATEHEADERS)
#noinst_PROGRAMS = harfbuzz-dump
#
#harfbuzz_dump_SOURCES = \
# harfbuzz-dump-main.c
#
#harfbuzz_dump_LDADD = \
# libharfbuzz-1.la
EXTRA_DIST = \
README \
COPYING.FTL \
COPYING.GPL \
COPYING \
$(EXTRA_SOURCES)