From e1cce49ca9ccd4731e677db7e28fd17292763e27 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 28 Dec 2012 16:39:48 +0530 Subject: [PATCH 01/46] Update La Stampa --- recipes/la_stampa.recipe | 65 ++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/recipes/la_stampa.recipe b/recipes/la_stampa.recipe index b9d8a469aa..06a7debe9d 100644 --- a/recipes/la_stampa.recipe +++ b/recipes/la_stampa.recipe @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __author__ = 'Gabriele Marini, based on Darko Miletic' __copyright__ = '2009, Darko Miletic ' -__description__ = 'La Stampa 05/05/2010' +__description__ = 'La Stampa 28/12/2012' ''' http://www.lastampa.it/ @@ -14,10 +14,11 @@ class LaStampa(BasicNewsRecipe): title = u'La Stampa' language = 'it' __author__ = 'Gabriele Marini' - oldest_article = 15 + #oldest_article = 15 + oldest_articlce = 7 #for daily schedule max_articles_per_feed = 50 recursion = 100 - cover_url = 'http://www.lastampa.it/edicola/PDF/1.pdf' + cover_url = 'http://www1.lastampa.it/edicola/PDF/1.pdf' use_embedded_content = False remove_javascript = True no_stylesheets = True @@ -33,35 +34,41 @@ class LaStampa(BasicNewsRecipe): if link: return link[0]['href'] - keep_only_tags = [dict(attrs={'class':['boxocchiello2','titoloRub','titologir','catenaccio','sezione','articologirata']}), + keep_only_tags = [dict(attrs={'class':['boxocchiello2','titoloRub','titologir','autore-girata','luogo-girata','catenaccio','sezione','articologirata','bodytext','news-single-img','ls-articoloCorpo','ls-blog-list-1col']}), dict(name='div', attrs={'id':'corpoarticolo'}) ] - remove_tags = [dict(name='div', attrs={'id':'menutop'}), - dict(name='div', attrs={'id':'fwnetblocco'}), - dict(name='table', attrs={'id':'strumenti'}), - dict(name='table', attrs={'id':'imgesterna'}), - dict(name='a', attrs={'class':'linkblu'}), - dict(name='a', attrs={'class':'link'}), + + remove_tags = [dict(name='div', attrs={'id':['menutop','fwnetblocco']}), + 
dict(attrs={'class':['ls-toolbarCommenti','ls-boxCommentsBlog']}), + dict(name='table', attrs={'id':['strumenti','imgesterna']}), + dict(name='a', attrs={'class':['linkblu','link']}), dict(name='span', attrs={'class':['boxocchiello','boxocchiello2','sezione']}) ] - - feeds = [ - (u'Home', u'http://www.lastampa.it/redazione/rss_home.xml'), - (u'Editoriali', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=25'), - (u'Politica', u'http://www.lastampa.it/redazione/cmssezioni/politica/rss_politica.xml'), - (u'ArciItaliana', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=14'), - (u'Cronache', u'http://www.lastampa.it/redazione/cmssezioni/cronache/rss_cronache.xml'), - (u'Esteri', u'http://www.lastampa.it/redazione/cmssezioni/esteri/rss_esteri.xml'), - (u'Danni Collaterali', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=90'), - (u'Economia', u'http://www.lastampa.it/redazione/cmssezioni/economia/rss_economia.xml'), - (u'Tecnologia ', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=30'), - (u'Spettacoli', u'http://www.lastampa.it/redazione/cmssezioni/spettacoli/rss_spettacoli.xml'), - (u'Sport', u'http://www.lastampa.it/sport/rss_home.xml'), - (u'Torino', u'http://rss.feedsportal.com/c/32418/f/466938/index.rss'), - (u'Motori', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=57'), - (u'Scienza', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=38'), - (u'Fotografia', u'http://rss.feedsportal.com/c/32418/f/478449/index.rss'), - (u'Scuola', u'http://www.lastampa.it/cmstp/rubriche/oggetti/rss.asp?ID_blog=60'), - (u'Tempo Libero', u'http://www.lastampa.it/tempolibero/rss_home.xml') + feeds = [(u'BuonGiorno',u'http://www.lastampa.it/cultura/opinioni/buongiorno/rss.xml'), + (u'Jena', u'http://www.lastampa.it/cultura/opinioni/jena/rss.xml'), + (u'Editoriali', u'http://www.lastampa.it/cultura/opinioni/editoriali'), + (u'Finestra sull America', 
u'http://lastampa.feedsportal.com/c/32418/f/625713/index.rss'), + (u'HomePage', u'http://www.lastampa.it/rss.xml'), + (u'Politica Italia', u'http://www.lastampa.it/italia/politica/rss.xml'), + (u'ArciItaliana', u'http://www.lastampa.it/rss/blog/arcitaliana'), + (u'Cronache', u'http://www.lastampa.it/italia/cronache/rss.xml'), + (u'Esteri', u'http://www.lastampa.it/esteri/rss.xml'), + (u'Danni Collaterali', u'http://www.lastampa.it/rss/blog/danni-collaterali'), + (u'Economia', u'http://www.lastampa.it/economia/rss.xml'), + (u'Tecnologia ', u'http://www.lastampa.it/tecnologia/rss.xml'), + (u'Spettacoli', u'http://www.lastampa.it/spettacoli/rss.xml'), + (u'Sport', u'http://www.lastampa.it/sport/rss.xml'), + (u'Torino', u'http://www.lastampa.it/cronaca/rss.xml'), + (u'Motori', u'http://www.lastampa.it/motori/rss.xml'), + (u'Scienza', u'http://www.lastampa.it/scienza/rss.xml'), + (u'Cultura', u'http://www.lastampa.it/cultura/rss.xml'), + (u'Scuola', u'http://www.lastampa.it/cultura/scuola/rss.xml'), + (u'Benessere', u'http://www.lastampa.it/scienza/benessere/rss.xml'), + (u'Cucina', u'http://www.lastampa.it/societa/cucina/rss.xml'), + (u'Casa', u'http://www.lastampa.it/societa/casa/rss.xml'), + (u'Moda',u'http://www.lastampa.it/societa/moda/rss.xml'), + (u'Giochi',u'http://www.lastampa.it/tecnologia/giochi/rss.xml'), + (u'Viaggi',u'http://www.lastampa.it/societa/viaggi/rss.xml'), + (u'Ambiente', u'http://www.lastampa.it/scienza/ambiente/rss.xml') ] From e27704e4c48d63eb0c4401139ca335e0d442edf3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 00:29:21 +0530 Subject: [PATCH 02/46] Add support for outlines and links to new pdf engine and make it the default engine --- .../ebooks/conversion/plugins/pdf_output.py | 23 ++--- src/calibre/ebooks/pdf/render/common.py | 9 +- src/calibre/ebooks/pdf/render/engine.py | 39 ++++++-- src/calibre/ebooks/pdf/render/from_html.py | 15 +-- src/calibre/ebooks/pdf/render/links.py | 99 +++++++++++++++++-- 
src/calibre/ebooks/pdf/render/serialize.py | 11 ++- 6 files changed, 163 insertions(+), 33 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/pdf_output.py b/src/calibre/ebooks/conversion/plugins/pdf_output.py index 23fb6418a4..c042de7050 100644 --- a/src/calibre/ebooks/conversion/plugins/pdf_output.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_output.py @@ -91,12 +91,14 @@ class PDFOutput(OutputFormatPlugin): OptionRecommendation(name='pdf_mono_font_size', recommended_value=16, help=_( 'The default font size for monospaced text')), - # OptionRecommendation(name='old_pdf_engine', recommended_value=False, - # help=_('Use the old, less capable engine to generate the PDF')), - # OptionRecommendation(name='uncompressed_pdf', - # recommended_value=False, help=_( - # 'Generate an uncompressed PDF, useful for debugging, ' - # 'only works with the new PDF engine.')), + OptionRecommendation(name='pdf_mark_links', recommended_value=False, + help=_('Surround all links with a red box, useful for debugging.')), + OptionRecommendation(name='old_pdf_engine', recommended_value=False, + help=_('Use the old, less capable engine to generate the PDF')), + OptionRecommendation(name='uncompressed_pdf', + recommended_value=False, help=_( + 'Generate an uncompressed PDF, useful for debugging, ' + 'only works with the new PDF engine.')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): @@ -190,13 +192,12 @@ class PDFOutput(OutputFormatPlugin): val[i].value = family_map[k] def convert_text(self, oeb_book): - from calibre.utils.config import tweaks - if tweaks.get('new_pdf_engine', False): - from calibre.ebooks.pdf.render.from_html import PDFWriter + from calibre.ebooks.metadata.opf2 import OPF + if self.opts.old_pdf_engine: + from calibre.ebooks.pdf.writer import PDFWriter PDFWriter else: - from calibre.ebooks.pdf.writer import PDFWriter - from calibre.ebooks.metadata.opf2 import OPF + from calibre.ebooks.pdf.render.from_html import PDFWriter 
self.log.debug('Serializing oeb input to disk for processing...') self.get_cover_data() diff --git a/src/calibre/ebooks/pdf/render/common.py b/src/calibre/ebooks/pdf/render/common.py index 554d170656..5e470122c5 100644 --- a/src/calibre/ebooks/pdf/render/common.py +++ b/src/calibre/ebooks/pdf/render/common.py @@ -97,7 +97,8 @@ class Dictionary(dict): def pdf_serialize(self, stream): stream.write(b'<<' + EOL) sorted_keys = sorted(self.iterkeys(), - key=lambda x:((' ' if x == 'Type' else '')+x)) + key=lambda x:({'Type':'1', 'Subtype':'2'}.get( + x, x)+x)) for k in sorted_keys: serialize(Name(k), stream) stream.write(b' ') @@ -169,5 +170,11 @@ class Reference(object): def pdf_serialize(self, stream): raw = '%d 0 R'%self.num stream.write(raw.encode('ascii')) + + def __repr__(self): + return '%d 0 R'%self.num + + def __str__(self): + return repr(self) # }}} diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 5f1d6b9602..8ccb4a6c96 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -188,10 +188,11 @@ class PdfEngine(QPaintEngine): def __init__(self, file_object, page_width, page_height, left_margin, top_margin, right_margin, bottom_margin, width, height, - errors=print, debug=print, compress=True): + errors=print, debug=print, compress=True, + mark_links=False): QPaintEngine.__init__(self, self.features) self.file_object = file_object - self.compress = compress + self.compress, self.mark_links = compress, mark_links self.page_height, self.page_width = page_height, page_width self.left_margin, self.top_margin = left_margin, top_margin self.right_margin, self.bottom_margin = right_margin, bottom_margin @@ -249,10 +250,10 @@ class PdfEngine(QPaintEngine): if not hasattr(self, 'pdf'): try: self.pdf = PDFStream(self.file_object, (self.page_width, - self.page_height), - compress=self.compress) + self.page_height), compress=self.compress, + mark_links=self.mark_links) except: - 
self.errors.append(traceback.format_exc()) + self.errors(traceback.format_exc()) return False return True @@ -268,7 +269,7 @@ class PdfEngine(QPaintEngine): self.end_page() self.pdf.end() except: - self.errors.append(traceback.format_exc()) + self.errors(traceback.format_exc()) return False finally: self.pdf = self.file_object = None @@ -484,6 +485,24 @@ class PdfEngine(QPaintEngine): def set_metadata(self, *args, **kwargs): self.pdf.set_metadata(*args, **kwargs) + def add_outline(self, toc): + self.pdf.links.add_outline(toc) + + def add_links(self, current_item, start_page, links, anchors): + for pos in anchors.itervalues(): + pos['left'], pos['top'] = self.pdf_system.map(pos['left'], pos['top']) + for link in links: + pos = link[1] + llx = pos['left'] + lly = pos['top'] + pos['height'] + urx = pos['left'] + pos['width'] + ury = pos['top'] + llx, lly = self.pdf_system.map(llx, lly) + urx, ury = self.pdf_system.map(urx, ury) + link[1] = pos['column'] + start_page + link.append((llx, lly, urx, ury)) + self.pdf.links.add(current_item, start_page, links, anchors) + def __enter__(self): self.pdf.save_stack() self.saved_ps = (self.do_stroke, self.do_fill) @@ -497,7 +516,8 @@ class PdfDevice(QPaintDevice): # {{{ def __init__(self, file_object, page_size=A4, left_margin=inch, top_margin=inch, right_margin=inch, bottom_margin=inch, - xdpi=1200, ydpi=1200, errors=print, debug=print, compress=True): + xdpi=1200, ydpi=1200, errors=print, debug=print, + compress=True, mark_links=False): QPaintDevice.__init__(self) self.xdpi, self.ydpi = xdpi, ydpi self.page_width, self.page_height = page_size @@ -506,7 +526,10 @@ class PdfDevice(QPaintDevice): # {{{ self.engine = PdfEngine(file_object, self.page_width, self.page_height, left_margin, top_margin, right_margin, bottom_margin, self.width(), self.height(), - errors=errors, debug=debug, compress=compress) + errors=errors, debug=debug, compress=compress, + mark_links=mark_links) + self.add_outline = self.engine.add_outline + 
self.add_links = self.engine.add_links def paintEngine(self): return self.engine diff --git a/src/calibre/ebooks/pdf/render/from_html.py b/src/calibre/ebooks/pdf/render/from_html.py index 916c22c1d0..47aa295743 100644 --- a/src/calibre/ebooks/pdf/render/from_html.py +++ b/src/calibre/ebooks/pdf/render/from_html.py @@ -20,7 +20,6 @@ from calibre.ebooks.oeb.display.webview import load_html from calibre.ebooks.pdf.render.common import (inch, cm, mm, pica, cicero, didot, PAPER_SIZES) from calibre.ebooks.pdf.render.engine import PdfDevice -from calibre.ebooks.pdf.render.links import Links def get_page_size(opts, for_comic=False): # {{{ use_profile = not (opts.override_profile_size or @@ -143,7 +142,6 @@ class PDFWriter(QObject): self.view.page().mainFrame().setScrollBarPolicy(x, Qt.ScrollBarAlwaysOff) self.report_progress = lambda x, y: x - self.links = Links() def dump(self, items, out_stream, pdf_metadata): opts = self.opts @@ -156,7 +154,8 @@ class PDFWriter(QObject): top_margin=0, right_margin=mr, bottom_margin=0, xdpi=xdpi, ydpi=ydpi, errors=self.log.error, debug=self.log.debug, compress=not - opts.uncompressed_pdf) + opts.uncompressed_pdf, + mark_links=opts.pdf_mark_links) self.page.setViewportSize(QSize(self.doc.width(), self.doc.height())) self.render_queue = items @@ -187,7 +186,9 @@ class PDFWriter(QObject): QTimer.singleShot(0, self.render_book) self.loop.exec_() - # TODO: Outline and links + if self.toc is not None and len(self.toc) > 0: + self.doc.add_outline(self.toc) + self.painter.end() if self.doc.errors_occurred: @@ -261,8 +262,7 @@ class PDFWriter(QObject): amap = self.bridge_value if not isinstance(amap, dict): amap = {'links':[], 'anchors':{}} # Some javascript error occurred - self.links.add(self.current_item, self.current_page_num, amap['links'], - amap['anchors']) + start_page = self.current_page_num mf = self.view.page().mainFrame() while True: @@ -278,3 +278,6 @@ class PDFWriter(QObject): if self.doc.errors_occurred: break + 
self.doc.add_links(self.current_item, start_page, amap['links'], + amap['anchors']) + diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py index 0d23855d09..203074f6c0 100644 --- a/src/calibre/ebooks/pdf/render/links.py +++ b/src/calibre/ebooks/pdf/render/links.py @@ -8,25 +8,112 @@ __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' import os +from future_builtins import map +from urlparse import urlparse, urlunparse +from urllib2 import quote, unquote -from calibre.ebooks.pdf.render.common import Array, Name +from calibre.ebooks.pdf.render.common import Array, Name, Dictionary, String class Destination(Array): - def __init__(self, start_page, pos): + def __init__(self, start_page, pos, get_pageref): super(Destination, self).__init__( - [start_page + pos['column'], Name('FitH'), pos['y']]) + [get_pageref(start_page + pos['column']), Name('XYZ'), pos['left'], + pos['top'], None] + ) class Links(object): - def __init__(self): + def __init__(self, pdf, mark_links): self.anchors = {} + self.links = [] + self.start = {'top':0, 'column':0, 'left':0} + self.pdf = pdf + self.mark_links = mark_links def add(self, base_path, start_page, links, anchors): path = os.path.normcase(os.path.abspath(base_path)) self.anchors[path] = a = {} - a[None] = Destination(start_page, {'y':0, 'column':0}) + a[None] = Destination(start_page, self.start, self.pdf.get_pageref) for anchor, pos in anchors.iteritems(): - a[anchor] = Destination(start_page, pos) + a[anchor] = Destination(start_page, pos, self.pdf.get_pageref) + for link in links: + href, page, rect = link + p, frag = href.partition('#')[0::2] + link = ((path, p, frag or None), self.pdf.get_pageref(page).obj, Array(rect)) + self.links.append(link) + + def add_links(self): + for link in self.links: + path, href, frag = link[0] + page, rect = link[1:] + combined_path = os.path.abspath(os.path.join(path, *href.split('/'))) + is_local = not href or combined_path in 
self.anchors + annot = Dictionary({ + 'Type':Name('Annot'), 'Subtype':Name('Link'), + 'Rect':rect, + }) + if self.mark_links: + annot.update({'Border':Array([16, 16, 1]), 'C':Array([1.0, 0, + 0])}) + if is_local: + path = combined_path if href else path + annot['Dest'] = self.anchors[path][frag] + else: + url = href + (('#'+frag) if frag else '') + purl = urlparse(url) + if purl.scheme and purl.scheme != 'file': + action = Dictionary({ + 'Type':Name('Action'), 'S':Name('URI'), + }) + parts = (x.encode('utf-8') if isinstance(x, type(u'')) else + x for x in purl) + url = urlunparse(map(quote, map(unquote, + parts))).decode('ascii') + action['URI'] = String(url) + annot['A'] = action + if 'A' in annot or 'Dest' in annot: + if 'Annots' not in page: + page['Annots'] = Array() + page['Annots'].append(self.pdf.objects.add(annot)) + + def add_outline(self, toc): + parent = Dictionary({'Type':Name('Outlines')}) + parentref = self.pdf.objects.add(parent) + self.process_children(toc, parentref, parent_is_root=True) + self.pdf.catalog.obj['Outlines'] = parentref + + def process_children(self, toc, parentref, parent_is_root=False): + childrefs = [] + for child in toc: + childref = self.process_toc_item(child, parentref) + if childref is None: + continue + if childrefs: + childrefs[-1].obj['Next'] = childref + childref.obj['Prev'] = childrefs[-1] + childrefs.append(childref) + + if len(child) > 0: + self.process_children(child, childref) + if childrefs: + parentref.obj['First'] = childrefs[0] + parentref.obj['Last'] = childrefs[-1] + if not parent_is_root: + parentref.obj['Count'] = -len(childrefs) + + def process_toc_item(self, toc, parentref): + path = toc.abspath or None + frag = toc.fragment or None + if path is None: + return + path = os.path.normcase(os.path.abspath(path)) + if path not in self.anchors: + return None + a = self.anchors[path] + dest = a.get(frag, a[None]) + item = Dictionary({'Parent':parentref, 'Dest':dest, + 'Title':String(toc.text or _('Unknown'))}) + 
return self.pdf.objects.add(item) diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index 071430c172..b2a9f38bc0 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -17,6 +17,7 @@ from calibre.ebooks.pdf.render.common import ( Reference, EOL, serialize, Stream, Dictionary, String, Name, Array, GlyphIndex) from calibre.ebooks.pdf.render.fonts import FontManager +from calibre.ebooks.pdf.render.links import Links PDFVER = b'%PDF-1.3' @@ -219,6 +220,9 @@ class PageTree(Dictionary): self['Kids'].append(pageref) self['Count'] += 1 + def get_ref(self, num): + return self['Kids'][num-1] + class HashingStream(object): def __init__(self, f): @@ -277,7 +281,7 @@ class PDFStream(object): ( True, True, 'evenodd') : 'B*', } - def __init__(self, stream, page_size, compress=False): + def __init__(self, stream, page_size, compress=False, mark_links=False): self.stream = HashingStream(stream) self.compress = compress self.write_line(PDFVER) @@ -294,6 +298,7 @@ class PDFStream(object): self.stroke_opacities, self.fill_opacities = {}, {} self.font_manager = FontManager(self.objects, self.compress) self.image_cache = {} + self.links = Links(self, mark_links) @property def page_tree(self): @@ -303,6 +308,9 @@ class PDFStream(object): def catalog(self): return self.objects[1] + def get_pageref(self, pagenum): + return self.page_tree.obj.get_ref(pagenum) + def set_metadata(self, title=None, author=None, tags=None): if title: self.info['Title'] = String(title) @@ -442,6 +450,7 @@ class PDFStream(object): self.end_page() self.font_manager.embed_fonts() inforef = self.objects.add(self.info) + self.links.add_links() self.objects.pdf_serialize(self.stream) self.write_line() startxref = self.objects.write_xref(self.stream) From 1a3c9e4d8ba171632e4e19ec427c8e40203610d5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 09:50:06 +0530 Subject: [PATCH 03/46] ... 
--- manual/faq.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manual/faq.rst b/manual/faq.rst index 109aff440d..266fe05f6a 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -162,6 +162,8 @@ Follow these steps to find the problem: * If you are connecting an Apple iDevice (iPad, iPod Touch, iPhone), use the 'Connect to iTunes' method in the 'Getting started' instructions in `Calibre + Apple iDevices: Start here `_. * Make sure you are running the latest version of |app|. The latest version can always be downloaded from `the calibre website `_. * Ensure your operating system is seeing the device. That is, the device should show up in Windows Explorer (in Windows) or Finder (in OS X). + * In |app|, go to Preferences->Ignored Devices and check that your device + is not being ignored * In |app|, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled, the plugin icon next to it should be green when it is enabled. * If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker `_. 
From 7012bb1af264d3ff37e74ca6016ab6179a906328 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 10:13:46 +0530 Subject: [PATCH 04/46] Update Harper's Magazine --- recipes/harpers_full.recipe | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/recipes/harpers_full.recipe b/recipes/harpers_full.recipe index b965bca9b8..153f82db7b 100644 --- a/recipes/harpers_full.recipe +++ b/recipes/harpers_full.recipe @@ -8,7 +8,7 @@ If you have institutional subscription based on access IP you do not need to ent anything in username/password fields ''' -import time +import time, re import urllib from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe @@ -29,7 +29,6 @@ class Harpers_full(BasicNewsRecipe): needs_subscription = 'optional' masthead_url = 'http://harpers.org/wp-content/themes/harpers/images/pheader.gif' publication_type = 'magazine' - INDEX = strftime('http://harpers.org/archive/%Y/%m') LOGIN = 'http://harpers.org/wp-content/themes/harpers/ajax_login.php' extra_css = """ body{font-family: adobe-caslon-pro,serif} @@ -65,17 +64,28 @@ class Harpers_full(BasicNewsRecipe): return br def parse_index(self): + #find current issue + soup = self.index_to_soup('http://harpers.org/') + currentIssue=soup.find('div',attrs={'class':'mainNavi'}).find('li',attrs={'class':'curentIssue'}) + currentIssue_url=self.tag_to_string(currentIssue.a['href']) + self.log(currentIssue_url) + + #go to the current issue + soup1 = self.index_to_soup(currentIssue_url) + date = re.split('\s\|\s',self.tag_to_string(soup1.head.title.string))[0] + self.timefmt = u' [%s]'%date + + #get cover + coverurl='http://harpers.org/wp-content/themes/harpers/ajax_microfiche.php?img=harpers-'+re.split('harpers.org/',currentIssue_url)[1]+'gif/0001.gif' + soup2 = self.index_to_soup(coverurl) + self.cover_url = self.tag_to_string(soup2.find('img')['src']) + self.log(self.cover_url) articles = [] - print 'Processing ' + self.INDEX - 
soup = self.index_to_soup(self.INDEX) count = 0 - for item in soup.findAll('div', attrs={'class':'articleData'}): + for item in soup1.findAll('div', attrs={'class':'articleData'}): text_links = item.findAll('h2') for text_link in text_links: if count == 0: - lcover_url = item.find(attrs={'class':'dwpdf'}) - if lcover_url: - self.cover_url = lcover_url.a['href'] count = 1 else: url = text_link.a['href'] @@ -87,7 +97,14 @@ class Harpers_full(BasicNewsRecipe): ,'url' :url ,'description':'' }) - return [(soup.head.title.string, articles)] + return [(soup1.head.title.string, articles)] def print_version(self, url): return url + '?single=1' + + def cleanup(self): + soup = self.index_to_soup('http://harpers.org/') + signouturl=self.tag_to_string(soup.find('li', attrs={'class':'subLogOut'}).findNext('li').a['href']) + self.log(signouturl) + self.browser.open(signouturl) + From 27a4657b2c051be67f0479765be27dd228eb1ed2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 11:02:38 +0530 Subject: [PATCH 05/46] ... 
--- src/calibre/ebooks/pdf/render/engine.py | 7 +++++-- src/calibre/ebooks/pdf/render/links.py | 5 ++++- src/calibre/ebooks/pdf/render/serialize.py | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 8ccb4a6c96..9210d1e805 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -251,9 +251,11 @@ class PdfEngine(QPaintEngine): try: self.pdf = PDFStream(self.file_object, (self.page_width, self.page_height), compress=self.compress, - mark_links=self.mark_links) + mark_links=self.mark_links, + debug=self.debug) except: self.errors(traceback.format_exc()) + self.errors_occurred = True return False return True @@ -270,6 +272,7 @@ class PdfEngine(QPaintEngine): self.pdf.end() except: self.errors(traceback.format_exc()) + self.errors_occurred = True return False finally: self.pdf = self.file_object = None @@ -581,7 +584,7 @@ if __name__ == '__main__': QBrush, QColor, QPoint, QPixmap app = QApplication([]) p = QPainter() - with open('/tmp/painter.pdf', 'wb') as f: + with open('/t/painter.pdf', 'wb') as f: dev = PdfDevice(f, compress=False) p.begin(dev) dev.init_page() diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py index 203074f6c0..b25736fde7 100644 --- a/src/calibre/ebooks/pdf/render/links.py +++ b/src/calibre/ebooks/pdf/render/links.py @@ -47,7 +47,7 @@ class Links(object): for link in self.links: path, href, frag = link[0] page, rect = link[1:] - combined_path = os.path.abspath(os.path.join(path, *href.split('/'))) + combined_path = os.path.abspath(os.path.join(os.path.dirname(path), *href.split('/'))) is_local = not href or combined_path in self.anchors annot = Dictionary({ 'Type':Name('Annot'), 'Subtype':Name('Link'), @@ -76,6 +76,9 @@ class Links(object): if 'Annots' not in page: page['Annots'] = Array() page['Annots'].append(self.pdf.objects.add(annot)) + else: + 
self.pdf.debug('Could not find destination for link: %s in file %s'% + (href, path)) def add_outline(self, toc): parent = Dictionary({'Type':Name('Outlines')}) diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index b2a9f38bc0..c60a70ba41 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -281,7 +281,8 @@ class PDFStream(object): ( True, True, 'evenodd') : 'B*', } - def __init__(self, stream, page_size, compress=False, mark_links=False): + def __init__(self, stream, page_size, compress=False, mark_links=False, + debug=print): self.stream = HashingStream(stream) self.compress = compress self.write_line(PDFVER) @@ -298,6 +299,7 @@ class PDFStream(object): self.stroke_opacities, self.fill_opacities = {}, {} self.font_manager = FontManager(self.objects, self.compress) self.image_cache = {} + self.debug = debug self.links = Links(self, mark_links) @property From 4925f63242ed027232560b9d1ddadbea7f114e16 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 11:06:41 +0530 Subject: [PATCH 06/46] ... 
--- src/calibre/ebooks/pdf/render/links.py | 4 ++-- src/calibre/ebooks/pdf/render/serialize.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py index b25736fde7..faae73e3a8 100644 --- a/src/calibre/ebooks/pdf/render/links.py +++ b/src/calibre/ebooks/pdf/render/links.py @@ -24,10 +24,10 @@ class Destination(Array): class Links(object): - def __init__(self, pdf, mark_links): + def __init__(self, pdf, mark_links, page_size): self.anchors = {} self.links = [] - self.start = {'top':0, 'column':0, 'left':0} + self.start = {'top':page_size[1], 'column':0, 'left':0} self.pdf = pdf self.mark_links = mark_links diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index c60a70ba41..5042702deb 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -300,7 +300,7 @@ class PDFStream(object): self.font_manager = FontManager(self.objects, self.compress) self.image_cache = {} self.debug = debug - self.links = Links(self, mark_links) + self.links = Links(self, mark_links, page_size) @property def page_tree(self): From e0ad273951b0f1f39f620a492c76a5f71f8714d7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 14:10:53 +0530 Subject: [PATCH 07/46] Add a clear ratings button tothe edit metadata dialog --- src/calibre/gui2/metadata/basic_widgets.py | 3 +++ src/calibre/gui2/metadata/single.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py index 95255d4abb..2d1e1fe7c3 100644 --- a/src/calibre/gui2/metadata/basic_widgets.py +++ b/src/calibre/gui2/metadata/basic_widgets.py @@ -1094,6 +1094,9 @@ class RatingEdit(QSpinBox): # {{{ db.set_rating(id_, 2*self.current_val, notify=False, commit=False) return True + def zero(self): + self.setValue(0) + # }}} class 
TagsEdit(EditWithComplete): # {{{ diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py index a736f9fb27..654a5a474e 100644 --- a/src/calibre/gui2/metadata/single.py +++ b/src/calibre/gui2/metadata/single.py @@ -181,6 +181,11 @@ class MetadataSingleDialogBase(ResizableDialog): self.basic_metadata_widgets.append(self.comments) self.rating = RatingEdit(self) + self.clear_ratings_button = QToolButton(self) + self.clear_ratings_button.setToolTip(_('Clear rating')) + self.clear_ratings_button.setIcon(QIcon(I('trash.png'))) + self.clear_ratings_button.clicked.connect(self.rating.zero) + self.basic_metadata_widgets.append(self.rating) self.tags = TagsEdit(self) @@ -659,8 +664,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{ QSizePolicy.Expanding) l.addItem(self.tabs[0].spc_one, 1, 0, 1, 3) sto(self.cover.buttons[-1], self.rating) - create_row2(1, self.rating) - sto(self.rating, self.tags_editor_button) + create_row2(1, self.rating, self.clear_ratings_button) + sto(self.rating, self.clear_ratings_button) + sto(self.clear_ratings_button, self.tags_editor_button) sto(self.tags_editor_button, self.tags) create_row2(2, self.tags, self.clear_tags_button, front_button=self.tags_editor_button) sto(self.clear_tags_button, self.paste_isbn_button) @@ -780,7 +786,7 @@ class MetadataSingleDialogAlt1(MetadataSingleDialogBase): # {{{ button=self.clear_series_button, icon='trash.png') create_row(5, self.series_index, self.tags) create_row(6, self.tags, self.rating, button=self.clear_tags_button) - create_row(7, self.rating, self.pubdate) + create_row(7, self.rating, self.pubdate, button=self.clear_ratings_button) create_row(8, self.pubdate, self.publisher, button=self.pubdate.clear_button, icon='trash.png') create_row(9, self.publisher, self.languages) @@ -917,7 +923,7 @@ class MetadataSingleDialogAlt2(MetadataSingleDialogBase): # {{{ button=self.clear_series_button, icon='trash.png') create_row(5, self.series_index, self.tags) 
create_row(6, self.tags, self.rating, button=self.clear_tags_button) - create_row(7, self.rating, self.pubdate) + create_row(7, self.rating, self.pubdate, button=self.clear_ratings_button) create_row(8, self.pubdate, self.publisher, button=self.pubdate.clear_button, icon='trash.png') create_row(9, self.publisher, self.languages) From 0fb4e8e4dc471a945e5eb46b9eec33cc68a4a86b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 14:53:14 +0530 Subject: [PATCH 08/46] HTML Input: Handle entities inside href attributes when following the links in an HTML file. Fixes #1094203 (links in EPUB of ebook-convert`ed HTML files point to HTML files in /tmp/calibre_*) --- src/calibre/ebooks/html/input.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index c2ec6f9bce..9683837ad6 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -17,7 +17,7 @@ from urllib import unquote from calibre.ebooks.chardet import detect_xml_encoding from calibre.constants import iswindows -from calibre import unicode_path, as_unicode +from calibre import unicode_path, as_unicode, replace_entities class Link(object): ''' @@ -147,6 +147,7 @@ class HTMLFile(object): url = match.group(i) if url: break + url = replace_entities(url) try: link = self.resolve(url) except ValueError: From 2dfaa893c48d09a0a47c01001fa39bb51827f56e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 15:45:42 +0530 Subject: [PATCH 09/46] ... 
--- src/calibre/ebooks/pdf/render/engine.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 9210d1e805..4976fe36ea 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -241,10 +241,9 @@ class PdfEngine(QPaintEngine): @property def features(self): - return (QPaintEngine.Antialiasing | QPaintEngine.AlphaBlend | - QPaintEngine.ConstantOpacity | QPaintEngine.PainterPaths | - QPaintEngine.PaintOutsidePaintEvent | - QPaintEngine.PrimitiveTransform) + return (self.Antialiasing | self.AlphaBlend | self.ConstantOpacity | + self.PainterPaths | self.PaintOutsidePaintEvent | + self.PrimitiveTransform) def begin(self, device): if not hasattr(self, 'pdf'): From 270ec77734e7e8cb6efc3488dddb54c18bf45139 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 16:48:07 +0530 Subject: [PATCH 10/46] ... --- src/calibre/ebooks/pdf/render/engine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 4976fe36ea..e1a6808505 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -241,9 +241,10 @@ class PdfEngine(QPaintEngine): @property def features(self): + # gradient_flags = self.MaskedBrush | self.PatternBrush | self.PatternTransform return (self.Antialiasing | self.AlphaBlend | self.ConstantOpacity | self.PainterPaths | self.PaintOutsidePaintEvent | - self.PrimitiveTransform) + self.PrimitiveTransform) #| gradient_flags def begin(self, device): if not hasattr(self, 'pdf'): From 961ae52972bf9884481dcc82a522e9b42d25f106 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 21:57:04 +0530 Subject: [PATCH 11/46] Allow series numbers lower than -100 for custom series columns. 
Fixes #1094475 ([Enhancement] Series numbers can't go under -100) --- src/calibre/gui2/custom_column_widgets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/custom_column_widgets.py b/src/calibre/gui2/custom_column_widgets.py index 2b45769185..89824dbc7a 100644 --- a/src/calibre/gui2/custom_column_widgets.py +++ b/src/calibre/gui2/custom_column_widgets.py @@ -372,7 +372,7 @@ class Series(Base): self.widgets.append(QLabel('&'+self.col_metadata['name']+_(' index:'), parent)) w = QDoubleSpinBox(parent) - w.setRange(-100., float(100000000)) + w.setRange(-10000., float(100000000)) w.setDecimals(2) w.setSingleStep(1) self.idx_widget=w From 74868edee12cc83df4c3b2e3bcbd4a604f06a47c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 22:54:16 +0530 Subject: [PATCH 12/46] Suppres box around links in acroread --- src/calibre/ebooks/pdf/render/links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/render/links.py b/src/calibre/ebooks/pdf/render/links.py index faae73e3a8..41b7fcfd39 100644 --- a/src/calibre/ebooks/pdf/render/links.py +++ b/src/calibre/ebooks/pdf/render/links.py @@ -51,7 +51,7 @@ class Links(object): is_local = not href or combined_path in self.anchors annot = Dictionary({ 'Type':Name('Annot'), 'Subtype':Name('Link'), - 'Rect':rect, + 'Rect':rect, 'Border':Array([0,0,0]), }) if self.mark_links: annot.update({'Border':Array([16, 16, 1]), 'C':Array([1.0, 0, From 40539ca292b5fc8c33ad21232f75bf9c0e034aca Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 29 Dec 2012 23:10:10 +0530 Subject: [PATCH 13/46] Fix raster images being downsampled for small page sizes --- src/calibre/ebooks/pdf/render/engine.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index e1a6808505..8abc271b4d 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ 
b/src/calibre/ebooks/pdf/render/engine.py @@ -8,7 +8,6 @@ __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' import sys, traceback -from math import sqrt from collections import namedtuple from functools import wraps, partial @@ -213,8 +212,6 @@ class PdfEngine(QPaintEngine): self.pdf_system = QTransform(sx, 0, 0, -sy, dx, dy) self.do_stroke = True self.do_fill = False - self.scale = sqrt(sy**2 + sx**2) - self.xscale, self.yscale = sx, sy self.graphics_state = GraphicsState() self.errors_occurred = False self.errors, self.debug = errors, debug @@ -244,7 +241,7 @@ class PdfEngine(QPaintEngine): # gradient_flags = self.MaskedBrush | self.PatternBrush | self.PatternTransform return (self.Antialiasing | self.AlphaBlend | self.ConstantOpacity | self.PainterPaths | self.PaintOutsidePaintEvent | - self.PrimitiveTransform) #| gradient_flags + self.PrimitiveTransform | self.PixmapTransform) #| gradient_flags def begin(self, device): if not hasattr(self, 'pdf'): @@ -281,6 +278,8 @@ class PdfEngine(QPaintEngine): def type(self): return QPaintEngine.Pdf + # TODO: Tiled pixmap + @store_error def drawPixmap(self, rect, pixmap, source_rect): self.graphics_state(self) From 836a623b5f3c3d62d0116114af481631b5fcb6ea Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Dec 2012 00:22:23 +0530 Subject: [PATCH 14/46] Fix serialization of floating point numbers --- src/calibre/ebooks/pdf/render/common.py | 20 +++++++++++++++++++- src/calibre/ebooks/pdf/render/engine.py | 7 ++++++- src/calibre/ebooks/pdf/render/serialize.py | 15 ++++++++------- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/common.py b/src/calibre/ebooks/pdf/render/common.py index 5e470122c5..5be06b1b98 100644 --- a/src/calibre/ebooks/pdf/render/common.py +++ b/src/calibre/ebooks/pdf/render/common.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import codecs, zlib from io import BytesIO from struct import pack +from decimal import Decimal 
EOL = b'\n' @@ -51,13 +52,30 @@ PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2' # Basic PDF datatypes {{{ +def format_float(f): + if abs(f) < 1e-7: + return '0' + places = 6 + a, b = type(u'')(Decimal(f).quantize(Decimal(10)**-places)).partition('.')[0::2] + b = b.rstrip('0') + if not b: + return '0' if a == '-0' else a + return '%s.%s'%(a, b) + +def fmtnum(o): + if isinstance(o, (int, long)): + return type(u'')(o) + return format_float(o) + def serialize(o, stream): if hasattr(o, 'pdf_serialize'): o.pdf_serialize(stream) elif isinstance(o, bool): stream.write(b'true' if o else b'false') - elif isinstance(o, (int, long, float)): + elif isinstance(o, (int, long)): stream.write(type(u'')(o).encode('ascii')) + elif isinstance(o, float): + stream.write(format_float(o).encode('ascii')) elif o is None: stream.write(b'null') else: diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 8abc271b4d..4bdf38e123 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' import sys, traceback from collections import namedtuple from functools import wraps, partial +from future_builtins import map import sip from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QApplication, QPainter, @@ -18,13 +19,17 @@ from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QApplication, QPainter, from calibre.constants import plugins from calibre.ebooks.pdf.render.serialize import (Color, PDFStream, Path) -from calibre.ebooks.pdf.render.common import inch, A4 +from calibre.ebooks.pdf.render.common import inch, A4, fmtnum from calibre.utils.fonts.sfnt.container import Sfnt from calibre.utils.fonts.sfnt.metrics import FontMetrics Point = namedtuple('Point', 'x y') ColorState = namedtuple('ColorState', 'color opacity do') +def repr_transform(t): + vals = map(fmtnum, (t.m11(), t.m12(), t.m21(), t.m22(), t.dx(), t.dy())) + return 
'[%s]'%' '.join(vals) + def store_error(func): @wraps(func) diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index 5042702deb..9c5f8c6f21 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -15,7 +15,7 @@ from collections import namedtuple from calibre.constants import (__appname__, __version__) from calibre.ebooks.pdf.render.common import ( Reference, EOL, serialize, Stream, Dictionary, String, Name, Array, - GlyphIndex) + GlyphIndex, fmtnum) from calibre.ebooks.pdf.render.fonts import FontManager from calibre.ebooks.pdf.render.links import Links @@ -180,7 +180,7 @@ class Text(object): stream.write_line('BT ') serialize(Name(font_name), stream) stream.write(' %g Tf '%self.size) - stream.write(' '.join(map(type(u''), self.transform)) + ' Tm ') + stream.write(' '.join(map(fmtnum, self.transform)) + ' Tm ') if self.horizontal_scale != self.default_horizontal_scale: stream.write('%g Tz '%self.horizontal_scale) if self.word_spacing != self.default_word_spacing: @@ -331,7 +331,7 @@ class PDFStream(object): vals = [m.m11(), m.m12(), m.m21(), m.m22(), m.dx(), m.dy()] else: vals = args - cm = ' '.join(map(type(u''), vals)) + cm = ' '.join(map(fmtnum, vals)) self.current_page.write_line(cm + ' cm') def set_rgb_colorspace(self): @@ -355,7 +355,8 @@ class PDFStream(object): if i != 0: self.current_page.write_line() for x in op: - self.current_page.write(type(u'')(x) + ' ') + self.current_page.write( + (fmtnum(x) if isinstance(x, (int, long, float)) else x) + ' ') def draw_path(self, path, stroke=True, fill=False, fill_rule='winding'): if not path.ops: return @@ -394,7 +395,7 @@ class PDFStream(object): op = Dictionary({'Type':Name('ExtGState'), 'CA': opacity}) self.stroke_opacities[opacity] = self.objects.add(op) self.current_page.set_opacity(self.stroke_opacities[opacity]) - self.current_page.write_line(' '.join(map(type(u''), color[:3])) + ' SC') + 
self.current_page.write_line(' '.join(map(fmtnum, color[:3])) + ' SC') def set_fill_color(self, color): opacity = color.opacity @@ -402,7 +403,7 @@ class PDFStream(object): op = Dictionary({'Type':Name('ExtGState'), 'ca': opacity}) self.fill_opacities[opacity] = self.objects.add(op) self.current_page.set_opacity(self.fill_opacities[opacity]) - self.current_page.write_line(' '.join(map(type(u''), color[:3])) + ' sc') + self.current_page.write_line(' '.join(map(fmtnum, color[:3])) + ' sc') def end_page(self): pageref = self.current_page.end(self.objects, self.stream) @@ -424,7 +425,7 @@ class PDFStream(object): self.current_page.write(b'BT ') serialize(Name(name), self.current_page) self.current_page.write(' %g Tf '%size) - self.current_page.write('%s Tm '%' '.join(map(type(u''), transform))) + self.current_page.write('%s Tm '%' '.join(map(fmtnum, transform))) for x, y, glyph_id in glyphs: self.current_page.write('%g %g Td '%(x, y)) serialize(GlyphIndex(glyph_id), self.current_page) From 0e23b9827499068c778c72e685d6f49519457539 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Dec 2012 00:47:47 +0530 Subject: [PATCH 15/46] More number serialization fixes --- src/calibre/ebooks/pdf/render/serialize.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index 9c5f8c6f21..908c4ff919 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -179,14 +179,14 @@ class Text(object): if not self.text: return stream.write_line('BT ') serialize(Name(font_name), stream) - stream.write(' %g Tf '%self.size) + stream.write(' %s Tf '%fmtnum(self.size)) stream.write(' '.join(map(fmtnum, self.transform)) + ' Tm ') if self.horizontal_scale != self.default_horizontal_scale: - stream.write('%g Tz '%self.horizontal_scale) + stream.write('%s Tz '%fmtnum(self.horizontal_scale)) if self.word_spacing != 
self.default_word_spacing: - stream.write('%g Tw '%self.word_spacing) + stream.write('%s Tw '%fmtnum(self.word_spacing)) if self.char_space != self.default_char_space: - stream.write('%g Tc '%self.char_space) + stream.write('%s Tc '%fmtnum(self.char_space)) stream.write_line() if self.glyph_adjust is self.default_glyph_adjust: serialize(String(self.text), stream) @@ -347,7 +347,7 @@ class PDFStream(object): self.current_page.write_line('Q q') def draw_rect(self, x, y, width, height, stroke=True, fill=False): - self.current_page.write('%g %g %g %g re '%(x, y, width, height)) + self.current_page.write('%s re '%' '.join(map(fmtnum, (x, y, width, height)))) self.current_page.write_line(self.PATH_OPS[(stroke, fill, 'winding')]) def write_path(self, path): @@ -424,10 +424,10 @@ class PDFStream(object): name = self.current_page.add_font(fontref) self.current_page.write(b'BT ') serialize(Name(name), self.current_page) - self.current_page.write(' %g Tf '%size) + self.current_page.write(' %s Tf '%fmtnum(size)) self.current_page.write('%s Tm '%' '.join(map(fmtnum, transform))) for x, y, glyph_id in glyphs: - self.current_page.write('%g %g Td '%(x, y)) + self.current_page.write('%s %s Td '%(fmtnum(x), fmtnum(y))) serialize(GlyphIndex(glyph_id), self.current_page) self.current_page.write(' Tj ') self.current_page.write_line(b' ET') @@ -444,7 +444,8 @@ class PDFStream(object): def draw_image(self, x, y, xscale, yscale, imgref): name = self.current_page.add_image(imgref) - self.current_page.write('q %g 0 0 %g %g %g cm '%(xscale, yscale, x, y)) + self.current_page.write('q %s 0 0 %s %s %s cm '%(fmtnum(xscale), + fmtnum(yscale), fmtnum(x), fmtnum(y))) serialize(Name(name), self.current_page) self.current_page.write_line(' Do Q') From 28a126709df9232176e5a201ba93d09b2ffa8e9b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Dec 2012 08:30:33 +0530 Subject: [PATCH 16/46] Update New York Times --- recipes/nytimes.recipe | 205 +++++++++++++++++------------------- 
recipes/nytimes_sub.recipe | 207 +++++++++++++++++-------------------- 2 files changed, 193 insertions(+), 219 deletions(-) diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index ba4e680158..ba97a2c0be 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -153,7 +153,7 @@ class NYTimes(BasicNewsRecipe): timefmt = '' - simultaneous_downloads = 1 + #simultaneous_downloads = 1 # no longer required to deal with ads cover_margins = (18,18,'grey99') @@ -204,7 +204,8 @@ class NYTimes(BasicNewsRecipe): re.compile('^subNavigation'), re.compile('^leaderboard'), re.compile('^module'), - re.compile('commentCount') + re.compile('commentCount'), + 'credit' ]}), dict(name='div', attrs={'class':re.compile('toolsList')}), # bits dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits @@ -291,11 +292,11 @@ class NYTimes(BasicNewsRecipe): del ans[idx] idx_max = idx_max-1 continue - if self.verbose: + if True: #self.verbose self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) ) for article in ans[idx][1]: total_article_count += 1 - if self.verbose: + if True: #self.verbose self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'), article['url'].encode('cp1252','replace'))) idx = idx+1 @@ -351,23 +352,8 @@ class NYTimes(BasicNewsRecipe): br = BasicNewsRecipe.get_browser() return br -## This doesn't work (and probably never did). It either gets another serve of the advertisement, -## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding. 
-## -## def skip_ad_pages(self, soup): -## # Skip ad pages served before actual article -## skip_tag = soup.find(True, {'name':'skip'}) -## if skip_tag is not None: -## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) -## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) -## url += '?pagewanted=all' -## self.log.warn("Skipping ad to article at '%s'" % url) -## return self.index_to_soup(url, raw=True) - - cover_tag = 'NY_NYT' def get_cover_url(self): - from datetime import timedelta, date cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg' br = BasicNewsRecipe.get_browser() daysback=1 @@ -745,11 +731,12 @@ class NYTimes(BasicNewsRecipe): def preprocess_html(self, soup): - print("PREPROCESS TITLE="+self.tag_to_string(soup.title)) + #print("PREPROCESS TITLE="+self.tag_to_string(soup.title)) skip_tag = soup.find(True, {'name':'skip'}) if skip_tag is not None: - url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) - url += '?pagewanted=all' + #url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) + url = 'http://www.nytimes.com' + skip_tag.parent['href'] + #url += '?pagewanted=all' self.log.warn("Skipping ad to article at '%s'" % url) sleep(5) soup = self.handle_tags(self.article_to_soup(url)) @@ -969,121 +956,121 @@ class NYTimes(BasicNewsRecipe): self.log("ERROR: One picture per article in postprocess_html") try: - # Change captions to italic - for caption in soup.findAll(True, {'class':'caption'}) : - if caption and len(caption) > 0: - cTag = Tag(soup, "p", [("class", "caption")]) - c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() - mp_off = c.find("More Photos") - if mp_off >= 0: - c = c[:mp_off] - cTag.insert(0, c) - caption.replaceWith(cTag) + # Change captions to italic + for caption in soup.findAll(True, {'class':'caption'}) : + if caption and len(caption) > 0: + cTag = Tag(soup, "p", 
[("class", "caption")]) + c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() + mp_off = c.find("More Photos") + if mp_off >= 0: + c = c[:mp_off] + cTag.insert(0, c) + caption.replaceWith(cTag) except: - self.log("ERROR: Problem in change captions to italic") + self.log("ERROR: Problem in change captions to italic") try: - # Change to

- h1 = soup.find('h1') - blogheadline = str(h1) #added for dealbook - if h1: - headline = h1.find("nyt_headline") - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - h1.replaceWith(tag) - elif blogheadline.find('entry-title'):#added for dealbook - tag = Tag(soup, "h2")#added for dealbook - tag['class'] = "headline"#added for dealbook - tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook - h1.replaceWith(tag)#added for dealbook + # Change to

+ h1 = soup.find('h1') + blogheadline = str(h1) #added for dealbook + if h1: + headline = h1.find("nyt_headline") + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(headline.contents[0])) + h1.replaceWith(tag) + elif blogheadline.find('entry-title'):#added for dealbook + tag = Tag(soup, "h2")#added for dealbook + tag['class'] = "headline"#added for dealbook + tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook + h1.replaceWith(tag)#added for dealbook - else: - # Blog entry - replace headline, remove
tags - BCC I think this is no longer functional 1-18-2011 - headline = soup.find('title') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.renderContents())) - soup.insert(0, tag) - hrs = soup.findAll('hr') - for hr in hrs: - hr.extract() + else: + # Blog entry - replace headline, remove
tags - BCC I think this is no longer functional 1-18-2011 + headline = soup.find('title') + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(self.tag_to_string(headline,False))) + soup.insert(0, tag) + hrs = soup.findAll('hr') + for hr in hrs: + hr.extract() except: - self.log("ERROR: Problem in Change to

") + self.log("ERROR: Problem in Change to

") try: - #if this is from a blog (dealbook, fix the byline format - bylineauthor = soup.find('address',attrs={'class':'byline author vcard'}) - if bylineauthor: - tag = Tag(soup, "h6") - tag['class'] = "byline" - tag.insert(0, self.fixChars(bylineauthor.renderContents())) - bylineauthor.replaceWith(tag) + #if this is from a blog (dealbook, fix the byline format + bylineauthor = soup.find('address',attrs={'class':'byline author vcard'}) + if bylineauthor: + tag = Tag(soup, "h6") + tag['class'] = "byline" + tag.insert(0, self.fixChars(self.tag_to_string(bylineauthor,False))) + bylineauthor.replaceWith(tag) except: self.log("ERROR: fixing byline author format") try: - #if this is a blog (dealbook) fix the credit style for the pictures - blogcredit = soup.find('div',attrs={'class':'credit'}) - if blogcredit: - tag = Tag(soup, "h6") - tag['class'] = "credit" - tag.insert(0, self.fixChars(blogcredit.renderContents())) - blogcredit.replaceWith(tag) + #if this is a blog (dealbook) fix the credit style for the pictures + blogcredit = soup.find('div',attrs={'class':'credit'}) + if blogcredit: + tag = Tag(soup, "h6") + tag['class'] = "credit" + tag.insert(0, self.fixChars(self.tag_to_string(blogcredit,False))) + blogcredit.replaceWith(tag) except: self.log("ERROR: fixing credit format") try: - # Change

to

- used in editorial blogs - masthead = soup.find("h1") - if masthead: - # Nuke the href - if masthead.a: - del(masthead.a['href']) - tag = Tag(soup, "h3") - tag.insert(0, self.fixChars(masthead.contents[0])) - masthead.replaceWith(tag) + # Change

to

- used in editorial blogs + masthead = soup.find("h1") + if masthead: + # Nuke the href + if masthead.a: + del(masthead.a['href']) + tag = Tag(soup, "h3") + tag.insert(0, self.fixChars(masthead.contents[0])) + masthead.replaceWith(tag) except: - self.log("ERROR: Problem in Change

to

- used in editorial blogs") + self.log("ERROR: Problem in Change

to

- used in editorial blogs") try: - # Change to - for subhead in soup.findAll(True, {'class':'bold'}) : - if subhead.contents: - bTag = Tag(soup, "b") - bTag.insert(0, subhead.contents[0]) - subhead.replaceWith(bTag) + # Change to + for subhead in soup.findAll(True, {'class':'bold'}) : + if subhead.contents: + bTag = Tag(soup, "b") + bTag.insert(0, subhead.contents[0]) + subhead.replaceWith(bTag) except: - self.log("ERROR: Problem in Change

to

- used in editorial blogs") + self.log("ERROR: Problem in Change

to

- used in editorial blogs") try: - #remove the update tag - blogupdated = soup.find('span', {'class':'update'}) - if blogupdated: - blogupdated.replaceWith("") + #remove the update tag + blogupdated = soup.find('span', {'class':'update'}) + if blogupdated: + blogupdated.replaceWith("") except: - self.log("ERROR: Removing strong tag") + self.log("ERROR: Removing strong tag") try: - divTag = soup.find('div',attrs={'id':'articleBody'}) - if divTag: - divTag['class'] = divTag['id'] + divTag = soup.find('div',attrs={'id':'articleBody'}) + if divTag: + divTag['class'] = divTag['id'] except: - self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") + self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") try: - # Add class="authorId" to
so we can format with CSS - divTag = soup.find('div',attrs={'id':'authorId'}) - if divTag and divTag.contents[0]: - tag = Tag(soup, "p") - tag['class'] = "authorId" - tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], - use_alt=False))) - divTag.replaceWith(tag) + # Add class="authorId" to
so we can format with CSS + divTag = soup.find('div',attrs={'id':'authorId'}) + if divTag and divTag.contents[0]: + tag = Tag(soup, "p") + tag['class'] = "authorId" + tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], + use_alt=False))) + divTag.replaceWith(tag) except: - self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") + self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") return soup diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index 023a787983..d550a5158f 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -32,7 +32,7 @@ class NYTimes(BasicNewsRecipe): # number of days old an article can be for inclusion. If oldest_web_article = None all articles # will be included. Note: oldest_web_article is ignored if webEdition = False webEdition = False - oldest_web_article = 7 + oldest_web_article = None # download higher resolution images than the small thumbnails typically included in the article # the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper @@ -153,7 +153,7 @@ class NYTimes(BasicNewsRecipe): timefmt = '' - simultaneous_downloads = 1 + #simultaneous_downloads = 1 # no longer required to deal with ads cover_margins = (18,18,'grey99') @@ -204,7 +204,8 @@ class NYTimes(BasicNewsRecipe): re.compile('^subNavigation'), re.compile('^leaderboard'), re.compile('^module'), - re.compile('commentCount') + re.compile('commentCount'), + 'credit' ]}), dict(name='div', attrs={'class':re.compile('toolsList')}), # bits dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits @@ -291,11 +292,11 @@ class NYTimes(BasicNewsRecipe): del ans[idx] idx_max = idx_max-1 continue - if self.verbose: + if True: #self.verbose self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) ) for article in ans[idx][1]: total_article_count += 1 - if self.verbose: + if True: #self.verbose self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'), article['url'].encode('cp1252','replace'))) idx = idx+1 @@ -351,23 +352,8 @@ class NYTimes(BasicNewsRecipe): br = BasicNewsRecipe.get_browser() return br -## This doesn't work (and probably never did). 
It either gets another serve of the advertisement, -## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding. -## -## def skip_ad_pages(self, soup): -## # Skip ad pages served before actual article -## skip_tag = soup.find(True, {'name':'skip'}) -## if skip_tag is not None: -## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) -## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) -## url += '?pagewanted=all' -## self.log.warn("Skipping ad to article at '%s'" % url) -## return self.index_to_soup(url, raw=True) - - cover_tag = 'NY_NYT' def get_cover_url(self): - from datetime import timedelta, date cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg' br = BasicNewsRecipe.get_browser() daysback=1 @@ -745,11 +731,12 @@ class NYTimes(BasicNewsRecipe): def preprocess_html(self, soup): - print("PREPROCESS TITLE="+self.tag_to_string(soup.title)) + #print("PREPROCESS TITLE="+self.tag_to_string(soup.title)) skip_tag = soup.find(True, {'name':'skip'}) if skip_tag is not None: - url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) - url += '?pagewanted=all' + #url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) + url = 'http://www.nytimes.com' + skip_tag.parent['href'] + #url += '?pagewanted=all' self.log.warn("Skipping ad to article at '%s'" % url) sleep(5) soup = self.handle_tags(self.article_to_soup(url)) @@ -969,121 +956,121 @@ class NYTimes(BasicNewsRecipe): self.log("ERROR: One picture per article in postprocess_html") try: - # Change captions to italic - for caption in soup.findAll(True, {'class':'caption'}) : - if caption and len(caption) > 0: - cTag = Tag(soup, "p", [("class", "caption")]) - c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() - mp_off = c.find("More Photos") - if mp_off >= 0: - c = c[:mp_off] - cTag.insert(0, c) - 
caption.replaceWith(cTag) + # Change captions to italic + for caption in soup.findAll(True, {'class':'caption'}) : + if caption and len(caption) > 0: + cTag = Tag(soup, "p", [("class", "caption")]) + c = self.fixChars(self.tag_to_string(caption,use_alt=False)).strip() + mp_off = c.find("More Photos") + if mp_off >= 0: + c = c[:mp_off] + cTag.insert(0, c) + caption.replaceWith(cTag) except: - self.log("ERROR: Problem in change captions to italic") + self.log("ERROR: Problem in change captions to italic") try: - # Change to

- h1 = soup.find('h1') - blogheadline = str(h1) #added for dealbook - if h1: - headline = h1.find("nyt_headline") - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.contents[0])) - h1.replaceWith(tag) - elif blogheadline.find('entry-title'):#added for dealbook - tag = Tag(soup, "h2")#added for dealbook - tag['class'] = "headline"#added for dealbook - tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook - h1.replaceWith(tag)#added for dealbook + # Change to

+ h1 = soup.find('h1') + blogheadline = str(h1) #added for dealbook + if h1: + headline = h1.find("nyt_headline") + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(headline.contents[0])) + h1.replaceWith(tag) + elif blogheadline.find('entry-title'):#added for dealbook + tag = Tag(soup, "h2")#added for dealbook + tag['class'] = "headline"#added for dealbook + tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook + h1.replaceWith(tag)#added for dealbook - else: - # Blog entry - replace headline, remove
tags - BCC I think this is no longer functional 1-18-2011 - headline = soup.find('title') - if headline: - tag = Tag(soup, "h2") - tag['class'] = "headline" - tag.insert(0, self.fixChars(headline.renderContents())) - soup.insert(0, tag) - hrs = soup.findAll('hr') - for hr in hrs: - hr.extract() + else: + # Blog entry - replace headline, remove
tags - BCC I think this is no longer functional 1-18-2011 + headline = soup.find('title') + if headline: + tag = Tag(soup, "h2") + tag['class'] = "headline" + tag.insert(0, self.fixChars(self.tag_to_string(headline,False))) + soup.insert(0, tag) + hrs = soup.findAll('hr') + for hr in hrs: + hr.extract() except: - self.log("ERROR: Problem in Change to

") + self.log("ERROR: Problem in Change to

") try: - #if this is from a blog (dealbook, fix the byline format - bylineauthor = soup.find('address',attrs={'class':'byline author vcard'}) - if bylineauthor: - tag = Tag(soup, "h6") - tag['class'] = "byline" - tag.insert(0, self.fixChars(bylineauthor.renderContents())) - bylineauthor.replaceWith(tag) + #if this is from a blog (dealbook, fix the byline format + bylineauthor = soup.find('address',attrs={'class':'byline author vcard'}) + if bylineauthor: + tag = Tag(soup, "h6") + tag['class'] = "byline" + tag.insert(0, self.fixChars(self.tag_to_string(bylineauthor,False))) + bylineauthor.replaceWith(tag) except: self.log("ERROR: fixing byline author format") try: - #if this is a blog (dealbook) fix the credit style for the pictures - blogcredit = soup.find('div',attrs={'class':'credit'}) - if blogcredit: - tag = Tag(soup, "h6") - tag['class'] = "credit" - tag.insert(0, self.fixChars(blogcredit.renderContents())) - blogcredit.replaceWith(tag) + #if this is a blog (dealbook) fix the credit style for the pictures + blogcredit = soup.find('div',attrs={'class':'credit'}) + if blogcredit: + tag = Tag(soup, "h6") + tag['class'] = "credit" + tag.insert(0, self.fixChars(self.tag_to_string(blogcredit,False))) + blogcredit.replaceWith(tag) except: self.log("ERROR: fixing credit format") try: - # Change

to

- used in editorial blogs - masthead = soup.find("h1") - if masthead: - # Nuke the href - if masthead.a: - del(masthead.a['href']) - tag = Tag(soup, "h3") - tag.insert(0, self.fixChars(masthead.contents[0])) - masthead.replaceWith(tag) + # Change

to

- used in editorial blogs + masthead = soup.find("h1") + if masthead: + # Nuke the href + if masthead.a: + del(masthead.a['href']) + tag = Tag(soup, "h3") + tag.insert(0, self.fixChars(masthead.contents[0])) + masthead.replaceWith(tag) except: - self.log("ERROR: Problem in Change

to

- used in editorial blogs") + self.log("ERROR: Problem in Change

to

- used in editorial blogs") try: - # Change to - for subhead in soup.findAll(True, {'class':'bold'}) : - if subhead.contents: - bTag = Tag(soup, "b") - bTag.insert(0, subhead.contents[0]) - subhead.replaceWith(bTag) + # Change to + for subhead in soup.findAll(True, {'class':'bold'}) : + if subhead.contents: + bTag = Tag(soup, "b") + bTag.insert(0, subhead.contents[0]) + subhead.replaceWith(bTag) except: - self.log("ERROR: Problem in Change

<h1> to <h3>

- used in editorial blogs") + self.log("ERROR: Problem in Change

<h1> to <h3>

- used in editorial blogs") try: - #remove the update tag - blogupdated = soup.find('span', {'class':'update'}) - if blogupdated: - blogupdated.replaceWith("") + #remove the update tag + blogupdated = soup.find('span', {'class':'update'}) + if blogupdated: + blogupdated.replaceWith("") except: - self.log("ERROR: Removing strong tag") + self.log("ERROR: Removing strong tag") try: - divTag = soup.find('div',attrs={'id':'articleBody'}) - if divTag: - divTag['class'] = divTag['id'] + divTag = soup.find('div',attrs={'id':'articleBody'}) + if divTag: + divTag['class'] = divTag['id'] except: - self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") + self.log("ERROR: Problem in soup.find(div,attrs={id:articleBody})") try: - # Add class="authorId" to
<div> so we can format with CSS - divTag = soup.find('div',attrs={'id':'authorId'}) - if divTag and divTag.contents[0]: - tag = Tag(soup, "p") - tag['class'] = "authorId" - tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], - use_alt=False))) - divTag.replaceWith(tag) + # Add class="authorId" to
<div> so we can format with CSS + divTag = soup.find('div',attrs={'id':'authorId'}) + if divTag and divTag.contents[0]: + tag = Tag(soup, "p") + tag['class'] = "authorId" + tag.insert(0, self.fixChars(self.tag_to_string(divTag.contents[0], + use_alt=False))) + divTag.replaceWith(tag) except: - self.log("ERROR: Problem in Add class=authorId to
<div> so we can format with CSS") + self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") return soup From 31f52afacfe4356e7c5e190c3f1b7233706128ed Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Dec 2012 08:36:08 +0530 Subject: [PATCH 17/46] Fix #1087809 (Device not recognized or found) (driver for random chinese smartphone) --- src/calibre/devices/android/driver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index 40626ca1ba..cc42cf33b2 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -48,6 +48,7 @@ class ANDROID(USBMS): 0x2910 : HTC_BCDS, 0xe77 : HTC_BCDS, 0xff9 : HTC_BCDS, + 0x0001 : [0x255], }, # Eken @@ -212,7 +213,8 @@ class ANDROID(USBMS): 'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP', 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD', 'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY', 'SXZ', 'USB_2.0', - 'COBY_MID', 'VS', 'AINOL', 'TOPWISE', 'PAD703', 'NEXT8D12'] + 'COBY_MID', 'VS', 'AINOL', 'TOPWISE', 'PAD703', 'NEXT8D12', + 'MEDIATEK'] WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', @@ -232,7 +234,7 @@ class ANDROID(USBMS): 'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE', 'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID', 'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E', - 'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F'] + 'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS'] WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', From 2975292f27c346aeef7083743e130282c57bdee3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Dec 2012 08:39:32 +0530 
Subject: [PATCH 18/46] Fix #1094597 (OverflowError: date value out of range) --- src/calibre/gui2/library/models.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index 8cd84bdafc..891b775448 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import functools, re, os, traceback, errno +import functools, re, os, traceback, errno, time from collections import defaultdict from PyQt4.Qt import (QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, @@ -1419,7 +1419,11 @@ class DeviceBooksModel(BooksModel): # {{{ return QVariant(human_readable(size)) elif cname == 'timestamp': dt = self.db[self.map[row]].datetime - dt = dt_factory(dt, assume_utc=True, as_utc=False) + try: + dt = dt_factory(dt, assume_utc=True, as_utc=False) + except OverflowError: + dt = dt_factory(time.gmtime(), assume_utc=True, + as_utc=False) return QVariant(strftime(TIME_FMT, dt.timetuple())) elif cname == 'collections': tags = self.db[self.map[row]].device_collections From df25363d3eedc3046e8a7803ca42a02255b793b5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Dec 2012 11:42:31 +0530 Subject: [PATCH 19/46] ... --- manual/faq.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/manual/faq.rst b/manual/faq.rst index 266fe05f6a..e9bb6fc70f 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -670,6 +670,9 @@ There are three possible things I know of, that can cause this: the blacklist of programs inside RoboForm to fix this. Or uninstall RoboForm. + * The Logitech SetPoint Settings application causes random crashes in + |app| when it is open. Close it before starting |app|. + |app| is not starting on OS X? 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From d03a5f252c413640e3e6c40e1211f9ab2e7a9501 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 30 Dec 2012 15:32:23 +0530 Subject: [PATCH 20/46] Refactor graphics state handling --- src/calibre/ebooks/pdf/render/engine.py | 260 ++++----------------- src/calibre/ebooks/pdf/render/graphics.py | 196 ++++++++++++++++ src/calibre/ebooks/pdf/render/serialize.py | 21 +- 3 files changed, 240 insertions(+), 237 deletions(-) create mode 100644 src/calibre/ebooks/pdf/render/graphics.py diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 4bdf38e123..aa1fa17cc3 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -14,12 +14,13 @@ from future_builtins import map import sip from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QApplication, QPainter, - QTransform, QPainterPath, QImage, QByteArray, QBuffer, + QTransform, QImage, QByteArray, QBuffer, qRgba) from calibre.constants import plugins -from calibre.ebooks.pdf.render.serialize import (Color, PDFStream, Path) +from calibre.ebooks.pdf.render.serialize import (PDFStream, Path) from calibre.ebooks.pdf.render.common import inch, A4, fmtnum +from calibre.ebooks.pdf.render.graphics import convert_path, Graphics from calibre.utils.fonts.sfnt.container import Sfnt from calibre.utils.fonts.sfnt.metrics import FontMetrics @@ -42,146 +43,6 @@ def store_error(func): return errh -class GraphicsState(object): # {{{ - - def __init__(self): - self.ops = {} - self.initial_state = { - 'fill': ColorState(Color(0., 0., 0., 1.), 1.0, False), - 'transform': QTransform(), - 'dash': [], - 'line_width': 0, - 'stroke': ColorState(Color(0., 0., 0., 1.), 1.0, True), - 'line_cap': 'flat', - 'line_join': 'miter', - 'clip': (Qt.NoClip, QPainterPath()), - } - self.current_state = self.initial_state.copy() - - def reset(self): - self.current_state = self.initial_state.copy() - - def update_color_state(self, which, 
color=None, opacity=None, - brush_style=None, pen_style=None): - current = self.ops.get(which, self.current_state[which]) - n = ColorState(*current) - if color is not None: - n = n._replace(color=Color(*color.getRgbF())) - if opacity is not None: - n = n._replace(opacity=opacity) - if opacity is not None: - opacity *= n.color.opacity - if brush_style is not None: - if which == 'fill': - do = (False if opacity == 0.0 or brush_style == Qt.NoBrush else - True) - else: - do = (False if opacity == 0.0 or brush_style == Qt.NoBrush or - pen_style == Qt.NoPen else True) - n = n._replace(do=do) - self.ops[which] = n - - def read(self, state): - flags = state.state() - - if flags & QPaintEngine.DirtyTransform: - self.ops['transform'] = state.transform() - - # TODO: Add support for brush patterns - if flags & QPaintEngine.DirtyBrush: - brush = state.brush() - color = brush.color() - self.update_color_state('fill', color=color, - brush_style=brush.style()) - - if flags & QPaintEngine.DirtyPen: - pen = state.pen() - brush = pen.brush() - color = pen.color() - self.update_color_state('stroke', color, brush_style=brush.style(), - pen_style=pen.style()) - ps = {Qt.DashLine:[3], Qt.DotLine:[1,2], Qt.DashDotLine:[3,2,1,2], - Qt.DashDotDotLine:[3, 2, 1, 2, 1, 2]}.get(pen.style(), []) - self.ops['dash'] = ps - self.ops['line_width'] = pen.widthF() - self.ops['line_cap'] = {Qt.FlatCap:'flat', Qt.RoundCap:'round', - Qt.SquareCap:'square'}.get(pen.capStyle(), 'flat') - self.ops['line_join'] = {Qt.MiterJoin:'miter', Qt.RoundJoin:'round', - Qt.BevelJoin:'bevel'}.get(pen.joinStyle(), 'miter') - - if flags & QPaintEngine.DirtyOpacity: - self.update_color_state('fill', opacity=state.opacity()) - self.update_color_state('stroke', opacity=state.opacity()) - - if flags & QPaintEngine.DirtyClipPath or flags & QPaintEngine.DirtyClipRegion: - self.ops['clip'] = True - - def __call__(self, engine): - if not self.ops: - return - pdf = engine.pdf - ops = self.ops - current_transform = 
self.current_state['transform'] - transform_changed = 'transform' in ops and ops['transform'] != current_transform - reset_stack = transform_changed or 'clip' in ops - - if reset_stack: - pdf.restore_stack() - pdf.save_stack() - # Since we have reset the stack we need to re-apply all previous - # operations, that are different from the default value (clip is - # handled separately). - for op in set(self.initial_state) - {'clip'}: - if op in ops: # These will be applied below - self.current_state[op] = self.initial_state[op] - elif self.current_state[op] != self.initial_state[op]: - self.apply(op, self.current_state[op], engine, pdf) - - # Now apply the new operations - for op, val in ops.iteritems(): - if op != 'clip' and self.current_state[op] != val: - self.apply(op, val, engine, pdf) - self.current_state[op] = val - - if 'clip' in ops: - # Get the current clip - path = engine.painter().clipPath() - if not path.isEmpty(): - engine.add_clip(path) - self.ops = {} - - def apply(self, op, val, engine, pdf): - getattr(self, 'apply_'+op)(val, engine, pdf) - - def apply_transform(self, val, engine, pdf): - if not val.isIdentity(): - pdf.transform(val) - - def apply_stroke(self, val, engine, pdf): - self.apply_color_state('stroke', val, engine, pdf) - - def apply_fill(self, val, engine, pdf): - self.apply_color_state('fill', val, engine, pdf) - - def apply_color_state(self, which, val, engine, pdf): - color = val.color._replace(opacity=val.opacity*val.color.opacity) - getattr(pdf, 'set_%s_color'%which)(color) - setattr(engine, 'do_%s'%which, val.do) - - def apply_dash(self, val, engine, pdf): - pdf.set_dash(val) - - def apply_line_width(self, val, engine, pdf): - pdf.set_line_width(val) - - def apply_line_cap(self, val, engine, pdf): - pdf.set_line_cap(val) - - def apply_line_join(self, val, engine, pdf): - pdf.set_line_join(val) - -# }}} - class Font(FontMetrics): def __init__(self, sfnt): @@ -215,9 +76,7 @@ class PdfEngine(QPaintEngine): self.bottom_margin) / 
self.pixel_height self.pdf_system = QTransform(sx, 0, 0, -sy, dx, dy) - self.do_stroke = True - self.do_fill = False - self.graphics_state = GraphicsState() + self.graphics = Graphics() self.errors_occurred = False self.errors, self.debug = errors, debug self.fonts = {} @@ -230,14 +89,21 @@ class PdfEngine(QPaintEngine): if err: raise RuntimeError('Failed to load qt_hack with err: %s'%err) + def apply_graphics_state(self): + self.graphics(self.pdf, self.pdf_system, self.painter()) + + @property + def do_fill(self): + return self.graphics.current_state.do_fill + + @property + def do_stroke(self): + return self.graphics.current_state.do_stroke + def init_page(self): self.pdf.transform(self.pdf_system) self.pdf.set_rgb_colorspace() - width = self.painter().pen().widthF() if self.isActive() else 0 - self.pdf.set_line_width(width) - self.do_stroke = True - self.do_fill = False - self.graphics_state.reset() + self.graphics.reset() self.pdf.save_stack() self.current_page_inited = True @@ -287,7 +153,7 @@ class PdfEngine(QPaintEngine): @store_error def drawPixmap(self, rect, pixmap, source_rect): - self.graphics_state(self) + self.apply_graphics_state() source_rect = source_rect.toRect() pixmap = (pixmap if source_rect == pixmap.rect() else pixmap.copy(source_rect)) @@ -299,7 +165,7 @@ class PdfEngine(QPaintEngine): @store_error def drawImage(self, rect, image, source_rect, flags=Qt.AutoColor): - self.graphics_state(self) + self.apply_graphics_state() source_rect = source_rect.toRect() image = (image if source_rect == image.rect() else image.copy(source_rect)) @@ -374,50 +240,20 @@ class PdfEngine(QPaintEngine): @store_error def updateState(self, state): - self.graphics_state.read(state) - - def convert_path(self, path): - p = Path() - i = 0 - while i < path.elementCount(): - elem = path.elementAt(i) - em = (elem.x, elem.y) - i += 1 - if elem.isMoveTo(): - p.move_to(*em) - elif elem.isLineTo(): - p.line_to(*em) - elif elem.isCurveTo(): - added = False - if 
path.elementCount() > i+1: - c1, c2 = path.elementAt(i), path.elementAt(i+1) - if (c1.type == path.CurveToDataElement and c2.type == - path.CurveToDataElement): - i += 2 - p.curve_to(em[0], em[1], c1.x, c1.y, c2.x, c2.y) - added = True - if not added: - raise ValueError('Invalid curve to operation') - return p + self.graphics.update_state(state, self.painter()) @store_error def drawPath(self, path): - self.graphics_state(self) - p = self.convert_path(path) + self.apply_graphics_state() + p = convert_path(path) fill_rule = {Qt.OddEvenFill:'evenodd', Qt.WindingFill:'winding'}[path.fillRule()] self.pdf.draw_path(p, stroke=self.do_stroke, fill=self.do_fill, fill_rule=fill_rule) - def add_clip(self, path): - p = self.convert_path(path) - fill_rule = {Qt.OddEvenFill:'evenodd', - Qt.WindingFill:'winding'}[path.fillRule()] - self.pdf.add_clip(p, fill_rule=fill_rule) - @store_error def drawPoints(self, points): - self.graphics_state(self) + self.apply_graphics_state() p = Path() for point in points: p.move_to(point.x(), point.y()) @@ -426,7 +262,7 @@ class PdfEngine(QPaintEngine): @store_error def drawRects(self, rects): - self.graphics_state(self) + self.apply_graphics_state() for rect in rects: bl = rect.topLeft() self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(), @@ -446,7 +282,7 @@ class PdfEngine(QPaintEngine): @store_error def drawTextItem(self, point, text_item): # super(PdfEngine, self).drawTextItem(point, text_item) - self.graphics_state(self) + self.apply_graphics_state() gi = self.qt_hack.get_glyphs(point, text_item) if not gi.indices: sip.delete(gi) @@ -477,7 +313,7 @@ class PdfEngine(QPaintEngine): @store_error def drawPolygon(self, points, mode): - self.graphics_state(self) + self.apply_graphics_state() if not points: return p = Path() p.move_to(points[0].x(), points[0].y()) @@ -510,14 +346,6 @@ class PdfEngine(QPaintEngine): link.append((llx, lly, urx, ury)) self.pdf.links.add(current_item, start_page, links, anchors) - def __enter__(self): - 
self.pdf.save_stack() - self.saved_ps = (self.do_stroke, self.do_fill) - - def __exit__(self, *args): - self.do_stroke, self.do_fill = self.saved_ps - self.pdf.restore_stack() - class PdfDevice(QPaintDevice): # {{{ @@ -584,8 +412,8 @@ class PdfDevice(QPaintDevice): # {{{ # }}} if __name__ == '__main__': - from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap) - QBrush, QColor, QPoint, QPixmap + from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap, QPainterPath) + QBrush, QColor, QPoint, QPixmap, QPainterPath app = QApplication([]) p = QPainter() with open('/t/painter.pdf', 'wb') as f: @@ -593,6 +421,7 @@ if __name__ == '__main__': p.begin(dev) dev.init_page() xmax, ymax = p.viewport().width(), p.viewport().height() + b = p.brush() try: p.drawRect(0, 0, xmax, ymax) # p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax), @@ -600,27 +429,22 @@ if __name__ == '__main__': # pp = QPainterPath() # pp.addRect(0, 0, xmax, ymax) # p.drawPath(pp) - # p.save() - # for i in xrange(3): - # col = [0, 0, 0, 200] - # col[i] = 255 - # p.setOpacity(0.3) - # p.setBrush(QBrush(QColor(*col))) - # p.drawRect(0, 0, xmax/10, xmax/10) - # p.translate(xmax/10, xmax/10) - # p.scale(1, 1.5) - # p.restore() + p.save() + for i in xrange(3): + col = [0, 0, 0, 200] + col[i] = 255 + p.setOpacity(0.3) + p.fillRect(0, 0, xmax/10, xmax/10, QBrush(QColor(*col))) + p.setOpacity(1) + p.drawRect(0, 0, xmax/10, xmax/10) + p.translate(xmax/10, xmax/10) + p.scale(1, 1.5) + p.restore() - # # p.scale(2, 2) - # # p.rotate(45) - # p.drawPixmap(0, 0, 2048, 2048, QPixmap(I('library.png'))) - # p.drawRect(0, 0, 2048, 2048) - - # p.save() - # p.drawLine(0, 0, 5000, 0) + # p.scale(2, 2) # p.rotate(45) - # p.drawLine(0, 0, 5000, 0) - # p.restore() + p.drawPixmap(0, 0, 2048, 2048, QPixmap(I('library.png'))) + p.drawRect(0, 0, 2048, 2048) f = p.font() f.setPointSize(20) diff --git a/src/calibre/ebooks/pdf/render/graphics.py b/src/calibre/ebooks/pdf/render/graphics.py new file mode 100644 index 
0000000000..68efb2514a --- /dev/null +++ b/src/calibre/ebooks/pdf/render/graphics.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from math import sqrt + +from PyQt4.Qt import (QBrush, QPen, Qt, QPointF, QTransform, QPainterPath, + QPaintEngine) + +from calibre.ebooks.pdf.render.common import Array +from calibre.ebooks.pdf.render.serialize import Path, Color + +def convert_path(path): + p = Path() + i = 0 + while i < path.elementCount(): + elem = path.elementAt(i) + em = (elem.x, elem.y) + i += 1 + if elem.isMoveTo(): + p.move_to(*em) + elif elem.isLineTo(): + p.line_to(*em) + elif elem.isCurveTo(): + added = False + if path.elementCount() > i+1: + c1, c2 = path.elementAt(i), path.elementAt(i+1) + if (c1.type == path.CurveToDataElement and c2.type == + path.CurveToDataElement): + i += 2 + p.curve_to(em[0], em[1], c1.x, c1.y, c2.x, c2.y) + added = True + if not added: + raise ValueError('Invalid curve to operation') + return p + + +class GraphicsState(object): + + FIELDS = ('fill', 'stroke', 'opacity', 'transform', 'brush_origin', + 'clip', 'do_fill', 'do_stroke') + + def __init__(self): + self.fill = QBrush() + self.stroke = QPen() + self.opacity = 1.0 + self.transform = QTransform() + self.brush_origin = QPointF() + self.clip = QPainterPath() + self.do_fill = False + self.do_stroke = True + + def __eq__(self, other): + for x in self.FIELDS: + if getattr(other, x) != getattr(self, x): + return False + return True + + def copy(self): + ans = GraphicsState() + ans.fill = QBrush(self.fill) + ans.stroke = QPen(self.stroke) + ans.opacity = self.opacity + ans.transform = self.transform * QTransform() + ans.brush_origin = QPointF(self.brush_origin) + ans.clip = self.clip + ans.do_fill, ans.do_stroke = self.do_fill, 
self.do_stroke + return ans + +class Graphics(object): + + def __init__(self): + self.base_state = GraphicsState() + self.current_state = GraphicsState() + self.pending_state = None + + def update_state(self, state, painter): + flags = state.state() + if self.pending_state is None: + self.pending_state = self.current_state.copy() + + s = self.pending_state + + if flags & QPaintEngine.DirtyTransform: + s.transform = state.transform() + + if flags & QPaintEngine.DirtyBrushOrigin: + s.brush_origin = state.brushOrigin() + + if flags & QPaintEngine.DirtyBrush: + s.fill = state.brush() + + if flags & QPaintEngine.DirtyPen: + s.stroke = state.pen() + + if flags & QPaintEngine.DirtyOpacity: + s.opacity = state.opacity() + + if flags & QPaintEngine.DirtyClipPath or flags & QPaintEngine.DirtyClipRegion: + s.clip = painter.clipPath() + + def reset(self): + self.current_state = GraphicsState() + self.pending_state = None + + def __call__(self, pdf, pdf_system, painter): + # Apply the currently pending state to the PDF + if self.pending_state is None: + return + + pdf_state = self.current_state + ps = self.pending_state + + if (ps.transform != pdf_state.transform or ps.clip != pdf_state.clip): + pdf.restore_stack() + pdf.save_stack() + pdf_state = self.base_state + + if (pdf_state.transform != ps.transform): + pdf.transform(ps.transform) + + if (pdf_state.opacity != ps.opacity or pdf_state.stroke != ps.stroke): + self.apply_stroke(ps, pdf, pdf_system, painter) + + if (pdf_state.opacity != ps.opacity or pdf_state.fill != ps.fill or + pdf_state.brush_origin != ps.brush_origin): + self.apply_fill(ps, pdf, pdf_system, painter) + + if (pdf_state.clip != ps.clip): + p = convert_path(ps.clip) + fill_rule = {Qt.OddEvenFill:'evenodd', + Qt.WindingFill:'winding'}[ps.clip.fillRule()] + pdf.add_clip(p, fill_rule=fill_rule) + + self.current_state = self.pending_state + self.pending_state = None + + def apply_stroke(self, state, pdf, pdf_system, painter): + # TODO: Handle pens with non solid 
brushes by setting the colorspace + # for stroking to a pattern + # TODO: Support miter limit by using QPainterPathStroker + pen = state.stroke + self.pending_state.do_stroke = True + if pen.style() == Qt.NoPen: + self.pending_state.do_stroke = False + + # Width + w = pen.widthF() + if pen.isCosmetic(): + t = painter.transform() + w /= sqrt(t.m11()**2 + t.m22()**2) + pdf.serialize(w) + pdf.current_page.write(' w ') + + # Line cap + cap = {Qt.FlatCap:0, Qt.RoundCap:1, Qt.SquareCap: + 2}.get(pen.capStyle(), 0) + pdf.current_page.write('%d J '%cap) + + # Line join + join = {Qt.MiterJoin:0, Qt.RoundJoin:1, + Qt.BevelJoin:2}.get(pen.joinStyle(), 0) + pdf.current_page.write('%d j '%join) + + # Dash pattern + ps = {Qt.DashLine:[3], Qt.DotLine:[1,2], Qt.DashDotLine:[3,2,1,2], + Qt.DashDotDotLine:[3, 2, 1, 2, 1, 2]}.get(pen.style(), []) + if ps: + pdf.serialize(Array(ps)) + pdf.current_page.write(' d ') + + # Stroke fill + b = pen.brush() + vals = list(b.color().getRgbF()) + vals[-1] *= state.opacity + color = Color(*vals) + pdf.set_stroke_color(color) + + if vals[-1] < 1e-5 or b.style() == Qt.NoBrush: + self.pending_state.do_stroke = False + + def apply_fill(self, state, pdf, pdf_system, painter): + self.pending_state.do_fill = True + b = state.fill + if b.style() == Qt.NoBrush: + self.pending_state.do_fill = False + vals = list(b.color().getRgbF()) + vals[-1] *= state.opacity + color = Color(*vals) + pdf.set_fill_color(color) + diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index 908c4ff919..be78ddda66 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -369,25 +369,8 @@ class PDFStream(object): op = 'W' if fill_rule == 'winding' else 'W*' self.current_page.write_line(op + ' ' + 'n') - def set_dash(self, array, phase=0): - array = Array(array) - serialize(array, self.current_page) - self.current_page.write(b' ') - serialize(phase, self.current_page) - 
self.current_page.write_line(' d') - - def set_line_width(self, width): - serialize(width, self.current_page) - self.current_page.write_line(' w') - - def set_line_cap(self, style): - serialize({'flat':0, 'round':1, 'square':2}.get(style), - self.current_page) - self.current_page.write_line(' J') - - def set_line_join(self, style): - serialize({'miter':0, 'round':1, 'bevel':2}[style], self.current_page) - self.current_page.write_line(' j') + def serialize(self, o): + serialize(o, self.current_page) def set_stroke_color(self, color): opacity = color.opacity From 5538e0ebdfe6b6c408644341838c3c24e3cf98a1 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 30 Dec 2012 20:50:03 -0500 Subject: [PATCH 21/46] Store: Fix B&N Plugin. --- src/calibre/gui2/store/stores/bn_plugin.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/store/stores/bn_plugin.py b/src/calibre/gui2/store/stores/bn_plugin.py index 65a7eee194..8f2f988974 100644 --- a/src/calibre/gui2/store/stores/bn_plugin.py +++ b/src/calibre/gui2/store/stores/bn_plugin.py @@ -6,6 +6,7 @@ __license__ = 'GPL 3' __copyright__ = '2011, John Schember ' __docformat__ = 'restructuredtext en' +import re import urllib from contextlib import closing @@ -50,12 +51,17 @@ class BNStore(BasicStoreConfig, StorePlugin): if not id: continue - cover_url = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@src')) + cover_url = '' + cover_id = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@id')) + m = re.search(r"%s'.*?srcUrl: '(?P.*?)'.*?}" % cover_id, raw) + if m: + cover_url = m.group('iurl') title = ''.join(data.xpath('descendant::p[@class="title"]//span[@class="name"]//text()')).strip() - if not title: continue + if not title: + continue - author = ', '.join(data.xpath('.//ul[@class="contributors"]//a[@class="subtle"]//text()')).strip() + author = ', '.join(data.xpath('.//ul[contains(@class, "contributors")]//a[contains(@class, 
"subtle")]//text()')).strip() price = ''.join(data.xpath('.//a[contains(@class, "bn-price")]//text()')) counter -= 1 From 9dbeb3c877f7477116d9cbc1de46edd3c7d0ec42 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 30 Dec 2012 21:12:32 -0500 Subject: [PATCH 22/46] Store: Fix Google Books. --- src/calibre/gui2/store/stores/google_books_plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/gui2/store/stores/google_books_plugin.py b/src/calibre/gui2/store/stores/google_books_plugin.py index 63fc3ef942..6ffeab517c 100644 --- a/src/calibre/gui2/store/stores/google_books_plugin.py +++ b/src/calibre/gui2/store/stores/google_books_plugin.py @@ -59,7 +59,7 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin): counter = max_results with closing(br.open(url, timeout=timeout)) as f: doc = html.fromstring(f.read()) - for data in doc.xpath('//ol[@id="rso"]/li'): + for data in doc.xpath('//ol/li'): if counter <= 0: break @@ -68,7 +68,7 @@ class GoogleBooksStore(BasicStoreConfig, StorePlugin): continue title = ''.join(data.xpath('.//h3/a//text()')) - authors = data.xpath('.//div[@class="f"]//a//text()') + authors = data.xpath('.//span[contains(@class, "f")]//a//text()') while authors and authors[-1].strip().lower() in ('preview', 'read', 'more editions'): authors = authors[:-1] if not authors: From 0b9720f2d2cb3a459e4a33dabf3703e56a21d6fd Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 30 Dec 2012 21:16:16 -0500 Subject: [PATCH 23/46] Store: Fix Smashwords plugin. 
--- src/calibre/gui2/store/stores/smashwords_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/store/stores/smashwords_plugin.py b/src/calibre/gui2/store/stores/smashwords_plugin.py index 2c69417612..983067ab51 100644 --- a/src/calibre/gui2/store/stores/smashwords_plugin.py +++ b/src/calibre/gui2/store/stores/smashwords_plugin.py @@ -76,7 +76,7 @@ class SmashwordsStore(BasicStoreConfig, StorePlugin): title = ''.join(data.xpath('//a[@class="bookTitle"]/text()')) subnote = ''.join(data.xpath('//span[@class="subnote"]/text()')) - author = ''.join(data.xpath('//span[@class="subnote"]/a/text()')) + author = ''.join(data.xpath('//span[@class="subnote"]//a[1]//text()')) if '$' in subnote: price = subnote.partition('$')[2] price = price.split(u'\xa0')[0] From 1772edc3fa0616e98a893edab3b49335e533a76c Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 30 Dec 2012 21:46:37 -0500 Subject: [PATCH 24/46] Store: Add Nook UK store. --- src/calibre/customize/builtins.py | 13 +++- .../gui2/store/stores/nook_uk_plugin.py | 75 +++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 src/calibre/gui2/store/stores/nook_uk_plugin.py diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 8229f32b57..a6dde30a94 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -1529,6 +1529,15 @@ class StoreNextoStore(StoreBase): formats = ['EPUB', 'MOBI', 'PDF'] affiliate = True +class StoreNookUKStore(StoreBase): + name = 'Nook UK' + author = 'John Schember' + description = u'Barnes & Noble S.à r.l, a subsidiary of Barnes & Noble, Inc., a leading retailer of content, digital media and educational products, is proud to bring the award-winning NOOK® reading experience and a leading digital bookstore to the UK.' 
+ actual_plugin = 'calibre.gui2.store.stores.nook_uk_plugin:NookUKStore' + + headquarters = 'UK' + formats = ['NOOK'] + class StoreOpenBooksStore(StoreBase): name = 'Open Books' description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.' @@ -1660,7 +1669,7 @@ plugins += [ StoreAmazonITKindleStore, StoreAmazonUKKindleStore, StoreBaenWebScriptionStore, - StoreBNStore, StoreSonyStore, + StoreBNStore, StoreBeWriteStore, StoreBiblioStore, StoreBookotekaStore, @@ -1686,12 +1695,14 @@ plugins += [ StoreMillsBoonUKStore, StoreMobileReadStore, StoreNextoStore, + StoreNookUKStore, StoreOpenBooksStore, StoreOzonRUStore, StorePragmaticBookshelfStore, StorePublioStore, StoreRW2010Store, StoreSmashwordsStore, + StoreSonyStore, StoreVirtualoStore, StoreWaterstonesUKStore, StoreWeightlessBooksStore, diff --git a/src/calibre/gui2/store/stores/nook_uk_plugin.py b/src/calibre/gui2/store/stores/nook_uk_plugin.py new file mode 100644 index 0000000000..1ff8b688bb --- /dev/null +++ b/src/calibre/gui2/store/stores/nook_uk_plugin.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +from __future__ import (unicode_literals, division, absolute_import, print_function) + +__license__ = 'GPL 3' +__copyright__ = '2012, John Schember ' +__docformat__ = 'restructuredtext en' + +import re +import urllib +from contextlib import closing + +from lxml import html + +from PyQt4.Qt import QUrl + +from calibre import browser, url_slash_cleaner +from calibre.gui2 import open_url +from calibre.gui2.store import StorePlugin +from calibre.gui2.store.basic_config import BasicStoreConfig +from calibre.gui2.store.search_result import SearchResult +from calibre.gui2.store.web_store_dialog import WebStoreDialog + +class NookUKStore(BasicStoreConfig, StorePlugin): + + def open(self, parent=None, detail_item=None, external=False): + url = "http://uk.nook.com" + + if external or self.config.get('open_external', False): + open_url(QUrl(url_slash_cleaner(detail_item if 
detail_item else url))) + else: + d = WebStoreDialog(self.gui, url, parent, detail_item) + d.setWindowTitle(self.name) + d.set_tags(self.config.get('tags', '')) + d.exec_() + + def search(self, query, max_results=10, timeout=60): + url = u'http://uk.nook.com/s/%s?s%%5Bdref%%5D=1&s%%5Bkeyword%%5D=%s' % (query.replace(' ', '-'), urllib.quote(query)) + + br = browser() + + counter = max_results + with closing(br.open(url, timeout=timeout)) as f: + raw = f.read() + doc = html.fromstring(raw) + for data in doc.xpath('//ul[contains(@class, "product_list")]/li'): + if counter <= 0: + break + + id = ''.join(data.xpath('.//span[contains(@class, "image")]/a/@href')) + if not id: + continue + + cover_url = ''.join(data.xpath('.//span[contains(@class, "image")]//img/@data-src')) + + title = ''.join(data.xpath('.//div[contains(@class, "title")]//text()')).strip() + if not title: + continue + + author = ', '.join(data.xpath('.//div[contains(@class, "contributor")]//a/text()')).strip() + price = ''.join(data.xpath('.//div[contains(@class, "action")]//a//text()')).strip() + price = re.sub(r'[^\d.,£]', '', price); + + counter -= 1 + + s = SearchResult() + s.cover_url = cover_url + s.title = title.strip() + s.author = author.strip() + s.price = price.strip() + s.detail_item = 'http://uk.nook.com/' + id.strip() + s.drm = SearchResult.DRM_UNKNOWN + s.formats = 'Nook' + + yield s From 607d6fd8424a8b0174ed45140a57d05528d970ee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 09:30:47 +0530 Subject: [PATCH 25/46] ... 
--- src/qt-harfbuzz/src/Makefile.am | 69 --------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 src/qt-harfbuzz/src/Makefile.am diff --git a/src/qt-harfbuzz/src/Makefile.am b/src/qt-harfbuzz/src/Makefile.am deleted file mode 100644 index 51d06525da..0000000000 --- a/src/qt-harfbuzz/src/Makefile.am +++ /dev/null @@ -1,69 +0,0 @@ -## Process this file with automake to produce Makefile.in - -noinst_LTLIBRARIES = libharfbuzz-1.la - -MAINSOURCES = \ - harfbuzz-buffer.c \ - harfbuzz-stream.c \ - harfbuzz-dump.c \ - harfbuzz-gdef.c \ - harfbuzz-gpos.c \ - harfbuzz-gsub.c \ - harfbuzz-impl.c \ - harfbuzz-open.c \ - harfbuzz-shaper.cpp \ - harfbuzz-greek.c \ - harfbuzz-tibetan.c \ - harfbuzz-khmer.c \ - harfbuzz-indic.cpp \ - harfbuzz-hebrew.c \ - harfbuzz-arabic.c \ - harfbuzz-hangul.c \ - harfbuzz-myanmar.c \ - harfbuzz-thai.c - -EXTRA_SOURCES = harfbuzz.c - -PUBLICHEADERS = \ - harfbuzz.h \ - harfbuzz-buffer.h \ - harfbuzz-dump.h \ - harfbuzz-gdef.h \ - harfbuzz-gpos.h \ - harfbuzz-gsub.h \ - harfbuzz-open.h \ - harfbuzz-global.h \ - harfbuzz-external.h \ - harfbuzz-shaper.h \ - harfbuzz-stream.h - -PRIVATEHEADERS = \ - harfbuzz-impl.h \ - harfbuzz-buffer-private.h \ - harfbuzz-stream-private.h \ - harfbuzz-gdef-private.h \ - harfbuzz-gpos-private.h \ - harfbuzz-gsub-private.h \ - harfbuzz-open-private.h \ - harfbuzz-shaper-private.h - -libharfbuzz_1_la_SOURCES = \ - $(MAINSOURCES) \ - $(PUBLICHEADERS) \ - $(PRIVATEHEADERS) - -#noinst_PROGRAMS = harfbuzz-dump -# -#harfbuzz_dump_SOURCES = \ -# harfbuzz-dump-main.c -# -#harfbuzz_dump_LDADD = \ -# libharfbuzz-1.la - -EXTRA_DIST = \ - README \ - COPYING.FTL \ - COPYING.GPL \ - COPYING \ - $(EXTRA_SOURCES) - From e4d12dc5207d4d9b45b2661019a95f9b85c94e66 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 09:31:46 +0530 Subject: [PATCH 26/46] ... 
--- README | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README b/README index 2ffab4e2f6..a1e3081988 100644 --- a/README +++ b/README @@ -1,7 +1,7 @@ -calibre is an e-book library manager. It can view, convert and catalog e-books \ -in most of the major e-book formats. It can also talk to e-book reader \ -devices. It can go out to the internet and fetch metadata for your books. \ -It can download newspapers and convert them into e-books for convenient \ +calibre is an e-book library manager. It can view, convert and catalog e-books +in most of the major e-book formats. It can also talk to e-book reader +devices. It can go out to the internet and fetch metadata for your books. +It can download newspapers and convert them into e-books for convenient reading. It is cross platform, running on Linux, Windows and OS X. For screenshots: https://calibre-ebook.com/demo @@ -15,5 +15,5 @@ bzr branch lp:calibre To update your copy of the source code: bzr merge -Tarballs of the source code for each release are now available \ +Tarballs of the source code for each release are now available at http://code.google.com/p/calibre-ebook From fe37caf9b59a7626fb9278dfe08b3d416e25b443 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 09:54:32 +0530 Subject: [PATCH 27/46] ... --- src/calibre/gui2/preferences/server.ui | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/preferences/server.ui b/src/calibre/gui2/preferences/server.ui index a229da594f..163221594f 100644 --- a/src/calibre/gui2/preferences/server.ui +++ b/src/calibre/gui2/preferences/server.ui @@ -273,7 +273,7 @@ <p>Remember to leave calibre running as the server only runs as long as calibre is running. -<p>To connect to the calibre server from your device you should use a URL of the form <b>http://myhostname:8080</b> as a new catalog in the Stanza reader on your iPhone. 
Here myhostname should be either the fully qualified hostname or the IP address of the computer calibre is running on. +<p>To connect to the calibre server from your device you should use a URL of the form <b>http://myhostname:8080</b>. Here myhostname should be either the fully qualified hostname or the IP address of the computer calibre is running on. If you want to access the server from anywhere in the world, you will have to setup port forwarding for it on your router. true From ed89e9b4657ea3f802eeb4d0963b0741a71a83ef Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 13:33:38 +0530 Subject: [PATCH 28/46] Add support for non-solid fills, tiled pixmaps, refactor testing --- src/calibre/ebooks/pdf/render/engine.py | 165 +++---------- src/calibre/ebooks/pdf/render/graphics.py | 254 ++++++++++++++++++++- src/calibre/ebooks/pdf/render/serialize.py | 110 ++++++++- src/calibre/ebooks/pdf/render/test.py | 99 ++++++++ 4 files changed, 476 insertions(+), 152 deletions(-) create mode 100644 src/calibre/ebooks/pdf/render/test.py diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index aa1fa17cc3..7987bd9c6e 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -13,9 +13,7 @@ from functools import wraps, partial from future_builtins import map import sip -from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QApplication, QPainter, - QTransform, QImage, QByteArray, QBuffer, - qRgba) +from PyQt4.Qt import (QPaintEngine, QPaintDevice, Qt, QTransform, QBrush) from calibre.constants import plugins from calibre.ebooks.pdf.render.serialize import (PDFStream, Path) @@ -51,11 +49,19 @@ class Font(FontMetrics): class PdfEngine(QPaintEngine): + FEATURES = QPaintEngine.AllFeatures & ~( + QPaintEngine.PorterDuff | QPaintEngine.PerspectiveTransform + | QPaintEngine.ObjectBoundingModeGradients + | QPaintEngine.LinearGradientFill + | QPaintEngine.RadialGradientFill + | 
QPaintEngine.ConicalGradientFill + ) + def __init__(self, file_object, page_width, page_height, left_margin, top_margin, right_margin, bottom_margin, width, height, errors=print, debug=print, compress=True, mark_links=False): - QPaintEngine.__init__(self, self.features) + QPaintEngine.__init__(self, self.FEATURES) self.file_object = file_object self.compress, self.mark_links = compress, mark_links self.page_height, self.page_width = page_height, page_width @@ -80,9 +86,6 @@ class PdfEngine(QPaintEngine): self.errors_occurred = False self.errors, self.debug = errors, debug self.fonts = {} - i = QImage(1, 1, QImage.Format_ARGB32) - i.fill(qRgba(0, 0, 0, 255)) - self.alpha_bit = i.constBits().asstring(4).find(b'\xff') self.current_page_num = 1 self.current_page_inited = False self.qt_hack, err = plugins['qt_hack'] @@ -107,13 +110,6 @@ class PdfEngine(QPaintEngine): self.pdf.save_stack() self.current_page_inited = True - @property - def features(self): - # gradient_flags = self.MaskedBrush | self.PatternBrush | self.PatternTransform - return (self.Antialiasing | self.AlphaBlend | self.ConstantOpacity | - self.PainterPaths | self.PaintOutsidePaintEvent | - self.PrimitiveTransform | self.PixmapTransform) #| gradient_flags - def begin(self, device): if not hasattr(self, 'pdf'): try: @@ -149,7 +145,23 @@ class PdfEngine(QPaintEngine): def type(self): return QPaintEngine.Pdf - # TODO: Tiled pixmap + def add_image(self, img, cache_key): + if img.isNull(): return + return self.pdf.add_image(img, cache_key) + + @store_error + def drawTiledPixmap(self, rect, pixmap, point): + self.apply_graphics_state() + brush = QBrush(pixmap) + color, opacity, pattern, do_fill = self.graphics.convert_brush( + brush, -point, 1.0, self.pdf, self.pdf_system, + self.painter().transform()) + self.pdf.save_stack() + self.pdf.apply_fill(color, pattern) + bl = rect.topLeft() + self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(), + stroke=False, fill=True) + self.pdf.restore_stack() 
@store_error def drawPixmap(self, rect, pixmap, source_rect): @@ -160,8 +172,8 @@ class PdfEngine(QPaintEngine): image = pixmap.toImage() ref = self.add_image(image, pixmap.cacheKey()) if ref is not None: - self.pdf.draw_image(rect.x(), rect.height()+rect.y(), rect.width(), - -rect.height(), ref) + self.pdf.draw_image(rect.x(), rect.y(), rect.width(), + rect.height(), ref) @store_error def drawImage(self, rect, image, source_rect, flags=Qt.AutoColor): @@ -171,72 +183,8 @@ class PdfEngine(QPaintEngine): image.copy(source_rect)) ref = self.add_image(image, image.cacheKey()) if ref is not None: - self.pdf.draw_image(rect.x(), rect.height()+rect.y(), rect.width(), - -rect.height(), ref) - - def add_image(self, img, cache_key): - if img.isNull(): return - ref = self.pdf.get_image(cache_key) - if ref is not None: - return ref - - fmt = img.format() - image = QImage(img) - if (image.depth() == 1 and img.colorTable().size() == 2 and - img.colorTable().at(0) == QColor(Qt.black).rgba() and - img.colorTable().at(1) == QColor(Qt.white).rgba()): - if fmt == QImage.Format_MonoLSB: - image = image.convertToFormat(QImage.Format_Mono) - fmt = QImage.Format_Mono - else: - if (fmt != QImage.Format_RGB32 and fmt != QImage.Format_ARGB32): - image = image.convertToFormat(QImage.Format_ARGB32) - fmt = QImage.Format_ARGB32 - - w = image.width() - h = image.height() - d = image.depth() - - if fmt == QImage.Format_Mono: - bytes_per_line = (w + 7) >> 3 - data = image.constBits().asstring(bytes_per_line * h) - return self.pdf.write_image(data, w, h, d, cache_key=cache_key) - - ba = QByteArray() - buf = QBuffer(ba) - image.save(buf, 'jpeg', 94) - data = bytes(ba.data()) - has_alpha = has_mask = False - soft_mask = mask = None - - if fmt == QImage.Format_ARGB32: - tmask = image.constBits().asstring(4*w*h)[self.alpha_bit::4] - sdata = bytearray(tmask) - vals = set(sdata) - vals.discard(255) - has_mask = bool(vals) - vals.discard(0) - has_alpha = bool(vals) - - if has_alpha: - soft_mask = 
self.pdf.write_image(tmask, w, h, 8) - elif has_mask: - # dither the soft mask to 1bit and add it. This also helps PDF - # viewers without transparency support - bytes_per_line = (w + 7) >> 3 - mdata = bytearray(0 for i in xrange(bytes_per_line * h)) - spos = mpos = 0 - for y in xrange(h): - for x in xrange(w): - if sdata[spos]: - mdata[mpos + x>>3] |= (0x80 >> (x&7)) - spos += 1 - mpos += bytes_per_line - mdata = bytes(mdata) - mask = self.pdf.write_image(mdata, w, h, 1) - - return self.pdf.write_image(data, w, h, 32, mask=mask, dct=True, - soft_mask=soft_mask, cache_key=cache_key) + self.pdf.draw_image(rect.x(), rect.y(), rect.width(), + rect.height(), ref) @store_error def updateState(self, state): @@ -411,55 +359,4 @@ class PdfDevice(QPaintDevice): # {{{ # }}} -if __name__ == '__main__': - from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap, QPainterPath) - QBrush, QColor, QPoint, QPixmap, QPainterPath - app = QApplication([]) - p = QPainter() - with open('/t/painter.pdf', 'wb') as f: - dev = PdfDevice(f, compress=False) - p.begin(dev) - dev.init_page() - xmax, ymax = p.viewport().width(), p.viewport().height() - b = p.brush() - try: - p.drawRect(0, 0, xmax, ymax) - # p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax), - # QPoint(0, ymax), QPoint(0, 0)) - # pp = QPainterPath() - # pp.addRect(0, 0, xmax, ymax) - # p.drawPath(pp) - p.save() - for i in xrange(3): - col = [0, 0, 0, 200] - col[i] = 255 - p.setOpacity(0.3) - p.fillRect(0, 0, xmax/10, xmax/10, QBrush(QColor(*col))) - p.setOpacity(1) - p.drawRect(0, 0, xmax/10, xmax/10) - p.translate(xmax/10, xmax/10) - p.scale(1, 1.5) - p.restore() - - # p.scale(2, 2) - # p.rotate(45) - p.drawPixmap(0, 0, 2048, 2048, QPixmap(I('library.png'))) - p.drawRect(0, 0, 2048, 2048) - - f = p.font() - f.setPointSize(20) - # f.setLetterSpacing(f.PercentageSpacing, 200) - # f.setUnderline(True) - # f.setOverline(True) - # f.setStrikeOut(True) - f.setFamily('Calibri') - p.setFont(f) - # p.setPen(QColor(0, 0, 255)) 
- # p.scale(2, 2) - # p.rotate(45) - p.drawText(QPoint(300, 300), 'Some—text not By’s ū --- Д AV ff ff') - finally: - p.end() - if dev.engine.errors_occurred: - raise SystemExit(1) diff --git a/src/calibre/ebooks/pdf/render/graphics.py b/src/calibre/ebooks/pdf/render/graphics.py index 68efb2514a..384809598a 100644 --- a/src/calibre/ebooks/pdf/render/graphics.py +++ b/src/calibre/ebooks/pdf/render/graphics.py @@ -9,13 +9,14 @@ __docformat__ = 'restructuredtext en' from math import sqrt -from PyQt4.Qt import (QBrush, QPen, Qt, QPointF, QTransform, QPainterPath, - QPaintEngine) +from PyQt4.Qt import ( + QBrush, QPen, Qt, QPointF, QTransform, QPainterPath, QPaintEngine, QImage) -from calibre.ebooks.pdf.render.common import Array +from calibre.ebooks.pdf.render.common import ( + Name, Array, fmtnum, Stream, Dictionary) from calibre.ebooks.pdf.render.serialize import Path, Color -def convert_path(path): +def convert_path(path): # {{{ p = Path() i = 0 while i < path.elementCount(): @@ -38,7 +39,201 @@ def convert_path(path): if not added: raise ValueError('Invalid curve to operation') return p +# }}} +class TilingPattern(Stream): + + def __init__(self, cache_key, matrix, w=8, h=8, paint_type=2, compress=False): + Stream.__init__(self, compress=compress) + self.paint_type = paint_type + self.w, self.h = w, h + self.matrix = (matrix.m11(), matrix.m12(), matrix.m21(), matrix.m22(), + matrix.dx(), matrix.dy()) + self.resources = Dictionary() + self.cache_key = (self.__class__.__name__, cache_key, self.matrix) + + def add_extra_keys(self, d): + d['Type'] = Name('Pattern') + d['PatternType'] = 1 + d['PaintType'] = self.paint_type + d['TilingType'] = 1 + d['BBox'] = Array([0, 0, self.w, self.h]) + d['XStep'] = self.w + d['YStep'] = self.h + d['Matrix'] = Array(self.matrix) + d['Resources'] = self.resources + +class QtPattern(TilingPattern): + + qt_patterns = ( # {{{ + "0 J\n" + "6 w\n" + "[] 0 d\n" + "4 0 m\n" + "4 8 l\n" + "0 4 m\n" + "8 4 l\n" + "S\n", # Dense1Pattern + + "0 
J\n" + "2 w\n" + "[6 2] 1 d\n" + "0 0 m\n" + "0 8 l\n" + "8 0 m\n" + "8 8 l\n" + "S\n" + "[] 0 d\n" + "2 0 m\n" + "2 8 l\n" + "6 0 m\n" + "6 8 l\n" + "S\n" + "[6 2] -3 d\n" + "4 0 m\n" + "4 8 l\n" + "S\n", # Dense2Pattern + + "0 J\n" + "2 w\n" + "[6 2] 1 d\n" + "0 0 m\n" + "0 8 l\n" + "8 0 m\n" + "8 8 l\n" + "S\n" + "[2 2] -1 d\n" + "2 0 m\n" + "2 8 l\n" + "6 0 m\n" + "6 8 l\n" + "S\n" + "[6 2] -3 d\n" + "4 0 m\n" + "4 8 l\n" + "S\n", # Dense3Pattern + + "0 J\n" + "2 w\n" + "[2 2] 1 d\n" + "0 0 m\n" + "0 8 l\n" + "8 0 m\n" + "8 8 l\n" + "S\n" + "[2 2] -1 d\n" + "2 0 m\n" + "2 8 l\n" + "6 0 m\n" + "6 8 l\n" + "S\n" + "[2 2] 1 d\n" + "4 0 m\n" + "4 8 l\n" + "S\n", # Dense4Pattern + + "0 J\n" + "2 w\n" + "[2 6] -1 d\n" + "0 0 m\n" + "0 8 l\n" + "8 0 m\n" + "8 8 l\n" + "S\n" + "[2 2] 1 d\n" + "2 0 m\n" + "2 8 l\n" + "6 0 m\n" + "6 8 l\n" + "S\n" + "[2 6] 3 d\n" + "4 0 m\n" + "4 8 l\n" + "S\n", # Dense5Pattern + + "0 J\n" + "2 w\n" + "[2 6] -1 d\n" + "0 0 m\n" + "0 8 l\n" + "8 0 m\n" + "8 8 l\n" + "S\n" + "[2 6] 3 d\n" + "4 0 m\n" + "4 8 l\n" + "S\n", # Dense6Pattern + + "0 J\n" + "2 w\n" + "[2 6] -1 d\n" + "0 0 m\n" + "0 8 l\n" + "8 0 m\n" + "8 8 l\n" + "S\n", # Dense7Pattern + + "1 w\n" + "0 4 m\n" + "8 4 l\n" + "S\n", # HorPattern + + "1 w\n" + "4 0 m\n" + "4 8 l\n" + "S\n", # VerPattern + + "1 w\n" + "4 0 m\n" + "4 8 l\n" + "0 4 m\n" + "8 4 l\n" + "S\n", # CrossPattern + + "1 w\n" + "-1 5 m\n" + "5 -1 l\n" + "3 9 m\n" + "9 3 l\n" + "S\n", # BDiagPattern + + "1 w\n" + "-1 3 m\n" + "5 9 l\n" + "3 -1 m\n" + "9 5 l\n" + "S\n", # FDiagPattern + + "1 w\n" + "-1 3 m\n" + "5 9 l\n" + "3 -1 m\n" + "9 5 l\n" + "-1 5 m\n" + "5 -1 l\n" + "3 9 m\n" + "9 3 l\n" + "S\n", # DiagCrossPattern + ) # }}} + + def __init__(self, pattern_num, matrix): + super(QtPattern, self).__init__(pattern_num, matrix) + self.write(self.qt_patterns[pattern_num-2]) + +class TexturePattern(TilingPattern): + + def __init__(self, pixmap, matrix, pdf): + image = pixmap.toImage() + cache_key = 
pixmap.cacheKey() + imgref = pdf.add_image(image, cache_key) + paint_type = (2 if image.format() in {QImage.Format_MonoLSB, + QImage.Format_Mono} else 1) + super(TexturePattern, self).__init__( + cache_key, matrix, w=image.width(), h=image.height(), + paint_type=paint_type) + m = (self.w, 0, 0, -self.h, 0, self.h) + self.resources['XObject'] = Dictionary({'Texture':imgref}) + self.write_line('%s cm /Texture Do'%(' '.join(map(fmtnum, m)))) class GraphicsState(object): @@ -54,6 +249,7 @@ class GraphicsState(object): self.clip = QPainterPath() self.do_fill = False self.do_stroke = True + self.qt_pattern_cache = {} def __eq__(self, other): for x in self.FIELDS: @@ -140,6 +336,43 @@ class Graphics(object): self.current_state = self.pending_state self.pending_state = None + def convert_brush(self, brush, brush_origin, global_opacity, pdf, + pdf_system, qt_system): + # Convert a QBrush to PDF operators + style = brush.style() + + pattern = color = None + opacity = 1.0 + do_fill = True + + matrix = (QTransform.fromTranslate(brush_origin.x(), brush_origin.y()) + * pdf_system * qt_system.inverted()[0]) + vals = list(brush.color().getRgbF()) + + if style <= Qt.DiagCrossPattern: + opacity = global_opacity * vals[-1] + color = vals[:3] + + if style > Qt.SolidPattern: + pattern = pdf.add_pattern(QtPattern(style, matrix)) + + if opacity < 1e-4 or style == Qt.NoBrush: + do_fill = False + + elif style == Qt.TexturePattern: + pat = TexturePattern(brush.texture(), matrix, pdf) + opacity = global_opacity + if pat.paint_type == 2: + opacity *= vals[-1] + color = vals[:3] + pattern = pdf.add_pattern(pat) + + if opacity < 1e-4 or style == Qt.NoBrush: + do_fill = False + + # TODO: Add support for gradient fills + return color, opacity, pattern, do_fill + def apply_stroke(self, state, pdf, pdf_system, painter): # TODO: Handle pens with non solid brushes by setting the colorspace # for stroking to a pattern @@ -172,7 +405,7 @@ class Graphics(object): Qt.DashDotDotLine:[3, 2, 1, 2, 1, 
2]}.get(pen.style(), []) if ps: pdf.serialize(Array(ps)) - pdf.current_page.write(' d ') + pdf.current_page.write(' 0 d ') # Stroke fill b = pen.brush() @@ -186,11 +419,8 @@ class Graphics(object): def apply_fill(self, state, pdf, pdf_system, painter): self.pending_state.do_fill = True - b = state.fill - if b.style() == Qt.NoBrush: - self.pending_state.do_fill = False - vals = list(b.color().getRgbF()) - vals[-1] *= state.opacity - color = Color(*vals) - pdf.set_fill_color(color) + color, opacity, pattern, self.pending_state.do_fill = self.convert_brush( + state.fill, state.brush_origin, state.opacity, pdf, pdf_system, + painter.transform()) + pdf.apply_fill(color, pattern, opacity) diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index be78ddda66..54a5f674b4 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -12,6 +12,8 @@ from future_builtins import map from itertools import izip from collections import namedtuple +from PyQt4.Qt import QBuffer, QByteArray, QImage, Qt, QColor, qRgba + from calibre.constants import (__appname__, __version__) from calibre.ebooks.pdf.render.common import ( Reference, EOL, serialize, Stream, Dictionary, String, Name, Array, @@ -90,6 +92,7 @@ class Page(Stream): self.opacities = {} self.fonts = {} self.xobjects = {} + self.patterns = {} def set_opacity(self, opref): if opref not in self.opacities: @@ -108,6 +111,11 @@ class Page(Stream): self.xobjects[imgref] = 'Image%d'%len(self.xobjects) return self.xobjects[imgref] + def add_pattern(self, patternref): + if patternref not in self.patterns: + self.patterns[patternref] = 'Pat%d'%len(self.patterns) + return self.patterns[patternref] + def add_resources(self): r = Dictionary() if self.opacities: @@ -125,6 +133,13 @@ class Page(Stream): for ref, name in self.xobjects.iteritems(): xobjects[name] = ref r['XObject'] = xobjects + if self.patterns: + r['ColorSpace'] = 
Dictionary({'PCSp':Array( + [Name('Pattern'), Name('DeviceRGB')])}) + patterns = Dictionary() + for ref, name in self.patterns.iteritems(): + patterns[name] = ref + r['Pattern'] = patterns if r: self.page_dict['Resources'] = r @@ -299,8 +314,12 @@ class PDFStream(object): self.stroke_opacities, self.fill_opacities = {}, {} self.font_manager = FontManager(self.objects, self.compress) self.image_cache = {} + self.pattern_cache = {} self.debug = debug self.links = Links(self, mark_links, page_size) + i = QImage(1, 1, QImage.Format_ARGB32) + i.fill(qRgba(0, 0, 0, 255)) + self.alpha_bit = i.constBits().asstring(4).find(b'\xff') @property def page_tree(self): @@ -380,13 +399,12 @@ class PDFStream(object): self.current_page.set_opacity(self.stroke_opacities[opacity]) self.current_page.write_line(' '.join(map(fmtnum, color[:3])) + ' SC') - def set_fill_color(self, color): - opacity = color.opacity + def set_fill_opacity(self, opacity): + opacity = float(opacity) if opacity not in self.fill_opacities: op = Dictionary({'Type':Name('ExtGState'), 'ca': opacity}) self.fill_opacities[opacity] = self.objects.add(op) self.current_page.set_opacity(self.fill_opacities[opacity]) - self.current_page.write_line(' '.join(map(fmtnum, color[:3])) + ' sc') def end_page(self): pageref = self.current_page.end(self.objects, self.stream) @@ -425,13 +443,93 @@ class PDFStream(object): self.objects.commit(r, self.stream) return r - def draw_image(self, x, y, xscale, yscale, imgref): + def add_image(self, img, cache_key): + ref = self.get_image(cache_key) + if ref is not None: + return ref + + fmt = img.format() + image = QImage(img) + if (image.depth() == 1 and img.colorTable().size() == 2 and + img.colorTable().at(0) == QColor(Qt.black).rgba() and + img.colorTable().at(1) == QColor(Qt.white).rgba()): + if fmt == QImage.Format_MonoLSB: + image = image.convertToFormat(QImage.Format_Mono) + fmt = QImage.Format_Mono + else: + if (fmt != QImage.Format_RGB32 and fmt != QImage.Format_ARGB32): + image 
= image.convertToFormat(QImage.Format_ARGB32) + fmt = QImage.Format_ARGB32 + + w = image.width() + h = image.height() + d = image.depth() + + if fmt == QImage.Format_Mono: + bytes_per_line = (w + 7) >> 3 + data = image.constBits().asstring(bytes_per_line * h) + return self.write_image(data, w, h, d, cache_key=cache_key) + + ba = QByteArray() + buf = QBuffer(ba) + image.save(buf, 'jpeg', 94) + data = bytes(ba.data()) + has_alpha = has_mask = False + soft_mask = mask = None + + if fmt == QImage.Format_ARGB32: + tmask = image.constBits().asstring(4*w*h)[self.alpha_bit::4] + sdata = bytearray(tmask) + vals = set(sdata) + vals.discard(255) + has_mask = bool(vals) + vals.discard(0) + has_alpha = bool(vals) + + if has_alpha: + soft_mask = self.write_image(tmask, w, h, 8) + elif has_mask: + # dither the soft mask to 1bit and add it. This also helps PDF + # viewers without transparency support + bytes_per_line = (w + 7) >> 3 + mdata = bytearray(0 for i in xrange(bytes_per_line * h)) + spos = mpos = 0 + for y in xrange(h): + for x in xrange(w): + if sdata[spos]: + mdata[mpos + x>>3] |= (0x80 >> (x&7)) + spos += 1 + mpos += bytes_per_line + mdata = bytes(mdata) + mask = self.write_image(mdata, w, h, 1) + + return self.write_image(data, w, h, 32, mask=mask, dct=True, + soft_mask=soft_mask, cache_key=cache_key) + + def add_pattern(self, pattern): + if pattern.cache_key not in self.pattern_cache: + self.pattern_cache[pattern.cache_key] = self.objects.add(pattern) + return self.current_page.add_pattern(self.pattern_cache[pattern.cache_key]) + + def draw_image(self, x, y, width, height, imgref): name = self.current_page.add_image(imgref) - self.current_page.write('q %s 0 0 %s %s %s cm '%(fmtnum(xscale), - fmtnum(yscale), fmtnum(x), fmtnum(y))) + self.current_page.write('q %s 0 0 %s %s %s cm '%(fmtnum(width), + fmtnum(-height), fmtnum(x), fmtnum(y+height))) serialize(Name(name), self.current_page) self.current_page.write_line(' Do Q') + def apply_fill(self, color=None, 
pattern=None, opacity=None): + if opacity is not None: + self.set_fill_opacity(opacity) + wl = self.current_page.write_line + if color is not None and pattern is None: + wl(' '.join(map(fmtnum, color)) + ' rg') + elif color is None and pattern is not None: + wl('/Pattern cs /%s scn'%pattern) + elif color is not None and pattern is not None: + col = ' '.join(map(fmtnum, color)) + wl('/PCSp cs %s /%s scn'%(col, pattern)) + def end(self): if self.current_page.getvalue(): self.end_page() diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py new file mode 100644 index 0000000000..3ea447dfc7 --- /dev/null +++ b/src/calibre/ebooks/pdf/render/test.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os +from tempfile import gettempdir + +from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, + QApplication, QPainter, Qt, QImage) +QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, Qt + +from calibre.ebooks.pdf.render.engine import PdfDevice + +def full(dev): + p = QPainter(dev) + if isinstance(dev, PdfDevice): + dev.init_page() + + xmax, ymax = p.viewport().width(), p.viewport().height() + b = p.brush() + try: + p.drawRect(0, 0, xmax, ymax) + p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax), + QPoint(0, ymax), QPoint(0, 0)) + pp = QPainterPath() + pp.addRect(0, 0, xmax, ymax) + p.drawPath(pp) + p.save() + for i in xrange(3): + col = [0, 0, 0, 200] + col[i] = 255 + p.setOpacity(0.3) + p.fillRect(0, 0, xmax/10, xmax/10, QBrush(QColor(*col))) + p.setOpacity(1) + p.drawRect(0, 0, xmax/10, xmax/10) + p.translate(xmax/10, xmax/10) + p.scale(1, 1.5) + p.restore() + + # p.scale(2, 2) + # p.rotate(45) + p.drawPixmap(0, 0, xmax/4, xmax/4, 
QPixmap(I('library.png'))) + p.drawRect(0, 0, xmax/4, xmax/4) + + f = p.font() + f.setPointSize(20) + # f.setLetterSpacing(f.PercentageSpacing, 200) + f.setUnderline(True) + # f.setOverline(True) + # f.setStrikeOut(True) + f.setFamily('Calibri') + p.setFont(f) + # p.setPen(QColor(0, 0, 255)) + # p.scale(2, 2) + # p.rotate(45) + p.drawText(QPoint(xmax/3.9, 30), 'Some—text not By’s ū --- Д AV ff ff') + + b = QBrush(Qt.HorPattern) + b.setColor(QColor(Qt.blue)) + pix = QPixmap(I('console.png')) + w = xmax/4 + p.fillRect(0, ymax/3, w, w, b) + p.fillRect(xmax/3, ymax/3, w, w, QBrush(pix)) + p.drawTiledPixmap(QRectF(2*xmax/3, ymax/3, w, w), pix) + finally: + p.end() + if isinstance(dev, PdfDevice): + if dev.engine.errors_occurred: + raise SystemExit(1) + +def main(): + app = QApplication([]) + app + tdir = gettempdir() + pdf = os.path.join(tdir, 'painter.pdf') + func = full + with open(pdf, 'wb') as f: + dev = PdfDevice(f, xdpi=100, ydpi=100, compress=False) + img = QImage(dev.width(), dev.height(), + QImage.Format_ARGB32_Premultiplied) + img.setDotsPerMeterX(100*39.37) + img.setDotsPerMeterY(100*39.37) + img.fill(Qt.white) + func(dev) + func(img) + path = os.path.join(tdir, 'painter.png') + img.save(path) + print ('PDF written to:', pdf) + print ('Image written to:', path) + +if __name__ == '__main__': + main() + + From 88a13dc8ee2e03b074beffa18b94d844b1ceedd2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 14:44:19 +0530 Subject: [PATCH 29/46] Fix drawTiledPixmap() to use the correct origin --- src/calibre/ebooks/pdf/render/engine.py | 4 ++-- src/calibre/ebooks/pdf/render/test.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 7987bd9c6e..6afbef223f 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -153,12 +153,12 @@ class PdfEngine(QPaintEngine): def drawTiledPixmap(self, rect, 
pixmap, point): self.apply_graphics_state() brush = QBrush(pixmap) + bl = rect.topLeft() color, opacity, pattern, do_fill = self.graphics.convert_brush( - brush, -point, 1.0, self.pdf, self.pdf_system, + brush, bl-point, 1.0, self.pdf, self.pdf_system, self.painter().transform()) self.pdf.save_stack() self.pdf.apply_fill(color, pattern) - bl = rect.topLeft() self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(), stroke=False, fill=True) self.pdf.restore_stack() diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index 3ea447dfc7..7a53741ca8 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -11,8 +11,9 @@ import os from tempfile import gettempdir from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, - QApplication, QPainter, Qt, QImage) -QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, Qt + QApplication, QPainter, Qt, QImage, QLinearGradient, + QPointF) +QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, Qt, QPointF from calibre.ebooks.pdf.render.engine import PdfDevice @@ -66,7 +67,14 @@ def full(dev): w = xmax/4 p.fillRect(0, ymax/3, w, w, b) p.fillRect(xmax/3, ymax/3, w, w, QBrush(pix)) - p.drawTiledPixmap(QRectF(2*xmax/3, ymax/3, w, w), pix) + x, y = 2*xmax/3, ymax/3 + p.drawTiledPixmap(QRectF(x, y, w, w), pix, QPointF(10, 10)) + + x, y = 1, ymax/1.9 + g = QLinearGradient(QPointF(x, y), QPointF(x+w, y+w)) + g.setColorAt(0, QColor('#00f')) + g.setColorAt(1, QColor('#006')) + p.fillRect(x, y, w, w, QBrush(g)) finally: p.end() if isinstance(dev, PdfDevice): From c84fa4bf80987b8610203a516e3856ee653dc15d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 15:51:59 +0530 Subject: [PATCH 30/46] ... 
--- src/calibre/ebooks/pdf/render/test.py | 111 +++++++++++++------------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index 7a53741ca8..cf077607a0 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -17,64 +17,65 @@ QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, Qt, QPointF from calibre.ebooks.pdf.render.engine import PdfDevice -def full(dev): +def full(p, xmax, ymax): + p.drawRect(0, 0, xmax, ymax) + p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax), + QPoint(0, ymax), QPoint(0, 0)) + pp = QPainterPath() + pp.addRect(0, 0, xmax, ymax) + p.drawPath(pp) + p.save() + for i in xrange(3): + col = [0, 0, 0, 200] + col[i] = 255 + p.setOpacity(0.3) + p.fillRect(0, 0, xmax/10, xmax/10, QBrush(QColor(*col))) + p.setOpacity(1) + p.drawRect(0, 0, xmax/10, xmax/10) + p.translate(xmax/10, xmax/10) + p.scale(1, 1.5) + p.restore() + + # p.scale(2, 2) + # p.rotate(45) + p.drawPixmap(0, 0, xmax/4, xmax/4, QPixmap(I('library.png'))) + p.drawRect(0, 0, xmax/4, xmax/4) + + f = p.font() + f.setPointSize(20) + # f.setLetterSpacing(f.PercentageSpacing, 200) + f.setUnderline(True) + # f.setOverline(True) + # f.setStrikeOut(True) + f.setFamily('Calibri') + p.setFont(f) + # p.setPen(QColor(0, 0, 255)) + # p.scale(2, 2) + # p.rotate(45) + p.drawText(QPoint(xmax/3.9, 30), 'Some—text not By’s ū --- Д AV ff ff') + + b = QBrush(Qt.HorPattern) + b.setColor(QColor(Qt.blue)) + pix = QPixmap(I('console.png')) + w = xmax/4 + p.fillRect(0, ymax/3, w, w, b) + p.fillRect(xmax/3, ymax/3, w, w, QBrush(pix)) + x, y = 2*xmax/3, ymax/3 + p.drawTiledPixmap(QRectF(x, y, w, w), pix, QPointF(10, 10)) + + x, y = 1, ymax/1.9 + g = QLinearGradient(QPointF(x, y), QPointF(x+w, y+w)) + g.setColorAt(0, QColor('#00f')) + g.setColorAt(1, QColor('#fff')) + p.fillRect(x, y, w, w, QBrush(g)) + +def run(dev, func): p = QPainter(dev) if isinstance(dev, 
PdfDevice): dev.init_page() - xmax, ymax = p.viewport().width(), p.viewport().height() - b = p.brush() try: - p.drawRect(0, 0, xmax, ymax) - p.drawPolyline(QPoint(0, 0), QPoint(xmax, 0), QPoint(xmax, ymax), - QPoint(0, ymax), QPoint(0, 0)) - pp = QPainterPath() - pp.addRect(0, 0, xmax, ymax) - p.drawPath(pp) - p.save() - for i in xrange(3): - col = [0, 0, 0, 200] - col[i] = 255 - p.setOpacity(0.3) - p.fillRect(0, 0, xmax/10, xmax/10, QBrush(QColor(*col))) - p.setOpacity(1) - p.drawRect(0, 0, xmax/10, xmax/10) - p.translate(xmax/10, xmax/10) - p.scale(1, 1.5) - p.restore() - - # p.scale(2, 2) - # p.rotate(45) - p.drawPixmap(0, 0, xmax/4, xmax/4, QPixmap(I('library.png'))) - p.drawRect(0, 0, xmax/4, xmax/4) - - f = p.font() - f.setPointSize(20) - # f.setLetterSpacing(f.PercentageSpacing, 200) - f.setUnderline(True) - # f.setOverline(True) - # f.setStrikeOut(True) - f.setFamily('Calibri') - p.setFont(f) - # p.setPen(QColor(0, 0, 255)) - # p.scale(2, 2) - # p.rotate(45) - p.drawText(QPoint(xmax/3.9, 30), 'Some—text not By’s ū --- Д AV ff ff') - - b = QBrush(Qt.HorPattern) - b.setColor(QColor(Qt.blue)) - pix = QPixmap(I('console.png')) - w = xmax/4 - p.fillRect(0, ymax/3, w, w, b) - p.fillRect(xmax/3, ymax/3, w, w, QBrush(pix)) - x, y = 2*xmax/3, ymax/3 - p.drawTiledPixmap(QRectF(x, y, w, w), pix, QPointF(10, 10)) - - x, y = 1, ymax/1.9 - g = QLinearGradient(QPointF(x, y), QPointF(x+w, y+w)) - g.setColorAt(0, QColor('#00f')) - g.setColorAt(1, QColor('#006')) - p.fillRect(x, y, w, w, QBrush(g)) + func(p, xmax, ymax) finally: p.end() if isinstance(dev, PdfDevice): @@ -94,8 +95,8 @@ def main(): img.setDotsPerMeterX(100*39.37) img.setDotsPerMeterY(100*39.37) img.fill(Qt.white) - func(dev) - func(img) + run(dev, func) + run(img, func) path = os.path.join(tdir, 'painter.png') img.save(path) print ('PDF written to:', pdf) From 9caf30fc1a3405e6cb4b01baebac2cfb0c482de7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 16:20:14 +0530 Subject: [PATCH 31/46] ... 
--- src/calibre/ebooks/pdf/render/test.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index cf077607a0..8935650a92 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -82,12 +82,22 @@ def run(dev, func): if dev.engine.errors_occurred: raise SystemExit(1) +def brush(p, xmax, ymax): + x = y = 0 + w = xmax/3 + 10 + g = QLinearGradient(QPointF(0, 0), QPointF(0, w)) + g.setSpread(g.RepeatSpread) + g.setColorAt(0, QColor('#00f')) + g.setColorAt(1, QColor('#fff')) + p.fillRect(x, y, w, w, QBrush(g)) + p.drawRect(x, y, w, w) + def main(): app = QApplication([]) app tdir = gettempdir() pdf = os.path.join(tdir, 'painter.pdf') - func = full + func = brush with open(pdf, 'wb') as f: dev = PdfDevice(f, xdpi=100, ydpi=100, compress=False) img = QImage(dev.width(), dev.height(), From 32165539ea51f6e071526d60c883d51e4f700a76 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 16:41:19 +0530 Subject: [PATCH 32/46] ... 
--- src/calibre/ebooks/pdf/render/test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index 8935650a92..55fd5f8aea 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -98,12 +98,13 @@ def main(): tdir = gettempdir() pdf = os.path.join(tdir, 'painter.pdf') func = brush + dpi = 100 with open(pdf, 'wb') as f: - dev = PdfDevice(f, xdpi=100, ydpi=100, compress=False) + dev = PdfDevice(f, xdpi=dpi, ydpi=dpi, compress=False) img = QImage(dev.width(), dev.height(), QImage.Format_ARGB32_Premultiplied) - img.setDotsPerMeterX(100*39.37) - img.setDotsPerMeterY(100*39.37) + img.setDotsPerMeterX(dpi*39.37) + img.setDotsPerMeterY(dpi*39.37) img.fill(Qt.white) run(dev, func) run(img, func) From 9f13e30737ef809f731c1c576519acc554b6e63d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 18:42:08 +0530 Subject: [PATCH 33/46] Workaround for Qt's broken emulation of gradients with texture patterns --- src/calibre/ebooks/pdf/render/engine.py | 19 +++-- src/calibre/ebooks/pdf/render/graphics.py | 94 +++++++++++++++++------ src/calibre/ebooks/pdf/render/test.py | 17 ++-- 3 files changed, 93 insertions(+), 37 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 6afbef223f..77f1f00c57 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ b/src/calibre/ebooks/pdf/render/engine.py @@ -93,7 +93,11 @@ class PdfEngine(QPaintEngine): raise RuntimeError('Failed to load qt_hack with err: %s'%err) def apply_graphics_state(self): - self.graphics(self.pdf, self.pdf_system, self.painter()) + self.graphics(self.pdf_system, self.painter()) + + def resolve_fill(self, rect): + self.graphics.resolve_fill(rect, self.pdf_system, + self.painter().transform()) @property def do_fill(self): @@ -117,6 +121,7 @@ class PdfEngine(QPaintEngine): self.page_height), compress=self.compress, 
mark_links=self.mark_links, debug=self.debug) + self.graphics.begin(self.pdf) except: self.errors(traceback.format_exc()) self.errors_occurred = True @@ -155,7 +160,7 @@ class PdfEngine(QPaintEngine): brush = QBrush(pixmap) bl = rect.topLeft() color, opacity, pattern, do_fill = self.graphics.convert_brush( - brush, bl-point, 1.0, self.pdf, self.pdf_system, + brush, bl-point, 1.0, self.pdf_system, self.painter().transform()) self.pdf.save_stack() self.pdf.apply_fill(color, pattern) @@ -211,10 +216,12 @@ class PdfEngine(QPaintEngine): @store_error def drawRects(self, rects): self.apply_graphics_state() - for rect in rects: - bl = rect.topLeft() - self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(), - stroke=self.do_stroke, fill=self.do_fill) + with self.graphics: + for rect in rects: + self.resolve_fill(rect) + bl = rect.topLeft() + self.pdf.draw_rect(bl.x(), bl.y(), rect.width(), rect.height(), + stroke=self.do_stroke, fill=self.do_fill) def create_sfnt(self, text_item): get_table = partial(self.qt_hack.get_sfnt_table, text_item) diff --git a/src/calibre/ebooks/pdf/render/graphics.py b/src/calibre/ebooks/pdf/render/graphics.py index 384809598a..f9353d5358 100644 --- a/src/calibre/ebooks/pdf/render/graphics.py +++ b/src/calibre/ebooks/pdf/render/graphics.py @@ -8,6 +8,7 @@ __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' from math import sqrt +from collections import namedtuple from PyQt4.Qt import ( QBrush, QPen, Qt, QPointF, QTransform, QPainterPath, QPaintEngine, QImage) @@ -41,6 +42,8 @@ def convert_path(path): # {{{ return p # }}} +Brush = namedtuple('Brush', 'origin brush color') + class TilingPattern(Stream): def __init__(self, cache_key, matrix, w=8, h=8, paint_type=2, compress=False): @@ -222,18 +225,25 @@ class QtPattern(TilingPattern): class TexturePattern(TilingPattern): - def __init__(self, pixmap, matrix, pdf): - image = pixmap.toImage() - cache_key = pixmap.cacheKey() - imgref = pdf.add_image(image, cache_key) - 
paint_type = (2 if image.format() in {QImage.Format_MonoLSB, - QImage.Format_Mono} else 1) - super(TexturePattern, self).__init__( - cache_key, matrix, w=image.width(), h=image.height(), - paint_type=paint_type) - m = (self.w, 0, 0, -self.h, 0, self.h) - self.resources['XObject'] = Dictionary({'Texture':imgref}) - self.write_line('%s cm /Texture Do'%(' '.join(map(fmtnum, m)))) + def __init__(self, pixmap, matrix, pdf, clone=None): + if clone is None: + image = pixmap.toImage() + cache_key = pixmap.cacheKey() + imgref = pdf.add_image(image, cache_key) + paint_type = (2 if image.format() in {QImage.Format_MonoLSB, + QImage.Format_Mono} else 1) + super(TexturePattern, self).__init__( + cache_key, matrix, w=image.width(), h=image.height(), + paint_type=paint_type) + m = (self.w, 0, 0, -self.h, 0, self.h) + self.resources['XObject'] = Dictionary({'Texture':imgref}) + self.write_line('%s cm /Texture Do'%(' '.join(map(fmtnum, m)))) + else: + super(TexturePattern, self).__init__( + clone.cache_key[1], matrix, w=clone.w, h=clone.h, + paint_type=clone.paint_type) + self.resources['XObject'] = Dictionary(clone.resources['XObject']) + self.write(clone.getvalue()) class GraphicsState(object): @@ -275,6 +285,9 @@ class Graphics(object): self.current_state = GraphicsState() self.pending_state = None + def begin(self, pdf): + self.pdf = pdf + def update_state(self, state, painter): flags = state.state() if self.pending_state is None: @@ -304,13 +317,14 @@ class Graphics(object): self.current_state = GraphicsState() self.pending_state = None - def __call__(self, pdf, pdf_system, painter): + def __call__(self, pdf_system, painter): # Apply the currently pending state to the PDF if self.pending_state is None: return pdf_state = self.current_state ps = self.pending_state + pdf = self.pdf if (ps.transform != pdf_state.transform or ps.clip != pdf_state.clip): pdf.restore_stack() @@ -321,11 +335,11 @@ class Graphics(object): pdf.transform(ps.transform) if (pdf_state.opacity != ps.opacity 
or pdf_state.stroke != ps.stroke): - self.apply_stroke(ps, pdf, pdf_system, painter) + self.apply_stroke(ps, pdf_system, painter) if (pdf_state.opacity != ps.opacity or pdf_state.fill != ps.fill or pdf_state.brush_origin != ps.brush_origin): - self.apply_fill(ps, pdf, pdf_system, painter) + self.apply_fill(ps, pdf_system, painter) if (pdf_state.clip != ps.clip): p = convert_path(ps.clip) @@ -336,25 +350,28 @@ class Graphics(object): self.current_state = self.pending_state self.pending_state = None - def convert_brush(self, brush, brush_origin, global_opacity, pdf, + def convert_brush(self, brush, brush_origin, global_opacity, pdf_system, qt_system): # Convert a QBrush to PDF operators style = brush.style() + pdf = self.pdf - pattern = color = None + pattern = color = pat = None opacity = 1.0 do_fill = True matrix = (QTransform.fromTranslate(brush_origin.x(), brush_origin.y()) * pdf_system * qt_system.inverted()[0]) vals = list(brush.color().getRgbF()) + self.brushobj = None if style <= Qt.DiagCrossPattern: opacity = global_opacity * vals[-1] color = vals[:3] if style > Qt.SolidPattern: - pattern = pdf.add_pattern(QtPattern(style, matrix)) + pat = QtPattern(style, matrix) + pattern = pdf.add_pattern(pat) if opacity < 1e-4 or style == Qt.NoBrush: do_fill = False @@ -370,15 +387,17 @@ class Graphics(object): if opacity < 1e-4 or style == Qt.NoBrush: do_fill = False + self.brushobj = Brush(brush_origin, pat, color) # TODO: Add support for gradient fills return color, opacity, pattern, do_fill - def apply_stroke(self, state, pdf, pdf_system, painter): + def apply_stroke(self, state, pdf_system, painter): # TODO: Handle pens with non solid brushes by setting the colorspace # for stroking to a pattern # TODO: Support miter limit by using QPainterPathStroker pen = state.stroke self.pending_state.do_stroke = True + pdf = self.pdf if pen.style() == Qt.NoPen: self.pending_state.do_stroke = False @@ -417,10 +436,41 @@ class Graphics(object): if vals[-1] < 1e-5 or b.style() == 
Qt.NoBrush: self.pending_state.do_stroke = False - def apply_fill(self, state, pdf, pdf_system, painter): + def apply_fill(self, state, pdf_system, painter): self.pending_state.do_fill = True color, opacity, pattern, self.pending_state.do_fill = self.convert_brush( - state.fill, state.brush_origin, state.opacity, pdf, pdf_system, + state.fill, state.brush_origin, state.opacity, pdf_system, painter.transform()) - pdf.apply_fill(color, pattern, opacity) + self.pdf.apply_fill(color, pattern, opacity) + self.last_fill = self.brushobj + + def __enter__(self): + self.pdf.save_stack() + + def __exit__(self, *args): + self.pdf.restore_stack() + + def resolve_fill(self, rect, pdf_system, qt_system): + ''' + Qt's paint system does not update brushOrigin when using + TexturePatterns and it also uses TexturePatterns to emulate gradients, + leading to brokenness. So this method allows the paint engine to update + the brush origin before painting an object. While not perfect, this is + better than nothing. 
+ ''' + if not self.current_state.do_fill: + return + + if isinstance(self.last_fill.brush, TexturePattern): + tl = rect.topLeft() + if tl == self.last_fill.origin: + return + + matrix = (QTransform.fromTranslate(tl.x(), tl.y()) + * pdf_system * qt_system.inverted()[0]) + + pat = TexturePattern(None, matrix, self.pdf, clone=self.last_fill.brush) + pattern = self.pdf.add_pattern(pat) + self.pdf.apply_fill(self.last_fill.color, pattern) + diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index 55fd5f8aea..555af9206f 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -83,21 +83,20 @@ def run(dev, func): raise SystemExit(1) def brush(p, xmax, ymax): - x = y = 0 - w = xmax/3 + 10 - g = QLinearGradient(QPointF(0, 0), QPointF(0, w)) - g.setSpread(g.RepeatSpread) - g.setColorAt(0, QColor('#00f')) - g.setColorAt(1, QColor('#fff')) - p.fillRect(x, y, w, w, QBrush(g)) - p.drawRect(x, y, w, w) + x = xmax/3 + y = 0 + w = xmax/2 + pix = QPixmap(I('console.png')) + p.fillRect(x, y, w, w, QBrush(pix)) + + p.fillRect(0, y+xmax/1.9, w, w, QBrush(pix)) def main(): app = QApplication([]) app tdir = gettempdir() pdf = os.path.join(tdir, 'painter.pdf') - func = brush + func = full dpi = 100 with open(pdf, 'wb') as f: dev = PdfDevice(f, xdpi=dpi, ydpi=dpi, compress=False) From 0dd9c26dbe085f6d2915f30022ec9484088fff49 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 31 Dec 2012 20:08:17 +0530 Subject: [PATCH 34/46] Handle non-solid pens --- src/calibre/ebooks/pdf/render/engine.py | 1 - src/calibre/ebooks/pdf/render/graphics.py | 18 ++++------- src/calibre/ebooks/pdf/render/serialize.py | 36 ++++++++++++---------- src/calibre/ebooks/pdf/render/test.py | 18 +++++++++-- 4 files changed, 41 insertions(+), 32 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/engine.py b/src/calibre/ebooks/pdf/render/engine.py index 77f1f00c57..b65aed0660 100644 --- a/src/calibre/ebooks/pdf/render/engine.py +++ 
b/src/calibre/ebooks/pdf/render/engine.py @@ -109,7 +109,6 @@ class PdfEngine(QPaintEngine): def init_page(self): self.pdf.transform(self.pdf_system) - self.pdf.set_rgb_colorspace() self.graphics.reset() self.pdf.save_stack() self.current_page_inited = True diff --git a/src/calibre/ebooks/pdf/render/graphics.py b/src/calibre/ebooks/pdf/render/graphics.py index f9353d5358..43b1c72fdb 100644 --- a/src/calibre/ebooks/pdf/render/graphics.py +++ b/src/calibre/ebooks/pdf/render/graphics.py @@ -15,7 +15,7 @@ from PyQt4.Qt import ( from calibre.ebooks.pdf.render.common import ( Name, Array, fmtnum, Stream, Dictionary) -from calibre.ebooks.pdf.render.serialize import Path, Color +from calibre.ebooks.pdf.render.serialize import Path def convert_path(path): # {{{ p = Path() @@ -392,8 +392,6 @@ class Graphics(object): return color, opacity, pattern, do_fill def apply_stroke(self, state, pdf_system, painter): - # TODO: Handle pens with non solid brushes by setting the colorspace - # for stroking to a pattern # TODO: Support miter limit by using QPainterPathStroker pen = state.stroke self.pending_state.do_stroke = True @@ -427,14 +425,10 @@ class Graphics(object): pdf.current_page.write(' 0 d ') # Stroke fill - b = pen.brush() - vals = list(b.color().getRgbF()) - vals[-1] *= state.opacity - color = Color(*vals) - pdf.set_stroke_color(color) - - if vals[-1] < 1e-5 or b.style() == Qt.NoBrush: - self.pending_state.do_stroke = False + color, opacity, pattern, self.pending_state.do_stroke = self.convert_brush( + pen.brush(), state.brush_origin, state.opacity, pdf_system, + painter.transform()) + self.pdf.apply_stroke(color, pattern, opacity) def apply_fill(self, state, pdf_system, painter): self.pending_state.do_fill = True @@ -458,7 +452,7 @@ class Graphics(object): the brush origin before painting an object. While not perfect, this is better than nothing. 
''' - if not self.current_state.do_fill: + if not hasattr(self, 'last_fill') or not self.current_state.do_fill: return if isinstance(self.last_fill.brush, TexturePattern): diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index 54a5f674b4..ce4ae7fc6c 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en' import hashlib from future_builtins import map from itertools import izip -from collections import namedtuple from PyQt4.Qt import QBuffer, QByteArray, QImage, Qt, QColor, qRgba @@ -23,8 +22,6 @@ from calibre.ebooks.pdf.render.links import Links PDFVER = b'%PDF-1.3' -Color = namedtuple('Color', 'red green blue opacity') - class IndirectObjects(object): def __init__(self): @@ -353,9 +350,6 @@ class PDFStream(object): cm = ' '.join(map(fmtnum, vals)) self.current_page.write_line(cm + ' cm') - def set_rgb_colorspace(self): - self.current_page.write_line('/DeviceRGB CS /DeviceRGB cs') - def save_stack(self): self.current_page.write_line('q') @@ -391,13 +385,11 @@ class PDFStream(object): def serialize(self, o): serialize(o, self.current_page) - def set_stroke_color(self, color): - opacity = color.opacity + def set_stroke_opacity(self, opacity): if opacity not in self.stroke_opacities: op = Dictionary({'Type':Name('ExtGState'), 'CA': opacity}) self.stroke_opacities[opacity] = self.objects.add(op) self.current_page.set_opacity(self.stroke_opacities[opacity]) - self.current_page.write_line(' '.join(map(fmtnum, color[:3])) + ' SC') def set_fill_opacity(self, opacity): opacity = float(opacity) @@ -518,17 +510,27 @@ class PDFStream(object): serialize(Name(name), self.current_page) self.current_page.write_line(' Do Q') + def apply_color_space(self, color, pattern, stroke=False): + wl = self.current_page.write_line + if color is not None and pattern is None: + wl(' '.join(map(fmtnum, color)) + (' RG' if stroke else ' rg')) 
+ elif color is None and pattern is not None: + wl('/Pattern %s /%s %s'%('CS' if stroke else 'cs', pattern, + 'SCN' if stroke else 'scn')) + elif color is not None and pattern is not None: + col = ' '.join(map(fmtnum, color)) + wl('/PCSp %s %s /%s %s'%('CS' if stroke else 'cs', col, pattern, + 'SCN' if stroke else 'scn')) + def apply_fill(self, color=None, pattern=None, opacity=None): if opacity is not None: self.set_fill_opacity(opacity) - wl = self.current_page.write_line - if color is not None and pattern is None: - wl(' '.join(map(fmtnum, color)) + ' rg') - elif color is None and pattern is not None: - wl('/Pattern cs /%s scn'%pattern) - elif color is not None and pattern is not None: - col = ' '.join(map(fmtnum, color)) - wl('/PCSp cs %s /%s scn'%(col, pattern)) + self.apply_color_space(color, pattern) + + def apply_stroke(self, color=None, pattern=None, opacity=None): + if opacity is not None: + self.set_stroke_opacity(opacity) + self.apply_color_space(color, pattern, stroke=True) def end(self): if self.current_page.getvalue(): diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index 555af9206f..b631dc8276 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -12,7 +12,7 @@ from tempfile import gettempdir from PyQt4.Qt import (QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, QApplication, QPainter, Qt, QImage, QLinearGradient, - QPointF) + QPointF, QPen) QBrush, QColor, QPoint, QPixmap, QPainterPath, QRectF, Qt, QPointF from calibre.ebooks.pdf.render.engine import PdfDevice @@ -69,6 +69,14 @@ def full(p, xmax, ymax): g.setColorAt(1, QColor('#fff')) p.fillRect(x, y, w, w, QBrush(g)) + pen = QPen(QBrush(Qt.blue)) + pen.setWidth(xmax/3) + p.setPen(pen) + x += w + w/10 + y += w + p.drawLine(x, y, x+w, y) + + def run(dev, func): p = QPainter(dev) if isinstance(dev, PdfDevice): @@ -91,12 +99,18 @@ def brush(p, xmax, ymax): p.fillRect(0, y+xmax/1.9, w, w, QBrush(pix)) +def 
pen(p, xmax, ymax): + pix = QPixmap(I('console.png')) + pen = QPen(QBrush(pix), 60) + p.setPen(pen) + p.drawRect(0, xmax/3, xmax/3, xmax/2) + def main(): app = QApplication([]) app tdir = gettempdir() pdf = os.path.join(tdir, 'painter.pdf') - func = full + func = pen dpi = 100 with open(pdf, 'wb') as f: dev = PdfDevice(f, xdpi=dpi, ydpi=dpi, compress=False) From 0c0cb03bb0b48746adfc7e901ead88965cdc667b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jan 2013 09:31:44 +0530 Subject: [PATCH 35/46] Updated Alternet --- recipes/alternet.recipe | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/recipes/alternet.recipe b/recipes/alternet.recipe index e58376cc42..0bd608e0e7 100644 --- a/recipes/alternet.recipe +++ b/recipes/alternet.recipe @@ -10,14 +10,12 @@ class Alternet(BasicNewsRecipe): category = 'News, Magazine' description = 'News magazine and online community' feeds = [ - (u'Front Page', u'http://feeds.feedblitz.com/alternet'), - (u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'), - (u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'), - (u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage') + (u'Front Page', u'http://feeds.feedblitz.com/alternet') ] + remove_attributes = ['width', 'align','cellspacing'] remove_javascript = True - use_embedded_content = False + use_embedded_content = True no_stylesheets = True language = 'en' encoding = 'UTF-8' From f77765ff3c458819ac8c0ae696a46012b5b70b3c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jan 2013 09:52:44 +0530 Subject: [PATCH 36/46] Update NY Times --- recipes/nytimes.recipe | 83 +++++++++++++++++++++++++++++++++++--- recipes/nytimes_sub.recipe | 83 +++++++++++++++++++++++++++++++++++--- 2 files changed, 156 insertions(+), 10 deletions(-) diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index ba97a2c0be..f5b994275e 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -15,6 +15,7 
@@ from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup class NYTimes(BasicNewsRecipe): recursions=1 # set this to zero to omit Related articles lists + match_regexps=[r'/[12][0-9][0-9][0-9]/[0-9]+/'] # speeds up processing by preventing index page links from being followed # set getTechBlogs to True to include the technology blogs # set tech_oldest_article to control article age @@ -24,6 +25,14 @@ class NYTimes(BasicNewsRecipe): tech_oldest_article = 14 tech_max_articles_per_feed = 25 + # set getPopularArticles to False if you don't want the Most E-mailed and Most Viewed articles + # otherwise you will get up to 20 of the most popular e-mailed and viewed articles (in each category) + getPopularArticles = True + popularPeriod = '1' # set this to the number of days to include in the measurement + # e.g. 7 will get the most popular measured over the last 7 days + # and 30 will get the most popular measured over 30 days. + # you still only get up to 20 articles in each category + # set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored. 
headlinesOnly = True @@ -376,6 +385,7 @@ class NYTimes(BasicNewsRecipe): masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' + def short_title(self): return self.title @@ -384,6 +394,7 @@ class NYTimes(BasicNewsRecipe): from contextlib import closing import copy from calibre.ebooks.chardet import xml_to_unicode + print("ARTICLE_TO_SOUP "+url_or_raw) if re.match(r'\w+://', url_or_raw): br = self.clone_browser(self.browser) open_func = getattr(br, 'open_novisit', br.open) @@ -475,6 +486,67 @@ class NYTimes(BasicNewsRecipe): description=description, author=author, content='')) + def get_popular_articles(self,ans): + if self.getPopularArticles: + popular_articles = {} + key_list = [] + + def handleh3(h3tag): + try: + url = h3tag.a['href'] + except: + return ('','','','') + url = re.sub(r'\?.*', '', url) + if self.exclude_url(url): + return ('','','','') + url += '?pagewanted=all' + title = self.tag_to_string(h3tag.a,False) + h6tag = h3tag.findNextSibling('h6') + if h6tag is not None: + author = self.tag_to_string(h6tag,False) + else: + author = '' + ptag = h3tag.findNextSibling('p') + if ptag is not None: + desc = self.tag_to_string(ptag,False) + else: + desc = '' + return(title,url,author,desc) + + + have_emailed = False + emailed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-emailed?period='+self.popularPeriod) + for h3tag in emailed_soup.findAll('h3'): + (title,url,author,desc) = handleh3(h3tag) + if url=='': + continue + if not have_emailed: + key_list.append('Most E-Mailed') + popular_articles['Most E-Mailed'] = [] + have_emailed = True + popular_articles['Most E-Mailed'].append( + dict(title=title, url=url, date=strftime('%a, %d %b'), + description=desc, author=author, + content='')) + have_viewed = False + viewed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-viewed?period='+self.popularPeriod) + for h3tag in viewed_soup.findAll('h3'): + (title,url,author,desc) = handleh3(h3tag) + if url=='': + continue + 
if not have_viewed: + key_list.append('Most Viewed') + popular_articles['Most Viewed'] = [] + have_viewed = True + popular_articles['Most Viewed'].append( + dict(title=title, url=url, date=strftime('%a, %d %b'), + description=desc, author=author, + content='')) + viewed_ans = [(k, popular_articles[k]) for k in key_list if popular_articles.has_key(k)] + for x in viewed_ans: + ans.append(x) + return ans + def get_tech_feeds(self,ans): if self.getTechBlogs: tech_articles = {} @@ -536,7 +608,7 @@ class NYTimes(BasicNewsRecipe): self.handle_article(lidiv) self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] - return self.filter_ans(self.get_tech_feeds(self.ans)) + return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) def parse_todays_index(self): @@ -569,7 +641,7 @@ class NYTimes(BasicNewsRecipe): self.handle_article(lidiv) self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] - return self.filter_ans(self.get_tech_feeds(self.ans)) + return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) def parse_headline_index(self): @@ -643,7 +715,7 @@ class NYTimes(BasicNewsRecipe): self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] - return self.filter_ans(self.get_tech_feeds(self.ans)) + return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) def parse_index(self): if self.headlinesOnly: @@ -731,7 +803,7 @@ class NYTimes(BasicNewsRecipe): def preprocess_html(self, soup): - #print("PREPROCESS TITLE="+self.tag_to_string(soup.title)) + #print(strftime("%H:%M:%S")+" -- PREPROCESS TITLE="+self.tag_to_string(soup.title)) skip_tag = soup.find(True, {'name':'skip'}) if skip_tag is not None: #url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) @@ -907,6 +979,7 @@ class 
NYTimes(BasicNewsRecipe): for aside in soup.findAll('div','aside'): aside.extract() soup = self.strip_anchors(soup,True) + #print("RECURSIVE: "+self.tag_to_string(soup.title)) if soup.find('div',attrs={'id':'blogcontent'}) is None: if first_fetch: @@ -1071,7 +1144,7 @@ class NYTimes(BasicNewsRecipe): divTag.replaceWith(tag) except: self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") - + #print(strftime("%H:%M:%S")+" -- POSTPROCESS TITLE="+self.tag_to_string(soup.title)) return soup def populate_article_metadata(self, article, soup, first): diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index d550a5158f..df44856293 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -15,6 +15,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup class NYTimes(BasicNewsRecipe): recursions=1 # set this to zero to omit Related articles lists + match_regexps=[r'/[12][0-9][0-9][0-9]/[0-9]+/'] # speeds up processing by preventing index page links from being followed # set getTechBlogs to True to include the technology blogs # set tech_oldest_article to control article age @@ -24,6 +25,14 @@ class NYTimes(BasicNewsRecipe): tech_oldest_article = 14 tech_max_articles_per_feed = 25 + # set getPopularArticles to False if you don't want the Most E-mailed and Most Viewed articles + # otherwise you will get up to 20 of the most popular e-mailed and viewed articles (in each category) + getPopularArticles = True + popularPeriod = '1' # set this to the number of days to include in the measurement + # e.g. 7 will get the most popular measured over the last 7 days + # and 30 will get the most popular measured over 30 days. + # you still only get up to 20 articles in each category + # set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored. 
headlinesOnly = False @@ -376,6 +385,7 @@ class NYTimes(BasicNewsRecipe): masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' + def short_title(self): return self.title @@ -384,6 +394,7 @@ class NYTimes(BasicNewsRecipe): from contextlib import closing import copy from calibre.ebooks.chardet import xml_to_unicode + print("ARTICLE_TO_SOUP "+url_or_raw) if re.match(r'\w+://', url_or_raw): br = self.clone_browser(self.browser) open_func = getattr(br, 'open_novisit', br.open) @@ -475,6 +486,67 @@ class NYTimes(BasicNewsRecipe): description=description, author=author, content='')) + def get_popular_articles(self,ans): + if self.getPopularArticles: + popular_articles = {} + key_list = [] + + def handleh3(h3tag): + try: + url = h3tag.a['href'] + except: + return ('','','','') + url = re.sub(r'\?.*', '', url) + if self.exclude_url(url): + return ('','','','') + url += '?pagewanted=all' + title = self.tag_to_string(h3tag.a,False) + h6tag = h3tag.findNextSibling('h6') + if h6tag is not None: + author = self.tag_to_string(h6tag,False) + else: + author = '' + ptag = h3tag.findNextSibling('p') + if ptag is not None: + desc = self.tag_to_string(ptag,False) + else: + desc = '' + return(title,url,author,desc) + + + have_emailed = False + emailed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-emailed?period='+self.popularPeriod) + for h3tag in emailed_soup.findAll('h3'): + (title,url,author,desc) = handleh3(h3tag) + if url=='': + continue + if not have_emailed: + key_list.append('Most E-Mailed') + popular_articles['Most E-Mailed'] = [] + have_emailed = True + popular_articles['Most E-Mailed'].append( + dict(title=title, url=url, date=strftime('%a, %d %b'), + description=desc, author=author, + content='')) + have_viewed = False + viewed_soup = self.index_to_soup('http://www.nytimes.com/most-popular-viewed?period='+self.popularPeriod) + for h3tag in viewed_soup.findAll('h3'): + (title,url,author,desc) = handleh3(h3tag) + if url=='': + continue + 
if not have_viewed: + key_list.append('Most Viewed') + popular_articles['Most Viewed'] = [] + have_viewed = True + popular_articles['Most Viewed'].append( + dict(title=title, url=url, date=strftime('%a, %d %b'), + description=desc, author=author, + content='')) + viewed_ans = [(k, popular_articles[k]) for k in key_list if popular_articles.has_key(k)] + for x in viewed_ans: + ans.append(x) + return ans + def get_tech_feeds(self,ans): if self.getTechBlogs: tech_articles = {} @@ -536,7 +608,7 @@ class NYTimes(BasicNewsRecipe): self.handle_article(lidiv) self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] - return self.filter_ans(self.get_tech_feeds(self.ans)) + return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) def parse_todays_index(self): @@ -569,7 +641,7 @@ class NYTimes(BasicNewsRecipe): self.handle_article(lidiv) self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] - return self.filter_ans(self.get_tech_feeds(self.ans)) + return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) def parse_headline_index(self): @@ -643,7 +715,7 @@ class NYTimes(BasicNewsRecipe): self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] - return self.filter_ans(self.get_tech_feeds(self.ans)) + return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans))) def parse_index(self): if self.headlinesOnly: @@ -731,7 +803,7 @@ class NYTimes(BasicNewsRecipe): def preprocess_html(self, soup): - #print("PREPROCESS TITLE="+self.tag_to_string(soup.title)) + #print(strftime("%H:%M:%S")+" -- PREPROCESS TITLE="+self.tag_to_string(soup.title)) skip_tag = soup.find(True, {'name':'skip'}) if skip_tag is not None: #url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) @@ -907,6 +979,7 @@ class 
NYTimes(BasicNewsRecipe): for aside in soup.findAll('div','aside'): aside.extract() soup = self.strip_anchors(soup,True) + #print("RECURSIVE: "+self.tag_to_string(soup.title)) if soup.find('div',attrs={'id':'blogcontent'}) is None: if first_fetch: @@ -1071,7 +1144,7 @@ class NYTimes(BasicNewsRecipe): divTag.replaceWith(tag) except: self.log("ERROR: Problem in Add class=authorId to
so we can format with CSS") - + #print(strftime("%H:%M:%S")+" -- POSTPROCESS TITLE="+self.tag_to_string(soup.title)) return soup def populate_article_metadata(self, article, soup, first): From eca40f6b51bdb77555fbacad960df25274986ab7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jan 2013 13:06:14 +0530 Subject: [PATCH 37/46] Speedup serialization of numbers to PDF by a factor of 10 --- .../ebooks/conversion/plugins/pdf_output.py | 10 +++- src/calibre/ebooks/pdf/render/common.py | 49 +++++++-------- src/calibre/ebooks/pdf/render/serialize.py | 60 ++----------------- src/calibre/ebooks/pdf/render/test.py | 5 +- src/calibre/utils/speedup.c | 40 +++++++++++++ 5 files changed, 79 insertions(+), 85 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/pdf_output.py b/src/calibre/ebooks/conversion/plugins/pdf_output.py index c042de7050..c2b0050dd7 100644 --- a/src/calibre/ebooks/conversion/plugins/pdf_output.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_output.py @@ -232,7 +232,15 @@ class PDFOutput(OutputFormatPlugin): out_stream.seek(0) out_stream.truncate() self.log.debug('Rendering pages to PDF...') - writer.dump(items, out_stream, PDFMetadata(self.metadata)) + import time + st = time.time() + if False: + import cProfile + cProfile.runctx('writer.dump(items, out_stream, PDFMetadata(self.metadata))', + globals(), locals(), '/tmp/profile') + else: + writer.dump(items, out_stream, PDFMetadata(self.metadata)) + self.log('Rendered PDF in %g seconds:'%(time.time()-st)) if close: out_stream.close() diff --git a/src/calibre/ebooks/pdf/render/common.py b/src/calibre/ebooks/pdf/render/common.py index 5be06b1b98..03774e2d69 100644 --- a/src/calibre/ebooks/pdf/render/common.py +++ b/src/calibre/ebooks/pdf/render/common.py @@ -9,8 +9,10 @@ __docformat__ = 'restructuredtext en' import codecs, zlib from io import BytesIO -from struct import pack -from decimal import Decimal + +from calibre.constants import plugins, ispy3 + +pdf_float = 
plugins['speedup'][0].pdf_float EOL = b'\n' @@ -52,32 +54,25 @@ PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2' # Basic PDF datatypes {{{ -def format_float(f): - if abs(f) < 1e-7: - return '0' - places = 6 - a, b = type(u'')(Decimal(f).quantize(Decimal(10)**-places)).partition('.')[0::2] - b = b.rstrip('0') - if not b: - return '0' if a == '-0' else a - return '%s.%s'%(a, b) +ic = str if ispy3 else unicode +icb = (lambda x: str(x).encode('ascii')) if ispy3 else bytes def fmtnum(o): - if isinstance(o, (int, long)): - return type(u'')(o) - return format_float(o) + if isinstance(o, float): + return pdf_float(o) + return ic(o) def serialize(o, stream): - if hasattr(o, 'pdf_serialize'): - o.pdf_serialize(stream) - elif isinstance(o, bool): - stream.write(b'true' if o else b'false') + if isinstance(o, float): + stream.write_raw(pdf_float(o).encode('ascii')) elif isinstance(o, (int, long)): - stream.write(type(u'')(o).encode('ascii')) - elif isinstance(o, float): - stream.write(format_float(o).encode('ascii')) + stream.write_raw(icb(o)) + elif hasattr(o, 'pdf_serialize'): + o.pdf_serialize(stream) elif o is None: - stream.write(b'null') + stream.write_raw(b'null') + elif isinstance(o, bool): + stream.write_raw(b'true' if o else b'false') else: raise ValueError('Unknown object: %r'%o) @@ -103,13 +98,6 @@ class String(unicode): raw = codecs.BOM_UTF16_BE + s.encode('utf-16-be') stream.write(b'('+raw+b')') -class GlyphIndex(int): - - def pdf_serialize(self, stream): - byts = bytearray(pack(b'>H', self)) - stream.write('<%s>'%''.join(map( - lambda x: bytes(hex(x)[2:]).rjust(2, b'0'), byts))) - class Dictionary(dict): def pdf_serialize(self, stream): @@ -180,6 +168,9 @@ class Stream(BytesIO): super(Stream, self).write(raw if isinstance(raw, bytes) else raw.encode('ascii')) + def write_raw(self, raw): + BytesIO.write(self, raw) + class Reference(object): def __init__(self, num, obj): diff --git a/src/calibre/ebooks/pdf/render/serialize.py 
b/src/calibre/ebooks/pdf/render/serialize.py index ce4ae7fc6c..1e28e59a96 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -9,14 +9,13 @@ __docformat__ = 'restructuredtext en' import hashlib from future_builtins import map -from itertools import izip from PyQt4.Qt import QBuffer, QByteArray, QImage, Qt, QColor, qRgba from calibre.constants import (__appname__, __version__) from calibre.ebooks.pdf.render.common import ( Reference, EOL, serialize, Stream, Dictionary, String, Name, Array, - GlyphIndex, fmtnum) + fmtnum) from calibre.ebooks.pdf.render.fonts import FontManager from calibre.ebooks.pdf.render.links import Links @@ -166,54 +165,6 @@ class Path(object): def close(self): self.ops.append(('h',)) -class Text(object): - - def __init__(self): - self.transform = self.default_transform = [1, 0, 0, 1, 0, 0] - self.font_name = 'Times-Roman' - self.font_path = None - self.horizontal_scale = self.default_horizontal_scale = 100 - self.word_spacing = self.default_word_spacing = 0 - self.char_space = self.default_char_space = 0 - self.glyph_adjust = self.default_glyph_adjust = None - self.size = 12 - self.text = '' - - def set_transform(self, *args): - if len(args) == 1: - m = args[0] - vals = [m.m11(), m.m12(), m.m21(), m.m22(), m.dx(), m.dy()] - else: - vals = args - self.transform = vals - - def pdf_serialize(self, stream, font_name): - if not self.text: return - stream.write_line('BT ') - serialize(Name(font_name), stream) - stream.write(' %s Tf '%fmtnum(self.size)) - stream.write(' '.join(map(fmtnum, self.transform)) + ' Tm ') - if self.horizontal_scale != self.default_horizontal_scale: - stream.write('%s Tz '%fmtnum(self.horizontal_scale)) - if self.word_spacing != self.default_word_spacing: - stream.write('%s Tw '%fmtnum(self.word_spacing)) - if self.char_space != self.default_char_space: - stream.write('%s Tc '%fmtnum(self.char_space)) - stream.write_line() - if self.glyph_adjust is 
self.default_glyph_adjust: - serialize(String(self.text), stream) - stream.write(' Tj ') - else: - chars = Array() - frac, widths = self.glyph_adjust - for c, width in izip(self.text, widths): - chars.append(String(c)) - chars.append(int(width * frac)) - serialize(chars, stream) - stream.write(' TJ ') - stream.write_line('ET') - - class Catalog(Dictionary): def __init__(self, pagetree): @@ -244,7 +195,9 @@ class HashingStream(object): self.last_char = b'' def write(self, raw): - raw = raw if isinstance(raw, bytes) else raw.encode('ascii') + self.write_raw(raw if isinstance(raw, bytes) else raw.encode('ascii')) + + def write_raw(self, raw): self.f.write(raw) self.hashobj.update(raw) if raw: @@ -420,9 +373,8 @@ class PDFStream(object): self.current_page.write(' %s Tf '%fmtnum(size)) self.current_page.write('%s Tm '%' '.join(map(fmtnum, transform))) for x, y, glyph_id in glyphs: - self.current_page.write('%s %s Td '%(fmtnum(x), fmtnum(y))) - serialize(GlyphIndex(glyph_id), self.current_page) - self.current_page.write(' Tj ') + self.current_page.write_raw(('%s %s Td <%04X> Tj '%( + fmtnum(x), fmtnum(y), glyph_id)).encode('ascii')) self.current_page.write_line(b' ET') def get_image(self, cache_key): diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index b631dc8276..8fe1709491 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -105,12 +105,15 @@ def pen(p, xmax, ymax): p.setPen(pen) p.drawRect(0, xmax/3, xmax/3, xmax/2) +def text(p, xmax, ymax): + p.drawText(QPoint(0, ymax/3), 'Text') + def main(): app = QApplication([]) app tdir = gettempdir() pdf = os.path.join(tdir, 'painter.pdf') - func = pen + func = full dpi = 100 with open(pdf, 'wb') as f: dev = PdfDevice(f, xdpi=dpi, ydpi=dpi, compress=False) diff --git a/src/calibre/utils/speedup.c b/src/calibre/utils/speedup.c index 0626d351a4..171179a88a 100644 --- a/src/calibre/utils/speedup.c +++ b/src/calibre/utils/speedup.c @@ -3,6 
+3,9 @@ #include +#define min(x, y) ((x < y) ? x : y) +#define max(x, y) ((x > y) ? x : y) + static PyObject * speedup_parse_date(PyObject *self, PyObject *args) { const char *raw, *orig, *tz; @@ -61,11 +64,48 @@ speedup_parse_date(PyObject *self, PyObject *args) { (tzh*60 + tzm)*sign*60); } + +static PyObject* +speedup_pdf_float(PyObject *self, PyObject *args) { + double f = 0.0, a = 0.0; + char *buf = "0", *dot; + void *free_buf = NULL; + int precision = 6, l = 0; + PyObject *ret; + + if(!PyArg_ParseTuple(args, "d", &f)) return NULL; + + a = fabs(f); + + if (a > 1.0e-7) { + if(a > 1) precision = min(max(0, 6-(int)log10(a)), 6); + buf = PyOS_double_to_string(f, 'f', precision, 0, NULL); + if (buf != NULL) { + free_buf = (void*)buf; + if (precision > 0) { + l = strlen(buf) - 1; + while (l > 0 && buf[l] == '0') l--; + if (buf[l] == ',' || buf[l] == '.') buf[l] = 0; + else buf[l+1] = 0; + if ( (dot = strchr(buf, ',')) ) *dot = '.'; + } + } else if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "Float->str failed."); + } + + ret = PyUnicode_FromString(buf); + if (free_buf != NULL) PyMem_Free(free_buf); + return ret; +} + static PyMethodDef speedup_methods[] = { {"parse_date", speedup_parse_date, METH_VARARGS, "parse_date()\n\nParse ISO dates faster." }, + {"pdf_float", speedup_pdf_float, METH_VARARGS, + "pdf_float()\n\nConvert float to a string representation suitable for PDF" + }, + {NULL, NULL, 0, NULL} }; From 7e0acebabc0839b31f63373441203910119031cf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jan 2013 15:26:32 +0530 Subject: [PATCH 38/46] ... 
--- src/calibre/ebooks/pdf/render/graphics.py | 4 ++-- src/calibre/ebooks/pdf/render/test.py | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/graphics.py b/src/calibre/ebooks/pdf/render/graphics.py index 43b1c72fdb..71a8a22e85 100644 --- a/src/calibre/ebooks/pdf/render/graphics.py +++ b/src/calibre/ebooks/pdf/render/graphics.py @@ -396,8 +396,6 @@ class Graphics(object): pen = state.stroke self.pending_state.do_stroke = True pdf = self.pdf - if pen.style() == Qt.NoPen: - self.pending_state.do_stroke = False # Width w = pen.widthF() @@ -429,6 +427,8 @@ class Graphics(object): pen.brush(), state.brush_origin, state.opacity, pdf_system, painter.transform()) self.pdf.apply_stroke(color, pattern, opacity) + if pen.style() == Qt.NoPen: + self.pending_state.do_stroke = False def apply_fill(self, state, pdf_system, painter): self.pending_state.do_fill = True diff --git a/src/calibre/ebooks/pdf/render/test.py b/src/calibre/ebooks/pdf/render/test.py index 8fe1709491..d57678a057 100644 --- a/src/calibre/ebooks/pdf/render/test.py +++ b/src/calibre/ebooks/pdf/render/test.py @@ -69,13 +69,6 @@ def full(p, xmax, ymax): g.setColorAt(1, QColor('#fff')) p.fillRect(x, y, w, w, QBrush(g)) - pen = QPen(QBrush(Qt.blue)) - pen.setWidth(xmax/3) - p.setPen(pen) - x += w + w/10 - y += w - p.drawLine(x, y, x+w, y) - def run(dev, func): p = QPainter(dev) From f368250f1dac9903c31cc24fe9ae79ae0a92bf3c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jan 2013 18:40:27 +0530 Subject: [PATCH 39/46] Nicer error message when no supported formats for conversion found --- src/calibre/gui2/convert/single.py | 7 ++++-- src/calibre/gui2/tools.py | 37 +++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/calibre/gui2/convert/single.py b/src/calibre/gui2/convert/single.py index 332dc4ae92..cff7ee8c3d 100644 --- a/src/calibre/gui2/convert/single.py +++ b/src/calibre/gui2/convert/single.py @@ -33,7 
+33,10 @@ from calibre.utils.config import prefs from calibre.utils.logging import Log class NoSupportedInputFormats(Exception): - pass + + def __init__(self, available_formats): + Exception.__init__(self) + self.available_formats = available_formats def sort_formats_by_preference(formats, prefs): uprefs = [x.upper() for x in prefs] @@ -86,7 +89,7 @@ def get_supported_input_formats_for_book(db, book_id): input_formats = set([x.lower() for x in supported_input_formats()]) input_formats = sorted(available_formats.intersection(input_formats)) if not input_formats: - raise NoSupportedInputFormats + raise NoSupportedInputFormats(tuple(x for x in available_formats if x)) return input_formats diff --git a/src/calibre/gui2/tools.py b/src/calibre/gui2/tools.py index f4ee92b565..98a59ccdd5 100644 --- a/src/calibre/gui2/tools.py +++ b/src/calibre/gui2/tools.py @@ -88,20 +88,35 @@ def convert_single_ebook(parent, db, book_ids, auto_conversion=False, # {{{ changed = True d.break_cycles() - except NoSupportedInputFormats: - bad.append(book_id) + except NoSupportedInputFormats as nsif: + bad.append((book_id, nsif.available_formats)) if bad and show_no_format_warning: - res = [] - for id in bad: - title = db.title(id, True) - res.append('%s'%title) + if len(bad) == 1 and not bad[0][1]: + title = db.title(bad[0][0], True) + warning_dialog(parent, _('Could not convert'), '

'+ + _('Could not convert %s as it has no ebook files. If you ' + 'think it should have files, but calibre is not finding ' + 'them, that is most likely because you moved the book\'s ' + 'files around outside of calibre. You will need to find those files ' + 'and re-add them to calibre.')%title, show=True) + else: + res = [] + for id, available_formats in bad: + title = db.title(id, True) + if available_formats: + msg = _('No supported formats (Available formats: %s)')%( + ', '.join(available_formats)) + else: + msg = _('This book has no actual ebook files') + res.append('%s - %s'%(title, msg)) - msg = '%s' % '\n'.join(res) - warning_dialog(parent, _('Could not convert some books'), - _('Could not convert %(num)d of %(tot)d books, because no suitable source' - ' format was found.') % dict(num=len(res), tot=total), - msg).exec_() + + msg = '%s' % '\n'.join(res) + warning_dialog(parent, _('Could not convert some books'), + _('Could not convert %(num)d of %(tot)d books, because no supported source' + ' formats were found.') % dict(num=len(res), tot=total), + msg).exec_() return jobs, changed, bad # }}} From e1d40f36036ad1aac1c0785bba979e8b6a92d47f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jan 2013 20:50:22 +0530 Subject: [PATCH 40/46] Show disabled device plugins in Preferences->Ignored Devices --- .../gui2/preferences/ignored_devices.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/calibre/gui2/preferences/ignored_devices.py b/src/calibre/gui2/preferences/ignored_devices.py index 99fa350f73..f4215c49ad 100644 --- a/src/calibre/gui2/preferences/ignored_devices.py +++ b/src/calibre/gui2/preferences/ignored_devices.py @@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en' from PyQt4.Qt import (QLabel, QVBoxLayout, QListWidget, QListWidgetItem, Qt) +from calibre.customize.ui import enable_plugin from calibre.gui2.preferences import ConfigWidgetBase, test_widget class ConfigWidget(ConfigWidgetBase): @@ -31,6 +32,18 @@ class 
ConfigWidget(ConfigWidgetBase): f.itemChanged.connect(self.changed_signal) f.itemDoubleClicked.connect(self.toggle_item) + self.la2 = la = QLabel(_( + 'The list of device plugins you have disabled. Uncheck an entry ' + 'to enable the plugin. calibre cannot detect devices that are ' + 'managed by disabled plugins.')) + la.setWordWrap(True) + l.addWidget(la) + + self.device_plugins = f = QListWidget(f) + l.addWidget(f) + f.itemChanged.connect(self.changed_signal) + f.itemDoubleClicked.connect(self.toggle_item) + def toggle_item(self, item): item.setCheckState(Qt.Checked if item.checkState() == Qt.Unchecked else Qt.Unchecked) @@ -46,6 +59,16 @@ class ConfigWidget(ConfigWidgetBase): item.setCheckState(Qt.Checked) self.devices.blockSignals(False) + self.device_plugins.blockSignals(True) + for dev in self.gui.device_manager.disabled_device_plugins: + n = dev.get_gui_name() + item = QListWidgetItem(n, self.device_plugins) + item.setData(Qt.UserRole, dev) + item.setFlags(Qt.ItemIsEnabled|Qt.ItemIsUserCheckable|Qt.ItemIsSelectable) + item.setCheckState(Qt.Checked) + self.device_plugins.sortItems() + self.device_plugins.blockSignals(False) + def restore_defaults(self): if self.devices.count() > 0: self.devices.clear() @@ -63,6 +86,12 @@ class ConfigWidget(ConfigWidgetBase): for dev, bl in devs.iteritems(): dev.set_user_blacklisted_devices(bl) + for i in xrange(self.device_plugins.count()): + e = self.device_plugins.item(i) + dev = e.data(Qt.UserRole).toPyObject() + if e.checkState() == Qt.Unchecked: + enable_plugin(dev) + return True # Restart required if __name__ == '__main__': From 90f19d6e8561c3c90ebff40be10fef1c8ee77af4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jan 2013 21:59:41 +0530 Subject: [PATCH 41/46] ... 
--- src/calibre/gui2/preferences/ignored_devices.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/calibre/gui2/preferences/ignored_devices.py b/src/calibre/gui2/preferences/ignored_devices.py index f4215c49ad..e0c0ae1dc1 100644 --- a/src/calibre/gui2/preferences/ignored_devices.py +++ b/src/calibre/gui2/preferences/ignored_devices.py @@ -7,7 +7,8 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -from PyQt4.Qt import (QLabel, QVBoxLayout, QListWidget, QListWidgetItem, Qt) +from PyQt4.Qt import (QLabel, QVBoxLayout, QListWidget, QListWidgetItem, Qt, + QIcon) from calibre.customize.ui import enable_plugin from calibre.gui2.preferences import ConfigWidgetBase, test_widget @@ -66,6 +67,7 @@ class ConfigWidget(ConfigWidgetBase): item.setData(Qt.UserRole, dev) item.setFlags(Qt.ItemIsEnabled|Qt.ItemIsUserCheckable|Qt.ItemIsSelectable) item.setCheckState(Qt.Checked) + item.setIcon(QIcon(I('plugins.png'))) self.device_plugins.sortItems() self.device_plugins.blockSignals(False) From b3b37a2029bca2ad62ef90e1df0fa7844e8f4fa6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 Jan 2013 08:25:27 +0530 Subject: [PATCH 42/46] Update Foreign Affairs --- recipes/foreignaffairs.recipe | 94 ++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/recipes/foreignaffairs.recipe b/recipes/foreignaffairs.recipe index 6b36170288..b383609860 100644 --- a/recipes/foreignaffairs.recipe +++ b/recipes/foreignaffairs.recipe @@ -11,21 +11,21 @@ class ForeignAffairsRecipe(BasicNewsRecipe): by Chen Wei weichen302@gmx.com, 2012-02-05''' __license__ = 'GPL v3' - __author__ = 'kwetal' + __author__ = 'Rick Shang, kwetal' language = 'en' version = 1.01 - title = u'Foreign Affairs (Subcription or (free) Registration)' + title = u'Foreign Affairs (Subcription)' publisher = u'Council on Foreign Relations' category = u'USA, Foreign Affairs' description = u'The leading forum for serious 
discussion of American foreign policy and international affairs.' no_stylesheets = True remove_javascript = True + needs_subscription = True INDEX = 'http://www.foreignaffairs.com' FRONTPAGE = 'http://www.foreignaffairs.com/magazine' - INCLUDE_PREMIUM = False remove_tags = [] @@ -68,43 +68,57 @@ class ForeignAffairsRecipe(BasicNewsRecipe): def parse_index(self): + answer = [] soup = self.index_to_soup(self.FRONTPAGE) - sec_start = soup.findAll('div', attrs={'class':'panel-separator'}) + #get dates + date = re.split('\s\|\s',self.tag_to_string(soup.head.title.string))[0] + self.timefmt = u' [%s]'%date + + sec_start = soup.findAll('div', attrs= {'class':'panel-pane'}) for sec in sec_start: - content = sec.nextSibling - if content: - section = self.tag_to_string(content.find('h2')) - articles = [] - - tags = [] - for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}): - tags.append(div) - for li in content.findAll('li'): - tags.append(li) - - for div in tags: - title = url = description = author = None - - if self.INCLUDE_PREMIUM: - found_premium = False - else: - found_premium = div.findAll('span', attrs={'class': - 'premium-icon'}) - if not found_premium: - tag = div.find('div', attrs={'class': 'views-field-title'}) - - if tag: - a = tag.find('a') - if a: - title = self.tag_to_string(a) - url = self.INDEX + a['href'] - author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})) - tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'}) - description = self.tag_to_string(tag_summary) - articles.append({'title':title, 'date':None, 'url':url, - 'description':description, 'author':author}) - if articles: + articles = [] + section = self.tag_to_string(sec.find('h2')) + if 'Books' in section: + reviewsection=sec.find('div', attrs = {'class': 'item-list'}) + for subsection in reviewsection.findAll('div'): + 
subsectiontitle=self.tag_to_string(subsection.span.a) + subsectionurl=self.INDEX + subsection.span.a['href'] + soup1 = self.index_to_soup(subsectionurl) + for div in soup1.findAll('div', attrs = {'class': 'views-field-title'}): + if div.find('a') is not None: + originalauthor=self.tag_to_string(div.findNext('div', attrs = {'class':'views-field-field-article-book-nid'}).div.a) + title=subsectiontitle+': '+self.tag_to_string(div.span.a)+' by '+originalauthor + url=self.INDEX+div.span.a['href'] + atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'}) + if atr is not None: + author=self.tag_to_string(atr.span.a) + else: + author='' + desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'}) + if desc is not None: + description=self.tag_to_string(desc.div.p) + else: + description='' + articles.append({'title':title, 'date':None, 'url':url, 'description':description, 'author':author}) + subsectiontitle='' + else: + for div in sec.findAll('div', attrs = {'class': 'views-field-title'}): + if div.find('a') is not None: + title=self.tag_to_string(div.span.a) + url=self.INDEX+div.span.a['href'] + atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'}) + if atr is not None: + author=self.tag_to_string(atr.span.a) + else: + author='' + desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'}) + if desc is not None: + description=self.tag_to_string(desc.div.p) + else: + description='' + articles.append({'title':title, 'date':None, 'url':url, 'description':description, 'author':author}) + if articles: answer.append((section, articles)) return answer @@ -115,15 +129,17 @@ class ForeignAffairsRecipe(BasicNewsRecipe): return soup - needs_subscription = True + def get_browser(self): br = BasicNewsRecipe.get_browser() if self.username is not None and self.password is not None: - br.open('https://www.foreignaffairs.com/user?destination=home') + 
br.open('https://www.foreignaffairs.com/user?destination=user%3Fop%3Dlo') br.select_form(nr = 1) br['name'] = self.username br['pass'] = self.password br.submit() return br + def cleanup(self): + self.browser.open('http://www.foreignaffairs.com/logout?destination=user%3Fop=lo') From 3bdc5da43e7bf874ca6d2274adbf70036bd40d61 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 Jan 2013 08:25:43 +0530 Subject: [PATCH 43/46] ... --- manual/faq.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/manual/faq.rst b/manual/faq.rst index e9bb6fc70f..876a04d601 100644 --- a/manual/faq.rst +++ b/manual/faq.rst @@ -164,7 +164,6 @@ Follow these steps to find the problem: * Ensure your operating system is seeing the device. That is, the device should show up in Windows Explorer (in Windows) or Finder (in OS X). * In |app|, go to Preferences->Ignored Devices and check that your device is not being ignored - * In |app|, go to Preferences->Plugins->Device Interface plugin and make sure the plugin for your device is enabled, the plugin icon next to it should be green when it is enabled. * If all the above steps fail, go to Preferences->Miscellaneous and click debug device detection with your device attached and post the output as a ticket on `the calibre bug tracker `_. My device is non-standard or unusual. What can I do to connect to it? From f213f90fbe52e2a7b681f5e71ae2eb8b14bfde73 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 Jan 2013 08:31:29 +0530 Subject: [PATCH 44/46] ... 
--- src/calibre/ebooks/pdf/render/serialize.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/calibre/ebooks/pdf/render/serialize.py b/src/calibre/ebooks/pdf/render/serialize.py index 1e28e59a96..b2a17734db 100644 --- a/src/calibre/ebooks/pdf/render/serialize.py +++ b/src/calibre/ebooks/pdf/render/serialize.py @@ -356,14 +356,6 @@ class PDFStream(object): self.page_tree.obj.add_page(pageref) self.current_page = Page(self.page_tree, compress=self.compress) - def draw_text(self, text_object): - if text_object.font_path is None: - fontref = self.font_manager.add_standard_font(text_object.font_name) - else: - raise NotImplementedError() - name = self.current_page.add_font(fontref) - text_object.pdf_serialize(self.current_page, name) - def draw_glyph_run(self, transform, size, font_metrics, glyphs): glyph_ids = {x[-1] for x in glyphs} fontref = self.font_manager.add_font(font_metrics, glyph_ids) From 99551f1a6da571d376ce262128331edb9f3d23d1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 Jan 2013 09:02:24 +0530 Subject: [PATCH 45/46] Fix trutype embedded in PDF crashing ADE --- src/calibre/utils/fonts/sfnt/subset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/fonts/sfnt/subset.py b/src/calibre/utils/fonts/sfnt/subset.py index 9e20127614..bb8b73d013 100644 --- a/src/calibre/utils/fonts/sfnt/subset.py +++ b/src/calibre/utils/fonts/sfnt/subset.py @@ -84,7 +84,7 @@ def do_warn(warnings, *args): def pdf_subset(sfnt, glyphs): for tag in tuple(sfnt.tables): if tag not in {b'hhea', b'head', b'hmtx', b'maxp', - b'OS/2', b'post', b'cvt', b'fpgm', b'glyf', b'loca', + b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep', b'CFF ', b'VORG'}: # Remove non core tables since they are unused in PDF rendering del sfnt[tag] From 2fe223bf9713ab64fad4bf5059272cad9f263ce2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 Jan 2013 10:42:49 +0530 Subject: [PATCH 46/46] Update bundled pyparsing --- 
src/calibre/utils/pyparsing.py | 318 +++++++++++++++------------------ 1 file changed, 147 insertions(+), 171 deletions(-) diff --git a/src/calibre/utils/pyparsing.py b/src/calibre/utils/pyparsing.py index 9be97dc287..149ccaf1b0 100644 --- a/src/calibre/utils/pyparsing.py +++ b/src/calibre/utils/pyparsing.py @@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when - embedded comments """ -__version__ = "1.5.6" -__versionTime__ = "26 June 2011 10:53" +__version__ = "1.5.7" +__versionTime__ = "17 November 2012 16:18" __author__ = "Paul McGuire " import string @@ -81,66 +81,51 @@ __all__ = [ 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', +'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', 'originalTextFor', +'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', ] -""" -Detect if we are running version 3.X and make appropriate changes -Robert A. 
Clark -""" -_PY3K = sys.version_info[0] > 2 -if _PY3K: - _MAX_INT = sys.maxsize - basestring = str - unichr = chr - _ustr = str - alphas = string.ascii_lowercase + string.ascii_uppercase -else: - _MAX_INT = sys.maxint - range = xrange - set = lambda s : dict( [(c,0) for c in s] ) - alphas = string.lowercase + string.uppercase +_MAX_INT = sys.maxint +range = xrange +set = lambda s : dict( [(c,0) for c in s] ) - def _ustr(obj): - """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. - """ - if isinstance(obj,unicode): - return obj +def _ustr(obj): + """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries + str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It + then < returns the unicode object | encodes it with the default encoding | ... >. + """ + if isinstance(obj,unicode): + return obj - try: - # If this works, then _ustr(obj) has the same behaviour as str(obj), so - # it won't break any existing code. - return str(obj) + try: + # If this works, then _ustr(obj) has the same behaviour as str(obj), so + # it won't break any existing code. + return str(obj) - except UnicodeEncodeError: - # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) - # state that "The return value must be a string object". However, does a - # unicode object (being a subclass of basestring) count as a "string - # object"? - # If so, then return a unicode object: - return unicode(obj) - # Else encode it... but how? There are many choices... :) - # Replace unprintables with escape codes? - #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') - # Replace unprintables with question marks? - #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') - # ... 
- - alphas = string.lowercase + string.uppercase + except UnicodeEncodeError: + # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) + # state that "The return value must be a string object". However, does a + # unicode object (being a subclass of basestring) count as a "string + # object"? + # If so, then return a unicode object: + return unicode(obj) + # Else encode it... but how? There are many choices... :) + # Replace unprintables with escape codes? + #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') + # Replace unprintables with question marks? + #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') + # ... # build list of single arg builtins, tolerant of Python version, that can be used as parse actions singleArgBuiltins = [] import __builtin__ -for fname in "sum len enumerate sorted reversed list tuple set any all".split(): +for fname in "sum len sorted reversed list tuple set any all min max".split(): try: singleArgBuiltins.append(getattr(__builtin__,fname)) except AttributeError: @@ -159,7 +144,8 @@ def _xml_escape(data): class _Constants(object): pass -nums = string.digits +alphas = string.ascii_lowercase + string.ascii_uppercase +nums = "0123456789" hexnums = nums + "ABCDEFabcdef" alphanums = alphas + nums _bslash = chr(92) @@ -211,7 +197,7 @@ class ParseBaseException(Exception): return line_str.strip() def __dir__(self): return "loc msg pstr parserElement lineno col line " \ - "markInputLine __str__ __repr__".split() + "markInputline __str__ __repr__".split() class ParseException(ParseBaseException): """exception thrown when parse expressions don't match class; @@ -228,8 +214,8 @@ class ParseFatalException(ParseBaseException): pass class ParseSyntaxException(ParseFatalException): - """just like C{ParseFatalException}, but thrown internally when an - C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because + """just like C{L{ParseFatalException}}, but thrown 
internally when an + C{L{ErrorStop}} ('-' operator) indicates that parsing is to stop immediately because an unbacktrackable syntax error has been found""" def __init__(self, pe): super(ParseSyntaxException, self).__init__( @@ -444,16 +430,13 @@ class ParseResults(object): return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) def __str__( self ): - out = "[" - sep = "" + out = [] for i in self.__toklist: if isinstance(i, ParseResults): - out += sep + _ustr(i) + out.append(_ustr(i)) else: - out += sep + repr(i) - sep = ", " - out += "]" - return out + out.append(repr(i)) + return '[' + ', '.join(out) + ']' def _asStringList( self, sep='' ): out = [] @@ -616,7 +599,7 @@ class ParseResults(object): self.__parent = None def __dir__(self): - return dir(super(ParseResults,self)) + self.keys() + return dir(super(ParseResults,self)) + list(self.keys()) def col (loc,strg): """Returns current column within a string, counting newlines as line separators. @@ -624,7 +607,7 @@ def col (loc,strg): Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a + on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. """ @@ -636,7 +619,7 @@ def lineno(loc,strg): Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a + on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. 
""" @@ -666,33 +649,23 @@ def nullDebugAction(*args): pass 'decorator to trim function calls to match the arity of the target' -if not _PY3K: - def _trim_arity(func, maxargs=2): - limit = [0] - def wrapper(*args): - while 1: - try: - return func(*args[limit[0]:]) - except TypeError: - if limit[0] <= maxargs: - limit[0] += 1 - continue - raise - return wrapper -else: - def _trim_arity(func, maxargs=2): - limit = maxargs - def wrapper(*args): - #~ nonlocal limit - while 1: - try: - return func(*args[limit:]) - except TypeError: - if limit: - limit -= 1 - continue - raise - return wrapper +def _trim_arity(func, maxargs=2): + if func in singleArgBuiltins: + return lambda s,l,t: func(t) + limit = [0] + foundArity = [False] + def wrapper(*args): + while 1: + try: + ret = func(*args[limit[0]:]) + foundArity[0] = True + return ret + except TypeError: + if limit[0] <= maxargs and not foundArity[0]: + limit[0] += 1 + continue + raise + return wrapper class ParserElement(object): """Abstract base level parser element class.""" @@ -705,6 +678,13 @@ class ParserElement(object): ParserElement.DEFAULT_WHITE_CHARS = chars setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) + def inlineLiteralsUsing(cls): + """ + Set class to be used for inclusion of string literals into a parser. 
+ """ + ParserElement.literalStringClass = cls + inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) + def __init__( self, savelist=False ): self.parseAction = list() self.failAction = None @@ -789,14 +769,14 @@ class ParserElement(object): C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - s = the original string being parsed (see note below) - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a ParseResults object + - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object If the functions in fns modify the tokens, they can return them as the return value from fn, and the modified list of tokens will replace the original. Otherwise, fn does not need to return any value. Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a + on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. """ @@ -818,7 +798,7 @@ class ParserElement(object): - loc = location where expression match was attempted and failed - expr = the parse expression that failed - err = the exception thrown - The function returns no value. It may throw C{ParseFatalException} + The function returns no value. 
It may throw C{L{ParseFatalException}} if it is desired to stop parsing immediately.""" self.failAction = fn return self @@ -872,15 +852,12 @@ class ParserElement(object): loc,tokens = self.parseImpl( instring, preloc, doActions ) except IndexError: raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException: + except ParseBaseException, err: #~ print ("Exception raised:", err) err = None if self.debugActions[2]: - err = sys.exc_info()[1] self.debugActions[2]( instring, tokensStart, self, err ) if self.failAction: - if err is None: - err = sys.exc_info()[1] self.failAction( instring, tokensStart, self, err ) raise else: @@ -910,10 +887,9 @@ class ParserElement(object): self.resultsName, asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), modal=self.modalResults ) - except ParseBaseException: + except ParseBaseException, err: #~ print "Exception raised in user parse action:", err if (self.debugActions[2] ): - err = sys.exc_info()[1] self.debugActions[2]( instring, tokensStart, self, err ) raise else: @@ -952,8 +928,7 @@ class ParserElement(object): value = self._parseNoCache( instring, loc, doActions, callPreParse ) ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) return value - except ParseBaseException: - pe = sys.exc_info()[1] + except ParseBaseException, pe: ParserElement._exprArgCache[ lookup ] = pe raise @@ -994,7 +969,7 @@ class ParserElement(object): If you want the grammar to require that the entire input string be successfully parsed, then set C{parseAll} to True (equivalent to ending - the grammar with C{StringEnd()}). + the grammar with C{L{StringEnd()}}). Note: C{parseString} implicitly calls C{expandtabs()} on the input string, in order to report proper column numbers in parse actions. 
@@ -1023,12 +998,11 @@ class ParserElement(object): loc = self.preParse( instring, loc ) se = Empty() + StringEnd() se._parse( instring, loc ) - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc else: return tokens @@ -1076,16 +1050,15 @@ class ParserElement(object): loc = nextLoc else: loc = preloc+1 - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc def transformString( self, instring ): - """Extension to C{scanString}, to modify matching text with modified tokens that may + """Extension to C{L{scanString}}, to modify matching text with modified tokens that may be returned from a parse action. To use C{transformString}, define a grammar and attach a parse action to it that modifies the returned token list. Invoking C{transformString()} on a target string will then scan for matches, @@ -1110,33 +1083,31 @@ class ParserElement(object): out.append(instring[lastE:]) out = [o for o in out if o] return "".join(map(_ustr,_flatten(out))) - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc def searchString( self, instring, maxMatches=_MAX_INT ): - """Another extension to C{scanString}, simplifying the access to the tokens found + """Another extension to C{L{scanString}}, simplifying the access to the tokens found to match the given parse expression. May be called with optional C{maxMatches} argument, to clip searching after 'n' matches are found. 
""" try: return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc def __add__(self, other ): - """Implementation of + operator - returns And""" + """Implementation of + operator - returns C{L{And}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1144,9 +1115,9 @@ class ParserElement(object): return And( [ self, other ] ) def __radd__(self, other ): - """Implementation of + operator when left operand is not a C{ParserElement}""" + """Implementation of + operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1154,9 +1125,9 @@ class ParserElement(object): return other + self def __sub__(self, other): - """Implementation of - operator, returns C{And} with error stop""" + """Implementation of - operator, returns C{L{And}} with error stop""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1164,9 +1135,9 @@ class ParserElement(object): return And( [ self, And._ErrorStop(), other ] ) def __rsub__(self, other ): - """Implementation of - operator when left operand is not a C{ParserElement}""" + 
"""Implementation of - operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1179,12 +1150,12 @@ class ParserElement(object): tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples may also include C{None} as in: - C{expr*(n,None)} or C{expr*(n,)} is equivalent - to C{expr*n + ZeroOrMore(expr)} + to C{expr*n + L{ZeroOrMore}(expr)} (read as "at least n instances of C{expr}") - C{expr*(None,n)} is equivalent to C{expr*(0,n)} (read as "0 to n instances of C{expr}") - - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} - - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} + - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} + - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} Note that C{expr*(None,n)} does not raise an exception if more than n exprs exist in the input stream; that is, @@ -1245,9 +1216,9 @@ class ParserElement(object): return self.__mul__(other) def __or__(self, other ): - """Implementation of | operator - returns C{MatchFirst}""" + """Implementation of | operator - returns C{L{MatchFirst}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1255,9 +1226,9 @@ class ParserElement(object): return MatchFirst( [ self, other ] ) def __ror__(self, other ): - """Implementation of | operator when left operand is not a C{ParserElement}""" + """Implementation of | operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = 
ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1265,9 +1236,9 @@ class ParserElement(object): return other | self def __xor__(self, other ): - """Implementation of ^ operator - returns C{Or}""" + """Implementation of ^ operator - returns C{L{Or}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1275,9 +1246,9 @@ class ParserElement(object): return Or( [ self, other ] ) def __rxor__(self, other ): - """Implementation of ^ operator when left operand is not a C{ParserElement}""" + """Implementation of ^ operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1285,9 +1256,9 @@ class ParserElement(object): return other ^ self def __and__(self, other ): - """Implementation of & operator - returns C{Each}""" + """Implementation of & operator - returns C{L{Each}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1295,9 +1266,9 @@ class ParserElement(object): return Each( [ self, other ] ) def __rand__(self, other ): - """Implementation of & operator when left operand is not a C{ParserElement}""" + """Implementation of & operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, 
basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1305,11 +1276,11 @@ class ParserElement(object): return other & self def __invert__( self ): - """Implementation of ~ operator - returns C{NotAny}""" + """Implementation of ~ operator - returns C{L{NotAny}}""" return NotAny( self ) def __call__(self, name): - """Shortcut for C{setResultsName}, with C{listAllMatches=default}:: + """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") could be written as:: userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") @@ -1403,15 +1374,17 @@ class ParserElement(object): try: file_contents = file_or_filename.read() except AttributeError: - f = open(file_or_filename, "rb") + f = open(file_or_filename, "r") file_contents = f.read() f.close() try: return self.parseString(file_contents, parseAll) - except ParseBaseException: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] - raise exc + except ParseBaseException, exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc def getException(self): return ParseException("",0,self.errmsg,self) @@ -1515,10 +1488,11 @@ class Literal(Token): exc.pstr = instring raise exc _L = Literal +ParserElement.literalStringClass = Literal class Keyword(Token): """Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with C{Literal}:: + immediately followed by a non-keyword character. Compare with C{L{Literal}}:: Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. 
Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} Accepts two optional constructor arguments in addition to the keyword string: @@ -1821,9 +1795,9 @@ class QuotedString(Token): - quoteChar - string of one or more characters defining the quote delimiting string - escChar - character to escape quotes, typically backslash (default=None) - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) - - multiline - boolean indicating whether quotes can span multiple lines (default=False) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) + - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) """ super(QuotedString,self).__init__() @@ -2003,7 +1977,7 @@ class White(Token): by pyparsing grammars. This class is included when some whitespace structures are significant. Define with a string containing the whitespace characters to be matched; default is C{" \\t\\r\\n"}. 
Also takes optional C{min}, C{max}, and C{exact} arguments, - as defined for the C{Word} class.""" + as defined for the C{L{Word}} class.""" whiteStrs = { " " : "", "\t": "", @@ -2331,7 +2305,8 @@ class And(ParseExpression): class _ErrorStop(Empty): def __init__(self, *args, **kwargs): - super(Empty,self).__init__(*args, **kwargs) + super(And._ErrorStop,self).__init__(*args, **kwargs) + self.name = '-' self.leaveWhitespace() def __init__( self, exprs, savelist = True ): @@ -2359,8 +2334,7 @@ class And(ParseExpression): loc, exprtokens = e._parse( instring, loc, doActions ) except ParseSyntaxException: raise - except ParseBaseException: - pe = sys.exc_info()[1] + except ParseBaseException, pe: raise ParseSyntaxException(pe) except IndexError: raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) @@ -2412,8 +2386,7 @@ class Or(ParseExpression): for e in self.exprs: try: loc2 = e.tryParse( instring, loc ) - except ParseException: - err = sys.exc_info()[1] + except ParseException, err: if err.loc > maxExcLoc: maxException = err maxExcLoc = err.loc @@ -2436,7 +2409,7 @@ class Or(ParseExpression): def __ixor__(self, other ): if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) return self.append( other ) #Or( [ self, other ] ) def __str__( self ): @@ -2495,7 +2468,7 @@ class MatchFirst(ParseExpression): def __ior__(self, other ): if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) return self.append( other ) #MatchFirst( [ self, other ] ) def __str__( self ): @@ -2916,13 +2889,14 @@ class Forward(ParseElementEnhance): thereby leaving b and c out as parseable alternatives. It is recommended that you explicitly group the values inserted into the C{Forward}:: fwdExpr << (a | b | c) + Converting to use the '<<=' operator instead will avoid this problem. 
""" def __init__( self, other=None ): super(Forward,self).__init__( other, savelist=False ) def __lshift__( self, other ): if isinstance( other, basestring ): - other = Literal(other) + other = ParserElement.literalStringClass(other) self.expr = other self.mayReturnEmpty = other.mayReturnEmpty self.strRepr = None @@ -2933,7 +2907,8 @@ class Forward(ParseElementEnhance): self.saveAsList = self.expr.saveAsList self.ignoreExprs.extend(self.expr.ignoreExprs) return None - + __ilshift__ = __lshift__ + def leaveWhitespace( self ): self.skipWhitespace = False return self @@ -2993,7 +2968,7 @@ class Upcase(TokenConverter): DeprecationWarning,stacklevel=2) def postParse( self, instring, loc, tokenlist ): - return list(map( string.upper, tokenlist )) + return list(map( str.upper, tokenlist )) class Combine(TokenConverter): @@ -3029,7 +3004,7 @@ class Combine(TokenConverter): return retToks class Group(TokenConverter): - """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" + """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.""" def __init__( self, expr ): super(Group,self).__init__( expr ) self.saveAsList = True @@ -3105,8 +3080,7 @@ def traceParseAction(f): sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) try: ret = f(*paArgs) - except Exception: - exc = sys.exc_info()[1] + except Exception, exc: sys.stderr.write( "<}()}. """ def _replFunc(*args): return [replStr] @@ -3398,7 +3372,7 @@ def downcaseTokens(s,l,t): return [ tt.lower() for tt in map(_ustr,t) ] def keepOriginalText(s,startLoc,t): - """DEPRECATED - use new helper method C{originalTextFor}. + """DEPRECATED - use new helper method C{L{originalTextFor}}. 
Helper parse action to preserve original parsed text, overriding any nested parse actions.""" try: @@ -3464,7 +3438,7 @@ def makeXMLTags(tagStr): def withAttribute(*args,**attrDict): """Helper to create a validating parse action to be used with start tags created - with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag + with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag with a required attribute value, to avoid false matches on common tags such as C{} or C{

}. @@ -3499,7 +3473,7 @@ opAssoc = _Constants() opAssoc.LEFT = object() opAssoc.RIGHT = object() -def operatorPrecedence( baseExpr, opList ): +def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): """Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached @@ -3518,13 +3492,15 @@ def operatorPrecedence( baseExpr, opList ): be 1, 2, or 3) - rightLeftAssoc is the indicator whether the operator is right or left associative, using the pyparsing-defined - constants opAssoc.RIGHT and opAssoc.LEFT. + constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. - parseAction is the parse action to be associated with expressions matching this operator expression (the parse action tuple member may be omitted) + - lpar - expression for matching left-parentheses (default=Suppress('(')) + - rpar - expression for matching right-parentheses (default=Suppress(')')) """ ret = Forward() - lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) + lastExpr = baseExpr | ( lpar + ret + rpar ) for i,operDef in enumerate(opList): opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] if arity == 3: @@ -3569,6 +3545,7 @@ def operatorPrecedence( baseExpr, opList ): lastExpr = thisExpr ret << lastExpr return ret +operatorPrecedence = infixNotation dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") @@ -3715,8 +3692,7 @@ if __name__ == "__main__": print ("tokens.columns = " + str(tokens.columns)) print ("tokens.tables = " + str(tokens.tables)) print (tokens.asXML("SQL",True)) - except ParseBaseException: - err = sys.exc_info()[1] + except ParseBaseException, err: print (teststring + "->") print (err.line) print (" "*(err.column-1) 
+ "^")