|
|
|
@ -0,0 +1,899 @@
|
|
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
|
|
r"""
|
|
|
|
|
==============
|
|
|
|
|
smartypants.py
|
|
|
|
|
==============
|
|
|
|
|
|
|
|
|
|
----------------------------
|
|
|
|
|
SmartyPants ported to Python
|
|
|
|
|
----------------------------
|
|
|
|
|
|
|
|
|
|
Ported by `Chad Miller`_
|
|
|
|
|
Copyright (c) 2004, 2007 Chad Miller
|
|
|
|
|
|
|
|
|
|
original `SmartyPants`_ by `John Gruber`_
|
|
|
|
|
Copyright (c) 2003 John Gruber
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Synopsis
|
|
|
|
|
========
|
|
|
|
|
|
|
|
|
|
A smart-quotes plugin for Pyblosxom_.
|
|
|
|
|
|
|
|
|
|
The original "SmartyPants" is a free web publishing plug-in for Movable Type,
|
|
|
|
|
Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
|
|
|
|
|
into "smart" typographic punctuation HTML entities.
|
|
|
|
|
|
|
|
|
|
This software, *smartypants.py*, endeavours to be a functional port of
|
|
|
|
|
SmartyPants to Python, for use with Pyblosxom_.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Description
|
|
|
|
|
===========
|
|
|
|
|
|
|
|
|
|
SmartyPants can perform the following transformations:
|
|
|
|
|
|
|
|
|
|
- Straight quotes ( " and ' ) into "curly" quote HTML entities
|
|
|
|
|
- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
|
|
|
|
|
- Dashes (``--`` and ``---``) into en- and em-dash entities
|
|
|
|
|
- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
|
|
|
|
|
|
|
|
|
|
This means you can write, edit, and save your posts using plain old
|
|
|
|
|
ASCII straight quotes, plain dashes, and plain dots, but your published
|
|
|
|
|
posts (and final HTML output) will appear with smart quotes, em-dashes,
|
|
|
|
|
and proper ellipses.
|
|
|
|
|
|
|
|
|
|
SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
|
|
|
|
|
``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to
|
|
|
|
|
display text where smart quotes and other "smart punctuation" would not be
|
|
|
|
|
appropriate, such as source code or example markup.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Backslash Escapes
|
|
|
|
|
=================
|
|
|
|
|
|
|
|
|
|
If you need to use literal straight quotes (or plain hyphens and
|
|
|
|
|
periods), SmartyPants accepts the following backslash escape sequences
|
|
|
|
|
to force non-smart punctuation. It does so by transforming the escape
|
|
|
|
|
sequence into a decimal-encoded HTML entity:
|
|
|
|
|
|
|
|
|
|
(FIXME: table here.)
|
|
|
|
|
|
|
|
|
|
.. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved.
|
|
|
|
|
.. comment ====== ===== =========
|
|
|
|
|
.. comment Escape Value Character
|
|
|
|
|
.. comment ====== ===== =========
|
|
|
|
|
.. comment  \\\\\\\\  &#92;  \\\\
.. comment  \\\\"     &#34;  "
.. comment  \\\\'     &#39;  '
.. comment  \\\\.     &#46;  .
.. comment  \\\\-     &#45;  \-
.. comment  \\\\`     &#96;  \`
|
|
|
|
|
.. comment ====== ===== =========
|
|
|
|
|
|
|
|
|
|
This is useful, for example, when you want to use straight quotes as
|
|
|
|
|
foot and inch marks: 6'2" tall; a 17" iMac.
|
|
|
|
|
|
|
|
|
|
Options
|
|
|
|
|
=======
|
|
|
|
|
|
|
|
|
|
For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
|
|
|
|
|
specify configuration options.
|
|
|
|
|
|
|
|
|
|
Numeric values are the easiest way to configure SmartyPants' behavior:
|
|
|
|
|
|
|
|
|
|
"0"
|
|
|
|
|
Suppress all transformations. (Do nothing.)
|
|
|
|
|
"1"
|
|
|
|
|
Performs default SmartyPants transformations: quotes (including
|
|
|
|
|
\`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
|
|
|
|
|
is used to signify an em-dash; there is no support for en-dashes.
|
|
|
|
|
|
|
|
|
|
"2"
|
|
|
|
|
Same as smarty_pants="1", except that it uses the old-school typewriter
|
|
|
|
|
shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
|
|
|
|
|
(dash dash dash)
|
|
|
|
|
for em-dashes.
|
|
|
|
|
|
|
|
|
|
"3"
|
|
|
|
|
Same as smarty_pants="2", but inverts the shorthand for dashes:
|
|
|
|
|
"``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
|
|
|
|
|
en-dashes.
|
|
|
|
|
|
|
|
|
|
"-1"
|
|
|
|
|
Stupefy mode. Reverses the SmartyPants transformation process, turning
|
|
|
|
|
the HTML entities produced by SmartyPants into their ASCII equivalents.
|
|
|
|
|
E.g. "&#8220;" is turned into a simple double-quote ("), "&#8212;" is
|
|
|
|
|
turned into two dashes, etc.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
The following single-character attribute values can be combined to toggle
|
|
|
|
|
individual transformations from within the smarty_pants attribute. For
|
|
|
|
|
example, to educate normal quotes and em-dashes, but not ellipses or
|
|
|
|
|
\`\`backticks'' -style quotes:
|
|
|
|
|
|
|
|
|
|
``py['smartypants_attributes'] = "qd"``
|
|
|
|
|
|
|
|
|
|
"q"
|
|
|
|
|
Educates normal quote characters: (") and (').
|
|
|
|
|
|
|
|
|
|
"b"
|
|
|
|
|
Educates \`\`backticks'' -style double quotes.
|
|
|
|
|
|
|
|
|
|
"B"
|
|
|
|
|
Educates \`\`backticks'' -style double quotes and \`single' quotes.
|
|
|
|
|
|
|
|
|
|
"d"
|
|
|
|
|
Educates em-dashes.
|
|
|
|
|
|
|
|
|
|
"D"
|
|
|
|
|
Educates em-dashes and en-dashes, using old-school typewriter shorthand:
|
|
|
|
|
(dash dash) for en-dashes, (dash dash dash) for em-dashes.
|
|
|
|
|
|
|
|
|
|
"i"
|
|
|
|
|
Educates em-dashes and en-dashes, using inverted old-school typewriter
|
|
|
|
|
shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
|
|
|
|
|
|
|
|
|
|
"e"
|
|
|
|
|
Educates ellipses.
|
|
|
|
|
|
|
|
|
|
"w"
|
|
|
|
|
Translates any instance of ``&quot;`` into a normal double-quote character.
|
|
|
|
|
This should be of no interest to most people, but of particular interest
|
|
|
|
|
to anyone who writes their posts using Dreamweaver, as Dreamweaver
|
|
|
|
|
inexplicably uses this entity to represent a literal double-quote
|
|
|
|
|
character. SmartyPants only educates normal quotes, not entities (because
|
|
|
|
|
ordinarily, entities are used for the explicit purpose of representing the
|
|
|
|
|
specific character they represent). The "w" option must be used in
|
|
|
|
|
conjunction with one (or both) of the other quote options ("q" or "b").
|
|
|
|
|
Thus, if you wish to apply all SmartyPants transformations (quotes, en-
|
|
|
|
|
and em-dashes, and ellipses) and also translate ``&quot;`` entities into
|
|
|
|
|
regular quotes so SmartyPants can educate them, you should pass the
|
|
|
|
|
following to the smarty_pants attribute: ``py['smartypants_attributes'] = "qDew"``
|
|
|
|
|
|
|
|
|
|
The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for
|
|
|
|
|
which no Smarty Pants rendering will occur.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Caveats
|
|
|
|
|
=======
|
|
|
|
|
|
|
|
|
|
Why You Might Not Want to Use Smart Quotes in Your Weblog
|
|
|
|
|
---------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
For one thing, you might not care.
|
|
|
|
|
|
|
|
|
|
Most normal, mentally stable individuals do not take notice of proper
|
|
|
|
|
typographic punctuation. Many design and typography nerds, however, break
|
|
|
|
|
out in a nasty rash when they encounter, say, a restaurant sign that uses
|
|
|
|
|
a straight apostrophe to spell "Joe's".
|
|
|
|
|
|
|
|
|
|
If you're the sort of person who just doesn't care, you might well want to
|
|
|
|
|
continue not caring. Using straight quotes -- and sticking to the 7-bit
|
|
|
|
|
ASCII character set in general -- is certainly a simpler way to live.
|
|
|
|
|
|
|
|
|
|
Even if you *do* care about accurate typography, you still might want to
|
|
|
|
|
think twice before educating the quote characters in your weblog. One side
|
|
|
|
|
effect of publishing curly quote HTML entities is that it makes your
|
|
|
|
|
weblog a bit harder for others to quote from using copy-and-paste. What
|
|
|
|
|
happens is that when someone copies text from your blog, the copied text
|
|
|
|
|
contains the 8-bit curly quote characters (as well as the 8-bit characters
|
|
|
|
|
for em-dashes and ellipses, if you use these options). These characters
|
|
|
|
|
are not standard across different text encoding methods, which is why they
|
|
|
|
|
need to be encoded as HTML entities.
|
|
|
|
|
|
|
|
|
|
People copying text from your weblog, however, may not notice that you're
|
|
|
|
|
using curly quotes, and they'll go ahead and paste the unencoded 8-bit
|
|
|
|
|
characters copied from their browser into an email message or their own
|
|
|
|
|
weblog. When pasted as raw "smart quotes", these characters are likely to
|
|
|
|
|
get mangled beyond recognition.
|
|
|
|
|
|
|
|
|
|
That said, my own opinion is that any decent text editor or email client
|
|
|
|
|
makes it easy to stupefy smart quote characters into their 7-bit
|
|
|
|
|
equivalents, and I don't consider it my problem if you're using an
|
|
|
|
|
indecent text editor or email client.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Algorithmic Shortcomings
|
|
|
|
|
------------------------
|
|
|
|
|
|
|
|
|
|
One situation in which quotes will get curled the wrong way is when
|
|
|
|
|
apostrophes are used at the start of leading contractions. For example:
|
|
|
|
|
|
|
|
|
|
``'Twas the night before Christmas.``
|
|
|
|
|
|
|
|
|
|
In the case above, SmartyPants will turn the apostrophe into an opening
|
|
|
|
|
single-quote, when in fact it should be a closing one. I don't think
|
|
|
|
|
this problem can be solved in the general case -- every word processor
|
|
|
|
|
I've tried gets this wrong as well. In such cases, it's best to use the
|
|
|
|
|
proper HTML entity for closing single-quotes (``&#8217;``) by hand.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Bugs
|
|
|
|
|
====
|
|
|
|
|
|
|
|
|
|
To file bug reports or feature requests (other than topics listed in the
|
|
|
|
|
Caveats section above) please send email to: mailto:smartypantspy@chad.org
|
|
|
|
|
|
|
|
|
|
If the bug involves quotes being curled the wrong way, please send example
|
|
|
|
|
text to illustrate.
|
|
|
|
|
|
|
|
|
|
To Do list
|
|
|
|
|
----------
|
|
|
|
|
|
|
|
|
|
- Provide a function for use within templates to quote anything at all.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Version History
|
|
|
|
|
===============
|
|
|
|
|
|
|
|
|
|
1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400
|
|
|
|
|
- Fixed bug where blocks of precious unalterable text were instead
|
|
|
|
|
interpreted. Thanks to Le Roux and Dirk van Oosterbosch.
|
|
|
|
|
|
|
|
|
|
1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400
|
|
|
|
|
- Fix bogus magical quotation when there is no hint that the
|
|
|
|
|
user wants it, e.g., in "21st century". Thanks to Nathan Hamblen.
|
|
|
|
|
- Be smarter about quotes before terminating numbers in an en-dash'ed
|
|
|
|
|
range.
|
|
|
|
|
|
|
|
|
|
1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500
|
|
|
|
|
- Fix a date-processing bug, as reported by jacob childress.
|
|
|
|
|
- Begin a test-suite for ensuring correct output.
|
|
|
|
|
- Removed import of "string", since I didn't really need it.
|
|
|
|
|
(This was my first ever Python program. Sue me!)
|
|
|
|
|
|
|
|
|
|
1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400
|
|
|
|
|
- Abort processing if the flavour is in forbidden-list. Default of
|
|
|
|
|
[ "rss" ] (Idea of Wolfgang SCHNERRING.)
|
|
|
|
|
- Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING.
|
|
|
|
|
|
|
|
|
|
1.5_1.2: Mon, 24 May 2004 08:14:54 -0400
|
|
|
|
|
- Some single quotes weren't replaced properly. Diff-tesuji played
|
|
|
|
|
by Benjamin GEIGER.
|
|
|
|
|
|
|
|
|
|
1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500
|
|
|
|
|
- Support upcoming pyblosxom 0.9 plugin verification feature.
|
|
|
|
|
|
|
|
|
|
1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500
|
|
|
|
|
- Initial release
|
|
|
|
|
|
|
|
|
|
Version Information
|
|
|
|
|
-------------------
|
|
|
|
|
|
|
|
|
|
Version numbers will track the SmartyPants_ version numbers, with the addition
|
|
|
|
|
of an underscore and the smartypants.py version on the end.
|
|
|
|
|
|
|
|
|
|
New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_
|
|
|
|
|
|
|
|
|
|
.. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy
|
|
|
|
|
|
|
|
|
|
Authors
|
|
|
|
|
=======
|
|
|
|
|
|
|
|
|
|
`John Gruber`_ did all of the hard work of writing this software in Perl for
|
|
|
|
|
`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
|
|
|
|
|
ported it to Python to use with Pyblosxom_.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Additional Credits
|
|
|
|
|
==================
|
|
|
|
|
|
|
|
|
|
Portions of the SmartyPants original work are based on Brad Choate's nifty
|
|
|
|
|
MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
|
|
|
|
|
this plug-in. Brad Choate is a fine hacker indeed.
|
|
|
|
|
|
|
|
|
|
`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
|
|
|
|
|
testing of the original SmartyPants.
|
|
|
|
|
|
|
|
|
|
`Rael Dornfest`_ ported SmartyPants to Blosxom.
|
|
|
|
|
|
|
|
|
|
.. _Brad Choate: http://bradchoate.com/
|
|
|
|
|
.. _Jeremy Hedley: http://antipixel.com/
|
|
|
|
|
.. _Charles Wiltgen: http://playbacktime.com/
|
|
|
|
|
.. _Rael Dornfest: http://raelity.org/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Copyright and License
|
|
|
|
|
=====================
|
|
|
|
|
|
|
|
|
|
SmartyPants_ license::
|
|
|
|
|
|
|
|
|
|
Copyright (c) 2003 John Gruber
|
|
|
|
|
(http://daringfireball.net/)
|
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
|
modification, are permitted provided that the following conditions are
|
|
|
|
|
met:
|
|
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
notice, this list of conditions and the following disclaimer in
|
|
|
|
|
the documentation and/or other materials provided with the
|
|
|
|
|
distribution.
|
|
|
|
|
|
|
|
|
|
* Neither the name "SmartyPants" nor the names of its contributors
|
|
|
|
|
may be used to endorse or promote products derived from this
|
|
|
|
|
software without specific prior written permission.
|
|
|
|
|
|
|
|
|
|
This software is provided by the copyright holders and contributors "as
|
|
|
|
|
is" and any express or implied warranties, including, but not limited
|
|
|
|
|
to, the implied warranties of merchantability and fitness for a
|
|
|
|
|
particular purpose are disclaimed. In no event shall the copyright
|
|
|
|
|
owner or contributors be liable for any direct, indirect, incidental,
|
|
|
|
|
special, exemplary, or consequential damages (including, but not
|
|
|
|
|
limited to, procurement of substitute goods or services; loss of use,
|
|
|
|
|
data, or profits; or business interruption) however caused and on any
|
|
|
|
|
theory of liability, whether in contract, strict liability, or tort
|
|
|
|
|
(including negligence or otherwise) arising in any way out of the use
|
|
|
|
|
of this software, even if advised of the possibility of such damage.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
smartypants.py license::
|
|
|
|
|
|
|
|
|
|
smartypants.py is a derivative work of SmartyPants.
|
|
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
|
modification, are permitted provided that the following conditions are
|
|
|
|
|
met:
|
|
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
notice, this list of conditions and the following disclaimer in
|
|
|
|
|
the documentation and/or other materials provided with the
|
|
|
|
|
distribution.
|
|
|
|
|
|
|
|
|
|
This software is provided by the copyright holders and contributors "as
|
|
|
|
|
is" and any express or implied warranties, including, but not limited
|
|
|
|
|
to, the implied warranties of merchantability and fitness for a
|
|
|
|
|
particular purpose are disclaimed. In no event shall the copyright
|
|
|
|
|
owner or contributors be liable for any direct, indirect, incidental,
|
|
|
|
|
special, exemplary, or consequential damages (including, but not
|
|
|
|
|
limited to, procurement of substitute goods or services; loss of use,
|
|
|
|
|
data, or profits; or business interruption) however caused and on any
|
|
|
|
|
theory of liability, whether in contract, strict liability, or tort
|
|
|
|
|
(including negligence or otherwise) arising in any way out of the use
|
|
|
|
|
of this software, even if advised of the possibility of such damage.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.. _John Gruber: http://daringfireball.net/
|
|
|
|
|
.. _Chad Miller: http://web.chad.org/
|
|
|
|
|
|
|
|
|
|
.. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom
|
|
|
|
|
.. _SmartyPants: http://daringfireball.net/projects/smartypants/
|
|
|
|
|
.. _Movable Type: http://www.movabletype.org/
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
# Attribute string used when an entry supplies no "smartypants_attributes"
# value, and as the default ``attr`` argument of smartyPants().
default_smartypants_attr = "1"
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
# Matches an opening or closing pre/code/kbd/script/math tag, ignoring case.
# Group 1 is "/" for a closing tag (otherwise None); group 2 is the tag name.
tags_to_skip_regex = re.compile(r"<(/)?(pre|code|kbd|script|math)[^>]*>", re.I)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def verify_installation(request):
    """Report that the plugin is functional.

    Parameter:  ``request`` is accepted but never inspected.
    Returns:    Always 1.
    """
    # assert the plugin is functional
    plugin_is_functional = 1
    return plugin_is_functional
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cb_story(args):
    """Apply SmartyPants to the body and title of the entry in ``args``.

    Parameter:  ``args`` is a mapping with an "entry" item and, normally, a
                "request" item.  The entry is indexed like a dict and must
                provide ``getData()``.  Its optional
                "smartypants_forbidden_flavours" and "smartypants_attributes"
                items override the defaults ([ "rss" ] and
                ``default_smartypants_attr``).

    Returns:    None.  ``args["entry"]["body"]`` and
                ``args["entry"]["title"]`` are replaced in place, unless
                processing is aborted: either the request flavour is in the
                forbidden list or, when no flavour can be looked up, the
                start of the body looks like escaped HTML.
    """
    entry = args["entry"]

    try:
        forbidden_flavours = entry["smartypants_forbidden_flavours"]
    except KeyError:
        forbidden_flavours = ["rss"]

    try:
        attributes = entry["smartypants_attributes"]
    except KeyError:
        attributes = default_smartypants_attr

    if attributes is None:
        attributes = default_smartypants_attr

    # Fetched before the flavour check, exactly as before, in case getData()
    # has effects the rest of the pipeline relies on.
    entryData = entry.getData()

    try:
        if args["request"]["flavour"] in forbidden_flavours:
            return
    except KeyError:
        # No flavour available: sniff the stream instead.  Escaped HTML
        # begins with the "&lt;" entity, not with a literal "<" -- ordinary
        # markup must still be processed.
        if "&lt;" in entry["body"][0:15]:
            return  # abort if it looks like escaped HTML.  FIXME

    # FIXME: make these configurable, perhaps?
    entry["body"] = smartyPants(entryData, attributes)
    entry["title"] = smartyPants(entry["title"], attributes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### internal functions below here
|
|
|
|
|
|
|
|
|
|
def smartyPants(text, attr=default_smartypants_attr):
    """Return ``text`` with the transformations selected by ``attr`` applied.

    Parameters:
        text:  String of text and HTML markup.
        attr:  Attribute string.  Either one of the numeric values

                   0  : do nothing
                   1  : set all
                   2  : set all, using old school en- and em- dash shortcuts
                   3  : set all, using inverted old school en and em- dash
                        shortcuts
                   -1 : stupefy mode (entities back to ASCII)

               or any combination of the single-character flags

                   q : quotes
                   b : backtick quotes (``double'' only)
                   B : backtick quotes (``double'' and `single')
                   d : dashes
                   D : old school dashes
                   i : inverted old school dashes
                   e : ellipses
                   w : convert &quot; entities to " for Dreamweaver users

               Unknown flag characters are ignored.

    Returns:   The processed string.  Tags are copied through untouched, and
               text inside the tags matched by ``tags_to_skip_regex`` is left
               alone.
    """
    if attr == "0":
        # Do nothing.
        return text

    convert_quot = False  # should we translate &quot; entities into normal quotes?
    do_quotes = False
    do_ellipses = False
    do_stupefy = False
    do_backticks = 0  # 0: off, 1: ``double'' only, 2: ``double'' and `single'
    do_dashes = 0     # 0: off, 1: default, 2: old school, 3: inverted old school

    if attr in ("1", "2", "3"):
        # Do everything; the number only selects the dash shorthand.
        do_quotes = True
        do_backticks = 1
        do_dashes = int(attr)
        do_ellipses = True
    elif attr == "-1":
        # Special "stupefy" mode.
        do_stupefy = True
    else:
        for c in attr:
            if c == "q":
                do_quotes = True
            elif c == "b":
                do_backticks = 1
            elif c == "B":
                do_backticks = 2
            elif c == "d":
                do_dashes = 1
            elif c == "D":
                do_dashes = 2
            elif c == "i":
                do_dashes = 3
            elif c == "e":
                do_ellipses = True
            elif c == "w":
                convert_quot = True
            # ignore unknown option

    dash_educators = {
        1: educateDashes,
        2: educateDashesOldSchool,
        3: educateDashesOldSchoolInverted,
    }

    skipped_tag_stack = []
    result = []
    in_pre = False

    # This is a cheat, used to get some context for one-character tokens that
    # consist of just a quote char.  What we do is remember the last character
    # of the previous text token, to use as context to curl single-character
    # quote tokens correctly.
    prev_token_last_char = ""

    for token_kind, token_text in _tokenize(text):
        if token_kind == "tag":
            # Don't mess with quotes inside some tags.  This does not handle
            # self <closing/> tags!
            result.append(token_text)
            skip_match = tags_to_skip_regex.match(token_text)
            if skip_match is None:
                continue

            tag_name = skip_match.group(2).lower()
            if not skip_match.group(1):
                skipped_tag_stack.append(tag_name)
                in_pre = True
            else:
                # A close that doesn't match the innermost open is ignored.
                # This isn't XHTML.  We should barf here.
                if skipped_tag_stack and tag_name == skipped_tag_stack[-1]:
                    skipped_tag_stack.pop()
                if not skipped_tag_stack:
                    in_pre = False
            continue

        t = token_text
        last_char = t[-1:]  # Remember last char of this token before processing.

        if not in_pre:
            t = processEscapes(t)

            # Only with the "w" flag: the old `!= "0"` test against a False
            # default made this conversion unconditional.
            if convert_quot:
                t = t.replace("&quot;", '"')

            if do_dashes:
                t = dash_educators[do_dashes](t)

            if do_ellipses:
                t = educateEllipses(t)

            # Note: backticks need to be processed before quotes.
            if do_backticks:
                t = educateBackticks(t)

            if do_backticks == 2:
                t = educateSingleBackticks(t)

            if do_quotes:
                after_non_space = re.match(r"\S", prev_token_last_char)
                if t == "'":
                    # Special case: single-character ' token
                    if after_non_space:
                        t = "&#8217;"
                    else:
                        t = "&#8216;"
                elif t == '"':
                    # Special case: single-character " token
                    if after_non_space:
                        t = "&#8221;"
                    else:
                        t = "&#8220;"
                else:
                    # Normal case:
                    t = educateQuotes(t)

            if do_stupefy:
                t = stupefyEntities(t)

        prev_token_last_char = last_char
        result.append(t)

    return "".join(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def educateQuotes(str):
    """
    Parameter:  String.

    Returns:    The string, with "educated" curly quote HTML entities.

    Example input:  "Isn't this fun?"
    Example output: &#8220;Isn&#8217;t this fun?&#8221;
    """
    text = str

    punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""

    # Special case if the very first character is a quote
    # followed by punctuation at a non-word-break. Close the quotes by brute force:
    # (\B is the regex non-word-boundary; the doubled backslash used before
    # demanded a literal backslash and "B", so this case could never fire.)
    text = re.sub(r"""^'(?=%s\B)""" % (punct_class,), "&#8217;", text)
    text = re.sub(r"""^"(?=%s\B)""" % (punct_class,), "&#8221;", text)

    # Special case for double sets of quotes, e.g.:
    #   <p>He said, "'Quoted' words in a larger quote."</p>
    text = re.sub(r'''"'(?=\w)''', "&#8220;&#8216;", text)
    text = re.sub(r"""'"(?=\w)""", "&#8216;&#8220;", text)

    # Special case for decade abbreviations (the '80s):
    text = re.sub(r"""\b'(?=\d{2}s)""", "&#8217;", text)

    close_class = r"""[^\ \t\r\n\[\{\(\-]"""
    # "#" is escaped because this is interpolated into re.VERBOSE patterns,
    # where a bare "#" would start a comment.
    dec_dashes = r"""&\#8211;|&\#8212;"""

    # Context that makes a following quote an opening one.
    opening_context = r"""
            (
                \s          |   # a whitespace char, or
                &nbsp;      |   # a non-breaking space entity, or
                --          |   # dashes, or
                &[mn]dash;  |   # named dash entities
                %s          |   # or decimal entities
                &\#x201[34];    # or hex
            )
            """ % (dec_dashes,)

    # Get most opening single quotes:
    opening_single_quotes_regex = re.compile(opening_context + r"""
            '                 # the quote
            (?=\w)            # followed by a word character
            """, re.VERBOSE)
    text = opening_single_quotes_regex.sub(r"\1&#8216;", text)

    # Single closing quotes, first the ones not followed by whitespace,
    # a possessive "s" or a digit ...
    closing_single_quotes_regex = re.compile(r"""
            (%s)
            '
            (?!\s | s\b | \d)
            """ % (close_class,), re.VERBOSE)
    text = closing_single_quotes_regex.sub(r"\1&#8217;", text)

    # ... then the ones followed by whitespace or a possessive "s".
    closing_single_quotes_regex = re.compile(r"""
            (%s)
            '
            (\s | s\b)
            """ % (close_class,), re.VERBOSE)
    text = closing_single_quotes_regex.sub(r"\1&#8217;\2", text)

    # Any remaining single quotes should be opening ones:
    text = text.replace("'", "&#8216;")

    # Get most opening double quotes:
    opening_double_quotes_regex = re.compile(opening_context + r'''
            "                 # the quote
            (?=\w)            # followed by a word character
            ''', re.VERBOSE)
    text = opening_double_quotes_regex.sub(r"\1&#8220;", text)

    # Double closing quotes: before whitespace ...
    closing_double_quotes_regex = re.compile(r'''
            "
            (?=\s)
            ''', re.VERBOSE)
    text = closing_double_quotes_regex.sub("&#8221;", text)

    # ... or after a character that indicates the quote should be closing.
    closing_double_quotes_regex = re.compile(r'''
            (%s)   # character that indicates the quote should be closing
            "
            ''' % (close_class,), re.VERBOSE)
    text = closing_double_quotes_regex.sub(r"\1&#8221;", text)

    # Any remaining quotes should be opening ones.
    text = text.replace('"', "&#8220;")

    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def educateBackticks(str):
    """
    Parameter:  String.
    Returns:    The string, with ``backticks'' -style double quotes
                translated into HTML curly quote entities.
    Example input:  ``Isn't this fun?''
    Example output: &#8220;Isn't this fun?&#8221;
    """
    text = str
    text = text.replace("``", "&#8220;")  # opening double quote
    text = text.replace("''", "&#8221;")  # closing double quote
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def educateSingleBackticks(str):
    """
    Parameter:  String.
    Returns:    The string, with `backticks' -style single quotes
                translated into HTML curly quote entities.

    Example input:  `Isn't this fun?'
    Example output: &#8216;Isn&#8217;t this fun?&#8217;
    """
    text = str
    text = text.replace("`", "&#8216;")  # every backtick opens
    text = text.replace("'", "&#8217;")  # every straight single quote closes
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def educateDashes(str):
    """
    Parameter:  String.

    Returns:    The string, with each instance of "--" translated to
                an em-dash HTML entity.  An instance of "---" is translated
                to an en-dash HTML entity.
    """
    text = str
    # The longer run must be replaced first, or "---" would be consumed
    # as "--" plus a stray hyphen.
    text = text.replace("---", "&#8211;")  # en  (yes, backwards)
    text = text.replace("--", "&#8212;")   # em (yes, backwards)
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def educateDashesOldSchool(str):
    """
    Parameter:  String.

    Returns:    The string, with each instance of "--" translated to
                an en-dash HTML entity, and each "---" translated to
                an em-dash HTML entity.
    """
    text = str
    # The longer run must be replaced first, or "---" would be consumed
    # as "--" plus a stray hyphen.
    text = text.replace("---", "&#8212;")  # em
    text = text.replace("--", "&#8211;")   # en
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def educateDashesOldSchoolInverted(str):
    """
    Parameter:  String.

    Returns:    The string, with each instance of "--" translated to
                an em-dash HTML entity, and each "---" translated to
                an en-dash HTML entity. Two reasons why: First, unlike the
                en- and em-dash syntax supported by
                EducateDashesOldSchool(), it's compatible with existing
                entries written before SmartyPants 1.1, back when "--" was
                only used for em-dashes.  Second, em-dashes are more
                common than en-dashes, and so it sort of makes sense that
                the shortcut should be shorter to type. (Thanks to Aaron
                Swartz for the idea.)
    """
    text = str
    # The longer run must be replaced first, or "---" would be consumed
    # as "--" plus a stray hyphen.
    text = text.replace("---", "&#8211;")  # en
    text = text.replace("--", "&#8212;")   # em
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def educateEllipses(str):
    """
    Parameter:  String.
    Returns:    The string, with each instance of "..." translated to
                an ellipsis HTML entity.  The spaced form ". . ." is
                translated as well.

    Example input:  Huh...?
    Example output: Huh&#8230;?
    """
    text = str
    text = text.replace("...", "&#8230;")
    text = text.replace(". . .", "&#8230;")
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def stupefyEntities(str):
    """
    Parameter:  String.
    Returns:    The string, with each SmartyPants HTML entity translated to
                its ASCII counterpart.

    Example input:  &#8220;Hello &#8212; world.&#8221;
    Example output: "Hello -- world."
    """
    ascii_for_entity = (
        ("&#8211;", "-"),    # en-dash
        ("&#8212;", "--"),   # em-dash
        ("&#8216;", "'"),    # open single quote
        ("&#8217;", "'"),    # close single quote
        ("&#8220;", '"'),    # open double quote
        ("&#8221;", '"'),    # close double quote
        ("&#8230;", "..."),  # ellipsis
    )

    text = str
    for entity, plain in ascii_for_entity:
        text = text.replace(entity, plain)
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def processEscapes(str):
    r"""
    Parameter:  String.
    Returns:    The string, after processing the following backslash
                escape sequences. This is useful if you want to force a "dumb"
                quote or other character to appear.

                Each escape is replaced by the decimal HTML entity for the
                character, so that the later "educate" passes leave it alone.

                Escape  Value
                ------  -----
                \\      &#92;
                \"      &#34;
                \'      &#39;
                \.      &#46;
                \-      &#45;
                \`      &#96;
    """
    # Order matters: the escaped backslash is handled first so that its
    # second backslash is not mistaken for the start of another escape.
    entity_for_escape = (
        ("\\\\", "&#92;"),
        ('\\"', "&#34;"),
        ("\\'", "&#39;"),
        ("\\.", "&#46;"),
        ("\\-", "&#45;"),
        ("\\`", "&#96;"),
    )

    text = str
    for escape, entity in entity_for_escape:
        text = text.replace(escape, entity)
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tokenize(str):
|
|
|
|
|
"""
|
|
|
|
|
Parameter: String containing HTML markup.
|
|
|
|
|
Returns: Reference to an array of the tokens comprising the input
|
|
|
|
|
string. Each token is either a tag (possibly with nested,
|
|
|
|
|
tags contained therein, such as <a href="<MTFoo>">, or a
|
|
|
|
|
run of text between tags. Each element of the array is a
|
|
|
|
|
two-element array; the first is either 'tag' or 'text';
|
|
|
|
|
the second is the actual value.
|
|
|
|
|
|
|
|
|
|
Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
|
|
|
|
|
<http://www.bradchoate.com/past/mtregex.php>
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
tokens = []
|
|
|
|
|
|
|
|
|
|
#depth = 6
|
|
|
|
|
#nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
|
|
|
|
|
#match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
|
|
|
|
|
# (?: <\? .*? \?> ) | # directives
|
|
|
|
|
# %s # nested tags """ % (nested_tags,)
|
|
|
|
|
tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
|
|
|
|
|
|
|
|
|
|
token_match = tag_soup.search(str)
|
|
|
|
|
|
|
|
|
|
previous_end = 0
|
|
|
|
|
while token_match is not None:
|
|
|
|
|
if token_match.group(1):
|
|
|
|
|
tokens.append(['text', token_match.group(1)])
|
|
|
|
|
|
|
|
|
|
tokens.append(['tag', token_match.group(2)])
|
|
|
|
|
|
|
|
|
|
previous_end = token_match.end()
|
|
|
|
|
token_match = tag_soup.search(str, token_match.end())
|
|
|
|
|
|
|
|
|
|
if previous_end < len(str):
|
|
|
|
|
tokens.append(['text', str[previous_end:]])
|
|
|
|
|
|
|
|
|
|
return tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run as a script: print the module documentation rendered as HTML, then
    # run the self-tests.

    import locale

    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # Best effort only: keep the default locale if the user's is unusable.
        pass

    from docutils.core import publish_string
    docstring_html = publish_string(__doc__, writer_name='html')

    print(docstring_html)

    # Unit test output goes out stderr.  No worries.
    import unittest
    sp = smartyPants

    class TestSmartypantsAllAttributes(unittest.TestCase):
        # the default attribute is "1", which means "all".

        def test_dates(self):
            self.assertEqual(sp("1440-80's"), "1440-80&#8217;s")
            self.assertEqual(sp("1440-'80s"), "1440-&#8216;80s")
            self.assertEqual(sp("1440---'80s"), "1440&#8211;&#8216;80s")
            self.assertEqual(sp("1960s"), "1960s")  # no effect.
            self.assertEqual(sp("1960's"), "1960&#8217;s")
            self.assertEqual(sp("one two '60s"), "one two &#8216;60s")
            self.assertEqual(sp("'60s"), "&#8216;60s")

        def test_skip_tags(self):
            self.assertEqual(
                sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""),
                """<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""")
            self.assertEqual(
                sp("""<p>He said "Let's write some code." This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>"""),
                """<p>He said &#8220;Let&#8217;s write some code.&#8221; This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>""")

        def test_ordinal_numbers(self):
            self.assertEqual(sp("21st century"), "21st century")  # no effect.
            self.assertEqual(sp("3rd"), "3rd")  # no effect.

        def test_educated_quotes(self):
            self.assertEqual(sp('''"Isn't this fun?"'''), '''&#8220;Isn&#8217;t this fun?&#8221;''')

    unittest.main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module metadata: author, version string, home page and one-line description.
__author__ = "Chad Miller <smartypantspy@chad.org>"
__version__ = "1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400"
__url__ = "http://wiki.chad.org/SmartyPantsPy"
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
|