or tags.
$prev_token_last_char = ""; # This is a cheat, used to get some context
# for one-character tokens that consist of
# just a quote char. What we do is remember
# the last character of the previous text
# token, to use as context to curl single-
# character quote tokens correctly.
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags.
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
$last_char = substr($t, -1); # Remember last char of this token before processing.
if (! $in_pre) {
$t = ProcessEscapes($t);
if ($convert_quot) {
$t = preg_replace('/"/', '"', $t);
}
if ($do_dashes) {
if ($do_dashes == 1) $t = EducateDashes($t);
if ($do_dashes == 2) $t = EducateDashesOldSchool($t);
if ($do_dashes == 3) $t = EducateDashesOldSchoolInverted($t);
}
if ($do_ellipses) $t = EducateEllipses($t);
# Note: backticks need to be processed before quotes.
if ($do_backticks) {
$t = EducateBackticks($t);
if ($do_backticks == 2) $t = EducateSingleBackticks($t);
}
if ($do_quotes) {
if ($t == "'") {
# Special case: single-character ' token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "’";
}
else {
$t = "‘";
}
}
else if ($t == '"') {
# Special case: single-character " token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "”";
}
else {
$t = "“";
}
}
else {
# Normal case:
$t = EducateQuotes($t);
}
}
# Run the ligature education function. PHP SmartyPants has taken care of
# all the rest. MYW 8/1/05
if ($do_ligatures) $t = EducateLigatures($t);
if ($do_stupefy) $t = StupefyEntities($t);
}
$prev_token_last_char = $last_char;
$result .= $t;
}
}
return $result;
}
function SmartQuotes($text, $attr = NULL, $ctx = NULL) {
global $smartypants_attr, $sp_tags_to_skip;
# Paramaters:
$text; # text to be parsed
$attr; # value of the smart_quotes="" attribute
$ctx; # MT context object (unused)
if ($attr == NULL) $attr = $smartypants_attr;
$do_backticks; # should we educate ``backticks'' -style quotes?
if ($attr == 0) {
# do nothing;
return $text;
}
else if ($attr == 2) {
# smarten ``backticks'' -style quotes
$do_backticks = 1;
}
else {
$do_backticks = 0;
}
# Special case to handle quotes at the very end of $text when preceded by
# an HTML tag. Add a space to give the quote education algorithm a bit of
# context, so that it can guess correctly that it's a closing quote:
$add_extra_space = 0;
if (preg_match("/>['\"]\\z/", $text)) {
$add_extra_space = 1; # Remember, so we can trim the extra space later.
$text .= " ";
}
$tokens = _TokenizeHTML($text);
$result = '';
$in_pre = 0; # Keep track of when we're inside or tags
$prev_token_last_char = ""; # This is a cheat, used to get some context
# for one-character tokens that consist of
# just a quote char. What we do is remember
# the last character of the previous text
# token, to use as context to curl single-
# character quote tokens correctly.
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
$last_char = substr($t, -1); # Remember last char of this token before processing.
if (! $in_pre) {
$t = ProcessEscapes($t);
if ($do_backticks) {
$t = EducateBackticks($t);
}
if ($t == "'") {
# Special case: single-character ' token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "’";
}
else {
$t = "‘";
}
}
else if ($t == '"') {
# Special case: single-character " token
if (preg_match('/\S/', $prev_token_last_char)) {
$t = "”";
}
else {
$t = "“";
}
}
else {
# Normal case:
$t = EducateQuotes($t);
}
}
$prev_token_last_char = $last_char;
$result .= $t;
}
}
if ($add_extra_space) {
preg_replace('/ \z/', '', $result); # Trim trailing space if we added one earlier.
}
return $result;
}
function SmartDashes($text, $attr = NULL, $ctx = NULL) {
global $smartypants_attr, $sp_tags_to_skip;
# Paramaters:
$text; # text to be parsed
$attr; # value of the smart_dashes="" attribute
$ctx; # MT context object (unused)
if ($attr == NULL) $attr = $smartypants_attr;
# reference to the subroutine to use for dash education, default to EducateDashes:
$dash_sub_ref = 'EducateDashes';
if ($attr == 0) {
# do nothing;
return $text;
}
else if ($attr == 2) {
# use old smart dash shortcuts, "--" for en, "---" for em
$dash_sub_ref = 'EducateDashesOldSchool';
}
else if ($attr == 3) {
# inverse of 2, "--" for em, "---" for en
$dash_sub_ref = 'EducateDashesOldSchoolInverted';
}
$tokens;
$tokens = _TokenizeHTML($text);
$result = '';
$in_pre = 0; # Keep track of when we're inside or tags
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
if (! $in_pre) {
$t = ProcessEscapes($t);
$t = $dash_sub_ref($t);
}
$result .= $t;
}
}
return $result;
}
function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
# Paramaters:
$text; # text to be parsed
$attr; # value of the smart_ellipses="" attribute
$ctx; # MT context object (unused)
if ($attr == NULL) $attr = $smartypants_attr;
if ($attr == 0) {
# do nothing;
return $text;
}
$tokens;
$tokens = _TokenizeHTML($text);
$result = '';
$in_pre = 0; # Keep track of when we're inside or tags
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
if (! $in_pre) {
$t = ProcessEscapes($t);
$t = EducateEllipses($t);
}
$result .= $t;
}
}
return $result;
}
# Given a stream of text, replaces ligature matches in appropriate locations with the
# corresponding ligature. Basically, a rewritten copy of SmartElipses MYW 8/1/05
function SmartLigatures($text, $attr = NULL, $ctx = NULL) {
# Paramaters:
$text; # text to be parsed
$attr; # value of the smart_ellipses="" attribute
$ctx; # MT context object (unused)
if ($attr == NULL) $attr = $smartypants_attr;
if ($attr == 0) {
# do nothing;
return $text;
}
$tokens;
$tokens = _TokenizeHTML($text);
$result = '';
$in_pre = 0; # Keep track of when we're inside or tags
foreach ($tokens as $cur_token) {
if ($cur_token[0] == "tag") {
# Don't mess with quotes inside tags
$result .= $cur_token[1];
if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
$in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
}
} else {
$t = $cur_token[1];
if (! $in_pre) {
$t = ProcessEscapes($t);
$t = EducateLigatures($t);
}
$result .= $t;
}
}
return $result;
}
function EducateQuotes($_) {
#
# Parameter: String.
#
# Returns: The string, with "educated" curly quote HTML entities.
#
# Example input: "Isn't this fun?"
# Example output: “Isn’t this fun?”
#
# Make our own "punctuation" character class, because the POSIX-style
# [:PUNCT:] is only available in Perl 5.6 or later:
$punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";
# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
$_ = preg_replace(
array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
array('’', '”'), $_);
# Special case for double sets of quotes, e.g.:
# He said, "'Quoted' words in a larger quote."
$_ = preg_replace(
array("/\"'(?=\w)/", "/'\"(?=\w)/"),
array('“‘', '‘“'), $_);
# Special case for decade abbreviations (the '80s):
$_ = preg_replace("/'(?=\\d{2}s)/", '’', $_);
$close_class = '[^\ \t\r\n\[\{\(\-]';
$dec_dashes = '&\#8211;|&\#8212;';
# Get most opening single quotes:
$_ = preg_replace("{
(
\\s | # a whitespace char, or
| # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
$dec_dashes | # or decimal entities
&\\#x201[34]; # or hex
)
' # the quote
(?=\\w) # followed by a word character
}x", '\1‘', $_);
# Single closing quotes:
$_ = preg_replace("{
($close_class)?
'
(?(1)| # If $1 captured, then do nothing;
(?=\\s | s\\b) # otherwise, positive lookahead for a whitespace
) # char or an 's' at a word ending position. This
# is a special case to handle something like:
# \"Custer's Last Stand.\"
}xi", '\1’', $_);
# Any remaining single quotes should be opening ones:
$_ = str_replace("'", '‘', $_);
# Get most opening double quotes:
$_ = preg_replace("{
(
\\s | # a whitespace char, or
| # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
$dec_dashes | # or decimal entities
&\\#x201[34]; # or hex
)
\" # the quote
(?=\\w) # followed by a word character
}x", '\1“', $_);
# Double closing quotes:
$_ = preg_replace("{
($close_class)?
\"
(?(1)|(?=\\s)) # If $1 captured, then do nothing;
# if not, then make sure the next char is whitespace.
}x", '\1”', $_);
# Any remaining quotes should be opening ones.
$_ = str_replace('"', '“', $_);
return $_;
}
function EducateBackticks($_) {
#
# Parameter: String.
# Returns: The string, with ``backticks'' -style double quotes
# translated into HTML curly quote entities.
#
# Example input: ``Isn't this fun?''
# Example output: “Isn't this fun?”
#
$_ = str_replace(array("``", "''",),
array('“', '”'), $_);
return $_;
}
function EducateSingleBackticks($_) {
#
# Parameter: String.
# Returns: The string, with `backticks' -style single quotes
# translated into HTML curly quote entities.
#
# Example input: `Isn't this fun?'
# Example output: ‘Isn’t this fun?’
#
$_ = str_replace(array("`", "'",),
array('‘', '’'), $_);
return $_;
}
function EducateDashes($_) {
#
# Parameter: String.
#
# Returns: The string, with each instance of "--" translated to
# an em-dash HTML entity.
#
$_ = str_replace('--', '—', $_);
return $_;
}
function EducateDashesOldSchool($_) {
#
# Parameter: String.
#
# Returns: The string, with each instance of "--" translated to
# an en-dash HTML entity, and each "---" translated to
# an em-dash HTML entity.
#
# em en
$_ = str_replace(array("---", "--",),
array('—', '–'), $_);
return $_;
}
function EducateDashesOldSchoolInverted($_) {
#
# Parameter: String.
#
# Returns: The string, with each instance of "--" translated to
# an em-dash HTML entity, and each "---" translated to
# an en-dash HTML entity. Two reasons why: First, unlike the
# en- and em-dash syntax supported by
# EducateDashesOldSchool(), it's compatible with existing
# entries written before SmartyPants 1.1, back when "--" was
# only used for em-dashes. Second, em-dashes are more
# common than en-dashes, and so it sort of makes sense that
# the shortcut should be shorter to type. (Thanks to Aaron
# Swartz for the idea.)
#
# en em
$_ = str_replace(array("---", "--",),
array('–', '—'), $_);
return $_;
}
function EducateEllipses($_) {
#
# Parameter: String.
# Returns: The string, with each instance of "..." translated to
# an ellipsis HTML entity. Also converts the case where
# there are spaces between the dots.
#
# Example input: Huh...?
# Example output: Huh…?
#
$_ = str_replace(array("...", ". . .",), '…', $_);
return $_;
}
# Ligature education function. MYW 8/1/05
function EducateLigatures($_) {
#
# Parameter: String.
# Returns: The string, with each instance of "fi" and "fl" translated to
# the corresponding ligature's HTML entity.
#
# Example input: Fin is finally off the floor.
# Example output: Fin is finally off the floor.
#
### Further ligatures added by Helen Natasha Moore, s29.8.09
$_ = str_replace(array("ffi", "ffl", "ff", "fi", "fl", "st",), array('ffi', 'ffl', 'ff', 'fi', 'fl', 'st'), $_);
return $_;
}
function StupefyEntities($_) {
#
# Parameter: String.
# Returns: The string, with each SmartyPants HTML entity translated to
# its ASCII counterpart.
#
# Example input: “Hello — world.”
# Example output: "Hello -- world."
#
# en-dash em-dash
$_ = str_replace(array('–', '—'),
array('-', '--'), $_);
# single quote open close
$_ = str_replace(array('‘', '’'), "'", $_);
# double quote open close
$_ = str_replace(array('“', '”'), '"', $_);
$_ = str_replace('…', '...', $_); # ellipsis
# Ligature stupification. MYW 8/1/05
$_ = str_replace(array('fi', 'fl'),
array('fi', 'fl'), $_);
return $_;
}
function ProcessEscapes($_) {
#
# Parameter: String.
# Returns: The string, with after processing the following backslash
# escape sequences. This is useful if you want to force a "dumb"
# quote or other character to appear.
#
# Escape Value
# ------ -----
# \\ \
# \" "
# \' '
# \. .
# \- -
# \` `
# ### Added ligature escapes MYW 8/1/05
# \f f
#
$_ = str_replace(
array('\\', '\"', "\'", '\.', '\-', '\`', '\f'),
array('\', '"', ''', '.', '-', '`', 'f'), $_);
return $_;
}
# _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
# We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
#
# Parameter: String containing HTML markup.
# Returns: An array of the tokens comprising the input
# string. Each token is either a tag (possibly with nested,
# tags contained therein, such as , or a
# run of text between tags. Each element of the array is a
# two-element array; the first is either 'tag' or 'text';
# the second is the actual value.
#
#
# Regular expression derived from the _tokenize() subroutine in
# Brad Choate's MTRegex plugin.
#
#
$index = 0;
$tokens = array();
$match = '(?s:)|'. # comment
'(?s:<\?.*?\?>)|'. # processing instruction
# regular tags
'(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
$parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
foreach ($parts as $part) {
if (++$index % 2 && $part != '')
$tokens[] = array('text', $part);
else
$tokens[] = array('tag', $part);
}
return $tokens;
}
endif;
/*
PHP SmartyPants
===============
Ligature Patch
--------------
Several lines (all commented with initials and the date) were added to the
PHP script to allow it to change the ugly "fi" and "fl" combinations (in the
appropriate places, of course) into the corresponding beautiful ligatures. Only
fi and fl were implemented, because these are the only two "standard" ligatures
implemented by all fonts. This idea could be extended to allow for other
ligature replacements for fonts that support it, but this would have to be
provided as a user flag. In the mean time, the ligature addition is just a basic
hack to make pages prettier. Much thanks to both John Gruber and Michael Fortin
for allowing me the pleasure of playing around with this stuff. Here's hoping
to make the internet a prettier place, one page at a time...
Mikhail Wolfson
Description
-----------
This is a PHP translation of the original SmartyPants quote educator written in
Perl by John Gruber.
SmartyPants is a web publishing utility that translates plain ASCII
punctuation characters into "smart" typographic punctuation HTML
entities. SmartyPants can perform the following transformations:
* Straight quotes (`"` and `'`) into "curly" quote HTML entities
* Backticks-style quotes (` ``like this'' `) into "curly" quote HTML
entities
* Dashes (`--` and `---`) into en- and em-dash entities
* Three consecutive dots (`...`) into an ellipsis entity
SmartyPants does not modify characters within ``, ``, ``,
`