DeMoronize(text)
Last updated September 2, 2010
Version: 2 | Requires: ColdFusion 5 | Library: StrLib
Description:
Fixes text using Microsoft Latin-1 "Extensions", namely ASCII characters 128-160. Supplies semicolons where missing in HTML numeric and common non-numeric entities.
This is a rough port of John Walker's demoroniser, written in Perl.
http://www.fourmilab.ch/webtools/demoroniser/
Return Values:
Returns a string.
Example:
<cfoutput>With MS Latin-1 Extentions:<br>#MSText#</cfoutput>
<cfset ValidText = DeMoronize(MSText)>
<cfoutput>Valid ASCII:<br>#ValidText#</cfoutput>
Parameters:
| Name | Description | Required |
|---|---|---|
| text | Text to be modified. | Yes |
Full UDF Source:
<cfscript>
/**
* Fixes text using Microsoft Latin-1 "Extensions", namely characters 128-160.
* ASCII8217 mod by Tony Brandner
*
* Processing order (later steps rely on earlier ones):
*   1. Characters 128-159 are mapped to plain-text / entity substitutes;
*      any code in that range without a substitute is removed.
*   2. Characters 160-255 are converted to HTML entities (named for a few
*      common ones, numeric &#NNN; for the rest).
*   3. Characters 8216-8218 are replaced by a plain apostrophe.
*   4. Missing trailing semicolons are supplied on three-digit numeric
*      entities and on &amp / &quot, and the numeric forms of & < > are
*      rewritten to their named entities.
*
* @param text Text to be modified. (Required)
* @return Returns a string.
* @author Shawn Porter (sporter@rit.net)
* @version 2, September 2, 2010
*/
function deMoronize (text) {
    var i = 0;

    // map incompatible non-ISO characters into plausible
    // substitutes
    text = Replace(text, Chr(128), "&euro;", "All");
    text = Replace(text, Chr(130), ",", "All");
    text = Replace(text, Chr(131), "<em>f</em>", "All");
    text = Replace(text, Chr(132), ",,", "All");
    text = Replace(text, Chr(133), "...", "All");
    text = Replace(text, Chr(136), "^", "All");
    // NOTE(review): 139 and 155 both map to ")" here; the Perl demoroniser
    // this was ported from appears to use "<" and ">" instead -- confirm
    // before changing, callers may depend on the current output.
    text = Replace(text, Chr(139), ")", "All");
    text = Replace(text, Chr(140), "Oe", "All");
    text = Replace(text, Chr(145), "`", "All");
    text = Replace(text, Chr(146), "'", "All");
    text = Replace(text, Chr(147), """", "All");
    text = Replace(text, Chr(148), """", "All");
    text = Replace(text, Chr(149), "*", "All");
    text = Replace(text, Chr(150), "-", "All");
    text = Replace(text, Chr(151), "--", "All");
    text = Replace(text, Chr(152), "~", "All");
    text = Replace(text, Chr(153), "&trade;", "All");
    text = Replace(text, Chr(155), ")", "All");
    text = Replace(text, Chr(156), "oe", "All");

    // remove any remaining 128-159 characters
    for (i = 128; i LTE 159; i = i + 1) {
        text = Replace(text, Chr(i), "", "All");
    }

    // map Latin-1 supplemental characters into
    // their &name; encoded substitutes
    text = Replace(text, Chr(160), "&nbsp;", "All");
    text = Replace(text, Chr(163), "&pound;", "All");
    text = Replace(text, Chr(169), "&copy;", "All");
    text = Replace(text, Chr(176), "&deg;", "All");

    // encode whatever is left of 160-255 using the numeric &#999; format
    // ("##" is a literal "#" inside a CFML string; "#i#" interpolates i)
    for (i = 160; i LTE 255; i = i + 1) {
        text = Replace(text, Chr(i), "&###i#;", "All");
    }

    // typographic single quotes (8216-8218) become a plain apostrophe
    for (i = 8216; i LTE 8218; i = i + 1) {
        text = Replace(text, Chr(i), "'", "All");
    }

    // supply missing semicolon at end of numeric entities; the character
    // that follows must be neither ";" nor a digit, so longer entities
    // such as &#1234; are left intact
    text = REReplace(text, "&##([0-2][0-9]{2})([^;0-9])", "&##\1;\2", "All");

    // fix obscure numeric rendering of &lt; &gt; &amp;
    text = Replace(text, "&##038;", "&amp;", "All");
    text = Replace(text, "&##060;", "&lt;", "All");
    text = Replace(text, "&##062;", "&gt;", "All");

    // supply missing semicolon at the end of &amp; &quot;
    text = REReplace(text, "&amp([^;])", "&amp;\1", "All");
    text = REReplace(text, "&quot([^;])", "&quot;\1", "All");

    return text;
}
</cfscript>
Search CFLib.org
Latest Additions
Tayo Akinmade added
arrayTrim
3 day(s) ago
Will Belden added
longTime
9 day(s) ago
James Sleeman added
quickSort
19 day(s) ago
Ben Forta added
GetHostAddress
22 day(s) ago
Top Rated
EksporSQLData
Rated 5.0, 16 time(s)
backupDatabase
Rated 5.0, 13 time(s)
indentXml
Rated 5.0, 10 time(s)
generateSsccAsn
Rated 5.0, 4 time(s)