以下函数解码命名和数字 HTML 实体,并在 UTF-8 上工作。需要 iconv。
function decodeHtmlEnt($str) {
$ret = html_entity_decode($str, ENT_COMPAT, 'UTF-8');
$p2 = -1;
for(;;) {
$p = strpos($ret, '&#', $p2+1);
if ($p === FALSE)
break;
$p2 = strpos($ret, ';', $p);
if ($p2 === FALSE)
break;
if (substr($ret, $p+2, 1) == 'x')
$char = hexdec(substr($ret, $p+3, $p2-$p-3));
else
$char = intval(substr($ret, $p+2, $p2-$p-2));
//echo "$char\n";
$newchar = iconv(
'UCS-4', 'UTF-8',
chr(($char>>24)&0xFF).chr(($char>>16)&0xFF).chr(($char>>8)&0xFF).chr($char&0xFF)
);
//echo "$newchar<$p<$p2<<\n";
$ret = substr_replace($ret, $newchar, $p, 1+$p2-$p);
$p2 = $p + strlen($newchar);
}
return $ret;
}