array('href', 'title'),
'img' => array('src', 'align', 'alt'),
'font' => array('family', 'color', 'size'),
'b' => 0,
'i' => 0,
'u' => 0,
's' => 0,
'strong' => 0,
'code' => 0,
'li' => 0,
'ul' => 0,
'ol' => 0,
'center' => 0,
'abbr' => array('title'),
//etc...
);
//boolean flag telling HtmlFormatter whether or not inline styling is allowed
//(this must also be set to 1 if you're using a whitelist and allow style attributes somewhere)
define('HTML_ALLOW_INLINE_STYLING', 1);
//should we convert newlines (ie. \n;\r;\r\n) to line-breaks (<br />) or leave them as they are?
define('HTML_CONVERT_NEWLINES', 1);
//are html comments allowed? (1 = yes, 0 = no)
//HtmlFormatter::Execute() reads this both when it processes comments and when it cleans up stray '<'
define('HTML_ALLOW_COMMENTS', 1);
//this option tells the parser to make sure there are no stray opening or closing tags anywhere
//provides more protection against forgetful/poor markup and/or malicious users, but will take slightly longer
define('HTML_POLICE_TAGS', 1);
//allow google video, youtube and myspace videos to be posted through a video tag (1 = yes, 0 = no)
//NOTE(review): the example tag markup that belongs in front of each label below appears to be
//missing from this copy of the file -- restore it from the upstream source
// (google video) -or-
// (youtube) -or-
// (myspace)
define('HTML_VIDEO_TAG', 0);
//index = type (what you specify in the tag)
//value = replacement html; VIDEO_ID = video id (can only contain numbers, letters, - and _)
//the brace-less if guards only this one assignment, so $Html_VideoLinks is left undefined
//while HTML_VIDEO_TAG is 0
//NOTE(review): every replacement value is an empty string here; presumably the embed markup
//was lost along with the tag examples above -- confirm before enabling HTML_VIDEO_TAG
if(HTML_VIDEO_TAG)
$Html_VideoLinks = array(
'google' => '',
'youtube' => '',
'myspace' => ''
);
//which tags should simply be removed (be warned, most of these are here because they are not safe
//enough to allow/clean, remove at your own risk)
$Html_DisallowedTags = array('link', 'iframe', 'frame', 'frameset', 'object', 'param', 'embed', 'style',
'applet', 'meta', 'layer', 'import', 'xml', 'script', 'body', 'html', 'head', 'title', 'ilayer');
//don't parse anything in these tags; HtmlFormatter::Execute() builds its "literals" pattern from
//this list and passes the tag contents through htmlspecialchars()
//(as this is geared towards vanilla, pre is not included by default)
$Html_Literals = array('code', 'samp');
//which url protocols are allowed
$Html_AllowedProtocols = array('http', 'https', 'ftp', 'news', 'nntp', 'feed', 'gopher', 'mailto');
//protocol to replace invalid protocols with
$Html_DefaultProtocol = 'http://';
//END SETTINGS
//unclosed or orphaned tags; HtmlFormatter binds its TagArray property to this global by reference
$Html_TagArray = array();
//entities and their equivalents: get_html_translation_table() maps character => entity, so the
//flipped table maps each entity string (e.g. '&copy;') to the character it stands for
$Html_EntityTable = array_flip(get_html_translation_table(HTML_ENTITIES));
//after the flip the keys are the entity strings, so the markup-significant entries have to be
//removed by entity name; unsetting the raw characters '&', '<' and '>' would match no key at all
//(presumably they are dropped so that these entities are never turned back into live markup)
unset($Html_EntityTable['&amp;'], $Html_EntityTable['&lt;'], $Html_EntityTable['&gt;']);
class HtmlFormatter extends StringFormatter
{
//allowed url protocols; bound by reference to $GLOBALS['Html_AllowedProtocols'] in the constructor
var $AllowedProtocols;
//bound by reference to $GLOBALS['Html_DefaultProtocol'] in the constructor
var $DefaultProtocol;
//tags that Execute() skips when it appends closing tags for tags that were left open
var $FreestandingLoose = array('li', 'option', 'dt', 'dd', 'td', 'tfoot', 'th', 'tbody', 'thead', 'tr', 'colgroup');
//NOTE(review): presumably the tags that never take a closing tag; not referenced in the part of
//the class visible here -- confirm against the rest of the class
var $Freestanding = array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param');
//bound by reference to $GLOBALS['Html_TagArray'] in the constructor; Execute() resets it to
//array('normal' => array(), 'extraclosing' => array()) at the start of every run
var $TagArray;
//PHP 4 style constructor. Kept so that PHP 4 and any code calling $obj->HtmlFormatter()
//directly keep working; the actual initialisation lives in __construct() below.
function HtmlFormatter()
{
    $this->__construct();
}

//Binds the formatter to the global configuration.
//A method named after the class is deprecated as a constructor in PHP 7 and is no longer
//treated as one in PHP 8, which would leave the properties below unbound; __construct()
//is recognised by every PHP version from 5 on.
//The properties are bound by reference, so changes made through them also change the
//matching $GLOBALS entries (and the other way round).
function __construct()
{
    $this->AllowedProtocols = &$GLOBALS['Html_AllowedProtocols'];
    $this->DefaultProtocol = &$GLOBALS['Html_DefaultProtocol'];
    $this->TagArray = &$GLOBALS['Html_TagArray'];
}
function Execute($String)
{
$this->TagArray = array('normal' => array(), 'extraclosing' => array());
$String = str_replace(chr(0), ' ', $String);
//comments
$String = preg_replace_callback(
'/|$)/s',
create_function(
'$m',
HTML_ALLOW_COMMENTS ?
'if($m[2]==\'-->\')return \'\';else return \'\';'
:
'return \'\';'
),
$String
);
//handle literals
$String = preg_replace_callback(
'/<('.implode('|', $GLOBALS['Html_Literals']).')((?>[^>A-Za-z\d][^>]*)|)>(.+?)<\/\1((?>[^>A-Za-z\d][^>]*)|)>/si',
create_function(
'$m',
'return \'<\'.$m[1].$m[2].\'>\'.htmlspecialchars($m[3]).\'\'.$m[1].\'>\';'
),
$String
);
//clean up any stray '<'
$String = preg_replace(
'/<(?![A-Za-z\/'.(HTML_ALLOW_COMMENTS?'!':'').'])/i',
'<',
$String
);
//go through and check attributes of each tag
$sReturn = preg_replace_callback('/<((?>[^>]+))(>|$)/', array($this, 'RemoveEvilAttribs'), $String);
if(HTML_POLICE_TAGS)
{
$this->TagArray['normal'] = array_reverse($this->TagArray['normal'], 1);
while(list($i, $v) = each($this->TagArray['normal']))
{
if(in_array($i, $this->FreestandingLoose)) continue;
if($v > 0) $sReturn .= str_repeat(''.$i.'>', $v);
}
//now we manage orphaned closing tags
while(list($i, $v) = each($this->TagArray['extraclosing']))
{
if($v > 0) $sReturn = str_repeat('<'.$i.'>', $v) . $sReturn;
}
}
if(HTML_VIDEO_TAG)
$sReturn = preg_replace_callback(
'/