We are no longer offering accounts on this server. Consider https://gitlab.freedesktop.org/ as a place to host projects.

Commit 81ea0f81 authored by Evan Prodromou's avatar Evan Prodromou

Add HTMLPurifier to extlib

HTMLPurifier defangs arbitrary submitted HTML. We're using it in the
OStatus plugin, but it may be valuable for other parts of the codebase
(I think OEmbed might benefit, for example).
parent ed45df04
<?php
/**
* This is a stub include that automatically configures the include path.
*/
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
require_once 'HTMLPurifier/Bootstrap.php';
require_once 'HTMLPurifier.autoload.php';
// vim: et sw=4 sts=4
<?php
/**
* @file
* Convenience file that registers autoload handler for HTML Purifier.
*/
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
// We need unregister for our pre-registering functionality
HTMLPurifier_Bootstrap::registerAutoload();
if (function_exists('__autoload')) {
// Be polite and ensure that userland autoload gets retained
spl_autoload_register('__autoload');
}
} elseif (!function_exists('__autoload')) {
function __autoload($class) {
return HTMLPurifier_Bootstrap::autoload($class);
}
}
// vim: et sw=4 sts=4
<?php
/**
* @file
* Defines a function wrapper for HTML Purifier for quick use.
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
*/
/**
* Purify HTML.
* @param $html String HTML to purify
* @param $config Configuration to use, can be any value accepted by
* HTMLPurifier_Config::create()
*/
function HTMLPurifier($html, $config = null) {
static $purifier = false;
if (!$purifier) {
$purifier = new HTMLPurifier();
}
return $purifier->purify($html, $config);
}
// vim: et sw=4 sts=4
<?php
/**
* @file
* This file was auto-generated by generate-includes.php and includes all of
* the core files required by HTML Purifier. Use this if performance is a
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.0.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
* because 'require' not 'require_once' is used.
*
* @warning
* This file requires that the include path contains the HTML Purifier
* library directory; this is not auto-set.
*/
require 'HTMLPurifier.php';
require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php';
require 'HTMLPurifier/AttrTypes.php';
require 'HTMLPurifier/AttrValidator.php';
require 'HTMLPurifier/Bootstrap.php';
require 'HTMLPurifier/Definition.php';
require 'HTMLPurifier/CSSDefinition.php';
require 'HTMLPurifier/ChildDef.php';
require 'HTMLPurifier/Config.php';
require 'HTMLPurifier/ConfigSchema.php';
require 'HTMLPurifier/ContentSets.php';
require 'HTMLPurifier/Context.php';
require 'HTMLPurifier/DefinitionCache.php';
require 'HTMLPurifier/DefinitionCacheFactory.php';
require 'HTMLPurifier/Doctype.php';
require 'HTMLPurifier/DoctypeRegistry.php';
require 'HTMLPurifier/ElementDef.php';
require 'HTMLPurifier/Encoder.php';
require 'HTMLPurifier/EntityLookup.php';
require 'HTMLPurifier/EntityParser.php';
require 'HTMLPurifier/ErrorCollector.php';
require 'HTMLPurifier/ErrorStruct.php';
require 'HTMLPurifier/Exception.php';
require 'HTMLPurifier/Filter.php';
require 'HTMLPurifier/Generator.php';
require 'HTMLPurifier/HTMLDefinition.php';
require 'HTMLPurifier/HTMLModule.php';
require 'HTMLPurifier/HTMLModuleManager.php';
require 'HTMLPurifier/IDAccumulator.php';
require 'HTMLPurifier/Injector.php';
require 'HTMLPurifier/Language.php';
require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php';
require 'HTMLPurifier/Strategy.php';
require 'HTMLPurifier/StringHash.php';
require 'HTMLPurifier/StringHashParser.php';
require 'HTMLPurifier/TagTransform.php';
require 'HTMLPurifier/Token.php';
require 'HTMLPurifier/TokenFactory.php';
require 'HTMLPurifier/URI.php';
require 'HTMLPurifier/URIDefinition.php';
require 'HTMLPurifier/URIFilter.php';
require 'HTMLPurifier/URIParser.php';
require 'HTMLPurifier/URIScheme.php';
require 'HTMLPurifier/URISchemeRegistry.php';
require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php';
require 'HTMLPurifier/AttrDef/Switch.php';
require 'HTMLPurifier/AttrDef/Text.php';
require 'HTMLPurifier/AttrDef/URI.php';
require 'HTMLPurifier/AttrDef/CSS/Number.php';
require 'HTMLPurifier/AttrDef/CSS/AlphaValue.php';
require 'HTMLPurifier/AttrDef/CSS/Background.php';
require 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require 'HTMLPurifier/AttrDef/CSS/Border.php';
require 'HTMLPurifier/AttrDef/CSS/Color.php';
require 'HTMLPurifier/AttrDef/CSS/Composite.php';
require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
require 'HTMLPurifier/AttrDef/CSS/Multiple.php';
require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require 'HTMLPurifier/AttrDef/CSS/URI.php';
require 'HTMLPurifier/AttrDef/HTML/Bool.php';
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require 'HTMLPurifier/AttrDef/HTML/Class.php';
require 'HTMLPurifier/AttrDef/HTML/Color.php';
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require 'HTMLPurifier/AttrDef/HTML/ID.php';
require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
require 'HTMLPurifier/AttrDef/HTML/Length.php';
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
require 'HTMLPurifier/AttrDef/URI/Email.php';
require 'HTMLPurifier/AttrDef/URI/Host.php';
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
require 'HTMLPurifier/AttrDef/URI/IPv6.php';
require 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
require 'HTMLPurifier/AttrTransform/Background.php';
require 'HTMLPurifier/AttrTransform/BdoDir.php';
require 'HTMLPurifier/AttrTransform/BgColor.php';
require 'HTMLPurifier/AttrTransform/BoolToCSS.php';
require 'HTMLPurifier/AttrTransform/Border.php';
require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
require 'HTMLPurifier/AttrTransform/ImgRequired.php';
require 'HTMLPurifier/AttrTransform/ImgSpace.php';
require 'HTMLPurifier/AttrTransform/Input.php';
require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php';
require 'HTMLPurifier/AttrTransform/NameSync.php';
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
require 'HTMLPurifier/ChildDef/Table.php';
require 'HTMLPurifier/DefinitionCache/Decorator.php';
require 'HTMLPurifier/DefinitionCache/Null.php';
require 'HTMLPurifier/DefinitionCache/Serializer.php';
require 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require 'HTMLPurifier/HTMLModule/Bdo.php';
require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php';
require 'HTMLPurifier/HTMLModule/Name.php';
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php';
require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
require 'HTMLPurifier/HTMLModule/SafeObject.php';
require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Tidy/Name.php';
require 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require 'HTMLPurifier/HTMLModule/Tidy/Strict.php';
require 'HTMLPurifier/HTMLModule/Tidy/Transitional.php';
require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require 'HTMLPurifier/Injector/AutoParagraph.php';
require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
require 'HTMLPurifier/Strategy/Composite.php';
require 'HTMLPurifier/Strategy/Core.php';
require 'HTMLPurifier/Strategy/FixNesting.php';
require 'HTMLPurifier/Strategy/MakeWellFormed.php';
require 'HTMLPurifier/Strategy/RemoveForeignElements.php';
require 'HTMLPurifier/Strategy/ValidateAttributes.php';
require 'HTMLPurifier/TagTransform/Font.php';
require 'HTMLPurifier/TagTransform/Simple.php';
require 'HTMLPurifier/Token/Comment.php';
require 'HTMLPurifier/Token/Tag.php';
require 'HTMLPurifier/Token/Empty.php';
require 'HTMLPurifier/Token/End.php';
require 'HTMLPurifier/Token/Start.php';
require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';
require 'HTMLPurifier/URIScheme/mailto.php';
require 'HTMLPurifier/URIScheme/news.php';
require 'HTMLPurifier/URIScheme/nntp.php';
require 'HTMLPurifier/VarParser/Flexible.php';
require 'HTMLPurifier/VarParser/Native.php';
<?php
/**
* @file
* Emulation layer for code that used kses(), substituting in HTML Purifier.
*/
require_once dirname(__FILE__) . '/HTMLPurifier.auto.php';
function kses($string, $allowed_html, $allowed_protocols = null) {
$config = HTMLPurifier_Config::createDefault();
$allowed_elements = array();
$allowed_attributes = array();
foreach ($allowed_html as $element => $attributes) {
$allowed_elements[$element] = true;
foreach ($attributes as $attribute => $x) {
$allowed_attributes["$element.$attribute"] = true;
}
}
$config->set('HTML.AllowedElements', $allowed_elements);
$config->set('HTML.AllowedAttributes', $allowed_attributes);
$allowed_schemes = array();
if ($allowed_protocols !== null) {
$config->set('URI.AllowedSchemes', $allowed_protocols);
}
$purifier = new HTMLPurifier($config);
return $purifier->purify($string);
}
// vim: et sw=4 sts=4
<?php
/**
* @file
* Convenience stub file that adds HTML Purifier's library file to the path
* without any other side-effects.
*/
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
// vim: et sw=4 sts=4
<?php
/*! @mainpage
*
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
* HTML and rigorously test, validate and filter it into a version that
* is safe for output onto webpages. It achieves this by:
*
* -# Lexing (parsing into tokens) the document,
* -# Executing various strategies on the tokens:
* -# Removing all elements not in the whitelist,
* -# Making the tokens well-formed,
* -# Fixing the nesting of the nodes, and
* -# Validating attributes of the nodes; and
* -# Generating HTML from the purified tokens.
*
* However, most users will only need to interface with the HTMLPurifier
* and HTMLPurifier_Config.
*/
/*
HTML Purifier 4.0.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*
* @note There are several points in which configuration can be specified
* for HTML Purifier. The precedence of these (from lowest to
* highest) is as follows:
* -# Instance: new HTMLPurifier($config)
* -# Invocation: purify($html, $config)
* These configurations are entirely independent of each other and
* are *not* merged (this behavior may change in the future).
*
* @todo We need an easier way to inject strategies using the configuration
* object.
*/
class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.0.0';
/** Constant with version of HTML Purifier */
const VERSION = '4.0.0';
/** Global configuration object */
public $config;
/** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
private $filters = array();
/** Single instance of HTML Purifier */
private static $instance;
protected $strategy, $generator;
/**
* Resultant HTMLPurifier_Context of last run purification. Is an array
* of contexts if the last called method was purifyArray().
*/
public $context;
/**
* Initializes the purifier.
* @param $config Optional HTMLPurifier_Config object for all instances of
* the purifier, if omitted, a default configuration is
* supplied (which can be overridden on a per-use basis).
* The parameter can also be any type that
* HTMLPurifier_Config::create() supports.
*/
public function __construct($config = null) {
$this->config = HTMLPurifier_Config::create($config);
$this->strategy = new HTMLPurifier_Strategy_Core();
}
/**
* Adds a filter to process the output. First come first serve
* @param $filter HTMLPurifier_Filter object
*/
public function addFilter($filter) {
trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
$this->filters[] = $filter;
}
/**
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
*
* @param $html String of HTML to purify
* @param $config HTMLPurifier_Config object for this operation, if omitted,
* defaults to the config object specified during this
* object's construction. The parameter can also be any type
* that HTMLPurifier_Config::create() supports.
* @return Purified HTML
*/
public function purify($html, $config = null) {
// :TODO: make the config merge in, instead of replace
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
// implementation is partially environment dependant, partially
// configuration dependant
$lexer = HTMLPurifier_Lexer::create($config);
$context = new HTMLPurifier_Context();
// setup HTML generator
$this->generator = new HTMLPurifier_Generator($config, $context);
$context->register('Generator', $this->generator);
// set up global context variables
if ($config->get('Core.CollectErrors')) {
// may get moved out if other facilities use it
$language_factory = HTMLPurifier_LanguageFactory::instance();
$language = $language_factory->create($config, $context);
$context->register('Locale', $language);
$error_collector = new HTMLPurifier_ErrorCollector($context);
$context->register('ErrorCollector', $error_collector);
}
// setup id_accumulator context, necessary due to the fact that
// AttrValidator can be called from many places
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
// setup filters
$filter_flags = $config->getBatch('Filter');
$custom_filters = $filter_flags['Custom'];
unset($filter_flags['Custom']);
$filters = array();
foreach ($filter_flags as $filter => $flag) {
if (!$flag) continue;
if (strpos($filter, '.') !== false) continue;
$class = "HTMLPurifier_Filter_$filter";
$filters[] = new $class;
}
foreach ($custom_filters as $filter) {
// maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
$filters[] = $filter;
}
$filters = array_merge($filters, $this->filters);
// maybe prepare(), but later
for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
$html = $filters[$i]->preFilter($html, $config, $context);
}
// purified HTML
$html =
$this->generator->generateFromTokens(
// list of tokens
$this->strategy->execute(
// list of un-purified tokens
$lexer->tokenizeHTML(