File: //proc/self/cwd/wp-content/plugins/autodescription/inc/classes/data/filter/sanitize.class.php
<?php
/**
* @package The_SEO_Framework\Classes\Data\Filter\Sanitize
* @subpackage The_SEO_Framework\Data\Filter
*/
namespace The_SEO_Framework\Data\Filter;
\defined( 'THE_SEO_FRAMEWORK_PRESENT' ) or die;
use \The_SEO_Framework\{
Helper,
Helper\Format\Strings,
Meta,
};
/**
* The SEO Framework plugin
* Copyright (C) 2023 - 2024 Sybre Waaijer, CyberWire B.V. (https://cyberwire.nl/)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as published
* by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Holds a collection of data sanitization methods.
*
* @since 5.0.0
* @access protected
* Use tsf()->filter()->sanitize() instead.
*/
class Sanitize {
/**
* Sanitizes input to a boolean integer, i.e. 0, 1,
*
* Uses double casting. First, we cast to boolean, then to int.
*
* @since 2.2.2
* @since 2.8.0 Method is now public.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_one_zero`.
*
* @param mixed $value The value to cast to a boolean integer.
* @return int A boolean as a string (1 or 0)
*/
public static function boolean_integer( $value ) {
return (int) (bool) $value;
}
/**
* Sanitizes input to a numeric string, like '0', '1', '2'.
*
* Uses double casting. First, we cast to integer, then to string.
* Rounds floats down. Converts non-numeric inputs to '0'.
*
* @since 3.0.0
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_numeric_string`.
*
* @param mixed $value The value to cast to a numeric string.
* @return string An integer as string.
*/
public static function numeric_string( $value ) {
return (string) (int) $value;
}
/**
* Sanitizes color hexadecimals to either 3 or 6 length: rgb or rrggbb.
* Removes leading hashtags.
* Makes the input lowercase.
*
* @since 2.8.0
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_color_hex`.
* 3. Now accepts longer strings, and shortens them to the correct length.
*
* @param string $color String with potentially unwanted hex values.
* @return string The sanitized color hex.
*/
public static function rgb_hex( $color ) {
preg_match(
'/^(?:[a-f\d]{3}){1,2}/i',
trim( $color, '# ' ),
$matches,
);
return strtolower( $matches[0] ?? '' );
}
/**
* Sanitizes color hexadecimals to either 3, 4, 6, or 8 length: rgb, rgba, rrggbb, rrggbbaa.
* Removes leading hashtags.
* Makes the input lowercase.
*
* @since 5.0.0
*
* @param string $color String with potentially unwanted hex values.
* @return string The sanitized color hex.
*/
public static function rgba_hex( $color ) {
// If rgb, we must only get rgb[a], not rgb[aa].
// If rrggbb, we must only get rrggbb[aa], not rrggbb[a].
preg_match(
'/^(?:[a-f\d]{8}|[a-f\d]{6}|[a-f\d]{3,4})/i',
trim( $color, '# ' ),
$matches,
);
return strtolower( $matches[0] ?? '' );
}
/**
* Sanitizes metadata content.
* Returns single-line, trimmed text without duplicated spaces, nbsp, or tabs.
* Also converts back-solidi to their respective HTML entities for non-destructive handling.
* Also adds a capital P, dangit.
* Finally, it texturizes the content.
*
* @since 5.0.0
*
* @param string $text The text.
* @return string One line sanitized text.
*/
public static function metadata_content( $text ) {
if ( ! \is_scalar( $text ) || ! \strlen( $text ) ) return '';
return \wptexturize(
\capital_P_dangit(
static::backward_solidus_to_entity(
static::lone_hyphen_to_entity(
static::remove_repeated_spacing(
trim(
static::tab_to_space(
static::newline_to_space(
static::nbsp_to_space(
(string) $text,
),
),
),
),
),
),
),
),
);
}
/**
* Normalizes metadata content for string comparison.
* The data returned is considered insecure.
*
* @since 5.0.0
*
* @param string $text The input text with possible repeated spacing.
* @return string The input string without repeated spaces.
*/
public static function normalize_metadata_content_for_strcmp( $text ) {
// Why not blog_charset? Because blog_charset is there only to onboard non-UTF-8 to UTF-8.
return html_entity_decode(
static::metadata_content( $text ),
\ENT_QUOTES | \ENT_SUBSTITUTE | \ENT_HTML5,
'UTF-8',
);
}
/**
* Sanitizes text by removing repeated spaces.
*
* @since 2.8.2
* @since 2.9.4 Now no longer fails when first two characters are spaces.
* @since 3.1.0 1. Now also catches non-breaking spaces.
* 2. Now uses a regex pattern.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_dupe_space`.
* 3. Now replaces the spaces with the original spacing type, instead of only \u20.
*
* @param string $text The input text with possible repeated spacing.
* @return string The input string without repeated spaces.
*/
public static function remove_repeated_spacing( $text ) {
return preg_replace_callback(
'/(\p{Zs}){2,}/u',
// Unicode support sans mb_*: Calculate the bytes of the match and then remove that length.
fn( $matches ) => substr( $matches[1], 0, \strlen( $matches[1] ) ),
$text,
);
}
/**
* Replaces non-transformative hyphens with entity hyphens.
* Duplicated simple hyphens are preserved.
*
* Regex challenge, make the columns without an x light up:
* xxx - xx - xxx- - - xxxxxx xxxxxx- xxxxx - -
* --- - -- - ---- - - ------ ------- ----- - -
*
* The answer? `/((-{2,3})(*SKIP)-|-)(?(2)(*FAIL))/`
* Sybre-kamisama desu.
*
* @since 4.0.5
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_hyphen`.
*
* @param string $text String with potential hyphens.
* @return string A string with safe HTML encoded hyphens.
*/
public static function lone_hyphen_to_entity( $text ) {
// str_replace is faster than putting these alternative sequences in the `-|-` regex below.
// That'd be this: "/((?'h'-|&\#45;|\xe2\x80\x90){2,3}(*SKIP)(?&h)|(?&h))(?(h)(*FAIL))/u"
return str_replace(
[ '-', "\xe2\x80\x90" ], // Should we consider �...00045;?
'-',
preg_replace( '/((-{2,3})(*SKIP)-|-)(?(2)(*FAIL))/', '-', $text ),
);
}
/**
* Replaces non-break spaces with regular spaces.
*
* This addresses a quirk in TinyMCE, where paragraph newlines are populated with nbsp.
* TODO: Perhaps we should address that quirk directly, instead of removing indiscriminately.
* e.g., like `strip_newline_urls` and `strip_paragraph_urls`.
*
* @since 2.8.2
* @since 3.1.0 Now catches all non-breaking characters.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_nbsp`.
*
* @param string $text String with potentially unwanted nbsp values.
* @return string A spacey string.
*/
public static function nbsp_to_space( $text ) {
return str_replace( [ ' ', ' ', ' ', "\xc2\xa0" ], ' ', $text );
}
/**
* Replaces backslash with entity backslash.
*
* @since 2.8.2
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_bsol`.
* 3. No longer removes backslashes.
*
* @param string $text String with potentially unwanted \ values.
* @return string A string with safe HTML encoded backslashes.
*/
public static function backward_solidus_to_entity( $text ) {
return str_replace( '\\', '\', $text );
}
/**
* Converts multilines to single lines.
*
* @since 2.8.2
* @since 3.1.0 Simplified method.
* @since 4.1.0 1. Made this method about 25~92% faster (more replacements = more faster). 73% slower on empty strings (negligible).
* 2. Now also strips form-feed and vertical whitespace characters--might they appear in the wild.
* 3. Now also strips horizontal tabs (reverted in 4.1.1).
* @since 4.1.1 1. Now uses real bytes, instead of sequences (causing uneven transformations, plausibly emptying content).
* 2. No longer transforms horizontal tabs. Use `s_tabs()` instead.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_singleline`.
* @link https://www.php.net/manual/en/regexp.reference.escape.php
*
* @param string $text The input value with possible multiline.
* @return string The input string without multiple lines.
*/
public static function newline_to_space( $text ) {
// Use x20 because it's a human-visible real space.
return trim(
strtr( $text, "\x0A\x0B\x0C\x0D", "\x20\x20\x20\x20" ),
);
}
/**
* Removes tabs and replaces it with spaces.
*
* @since 2.8.2
* @since 4.1.1 Now uses real bytes, instead of sequences (causing uneven transformations, plausibly emptying content).
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_tabs`.
* @link https://www.php.net/manual/en/regexp.reference.escape.php
*
* @param string $text The input value with possible tabs.
* @return string The input string without tabs.
*/
public static function tab_to_space( $text ) {
// Use x20 because it's a human-visible real space.
return strtr( $text, "\x09", "\x20" );
}
/**
* Returns a -1, 0, or 1, based on nearest value. This is close to a sign function, but not quite.
*
* Obviously, we're not working with actual quantum bits, but its implementation is based on it:
* If more negative, unset.
* If nought, change nothing.
* If more positive, set.
*
* @since 4.0.0
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_qubit`.
* 3. Now considers .3334 the turnover point, instead of 0.33000...0001.
* @since 5.1.0 Now considers .3333 (off by .00003) the turnover point for the negative side,
* instead of -0.3333999...999 (off by .00006).
*
* @param float|int $value The qubit to test; ideally be -1, 0, or 1.
* @return int -1, 0, or 1.
*/
public static function qubit( $value ) {
return $value >= .3334 <=> -.3333 >= $value;
}
/**
* Sanitizes the Redirect URL.
*
* @since 2.2.4
* @since 2.8.0 Method is now public.
* @since 3.0.6 Noqueries is now disabled by default.
* @since 4.0.0 1. Removed rudimentary relative URL testing.
* 2. Removed input transformation filters, and with that, removed redundant multisite spam protection.
* 3. Now allows all protocols. Enjoy!
* 4. Now no longer lets through double-absolute URLs (e.g. `https://example.com/https://example.com/path/to/file/`)
* when filter `the_seo_framework_allow_external_redirect` is set to false.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_redirect_url`.
* 3. No longer provides stripping URL queries via a filter.
*
* @param string $url String with potentially unwanted redirect URL.
* @return string The Sanitized Redirect URL
*/
public static function redirect_url( $url ) {
$url = trim( $url );
if ( empty( $url ) ) return '';
// This is also checked when performing a redirect.
if ( ! Helper\Redirect::allow_external_redirect() ) {
$url = Meta\URI\Utils::set_url_scheme( Meta\URI\Utils::convert_path_to_url(
Meta\URI\Utils::set_url_scheme( $url, 'relative' ),
) );
}
// All WP defined protocols are allowed.
return \sanitize_url( $url );
}
/**
* Cleans known parameters from image details.
*
* @since 4.0.0
* @since 4.0.2 Now finds smaller images when they're over 4K.
* @since 4.0.5 Now faults images with filename extensions APNG, BMP, ICO, TIFF, or SVG.
* @since 4.1.4 Fixed theoretical issue where a different image could be set when width
* and height are supplied and either over 4K, but no ID is given.
* @since 4.2.4 Now accepts, processes, and returns filesizes under index `filesize`.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_image_details`.
* 3. Now sanitizes the caption.
*
* @param array|array[] $details {
* An array of image details, or an array of an array thereof.
*
* @type string $url Required. The image URL.
* @type int $id Optional. The image ID. Used to fetch the largest image possible.
* @type int $width Optional. The image width in pixels.
* @type int $height Optional. The image height in pixels.
* @type string $alt Optional. The image alt tag.
* @type string $caption Optional. The image caption.
* @type int $filesize Optional. The image filesize in bytes.
* }
* @return array|array[] $details {
* An array of image details, or an array of an array thereof if the input was multidimensional.
*
* @type string $url The image URL.
* @type int $id The image ID.
* @type int $width The image width in pixels.
* @type int $height The image height in pixels.
* @type string $alt The image alt tag.
* @type string $caption The image caption.
* @type int $filesize The image filesize in bytes.
* }
*/
public static function image_details( $details ) {
// This is over 350x faster than a polyfill for `array_is_list()`.
if ( isset( $details[0] ) && array_values( $details ) === $details ) {
foreach ( $details as $deets )
$sanitized_details[] = static::image_details( $deets ); // phpcs:ignore, VariableAnalysis.CodeAnalysis
return $sanitized_details ?? [];
}
$defaults = [
'url' => '',
'id' => 0,
'width' => 0,
'height' => 0,
'alt' => '',
'caption' => '',
'filesize' => 0,
];
if ( empty( $details ) ) return $defaults;
[ $url, $id, $width, $height, $alt, $caption, $filesize ] = array_values( array_merge( $defaults, $details ) );
if ( empty( $url ) ) return $defaults;
$url = \sanitize_url( Meta\URI\Utils::make_absolute_current_scheme_url( $url ), [ 'https', 'http' ] );
if ( empty( $url ) ) return $defaults;
/**
* Skip APNG, BMP, ICO, TIFF, and SVG.
*
* @link <https://developer.twitter.com/en/docs/tweets/optimize-with-cards/overview/markup>
* @link <https://developer.mozilla.org/en-US/docs/Web/Media/Formats/Image_types>
* jp(e)g, png, webp, and gif are supported. Assume all non-matches to fall in those categories,
* since we don't perform a live MIME-test.
*
* Tested with Facebook; they ignore them too. There's no documentation available.
* TODO Should we even test for this here, or at the image generators' type?
* It seems, however, that _all_ services we want to communicate with ignore these types, anyway.
*/
if ( \in_array(
strtolower( strtok( pathinfo( $url, \PATHINFO_EXTENSION ), '?' ) ),
[ 'apng', 'bmp', 'ico', 'cur', 'svg', 'tif', 'tiff' ],
true,
) ) return $defaults;
$width = \absint( $width );
$height = \absint( $height );
if ( empty( $width ) || empty( $height ) )
$width = $height = 0;
// TODO add filter for 5 * \MB_IN_BYTES; Facebook allows 8MB; Twitter 5MB (Q4 2022).
if ( $id && ( $width > 4096 || $height > 4096 || $filesize > 5 * \MB_IN_BYTES ) ) {
$new_image = Meta\Image\Utils::get_largest_image_src( $id, 4096, 5 * \MB_IN_BYTES );
$url = $new_image ? \sanitize_url(
Meta\URI\Utils::make_absolute_current_scheme_url( $new_image[0] ),
[ 'https', 'http' ],
) : '';
if ( empty( $url ) ) return $defaults;
// No sanitization needed. PHP's getimagesize() returns the correct values.
$width = $new_image[1];
$height = $new_image[2];
}
if ( $alt ) {
$alt = \wp_strip_all_tags( $alt );
// 420: https://developer.twitter.com/en/docs/tweets/optimize-with-cards/overview/summary.html
// Don't "ai"-trim if under, it's unlikely to always be a sentence. Trim to 417 to account for plausibly appended "...".
$alt = \strlen( $alt ) > 420 ? Strings::clamp_sentence( $alt, 0, 417 ) : $alt;
}
if ( $caption ) {
$caption = \wp_strip_all_tags( $caption, true );
}
return compact( 'url', 'id', 'width', 'height', 'alt', 'caption', 'filesize' );
}
/**
* Sanitizes the Facebook profile link. Makes an actual Facebook link if it isn't already.
*
* @since 2.2.2
* @since 2.8.0 Method is now public.
* @since 3.0.6 Now allows a sole query argument when profile.php is used.
* @since 4.0.0 1. No longer returns a plain Facebook URL when the entry path is sanitized to become empty.
* 2. Now returns empty when using only spaces and tabs.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_facebook_profile`.
*
* @param string $link The unsanitized Facebook profile URL.
* @return string The sanitized Facebook profile URL.
*/
public static function facebook_profile_link( $link ) {
$path = trim( Meta\URI\Utils::get_relative_part_from_url( $link ), ' /' );
// /0 is a valid profile link.
if ( ! \strlen( $path ) ) return '';
$link = "https://www.facebook.com/{$path}";
if ( str_contains( $link, 'profile.php' ) ) {
// Extract query parameters.
parse_str( parse_url( $link, \PHP_URL_QUERY ), $r );
if ( empty( $r['id'] ) ) return '';
$link = 'https://www.facebook.com/profile.php?id=' . \absint( $r['id'] );
}
return \sanitize_url( $link );
}
/**
* Sanitizes a Twitter/X profile handle.
*
* @since 2.2.2
* @since 2.8.0 Method is now public.
* @since 3.0.0 1. Now removes '@' from the URL path.
* 2. Now removes spaces and tabs.
* @since 4.0.0 1. Now returns empty on lone `@` entries.
* 2. Now returns empty when using only spaces and tabs.
* @since 5.0.0 1. Moved from `\The_SEO_Framework\Load`.
* 2. Renamed from `s_twitter_name`.
*
* @param string $handle An unsanitized profile handle.
* @return string A sanitized profile handle with '@' prefixed to it.
*/
public static function twitter_profile_handle( $handle ) {
$handle = preg_replace(
'/[^a-z\d_]/i',
'',
trim( Meta\URI\Utils::get_relative_part_from_url( $handle ), ' /@' )
);
$length = \strlen( $handle );
// 4~15 since 2013 -- support shorter (/0 exists) and the theoretical 18 limit:
if ( $length < 1 || $length > 18 )
return '';
return "@$handle";
}
}