HEX
Server: LiteSpeed
System: Linux php-prod-1.spaceapp.ru 5.15.0-157-generic #167-Ubuntu SMP Wed Sep 17 21:35:53 UTC 2025 x86_64
User: xnsbb3110 (1041)
PHP: 8.1.33
Disabled: NONE
Upload Files
File: //proc/self/cwd/wp-content/plugins/autodescription/inc/classes/sitemap/optimized/base.class.php
<?php
/**
 * @package The_SEO_Framework\Classes\Sitemap\Optimized\Base
 * @subpackage The_SEO_Framework\Sitemap
 */

namespace The_SEO_Framework\Sitemap\Optimized;

\defined( 'THE_SEO_FRAMEWORK_PRESENT' ) or die;

use \The_SEO_Framework\{
	Data,
	Data\Filter\Escape,
	Meta,
	Sitemap,
};
use \The_SEO_Framework\Helper\{
	Format\Time,
	Post_Type,
	Query,
};

/**
 * The SEO Framework plugin
 * Copyright (C) 2019 - 2024 Sybre Waaijer, CyberWire B.V. (https://cyberwire.nl/)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Generates the base sitemap.
 *
 * @since 4.0.0
 * @since 4.2.0 Renamed from `\The_SEO_Framework\Builders\Sitemap_Base`.
 * @since 5.0.0 Moved from `\The_SEO_Framework\Builders\Sitemap`.
 *
 * @access private
 */
class Base extends Main {

	/**
	 * @since 4.1.2
	 * @var bool
	 */
	public $base_is_regenerated = false;

	/**
	 * @since 4.1.2
	 * @var bool
	 */
	public $base_is_prerendering = false;

	/**
	 * Generates the sitemap, and stores the generated content in the database.
	 *
	 * Note that this will work sporadically with translation plugins; however,
	 * it will not conflict, since a unique caching key is generated for each language.
	 * TODO consider expanding this feature for multilingual sites?
	 *
	 * @hook tsf_sitemap_cron_hook_before 10
	 * @since 4.1.2
	 * @since 4.2.1 Now no longer lowers the PHP execution time limit from unlimited to 3 minutes.
	 * @since 5.0.0 1. Can now prerender sitemap on a $sitemap_id basis.
	 *              2. Is now static.
	 *
	 * @param string $sitemap_id The sitemap ID to prerender.
	 * @return void Early if the sitemap's already generated.
	 */
	public static function prerender_sitemap( $sitemap_id = 'base' ) {

		if ( ! Sitemap\Cache::is_sitemap_cache_enabled() ) return;

		// Don't prerender if the sitemap is already generated.
		if ( false !== Sitemap\Cache::get_cached_sitemap_content( $sitemap_id ) ) return;

		$ini_max_execution_time = (int) ini_get( 'max_execution_time' );
		if ( 0 !== $ini_max_execution_time )
			set_time_limit( max( $ini_max_execution_time, 3 * \MINUTE_IN_SECONDS ) );

		// Somehow, the 'base' key is unavailable, the database failed, or a lock is already in place. Either way, bail.
		if ( ! Sitemap\Lock::lock_sitemap( $sitemap_id ) ) return;

		$sitemap_base = new self(); // TODO make static? We needn't more than one instance, right?

		$sitemap_base->prepare_generation();
		$sitemap_base->base_is_prerendering = true;

		Sitemap\Cache::cache_sitemap_content( $sitemap_base->build_sitemap(), $sitemap_id );

		Sitemap\Lock::unlock_sitemap( $sitemap_id );

		$sitemap_base->shutdown_generation();
		$sitemap_base->base_is_regenerated = true;
	}

	/**
	 * Returns the generated sitemap. Also stores it in the database when caching is enabled.
	 *
	 * @since 4.1.2
	 * @since 5.0.0 Can now generate cache on a $sitemap_id basis.
	 * @abstract
	 *
	 * @param string $sitemap_id The sitemap ID. Expected either 'base' or 'index'--or otherwise overwritten via the API.
	 * @return string The sitemap content.
	 */
	public function generate_sitemap( $sitemap_id = 'base' ) {

		$_caching_enabled = Sitemap\Cache::is_sitemap_cache_enabled();

		$sitemap_content = $_caching_enabled
			? Sitemap\Cache::get_cached_sitemap_content( $sitemap_id )
			: false;

		if ( false === $sitemap_content ) {

			$this->prepare_generation();
			$_caching_enabled && Sitemap\Lock::lock_sitemap( $sitemap_id );

			$sitemap_content = $this->build_sitemap();

			$this->shutdown_generation();
			$this->base_is_regenerated = true;

			if ( $_caching_enabled ) {
				Sitemap\Cache::cache_sitemap_content( $sitemap_content, $sitemap_id );
				Sitemap\Lock::unlock_sitemap( $sitemap_id );
			}
		}

		return $sitemap_content;
	}

	/**
	 * Generate sitemap.xml content.
	 *
	 * @since 2.2.9
	 * @since 2.8.0 Now adjusts memory limit when possible.
	 * @since 2.9.3 No longer crashes on WordPress sites below WP 4.6.
	 * @since 3.0.4 No longer outputs empty URL entries.
	 * @since 3.1.0 1. Removed the WP<4.6 function_exists check.
	 *              2. Now uses WordPress's built-in memory raiser function, with "context" sitemap.
	 * @since 4.0.0 1. Now assesses all public post types, in favor of qubit options.
	 *              2. Improved performance by a factor of two+.
	 *              3. Renamed method from "generate_sitemap" to abstract extension "build_sitemap".
	 *              4. Moved to \The_SEO_Framework\Builders\Sitemap\Base
	 * @override
	 * @slow The queried results are not stored in WP Post's cache, which would allow direct access
	 *       to all values of the post (if requested). This is because we're using
	 *       `'fields' => 'ids'` instead of `'fields' => 'all'`. However, this would fill RAM
	 *       linearly: at 1000 posts, we'd hit 28MB already, 10 000 would be ~280MB, exceeding max.
	 * @link <https://w.org/support/topic/sitemap-and-memory-exhaustion/#post-13331896>
	 *
	 * @return string The sitemap content.
	 */
	public function build_sitemap() {

		/**
		 * @since 4.2.7
		 * @param \The_SEO_Framework\Sitemap\Optimized\Base
		 */
		\do_action( 'the_seo_framework_build_sitemap_base', $this );

		$content         = '';
		$this->url_count = 0;

		$show_modified = (bool) Data\Plugin::get_option( 'sitemaps_modified' );

		/**
		 * @since 2.2.9
		 * @param bool $timestamp Whether to display the timestamp.
		 */
		$timestamp = (bool) \apply_filters( 'the_seo_framework_sitemap_timestamp', true );

		if ( $timestamp )
			$content .= \sprintf(
				"<!-- %s -->\n",
				\sprintf(
					$this->base_is_prerendering
						/* translators: %s = timestamp */
						? \esc_html__( 'Sitemap is prerendered on %s', 'autodescription' )
						/* translators: %s = timestamp */
						: \esc_html__( 'Sitemap is generated on %s', 'autodescription' ),
					\current_time( 'Y-m-d H:i:s \G\M\T' ),
				),
			);

		foreach ( $this->generate_front_and_blog_url_items(
			[ 'show_modified' => $show_modified ],
		) as $_values ) {
			$content .= $this->build_url_item( $_values );
		}

		$post_types = array_diff( Post_Type::get_all_supported(), [ 'attachment' ] );

		/**
		 * @since 4.0.0
		 * @param array $post_types The supported post types.
		 */
		$post_types = (array) \apply_filters( 'the_seo_framework_sitemap_supported_post_types', $post_types );

		$non_hierarchical_post_types = [];
		$hierarchical_post_types     = [];

		foreach ( $post_types as $_post_type ) {
			if ( \is_post_type_hierarchical( $_post_type ) ) {
				$hierarchical_post_types[] = $_post_type;
			} else {
				$non_hierarchical_post_types[] = $_post_type;
			}
		}

		$wp_query = new \WP_Query;
		$wp_query->init();
		$hierarchical_post_ids = $non_hierarchical_post_ids = [];

		if ( $hierarchical_post_types ) {
			$_exclude_ids = array_filter( [
				(int) \get_option( 'page_on_front' ),
				(int) \get_option( 'page_for_posts' ),
			] );

			$_hierarchical_posts_limit = Sitemap\Utils::get_sitemap_post_limit( 'hierarchical' );

			/**
			 * @since 4.0.0
			 * @since 5.0.5 1. Now sets orderby to 'lastmod', from 'date'.
			 *              2. Now sets order to 'DESC', from 'ASC'.
			 * @param array $args The query arguments.
			 * @link <https://w.org/support/topic/sitemap-and-memory-exhaustion/#post-13331896>
			 */
			$_args = (array) \apply_filters(
				'the_seo_framework_sitemap_hpt_query_args',
				[
					'posts_per_page' => $_hierarchical_posts_limit + \count( $_exclude_ids ),
					'post_type'      => $hierarchical_post_types,
					'orderby'        => 'lastmod',
					'order'          => 'DESC',
					'post_status'    => 'publish',
					'has_password'   => false,
					'fields'         => 'ids',
					'cache_results'  => false,
					'no_found_rows'  => true,
				],
			);

			if ( $_args['post_type'] ) {
				$wp_query->query = $wp_query->query_vars = $_args;

				$hierarchical_post_ids = array_diff( $wp_query->get_posts(), $_exclude_ids );

				// Stop confusion: trim query to set value (by one or two, depending on whether the homepage and blog are included).
				// This is ultimately redundant, but it'll stop support requests by making the input value more accurate.
				if ( \count( $hierarchical_post_ids ) > $_hierarchical_posts_limit ) {
					array_splice( $hierarchical_post_ids, $_hierarchical_posts_limit );
				}
			}
		}

		if ( $non_hierarchical_post_types ) {
			/**
			 * @since 4.0.0
			 * @param array $args The query arguments.
			 */
			$_args = (array) \apply_filters(
				'the_seo_framework_sitemap_nhpt_query_args',
				[
					// phpcs:ignore, WordPress.WP.PostsPerPage -- This is a sitemap, it will be slow.
					'posts_per_page' => Sitemap\Utils::get_sitemap_post_limit( 'nonhierarchical' ),
					'post_type'      => $non_hierarchical_post_types,
					'orderby'        => 'lastmod',
					'order'          => 'DESC',
					'post_status'    => 'publish',
					'has_password'   => false,
					'fields'         => 'ids',
					'cache_results'  => false,
					'no_found_rows'  => true,
				],
			);

			if ( $_args['post_type'] ) {
				$wp_query->query = $wp_query->query_vars = $_args;

				$non_hierarchical_post_ids = $wp_query->get_posts();
			}
		}

		// Destroy query instance.
		$wp_query = null;

		/**
		 * @since 4.1.0
		 * @param int[] $_items                    The post IDs that will be parsed in the sitemap.
		 *                                         When it totals for more than 49998 items, they'll be spliced.
		 * @param int[] $hierarchical_post_ids     The post IDs from hierarchical post types.
		 * @param int[] $non_hierarchical_post_ids The post IDs from non-hierarchical post types.
		 */
		$_items      = (array) \apply_filters(
			'the_seo_framework_sitemap_items',
			array_merge( $hierarchical_post_ids, $non_hierarchical_post_ids ),
			$hierarchical_post_ids,
			$non_hierarchical_post_ids,
		);
		$total_items = \count( $_items );

		// 49998 = 50000-2 (home+blog), max sitemap items.
		if ( $total_items > 49998 ) array_splice( $_items, 49998 );
		// We could also calculate the sitemap length (may not be above 10 MB)...
		// ...but that'd mean each entry must be at least 200 chars long on avg. Good luck with that.

		foreach ( $this->generate_url_item_values(
			$_items,
			[
				'show_modified' => $show_modified,
				'total_items'   => $total_items,
			],
		) as $_values ) {
			$content .= static::build_url_item( $_values );
		}

		/**
		 * NOTE to devs: Use this filter if you want to let the generator build the string (lower memory usage).
		 * This filter also keeps track toward the sitemap limit via $count.
		 */
		if ( \has_filter( 'the_seo_framework_sitemap_additional_urls' ) ) {
			foreach ( $this->generate_additional_base_urls(
				[
					'show_modified' => $show_modified,
					'count'         => $this->url_count,
				]
			) as $_values ) {
				$content .= static::build_url_item( $_values );
			}
		}

		/**
		 * This filter accepts a simple string, which may strain the memory usage if not generated (via co-routine).
		 *
		 * @since 2.5.2
		 * @since 4.0.0 Added $args parameter.
		 * @since 4.2.0 No longer forwards the 'show_priority' index in the second ($args) parameter.
		 * @param string $extend Custom sitemap extension. Must be escaped.
		 * @param array $args {
		 *     The sitemap extension arguments.
		 *
		 *     @type bool $show_modified Whether to display modified date.
		 *     @type int  $count         The total sitemap items before adding additional URLs.
		 * }
		 */
		$extend = (string) \apply_filters(
			'the_seo_framework_sitemap_extend',
			'',
			[
				'show_modified' => $show_modified,
				'count'         => $this->url_count,
			],
		);

		if ( $extend )
			$content .= "\t$extend\n";

		return $content;
	}

	/**
	 * Generates front-and blog page sitemap items.
	 *
	 * @since 4.0.0
	 * @since 5.0.0 Removed second parameter `&$count`.
	 * @since 5.0.5 1. Now tests for is_protected, is_draft, and is_post_included_in_sitemap for the front page.
	 *              2. Now tests for is_protected and is_draft for the posts page.
	 * @generator
	 *
	 * @param array $args {
	 *     The generator arguments.
	 *
	 *     @type bool $show_modified Whether to display the last modified date.
	 * }
	 * @yield array|void : {
	 *     @type string $loc      The URI of the page.
	 *     @type string $lastmod  The page's last modified date.
	 * }
	 */
	protected function generate_front_and_blog_url_items( $args ) {

		if ( Query\Utils::has_page_on_front() ) {
			$front_page_id = (int) \get_option( 'page_on_front' );
			$posts_page_id = (int) \get_option( 'page_for_posts' );

			// Assert 404 here; these are queried separately from all other entries that have these tests builtin.
			if (
				   $front_page_id // Might not be assigned.
				&& ! Data\Post::is_protected( $front_page_id )
				&& ! Data\Post::is_draft( $front_page_id )
				&& Sitemap\Utils::is_post_included_in_sitemap( $front_page_id )
			) {
				yield from $this->generate_url_item_values(
					[ $front_page_id ],
					$args,
				);
			}

			// Assert 404 here; these are queried separately from all other entries that have these tests builtin.
			if (
				   $posts_page_id // Might not be assigned.
				&& ! Data\Post::is_protected( $posts_page_id )
				&& ! Data\Post::is_draft( $posts_page_id )
				&& Sitemap\Utils::is_post_included_in_sitemap( $posts_page_id )
			) {
				foreach ( $this->generate_url_item_values(
					[ $posts_page_id ],
					$args,
				) as $_values ) {
					if ( $_values['loc'] && $args['show_modified'] ) {
						$latests_posts = \wp_get_recent_posts(
							[
								'numberposts'  => 1,
								'post_type'    => 'post',
								'post_status'  => 'publish',
								'has_password' => false,
								'orderby'      => 'post_date',
								'order'        => 'DESC',
								'offset'       => 0,
							],
							\OBJECT,
						);
						$_publish_post = $latests_posts[0]->post_date_gmt ?? '0000-00-00 00:00:00';
						$_lastmod_blog = $_values['lastmod']; // Inferred from generator generate_url_item_values()

						/**
						 * @since 4.1.1
						 * @param string $lastmod The lastmod time in SQL notation (`Y-m-d H:i:s`). Expected to explicitly follow that format!
						 */
						$_values['lastmod'] = (string) \apply_filters(
							'the_seo_framework_sitemap_blog_lastmod',
							strtotime( $_publish_post ) > strtotime( $_lastmod_blog )
								? $_publish_post
								: $_lastmod_blog,
						);
					}

					yield $_values;
				}
			}
		} else {
			// Blog page as front. Unique; cannot go through generate_url_item_values().
			if ( Sitemap\Utils::is_post_included_in_sitemap( 0 ) ) {
				// Reset.
				$_values        = [];
				$_values['loc'] = Meta\URI::get_bare_front_page_url();

				if ( $args['show_modified'] ) {
					$latests_posts = \wp_get_recent_posts(
						[
							'numberposts'  => 1,
							'post_type'    => 'post',
							'post_status'  => 'publish',
							'has_password' => false,
							'orderby'      => 'post_date',
							'order'        => 'DESC',
							'offset'       => 0,
						],
						\OBJECT,
					);

					/**
					 * @since 4.1.1
					 * @param string $lastmod The lastmod time in SQL notation (`Y-m-d H:i:s`). Expected to explicitly follow that format!
					 */
					$_values['lastmod'] = (string) \apply_filters(
						'the_seo_framework_sitemap_blog_lastmod',
						$latests_posts[0]->post_date_gmt ?? '0000-00-00 00:00:00',
					);
				}

				++$this->url_count;
				yield $_values;
			}
		}
	}

	/**
	 * Generates sitemap URL item values.
	 *
	 * @since 4.0.0
	 * @since 4.1.1 Now clears WordPress's post cache every time an item is generated.
	 * @since 5.0.0 Removed second parameter `&$count`.
	 * @generator
	 * @iterator
	 *
	 * @param int[] $post_ids The post IDs to go over.
	 * @param array $args     {
	 *     The generator arguments.
	 *
	 *     @type bool $show_modified Whether to display the last modified date.
	 * }
	 * @yield array|void : {
	 *     string loc
	 *     string lastmod
	 * }
	 */
	protected function generate_url_item_values( $post_ids, $args ) {

		foreach ( $post_ids as $post_id ) {
			// Setup post cache, which is also used in is_post_included_in_sitemap() and get_bare_singular_url().
			$post = \get_post( $post_id );

			if ( Sitemap\Utils::is_post_included_in_sitemap( $post_id ) ) {
				$_values = [
					'loc' => Meta\URI::get_bare_singular_url( $post_id ),
				];

				if ( $args['show_modified'] )
					$_values['lastmod'] = $post->post_modified_gmt ?? '0000-00-00 00:00:00';

				++$this->url_count;
				yield $_values;
			}

			// Only clean post cache when NOT using a caching plugin.
			\WP_CACHE or \clean_post_cache( $post );
		}
	}

	/**
	 * Builds, escapes, and returns a sitemap URL item.
	 *
	 * @since 4.0.0
	 * @since 4.1.1 Now uses `create_xml_entry()` to parse the XML.
	 * @since 5.0.0 Is now static.
	 *
	 * @param array $args {
	 *     The URL item arguments.
	 *
	 *     @type string  $loc     The item's URI.
	 *     @type ?string $lastmod SQL timestamp string (Y-m-d H:i:s). Expected to be GMT. Null or '0000-00-00 00:00:00' to omit.
	 * }
	 * @return string The sitemap item.
	 */
	protected static function build_url_item( $args ) {

		if ( empty( $args['loc'] ) ) return '';

		$xml = [
			'loc' => Escape::xml_uri( $args['loc'] ),
		];

		if ( isset( $args['lastmod'] ) && '0000-00-00 00:00:00' !== $args['lastmod'] ) {
			// XML safe.
			$xml['lastmod'] = Time::convert_to_preferred_format( $args['lastmod'] );
		}

		return static::create_xml_entry( [ 'url' => $xml ], 1 );
	}

	/**
	 * Retrieves additional URLs and builds items from them.
	 *
	 * @since 4.0.0
	 * @since 4.0.1 1. Converted to generator and iterator. Therefore, renamed function.
	 *              2. Now actually does something.
	 * @since 5.0.0 Removed second parameter `&$count`.
	 * @generator
	 * @iterator
	 *
	 * @param array $args {
	 *     @type bool $show_modified Whether to display modified date.
	 *     @type int  $count         The total sitemap items before adding additional URLs.
	 * }
	 * @yield array|void : {
	 *     @type string $loc      The URI of the page.
	 *     @type string $lastmod  The page's last modified date.
	 * }
	 */
	protected function generate_additional_base_urls( $args ) {
		/**
		 * @since 2.5.2
		 * @since 3.2.2 Invalid URLs are now skipped.
		 * @since 4.0.0 Added $args parameter.
		 * @since 4.2.0 No longer forwards the 'show_priority' index in the second ($args) parameter.
		 * @example return value: [ 'http://example.com' => [ 'lastmod' => '2024-04-10 14:52:06' ] ]
		 * @param array $custom_urls {
		 *     An array of custom URLs, keyed by the absolute url to the page.
		 *
		 *     @type string $lastmod UNIXTIME <GMT+0> Last modified date, e.g. "2016-01-26 13:04:55"
		 * }
		 * @param array $args {
		 *     The sitemap URL extension arguments.
		 *
		 *     @type bool $show_modified Whether to display modified date.
		 *     @type int  $count         Estimate: The total sitemap items before adding additional URLs.
		 * }
		 */
		$custom_urls = (array) \apply_filters( 'the_seo_framework_sitemap_additional_urls', [], $args );

		foreach ( $custom_urls as $url => $values ) {
			if ( ! \is_array( $values ) ) {
				// If there are no args, it's assigned as URL (per example)
				$url = $values;
			}

			// Test if URL is valid.
			if ( ! \sanitize_url( $url, [ 'https', 'http' ] ) ) continue;

			// Reset.
			$_values        = [];
			$_values['loc'] = $url;

			if ( $args['show_modified'] )
				$_values['lastmod'] = ! empty( $values['lastmod'] ) ? $values['lastmod'] : '0000-00-00 00:00:00';

			++$this->url_count;
			yield $_values;
		}
	}
}