*
 * @copyright Copyright (C) 2008-2019, Yoast BV
 * The following code is a derivative work of the code from the Yoast(https://github.com/Yoast/wordpress-seo/), which is licensed under GPL v3.
 */

namespace RankMath\Sitemap;

use RankMath\Helper;
use RankMath\Traits\Hooker;

defined( 'ABSPATH' ) || exit;

/**
 * Generator class.
 *
 * Collects links from the registered sitemap providers and serialises them
 * into the XML of either the sitemap index or a single sitemap page.
 */
class Generator extends XML {

	use Hooker;

	/**
	 * XSL stylesheet for styling a sitemap for web browsers.
	 *
	 * Holds the complete `xml-stylesheet` processing instruction, not just the URL.
	 *
	 * @var string
	 */
	protected $stylesheet = '';

	/**
	 * Holds the get_bloginfo( 'charset' ) value to reuse for performance.
	 *
	 * @var string
	 */
	protected $charset = 'UTF-8';

	/**
	 * If data encoding needs to be converted for output.
	 *
	 * @var bool
	 */
	protected $needs_conversion = false;

	/**
	 * Timezone helper used to format modification dates.
	 *
	 * @var Timezone
	 */
	public $timezone;

	/**
	 * Sitemap provider instances.
	 *
	 * @var Provider[]
	 */
	public $providers = [];

	/**
	 * The maximum number of entries per sitemap page.
	 *
	 * @var int
	 */
	private $max_entries;

	/**
	 * Set up object properties.
	 */
	public function __construct() {
		// Drop the leading scheme so the stylesheet URL is protocol-relative.
		$this->stylesheet     = preg_replace( '/(^http[s]?:)/', '', Router::get_base_url( 'main-sitemap.xsl' ) );
		$this->stylesheet     = '<?xml-stylesheet type="text/xsl" href="' . $this->stylesheet . '"?>';
		$this->charset        = get_bloginfo( 'charset' );
		$this->output_charset = $this->charset;
		$this->timezone       = new Timezone();

		// Output as UTF-8 whenever mbstring is able to convert from the blog charset.
		if (
			'UTF-8' !== $this->charset
			&& function_exists( 'mb_list_encodings' )
			&& in_array( $this->charset, mb_list_encodings(), true )
		) {
			$this->output_charset = 'UTF-8';
		}

		$this->needs_conversion = $this->output_charset !== $this->charset;

		$this->instantiate();
	}

	/**
	 * Instantiate required objects.
	 */
	private function instantiate() {
		// Initialize sitemap providers classes.
		$this->providers = [
			new \RankMath\Sitemap\Providers\Post_Type(),
			new \RankMath\Sitemap\Providers\Taxonomy(),
		];

		// Author Provider.
		if ( true === Helper::is_author_archive_indexable() ) {
			$this->providers[] = new \RankMath\Sitemap\Providers\Author();
		}

		// A filter callback may return a non-array value; cast so the loop never warns.
		$external_providers = (array) $this->do_filter( 'sitemap/providers', [] );
		foreach ( $external_providers as $provider ) {
			if ( is_object( $provider ) ) {
				$this->providers[] = $provider;
			}
		}
	}

	/**
	 * Produce final XML output with debug information.
	 *
	 * @param string $type Sitemap type.
	 * @param int    $page Page number to retrieve.
	 * @return string The XML document, or an empty string when there is no content.
	 */
	public function get_output( $type, $page ) {
		$output = '<?xml version="1.0" encoding="' . esc_attr( $this->get_output_charset() ) . '"?>';

		if ( $this->stylesheet ) {
			/**
			 * Filter the stylesheet URL for the XML sitemap.
			 *
			 * @param string $stylesheet Stylesheet URL.
			 */
			$output .= $this->do_filter( "sitemap/{$type}_stylesheet_url", $this->stylesheet ) . "\n";
		}

		$content = $this->build_sitemap( $type, $page );

		if ( '' !== $content ) {
			return $output . $content;
		}

		return '';
	}

	/**
	 * Attempts to build the requested sitemap.
	 *
	 * @param string $type Sitemap type.
	 * @param int    $page Page number to retrieve.
	 * @return string
	 */
	public function build_sitemap( $type, $page ) {
		$this->max_entries = absint( Helper::get_settings( 'sitemap.items_per_page', 100 ) );

		/**
		 * Filter the type of sitemap to build.
		 *
		 * @param string $type Sitemap type, determined by the request.
		 */
		$type = $this->do_filter( 'sitemap/build_type', $type );

		// The type '1' selects the sitemap index.
		if ( '1' === $type ) {
			return $this->build_root_map();
		}

		foreach ( $this->providers as $provider ) {
			if ( ! $provider->handles_type( $type ) ) {
				continue;
			}

			$links = $provider->get_sitemap_links( $type, $this->max_entries, $page );

			// Skip empty results unless the provider asks for an empty first page to be shown.
			if ( empty( $links ) && ( empty( $provider->should_show_empty ) || $page > 1 ) ) {
				continue;
			}

			return $this->get_sitemap( $links, $type, $page );
		}

		return $this->do_filter( "sitemap/{$type}/content", '' );
	}

	/**
	 * Build the root sitemap (example.com/sitemap_index.xml) which lists sub-sitemaps for other content types.
	 *
	 * @return string The index XML, or an empty string when no provider returned links.
	 */
	public function build_root_map() {
		// This method is public, so it may run before build_sitemap() has loaded the setting.
		if ( null === $this->max_entries ) {
			$this->max_entries = absint( Helper::get_settings( 'sitemap.items_per_page', 100 ) );
		}

		$links = [];
		foreach ( $this->providers as $provider ) {
			$links = array_merge( $links, $provider->get_index_links( $this->max_entries ) );
		}

		if ( empty( $links ) ) {
			return '';
		}

		return $this->get_index( $links );
	}

	/**
	 * Produce XML output for sitemap index.
	 *
	 * @param array $links Set of sitemaps index links.
	 * @return string
	 */
	public function get_index( $links ) {
		$xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

		foreach ( $links as $link ) {
			$xml .= $this->sitemap_index_url( $link );
		}

		/**
		 * Filter to append sitemaps to the index.
		 *
		 * @param string $index String to append to sitemaps index, defaults to empty.
		 */
		$xml .= $this->do_filter( 'sitemap/index', '' );
		$xml .= '</sitemapindex>';

		return $xml;
	}

	/**
	 * Produce XML output for urlset.
	 *
	 * @param array  $links        Set of sitemap links.
	 * @param string $type         Sitemap type.
	 * @param int    $current_page Current sitemap page number.
	 * @return string
	 */
	public function get_sitemap( $links, $type, $current_page ) {
		$urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" '
			. 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd '
			. 'http://www.google.com/schemas/sitemap-image/1.1 http://www.google.com/schemas/sitemap-image/1.1/sitemap-image.xsd" '
			. 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

		/**
		 * Filters the `urlset` for a sitemap by type.
		 *
		 * @param string $urlset The output for the sitemap's `urlset`.
		 */
		$xml = $this->do_filter( "sitemap/{$type}_urlset", $urlset );

		// A type-specific filter, when registered, replaces the default entry rendering.
		$method = $type . '_sitemap_url';
		foreach ( $links as $url ) {
			$xml .= has_filter( "rank_math/sitemap/{$method}" )
				? $this->do_filter( "sitemap/{$method}", $url, $this )
				: $this->sitemap_url( $url );
		}

		/**
		 * Filter to add extra URLs to the XML sitemap by type.
		 *
		 * Only runs for the first page, not on all.
		 *
		 * @param string $content String content to add, defaults to empty.
		 */
		if ( 1 === (int) $current_page ) { // Cast so a numeric-string page number still matches.
			$xml .= $this->do_filter( "sitemap/{$type}_content", '' );
		}

		$xml .= '</urlset>';

		return $xml;
	}

	/**
	 * Build the `<sitemap>` tag for a given URL.
	 *
	 * @param array $url Array of parts that make up this entry ('loc', optional 'lastmod').
	 * @return string
	 */
	protected function sitemap_index_url( $url ) {
		$date = null;
		if ( ! empty( $url['lastmod'] ) ) {
			$date = $this->timezone->format_date( $url['lastmod'] );
		}

		$output  = $this->newline( '<sitemap>', 1 );
		$output .= $this->newline( '<loc>' . htmlspecialchars( $url['loc'] ) . '</loc>', 2 );
		$output .= empty( $date ) ? '' : $this->newline( '<lastmod>' . htmlspecialchars( $date ) . '</lastmod>', 2 );
		$output .= $this->newline( '</sitemap>', 1 );

		return $output;
	}

	/**
	 * Build the `<url>` tag for a given URL.
	 *
	 * Public access for backwards compatibility reasons.
	 *
	 * @param array $url Array of parts that make up this entry ('loc', optional 'mod' and 'images').
	 * @return string
	 */
	public function sitemap_url( $url ) {
		$date = null;
		if ( ! empty( $url['mod'] ) ) {
			// Create a DateTime object date in the correct timezone.
			$date = $this->timezone->format_date( $url['mod'] );
		}

		// Encode before escaping: escaping first turns `&` into `&amp;`, which the
		// query encoder would then parse as part of a parameter name.
		$loc = htmlspecialchars( $this->encode_url_rfc3986( $url['loc'] ) );

		$output  = $this->newline( '<url>', 1 );
		$output .= $this->newline( '<loc>' . $loc . '</loc>', 2 );
		$output .= empty( $date ) ? '' : $this->newline( '<lastmod>' . htmlspecialchars( $date ) . '</lastmod>', 2 );
		$output .= $this->sitemap_images( $url );
		$output .= $this->newline( '</url>', 1 );

		/**
		 * Filters the output for the sitemap url tag.
		 *
		 * @param string $output The output for the sitemap url tag.
		 * @param array  $url    The sitemap url array on which the output is based.
		 */
		return $this->do_filter( 'sitemap/url', $output, $url );
	}

	/**
	 * Sitemap Images.
	 *
	 * Renders one `<image:image>` element for every image that has a 'src'.
	 *
	 * @param array $url Array of parts that make up this entry.
	 * @return string
	 */
	public function sitemap_images( $url ) {
		if ( empty( $url['images'] ) ) {
			return '';
		}

		$output = '';
		foreach ( $url['images'] as $img ) {
			if ( empty( $img['src'] ) ) {
				continue;
			}

			$output .= $this->newline( '<image:image>', 2 );
			$output .= $this->newline( '<image:loc>' . esc_html( $this->encode_url_rfc3986( $img['src'] ) ) . '</image:loc>', 3 );
			$output .= $this->newline( '</image:image>', 2 );
		}

		return $output;
	}

	/**
	 * Convert encoding if needed and wrap the data in a CDATA section.
	 *
	 * @param string $data   Data to be added.
	 * @param string $tag    Tag to create CDATA for.
	 * @param int    $indent Tab indent count.
	 * @return string
	 */
	public function add_cdata( $data, $tag, $indent = 0 ) {
		if ( $this->needs_conversion ) {
			$data = mb_convert_encoding( $data, $this->output_charset, $this->charset );
		}

		// Decode first so entities that are already encoded are not escaped twice.
		$data = _wp_specialchars( html_entity_decode( $data, ENT_QUOTES, $this->output_charset ) );

		return $this->newline( "<{$tag}><![CDATA[{$data}]]></{$tag}>", $indent );
	}

	/**
	 * Apply some best effort conversion to comply with RFC3986.
	 *
	 * URLs that already pass FILTER_VALIDATE_URL are returned untouched.
	 *
	 * @param string $url URL to encode.
	 * @return string
	 */
	public function encode_url_rfc3986( $url ) {
		if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
			return $url;
		}

		$url = $this->encode_url_path( $url );
		$url = $this->encode_url_query( $url );

		return $url;
	}

	/**
	 * Percent-encode the path component of a URL, segment by segment.
	 *
	 * @param string $url URL to encode.
	 * @return string
	 */
	private function encode_url_path( $url ) {
		$path = wp_parse_url( $url, PHP_URL_PATH );
		if ( empty( $path ) || '/' === $path ) {
			return $url;
		}

		$encoded_path = explode( '/', $path );

		// First decode the path, to prevent double encoding.
		$encoded_path = array_map( 'rawurldecode', $encoded_path );
		$encoded_path = array_map( 'rawurlencode', $encoded_path );
		$encoded_path = implode( '/', $encoded_path );
		$encoded_path = str_replace( '%7E', '~', $encoded_path ); // PHP <5.3.

		return str_replace( $path, $encoded_path, $url );
	}

	/**
	 * Rebuild the query component of a URL with RFC3986 encoding.
	 *
	 * @param string $url URL to encode.
	 * @return string
	 */
	private function encode_url_query( $url ) {
		$query = wp_parse_url( $url, PHP_URL_QUERY );
		if ( empty( $query ) ) {
			return $url;
		}

		parse_str( $query, $parsed_query );

		if ( defined( 'PHP_QUERY_RFC3986' ) ) { // PHP 5.4+.
			$parsed_query = http_build_query( $parsed_query, '', '&', PHP_QUERY_RFC3986 );
		} else {
			// Emulate RFC3986 output where the constant is unavailable.
			$parsed_query = http_build_query( $parsed_query, '', '&' );
			$parsed_query = str_replace( '+', '%20', $parsed_query );
			$parsed_query = str_replace( '%7E', '~', $parsed_query );
		}

		return str_replace( $query, $parsed_query, $url );
	}

	/**
	 * Write a newline with indent count.
	 *
	 * @param string $content Content to write.
	 * @param int    $indent  Count of indent.
	 * @return string The content prefixed with `$indent` tabs and followed by a line break.
	 */
	public function newline( $content, $indent = 0 ) {
		return str_repeat( "\t", $indent ) . $content . "\n";
	}
}