kupshop/bundles/KupShop/KupShopBundle/Util/StringUtil.php

<?php

namespace KupShop\KupShopBundle\Util;

use DOMDocument;
use KupShop\KupShopBundle\Context\DomainContext;
use SimpleXMLElement;

/**
 * Static string helpers: prefix/suffix checks, trimming, URL-safe Base64
 * conversion, slug generation, transliteration, HTML-to-text conversion and
 * a few small formatting utilities.
 */
class StringUtil
{
    /**
     * Tag names whose text is glued to the surrounding text without extra
     * spaces when HTML is flattened to plain text.
     */
    private const INLINE_ELEMENTS = [
        'b',
        'i',
        'span',
        'strong',
        'em',
        'u',
        'a',
        'abbr',
        'acronym',
        'bdo',
        'big',
        'cite',
        'code',
        'dfn',
        'kbd',
        'q',
        'samp',
        'small',
        'sub',
        'sup',
        'tt',
        'var',
    ];

    /**
     * Tell whether $haystack begins with $needle (byte-wise, case-sensitive).
     *
     * An empty string needle always matches.
     *
     * @return bool
     */
    public static function startsWith($haystack, $needle)
    {
        $prefix = substr($haystack, 0, strlen($needle));

        return $prefix === $needle;
    }

    /**
     * Tell whether $haystack ends with $needle (byte-wise, case-sensitive).
     *
     * An empty needle always matches.
     *
     * @return bool
     */
    public static function endsWith($haystack, $needle)
    {
        $length = strlen($needle);

        // substr($haystack, -0) would return the whole string, so the empty
        // needle has to be short-circuited.
        return $length === 0 || substr($haystack, -$length) === $needle;
    }

    /**
     * Strip leading and trailing Unicode separators (\pZ) and "other"
     * characters (\pC: control, format, ...) from the string.
     *
     * @return string|null null when PCRE fails, e.g. on invalid UTF-8 input
     */
    public static function unicode_trim($string)
    {
        return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $string);
    }

    /**
     * Make a Base64 string safe for use in a URL.
     *
     * @return string urlencoded with replaced / for _ and + for -
     */
    public static function base64ToUrl(string $base64String): string
    {
        return urlencode(strtr($base64String, '+/', '-_'));
    }

    /**
     * Reverse the character substitution done by base64ToUrl().
     *
     * @param string $urlString (should be urldecoded)
     *
     * @return string with replaced _ for / and - for +
     */
    public static function urlToBase64(string $urlString): string
    {
        return strtr($urlString, '-_', '+/');
    }

    /**
     * Create SEO friendly url from given string.
     *
     * The input is normalized (see normalize()), every character other than
     * an ASCII letter, digit or hyphen is turned into a hyphen, runs of
     * hyphens are collapsed and hyphens at both ends are removed.
     *
     * @return string
     */
    public static function slugify($string)
    {
        $slug = static::normalize($string);
        // A caret inside the class (other than the leading negation) is a
        // literal "^", so it must not be repeated before every range.
        $slug = preg_replace('/[^a-z0-9-]/i', '-', $slug);
        $slug = preg_replace('/-+/', '-', $slug);

        return trim($slug, '-');
    }

    /**
     * Transliterate to Latin script, lower-case the text and strip
     * nonspacing (accent) marks; "ł" is additionally mapped to "l".
     *
     * @return string
     */
    public static function normalize($string)
    {
        // The compiled transliterator is cached for the rest of the request.
        static $transliterator;
        $transliterator ??= \Transliterator::createFromRules(':: Any-Latin; :: Lower(); :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC; ł > l;');

        return $transliterator->transliterate($string);
    }

    /**
     * Same as normalize() but without lower-casing: transliterate to Latin
     * script and strip nonspacing (accent) marks.
     */
    public static function removeAccents($string): string
    {
        // The compiled transliterator is cached for the rest of the request.
        static $transliterator;
        $transliterator ??= \Transliterator::createFromRules(':: Any-Latin; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC; ł > l;');

        return $transliterator->transliterate($string);
    }

    /**
     * Remove TAB, CARRIAGE RETURN, LINE FEED characters and deduplicate spaces.
     *
     * The removed characters are dropped, not replaced by a space.
     */
    public static function normalizeWhitespace(string $string): string
    {
        $result = trim($string);
        // remove TAB, CARRIAGE RETURN, LINE FEED
        $result = preg_replace("/[\t\r\n]/", '', $result);

        // deduplicate spaces
        return preg_replace('/ {2,}/', ' ', $result);
    }

    /**
     * Re-serialize an XML string with indentation.
     *
     * @return string|false|null null for an empty (falsy) input, otherwise
     *                           the result of DOMDocument::saveXML()
     */
    public static function prettyXML($string)
    {
        if (!$string) {
            return null;
        }

        $document = new \DOMDocument('1.0');
        // Existing insignificant whitespace has to be dropped, otherwise
        // formatOutput does not re-indent the document.
        $document->preserveWhiteSpace = false;
        $document->formatOutput = true;
        $document->loadXML($string);

        return $document->saveXML();
    }

    /**
     * Derive a new code from an existing one by bumping its numeric suffix.
     *
     * The last "<delimiter><digits>" group that is not followed by any other
     * digit is incremented ("ABC-1" => "ABC-2"); when there is none,
     * "<delimiter>1" is appended ("ABC" => "ABC-1").
     *
     * @param string $delimiter literal separator placed before the number
     */
    public static function duplicateCode(string $code, $delimiter = '-'): string
    {
        $count = 0;
        // The delimiter is a literal, not a pattern: without quoting "." would
        // match any character and "/" would terminate the expression.
        $pattern = '/'.preg_quote($delimiter, '/').'(\d+(?!.*\d+))/';

        $code = preg_replace_callback(
            $pattern,
            static function ($matches) use ($delimiter) {
                return $delimiter.($matches[1] + 1);
            },
            $code,
            -1,
            $count
        );

        if (!$count) {
            $code = $code.$delimiter.'1';
        }

        return $code;
    }

    /**
     * Tell whether the URL starts with an "http://" or "https://" scheme
     * (compared case-insensitively).
     */
    public static function isAbsoluteUrl($url): bool
    {
        return preg_match('#^https?://#i', $url) === 1;
    }

    /**
     * Turn a relative URL into an absolute https URL; URLs that are already
     * absolute are returned untouched.
     *
     * @param string|null $domain host to prefix; when empty, the active id of
     *                            DomainContext is used (presumably the current
     *                            domain name - verify against DomainContext)
     */
    public static function absoluteUrl($url, $domain = null): string
    {
        if (self::isAbsoluteUrl($url)) {
            return $url;
        }

        if (!$domain) {
            $domain = Contexts::get(DomainContext::class)->getActiveId();
        }

        return 'https://'.$domain.'/'.ltrim($url, '/');
    }

    /**
     * Replace every character in the range U+FFFF..U+FFFFF with its decimal
     * numeric HTML entity (e.g. U+1F600 => "&#128512;").
     *
     * @return string|null null when PCRE fails, e.g. on invalid UTF-8 input
     */
    public static function unicodeToEntities(string $string)
    {
        // One character per match: converting a whole run at once would
        // produce a single meaningless number for consecutive characters.
        return preg_replace_callback('/[\x{FFFF}-\x{FFFFF}]/u', static function ($matches) {
            return '&#'.mb_ord($matches[0], 'UTF-8').';';
        }, $string);
    }

    /**
     * Transliterate text with the "Russian-Latin/BGN" transliterator.
     *
     * When the transliterator cannot be created the input is returned as is.
     */
    public static function convertToLatin(string $string)
    {
        static $transliterator;
        // create() yields null on failure, in which case it is retried on
        // the next call.
        $transliterator ??= \Transliterator::create('Russian-Latin/BGN');

        if (!$transliterator) {
            return $string;
        }

        return $transliterator->transliterate($string);
    }

    /**
     * Substitute "{key}" placeholders with values from $params.
     *
     * @param array $params map of placeholder name => replacement
     */
    public static function replacePlaceholders(string $str, array $params)
    {
        return preg_replace_callback('/{(.+?)}/', static function ($matches) use ($params) {
            // If placeholder does not exist, keep it in output
            return $params[$matches[1]] ?? $matches[0];
        }, $str);
    }

    /**
     * Convert an HTML fragment to plain text.
     *
     * Entities are decoded, markup is dropped, block-level elements are
     * separated by a space, inline elements are joined directly and all
     * whitespace runs are collapsed to a single space.
     */
    public static function htmlToCleanText(string $html): string
    {
        if (empty($html)) {
            return $html;
        }

        // Decode HTML entities
        $decoded = html_entity_decode($html, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // Express non-ASCII characters as numeric entities so the parser does
        // not depend on encoding detection. This replaces the 'HTML-ENTITIES'
        // mbstring encoding, which is deprecated as of PHP 8.2.
        $encoded = mb_encode_numericentity($decoded, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        // Load HTML into DOMDocument with proper encoding
        $dom = new \DOMDocument();
        // Suppress warnings due to malformed HTML
        @$dom->loadHTML('<?xml encoding="utf-8" ?>'.$encoded, LIBXML_NOERROR | LIBXML_NOWARNING);

        $root = $dom->documentElement;
        if ($root === null) {
            // Nothing parseable was found, so there is no text to extract.
            return '';
        }

        // Extract text with proper spacing
        $text = self::getTextWithProperSpacing($root);

        // Normalize whitespace (convert multiple spaces/newlines to a single space)
        return trim(preg_replace('/\s+/', ' ', $text));
    }

    /**
     * Recursively collect the text below $node, padding the text of block
     * elements with spaces so that adjacent blocks do not run together.
     *
     * @param \DOMNode $node           node whose children are walked
     * @param bool     $isInlineParent whether $node itself is an inline element
     */
    private static function getTextWithProperSpacing($node, bool $isInlineParent = false): string
    {
        $text = '';
        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_TEXT_NODE) {
                $text .= $child->wholeText;
                continue;
            }

            if ($child->nodeType !== XML_ELEMENT_NODE) {
                // Other node types contribute no text.
                continue;
            }

            $isInlineChild = self::isInlineElement($child);
            $childText = self::getTextWithProperSpacing($child, $isInlineChild);

            // Inline content is concatenated directly, block content is
            // surrounded by spaces.
            $text .= ($isInlineParent || $isInlineChild) ? $childText : ' '.$childText.' ';
        }

        return $text;
    }

    /**
     * Tell whether the node's tag name is one of the known inline elements.
     */
    private static function isInlineElement($node): bool
    {
        return in_array($node->nodeName, self::INLINE_ELEMENTS, true);
    }

    /**
     * Shorten the string to at most $length characters without splitting a
     * word: the cut is moved back to the preceding whitespace. A text with no
     * whitespace inside the limit is cut hard at $length characters.
     *
     * Strings that already fit are returned unchanged.
     */
    public static function cutToWords(string $string, int $length)
    {
        if (mb_strlen($string) <= $length) {
            return $string;
        }

        // Take one extra character so that a word ending exactly at the limit
        // is kept, then drop the trailing (possibly partial) word.
        $cut = preg_replace(
            '/\s+?(\S+)?$/'.\Smarty::$_UTF8_MODIFIER,
            '',
            mb_substr($string, 0, $length + 1, \Smarty::$_CHARSET)
        );

        // Without any whitespace nothing was removed above and the text is
        // still $length + 1 characters long.
        return mb_substr((string) $cut, 0, $length, \Smarty::$_CHARSET);
    }

    /**
     * Replace a single trailing ASCII punctuation character with a period.
     */
    public static function replacePunctuationWithPeriod(string $string)
    {
        return preg_replace('/[[:punct:]](?=$)/', '.', $string);
    }
}