Files
kupshop/bundles/KupShop/KupShopBundle/Util/StringUtil.php
2025-08-02 16:30:27 +02:00

285 lines
7.7 KiB
PHP

<?php
namespace KupShop\KupShopBundle\Util;
use DOMDocument;
use KupShop\KupShopBundle\Context\DomainContext;
use SimpleXMLElement;
/**
 * Collection of stateless string helpers (prefix/suffix checks, slugs,
 * transliteration, URL-safe base64, HTML/XML clean-up, ...).
 */
class StringUtil
{
    /**
     * HTML element names whose text is concatenated with the surrounding text
     * without inserting extra spaces (see getTextWithProperSpacing()).
     */
    private const INLINE_ELEMENTS = [
        'b',
        'i',
        'span',
        'strong',
        'em',
        'u',
        'a',
        'abbr',
        'acronym',
        'bdo',
        'big',
        'cite',
        'code',
        'dfn',
        'kbd',
        'q',
        'samp',
        'small',
        'sub',
        'sup',
        'tt',
        'var',
    ];

    /**
     * Tell whether $haystack begins with $needle (byte-wise, case-sensitive).
     *
     * An empty needle always matches.
     *
     * @return bool
     */
    public static function startsWith($haystack, $needle)
    {
        $length = strlen($needle);

        return substr($haystack, 0, $length) === $needle;
    }

    /**
     * Tell whether $haystack ends with $needle (byte-wise, case-sensitive).
     *
     * An empty needle always matches.
     *
     * @return bool
     */
    public static function endsWith($haystack, $needle)
    {
        $length = strlen($needle);
        if ($length == 0) {
            // substr($haystack, -0) would return the whole haystack, so handle it explicitly
            return true;
        }

        return substr($haystack, -$length) === $needle;
    }

    /**
     * Strip Unicode separators (\pZ) and control/other characters (\pC)
     * from both ends of the string.
     *
     * @return string
     */
    public static function unicode_trim($string)
    {
        return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $string);
    }

    /**
     * Make a base64 string safe for use in a URL.
     *
     * @return string urlencoded with replaced / for _ and + for -
     */
    public static function base64ToUrl(string $base64String): string
    {
        return urlencode(strtr($base64String, '+/', '-_'));
    }

    /**
     * Inverse of the character substitution done by base64ToUrl().
     *
     * @param string $urlString (should be urldecoded)
     *
     * @return string with replaced _ for / and - for +
     */
    public static function urlToBase64(string $urlString): string
    {
        return strtr($urlString, '-_', '+/');
    }

    /**
     * Create SEO friendly url from given string.
     *
     * The string is transliterated and lower-cased by normalize(); every
     * character other than a-z, 0-9 and "-" is then turned into "-", runs of
     * "-" are collapsed and leading/trailing "-" are removed.
     *
     * @return string
     */
    public static function slugify($string)
    {
        $string = static::normalize($string);
        // NOTE: "^" is only a negation marker at the start of a character class;
        // repeating it inside the class would whitelist the caret itself.
        $string = preg_replace('/[^a-z0-9-]/i', '-', $string);
        $string = preg_replace('/-+/', '-', $string);

        return trim($string, '-');
    }

    /**
     * Transliterate to Latin, lower-case and strip diacritics.
     *
     * The transliterator is created once and cached for subsequent calls.
     *
     * @return string
     */
    public static function normalize($string)
    {
        static $transliterator;

        if (!isset($transliterator)) {
            $transliterator = \Transliterator::createFromRules(':: Any-Latin; :: Lower(); :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC; ł > l;');
        }

        return $transliterator->transliterate($string);
    }

    /**
     * Transliterate to Latin and strip diacritics while keeping letter case.
     *
     * The transliterator is created once and cached for subsequent calls.
     */
    public static function removeAccents($string): string
    {
        static $transliterator;

        if (!isset($transliterator)) {
            $transliterator = \Transliterator::createFromRules(':: Any-Latin; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC; ł > l;');
        }

        return $transliterator->transliterate($string);
    }

    /**
     * Remove TAB, CARRIAGE RETURN, LINE FEED characters and deduplicate spaces.
     *
     * The string is trimmed first. Control characters are removed, not
     * replaced by a space.
     */
    public static function normalizeWhitespace(string $string): string
    {
        $string = trim($string);
        // remove TAB, CARRIAGE RETURN, LINE FEED
        $string = preg_replace("/[\t\r\n]/", '', $string);

        // deduplicate spaces
        return preg_replace('/ {2,}/', ' ', $string);
    }

    /**
     * Re-serialize an XML string with indentation.
     *
     * @return string|false|null null for empty input, otherwise the result of DOMDocument::saveXML()
     */
    public static function prettyXML($string)
    {
        if (!$string) {
            return null;
        }

        $domxml = new \DOMDocument('1.0');
        // Both flags have to be set before loading for formatOutput to re-indent the document
        $domxml->preserveWhiteSpace = false;
        $domxml->formatOutput = true;
        $domxml->loadXML($string);

        return $domxml->saveXML();
    }

    /**
     * Produce a code for a duplicated entity by bumping its numeric suffix.
     *
     * The last "<delimiter><digits>" group in $code (one not followed by any
     * further digit) is incremented; when there is none, "<delimiter>1" is
     * appended.
     *
     * @param string $delimiter literal separator placed before the counter
     */
    public static function duplicateCode(string $code, $delimiter = '-'): string
    {
        $count = 0;
        // The delimiter is a literal, so it must be escaped before being embedded in the pattern
        $pattern = '/'.preg_quote((string) $delimiter, '/').'(\d+(?!.*\d+))/';

        $code = preg_replace_callback(
            $pattern,
            static function ($matches) use ($delimiter) {
                return $delimiter.($matches[1] + 1);
            },
            $code,
            -1,
            $count
        );

        if (!$count) {
            $code = $code.$delimiter.'1';
        }

        return $code;
    }

    /**
     * Tell whether the URL already carries an http:// or https:// scheme
     * (matched case-insensitively).
     */
    public static function isAbsoluteUrl($url): bool
    {
        return preg_match('#^https?://#i', (string) $url) === 1;
    }

    /**
     * Turn a relative URL into an absolute https:// URL.
     *
     * URLs recognised by isAbsoluteUrl() are returned untouched.
     *
     * @param string|null $domain host to prepend; when empty it is taken from
     *                            the active DomainContext (presumably the current domain host - verify against Contexts)
     */
    public static function absoluteUrl($url, $domain = null): string
    {
        if (self::isAbsoluteUrl($url)) {
            return $url;
        }

        if (!$domain) {
            $domain = Contexts::get(DomainContext::class)->getActiveId();
        }

        return 'https://'.$domain.'/'.ltrim($url, '/');
    }

    /**
     * Replace every character in the range U+FFFF..U+FFFFF with its decimal
     * HTML numeric entity (e.g. U+1F600 becomes "&#128512;").
     *
     * @return string|null null when the input is not valid UTF-8
     */
    public static function unicodeToEntities(string $string)
    {
        // Match one character at a time so that each one gets its own entity
        return preg_replace_callback('/[\x{FFFF}-\x{FFFFF}]/u', static function ($matches) {
            return '&#'.mb_ord($matches[0], 'UTF-8').';';
        }, $string);
    }

    /**
     * Transliterate Cyrillic text using the "Russian-Latin/BGN" transliterator.
     *
     * When that transliterator cannot be created the input is returned unchanged.
     */
    public static function convertToLatin(string $string)
    {
        static $transliterator;

        if (!isset($transliterator)) {
            $transliterator = \Transliterator::create('Russian-Latin/BGN');
        }

        if ($transliterator) {
            return $transliterator->transliterate($string);
        }

        return $string;
    }

    /**
     * Substitute "{key}" placeholders in $str with values from $params.
     *
     * @param array $params map of placeholder name => replacement
     */
    public static function replacePlaceholders(string $str, array $params)
    {
        return preg_replace_callback('/{(.+?)}/', static function ($matches) use ($params) {
            // If placeholder does not exist, keep it in output
            return $params[$matches[1]] ?? $matches[0];
        }, $str);
    }

    /**
     * Convert an HTML fragment to plain text on a single line.
     *
     * Block-level elements are separated by a space, inline elements are
     * joined directly, and all whitespace runs are collapsed to one space.
     */
    public static function htmlToCleanText(string $html): string
    {
        if (empty($html)) {
            return $html;
        }

        // Decode HTML entities
        $decoded = html_entity_decode($html, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // Encode every non-ASCII character as a numeric entity so the parser cannot
        // misinterpret the encoding (replacement for the 'HTML-ENTITIES' mbstring
        // encoding, which is deprecated since PHP 8.2)
        $entityEncoded = mb_encode_numericentity($decoded, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        // Load HTML into DOMDocument with proper encoding
        $dom = new \DOMDocument();
        // Suppress warnings due to malformed HTML
        @$dom->loadHTML('<?xml encoding="utf-8" ?>'.$entityEncoded, LIBXML_NOERROR | LIBXML_NOWARNING);

        $root = $dom->documentElement;
        if ($root === null) {
            // Nothing parseable (e.g. whitespace only input)
            return '';
        }

        // Extract text with proper spacing
        $text = self::getTextWithProperSpacing($root);

        // Normalize whitespace (convert multiple spaces/newlines to a single space)
        $normalized = preg_replace('/\s+/', ' ', $text);

        return trim($normalized);
    }

    /**
     * Recursively collect the text below $node.
     *
     * @param \DOMNode $node           node whose children are walked
     * @param bool     $isInlineParent true when $node itself is an inline element
     */
    private static function getTextWithProperSpacing($node, bool $isInlineParent = false): string
    {
        $text = '';

        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_TEXT_NODE) {
                $text .= $child->wholeText;
            } elseif ($child->nodeType === XML_ELEMENT_NODE) {
                $isInlineChild = self::isInlineElement($child);
                $childText = self::getTextWithProperSpacing($child, $isInlineChild);

                // Concatenate text from inline elements directly
                if ($isInlineParent || $isInlineChild) {
                    $text .= $childText;
                } else {
                    // Add a space before and after block elements
                    $text .= ' '.$childText.' ';
                }
            }
        }

        return $text;
    }

    /**
     * Tell whether the node's element name is one of INLINE_ELEMENTS.
     *
     * @param \DOMNode $node
     */
    private static function isInlineElement($node): bool
    {
        return in_array($node->nodeName, self::INLINE_ELEMENTS, true);
    }

    /**
     * Shorten $string to roughly $length characters without cutting a word.
     *
     * Strings longer than $length are cut to $length + 1 characters and the
     * trailing whitespace together with the (possibly partial) last word is
     * dropped. Shorter strings are returned unchanged.
     */
    public static function cutToWords(string $string, int $length)
    {
        // Measure and cut with the same charset
        if (mb_strlen($string, \Smarty::$_CHARSET) > $length) {
            return preg_replace(
                '/\s+?(\S+)?$/'.\Smarty::$_UTF8_MODIFIER,
                '',
                mb_substr($string, 0, $length + 1, \Smarty::$_CHARSET)
            );
        }

        return $string;
    }

    /**
     * Replace a single trailing (ASCII) punctuation character with a period.
     */
    public static function replacePunctuationWithPeriod(string $string)
    {
        return preg_replace('/[[:punct:]](?=$)/', '.', $string);
    }
}