285 lines
7.7 KiB
PHP
285 lines
7.7 KiB
PHP
<?php
|
|
|
|
namespace KupShop\KupShopBundle\Util;
|
|
|
|
use DOMDocument;
|
|
use KupShop\KupShopBundle\Context\DomainContext;
|
|
use SimpleXMLElement;
|
|
|
|
class StringUtil
{
    /**
     * Check whether $haystack begins with $needle (byte-wise, case-sensitive).
     *
     * An empty string needle always matches.
     *
     * @return bool
     */
    public static function startsWith($haystack, $needle)
    {
        $length = strlen($needle);

        return substr($haystack, 0, $length) === $needle;
    }

    /**
     * Check whether $haystack ends with $needle (byte-wise, case-sensitive).
     *
     * An empty needle always matches.
     *
     * @return bool
     */
    public static function endsWith($haystack, $needle)
    {
        $length = strlen($needle);
        if ($length === 0) {
            // substr($haystack, -0) would return the whole string, so handle this explicitly.
            return true;
        }

        return substr($haystack, -$length) === $needle;
    }

    /**
     * Strip Unicode separator (\pZ) and "other"/control (\pC) characters
     * from both ends of the string.
     *
     * @return string
     */
    public static function unicode_trim($string)
    {
        return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $string);
    }

    /**
     * Convert a base64 string into its URL form.
     *
     * @return string urlencoded with replaced / for _ and + for -
     */
    public static function base64ToUrl(string $base64String): string
    {
        return urlencode(strtr($base64String, ['/' => '_', '+' => '-']));
    }

    /**
     * Reverse the character substitution done by base64ToUrl().
     *
     * @param string $urlString (should be urldecoded)
     *
     * @return string with replaced _ for / and - for +
     */
    public static function urlToBase64(string $urlString): string
    {
        return strtr($urlString, ['_' => '/', '-' => '+']);
    }

    /**
     * Create SEO friendly url from given string.
     *
     * The string is normalized (see normalize()), every character other than
     * an ASCII letter, digit or hyphen becomes a hyphen, runs of hyphens are
     * collapsed and leading/trailing hyphens are removed.
     *
     * @return string
     */
    public static function slugify($string)
    {
        $string = static::normalize($string);
        // The original class "[^a-z^A-Z^0-9^-]" listed "^" as an allowed
        // character, which let carets leak into the slug.
        $string = preg_replace('/[^a-z0-9-]/i', '-', $string);
        $string = preg_replace('/-+/', '-', $string);

        return trim($string, '-');
    }

    /**
     * Transliterate to Latin, lower-case the text and strip nonspacing marks
     * (accents); "ł" is additionally mapped to "l".
     *
     * @return string
     */
    public static function normalize($string)
    {
        // Built once per request; creating a transliterator from rules is not free.
        static $transliterator;
        if (!isset($transliterator)) {
            $transliterator = \Transliterator::createFromRules(':: Any-Latin; :: Lower(); :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC; ł > l;');
        }

        return $transliterator->transliterate($string);
    }

    /**
     * Transliterate to Latin and strip nonspacing marks (accents) while
     * keeping the letter case; "ł" is additionally mapped to "l".
     */
    public static function removeAccents($string): string
    {
        static $transliterator;
        if (!isset($transliterator)) {
            $transliterator = \Transliterator::createFromRules(':: Any-Latin; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC; ł > l;');
        }

        return $transliterator->transliterate($string);
    }

    /**
     * Remove TAB, CARRIAGE RETURN, LINE FEED characters and deduplicate spaces.
     *
     * The control characters are removed outright (not replaced by a space),
     * and the input is trimmed first.
     */
    public static function normalizeWhitespace(string $string): string
    {
        $string = trim($string);
        // remove TAB, CARRIAGE RETURN, LINE FEED
        $string = str_replace(["\t", "\r", "\n"], '', $string);

        // deduplicate spaces
        return preg_replace('/ {2,}/', ' ', $string);
    }

    /**
     * Re-serialize an XML string with indentation.
     *
     * @return string|false|null null when $string is empty/falsy, otherwise
     *                           the result of DOMDocument::saveXML()
     */
    public static function prettyXML($string)
    {
        if (!$string) {
            return null;
        }

        $domxml = new \DOMDocument('1.0');
        // Insignificant whitespace must be dropped so formatOutput can re-indent.
        $domxml->preserveWhiteSpace = false;
        $domxml->formatOutput = true;
        $domxml->loadXML($string);

        return $domxml->saveXML();
    }

    /**
     * Produce the "next" code for a duplicated entity.
     *
     * If the code contains "<delimiter><number>" where the number is the last
     * number in the string, that number is incremented; otherwise
     * "<delimiter>1" is appended.
     *
     * @param string $code      code to derive the duplicate from
     * @param string $delimiter separator placed before the numeric suffix
     */
    public static function duplicateCode(string $code, $delimiter = '-'): string
    {
        $count = 0;
        // The delimiter is user data, not a pattern: quote it so characters
        // such as "." or "/" neither act as metacharacters nor break the regex.
        $pattern = '/'.preg_quote((string) $delimiter, '/').'(\d+(?!.*\d+))/';

        $incremented = preg_replace_callback(
            $pattern,
            static function (array $matches) use ($delimiter) {
                return $delimiter.($matches[1] + 1);
            },
            $code,
            -1,
            $count
        );

        if (!$count) {
            // No numeric suffix yet: start numbering at 1.
            return $code.$delimiter.'1';
        }

        return $incremented;
    }

    /**
     * Tell whether $url starts with an http:// or https:// scheme
     * (matched case-insensitively).
     */
    public static function isAbsoluteUrl($url): bool
    {
        // Previously only "https://" was recognised, so absoluteUrl() glued
        // the domain in front of already absolute http:// URLs.
        return preg_match('#^https?://#i', $url) === 1;
    }

    /**
     * Turn a path into an absolute https URL.
     *
     * URLs that are already absolute (see isAbsoluteUrl()) are returned
     * untouched. When no $domain is given, the active id of DomainContext is
     * used as the host.
     *
     * @param string      $url    absolute URL or path (leading slashes are stripped)
     * @param string|null $domain host to prepend
     */
    public static function absoluteUrl($url, $domain = null): string
    {
        if (self::isAbsoluteUrl($url)) {
            return $url;
        }

        if (!$domain) {
            $domain = Contexts::get(DomainContext::class)->getActiveId();
        }

        return 'https://'.$domain.'/'.ltrim($url, '/');
    }

    /**
     * Replace every character in the range U+FFFF..U+10FFFF with its decimal
     * numeric HTML entity (e.g. U+1F600 becomes "&#128512;").
     *
     * @return string|null null when preg_replace_callback() fails
     */
    public static function unicodeToEntities(string $string)
    {
        // Match one character at a time: matching a whole run and converting
        // it at once fused neighbouring characters into a single bogus number.
        return preg_replace_callback('/[\x{FFFF}-\x{10FFFF}]/u', static function (array $matches) {
            return '&#'.mb_ord($matches[0], 'UTF-8').';';
        }, $string);
    }

    /**
     * Transliterate using ICU's "Russian-Latin/BGN" transform.
     *
     * If the transliterator cannot be created, the input is returned as is.
     *
     * @return string|false
     */
    public static function convertToLatin(string $string)
    {
        static $transliterator;
        if (!isset($transliterator)) {
            $transliterator = \Transliterator::create('Russian-Latin/BGN');
        }

        if (!$transliterator) {
            return $string;
        }

        return $transliterator->transliterate($string);
    }

    /**
     * Substitute "{key}" placeholders in $str with values from $params.
     *
     * @param array $params map of placeholder name => replacement
     *
     * @return string|null null when preg_replace_callback() fails
     */
    public static function replacePlaceholders(string $str, array $params)
    {
        return preg_replace_callback('/{(.+?)}/', static function (array $matches) use ($params) {
            // If placeholder does not exist, keep it in output
            return $params[$matches[1]] ?? $matches[0];
        }, $str);
    }

    /**
     * Convert an HTML fragment to plain text.
     *
     * Entities are decoded, the markup is parsed with DOMDocument, text of
     * block-level elements is separated by spaces while inline elements are
     * concatenated directly, and all whitespace runs are collapsed to a
     * single space.
     */
    public static function htmlToCleanText(string $html): string
    {
        if (empty($html)) {
            return $html;
        }

        // Decode HTML entities
        $decoded = html_entity_decode($html, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // Encode all non-ASCII characters as numeric entities so the HTML
        // parser cannot misread them. This replaces the
        // mb_convert_encoding(..., 'HTML-ENTITIES') call deprecated in PHP 8.2.
        $entityEncoded = mb_encode_numericentity($decoded, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        // Load HTML into DOMDocument with proper encoding
        $dom = new \DOMDocument();
        // Suppress warnings due to malformed HTML
        @$dom->loadHTML('<?xml encoding="utf-8" ?>'.$entityEncoded, LIBXML_NOERROR | LIBXML_NOWARNING);

        $root = $dom->documentElement;
        if ($root === null) {
            // Nothing parseable (e.g. whitespace only input): there is no text to extract.
            return '';
        }

        // Extract text with proper spacing
        $text = self::getTextWithProperSpacing($root);

        // Normalize whitespace (convert multiple spaces/newlines to a single space)
        $normalized = preg_replace('/\s+/', ' ', $text);

        return trim($normalized);
    }

    /**
     * Recursively collect the text below $node.
     *
     * Text of a child element is wrapped in spaces unless the child or the
     * current node is an inline element (see isInlineElement()).
     *
     * @param \DOMNode $node           node whose children are traversed
     * @param bool     $isInlineParent whether $node itself is an inline element
     */
    private static function getTextWithProperSpacing($node, bool $isInlineParent = false): string
    {
        $text = '';
        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_TEXT_NODE) {
                $text .= $child->wholeText;
                continue;
            }

            if ($child->nodeType !== XML_ELEMENT_NODE) {
                // Comments, processing instructions etc. carry no visible text.
                continue;
            }

            $isInlineChild = self::isInlineElement($child);
            $childText = self::getTextWithProperSpacing($child, $isInlineChild);

            if ($isInlineParent || $isInlineChild) {
                // Concatenate text from inline elements directly
                $text .= $childText;
            } else {
                // Add a space before and after block elements
                $text .= ' '.$childText.' ';
            }
        }

        return $text;
    }

    /**
     * Tell whether the node's name is one of the known inline HTML elements.
     *
     * @param \DOMNode $node
     */
    private static function isInlineElement($node): bool
    {
        $inlineElements = [
            'b',
            'i',
            'span',
            'strong',
            'em',
            'u',
            'a',
            'abbr',
            'acronym',
            'bdo',
            'big',
            'cite',
            'code',
            'dfn',
            'kbd',
            'q',
            'samp',
            'small',
            'sub',
            'sup',
            'tt',
            'var',
        ];

        return in_array($node->nodeName, $inlineElements, true);
    }

    /**
     * Shorten a string that is longer than $length characters.
     *
     * The string is cut to $length + 1 characters and the trailing
     * whitespace together with the (possibly partial) word after it is
     * removed. Shorter strings are returned unchanged.
     *
     * @return string|null null when preg_replace() fails
     */
    public static function cutToWords(string $string, int $length)
    {
        if (mb_strlen($string) <= $length) {
            return $string;
        }

        return preg_replace(
            '/\s+?(\S+)?$/'.\Smarty::$_UTF8_MODIFIER,
            '',
            mb_substr($string, 0, $length + 1, \Smarty::$_CHARSET)
        );
    }

    /**
     * Replace a punctuation character at the end of the string with a period.
     *
     * @return string|null null when preg_replace() fails
     */
    public static function replacePunctuationWithPeriod(string $string)
    {
        return preg_replace('/[[:punct:]](?=$)/', '.', $string);
    }
}
|