Files
kupshop/bundles/KupShop/FeedGeneratorBundle/Utils/ExternalFeedCache.php
2025-08-02 16:30:27 +02:00

299 lines
11 KiB
PHP

<?php
namespace KupShop\FeedGeneratorBundle\Utils;
use KupShop\AdminBundle\Util\ActivityLog;
use KupShop\FeedGeneratorBundle\Feed\ExternalFeed;
use KupShop\FeedsBundle\FeedsBundle;
use KupShop\KupShopBundle\Util\System\PathFinder;
use Symfony\Component\Lock\LockFactory;
use Symfony\Component\Lock\Store\FlockStore;
class ExternalFeedCache
{
// Feed definition row. The constructor decodes a JSON-string 'data' entry into an array;
// the class reads the 'id', 'name' and 'data' entries of this row.
private array $feedRow;
// Optional list whose first item is compared with the root element name of a downloaded XML.
protected ?array $basePath = null;
// Creates the per-file locks used while a cache file is refreshed; backed by a FlockStore over the cache directory.
private LockFactory $lockFactory;
/**
 * Stores the feed row and prepares the lock factory used for cache refreshes.
 *
 * @param array      $feedRow  feed definition; a string 'data' entry is JSON-decoded into an array
 * @param array|null $basePath optional list whose first item is the expected XML root element name
 */
public function __construct(array $feedRow, ?array $basePath = null)
{
    $rawData = $feedRow['data'];
    if (is_string($rawData)) {
        $feedRow['data'] = json_decode($rawData, true);
    }
    $this->feedRow = $feedRow;
    $this->basePath = $basePath;

    // lock files are kept in the same directory as the cached feeds
    $this->lockFactory = new LockFactory(new FlockStore($this->getCacheDirPath()));
}
/**
 * Removes stale files from the external feed cache directory.
 *
 * Every non-directory entry whose modification time lies more than 30 days in the past is deleted.
 * Entries whose modification time cannot be read are skipped instead of being treated as "very old".
 */
private function deleteDanglingFiles(): void
{
    $cacheDir = $this->getCacheDirPath();
    $entries = scandir($cacheDir);
    if ($entries === false) {
        // directory could not be listed - nothing to clean up
        return;
    }

    $maxAge = 30 * 24 * 60 * 60; // 30 days in seconds
    $now = time();
    foreach ($entries as $entry) {
        $fqFileName = $cacheDir.$entry;
        if (is_dir($fqFileName)) {
            continue; // also skips "." and ".."
        }
        $modifiedAt = filemtime($fqFileName);
        if ($modifiedAt === false) {
            // file vanished or is unreadable; false would otherwise count as timestamp 0
            continue;
        }
        if ($now - $modifiedAt > $maxAge) {
            unlink($fqFileName);
        }
    }
}
/**
 * Returns the directory (with trailing slash) that holds cached external feeds,
 * creating it when it does not exist yet.
 *
 * @return string path of the "feed_external/" directory inside the tmp dir reported by PathFinder
 */
private function getCacheDirPath(): string
{
    $tmpDir = PathFinder::getService()->getTmpDir().'feed_external/';
    // Create recursively, and re-check is_dir() afterwards: another request may create the
    // directory between our check and mkdir(), which must not surface as a warning.
    if (!is_dir($tmpDir) && !@mkdir($tmpDir, 0777, true) && !is_dir($tmpDir)) {
        // Creation failed; the path is still returned, as before, and the consumer
        // of the path reports the problem.
        return $tmpDir;
    }

    return $tmpDir;
}
/**
 * Builds the cache file path for a remote resource.
 *
 * The file name is the md5 of the URL with an ".xml" (source) or ".xsd" (description) extension.
 *
 * @param string $resourceUrl URL of the remote resource
 * @param string $type        ExternalFeed::TYPE_SOURCE or ExternalFeed::TYPE_DESCRIPTION
 * @param bool   $ignoreAge   when true the path is returned even if the file is missing or stale
 *
 * @return string|null the path; null for an unknown type, or (unless $ignoreAge) when the file
 *                     does not exist or was modified 10 or more minutes ago
 */
private function getCacheFileName(string $resourceUrl, string $type, bool $ignoreAge = false): ?string
{
    $cacheDir = $this->getCacheDirPath();

    // loose comparison on purpose - mirrors switch semantics
    if ($type == ExternalFeed::TYPE_SOURCE) {
        $extension = '.xml';
    } elseif ($type == ExternalFeed::TYPE_DESCRIPTION) {
        $extension = '.xsd';
    } else {
        return null;
    }

    $fileName = $cacheDir.md5($resourceUrl).$extension;
    if ($ignoreAge) {
        return $fileName;
    }

    $isFresh = file_exists($fileName) && (time() - filemtime($fileName)) < 10 * 60;

    return $isFresh ? $fileName : null;
}
/**
 * Downloads a remote resource into the cache, guarded by a per-file lock.
 *
 * The download goes to "<cache file>.tmp" first and replaces the cache file only when the
 * response has HTTP code 200, is detected as "text/xml" and passes readableXML().
 * When the cache file turns out to be fresh (at most 10 minutes old) once the lock is held,
 * nothing is downloaded and the existing file is used.
 *
 * On failure an activity log entry is written and an older cache file, if present, is returned.
 *
 * NOTE(review): readableXML() compares the root element with $basePath for both resource types,
 * including the description file - confirm this is intended.
 *
 * @param string $resourceUrl URL of the remote resource
 * @param string $type        ExternalFeed::TYPE_SOURCE or ExternalFeed::TYPE_DESCRIPTION
 *
 * @return string|null path of the cache file, or null when nothing usable is cached
 */
private function doCacheFile(string $resourceUrl, string $type): ?string
{
    $fileName = $this->getCacheFileName($resourceUrl, $type, true);
    if ($fileName === null) {
        // unknown resource type - there is no cache file that could be locked or filled
        return null;
    }

    $tmpFileName = $fileName.'.tmp';
    $downloadOK = false;
    $subsequentThread = false;
    $invalidContentBeginning = null;
    $fetchHttpCode = -1;
    $fetchError = '';
    $maxAge = 600; // Maximum age of the file in seconds (10 minutes)

    $lock = $this->lockFactory->createLock($fileName);
    if ($lock->acquire(true)) {
        try {
            clearstatcache(); // Clear cached results of filemtime()
            $fileModifiedTime = file_exists($fileName) ? filemtime($fileName) : false;
            $currentTime = time();
            if (!$fileModifiedTime || ($currentTime - $fileModifiedTime > $maxAge)) {
                if ($this->fetchRemoteUrl($resourceUrl, $tmpFileName, $fetchHttpCode, $fetchError)) {
                    // check content type of downloaded file
                    if ($fetchHttpCode === 200 && mime_content_type($tmpFileName) === 'text/xml' && $this->readableXML($tmpFileName, $fetchError)) {
                        $downloadOK = rename($tmpFileName, $fileName); // valid content type and readable XML -> update cache
                    } elseif (file_exists($tmpFileName)) {
                        // keep the beginning of the rejected content for the activity log
                        $invalidContentBeginning = file_get_contents($tmpFileName, false, null, 0, 500);
                        @unlink($tmpFileName);
                    } else {
                        @unlink($tmpFileName);
                    }
                } else {
                    $fetchError = !empty($fetchError) ? $fetchError : 'Unknown error';
                    @unlink($tmpFileName);
                }
            } else {
                $subsequentThread = true; // cached file should be present at this point
            }
        } finally {
            // release even when something above throws, so other requests are not blocked
            $lock->release();
        }
    }

    if ($downloadOK || $subsequentThread) {
        return $fileName;
    }

    $data = [
        'externalURL' => $resourceUrl,
        'fileName' => $fileName,
        'feedID' => $this->feedRow['id'],
        'feedName' => $this->feedRow['name'],
    ];
    if (!empty($invalidContentBeginning)) {
        $data['contentBeginning'] = $invalidContentBeginning.'...[skipped]';
    }
    $data['fetchHttpCode'] = $fetchHttpCode;
    $data['fetchError'] = $fetchError;

    // an older cache file downgrades the failure to a warning and serves as fallback
    $hasOldCache = file_exists($fileName);
    $message = $hasOldCache
        ? 'Feed: Failed to load external feed, using old cache instead - "%s" [%s]'
        : 'Feed: Failed to load external feed with no cache present - "%s" [%s]';

    addActivityLog(
        $hasOldCache ? ActivityLog::SEVERITY_WARNING : ActivityLog::SEVERITY_ERROR,
        ActivityLog::TYPE_COMMUNICATION,
        sprintf($message, $data['feedName'], $data['feedID']),
        [
            ...ActivityLog::addObjectData([$data['feedID'] => $data['feedName']], 'feeds'),
            ...$data,
        ],
        [FeedsBundle::LOG_TAG_FEED]
    );

    return $hasOldCache ? $fileName : null;
}
/**
 * Returns where the feed description should be read from.
 *
 * @return string path of the cached description file (refreshed when missing or stale),
 *                or the configured remote URL when no cache file is available
 */
public function getDescriptionUrl(): string
{
    $remoteUrl = $this->feedRow['data'][ExternalFeed::TYPE_DESCRIPTION];

    // use a fresh cache file if there is one, otherwise try to (re)download it
    $cachedFile = $this->getCacheFileName($remoteUrl, ExternalFeed::TYPE_DESCRIPTION)
        ?? $this->doCacheFile($remoteUrl, ExternalFeed::TYPE_DESCRIPTION);

    return $cachedFile ?? $remoteUrl;
}
/**
 * Returns where the feed source should be read from.
 *
 * @return string path of the cached source file (refreshed when missing or stale),
 *                or the configured remote URL when no cache file is available
 */
public function getSourceUrl(): string
{
    $remoteUrl = $this->feedRow['data'][ExternalFeed::TYPE_SOURCE];

    // use a fresh cache file if there is one, otherwise try to (re)download it
    $cachedFile = $this->getCacheFileName($remoteUrl, ExternalFeed::TYPE_SOURCE)
        ?? $this->doCacheFile($remoteUrl, ExternalFeed::TYPE_SOURCE);

    return $cachedFile ?? $remoteUrl;
}
/**
 * Stores an item as JSON in the cache for 10 minutes.
 *
 * The cache key is "external_feed_" + md5 of the source URL + $searchKey.
 *
 * @param string $searchKey key suffix identifying the item
 * @param array  $item      data to store
 */
public function setCacheItem(string $searchKey, array $item): void
{
    $cacheKey = 'external_feed_'.md5($this->feedRow['data'][ExternalFeed::TYPE_SOURCE]).$searchKey;
    setCache($cacheKey, json_encode($item), 10 * 60);
}
/**
 * Reads an item from the cache.
 *
 * The cache key is "external_feed_" + md5 of the source URL + $searchKey.
 *
 * @param string $searchKey key suffix identifying the item
 *
 * @return array|null the decoded item; null when the cached value is not a string
 *                    or does not decode to an array
 */
public function getCacheItem(string $searchKey): ?array
{
    $urlHash = md5($this->feedRow['data'][ExternalFeed::TYPE_SOURCE]);
    $res = getCache('external_feed_'.$urlHash.$searchKey);
    if (!is_string($res)) {
        return null;
    }

    $decoded = json_decode($res, true);

    // a scalar JSON value would violate the ?array return type, treat it as a cache miss
    return is_array($decoded) ? $decoded : null;
}
/**
 * Drops everything cached for this feed.
 *
 * Deletes the cached description and source files regardless of their age, clears the
 * cache entries prefixed with "external_feed_" + md5 of the source URL and finally removes
 * stale files from the cache directory.
 */
public function clearCache(): void
{
    $descr = $this->feedRow['data'][ExternalFeed::TYPE_DESCRIPTION];
    $source = $this->feedRow['data'][ExternalFeed::TYPE_SOURCE];
    $urlHash = md5($source);

    $resources = [
        [$descr, ExternalFeed::TYPE_DESCRIPTION],
        [$source, ExternalFeed::TYPE_SOURCE],
    ];
    foreach ($resources as [$url, $type]) {
        // ignoreAge = true: without it a missing or stale file yields null and is never removed
        $fileName = $this->getCacheFileName($url, $type, true);
        if ($fileName !== null && file_exists($fileName)) {
            unlink($fileName);
        }
    }

    clearCache('external_feed_'.$urlHash, true);
    $this->deleteDanglingFiles();
}
/**
 * Fetches a remote file using cURL and saves it to a specified file.
 *
 * Redirects are followed, the connection timeout is 60 seconds and the whole transfer
 * may take at most 5 minutes. The output file is removed when the transfer fails.
 *
 * @param string $url the URL of the resource to fetch
 * @param string $outputFilePath the path to the file where the response will be saved
 * @param int &$httpCode HTTP status code returned by the request
 * @param string &$error Error message in case of failure
 *
 * @return bool true on success, or false on failure
 */
protected function fetchRemoteUrl(string $url, string $outputFilePath, int &$httpCode, string &$error): bool
{
    // Open the output file first so that no cURL handle exists when this fails
    $fileHandle = fopen($outputFilePath, 'w');
    if (!$fileHandle) {
        $error = "Failed to open temporary file: {$outputFilePath}";

        return false;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        // curl_init() reports failure by returning false; clean up the file created above
        fclose($fileHandle);
        unlink($outputFilePath);
        $error = "Failed to initialize cURL for: {$url}";

        return false;
    }

    // Set cURL options
    curl_setopt($ch, CURLOPT_FILE, $fileHandle); // Stream the response directly into the file
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // Follow redirects
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 60); // Connection timeout (in seconds)
    curl_setopt($ch, CURLOPT_TIMEOUT, 5 * 60); // Execution timeout (in seconds)
    curl_setopt($ch, CURLOPT_FAILONERROR, true); // Treat HTTP errors as failures
    curl_setopt($ch, CURLOPT_USERAGENT, 'ExternalFeed/1.0'); // Optional: Set a user agent
    // hotfix for ZNZ Elnino https://trello.com/c/t0JmotPj
    // NOTE(review): this disables TLS certificate and host verification for every feed,
    // which allows man-in-the-middle tampering - consider limiting it to the affected host.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);
    fclose($fileHandle); // Close the file handle
    curl_close($ch);

    // Check for errors
    if ($response === false || $httpCode >= 400) {
        unlink($outputFilePath); // Clean up the temporary file on failure

        return false;
    }

    return true;
}
/**
 * Checks that a file can be read as XML up to its first element.
 *
 * When $basePath is not empty, the name of that first element must equal its first item.
 *
 * @param string $filePath  path of the XML file to check
 * @param string &$errorMsg receives the reason when the check fails
 *
 * @return bool true when a first element was found (and matches the expected name, if any)
 */
protected function readableXML(string $filePath, string &$errorMsg = ''): bool
{
    $xml = new \XMLReader();
    if (!$xml->open($filePath)) {
        $errorMsg = 'Cannot open source XML';

        return false;
    }

    try {
        // advance to the first element node, i.e. the document root
        while ($xml->read()) {
            if ($xml->nodeType !== \XMLReader::ELEMENT) {
                continue;
            }
            if (empty($this->basePath)) {
                return true; // no expectation about the root name
            }

            $foundRoot = (string) $xml->name;
            $expectedRootName = reset($this->basePath);
            if ($foundRoot != $expectedRootName) {
                $errorMsg = 'Invalid root element in source XML, expecting: "'.$expectedRootName.'", found: "'.$foundRoot.'"';

                return false;
            }

            return true;
        }

        $errorMsg = 'Cannot read source XML: missing root element';

        return false;
    } finally {
        $xml->close();
    }
}
}