299 lines
11 KiB
PHP
299 lines
11 KiB
PHP
<?php
|
|
|
|
namespace KupShop\FeedGeneratorBundle\Utils;
|
|
|
|
use KupShop\AdminBundle\Util\ActivityLog;
|
|
use KupShop\FeedGeneratorBundle\Feed\ExternalFeed;
|
|
use KupShop\FeedsBundle\FeedsBundle;
|
|
use KupShop\KupShopBundle\Util\System\PathFinder;
|
|
use Symfony\Component\Lock\LockFactory;
|
|
use Symfony\Component\Lock\Store\FlockStore;
|
|
|
|
/**
 * File-system cache for the remote documents of an external feed.
 *
 * The feed row carries two URLs under `data`: the feed source (cached as `.xml`)
 * and its description (cached as `.xsd`). Downloads are stored in the
 * `feed_external/` sub-directory of the application tmp dir, named after the MD5
 * of the URL, and are considered fresh for CACHE_TTL seconds. Downloads are
 * serialised per file through a Symfony lock whose store uses the same directory.
 *
 * Besides files, single parsed items can be kept in the application cache through
 * setCacheItem()/getCacheItem().
 */
class ExternalFeedCache
{
    /** Seconds a cached download (and a cached item) is considered fresh. */
    private const CACHE_TTL = 10 * 60;

    /** Age in seconds after which any file in the cache directory is removed by clearCache(). */
    private const DANGLING_FILE_TTL = 30 * 24 * 60 * 60;

    /** Number of leading bytes of a rejected download that are attached to the activity log. */
    private const LOGGED_CONTENT_LENGTH = 500;

    /** Feed row; `data` is always the decoded array form after construction. */
    private array $feedRow;

    /** Expected element path of the feed; only the first item (root element name) is checked here. */
    protected ?array $basePath = null;

    private LockFactory $lockFactory;

    /**
     * @param array      $feedRow  feed row; `data` may be an array or its JSON-encoded string
     * @param array|null $basePath expected element path, first item being the root element name;
     *                             null/empty disables the root element check
     */
    public function __construct(array $feedRow, ?array $basePath = null)
    {
        if (is_string($feedRow['data'] ?? null)) {
            $feedRow['data'] = json_decode($feedRow['data'], true);
        }

        $this->feedRow = $feedRow;
        $this->basePath = $basePath;
        $this->lockFactory = new LockFactory(new FlockStore($this->getCacheDirPath()));
    }

    /**
     * Removes every regular file in the cache directory that has not been modified
     * for more than DANGLING_FILE_TTL seconds.
     */
    private function deleteDanglingFiles(): void
    {
        $cacheDir = $this->getCacheDirPath();

        $entries = scandir($cacheDir);
        if ($entries === false) {
            return; // directory is not readable - nothing we can clean up
        }

        $now = time();
        foreach ($entries as $entry) {
            $path = $cacheDir.$entry;
            if (!is_file($path)) {
                continue; // skips '.', '..' and sub-directories
            }

            $modifiedAt = filemtime($path);
            if ($modifiedAt !== false && $now - $modifiedAt > self::DANGLING_FILE_TTL) {
                unlink($path);
            }
        }
    }

    /**
     * Returns the cache directory (with trailing slash), creating it when missing.
     */
    private function getCacheDirPath(): string
    {
        $tmpDir = PathFinder::getService()->getTmpDir().'feed_external/';

        if (!is_dir($tmpDir)) {
            // Another request may create the directory between the check and mkdir();
            // the warning of that lost race is the only thing suppressed here.
            @mkdir($tmpDir, 0777, true);
        }

        return $tmpDir;
    }

    /**
     * Builds the cache file path for a resource.
     *
     * @param string $resourceUrl URL of the remote resource (hashed into the file name)
     * @param string $type        ExternalFeed::TYPE_SOURCE or ExternalFeed::TYPE_DESCRIPTION
     * @param bool   $ignoreAge   when true the path is returned even if the file is missing or stale
     *
     * @return string|null the path; null for an unknown type or (unless $ignoreAge) when there is
     *                     no fresh cached file
     */
    private function getCacheFileName(string $resourceUrl, string $type, bool $ignoreAge = false): ?string
    {
        switch ($type) {
            case ExternalFeed::TYPE_SOURCE:
                $extension = '.xml';
                break;
            case ExternalFeed::TYPE_DESCRIPTION:
                $extension = '.xsd';
                break;
            default:
                return null;
        }

        $fileName = $this->getCacheDirPath().md5($resourceUrl).$extension;

        if ($ignoreAge || $this->isFresh($fileName)) {
            return $fileName;
        }

        return null;
    }

    /**
     * Tells whether the file exists and was modified less than CACHE_TTL seconds ago.
     */
    private function isFresh(string $fileName): bool
    {
        if (!file_exists($fileName)) {
            return false;
        }

        $modifiedAt = filemtime($fileName);

        return $modifiedAt !== false && (time() - $modifiedAt) < self::CACHE_TTL;
    }

    /**
     * Deletes the file when it exists; a missing file is not an error.
     */
    private function removeFile(string $path): void
    {
        clearstatcache(true, $path);
        if (is_file($path)) {
            unlink($path);
        }
    }

    /**
     * Downloads the resource into the cache, validating it before it replaces the cached copy.
     *
     * The download goes to "<cache file>.tmp" and is renamed over the cache file only when the
     * response is HTTP 200, detected as text/xml and accepted by readableXML(). Failures are
     * written to the activity log; an older cached copy, when present, is then used as fallback.
     *
     * @return string|null path of the cached file, or null when nothing usable is cached
     */
    private function doCacheFile(string $resourceUrl, string $type): ?string
    {
        $fileName = $this->getCacheFileName($resourceUrl, $type, true);
        if ($fileName === null) {
            return null; // unknown resource type - there is no file to lock or download
        }

        $tmpFileName = $fileName.'.tmp';
        $downloadOK = false;
        $subsequentThread = false;
        $invalidContentBeginning = null;
        $fetchHttpCode = -1;
        $fetchError = '';

        $lock = $this->lockFactory->createLock($fileName);
        if ($lock->acquire(true)) {
            try {
                clearstatcache(); // the file may have been refreshed while we waited for the lock

                if ($this->isFresh($fileName)) {
                    $subsequentThread = true; // whoever held the lock before us refreshed the cache
                } elseif ($this->fetchRemoteUrl($resourceUrl, $tmpFileName, $fetchHttpCode, $fetchError)) {
                    // check content type of downloaded file
                    if ($fetchHttpCode === 200
                        && mime_content_type($tmpFileName) === 'text/xml'
                        && $this->readableXML($tmpFileName, $fetchError)
                    ) {
                        $downloadOK = rename($tmpFileName, $fileName); // valid content -> update cache
                    } elseif (file_exists($tmpFileName)) {
                        $invalidContentBeginning = file_get_contents($tmpFileName, false, null, 0, self::LOGGED_CONTENT_LENGTH);
                    }
                } else {
                    $fetchError = !empty($fetchError) ? $fetchError : 'Unknown error';
                }
            } finally {
                if (!$downloadOK) {
                    $this->removeFile($tmpFileName); // never leave a rejected or partial download behind
                }
                $lock->release(); // released even when fetching/validation throws
            }
        }

        if ($downloadOK || $subsequentThread) {
            return $fileName;
        }

        $data = [
            'externalURL' => $resourceUrl,
            'fileName' => $fileName,
            'feedID' => $this->feedRow['id'],
            'feedName' => $this->feedRow['name'],
        ];
        if (!empty($invalidContentBeginning)) {
            $data['contentBeginning'] = $invalidContentBeginning.'...[skipped]';
        }
        $data['fetchHttpCode'] = $fetchHttpCode;
        $data['fetchError'] = $fetchError;

        // A stale copy is still better than nothing: warn and use it; otherwise report an error.
        $hasOldCache = file_exists($fileName);
        $message = $hasOldCache
            ? 'Feed: Failed to load external feed, using old cache instead - "%s" [%s]'
            : 'Feed: Failed to load external feed with no cache present - "%s" [%s]';

        addActivityLog(
            $hasOldCache ? ActivityLog::SEVERITY_WARNING : ActivityLog::SEVERITY_ERROR,
            ActivityLog::TYPE_COMMUNICATION,
            sprintf($message, $data['feedName'], $data['feedID']),
            [
                ...ActivityLog::addObjectData([$data['feedID'] => $data['feedName']], 'feeds'),
                ...$data,
            ],
            [FeedsBundle::LOG_TAG_FEED]
        );

        return $hasOldCache ? $fileName : null;
    }

    /**
     * Resolves the feed URL of the given type to a local cached file, downloading it when
     * there is no fresh copy. Falls back to the remote URL when nothing could be cached.
     */
    private function resolveCachedUrl(string $type): string
    {
        $url = $this->feedRow['data'][$type];

        return $this->getCacheFileName($url, $type)
            ?? $this->doCacheFile($url, $type)
            ?? $url;
    }

    /**
     * @return string path of the cached description file, or the remote URL as fallback
     */
    public function getDescriptionUrl(): string
    {
        return $this->resolveCachedUrl(ExternalFeed::TYPE_DESCRIPTION);
    }

    /**
     * @return string path of the cached source file, or the remote URL as fallback
     */
    public function getSourceUrl(): string
    {
        return $this->resolveCachedUrl(ExternalFeed::TYPE_SOURCE);
    }

    /**
     * Prefix shared by all application-cache keys of this feed (derived from the source URL).
     */
    private function getItemCachePrefix(): string
    {
        return 'external_feed_'.md5($this->feedRow['data'][ExternalFeed::TYPE_SOURCE]);
    }

    /**
     * Stores one item, JSON-encoded, in the application cache for CACHE_TTL seconds.
     */
    public function setCacheItem(string $searchKey, array $item): void
    {
        setCache($this->getItemCachePrefix().$searchKey, json_encode($item), self::CACHE_TTL);
    }

    /**
     * @return array|null the item stored by setCacheItem(), or null when it is missing
     *                    or the stored value does not decode to an array
     */
    public function getCacheItem(string $searchKey): ?array
    {
        $res = getCache($this->getItemCachePrefix().$searchKey);
        if (!is_string($res)) {
            return null;
        }

        $decoded = json_decode($res, true);

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Removes both cached files of this feed regardless of their age, drops the feed's
     * application-cache entries and cleans up dangling files in the cache directory.
     */
    public function clearCache(): void
    {
        foreach ([ExternalFeed::TYPE_DESCRIPTION, ExternalFeed::TYPE_SOURCE] as $type) {
            $url = $this->feedRow['data'][$type] ?? null;
            if (!is_string($url)) {
                continue;
            }

            // $ignoreAge = true: stale files must go too, doCacheFile() would still fall back to them
            $fileName = $this->getCacheFileName($url, $type, true);
            if ($fileName !== null) {
                $this->removeFile($fileName);
            }
        }

        clearCache($this->getItemCachePrefix(), true);
        $this->deleteDanglingFiles();
    }

    /**
     * Fetches a remote file using cURL and saves it to a specified file.
     *
     * @param string $url            the URL of the resource to fetch
     * @param string $outputFilePath the path to the file where the response will be saved
     * @param int    &$httpCode      HTTP status code returned by the request
     * @param string &$error         Error message in case of failure
     *
     * @return bool true on success, or false on failure (the output file is removed then)
     */
    protected function fetchRemoteUrl(string $url, string $outputFilePath, int &$httpCode, string &$error): bool
    {
        // Open the output file first so that no cURL handle is created when we cannot write
        $fileHandle = fopen($outputFilePath, 'w');
        if (!$fileHandle) {
            $error = "Failed to open temporary file: {$outputFilePath}";

            return false;
        }

        $ch = curl_init($url);
        if ($ch === false) {
            fclose($fileHandle);
            $this->removeFile($outputFilePath);
            $error = "Failed to initialize cURL for: {$url}";

            return false;
        }

        curl_setopt_array($ch, [
            CURLOPT_FILE => $fileHandle,        // Stream the response directly into the file
            CURLOPT_FOLLOWLOCATION => true,     // Follow redirects
            CURLOPT_CONNECTTIMEOUT => 60,       // Connection timeout (in seconds)
            CURLOPT_TIMEOUT => 5 * 60,          // Execution timeout (in seconds)
            CURLOPT_FAILONERROR => true,        // Treat HTTP errors as failures
            CURLOPT_USERAGENT => 'ExternalFeed/1.0',
            // hotfix for ZNZ Elnino https://trello.com/c/t0JmotPj
            // NOTE(review): this disables TLS certificate checks for every feed, not only
            // the one that needed the hotfix - consider making it opt-in per feed.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => 0,
        ]);

        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);

        fclose($fileHandle);
        unset($ch); // the handle is freed with the object; curl_close() has no effect since PHP 8.0

        if ($response === false || $httpCode >= 400) {
            $this->removeFile($outputFilePath); // Clean up the temporary file on failure

            return false;
        }

        return true;
    }

    /**
     * Checks that the file can be read as XML up to its root element and, when a base path
     * is configured, that the root element has the expected name.
     *
     * @param string $filePath  path of the XML file to inspect
     * @param string &$errorMsg set to the reason when false is returned; left untouched on success
     */
    protected function readableXML(string $filePath, string &$errorMsg = ''): bool
    {
        $reader = new \XMLReader();
        if (!$reader->open($filePath)) {
            $errorMsg = 'Cannot open source XML';

            return false;
        }

        try {
            // search for root element
            while ($reader->read()) {
                if ($reader->nodeType !== \XMLReader::ELEMENT) {
                    continue;
                }

                if (empty($this->basePath)) {
                    return true; // any root element is acceptable
                }

                $elName = (string) $reader->name;
                $expectedRootName = (string) $this->basePath[array_key_first($this->basePath)];
                if ($elName !== $expectedRootName) {
                    $errorMsg = 'Invalid root element in source XML, expecting: "'.$expectedRootName.'", found: "'.$elName.'"';

                    return false;
                }

                return true;
            }

            $errorMsg = 'Cannot read source XML: missing root element';

            return false;
        } finally {
            $reader->close();
        }
    }
}
|