feedRow = $feedRow;
        if (is_string($this->feedRow['data'])) {
            $this->feedRow['data'] = json_decode($this->feedRow['data'], true);
        }
        $this->basePath = $basePath ?? null;
        $store = new FlockStore($this->getCacheDirPath());
        $this->lockFactory = new LockFactory($store);
    }

    /**
     * Deletes regular files in the cache directory that were last modified more than 30 days ago.
     *
     * Sub-directories are skipped. Entries whose modification time cannot be read
     * (e.g. removed concurrently) are left alone instead of being passed to unlink().
     */
    private function deleteDanglingFiles(): void
    {
        $cacheDir = $this->getCacheDirPath();
        $entries = scandir($cacheDir);
        if ($entries === false) {
            // directory could not be listed - nothing to clean up
            return;
        }

        $maxAge = 30 * 24 * 60 * 60; // 30 days
        $now = time();
        foreach ($entries as $entry) {
            $fqFileName = $cacheDir.$entry;
            if (is_dir($fqFileName)) {
                continue;
            }
            $modifiedAt = filemtime($fqFileName);
            if ($modifiedAt !== false && $now - $modifiedAt > $maxAge) {
                unlink($fqFileName);
            }
        }
    }

    /**
     * Returns the cache directory ("feed_external/" below the tmp dir reported by PathFinder),
     * creating it when it does not exist yet.
     *
     * @return string directory path including the trailing slash
     */
    private function getCacheDirPath(): string
    {
        $tmpDir = PathFinder::getService()->getTmpDir().'feed_external/';
        if (!is_dir($tmpDir)) {
            // recursive, so a missing parent tmp directory does not make the creation fail
            mkdir($tmpDir, 0777, true);
        }

        return $tmpDir;
    }

    /**
     * Builds the cache file name for a resource URL.
     *
     * The name is md5(URL) plus ".xml" for source feeds or ".xsd" for descriptions.
     *
     * @param string $resourceUrl remote URL the cache file belongs to
     * @param string $type        ExternalFeed::TYPE_SOURCE or ExternalFeed::TYPE_DESCRIPTION
     * @param bool   $ignoreAge   when true the name is returned even if the file is missing or stale
     *
     * @return string|null the file name; null for an unknown type, or (unless $ignoreAge is set)
     *                     when the file does not exist or is 10 minutes old or older
     */
    private function getCacheFileName(string $resourceUrl, string $type, bool $ignoreAge = false): ?string
    {
        $tmpDir = $this->getCacheDirPath();
        switch ($type) {
            case ExternalFeed::TYPE_SOURCE:
                $fileName = $tmpDir.md5($resourceUrl).'.xml';
                break;
            case ExternalFeed::TYPE_DESCRIPTION:
                $fileName = $tmpDir.md5($resourceUrl).'.xsd';
                break;
            default:
                return null;
        }

        if ($ignoreAge) {
            return $fileName;
        }

        $isFresh = file_exists($fileName) && (time() - filemtime($fileName)) < 10 * 60;

        return $isFresh ? $fileName : null;
    }

    /**
     * Downloads a remote resource into the cache, guarded by a lock keyed on the cache file name.
     *
     * While holding the lock the cache file age is re-checked; if it is missing or older than
     * 10 minutes the resource is fetched into "<cache file>.tmp", validated (HTTP 200, detected
     * MIME type "text/xml", readable XML root) and then renamed over the cache file. If the
     * cache file is already fresh (presumably refreshed by another process that held the lock
     * before us) it is used as is.
     *
     * On failure an activity log entry is written; an existing older cache file is returned as
     * a fallback (warning), otherwise null is returned (error).
     *
     * @param string $resourceUrl remote URL to download
     * @param string $type        ExternalFeed::TYPE_SOURCE or ExternalFeed::TYPE_DESCRIPTION
     *
     * @return string|null path of the cache file to use, or null when nothing usable exists
     */
    private function doCacheFile(string $resourceUrl, string $type): ?string
    {
        $fileName = $this->getCacheFileName($resourceUrl, $type, true);
        if ($fileName === null) {
            // unknown resource type: there is no cache file that could be locked or filled
            return null;
        }

        $tmpFileName = $fileName.'.tmp';
        $downloadOK = false;
        $subsequentThread = false;
        $invalidContentBeginning = null;
        $fetchHttpCode = -1;
        $fetchError = '';
        $maxAge = 600; // Maximum age of the file in seconds (10 minutes)

        $lock = $this->lockFactory->createLock($fileName);
        if ($lock->acquire(true)) {
            try {
                clearstatcache(); // Clear cached results of filemtime()
                $fileModifiedTime = file_exists($fileName) ? filemtime($fileName) : false;

                if (!$fileModifiedTime || (time() - $fileModifiedTime > $maxAge)) {
                    if ($this->fetchRemoteUrl($resourceUrl, $tmpFileName, $fetchHttpCode, $fetchError)) {
                        // check status, content type and XML readability of the downloaded file
                        $isValid = $fetchHttpCode === 200
                            && mime_content_type($tmpFileName) === 'text/xml'
                            && $this->readableXML($tmpFileName, $fetchError);

                        if ($isValid) {
                            $downloadOK = rename($tmpFileName, $fileName); // update cache
                        } elseif (is_file($tmpFileName)) {
                            // keep the first bytes of the rejected content for the activity log
                            $invalidContentBeginning = file_get_contents($tmpFileName, false, null, 0, 500);
                        }
                    } else {
                        $fetchError = !empty($fetchError) ? $fetchError : 'Unknown error';
                    }

                    // the temporary file must never outlive a failed attempt (including a failed rename)
                    if (!$downloadOK && is_file($tmpFileName)) {
                        unlink($tmpFileName);
                    }
                } else {
                    $subsequentThread = true; // cached file should be present at this point
                }
            } finally {
                // release even when validation or file handling throws
                $lock->release();
            }
        }

        if ($downloadOK || $subsequentThread) {
            return $fileName;
        }

        $data = [
            'externalURL' => $resourceUrl,
            'fileName' => $fileName,
            'feedID' => $this->feedRow['id'],
            'feedName' => $this->feedRow['name'],
        ];
        if (!empty($invalidContentBeginning)) {
            $data['contentBeginning'] = $invalidContentBeginning.'...[skipped]';
        }
        $data['fetchHttpCode'] = $fetchHttpCode;
        $data['fetchError'] = $fetchError;

        // an older cached copy is still better than nothing: warn and fall back to it
        $hasOldCache = file_exists($fileName);
        $messageFormat = $hasOldCache
            ? 'Feed: Failed to load external feed, using old cache instead - "%s" [%s]'
            : 'Feed: Failed to load external feed with no cache present - "%s" [%s]';

        addActivityLog(
            $hasOldCache ? ActivityLog::SEVERITY_WARNING : ActivityLog::SEVERITY_ERROR,
            ActivityLog::TYPE_COMMUNICATION,
            sprintf($messageFormat, $data['feedName'], $data['feedID']),
            [
                ...ActivityLog::addObjectData([$data['feedID'] => $data['feedName']], 'feeds'),
                ...$data,
            ],
            [FeedsBundle::LOG_TAG_FEED]
        );

        return $hasOldCache ? $fileName : null;
    }

    /**
     * Returns the location of the feed description: a fresh cache file if present, otherwise
     * a newly cached (or older fallback) file, otherwise the remote URL itself.
     */
    public function getDescriptionUrl(): string
    {
        $url = $this->feedRow['data'][ExternalFeed::TYPE_DESCRIPTION];

        return $this->getCacheFileName($url, ExternalFeed::TYPE_DESCRIPTION)
            ?? $this->doCacheFile($url, ExternalFeed::TYPE_DESCRIPTION)
            ?? $url;
    }

    /**
     * Returns the location of the feed source: a fresh cache file if present, otherwise
     * a newly cached (or older fallback) file, otherwise the remote URL itself.
     */
    public function getSourceUrl(): string
    {
        $url = $this->feedRow['data'][ExternalFeed::TYPE_SOURCE];

        return $this->getCacheFileName($url, ExternalFeed::TYPE_SOURCE)
            ?? $this->doCacheFile($url, ExternalFeed::TYPE_SOURCE)
            ?? $url;
    }

    /**
     * Stores an item as JSON under "external_feed_" + md5(source URL) + $searchKey for 10 minutes.
     *
     * @param string               $searchKey key suffix identifying the item
     * @param array<mixed>         $item      data to store
     */
    public function setCacheItem(string $searchKey, array $item): void
    {
        $urlHash = md5($this->feedRow['data'][ExternalFeed::TYPE_SOURCE]);
        $value = json_encode($item);
        setCache('external_feed_'.$urlHash.$searchKey, $value, 10 * 60);
    }

    /**
     * Reads an item previously stored with setCacheItem().
     *
     * @param string $searchKey key suffix identifying the item
     *
     * @return array<mixed>|null the decoded item, or null when nothing usable is cached
     */
    public function getCacheItem(string $searchKey): ?array
    {
        $urlHash = md5($this->feedRow['data'][ExternalFeed::TYPE_SOURCE]);
        $res = getCache('external_feed_'.$urlHash.$searchKey);
        if (!is_string($res)) {
            return null;
        }

        // json_decode() may yield a scalar or null; only an array satisfies the return type
        $decoded = json_decode($res, true);

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Removes the cached description and source files of this feed, clears the cache entries
     * prefixed with "external_feed_" + md5(source URL), and purges dangling files.
     */
    public function clearCache(): void
    {
        $descr = $this->feedRow['data'][ExternalFeed::TYPE_DESCRIPTION];
        $source = $this->feedRow['data'][ExternalFeed::TYPE_SOURCE];
        $urlHash = md5($source);

        // $ignoreAge = true: stale files must be removed too, otherwise doCacheFile()
        // could still fall back to them after the cache was "cleared"
        $cacheFiles = [
            $this->getCacheFileName($descr, ExternalFeed::TYPE_DESCRIPTION, true),
            $this->getCacheFileName($source, ExternalFeed::TYPE_SOURCE, true),
        ];
        foreach ($cacheFiles as $cacheFile) {
            if ($cacheFile !== null && is_file($cacheFile)) {
                unlink($cacheFile);
            }
        }

        clearCache('external_feed_'.$urlHash, true);
        $this->deleteDanglingFiles();
    }

    /**
     * Fetches a remote file using cURL and saves it to a specified file.
*
     * Redirects are followed, HTTP status codes >= 400 are treated as failures and the
     * output file is removed again when the transfer fails.
     *
     * NOTE(review): TLS peer and host verification are disabled below (see the hotfix
     * comment), so the downloaded content is not protected against interception.
     *
     * @param string $url            the URL of the resource to fetch
     * @param string $outputFilePath the path to the file where the response will be saved
     * @param int    &$httpCode      HTTP status code returned by the request
     * @param string &$error         Error message in case of failure
     *
     * @return bool true on success, or false on failure
     */
    protected function fetchRemoteUrl(string $url, string $outputFilePath, int &$httpCode, string &$error): bool
    {
        // Open the output file for writing
        $fileHandle = fopen($outputFilePath, 'w');
        if (!$fileHandle) {
            $error = "Failed to open temporary file: {$outputFilePath}";

            return false;
        }

        $ch = curl_init($url);
        if ($ch === false) {
            fclose($fileHandle);
            if (is_file($outputFilePath)) {
                unlink($outputFilePath); // do not leave the empty file behind
            }
            $error = 'Failed to initialize cURL';

            return false;
        }

        try {
            curl_setopt_array($ch, [
                CURLOPT_FILE => $fileHandle,          // Stream the response directly into the file
                CURLOPT_FOLLOWLOCATION => true,       // Follow redirects
                CURLOPT_CONNECTTIMEOUT => 60,         // Connection timeout (in seconds)
                CURLOPT_TIMEOUT => 5 * 60,            // Execution timeout (in seconds)
                CURLOPT_FAILONERROR => true,          // Treat HTTP errors as failures
                CURLOPT_USERAGENT => 'ExternalFeed/1.0',
                // hotfix for ZNZ Elnino https://trello.com/c/t0JmotPj
                CURLOPT_SSL_VERIFYPEER => false,
                CURLOPT_SSL_VERIFYHOST => 0,
            ]);

            $response = curl_exec($ch);
            $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $error = curl_error($ch);
        } finally {
            // always release both handles, even if a cURL call throws
            fclose($fileHandle);
            curl_close($ch);
        }

        // Check for errors
        if ($response === false || $httpCode >= 400) {
            if (is_file($outputFilePath)) {
                unlink($outputFilePath); // Clean up the temporary file on failure
            }

            return false;
        }

        return true;
    }

    /**
     * Checks that a file can be opened with XMLReader and contains a root element.
     *
     * When $this->basePath is non-empty, the name of the first element found must equal
     * the first entry of $this->basePath.
     *
     * @param string $filePath  path of the XML file to inspect
     * @param string &$errorMsg set to a description of the problem when false is returned
     *
     * @return bool true when a root element was found (and matches the expected name, if any)
     */
    protected function readableXML(string $filePath, string &$errorMsg = ''): bool
    {
        $reader = new \XMLReader();
        if (!$reader->open($filePath)) {
            $errorMsg = 'Cannot open source XML';

            return false;
        }

        try {
            // search for root element: the first ELEMENT node decides the result
            while ($reader->read()) {
                if ($reader->nodeType !== \XMLReader::ELEMENT) {
                    continue;
                }
                if (empty($this->basePath)) {
                    return true;
                }

                $elName = (string) $reader->name;
                $expectedRootName = (string) reset($this->basePath);
                // strict comparison: element names are plain strings, no numeric juggling wanted
                if ($elName !== $expectedRootName) {
                    $errorMsg = 'Invalid root element in source XML, expecting: "'.$expectedRootName.'", found: "'.$elName.'"';

                    return false;
                }

                return true;
            }
        } finally {
            $reader->close();
        }

        $errorMsg = 'Cannot read source XML: missing root element';

        return false;
    }
}