Skip to content

Commit

Permalink
Use Guzzle API for HTTP Crawling Requests (#1837)
Browse files Browse the repository at this point in the history
  • Loading branch information
timohund authored Feb 15, 2018
1 parent 02f8b8a commit 776b219
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 103 deletions.
81 changes: 55 additions & 26 deletions Classes/IndexQueue/PageIndexerRequest.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@

use ApacheSolrForTypo3\Solr\System\Configuration\ExtensionConfiguration;
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
use Psr\Http\Message\ResponseInterface;
use GuzzleHttp\Exception\ServerException;
use GuzzleHttp\Exception\ClientException;
use TYPO3\CMS\Core\Http\RequestFactory;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
Expand Down Expand Up @@ -102,20 +106,27 @@ class PageIndexerRequest
*/
protected $extensionConfiguration;

/**
* @var RequestFactory
*/
protected $requestFactory;

/**
* PageIndexerRequest constructor.
*
* @param string $jsonEncodedParameters json encoded header
* @param SolrLogManager|null $solrLogManager
* @param ExtensionConfiguration|null $extensionConfiguration
* @param RequestFactory|null $requestFactory
*/
public function __construct($jsonEncodedParameters = null, SolrLogManager $solrLogManager = null, ExtensionConfiguration $extensionConfiguration = null)
public function __construct($jsonEncodedParameters = null, SolrLogManager $solrLogManager = null, ExtensionConfiguration $extensionConfiguration = null, RequestFactory $requestFactory = null)
{
$this->requestId = uniqid();
$this->timeout = (float)ini_get('default_socket_timeout');

$this->logger = is_null($solrLogManager) ? GeneralUtility::makeInstance(SolrLogManager::class, __CLASS__) : $solrLogManager;
$this->extensionConfiguration = is_null($extensionConfiguration) ? GeneralUtility::makeInstance(ExtensionConfiguration::class) : $extensionConfiguration;
$this->logger = $solrLogManager ?? GeneralUtility::makeInstance(SolrLogManager::class, __CLASS__);
$this->extensionConfiguration = $extensionConfiguration ?? GeneralUtility::makeInstance(ExtensionConfiguration::class);
$this->requestFactory = $requestFactory ?? GeneralUtility::makeInstance(RequestFactory::class);

if (is_null($jsonEncodedParameters)) {
return;
Expand Down Expand Up @@ -190,7 +201,7 @@ protected function getUrlAndDecodeResponse($url, PageIndexerResponse $response)
$headers = $this->getHeaders();
$rawResponse = $this->getUrl($url, $headers, $this->timeout);
// convert JSON response to response object properties
$decodedResponse = $response->getResultsFromJson($rawResponse);
$decodedResponse = $response->getResultsFromJson($rawResponse->getBody()->getContents());

if ($rawResponse === false || $decodedResponse === false) {
$this->logger->log(
Expand All @@ -200,8 +211,8 @@ protected function getUrlAndDecodeResponse($url, PageIndexerResponse $response)
'request ID' => $this->requestId,
'request url' => $url,
'request headers' => $headers,
'response headers' => $http_response_header, // automatically defined by file_get_contents()
'raw response body' => $rawResponse
'response headers' => $rawResponse->getHeaders(),
'raw response body' => $rawResponse->getBody()->getContents()
]
);

Expand Down Expand Up @@ -235,11 +246,7 @@ public function getHeaders()
];

$indexerRequestData = array_merge($indexerRequestData, $this->parameters);
$headers[] = 'X-Tx-Solr-Iq: ' . json_encode($indexerRequestData);

if (!empty($this->username) && !empty($this->password)) {
$headers[] = 'Authorization: Basic ' . base64_encode($this->username . ':' . $this->password);
}
$headers[] = 'X-Tx-Solr-Iq: ' . json_encode($indexerRequestData, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT | JSON_UNESCAPED_SLASHES);

return $headers;
}
Expand Down Expand Up @@ -393,26 +400,48 @@ public function setTimeout($timeout)
* @param string $url
* @param string[] $headers
* @param float $timeout
* @return string
* @return ResponseInterface
* @throws \Exception
*/
protected function getUrl($url, $headers, $timeout)
protected function getUrl($url, $headers, $timeout): ResponseInterface
{
$options = [
'http' => [
'header' => implode(CRLF, $headers),
'timeout' => $timeout
],
];
try {
$options = $this->buildGuzzleOptions($headers, $timeout);
$response = $this->requestFactory->request($url, 'GET', $options);
} catch (ClientException $e) {
$response = $e->getResponse();
} catch (ServerException $e) {
$response = $e->getResponse();
}

return $response;
}

/**
* Build the options array for the guzzle client.
*
* @param array $headers
* @param float $timeout
* @return array
*/
protected function buildGuzzleOptions($headers, $timeout)
{
$finalHeaders = [];

foreach ($headers as $header) {
list($name, $value) = explode(':', $header, 2);
$finalHeaders[$name] = trim($value);
}

$options = ['headers' => $finalHeaders, 'timeout' => $timeout];
if (!empty($this->username) && !empty($this->password)) {
$options['auth'] = [$this->username, $this->password];
}

if ($this->extensionConfiguration->getIsSelfSignedCertificatesEnabled()) {
$options['ssl'] = [
'verify_peer' => false,
'allow_self_signed'=> true
];
$options['verify'] = false;
}

$context = stream_context_create($options);
$rawResponse = file_get_contents($url, false, $context);
return $rawResponse;
return $options;
}
}
67 changes: 0 additions & 67 deletions Tests/Integration/Task/IndexQueueWorkerTaskTest.php
Original file line number Diff line number Diff line change
@@ -1,33 +1,4 @@
<?php
namespace ApacheSolrForTypo3\Solr\IndexQueue;

use ApacheSolrForTypo3\Solr\Tests\Integration\Task\IndexQueueDependencyFaker;

/**
* Overrides uniqid() inside the IndexQueue namespace so that a fake request id is used.
* This is needed because this is an integration test and the unique id cannot be injected from outside.
*
* @return string the request id provided by IndexQueueDependencyFaker::getRequestId()
*/
function uniqid()
{
return IndexQueueDependencyFaker::getRequestId();
}

/**
* Fakes file_get_contents() calls made from the IndexQueue namespace to provide a faked
* frontend indexing response. All arguments are forwarded unchanged to the dependency faker.
*
* @param string $url
* @param bool $flags
* @param resource $context
*
* @return string the value produced by IndexQueueDependencyFaker::getHttpContent()
*/
function file_get_contents($url, $flags, $context)
{
return IndexQueueDependencyFaker::getHttpContent($url, $flags,
$context);
}

namespace ApacheSolrForTypo3\Solr\Tests\Integration\Task;

Expand Down Expand Up @@ -89,44 +60,6 @@ public function setUp()
$this->indexQueue = GeneralUtility::makeInstance(Queue::class);
}

/**
* Executes the IndexQueueWorkerTask for the first available site with a limit of one document
* and checks that page 1 is indexed afterwards and that the progress moves from 0 to 100.
*
* @test
*/
public function canTriggerFrontendIndexingAndMarkQueueEntryAsProcessed()
{
$this->importDataSetFromFixture('can_trigger_frontend_calls_for_page_index.xml');

// we expect that page 1 has not been indexed before we start
$this->assertFalse($this->indexQueue->containsIndexedItem('pages', 1));

/** @var $beUser \TYPO3\CMS\Core\Authentication\BackendUserAuthentication */
$beUser = GeneralUtility::makeInstance(BackendUserAuthentication::class);
$GLOBALS['BE_USER'] = $beUser;

/** @var $languageService \TYPO3\CMS\Lang\LanguageService */
$languageService = GeneralUtility::makeInstance(LanguageService::class);
$GLOBALS['LANG'] = $languageService;

$charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
$GLOBALS['LANG']->csConvObj = $charsetConverter;

$siteRepository = GeneralUtility::makeInstance(SiteRepository::class);
$site = $siteRepository->getFirstAvailableSite();
/** @var $indexQueueQueueWorkerTask \ApacheSolrForTypo3\Solr\Task\IndexQueueWorkerTask */
$indexQueueQueueWorkerTask = GeneralUtility::makeInstance(IndexQueueWorkerTask::class);
$indexQueueQueueWorkerTask->setDocumentsToIndexLimit(1);
$indexQueueQueueWorkerTask->setRootPageId($site->getRootPageId());

// capture the progress around the execution so both values can be asserted below
$progressBefore = $indexQueueQueueWorkerTask->getProgress();
$indexQueueQueueWorkerTask->execute();
$progressAfter = $indexQueueQueueWorkerTask->getProgress();

// after the task has been executed we expect that page 1 is marked as indexed
$this->assertTrue($this->indexQueue->containsIndexedItem('pages', 1));
$this->assertEquals(0.0, $progressBefore, 'Wrong progress before');
$this->assertEquals(100.0, $progressAfter, 'Wrong progress after');
}

/**
* @test
*/
Expand Down
51 changes: 41 additions & 10 deletions Tests/Unit/IndexQueue/PageIndexerRequestTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
use ApacheSolrForTypo3\Solr\IndexQueue\PageIndexerRequest;
use ApacheSolrForTypo3\Solr\System\Configuration\ExtensionConfiguration;
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
use ApacheSolrForTypo3\Solr\System\Records\Pages\PagesRepository;
use ApacheSolrForTypo3\Solr\Tests\Unit\UnitTest;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\StreamInterface;
use TYPO3\CMS\Core\Http\RequestFactory;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
Expand Down Expand Up @@ -172,16 +176,25 @@ public function canSendRequestToSslSite()
*/
public function authenticationHeaderIsSetWhenUsernameAndPasswordHaveBeenPassed()
{
$queueItemMock = $this->getDumbMock(Item::class);
$requestFactoryMock = $this->getDumbMock(RequestFactory::class);
$requestFactoryMock->expects($this->once())->method('request')->willReturnCallback(function($url, $method, $options) {
$this->assertSame(['bob', 'topsecret'], $options['auth'], 'Authentication options have not been set');
$this->assertSame('GET', $method, 'Unexpected http method');

$pageIndexerRequest = $this->getPageIndexerRequest();
$pageIndexerRequest->setIndexQueueItem($queueItemMock);
$pageIndexerRequest->setAuthorizationCredentials('bob','topsecret');
return $this->getFakedGuzzleResponse($this->getFixtureContentByName('fakeResponse.json'));
});

$headers = $pageIndexerRequest->getHeaders();
$solrLogManagerMock = $this->getDumbMock(SolrLogManager::class);
$extensionConfigurationMock = $this->getDumbMock(ExtensionConfiguration::class);

$testParameters = json_encode(['requestId' => '581f76be71f60']);
$pageIndexerRequest = new PageIndexerRequest($testParameters, $solrLogManagerMock, $extensionConfigurationMock, $requestFactoryMock);

$expetedHeader = 'Authorization: Basic ' . base64_encode('bob:topsecret');
$this->assertContains($expetedHeader, $headers, 'Headers did not contain authentication details');
$queueItemMock = $this->getDumbMock(Item::class);
$pageIndexerRequest->setIndexQueueItem($queueItemMock);
$pageIndexerRequest->setAuthorizationCredentials('bob', 'topsecret');

$pageIndexerRequest->send('https://7.6.local.typo3.org/about/typo3/');
}

/**
Expand Down Expand Up @@ -214,13 +227,14 @@ public function canSetUserAgent()

/**
* @param string $jsonEncodedParameter
* @param RequestFactory $requestFactory
* @return PageIndexerRequest
*/
protected function getPageIndexerRequest($jsonEncodedParameter = null)
protected function getPageIndexerRequest($jsonEncodedParameter = null, RequestFactory $requestFactory = null)
{
$solrLogManagerMock = $this->getDumbMock(SolrLogManager::class);
$extensionConfigurationMock = $this->getDumbMock(ExtensionConfiguration::class);
$request = new PageIndexerRequest($jsonEncodedParameter, $solrLogManagerMock, $extensionConfigurationMock);
$request = new PageIndexerRequest($jsonEncodedParameter, $solrLogManagerMock, $extensionConfigurationMock, $requestFactory);
return $request;
}

Expand All @@ -236,8 +250,25 @@ protected function getMockedPageIndexerRequestWithUsedFakeResponse($testParamete
/** @var $requestMock PageIndexerRequest */
$requestMock = $this->getMockBuilder(PageIndexerRequest::class)->setMethods(['getUrl'])->setConstructorArgs([$testParameters, $solrLogManagerMock, $extensionConfigurationMock])->getMock();

$responseMock = $this->getFakedGuzzleResponse($fakeResponse);

// we fake the response from a captured response json file
$requestMock->expects($this->once())->method('getUrl')->will($this->returnValue($fakeResponse));
$requestMock->expects($this->once())->method('getUrl')->willReturn($responseMock);
return $requestMock;
}

/**
* Creates a mocked response whose getBody() returns a mocked stream, and whose
* stream returns the passed content from getContents().
*
* @param $fakeResponse content that the mocked body stream should return
* @return ResponseInterface
*/
protected function getFakedGuzzleResponse($fakeResponse): ResponseInterface
{
// the stream double answers every getContents() call with the faked content
$streamMock = $this->getDumbMock(StreamInterface::class);
$streamMock
->expects($this->any())
->method('getContents')
->willReturn($fakeResponse);

/** @var $fakedResponse ResponseInterface */
$fakedResponse = $this->getDumbMock(ResponseInterface::class);
$fakedResponse
->expects($this->any())
->method('getBody')
->willReturn($streamMock);

return $fakedResponse;
}
}

0 comments on commit 776b219

Please sign in to comment.