diff --git a/mobile/test/common/integration/client_integration_test.cc b/mobile/test/common/integration/client_integration_test.cc index 92f3cab3b42b..70d5e18ebeaa 100644 --- a/mobile/test/common/integration/client_integration_test.cc +++ b/mobile/test/common/integration/client_integration_test.cc @@ -603,6 +603,42 @@ TEST_P(ClientIntegrationTest, InvalidDomainFakeResolver) { ASSERT_EQ(cc_.on_error_calls, 0); ASSERT_EQ(cc_.on_headers_calls, 1); ASSERT_EQ(cc_.status, "200"); + + // Kick off a second request. There should be no resolution. + stream_ = createNewStream(stream_prototype_, createDefaultStreamCallbacks()); + stream_->sendHeaders(std::make_unique(default_request_headers_), + true); + terminal_callback_.waitReady(); + EXPECT_EQ(1, getCounterValue("dns_cache.base_dns_cache.dns_query_attempt")); +} + +TEST_P(ClientIntegrationTest, InvalidDomainReresolveWithNoAddresses) { + builder_.setRuntimeGuard("reresolve_null_addresses", true); + Network::OverrideAddrInfoDnsResolverFactory factory; + Registry::InjectFactory inject_factory(factory); + Registry::InjectFactory::forceAllowDuplicates(); + + initialize(); + default_request_headers_.setHost( + absl::StrCat("www.doesnotexist.com:", fake_upstreams_[0]->localAddress()->ip()->port())); + stream_ = stream_prototype_->start(createDefaultStreamCallbacks(), explicit_flow_control_); + stream_->sendHeaders(std::make_unique(default_request_headers_), + true); + // Unblock resolve, but resolve to the bad domain. + ASSERT_TRUE(waitForCounterGe("dns_cache.base_dns_cache.dns_query_attempt", 1)); + Network::TestResolver::unblockResolve(); + terminal_callback_.waitReady(); + + // The stream should fail. + ASSERT_EQ(cc_.on_error_calls, 1); + + // A new stream will kick off a new resolution because the address was null. + stream_ = createNewStream(stream_prototype_, createDefaultStreamCallbacks()); + stream_->sendHeaders(std::make_unique(default_request_headers_), + true); + Network::TestResolver::unblockResolve(); + terminal_callback_.waitReady(); + EXPECT_EQ(2, getCounterValue("dns_cache.base_dns_cache.dns_query_attempt")); } TEST_P(ClientIntegrationTest, ReresolveAndDrain) { diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 0db64624c70f..272d5a482821 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -150,6 +150,8 @@ FALSE_RUNTIME_GUARD(envoy_reloadable_features_quic_defer_logging_to_ack_listener FALSE_RUNTIME_GUARD(envoy_reloadable_features_use_config_in_happy_eyeballs); // TODO(#33474) removed it once GRO packet dropping is fixed. FALSE_RUNTIME_GUARD(envoy_reloadable_features_prefer_quic_client_udp_gro); +// TODO(alyssar) evaluate and either make this a config knob or remove. +FALSE_RUNTIME_GUARD(envoy_reloadable_features_reresolve_null_addresses); // A flag to set the maximum TLS version for google_grpc client to TLS1.2, when needed for // compliance restrictions. diff --git a/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc b/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc index 12a2446ecf2b..e50dd7c6d2a8 100644 --- a/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc +++ b/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.cc @@ -65,7 +65,7 @@ DnsCacheImpl::DnsCacheImpl( ? DnsHostInfo::normalizeHostForDfp(hostname.address(), hostname.port_value()) : hostname.address(); ENVOY_LOG(debug, "DNS pre-resolve starting for host {}", host); - startCacheLoad(host, hostname.port_value(), false); + startCacheLoad(host, hostname.port_value(), false, false); } } @@ -106,33 +106,44 @@ DnsCacheImpl::loadDnsCacheEntry(absl::string_view raw_host, uint16_t default_por is_proxy_lookup ? "proxy mode " : ""); ThreadLocalHostInfo& tls_host_info = *tls_slot_; - auto [is_overflow, host_info] = [&]() { + bool is_overflow = false; + absl::optional host_info = absl::nullopt; + bool ignore_cached_entries = false; + + { absl::ReaderMutexLock read_lock{&primary_hosts_lock_}; + is_overflow = primary_hosts_.size() >= max_hosts_; auto tls_host = primary_hosts_.find(host); - return std::make_tuple( - primary_hosts_.size() >= max_hosts_, - (tls_host != primary_hosts_.end() && tls_host->second->host_info_->firstResolveComplete()) - ? absl::optional(tls_host->second->host_info_) - : absl::nullopt); - }(); + if (tls_host != primary_hosts_.end() && tls_host->second->host_info_->firstResolveComplete()) { + host_info = tls_host->second->host_info_; + } + }; if (host_info) { ENVOY_LOG(debug, "cache hit for host '{}'", host); - return {LoadDnsCacheEntryStatus::InCache, nullptr, host_info}; - } else if (is_overflow) { + if (Runtime::runtimeFeatureEnabled("envoy.reloadable_features.reresolve_null_addresses") && + !is_proxy_lookup && *host_info && (*host_info)->address() == nullptr) { + ENVOY_LOG(debug, "ignoring null address cache hit for miss for host '{}'", host); + ignore_cached_entries = true; + // fall through to handle below + } else { + return {LoadDnsCacheEntryStatus::InCache, nullptr, host_info}; + } + } + if (is_overflow) { ENVOY_LOG(debug, "DNS cache overflow for host '{}'", host); stats_.host_overflow_.inc(); return {LoadDnsCacheEntryStatus::Overflow, nullptr, absl::nullopt}; - } else { - ENVOY_LOG(debug, "cache miss for host '{}', posting to main thread", host); - main_thread_dispatcher_.post([this, host = std::string(host), default_port, is_proxy_lookup]() { - startCacheLoad(host, default_port, is_proxy_lookup); - }); - return {LoadDnsCacheEntryStatus::Loading, - std::make_unique(tls_host_info.pending_resolutions_, host, - callbacks), - absl::nullopt}; } + ENVOY_LOG(debug, "cache miss for host '{}', posting to main thread", host); + main_thread_dispatcher_.post( + [this, host = std::string(host), default_port, is_proxy_lookup, ignore_cached_entries]() { + startCacheLoad(host, default_port, is_proxy_lookup, ignore_cached_entries); + }); + return {LoadDnsCacheEntryStatus::Loading, + std::make_unique(tls_host_info.pending_resolutions_, host, + callbacks), + absl::nullopt}; } Upstream::ResourceAutoIncDecPtr DnsCacheImpl::canCreateDnsRequest() { @@ -176,7 +187,7 @@ DnsCacheImpl::addUpdateCallbacks(UpdateCallbacks& callbacks) { } void DnsCacheImpl::startCacheLoad(const std::string& host, uint16_t default_port, - bool is_proxy_lookup) { + bool is_proxy_lookup, bool ignore_cached_entries) { ASSERT(main_thread_dispatcher_.isThreadSafe()); // It's possible for multiple requests to race trying to start a resolution. If a host is @@ -192,8 +203,15 @@ void DnsCacheImpl::startCacheLoad(const std::string& host, uint16_t default_port }(); if (primary_host) { - ENVOY_LOG(debug, "main thread resolve for host '{}' skipped. Entry present", host); - return; + if (!ignore_cached_entries) { + // This may mean a resolve is in-progress, or a resolve completed before we + // switched contexts to startCacheLoad. Either way the caller will be informed. + ENVOY_LOG(debug, "main thread resolve for host '{}' skipped. Entry present", host); + return; + } + // The host was in cache but we want to force a refresh. Remove the host + // entirely to ensure initial resolve logic works as expected. + removeHost(host, *primary_host); } primary_host = createHost(host, default_port); @@ -220,7 +238,7 @@ DnsCacheImpl::PrimaryHostInfo* DnsCacheImpl::createHost(const std::string& host, std::make_unique( *this, std::string(host_attributes.host_), host_attributes.port_.value_or(default_port), - host_attributes.is_ip_address_, [this, host]() { onReResolve(host); }, + host_attributes.is_ip_address_, [this, host]() { onReResolveAlarm(host); }, [this, host]() { onResolveTimeout(host); })) .first->second.get(); } @@ -246,12 +264,8 @@ void DnsCacheImpl::onResolveTimeout(const std::string& host) { finishResolve(host, Network::DnsResolver::ResolutionStatus::Failure, {}); } -void DnsCacheImpl::onReResolve(const std::string& host) { +void DnsCacheImpl::onReResolveAlarm(const std::string& host) { ASSERT(main_thread_dispatcher_.isThreadSafe()); - // If we need to erase the host, hold onto the PrimaryHostInfo object that owns this callback. - // This is defined at function scope so that it is only erased on function exit to avoid - // use-after-free issues - PrimaryHostInfoPtr host_to_erase; auto& primary_host = getPrimaryHost(host); const std::chrono::steady_clock::duration now_duration = @@ -261,26 +275,35 @@ void DnsCacheImpl::onReResolve(const std::string& host) { last_used_time.count(), host_ttl_.count()); if ((now_duration - last_used_time) > host_ttl_) { ENVOY_LOG(debug, "host='{}' TTL expired, removing", host); - // If the host has no address then that means that the DnsCacheImpl has never - // runAddUpdateCallbacks for this host, and thus the callback targets are not aware of it. - // Therefore, runRemoveCallbacks should only be ran if the host's address != nullptr. - if (primary_host.host_info_->address()) { - runRemoveCallbacks(host); - } - { - removeCacheEntry(host); - absl::WriterMutexLock writer_lock{&primary_hosts_lock_}; - auto host_it = primary_hosts_.find(host); - ASSERT(host_it != primary_hosts_.end()); - host_to_erase = std::move(host_it->second); - primary_hosts_.erase(host_it); - } - notifyThreads(host, primary_host.host_info_); + removeHost(host, primary_host); } else { startResolve(host, primary_host); } } +void DnsCacheImpl::removeHost(const std::string& host, const PrimaryHostInfo& primary_host) { + // If we need to erase the host, hold onto the PrimaryHostInfo object that owns this callback. + // This is defined at function scope so that it is only erased on function exit to avoid + // use-after-free issues + PrimaryHostInfoPtr host_to_erase; + + // If the host has no address then that means that the DnsCacheImpl has never + // runAddUpdateCallbacks for this host, and thus the callback targets are not aware of it. + // Therefore, runRemoveCallbacks should only be ran if the host's address != nullptr. + if (primary_host.host_info_->address()) { + runRemoveCallbacks(host); + } + { + removeCacheEntry(host); + absl::WriterMutexLock writer_lock{&primary_hosts_lock_}; + auto host_it = primary_hosts_.find(host); + ASSERT(host_it != primary_hosts_.end()); + host_to_erase = std::move(host_it->second); + primary_hosts_.erase(host_it); + } + notifyThreads(host, primary_host.host_info_); +} + void DnsCacheImpl::forceRefreshHosts() { ENVOY_LOG(debug, "beginning DNS cache force refresh"); // Tell the underlying resolver to reset itself since we likely just went through a network diff --git a/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.h b/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.h index c7bd9656897b..38d94d5bc398 100644 --- a/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.h +++ b/source/extensions/common/dynamic_forward_proxy/dns_cache_impl.h @@ -204,7 +204,8 @@ class DnsCacheImpl : public DnsCache, Logger::Loggable